git.sesse.net Git - bcachefs-tools-debian/commitdiff
Update bcachefs sources to d267e10a43b2 bcachefs: __bch2_sb_field_to_text()
authorKent Overstreet <kent.overstreet@linux.dev>
Fri, 5 Jan 2024 17:38:14 +0000 (12:38 -0500)
committerKent Overstreet <kent.overstreet@linux.dev>
Fri, 5 Jan 2024 18:01:34 +0000 (13:01 -0500)
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
49 files changed:
.bcachefs_revision
libbcachefs/alloc_background.c
libbcachefs/alloc_background.h
libbcachefs/backpointers.c
libbcachefs/bcachefs.h
libbcachefs/bcachefs_format.h
libbcachefs/bkey_methods.h
libbcachefs/btree_cache.c
libbcachefs/btree_gc.c
libbcachefs/btree_io.c
libbcachefs/btree_iter.c
libbcachefs/btree_locking.c
libbcachefs/btree_trans_commit.c
libbcachefs/btree_update_interior.c
libbcachefs/buckets.c
libbcachefs/buckets.h
libbcachefs/chardev.c
libbcachefs/darray.h
libbcachefs/debug.c
libbcachefs/disk_groups.c
libbcachefs/ec.c
libbcachefs/ec.h
libbcachefs/error.c
libbcachefs/extents.h
libbcachefs/fs-common.c
libbcachefs/fs-io.c
libbcachefs/fsck.c
libbcachefs/inode.c
libbcachefs/inode.h
libbcachefs/opts.h
libbcachefs/printbuf.c
libbcachefs/recovery.c
libbcachefs/reflink.c
libbcachefs/reflink.h
libbcachefs/sb-downgrade.c
libbcachefs/sb-downgrade.h
libbcachefs/sb-errors_types.h
libbcachefs/sb-members.c
libbcachefs/snapshot.c
libbcachefs/snapshot.h
libbcachefs/super-io.c
libbcachefs/super-io.h
libbcachefs/super.c
libbcachefs/thread_with_file.c [new file with mode: 0644]
libbcachefs/thread_with_file.h [new file with mode: 0644]
libbcachefs/thread_with_file_types.h [new file with mode: 0644]
libbcachefs/trace.h
libbcachefs/util.c
libbcachefs/util.h

index 595b9eff9254db89f237b8ee9b371779a8362aa5..bb0353efe1bc9a731693efeff791c954a996541c 100644 (file)
@@ -1 +1 @@
-2a6125decb436ddc5e022c2428f64cf68dc974de
+d267e10a43b2e9ab37da6c9c991ca021142f6324
index 1a127b0a08b314e4f9f14289ba01fa57ae827d1c..a09b9d00226a4e1dd510c0c097ac59e7cb7d3c77 100644 (file)
@@ -749,95 +749,177 @@ static noinline int bch2_bucket_gen_update(struct btree_trans *trans,
        return ret;
 }
 
-int bch2_trans_mark_alloc(struct btree_trans *trans,
-                         enum btree_id btree_id, unsigned level,
-                         struct bkey_s_c old, struct bkey_i *new,
-                         unsigned flags)
+int bch2_trigger_alloc(struct btree_trans *trans,
+                      enum btree_id btree, unsigned level,
+                      struct bkey_s_c old, struct bkey_s new,
+                      unsigned flags)
 {
        struct bch_fs *c = trans->c;
-       struct bch_alloc_v4 old_a_convert, *new_a;
-       const struct bch_alloc_v4 *old_a;
-       u64 old_lru, new_lru;
        int ret = 0;
 
-       /*
-        * Deletion only happens in the device removal path, with
-        * BTREE_TRIGGER_NORUN:
-        */
-       BUG_ON(new->k.type != KEY_TYPE_alloc_v4);
+       if (bch2_trans_inconsistent_on(!bch2_dev_bucket_exists(c, new.k->p), trans,
+                                      "alloc key for invalid device or bucket"))
+               return -EIO;
 
-       old_a = bch2_alloc_to_v4(old, &old_a_convert);
-       new_a = &bkey_i_to_alloc_v4(new)->v;
+       struct bch_dev *ca = bch_dev_bkey_exists(c, new.k->p.inode);
 
-       new_a->data_type = alloc_data_type(*new_a, new_a->data_type);
+       struct bch_alloc_v4 old_a_convert;
+       const struct bch_alloc_v4 *old_a = bch2_alloc_to_v4(old, &old_a_convert);
 
-       if (bch2_bucket_sectors(*new_a) > bch2_bucket_sectors(*old_a)) {
-               new_a->io_time[READ] = max_t(u64, 1, atomic64_read(&c->io_clock[READ].now));
-               new_a->io_time[WRITE]= max_t(u64, 1, atomic64_read(&c->io_clock[WRITE].now));
-               SET_BCH_ALLOC_V4_NEED_INC_GEN(new_a, true);
-               SET_BCH_ALLOC_V4_NEED_DISCARD(new_a, true);
-       }
+       if (flags & BTREE_TRIGGER_TRANSACTIONAL) {
+               struct bch_alloc_v4 *new_a = bkey_s_to_alloc_v4(new).v;
 
-       if (data_type_is_empty(new_a->data_type) &&
-           BCH_ALLOC_V4_NEED_INC_GEN(new_a) &&
-           !bch2_bucket_is_open_safe(c, new->k.p.inode, new->k.p.offset)) {
-               new_a->gen++;
-               SET_BCH_ALLOC_V4_NEED_INC_GEN(new_a, false);
-       }
+               new_a->data_type = alloc_data_type(*new_a, new_a->data_type);
 
-       if (old_a->data_type != new_a->data_type ||
-           (new_a->data_type == BCH_DATA_free &&
-            alloc_freespace_genbits(*old_a) != alloc_freespace_genbits(*new_a))) {
-               ret =   bch2_bucket_do_index(trans, old, old_a, false) ?:
-                       bch2_bucket_do_index(trans, bkey_i_to_s_c(new), new_a, true);
-               if (ret)
-                       return ret;
-       }
+               if (bch2_bucket_sectors(*new_a) > bch2_bucket_sectors(*old_a)) {
+                       new_a->io_time[READ] = max_t(u64, 1, atomic64_read(&c->io_clock[READ].now));
+                       new_a->io_time[WRITE]= max_t(u64, 1, atomic64_read(&c->io_clock[WRITE].now));
+                       SET_BCH_ALLOC_V4_NEED_INC_GEN(new_a, true);
+                       SET_BCH_ALLOC_V4_NEED_DISCARD(new_a, true);
+               }
 
-       if (new_a->data_type == BCH_DATA_cached &&
-           !new_a->io_time[READ])
-               new_a->io_time[READ] = max_t(u64, 1, atomic64_read(&c->io_clock[READ].now));
+               if (data_type_is_empty(new_a->data_type) &&
+                   BCH_ALLOC_V4_NEED_INC_GEN(new_a) &&
+                   !bch2_bucket_is_open_safe(c, new.k->p.inode, new.k->p.offset)) {
+                       new_a->gen++;
+                       SET_BCH_ALLOC_V4_NEED_INC_GEN(new_a, false);
+               }
 
-       old_lru = alloc_lru_idx_read(*old_a);
-       new_lru = alloc_lru_idx_read(*new_a);
+               if (old_a->data_type != new_a->data_type ||
+                   (new_a->data_type == BCH_DATA_free &&
+                    alloc_freespace_genbits(*old_a) != alloc_freespace_genbits(*new_a))) {
+                       ret =   bch2_bucket_do_index(trans, old, old_a, false) ?:
+                               bch2_bucket_do_index(trans, new.s_c, new_a, true);
+                       if (ret)
+                               return ret;
+               }
 
-       if (old_lru != new_lru) {
-               ret = bch2_lru_change(trans, new->k.p.inode,
-                                     bucket_to_u64(new->k.p),
-                                     old_lru, new_lru);
-               if (ret)
-                       return ret;
-       }
+               if (new_a->data_type == BCH_DATA_cached &&
+                   !new_a->io_time[READ])
+                       new_a->io_time[READ] = max_t(u64, 1, atomic64_read(&c->io_clock[READ].now));
 
-       new_a->fragmentation_lru = alloc_lru_idx_fragmentation(*new_a,
-                                       bch_dev_bkey_exists(c, new->k.p.inode));
+               u64 old_lru = alloc_lru_idx_read(*old_a);
+               u64 new_lru = alloc_lru_idx_read(*new_a);
+               if (old_lru != new_lru) {
+                       ret = bch2_lru_change(trans, new.k->p.inode,
+                                             bucket_to_u64(new.k->p),
+                                             old_lru, new_lru);
+                       if (ret)
+                               return ret;
+               }
 
-       if (old_a->fragmentation_lru != new_a->fragmentation_lru) {
-               ret = bch2_lru_change(trans,
-                               BCH_LRU_FRAGMENTATION_START,
-                               bucket_to_u64(new->k.p),
-                               old_a->fragmentation_lru, new_a->fragmentation_lru);
-               if (ret)
-                       return ret;
+               new_a->fragmentation_lru = alloc_lru_idx_fragmentation(*new_a,
+                                               bch_dev_bkey_exists(c, new.k->p.inode));
+               if (old_a->fragmentation_lru != new_a->fragmentation_lru) {
+                       ret = bch2_lru_change(trans,
+                                       BCH_LRU_FRAGMENTATION_START,
+                                       bucket_to_u64(new.k->p),
+                                       old_a->fragmentation_lru, new_a->fragmentation_lru);
+                       if (ret)
+                               return ret;
+               }
+
+               if (old_a->gen != new_a->gen) {
+                       ret = bch2_bucket_gen_update(trans, new.k->p, new_a->gen);
+                       if (ret)
+                               return ret;
+               }
+
+               /*
+                * need to know if we're getting called from the invalidate path or
+                * not:
+                */
+
+               if ((flags & BTREE_TRIGGER_BUCKET_INVALIDATE) &&
+                   old_a->cached_sectors) {
+                       ret = bch2_update_cached_sectors_list(trans, new.k->p.inode,
+                                                             -((s64) old_a->cached_sectors));
+                       if (ret)
+                               return ret;
+               }
        }
 
-       if (old_a->gen != new_a->gen) {
-               ret = bch2_bucket_gen_update(trans, new->k.p, new_a->gen);
-               if (ret)
-                       return ret;
+       if (!(flags & BTREE_TRIGGER_TRANSACTIONAL) && (flags & BTREE_TRIGGER_INSERT)) {
+               struct bch_alloc_v4 *new_a = bkey_s_to_alloc_v4(new).v;
+               u64 journal_seq = trans->journal_res.seq;
+               u64 bucket_journal_seq = new_a->journal_seq;
+
+               if ((flags & BTREE_TRIGGER_INSERT) &&
+                   data_type_is_empty(old_a->data_type) !=
+                   data_type_is_empty(new_a->data_type) &&
+                   new.k->type == KEY_TYPE_alloc_v4) {
+                       struct bch_alloc_v4 *v = bkey_s_to_alloc_v4(new).v;
+
+                       /*
+                        * If the btree updates referring to a bucket weren't flushed
+                        * before the bucket became empty again, then we don't have
+                        * to wait on a journal flush before we can reuse the bucket:
+                        */
+                       v->journal_seq = bucket_journal_seq =
+                               data_type_is_empty(new_a->data_type) &&
+                               (journal_seq == v->journal_seq ||
+                                bch2_journal_noflush_seq(&c->journal, v->journal_seq))
+                               ? 0 : journal_seq;
+               }
+
+               if (!data_type_is_empty(old_a->data_type) &&
+                   data_type_is_empty(new_a->data_type) &&
+                   bucket_journal_seq) {
+                       ret = bch2_set_bucket_needs_journal_commit(&c->buckets_waiting_for_journal,
+                                       c->journal.flushed_seq_ondisk,
+                                       new.k->p.inode, new.k->p.offset,
+                                       bucket_journal_seq);
+                       if (ret) {
+                               bch2_fs_fatal_error(c,
+                                       "error setting bucket_needs_journal_commit: %i", ret);
+                               return ret;
+                       }
+               }
+
+               percpu_down_read(&c->mark_lock);
+               if (new_a->gen != old_a->gen)
+                       *bucket_gen(ca, new.k->p.offset) = new_a->gen;
+
+               bch2_dev_usage_update(c, ca, old_a, new_a, journal_seq, false);
+
+               if (new_a->data_type == BCH_DATA_free &&
+                   (!new_a->journal_seq || new_a->journal_seq < c->journal.flushed_seq_ondisk))
+                       closure_wake_up(&c->freelist_wait);
+
+               if (new_a->data_type == BCH_DATA_need_discard &&
+                   (!bucket_journal_seq || bucket_journal_seq < c->journal.flushed_seq_ondisk))
+                       bch2_do_discards(c);
+
+               if (old_a->data_type != BCH_DATA_cached &&
+                   new_a->data_type == BCH_DATA_cached &&
+                   should_invalidate_buckets(ca, bch2_dev_usage_read(ca)))
+                       bch2_do_invalidates(c);
+
+               if (new_a->data_type == BCH_DATA_need_gc_gens)
+                       bch2_do_gc_gens(c);
+               percpu_up_read(&c->mark_lock);
        }
 
-       /*
-        * need to know if we're getting called from the invalidate path or
-        * not:
-        */
+       if ((flags & BTREE_TRIGGER_GC) &&
+           (flags & BTREE_TRIGGER_BUCKET_INVALIDATE)) {
+               struct bch_alloc_v4 new_a_convert;
+               const struct bch_alloc_v4 *new_a = bch2_alloc_to_v4(new.s_c, &new_a_convert);
 
-       if ((flags & BTREE_TRIGGER_BUCKET_INVALIDATE) &&
-           old_a->cached_sectors) {
-               ret = bch2_update_cached_sectors_list(trans, new->k.p.inode,
-                                                     -((s64) old_a->cached_sectors));
-               if (ret)
-                       return ret;
+               percpu_down_read(&c->mark_lock);
+               struct bucket *g = gc_bucket(ca, new.k->p.offset);
+
+               bucket_lock(g);
+
+               g->gen_valid            = 1;
+               g->gen                  = new_a->gen;
+               g->data_type            = new_a->data_type;
+               g->stripe               = new_a->stripe;
+               g->stripe_redundancy    = new_a->stripe_redundancy;
+               g->dirty_sectors        = new_a->dirty_sectors;
+               g->cached_sectors       = new_a->cached_sectors;
+
+               bucket_unlock(g);
+               percpu_up_read(&c->mark_lock);
        }
 
        return 0;
@@ -1150,9 +1232,6 @@ int bch2_check_alloc_hole_bucket_gens(struct btree_trans *trans,
        unsigned i, gens_offset, gens_end_offset;
        int ret;
 
-       if (c->sb.version < bcachefs_metadata_version_bucket_gens)
-               return 0;
-
        bch2_btree_iter_set_pos(bucket_gens_iter, alloc_gens_pos(start, &gens_offset));
 
        k = bch2_btree_iter_peek_slot(bucket_gens_iter);
index 96671f166dd8053a7842fdda13e9afd14e62144b..e7f7e842ee1b725f1373e4782cc34e1c9b83afa7 100644 (file)
@@ -182,24 +182,21 @@ void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
 #define bch2_bkey_ops_alloc ((struct bkey_ops) {       \
        .key_invalid    = bch2_alloc_v1_invalid,        \
        .val_to_text    = bch2_alloc_to_text,           \
-       .trans_trigger  = bch2_trans_mark_alloc,        \
-       .atomic_trigger = bch2_mark_alloc,              \
+       .trigger        = bch2_trigger_alloc,           \
        .min_val_size   = 8,                            \
 })
 
 #define bch2_bkey_ops_alloc_v2 ((struct bkey_ops) {    \
        .key_invalid    = bch2_alloc_v2_invalid,        \
        .val_to_text    = bch2_alloc_to_text,           \
-       .trans_trigger  = bch2_trans_mark_alloc,        \
-       .atomic_trigger = bch2_mark_alloc,              \
+       .trigger        = bch2_trigger_alloc,           \
        .min_val_size   = 8,                            \
 })
 
 #define bch2_bkey_ops_alloc_v3 ((struct bkey_ops) {    \
        .key_invalid    = bch2_alloc_v3_invalid,        \
        .val_to_text    = bch2_alloc_to_text,           \
-       .trans_trigger  = bch2_trans_mark_alloc,        \
-       .atomic_trigger = bch2_mark_alloc,              \
+       .trigger        = bch2_trigger_alloc,           \
        .min_val_size   = 16,                           \
 })
 
@@ -207,8 +204,7 @@ void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
        .key_invalid    = bch2_alloc_v4_invalid,        \
        .val_to_text    = bch2_alloc_to_text,           \
        .swab           = bch2_alloc_v4_swab,           \
-       .trans_trigger  = bch2_trans_mark_alloc,        \
-       .atomic_trigger = bch2_mark_alloc,              \
+       .trigger        = bch2_trigger_alloc,           \
        .min_val_size   = 48,                           \
 })
 
@@ -232,8 +228,8 @@ static inline bool bkey_is_alloc(const struct bkey *k)
 
 int bch2_alloc_read(struct bch_fs *);
 
-int bch2_trans_mark_alloc(struct btree_trans *, enum btree_id, unsigned,
-                         struct bkey_s_c, struct bkey_i *, unsigned);
+int bch2_trigger_alloc(struct btree_trans *, enum btree_id, unsigned,
+                      struct bkey_s_c, struct bkey_s, unsigned);
 int bch2_check_alloc_info(struct bch_fs *);
 int bch2_check_alloc_to_lru_refs(struct bch_fs *);
 void bch2_do_discards(struct bch_fs *);
index a97fc2b61ee2dc48e64180b253064a750f669381..e358a2ffffdea48c80eee18ab299cd7103d72991 100644 (file)
@@ -467,8 +467,7 @@ missing:
        prt_printf(&buf, "\nbp pos ");
        bch2_bpos_to_text(&buf, bp_iter.pos);
 
-       if (c->sb.version_upgrade_complete < bcachefs_metadata_version_backpointers ||
-           c->opts.reconstruct_alloc ||
+       if (c->opts.reconstruct_alloc ||
            fsck_err(c, ptr_to_missing_backpointer, "%s", buf.buf))
                ret = bch2_bucket_backpointer_mod(trans, bucket, bp, orig_k, true);
 
index 840f605eff1f8dc693b17eacc1d7145224a843cf..dac383e3718163b6566eb2e6a4ff305fb65da715 100644 (file)
@@ -425,6 +425,7 @@ BCH_DEBUG_PARAMS_DEBUG()
        x(btree_node_merge)                     \
        x(btree_node_sort)                      \
        x(btree_node_read)                      \
+       x(btree_node_read_done)                 \
        x(btree_interior_update_foreground)     \
        x(btree_interior_update_total)          \
        x(btree_gc)                             \
@@ -464,6 +465,7 @@ enum bch_time_stats {
 #include "replicas_types.h"
 #include "subvolume_types.h"
 #include "super_types.h"
+#include "thread_with_file_types.h"
 
 /* Number of nodes btree coalesce will try to coalesce at once */
 #define GC_MERGE_NODES         4U
@@ -478,12 +480,6 @@ enum bch_time_stats {
 
 struct btree;
 
-struct log_output {
-       spinlock_t              lock;
-       wait_queue_head_t       wait;
-       struct printbuf         buf;
-};
-
 enum gc_phase {
        GC_PHASE_NOT_RUNNING,
        GC_PHASE_START,
@@ -607,9 +603,6 @@ struct bch_dev {
 };
 
 /*
- * fsck_done - kill?
- *
- * replace with something more general from enumated fsck passes/errors:
  * initial_gc_unfixed
  * error
  * topology error
@@ -625,7 +618,7 @@ struct bch_dev {
        x(going_ro)                     \
        x(write_disable_complete)       \
        x(clean_shutdown)               \
-       x(fsck_done)                    \
+       x(fsck_running)                 \
        x(initial_gc_unfixed)           \
        x(need_another_gc)              \
        x(need_delete_dead_snapshots)   \
@@ -739,8 +732,8 @@ struct bch_fs {
        struct super_block      *vfs_sb;
        dev_t                   dev;
        char                    name[40];
-       struct log_output       *output;
-       struct task_struct      *output_filter;
+       struct stdio_redirect   *stdio;
+       struct task_struct      *stdio_filter;
 
        /* ro/rw, add/remove/resize devices: */
        struct rw_semaphore     state_lock;
@@ -1252,6 +1245,15 @@ static inline bool bch2_dev_exists2(const struct bch_fs *c, unsigned dev)
        return dev < c->sb.nr_devices && c->devs[dev];
 }
 
+static inline struct stdio_redirect *bch2_fs_stdio_redirect(struct bch_fs *c)
+{
+       struct stdio_redirect *stdio = c->stdio;
+
+       if (c->stdio_filter && c->stdio_filter != current)
+               stdio = NULL;
+       return stdio;
+}
+
 #define BKEY_PADDED_ONSTACK(key, pad)                          \
        struct { struct bkey_i key; __u64 key ## _pad[pad]; }
 
index e7a2d25dfe049e87e004b6c11906fc2229cfb78f..0d5ac4184fbcef5a2b7ae618d6bdf81478f09530 100644 (file)
@@ -1672,73 +1672,41 @@ struct bch_sb_field_downgrade {
 #define BCH_VERSION_MINOR(_v)          ((__u16) ((_v) & ~(~0U << 10)))
 #define BCH_VERSION(_major, _minor)    (((_major) << 10)|(_minor) << 0)
 
-#define RECOVERY_PASS_ALL_FSCK         (1ULL << 63)
-
 /*
  * field 1:            version name
  * field 2:            BCH_VERSION(major, minor)
 * field 3:            recovery passes required on upgrade
  */
 #define BCH_METADATA_VERSIONS()                                                \
-       x(bkey_renumber,                BCH_VERSION(0, 10),             \
-         RECOVERY_PASS_ALL_FSCK)                                       \
-       x(inode_btree_change,           BCH_VERSION(0, 11),             \
-         RECOVERY_PASS_ALL_FSCK)                                       \
-       x(snapshot,                     BCH_VERSION(0, 12),             \
-         RECOVERY_PASS_ALL_FSCK)                                       \
-       x(inode_backpointers,           BCH_VERSION(0, 13),             \
-         RECOVERY_PASS_ALL_FSCK)                                       \
-       x(btree_ptr_sectors_written,    BCH_VERSION(0, 14),             \
-         RECOVERY_PASS_ALL_FSCK)                                       \
-       x(snapshot_2,                   BCH_VERSION(0, 15),             \
-         BIT_ULL(BCH_RECOVERY_PASS_fs_upgrade_for_subvolumes)|         \
-         BIT_ULL(BCH_RECOVERY_PASS_initialize_subvolumes)|             \
-         RECOVERY_PASS_ALL_FSCK)                                       \
-       x(reflink_p_fix,                BCH_VERSION(0, 16),             \
-         BIT_ULL(BCH_RECOVERY_PASS_fix_reflink_p))                     \
-       x(subvol_dirent,                BCH_VERSION(0, 17),             \
-         RECOVERY_PASS_ALL_FSCK)                                       \
-       x(inode_v2,                     BCH_VERSION(0, 18),             \
-         RECOVERY_PASS_ALL_FSCK)                                       \
-       x(freespace,                    BCH_VERSION(0, 19),             \
-         RECOVERY_PASS_ALL_FSCK)                                       \
-       x(alloc_v4,                     BCH_VERSION(0, 20),             \
-         RECOVERY_PASS_ALL_FSCK)                                       \
-       x(new_data_types,               BCH_VERSION(0, 21),             \
-         RECOVERY_PASS_ALL_FSCK)                                       \
-       x(backpointers,                 BCH_VERSION(0, 22),             \
-         RECOVERY_PASS_ALL_FSCK)                                       \
-       x(inode_v3,                     BCH_VERSION(0, 23),             \
-         RECOVERY_PASS_ALL_FSCK)                                       \
-       x(unwritten_extents,            BCH_VERSION(0, 24),             \
-         RECOVERY_PASS_ALL_FSCK)                                       \
-       x(bucket_gens,                  BCH_VERSION(0, 25),             \
-         BIT_ULL(BCH_RECOVERY_PASS_bucket_gens_init)|                  \
-         RECOVERY_PASS_ALL_FSCK)                                       \
-       x(lru_v2,                       BCH_VERSION(0, 26),             \
-         RECOVERY_PASS_ALL_FSCK)                                       \
-       x(fragmentation_lru,            BCH_VERSION(0, 27),             \
-         RECOVERY_PASS_ALL_FSCK)                                       \
-       x(no_bps_in_alloc_keys,         BCH_VERSION(0, 28),             \
-         RECOVERY_PASS_ALL_FSCK)                                       \
-       x(snapshot_trees,               BCH_VERSION(0, 29),             \
-         RECOVERY_PASS_ALL_FSCK)                                       \
-       x(major_minor,                  BCH_VERSION(1,  0),             \
-         0)                                                            \
-       x(snapshot_skiplists,           BCH_VERSION(1,  1),             \
-         BIT_ULL(BCH_RECOVERY_PASS_check_snapshots))                   \
-       x(deleted_inodes,               BCH_VERSION(1,  2),             \
-         BIT_ULL(BCH_RECOVERY_PASS_check_inodes))                      \
-       x(rebalance_work,               BCH_VERSION(1,  3),             \
-         BIT_ULL(BCH_RECOVERY_PASS_set_fs_needs_rebalance))            \
-       x(member_seq,                   BCH_VERSION(1,  4),             \
-         0)                                                            \
-       x(disk_accounting_v2,           BCH_VERSION(1,  5),             \
-         BIT_ULL(BCH_RECOVERY_PASS_check_alloc_info))
+       x(bkey_renumber,                BCH_VERSION(0, 10))             \
+       x(inode_btree_change,           BCH_VERSION(0, 11))             \
+       x(snapshot,                     BCH_VERSION(0, 12))             \
+       x(inode_backpointers,           BCH_VERSION(0, 13))             \
+       x(btree_ptr_sectors_written,    BCH_VERSION(0, 14))             \
+       x(snapshot_2,                   BCH_VERSION(0, 15))             \
+       x(reflink_p_fix,                BCH_VERSION(0, 16))             \
+       x(subvol_dirent,                BCH_VERSION(0, 17))             \
+       x(inode_v2,                     BCH_VERSION(0, 18))             \
+       x(freespace,                    BCH_VERSION(0, 19))             \
+       x(alloc_v4,                     BCH_VERSION(0, 20))             \
+       x(new_data_types,               BCH_VERSION(0, 21))             \
+       x(backpointers,                 BCH_VERSION(0, 22))             \
+       x(inode_v3,                     BCH_VERSION(0, 23))             \
+       x(unwritten_extents,            BCH_VERSION(0, 24))             \
+       x(bucket_gens,                  BCH_VERSION(0, 25))             \
+       x(lru_v2,                       BCH_VERSION(0, 26))             \
+       x(fragmentation_lru,            BCH_VERSION(0, 27))             \
+       x(no_bps_in_alloc_keys,         BCH_VERSION(0, 28))             \
+       x(snapshot_trees,               BCH_VERSION(0, 29))             \
+       x(major_minor,                  BCH_VERSION(1,  0))             \
+       x(snapshot_skiplists,           BCH_VERSION(1,  1))             \
+       x(deleted_inodes,               BCH_VERSION(1,  2))             \
+       x(rebalance_work,               BCH_VERSION(1,  3))             \
+       x(member_seq,                   BCH_VERSION(1,  4))
 
 enum bcachefs_metadata_version {
        bcachefs_metadata_version_min = 9,
-#define x(t, n, upgrade_passes)        bcachefs_metadata_version_##t = n,
+#define x(t, n)        bcachefs_metadata_version_##t = n,
        BCH_METADATA_VERSIONS()
 #undef x
        bcachefs_metadata_version_max
index 912adadfb4dd40a3435d7f6a82eba365a750fa67..ee82283722b759bbce174b2d902403c0024fe574 100644 (file)
@@ -28,10 +28,8 @@ struct bkey_ops {
        void            (*swab)(struct bkey_s);
        bool            (*key_normalize)(struct bch_fs *, struct bkey_s);
        bool            (*key_merge)(struct bch_fs *, struct bkey_s, struct bkey_s_c);
-       int             (*trans_trigger)(struct btree_trans *, enum btree_id, unsigned,
-                                        struct bkey_s_c, struct bkey_i *, unsigned);
-       int             (*atomic_trigger)(struct btree_trans *, enum btree_id, unsigned,
-                                         struct bkey_s_c, struct bkey_s_c, unsigned);
+       int             (*trigger)(struct btree_trans *, enum btree_id, unsigned,
+                                  struct bkey_s_c, struct bkey_s, unsigned);
        void            (*compat)(enum btree_id id, unsigned version,
                                  unsigned big_endian, int write,
                                  struct bkey_s);
@@ -78,82 +76,86 @@ static inline bool bch2_bkey_maybe_mergable(const struct bkey *l, const struct b
 
 bool bch2_bkey_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c);
 
-static inline int bch2_mark_key(struct btree_trans *trans,
-               enum btree_id btree, unsigned level,
-               struct bkey_s_c old, struct bkey_s_c new,
-               unsigned flags)
-{
-       const struct bkey_ops *ops = bch2_bkey_type_ops(old.k->type ?: new.k->type);
-
-       return ops->atomic_trigger
-               ? ops->atomic_trigger(trans, btree, level, old, new, flags)
-               : 0;
-}
-
 enum btree_update_flags {
        __BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE = __BTREE_ITER_FLAGS_END,
        __BTREE_UPDATE_NOJOURNAL,
        __BTREE_UPDATE_KEY_CACHE_RECLAIM,
 
-       __BTREE_TRIGGER_NORUN,          /* Don't run triggers at all */
-
+       __BTREE_TRIGGER_NORUN,
+       __BTREE_TRIGGER_TRANSACTIONAL,
        __BTREE_TRIGGER_INSERT,
        __BTREE_TRIGGER_OVERWRITE,
-
        __BTREE_TRIGGER_GC,
        __BTREE_TRIGGER_BUCKET_INVALIDATE,
-       __BTREE_TRIGGER_NOATOMIC,
 };
 
 #define BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE (1U << __BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE)
 #define BTREE_UPDATE_NOJOURNAL         (1U << __BTREE_UPDATE_NOJOURNAL)
 #define BTREE_UPDATE_KEY_CACHE_RECLAIM (1U << __BTREE_UPDATE_KEY_CACHE_RECLAIM)
 
+/* Don't run triggers at all */
 #define BTREE_TRIGGER_NORUN            (1U << __BTREE_TRIGGER_NORUN)
 
+/*
+ * If set, we're running transactional triggers as part of a transaction commit:
+ * triggers may generate new updates
+ *
+ * If cleared, and either BTREE_TRIGGER_INSERT or BTREE_TRIGGER_OVERWRITE is set,
+ * we're running atomic triggers during a transaction commit: we have our
+ * journal reservation, we're holding btree node write locks, and we know the
+ * transaction is going to commit (returning an error here is a fatal error,
+ * causing us to go emergency read-only)
+ */
+#define BTREE_TRIGGER_TRANSACTIONAL    (1U << __BTREE_TRIGGER_TRANSACTIONAL)
+
+/* @new is entering the btree */
 #define BTREE_TRIGGER_INSERT           (1U << __BTREE_TRIGGER_INSERT)
+
+/* @old is leaving the btree */
 #define BTREE_TRIGGER_OVERWRITE                (1U << __BTREE_TRIGGER_OVERWRITE)
 
+/* We're in gc/fsck: running triggers to recalculate e.g. disk usage */
 #define BTREE_TRIGGER_GC               (1U << __BTREE_TRIGGER_GC)
+
+/* signal from bucket invalidate path to alloc trigger */
 #define BTREE_TRIGGER_BUCKET_INVALIDATE        (1U << __BTREE_TRIGGER_BUCKET_INVALIDATE)
-#define BTREE_TRIGGER_NOATOMIC         (1U << __BTREE_TRIGGER_NOATOMIC)
 
-static inline int bch2_trans_mark_key(struct btree_trans *trans,
-                                     enum btree_id btree_id, unsigned level,
-                                     struct bkey_s_c old, struct bkey_i *new,
-                                     unsigned flags)
+static inline int bch2_key_trigger(struct btree_trans *trans,
+               enum btree_id btree, unsigned level,
+               struct bkey_s_c old, struct bkey_s new,
+               unsigned flags)
 {
-       const struct bkey_ops *ops = bch2_bkey_type_ops(old.k->type ?: new->k.type);
+       const struct bkey_ops *ops = bch2_bkey_type_ops(old.k->type ?: new.k->type);
 
-       return ops->trans_trigger
-               ? ops->trans_trigger(trans, btree_id, level, old, new, flags)
+       return ops->trigger
+               ? ops->trigger(trans, btree, level, old, new, flags)
                : 0;
 }
 
-static inline int bch2_trans_mark_old(struct btree_trans *trans,
-                                     enum btree_id btree_id, unsigned level,
-                                     struct bkey_s_c old, unsigned flags)
+static inline int bch2_key_trigger_old(struct btree_trans *trans,
+                                      enum btree_id btree_id, unsigned level,
+                                      struct bkey_s_c old, unsigned flags)
 {
        struct bkey_i deleted;
 
        bkey_init(&deleted.k);
        deleted.k.p = old.k->p;
 
-       return bch2_trans_mark_key(trans, btree_id, level, old, &deleted,
-                                  BTREE_TRIGGER_OVERWRITE|flags);
+       return bch2_key_trigger(trans, btree_id, level, old, bkey_i_to_s(&deleted),
+                               BTREE_TRIGGER_OVERWRITE|flags);
 }
 
-static inline int bch2_trans_mark_new(struct btree_trans *trans,
-                                     enum btree_id btree_id, unsigned level,
-                                     struct bkey_i *new, unsigned flags)
+static inline int bch2_key_trigger_new(struct btree_trans *trans,
+                                      enum btree_id btree_id, unsigned level,
+                                      struct bkey_s new, unsigned flags)
 {
        struct bkey_i deleted;
 
        bkey_init(&deleted.k);
-       deleted.k.p = new->k.p;
+       deleted.k.p = new.k->p;
 
-       return bch2_trans_mark_key(trans, btree_id, level, bkey_i_to_s_c(&deleted), new,
-                                  BTREE_TRIGGER_INSERT|flags);
+       return bch2_key_trigger(trans, btree_id, level, bkey_i_to_s_c(&deleted), new,
+                               BTREE_TRIGGER_INSERT|flags);
 }
 
 void bch2_bkey_renumber(enum btree_node_type, struct bkey_packed *, int);
index 9574c8c4d70864b8617df8d68dff6d8e47bdf855..8e2488a4b58d00a45f78a7c64a6c1e83f4b0ff59 100644 (file)
@@ -719,12 +719,6 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans,
        if (IS_ERR(b))
                return b;
 
-       /*
-        * Btree nodes read in from disk should not have the accessed bit set
-        * initially, so that linear scans don't thrash the cache:
-        */
-       clear_btree_node_accessed(b);
-
        bkey_copy(&b->key, k);
        if (bch2_btree_node_hash_insert(bc, b, level, btree_id)) {
                /* raced with another fill: */
index 9f27cb3ea5633c355dc07a936408908389ac10d7..49b4ade758c3623ed35557a02a00afd31b0bec52 100644 (file)
 #define DROP_THIS_NODE         10
 #define DROP_PREV_NODE         11
 
+static struct bkey_s unsafe_bkey_s_c_to_s(struct bkey_s_c k)
+{
+       return (struct bkey_s) {{{
+               (struct bkey *) k.k,
+               (struct bch_val *) k.v
+       }}};
+}
+
 static bool should_restart_for_topology_repair(struct bch_fs *c)
 {
        return c->opts.fix_errors != FSCK_FIX_no &&
@@ -805,9 +813,6 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id,
        struct bch_fs *c = trans->c;
        struct bkey deleted = KEY(0, 0, 0);
        struct bkey_s_c old = (struct bkey_s_c) { &deleted, NULL };
-       unsigned flags =
-               BTREE_TRIGGER_GC|
-               (initial ? BTREE_TRIGGER_NOATOMIC : 0);
        int ret = 0;
 
        deleted.p = k->k->p;
@@ -829,7 +834,7 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id,
        }
 
        ret = commit_do(trans, NULL, NULL, 0,
-                       bch2_mark_key(trans, btree_id, level, old, *k, flags));
+                       bch2_key_trigger(trans, btree_id, level, old, unsafe_bkey_s_c_to_s(*k), BTREE_TRIGGER_GC));
 fsck_err:
 err:
        bch_err_fn(c, ret);
@@ -1589,7 +1594,7 @@ static int bch2_gc_write_reflink_key(struct btree_trans *trans,
                if (!r->refcount)
                        new->k.type = KEY_TYPE_deleted;
                else
-                       *bkey_refcount(new) = cpu_to_le64(r->refcount);
+                       *bkey_refcount(bkey_i_to_s(new)) = cpu_to_le64(r->refcount);
        }
 fsck_err:
        printbuf_exit(&buf);
index 38d27cae49ea5421a00b976c4e0bfbe1f8991d72..378579bbe2ede6da15591b89a3b2a08c9f9fd37e 100644 (file)
@@ -942,6 +942,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
        unsigned ptr_written = btree_ptr_sectors_written(&b->key);
        struct printbuf buf = PRINTBUF;
        int ret = 0, retry_read = 0, write = READ;
+       u64 start_time = local_clock();
 
        b->version_ondisk = U16_MAX;
        /* We might get called multiple times on read retry: */
@@ -1209,6 +1210,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
 out:
        mempool_free(iter, &c->fill_iter);
        printbuf_exit(&buf);
+       bch2_time_stats_update(&c->times[BCH_TIME_btree_node_read_done], start_time);
        return retry_read;
 fsck_err:
        if (ret == -BCH_ERR_btree_node_read_err_want_retry ||
@@ -1645,7 +1647,7 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b,
 
                if (sync) {
                        submit_bio_wait(bio);
-
+                       bch2_latency_acct(ca, rb->start_time, READ);
                        btree_node_read_work(&rb->work);
                } else {
                        submit_bio(bio);
index 7e5c797cfaf240e0258259cddae26bdb8bb3fcef..6e8e9ba5805d2239ad74ddf6cdfb2359ff5be33d 100644 (file)
@@ -897,7 +897,8 @@ static noinline int btree_node_iter_and_journal_peek(struct btree_trans *trans,
 
        bch2_bkey_buf_reassemble(out, c, k);
 
-       if (flags & BTREE_ITER_PREFETCH)
+       if ((flags & BTREE_ITER_PREFETCH) &&
+           c->opts.btree_node_prefetch)
                ret = btree_path_prefetch_j(trans, path, &jiter);
 
        bch2_btree_and_journal_iter_exit(&jiter);
@@ -929,7 +930,8 @@ static __always_inline int btree_path_down(struct btree_trans *trans,
                bch2_bkey_buf_unpack(&tmp, c, l->b,
                                 bch2_btree_node_iter_peek(&l->iter, l->b));
 
-               if (flags & BTREE_ITER_PREFETCH) {
+               if ((flags & BTREE_ITER_PREFETCH) &&
+                   c->opts.btree_node_prefetch) {
                        ret = btree_path_prefetch(trans, path);
                        if (ret)
                                goto err;
@@ -2816,11 +2818,34 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
        return p;
 }
 
+#include "sb-members.h"
+
 static inline void check_srcu_held_too_long(struct btree_trans *trans)
 {
-       WARN(trans->srcu_held && time_after(jiffies, trans->srcu_lock_time + HZ * 10),
-            "btree trans held srcu lock (delaying memory reclaim) for %lu seconds",
-            (jiffies - trans->srcu_lock_time) / HZ);
+       if (trans->srcu_held && time_after(jiffies, trans->srcu_lock_time + HZ * 10)) {
+               struct printbuf buf = PRINTBUF;
+
+               prt_str(&buf, "btree node read time:\n");
+               bch2_time_stats_to_text(&buf, &trans->c->times[BCH_TIME_btree_node_read]);
+
+               prt_str(&buf, "btree node read_done time:\n");
+               bch2_time_stats_to_text(&buf, &trans->c->times[BCH_TIME_btree_node_read_done]);
+
+               for_each_member_device(trans->c, ca) {
+                       prt_printf(&buf, "device %u read time:\n", ca->dev_idx);
+                       bch2_time_stats_to_text(&buf, &ca->io_latency[READ]);
+               }
+
+               struct btree_transaction_stats *s = btree_trans_stats(trans);
+               prt_str(&buf, "transaction duration:\n");
+               bch2_time_stats_to_text(&buf, &s->duration);
+
+               WARN(trans->srcu_held && time_after(jiffies, trans->srcu_lock_time + HZ * 10),
+                    "btree trans held srcu lock (delaying memory reclaim) for %lu seconds",
+                    (jiffies - trans->srcu_lock_time) / HZ);
+               bch2_print_string_as_lines(KERN_ERR, buf.buf);
+               printbuf_exit(&buf);
+       }
 }
 
 void bch2_trans_srcu_unlock(struct btree_trans *trans)
index 1ed8327a9fa2cc409cba3eb8c2ac1848999c7508..2d1c95c42f240cc88b31c2728d7a970560e4865a 100644 (file)
@@ -86,8 +86,14 @@ static noinline void print_cycle(struct printbuf *out, struct lock_graph *g)
        prt_printf(out, "Found lock cycle (%u entries):", g->nr);
        prt_newline(out);
 
-       for (i = g->g; i < g->g + g->nr; i++)
+       for (i = g->g; i < g->g + g->nr; i++) {
+               struct task_struct *task = READ_ONCE(i->trans->locking_wait.task);
+               if (!task)
+                       continue;
+
                bch2_btree_trans_to_text(out, i->trans);
+               bch2_prt_task_backtrace(out, task, i == g->g ? 5 : 1);
+       }
 }
 
 static noinline void print_chain(struct printbuf *out, struct lock_graph *g)
@@ -144,8 +150,7 @@ static bool lock_graph_remove_non_waiters(struct lock_graph *g)
        return false;
 }
 
-static void trace_would_deadlock(struct lock_graph *g, struct btree_trans *trans,
-                                unsigned long ip)
+static void trace_would_deadlock(struct lock_graph *g, struct btree_trans *trans)
 {
        struct bch_fs *c = trans->c;
 
@@ -157,7 +162,7 @@ static void trace_would_deadlock(struct lock_graph *g, struct btree_trans *trans
                buf.atomic++;
                print_cycle(&buf, g);
 
-               trace_trans_restart_would_deadlock(trans, ip, buf.buf);
+               trace_trans_restart_would_deadlock(trans, buf.buf);
                printbuf_exit(&buf);
        }
 }
@@ -165,7 +170,7 @@ static void trace_would_deadlock(struct lock_graph *g, struct btree_trans *trans
 static int abort_lock(struct lock_graph *g, struct trans_waiting_for_lock *i)
 {
        if (i == g->g) {
-               trace_would_deadlock(g, i->trans, _RET_IP_);
+               trace_would_deadlock(g, i->trans);
                return btree_trans_restart(i->trans, BCH_ERR_transaction_restart_would_deadlock);
        } else {
                i->trans->lock_must_abort = true;
@@ -222,7 +227,7 @@ static noinline int break_cycle(struct lock_graph *g, struct printbuf *cycle)
                        prt_printf(&buf, "backtrace:");
                        prt_newline(&buf);
                        printbuf_indent_add(&buf, 2);
-                       bch2_prt_task_backtrace(&buf, trans->locking_wait.task);
+                       bch2_prt_task_backtrace(&buf, trans->locking_wait.task, 2);
                        printbuf_indent_sub(&buf, 2);
                        prt_newline(&buf);
                }
@@ -291,7 +296,7 @@ int bch2_check_for_deadlock(struct btree_trans *trans, struct printbuf *cycle)
                if (cycle)
                        return -1;
 
-               trace_would_deadlock(&g, trans, _RET_IP_);
+               trace_would_deadlock(&g, trans);
                return btree_trans_restart(trans, BCH_ERR_transaction_restart_would_deadlock);
        }
 
index 3472882bf9db794f3fc2f8566f02c71f7ecf8297..80505554498cf96697cd8c8108207a92e854a8aa 100644 (file)
@@ -451,20 +451,15 @@ static int run_one_mem_trigger(struct btree_trans *trans,
        if (!btree_node_type_needs_gc(__btree_node_type(i->level, i->btree_id)))
                return 0;
 
-       if (old_ops->atomic_trigger == new_ops->atomic_trigger) {
-               ret   = bch2_mark_key(trans, i->btree_id, i->level,
-                               old, bkey_i_to_s_c(new),
+       if (old_ops->trigger == new_ops->trigger) {
+               ret   = bch2_key_trigger(trans, i->btree_id, i->level,
+                               old, bkey_i_to_s(new),
                                BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE|flags);
        } else {
-               struct bkey             _deleted = POS_KEY((trans->paths + i->path)->pos);
-               struct bkey_s_c         deleted = (struct bkey_s_c) { &_deleted, NULL };
-
-               ret   = bch2_mark_key(trans, i->btree_id, i->level,
-                               deleted, bkey_i_to_s_c(new),
-                               BTREE_TRIGGER_INSERT|flags) ?:
-                       bch2_mark_key(trans, i->btree_id, i->level,
-                               old, deleted,
-                               BTREE_TRIGGER_OVERWRITE|flags);
+               ret   = bch2_key_trigger_new(trans, i->btree_id, i->level,
+                               bkey_i_to_s(new), flags) ?:
+                       bch2_key_trigger_old(trans, i->btree_id, i->level,
+                               old, flags);
        }
 
        return ret;
@@ -482,6 +477,7 @@ static int run_one_trans_trigger(struct btree_trans *trans, struct btree_insert_
        struct bkey_s_c old = { &old_k, i->old_v };
        const struct bkey_ops *old_ops = bch2_bkey_type_ops(old.k->type);
        const struct bkey_ops *new_ops = bch2_bkey_type_ops(i->k->k.type);
+       unsigned flags = i->flags|BTREE_TRIGGER_TRANSACTIONAL;
 
        verify_update_old_key(trans, i);
 
@@ -491,19 +487,18 @@ static int run_one_trans_trigger(struct btree_trans *trans, struct btree_insert_
 
        if (!i->insert_trigger_run &&
            !i->overwrite_trigger_run &&
-           old_ops->trans_trigger == new_ops->trans_trigger) {
+           old_ops->trigger == new_ops->trigger) {
                i->overwrite_trigger_run = true;
                i->insert_trigger_run = true;
-               return bch2_trans_mark_key(trans, i->btree_id, i->level, old, i->k,
-                                          BTREE_TRIGGER_INSERT|
-                                          BTREE_TRIGGER_OVERWRITE|
-                                          i->flags) ?: 1;
+               return bch2_key_trigger(trans, i->btree_id, i->level, old, bkey_i_to_s(i->k),
+                                       BTREE_TRIGGER_INSERT|
+                                       BTREE_TRIGGER_OVERWRITE|flags) ?: 1;
        } else if (overwrite && !i->overwrite_trigger_run) {
                i->overwrite_trigger_run = true;
-               return bch2_trans_mark_old(trans, i->btree_id, i->level, old, i->flags) ?: 1;
+               return bch2_key_trigger_old(trans, i->btree_id, i->level, old, flags) ?: 1;
        } else if (!overwrite && !i->insert_trigger_run) {
                i->insert_trigger_run = true;
-               return bch2_trans_mark_new(trans, i->btree_id, i->level, i->k, i->flags) ?: 1;
+               return bch2_key_trigger_new(trans, i->btree_id, i->level, bkey_i_to_s(i->k), flags) ?: 1;
        } else {
                return 0;
        }
index 2a93eb92d1129680724681ef62c12bd207c56ae9..44f9dfa28a09d89984150b19d3831077a18485f1 100644 (file)
@@ -568,7 +568,8 @@ static int btree_update_nodes_written_trans(struct btree_trans *trans,
        for_each_keylist_key(&as->old_keys, k) {
                unsigned level = bkey_i_to_btree_ptr_v2(k)->v.mem_ptr;
 
-               ret = bch2_trans_mark_old(trans, as->btree_id, level, bkey_i_to_s_c(k), 0);
+               ret = bch2_key_trigger_old(trans, as->btree_id, level, bkey_i_to_s_c(k),
+                                          BTREE_TRIGGER_TRANSACTIONAL);
                if (ret)
                        return ret;
        }
@@ -576,7 +577,8 @@ static int btree_update_nodes_written_trans(struct btree_trans *trans,
        for_each_keylist_key(&as->new_keys, k) {
                unsigned level = bkey_i_to_btree_ptr_v2(k)->v.mem_ptr;
 
-               ret = bch2_trans_mark_new(trans, as->btree_id, level, k, 0);
+               ret = bch2_key_trigger_new(trans, as->btree_id, level, bkey_i_to_s(k),
+                                          BTREE_TRIGGER_TRANSACTIONAL);
                if (ret)
                        return ret;
        }
@@ -2156,13 +2158,12 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans,
        int ret;
 
        if (!skip_triggers) {
-               ret = bch2_trans_mark_old(trans, b->c.btree_id, b->c.level + 1,
-                                         bkey_i_to_s_c(&b->key), 0);
-               if (ret)
-                       return ret;
-
-               ret = bch2_trans_mark_new(trans, b->c.btree_id, b->c.level + 1,
-                                         new_key, 0);
+               ret   = bch2_key_trigger_old(trans, b->c.btree_id, b->c.level + 1,
+                                            bkey_i_to_s_c(&b->key),
+                                            BTREE_TRIGGER_TRANSACTIONAL) ?:
+                       bch2_key_trigger_new(trans, b->c.btree_id, b->c.level + 1,
+                                            bkey_i_to_s(new_key),
+                                            BTREE_TRIGGER_TRANSACTIONAL);
                if (ret)
                        return ret;
        }
index c0dac04253f7c8787570fdb50f137b0ae20ba26a..67b7e79648b15b4629b9c664eb35a84327be21c0 100644 (file)
@@ -296,10 +296,10 @@ void bch2_dev_usage_to_text(struct printbuf *out, struct bch_dev_usage *usage)
        }
 }
 
-static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
-                                 struct bch_alloc_v4 old,
-                                 struct bch_alloc_v4 new,
-                                 u64 journal_seq, bool gc)
+void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
+                          const struct bch_alloc_v4 *old,
+                          const struct bch_alloc_v4 *new,
+                          u64 journal_seq, bool gc)
 {
        struct bch_fs_usage *fs_usage;
        struct bch_dev_usage *u;
@@ -307,24 +307,24 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
        preempt_disable();
        fs_usage = fs_usage_ptr(c, journal_seq, gc);
 
-       if (data_type_is_hidden(old.data_type))
+       if (data_type_is_hidden(old->data_type))
                fs_usage->hidden -= ca->mi.bucket_size;
-       if (data_type_is_hidden(new.data_type))
+       if (data_type_is_hidden(new->data_type))
                fs_usage->hidden += ca->mi.bucket_size;
 
        u = dev_usage_ptr(ca, journal_seq, gc);
 
-       u->d[old.data_type].buckets--;
-       u->d[new.data_type].buckets++;
+       u->d[old->data_type].buckets--;
+       u->d[new->data_type].buckets++;
 
-       u->d[old.data_type].sectors -= bch2_bucket_sectors_dirty(old);
-       u->d[new.data_type].sectors += bch2_bucket_sectors_dirty(new);
+       u->d[old->data_type].sectors -= bch2_bucket_sectors_dirty(*old);
+       u->d[new->data_type].sectors += bch2_bucket_sectors_dirty(*new);
 
-       u->d[BCH_DATA_cached].sectors += new.cached_sectors;
-       u->d[BCH_DATA_cached].sectors -= old.cached_sectors;
+       u->d[BCH_DATA_cached].sectors += new->cached_sectors;
+       u->d[BCH_DATA_cached].sectors -= old->cached_sectors;
 
-       u->d[old.data_type].fragmented -= bch2_bucket_sectors_fragmented(ca, old);
-       u->d[new.data_type].fragmented += bch2_bucket_sectors_fragmented(ca, new);
+       u->d[old->data_type].fragmented -= bch2_bucket_sectors_fragmented(ca, *old);
+       u->d[new->data_type].fragmented += bch2_bucket_sectors_fragmented(ca, *new);
 
        preempt_enable();
 }
@@ -340,13 +340,13 @@ static inline struct bch_alloc_v4 bucket_m_to_alloc(struct bucket b)
        };
 }
 
-static void bch2_dev_usage_update_m(struct bch_fs *c, struct bch_dev *ca,
-                                   struct bucket old, struct bucket new)
+void bch2_dev_usage_update_m(struct bch_fs *c, struct bch_dev *ca,
+                            struct bucket *old, struct bucket *new)
 {
-       bch2_dev_usage_update(c, ca,
-                             bucket_m_to_alloc(old),
-                             bucket_m_to_alloc(new),
-                             0, true);
+       struct bch_alloc_v4 old_a = bucket_m_to_alloc(*old);
+       struct bch_alloc_v4 new_a = bucket_m_to_alloc(*new);
+
+       bch2_dev_usage_update(c, ca, &old_a, &new_a, 0, true);
 }
 
 static inline int __update_replicas(struct bch_fs *c,
@@ -364,9 +364,9 @@ static inline int __update_replicas(struct bch_fs *c,
        return 0;
 }
 
-static inline int update_replicas(struct bch_fs *c, struct bkey_s_c k,
-                       struct bch_replicas_entry_v1 *r, s64 sectors,
-                       unsigned journal_seq, bool gc)
+int bch2_update_replicas(struct bch_fs *c, struct bkey_s_c k,
+                        struct bch_replicas_entry_v1 *r, s64 sectors,
+                        unsigned journal_seq, bool gc)
 {
        struct bch_fs_usage *fs_usage;
        int idx, ret = 0;
@@ -413,7 +413,7 @@ static inline int update_cached_sectors(struct bch_fs *c,
 
        bch2_replicas_entry_cached(&r.e, dev);
 
-       return update_replicas(c, k, &r.e, sectors, journal_seq, gc);
+       return bch2_update_replicas(c, k, &r.e, sectors, journal_seq, gc);
 }
 
 static int __replicas_deltas_realloc(struct btree_trans *trans, unsigned more,
@@ -496,114 +496,6 @@ int bch2_update_cached_sectors_list(struct btree_trans *trans, unsigned dev, s64
        return bch2_update_replicas_list(trans, &r.e, sectors);
 }
 
-int bch2_mark_alloc(struct btree_trans *trans,
-                   enum btree_id btree, unsigned level,
-                   struct bkey_s_c old, struct bkey_s_c new,
-                   unsigned flags)
-{
-       bool gc = flags & BTREE_TRIGGER_GC;
-       u64 journal_seq = trans->journal_res.seq;
-       u64 bucket_journal_seq;
-       struct bch_fs *c = trans->c;
-       struct bch_alloc_v4 old_a_convert, new_a_convert;
-       const struct bch_alloc_v4 *old_a, *new_a;
-       struct bch_dev *ca;
-       int ret = 0;
-
-       /*
-        * alloc btree is read in by bch2_alloc_read, not gc:
-        */
-       if ((flags & BTREE_TRIGGER_GC) &&
-           !(flags & BTREE_TRIGGER_BUCKET_INVALIDATE))
-               return 0;
-
-       if (bch2_trans_inconsistent_on(!bch2_dev_bucket_exists(c, new.k->p), trans,
-                                      "alloc key for invalid device or bucket"))
-               return -EIO;
-
-       ca = bch_dev_bkey_exists(c, new.k->p.inode);
-
-       old_a = bch2_alloc_to_v4(old, &old_a_convert);
-       new_a = bch2_alloc_to_v4(new, &new_a_convert);
-
-       bucket_journal_seq = new_a->journal_seq;
-
-       if ((flags & BTREE_TRIGGER_INSERT) &&
-           data_type_is_empty(old_a->data_type) !=
-           data_type_is_empty(new_a->data_type) &&
-           new.k->type == KEY_TYPE_alloc_v4) {
-               struct bch_alloc_v4 *v = (struct bch_alloc_v4 *) new.v;
-
-               EBUG_ON(!journal_seq);
-
-               /*
-                * If the btree updates referring to a bucket weren't flushed
-                * before the bucket became empty again, then the we don't have
-                * to wait on a journal flush before we can reuse the bucket:
-                */
-               v->journal_seq = bucket_journal_seq =
-                       data_type_is_empty(new_a->data_type) &&
-                       (journal_seq == v->journal_seq ||
-                        bch2_journal_noflush_seq(&c->journal, v->journal_seq))
-                       ? 0 : journal_seq;
-       }
-
-       if (!data_type_is_empty(old_a->data_type) &&
-           data_type_is_empty(new_a->data_type) &&
-           bucket_journal_seq) {
-               ret = bch2_set_bucket_needs_journal_commit(&c->buckets_waiting_for_journal,
-                               c->journal.flushed_seq_ondisk,
-                               new.k->p.inode, new.k->p.offset,
-                               bucket_journal_seq);
-               if (ret) {
-                       bch2_fs_fatal_error(c,
-                               "error setting bucket_needs_journal_commit: %i", ret);
-                       return ret;
-               }
-       }
-
-       percpu_down_read(&c->mark_lock);
-       if (!gc && new_a->gen != old_a->gen)
-               *bucket_gen(ca, new.k->p.offset) = new_a->gen;
-
-       bch2_dev_usage_update(c, ca, *old_a, *new_a, journal_seq, gc);
-
-       if (gc) {
-               struct bucket *g = gc_bucket(ca, new.k->p.offset);
-
-               bucket_lock(g);
-
-               g->gen_valid            = 1;
-               g->gen                  = new_a->gen;
-               g->data_type            = new_a->data_type;
-               g->stripe               = new_a->stripe;
-               g->stripe_redundancy    = new_a->stripe_redundancy;
-               g->dirty_sectors        = new_a->dirty_sectors;
-               g->cached_sectors       = new_a->cached_sectors;
-
-               bucket_unlock(g);
-       }
-       percpu_up_read(&c->mark_lock);
-
-       if (new_a->data_type == BCH_DATA_free &&
-           (!new_a->journal_seq || new_a->journal_seq < c->journal.flushed_seq_ondisk))
-               closure_wake_up(&c->freelist_wait);
-
-       if (new_a->data_type == BCH_DATA_need_discard &&
-           (!bucket_journal_seq || bucket_journal_seq < c->journal.flushed_seq_ondisk))
-               bch2_do_discards(c);
-
-       if (old_a->data_type != BCH_DATA_cached &&
-           new_a->data_type == BCH_DATA_cached &&
-           should_invalidate_buckets(ca, bch2_dev_usage_read(ca)))
-               bch2_do_invalidates(c);
-
-       if (new_a->data_type == BCH_DATA_need_gc_gens)
-               bch2_do_gc_gens(c);
-
-       return 0;
-}
-
 int bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
                              size_t b, enum bch_data_type data_type,
                              unsigned sectors, struct gc_pos pos,
@@ -652,17 +544,17 @@ int bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
 err:
        bucket_unlock(g);
        if (!ret)
-               bch2_dev_usage_update_m(c, ca, old, new);
+               bch2_dev_usage_update_m(c, ca, &old, &new);
        percpu_up_read(&c->mark_lock);
        return ret;
 }
 
-static int check_bucket_ref(struct btree_trans *trans,
-                           struct bkey_s_c k,
-                           const struct bch_extent_ptr *ptr,
-                           s64 sectors, enum bch_data_type ptr_data_type,
-                           u8 b_gen, u8 bucket_data_type,
-                           u32 bucket_sectors)
+int bch2_check_bucket_ref(struct btree_trans *trans,
+                         struct bkey_s_c k,
+                         const struct bch_extent_ptr *ptr,
+                         s64 sectors, enum bch_data_type ptr_data_type,
+                         u8 b_gen, u8 bucket_data_type,
+                         u32 bucket_sectors)
 {
        struct bch_fs *c = trans->c;
        struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
@@ -761,404 +653,6 @@ err:
        goto out;
 }
 
-static int mark_stripe_bucket(struct btree_trans *trans,
-                             struct bkey_s_c k,
-                             unsigned ptr_idx,
-                             unsigned flags)
-{
-       struct bch_fs *c = trans->c;
-       const struct bch_stripe *s = bkey_s_c_to_stripe(k).v;
-       unsigned nr_data = s->nr_blocks - s->nr_redundant;
-       bool parity = ptr_idx >= nr_data;
-       enum bch_data_type data_type = parity ? BCH_DATA_parity : BCH_DATA_stripe;
-       s64 sectors = parity ? le16_to_cpu(s->sectors) : 0;
-       const struct bch_extent_ptr *ptr = s->ptrs + ptr_idx;
-       struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
-       struct bucket old, new, *g;
-       struct printbuf buf = PRINTBUF;
-       int ret = 0;
-
-       BUG_ON(!(flags & BTREE_TRIGGER_GC));
-
-       /* * XXX doesn't handle deletion */
-
-       percpu_down_read(&c->mark_lock);
-       g = PTR_GC_BUCKET(ca, ptr);
-
-       if (g->dirty_sectors ||
-           (g->stripe && g->stripe != k.k->p.offset)) {
-               bch2_fs_inconsistent(c,
-                             "bucket %u:%zu gen %u: multiple stripes using same bucket\n%s",
-                             ptr->dev, PTR_BUCKET_NR(ca, ptr), g->gen,
-                             (bch2_bkey_val_to_text(&buf, c, k), buf.buf));
-               ret = -EINVAL;
-               goto err;
-       }
-
-       bucket_lock(g);
-       old = *g;
-
-       ret = check_bucket_ref(trans, k, ptr, sectors, data_type,
-                              g->gen, g->data_type,
-                              g->dirty_sectors);
-       if (ret)
-               goto err;
-
-       g->data_type = data_type;
-       g->dirty_sectors += sectors;
-
-       g->stripe               = k.k->p.offset;
-       g->stripe_redundancy    = s->nr_redundant;
-       new = *g;
-err:
-       bucket_unlock(g);
-       if (!ret)
-               bch2_dev_usage_update_m(c, ca, old, new);
-       percpu_up_read(&c->mark_lock);
-       printbuf_exit(&buf);
-       return ret;
-}
-
-static int __mark_pointer(struct btree_trans *trans,
-                         struct bkey_s_c k,
-                         const struct bch_extent_ptr *ptr,
-                         s64 sectors, enum bch_data_type ptr_data_type,
-                         u8 bucket_gen, u8 *bucket_data_type,
-                         u32 *dirty_sectors, u32 *cached_sectors)
-{
-       u32 *dst_sectors = !ptr->cached
-               ? dirty_sectors
-               : cached_sectors;
-       int ret = check_bucket_ref(trans, k, ptr, sectors, ptr_data_type,
-                                  bucket_gen, *bucket_data_type, *dst_sectors);
-
-       if (ret)
-               return ret;
-
-       *dst_sectors += sectors;
-
-       if (!*dirty_sectors && !*cached_sectors)
-               *bucket_data_type = 0;
-       else if (*bucket_data_type != BCH_DATA_stripe)
-               *bucket_data_type = ptr_data_type;
-
-       return 0;
-}
-
-static int bch2_mark_pointer(struct btree_trans *trans,
-                            enum btree_id btree_id, unsigned level,
-                            struct bkey_s_c k,
-                            struct extent_ptr_decoded p,
-                            s64 sectors,
-                            unsigned flags)
-{
-       struct bch_fs *c = trans->c;
-       struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
-       struct bucket old, new, *g;
-       enum bch_data_type data_type = bkey_ptr_data_type(btree_id, level, k, p);
-       u8 bucket_data_type;
-       int ret = 0;
-
-       BUG_ON(!(flags & BTREE_TRIGGER_GC));
-
-       percpu_down_read(&c->mark_lock);
-       g = PTR_GC_BUCKET(ca, &p.ptr);
-       bucket_lock(g);
-       old = *g;
-
-       bucket_data_type = g->data_type;
-       ret = __mark_pointer(trans, k, &p.ptr, sectors,
-                            data_type, g->gen,
-                            &bucket_data_type,
-                            &g->dirty_sectors,
-                            &g->cached_sectors);
-       if (!ret)
-               g->data_type = bucket_data_type;
-
-       new = *g;
-       bucket_unlock(g);
-       if (!ret)
-               bch2_dev_usage_update_m(c, ca, old, new);
-       percpu_up_read(&c->mark_lock);
-
-       return ret;
-}
-
-static int bch2_mark_stripe_ptr(struct btree_trans *trans,
-                               struct bkey_s_c k,
-                               struct bch_extent_stripe_ptr p,
-                               enum bch_data_type data_type,
-                               s64 sectors,
-                               unsigned flags)
-{
-       struct bch_fs *c = trans->c;
-       struct bch_replicas_padded r;
-       struct gc_stripe *m;
-
-       BUG_ON(!(flags & BTREE_TRIGGER_GC));
-
-       m = genradix_ptr_alloc(&c->gc_stripes, p.idx, GFP_KERNEL);
-       if (!m) {
-               bch_err(c, "error allocating memory for gc_stripes, idx %llu",
-                       (u64) p.idx);
-               return -BCH_ERR_ENOMEM_mark_stripe_ptr;
-       }
-
-       mutex_lock(&c->ec_stripes_heap_lock);
-
-       if (!m || !m->alive) {
-               mutex_unlock(&c->ec_stripes_heap_lock);
-               bch_err_ratelimited(c, "pointer to nonexistent stripe %llu",
-                                   (u64) p.idx);
-               bch2_inconsistent_error(c);
-               return -EIO;
-       }
-
-       m->block_sectors[p.block] += sectors;
-
-       r = m->r;
-       mutex_unlock(&c->ec_stripes_heap_lock);
-
-       r.e.data_type = data_type;
-       update_replicas(c, k, &r.e, sectors, trans->journal_res.seq, true);
-
-       return 0;
-}
-
-static int __mark_extent(struct btree_trans *trans,
-                        enum btree_id btree_id, unsigned level,
-                        struct bkey_s_c k, unsigned flags)
-{
-       u64 journal_seq = trans->journal_res.seq;
-       struct bch_fs *c = trans->c;
-       struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
-       const union bch_extent_entry *entry;
-       struct extent_ptr_decoded p;
-       struct bch_replicas_padded r;
-       enum bch_data_type data_type = bkey_is_btree_ptr(k.k)
-               ? BCH_DATA_btree
-               : BCH_DATA_user;
-       s64 sectors = bkey_is_btree_ptr(k.k)
-               ? btree_sectors(c)
-               : k.k->size;
-       s64 dirty_sectors = 0;
-       bool stale;
-       int ret;
-
-       BUG_ON(!(flags & BTREE_TRIGGER_GC));
-
-       r.e.data_type   = data_type;
-       r.e.nr_devs     = 0;
-       r.e.nr_required = 1;
-
-       bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
-               s64 disk_sectors = ptr_disk_sectors(sectors, p);
-
-               if (flags & BTREE_TRIGGER_OVERWRITE)
-                       disk_sectors = -disk_sectors;
-
-               ret = bch2_mark_pointer(trans, btree_id, level, k, p, disk_sectors, flags);
-               if (ret < 0)
-                       return ret;
-
-               stale = ret > 0;
-
-               if (p.ptr.cached) {
-                       if (!stale) {
-                               ret = update_cached_sectors(c, k, p.ptr.dev,
-                                               disk_sectors, journal_seq, true);
-                               if (ret) {
-                                       bch2_fs_fatal_error(c, "%s(): no replicas entry while updating cached sectors",
-                                                           __func__);
-                                       return ret;
-                               }
-                       }
-               } else if (!p.has_ec) {
-                       dirty_sectors          += disk_sectors;
-                       r.e.devs[r.e.nr_devs++] = p.ptr.dev;
-               } else {
-                       ret = bch2_mark_stripe_ptr(trans, k, p.ec, data_type,
-                                       disk_sectors, flags);
-                       if (ret)
-                               return ret;
-
-                       /*
-                        * There may be other dirty pointers in this extent, but
-                        * if so they're not required for mounting if we have an
-                        * erasure coded pointer in this extent:
-                        */
-                       r.e.nr_required = 0;
-               }
-       }
-
-       if (r.e.nr_devs) {
-               ret = update_replicas(c, k, &r.e, dirty_sectors, journal_seq, true);
-               if (ret) {
-                       struct printbuf buf = PRINTBUF;
-
-                       bch2_bkey_val_to_text(&buf, c, k);
-                       bch2_fs_fatal_error(c, "%s(): no replicas entry for %s", __func__, buf.buf);
-                       printbuf_exit(&buf);
-                       return ret;
-               }
-       }
-
-       return 0;
-}
-
-int bch2_mark_extent(struct btree_trans *trans,
-                    enum btree_id btree_id, unsigned level,
-                    struct bkey_s_c old, struct bkey_s_c new,
-                    unsigned flags)
-{
-       return mem_trigger_run_overwrite_then_insert(__mark_extent, trans, btree_id, level, old, new, flags);
-}
-
-int bch2_mark_stripe(struct btree_trans *trans,
-                    enum btree_id btree_id, unsigned level,
-                    struct bkey_s_c old, struct bkey_s_c new,
-                    unsigned flags)
-{
-       bool gc = flags & BTREE_TRIGGER_GC;
-       u64 journal_seq = trans->journal_res.seq;
-       struct bch_fs *c = trans->c;
-       u64 idx = new.k->p.offset;
-       const struct bch_stripe *old_s = old.k->type == KEY_TYPE_stripe
-               ? bkey_s_c_to_stripe(old).v : NULL;
-       const struct bch_stripe *new_s = new.k->type == KEY_TYPE_stripe
-               ? bkey_s_c_to_stripe(new).v : NULL;
-       unsigned i;
-       int ret;
-
-       BUG_ON(gc && old_s);
-
-       if (!gc) {
-               struct stripe *m = genradix_ptr(&c->stripes, idx);
-
-               if (!m) {
-                       struct printbuf buf1 = PRINTBUF;
-                       struct printbuf buf2 = PRINTBUF;
-
-                       bch2_bkey_val_to_text(&buf1, c, old);
-                       bch2_bkey_val_to_text(&buf2, c, new);
-                       bch_err_ratelimited(c, "error marking nonexistent stripe %llu while marking\n"
-                                           "old %s\n"
-                                           "new %s", idx, buf1.buf, buf2.buf);
-                       printbuf_exit(&buf2);
-                       printbuf_exit(&buf1);
-                       bch2_inconsistent_error(c);
-                       return -1;
-               }
-
-               if (!new_s) {
-                       bch2_stripes_heap_del(c, m, idx);
-
-                       memset(m, 0, sizeof(*m));
-               } else {
-                       m->sectors      = le16_to_cpu(new_s->sectors);
-                       m->algorithm    = new_s->algorithm;
-                       m->nr_blocks    = new_s->nr_blocks;
-                       m->nr_redundant = new_s->nr_redundant;
-                       m->blocks_nonempty = 0;
-
-                       for (i = 0; i < new_s->nr_blocks; i++)
-                               m->blocks_nonempty += !!stripe_blockcount_get(new_s, i);
-
-                       if (!old_s)
-                               bch2_stripes_heap_insert(c, m, idx);
-                       else
-                               bch2_stripes_heap_update(c, m, idx);
-               }
-       } else {
-               struct gc_stripe *m =
-                       genradix_ptr_alloc(&c->gc_stripes, idx, GFP_KERNEL);
-
-               if (!m) {
-                       bch_err(c, "error allocating memory for gc_stripes, idx %llu",
-                               idx);
-                       return -BCH_ERR_ENOMEM_mark_stripe;
-               }
-               /*
-                * This will be wrong when we bring back runtime gc: we should
-                * be unmarking the old key and then marking the new key
-                */
-               m->alive        = true;
-               m->sectors      = le16_to_cpu(new_s->sectors);
-               m->nr_blocks    = new_s->nr_blocks;
-               m->nr_redundant = new_s->nr_redundant;
-
-               for (i = 0; i < new_s->nr_blocks; i++)
-                       m->ptrs[i] = new_s->ptrs[i];
-
-               bch2_bkey_to_replicas(&m->r.e, new);
-
-               /*
-                * gc recalculates this field from stripe ptr
-                * references:
-                */
-               memset(m->block_sectors, 0, sizeof(m->block_sectors));
-
-               for (i = 0; i < new_s->nr_blocks; i++) {
-                       ret = mark_stripe_bucket(trans, new, i, flags);
-                       if (ret)
-                               return ret;
-               }
-
-               ret = update_replicas(c, new, &m->r.e,
-                                     ((s64) m->sectors * m->nr_redundant),
-                                     journal_seq, gc);
-               if (ret) {
-                       struct printbuf buf = PRINTBUF;
-
-                       bch2_bkey_val_to_text(&buf, c, new);
-                       bch2_fs_fatal_error(c, "no replicas entry for %s", buf.buf);
-                       printbuf_exit(&buf);
-                       return ret;
-               }
-       }
-
-       return 0;
-}
-
-static int __mark_reservation(struct btree_trans *trans,
-                             enum btree_id btree_id, unsigned level,
-                             struct bkey_s_c k, unsigned flags)
-{
-       struct bch_fs *c = trans->c;
-       struct bch_fs_usage *fs_usage;
-       unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas;
-       s64 sectors = (s64) k.k->size;
-
-       BUG_ON(!(flags & BTREE_TRIGGER_GC));
-
-       if (flags & BTREE_TRIGGER_OVERWRITE)
-               sectors = -sectors;
-       sectors *= replicas;
-
-       percpu_down_read(&c->mark_lock);
-       preempt_disable();
-
-       fs_usage = fs_usage_ptr(c, trans->journal_res.seq, flags & BTREE_TRIGGER_GC);
-       replicas = clamp_t(unsigned, replicas, 1,
-                          ARRAY_SIZE(fs_usage->persistent_reserved));
-
-       fs_usage->reserved                              += sectors;
-       fs_usage->persistent_reserved[replicas - 1]     += sectors;
-
-       preempt_enable();
-       percpu_up_read(&c->mark_lock);
-
-       return 0;
-}
-
-int bch2_mark_reservation(struct btree_trans *trans,
-                         enum btree_id btree_id, unsigned level,
-                         struct bkey_s_c old, struct bkey_s_c new,
-                         unsigned flags)
-{
-       return mem_trigger_run_overwrite_then_insert(__mark_reservation, trans, btree_id, level, old, new, flags);
-}
-
 void bch2_trans_fs_usage_revert(struct btree_trans *trans,
                                struct replicas_delta_list *deltas)
 {
@@ -1278,92 +772,184 @@ need_mark:
        return -1;
 }
 
-/* trans_mark: */
+/* KEY_TYPE_extent: */
+
+static int __mark_pointer(struct btree_trans *trans,
+                         struct bkey_s_c k,
+                         const struct bch_extent_ptr *ptr,
+                         s64 sectors, enum bch_data_type ptr_data_type,
+                         u8 bucket_gen, u8 *bucket_data_type,
+                         u32 *dirty_sectors, u32 *cached_sectors)
+{
+       u32 *dst_sectors = !ptr->cached
+               ? dirty_sectors
+               : cached_sectors;
+       int ret = bch2_check_bucket_ref(trans, k, ptr, sectors, ptr_data_type,
+                                  bucket_gen, *bucket_data_type, *dst_sectors);
+
+       if (ret)
+               return ret;
+
+       *dst_sectors += sectors;
+
+       if (!*dirty_sectors && !*cached_sectors)
+               *bucket_data_type = 0;
+       else if (*bucket_data_type != BCH_DATA_stripe)
+               *bucket_data_type = ptr_data_type;
+
+       return 0;
+}
 
-static inline int bch2_trans_mark_pointer(struct btree_trans *trans,
-                                  enum btree_id btree_id, unsigned level,
-                                  struct bkey_s_c k, struct extent_ptr_decoded p,
-                                  unsigned flags)
+static int bch2_trigger_pointer(struct btree_trans *trans,
+                       enum btree_id btree_id, unsigned level,
+                       struct bkey_s_c k, struct extent_ptr_decoded p,
+                       s64 *sectors,
+                       unsigned flags)
 {
        bool insert = !(flags & BTREE_TRIGGER_OVERWRITE);
-       struct btree_iter iter;
-       struct bkey_i_alloc_v4 *a;
        struct bpos bucket;
        struct bch_backpointer bp;
-       s64 sectors;
-       int ret;
 
        bch2_extent_ptr_to_bp(trans->c, btree_id, level, k, p, &bucket, &bp);
-       sectors = bp.bucket_len;
-       if (!insert)
-               sectors = -sectors;
-
-       a = bch2_trans_start_alloc_update(trans, &iter, bucket);
-       if (IS_ERR(a))
-               return PTR_ERR(a);
+       *sectors = insert ? bp.bucket_len : -((s64) bp.bucket_len);
 
-       ret = __mark_pointer(trans, k, &p.ptr, sectors, bp.data_type,
-                            a->v.gen, &a->v.data_type,
-                            &a->v.dirty_sectors, &a->v.cached_sectors) ?:
-               bch2_trans_update(trans, &iter, &a->k_i, 0);
-       bch2_trans_iter_exit(trans, &iter);
+       if (flags & BTREE_TRIGGER_TRANSACTIONAL) {
+               struct btree_iter iter;
+               struct bkey_i_alloc_v4 *a = bch2_trans_start_alloc_update(trans, &iter, bucket);
+               int ret = PTR_ERR_OR_ZERO(a);
+               if (ret)
+                       return ret;
 
-       if (ret)
-               return ret;
+               ret = __mark_pointer(trans, k, &p.ptr, *sectors, bp.data_type,
+                                    a->v.gen, &a->v.data_type,
+                                    &a->v.dirty_sectors, &a->v.cached_sectors) ?:
+                       bch2_trans_update(trans, &iter, &a->k_i, 0);
+               bch2_trans_iter_exit(trans, &iter);
 
-       if (!p.ptr.cached) {
-               ret = bch2_bucket_backpointer_mod(trans, bucket, bp, k, insert);
                if (ret)
                        return ret;
+
+               if (!p.ptr.cached) {
+                       ret = bch2_bucket_backpointer_mod(trans, bucket, bp, k, insert);
+                       if (ret)
+                               return ret;
+               }
+       }
+
+       if (flags & BTREE_TRIGGER_GC) {
+               struct bch_fs *c = trans->c;
+               struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
+               enum bch_data_type data_type = bkey_ptr_data_type(btree_id, level, k, p);
+
+               percpu_down_read(&c->mark_lock);
+               struct bucket *g = PTR_GC_BUCKET(ca, &p.ptr);
+               bucket_lock(g);
+               struct bucket old = *g;
+
+               u8 bucket_data_type = g->data_type;
+               int ret = __mark_pointer(trans, k, &p.ptr, *sectors,
+                                    data_type, g->gen,
+                                    &bucket_data_type,
+                                    &g->dirty_sectors,
+                                    &g->cached_sectors);
+               if (ret) {
+                       bucket_unlock(g);
+                       percpu_up_read(&c->mark_lock);
+                       return ret;
+               }
+
+               g->data_type = bucket_data_type;
+               struct bucket new = *g;
+               bucket_unlock(g);
+               bch2_dev_usage_update_m(c, ca, &old, &new);
+               percpu_up_read(&c->mark_lock);
        }
 
        return 0;
 }
 
-static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans,
-                       struct extent_ptr_decoded p,
-                       s64 sectors, enum bch_data_type data_type)
+static int bch2_trigger_stripe_ptr(struct btree_trans *trans,
+                               struct bkey_s_c k,
+                               struct extent_ptr_decoded p,
+                               enum bch_data_type data_type,
+                               s64 sectors, unsigned flags)
 {
-       struct btree_iter iter;
-       struct bkey_i_stripe *s;
-       struct bch_replicas_padded r;
-       int ret = 0;
+       if (flags & BTREE_TRIGGER_TRANSACTIONAL) {
+               struct btree_iter iter;
+               struct bkey_i_stripe *s = bch2_bkey_get_mut_typed(trans, &iter,
+                               BTREE_ID_stripes, POS(0, p.ec.idx),
+                               BTREE_ITER_WITH_UPDATES, stripe);
+               int ret = PTR_ERR_OR_ZERO(s);
+               if (unlikely(ret)) {
+                       bch2_trans_inconsistent_on(bch2_err_matches(ret, ENOENT), trans,
+                               "pointer to nonexistent stripe %llu",
+                               (u64) p.ec.idx);
+                       goto err;
+               }
 
-       s = bch2_bkey_get_mut_typed(trans, &iter,
-                       BTREE_ID_stripes, POS(0, p.ec.idx),
-                       BTREE_ITER_WITH_UPDATES, stripe);
-       ret = PTR_ERR_OR_ZERO(s);
-       if (unlikely(ret)) {
-               bch2_trans_inconsistent_on(bch2_err_matches(ret, ENOENT), trans,
-                       "pointer to nonexistent stripe %llu",
-                       (u64) p.ec.idx);
-               goto err;
-       }
+               if (!bch2_ptr_matches_stripe(&s->v, p)) {
+                       bch2_trans_inconsistent(trans,
+                               "stripe pointer doesn't match stripe %llu",
+                               (u64) p.ec.idx);
+                       ret = -EIO;
+                       goto err;
+               }
 
-       if (!bch2_ptr_matches_stripe(&s->v, p)) {
-               bch2_trans_inconsistent(trans,
-                       "stripe pointer doesn't match stripe %llu",
-                       (u64) p.ec.idx);
-               ret = -EIO;
-               goto err;
+               stripe_blockcount_set(&s->v, p.ec.block,
+                       stripe_blockcount_get(&s->v, p.ec.block) +
+                       sectors);
+
+               struct bch_replicas_padded r;
+               bch2_bkey_to_replicas(&r.e, bkey_i_to_s_c(&s->k_i));
+               r.e.data_type = data_type;
+               ret = bch2_update_replicas_list(trans, &r.e, sectors);
+err:
+               bch2_trans_iter_exit(trans, &iter);
+               return ret;
        }
 
-       stripe_blockcount_set(&s->v, p.ec.block,
-               stripe_blockcount_get(&s->v, p.ec.block) +
-               sectors);
+       if (flags & BTREE_TRIGGER_GC) {
+               struct bch_fs *c = trans->c;
 
-       bch2_bkey_to_replicas(&r.e, bkey_i_to_s_c(&s->k_i));
-       r.e.data_type = data_type;
-       ret = bch2_update_replicas_list(trans, &r.e, sectors);
-err:
-       bch2_trans_iter_exit(trans, &iter);
-       return ret;
+               BUG_ON(!(flags & BTREE_TRIGGER_GC));
+
+               struct gc_stripe *m = genradix_ptr_alloc(&c->gc_stripes, p.ec.idx, GFP_KERNEL);
+               if (!m) {
+                       bch_err(c, "error allocating memory for gc_stripes, idx %llu",
+                               (u64) p.ec.idx);
+                       return -BCH_ERR_ENOMEM_mark_stripe_ptr;
+               }
+
+               mutex_lock(&c->ec_stripes_heap_lock);
+
+               if (!m || !m->alive) {
+                       mutex_unlock(&c->ec_stripes_heap_lock);
+                       struct printbuf buf = PRINTBUF;
+                       bch2_bkey_val_to_text(&buf, c, k);
+                       bch_err_ratelimited(c, "pointer to nonexistent stripe %llu\n  while marking %s",
+                                           (u64) p.ec.idx, buf.buf);
+                       printbuf_exit(&buf);
+                       bch2_inconsistent_error(c);
+                       return -EIO;
+               }
+
+               m->block_sectors[p.ec.block] += sectors;
+
+               struct bch_replicas_padded r = m->r;
+               mutex_unlock(&c->ec_stripes_heap_lock);
+
+               r.e.data_type = data_type;
+               bch2_update_replicas(c, k, &r.e, sectors, trans->journal_res.seq, true);
+       }
+
+       return 0;
 }
 
-static int __trans_mark_extent(struct btree_trans *trans,
-                              enum btree_id btree_id, unsigned level,
-                              struct bkey_s_c k, unsigned flags)
+static int __trigger_extent(struct btree_trans *trans,
+                           enum btree_id btree_id, unsigned level,
+                           struct bkey_s_c k, unsigned flags)
 {
+       bool gc = flags & BTREE_TRIGGER_GC;
        struct bch_fs *c = trans->c;
        struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
        const union bch_extent_entry *entry;
@@ -1372,11 +958,7 @@ static int __trans_mark_extent(struct btree_trans *trans,
        enum bch_data_type data_type = bkey_is_btree_ptr(k.k)
                ? BCH_DATA_btree
                : BCH_DATA_user;
-       s64 sectors = bkey_is_btree_ptr(k.k)
-               ? btree_sectors(c)
-               : k.k->size;
        s64 dirty_sectors = 0;
-       bool stale;
        int ret = 0;
 
        r.e.data_type   = data_type;
@@ -1384,21 +966,20 @@ static int __trans_mark_extent(struct btree_trans *trans,
        r.e.nr_required = 1;
 
        bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
-               s64 disk_sectors = ptr_disk_sectors(sectors, p);
-
-               if (flags & BTREE_TRIGGER_OVERWRITE)
-                       disk_sectors = -disk_sectors;
-
-               ret = bch2_trans_mark_pointer(trans, btree_id, level, k, p, flags);
+               s64 disk_sectors;
+               ret = bch2_trigger_pointer(trans, btree_id, level, k, p, &disk_sectors, flags);
                if (ret < 0)
                        return ret;
 
-               stale = ret > 0;
+               bool stale = ret > 0;
 
                if (p.ptr.cached) {
                        if (!stale) {
-                               ret = bch2_update_cached_sectors_list(trans, p.ptr.dev,
-                                                                     disk_sectors);
+                               ret = !gc
+                                       ? bch2_update_cached_sectors_list(trans, p.ptr.dev, disk_sectors)
+                                       : update_cached_sectors(c, k, p.ptr.dev, disk_sectors, 0, true);
+                               bch2_fs_fatal_err_on(ret && gc, c, "%s(): no replicas entry while updating cached sectors",
+                                                    __func__);
                                if (ret)
                                        return ret;
                        }
@@ -1406,226 +987,111 @@ static int __trans_mark_extent(struct btree_trans *trans,
                        dirty_sectors          += disk_sectors;
                        r.e.devs[r.e.nr_devs++] = p.ptr.dev;
                } else {
-                       ret = bch2_trans_mark_stripe_ptr(trans, p,
-                                       disk_sectors, data_type);
+                       ret = bch2_trigger_stripe_ptr(trans, k, p, data_type, disk_sectors, flags);
                        if (ret)
                                return ret;
 
+                       /*
+                        * There may be other dirty pointers in this extent, but
+                        * if so they're not required for mounting if we have an
+                        * erasure coded pointer in this extent:
+                        */
                        r.e.nr_required = 0;
                }
        }
 
-       if (r.e.nr_devs)
-               ret = bch2_update_replicas_list(trans, &r.e, dirty_sectors);
-
-       return ret;
-}
-
-int bch2_trans_mark_extent(struct btree_trans *trans,
-                          enum btree_id btree_id, unsigned level,
-                          struct bkey_s_c old, struct bkey_i *new,
-                          unsigned flags)
-{
-       struct bch_fs *c = trans->c;
-       int mod = (int) bch2_bkey_needs_rebalance(c, bkey_i_to_s_c(new)) -
-                 (int) bch2_bkey_needs_rebalance(c, old);
+       if (r.e.nr_devs) {
+               ret = !gc
+                       ? bch2_update_replicas_list(trans, &r.e, dirty_sectors)
+                       : bch2_update_replicas(c, k, &r.e, dirty_sectors, 0, true);
+               if (unlikely(ret && gc)) {
+                       struct printbuf buf = PRINTBUF;
 
-       if (mod) {
-               int ret = bch2_btree_bit_mod(trans, BTREE_ID_rebalance_work, new->k.p, mod > 0);
+                       bch2_bkey_val_to_text(&buf, c, k);
+                       bch2_fs_fatal_error(c, "%s(): no replicas entry for %s", __func__, buf.buf);
+                       printbuf_exit(&buf);
+               }
                if (ret)
                        return ret;
        }
 
-       return trigger_run_overwrite_then_insert(__trans_mark_extent, trans, btree_id, level, old, new, flags);
+       return 0;
 }
 
-static int bch2_trans_mark_stripe_bucket(struct btree_trans *trans,
-                                        struct bkey_s_c_stripe s,
-                                        unsigned idx, bool deleting)
+int bch2_trigger_extent(struct btree_trans *trans,
+                       enum btree_id btree_id, unsigned level,
+                       struct bkey_s_c old, struct bkey_s new,
+                       unsigned flags)
 {
-       struct bch_fs *c = trans->c;
-       const struct bch_extent_ptr *ptr = &s.v->ptrs[idx];
-       struct btree_iter iter;
-       struct bkey_i_alloc_v4 *a;
-       enum bch_data_type data_type = idx >= s.v->nr_blocks - s.v->nr_redundant
-               ? BCH_DATA_parity : 0;
-       s64 sectors = data_type ? le16_to_cpu(s.v->sectors) : 0;
-       int ret = 0;
-
-       if (deleting)
-               sectors = -sectors;
-
-       a = bch2_trans_start_alloc_update(trans, &iter, PTR_BUCKET_POS(c, ptr));
-       if (IS_ERR(a))
-               return PTR_ERR(a);
-
-       ret = check_bucket_ref(trans, s.s_c, ptr, sectors, data_type,
-                              a->v.gen, a->v.data_type,
-                              a->v.dirty_sectors);
-       if (ret)
-               goto err;
-
-       if (!deleting) {
-               if (bch2_trans_inconsistent_on(a->v.stripe ||
-                                              a->v.stripe_redundancy, trans,
-                               "bucket %llu:%llu gen %u data type %s dirty_sectors %u: multiple stripes using same bucket (%u, %llu)",
-                               iter.pos.inode, iter.pos.offset, a->v.gen,
-                               bch2_data_types[a->v.data_type],
-                               a->v.dirty_sectors,
-                               a->v.stripe, s.k->p.offset)) {
-                       ret = -EIO;
-                       goto err;
-               }
-
-               if (bch2_trans_inconsistent_on(data_type && a->v.dirty_sectors, trans,
-                               "bucket %llu:%llu gen %u data type %s dirty_sectors %u: data already in stripe bucket %llu",
-                               iter.pos.inode, iter.pos.offset, a->v.gen,
-                               bch2_data_types[a->v.data_type],
-                               a->v.dirty_sectors,
-                               s.k->p.offset)) {
-                       ret = -EIO;
-                       goto err;
-               }
+       if (flags & BTREE_TRIGGER_TRANSACTIONAL) {
+               struct bch_fs *c = trans->c;
+               int mod = (int) bch2_bkey_needs_rebalance(c, new.s_c) -
+                         (int) bch2_bkey_needs_rebalance(c, old);
 
-               a->v.stripe             = s.k->p.offset;
-               a->v.stripe_redundancy  = s.v->nr_redundant;
-               a->v.data_type          = BCH_DATA_stripe;
-       } else {
-               if (bch2_trans_inconsistent_on(a->v.stripe != s.k->p.offset ||
-                                              a->v.stripe_redundancy != s.v->nr_redundant, trans,
-                               "bucket %llu:%llu gen %u: not marked as stripe when deleting stripe %llu (got %u)",
-                               iter.pos.inode, iter.pos.offset, a->v.gen,
-                               s.k->p.offset, a->v.stripe)) {
-                       ret = -EIO;
-                       goto err;
+               if (mod) {
+                       int ret = bch2_btree_bit_mod(trans, BTREE_ID_rebalance_work, new.k->p, mod > 0);
+                       if (ret)
+                               return ret;
                }
-
-               a->v.stripe             = 0;
-               a->v.stripe_redundancy  = 0;
-               a->v.data_type          = alloc_data_type(a->v, BCH_DATA_user);
        }
 
-       a->v.dirty_sectors += sectors;
-       if (data_type)
-               a->v.data_type = !deleting ? data_type : 0;
+       if (flags & (BTREE_TRIGGER_TRANSACTIONAL|BTREE_TRIGGER_GC))
+               return trigger_run_overwrite_then_insert(__trigger_extent, trans, btree_id, level, old, new, flags);
 
-       ret = bch2_trans_update(trans, &iter, &a->k_i, 0);
-       if (ret)
-               goto err;
-err:
-       bch2_trans_iter_exit(trans, &iter);
-       return ret;
+       return 0;
 }
 
-int bch2_trans_mark_stripe(struct btree_trans *trans,
-                          enum btree_id btree_id, unsigned level,
-                          struct bkey_s_c old, struct bkey_i *new,
-                          unsigned flags)
-{
-       const struct bch_stripe *old_s = NULL;
-       struct bch_stripe *new_s = NULL;
-       struct bch_replicas_padded r;
-       unsigned i, nr_blocks;
-       int ret = 0;
-
-       if (old.k->type == KEY_TYPE_stripe)
-               old_s = bkey_s_c_to_stripe(old).v;
-       if (new->k.type == KEY_TYPE_stripe)
-               new_s = &bkey_i_to_stripe(new)->v;
-
-       /*
-        * If the pointers aren't changing, we don't need to do anything:
-        */
-       if (new_s && old_s &&
-           new_s->nr_blocks    == old_s->nr_blocks &&
-           new_s->nr_redundant == old_s->nr_redundant &&
-           !memcmp(old_s->ptrs, new_s->ptrs,
-                   new_s->nr_blocks * sizeof(struct bch_extent_ptr)))
-               return 0;
+/* KEY_TYPE_reservation */
 
-       BUG_ON(new_s && old_s &&
-              (new_s->nr_blocks        != old_s->nr_blocks ||
-               new_s->nr_redundant     != old_s->nr_redundant));
-
-       nr_blocks = new_s ? new_s->nr_blocks : old_s->nr_blocks;
-
-       if (new_s) {
-               s64 sectors = le16_to_cpu(new_s->sectors);
-
-               bch2_bkey_to_replicas(&r.e, bkey_i_to_s_c(new));
-               ret = bch2_update_replicas_list(trans, &r.e, sectors * new_s->nr_redundant);
-               if (ret)
-                       return ret;
-       }
+static int __trigger_reservation(struct btree_trans *trans,
+                                enum btree_id btree_id, unsigned level,
+                                struct bkey_s_c k, unsigned flags)
+{
+       struct bch_fs *c = trans->c;
+       unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas;
+       s64 sectors = (s64) k.k->size * replicas;
 
-       if (old_s) {
-               s64 sectors = -((s64) le16_to_cpu(old_s->sectors));
+       if (flags & BTREE_TRIGGER_OVERWRITE)
+               sectors = -sectors;
 
-               bch2_bkey_to_replicas(&r.e, old);
-               ret = bch2_update_replicas_list(trans, &r.e, sectors * old_s->nr_redundant);
+       if (flags & BTREE_TRIGGER_TRANSACTIONAL) {
+               int ret = bch2_replicas_deltas_realloc(trans, 0);
                if (ret)
                        return ret;
-       }
-
-       for (i = 0; i < nr_blocks; i++) {
-               if (new_s && old_s &&
-                   !memcmp(&new_s->ptrs[i],
-                           &old_s->ptrs[i],
-                           sizeof(new_s->ptrs[i])))
-                       continue;
 
-               if (new_s) {
-                       ret = bch2_trans_mark_stripe_bucket(trans,
-                                       bkey_i_to_s_c_stripe(new), i, false);
-                       if (ret)
-                               break;
-               }
+               struct replicas_delta_list *d = trans->fs_usage_deltas;
+               replicas = min(replicas, ARRAY_SIZE(d->persistent_reserved));
 
-               if (old_s) {
-                       ret = bch2_trans_mark_stripe_bucket(trans,
-                                       bkey_s_c_to_stripe(old), i, true);
-                       if (ret)
-                               break;
-               }
+               d->persistent_reserved[replicas - 1] += sectors;
        }
 
-       return ret;
-}
-
-static int __trans_mark_reservation(struct btree_trans *trans,
-                                   enum btree_id btree_id, unsigned level,
-                                   struct bkey_s_c k, unsigned flags)
-{
-       unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas;
-       s64 sectors = (s64) k.k->size;
-       struct replicas_delta_list *d;
-       int ret;
+       if (flags & BTREE_TRIGGER_GC) {
+               percpu_down_read(&c->mark_lock);
+               preempt_disable();
 
-       if (flags & BTREE_TRIGGER_OVERWRITE)
-               sectors = -sectors;
-       sectors *= replicas;
+               struct bch_fs_usage *fs_usage = this_cpu_ptr(c->usage_gc);
 
-       ret = bch2_replicas_deltas_realloc(trans, 0);
-       if (ret)
-               return ret;
+               replicas = min(replicas, ARRAY_SIZE(fs_usage->persistent_reserved));
+               fs_usage->reserved                              += sectors;
+               fs_usage->persistent_reserved[replicas - 1]     += sectors;
 
-       d = trans->fs_usage_deltas;
-       replicas = clamp_t(unsigned, replicas, 1,
-                          ARRAY_SIZE(d->persistent_reserved));
+               preempt_enable();
+               percpu_up_read(&c->mark_lock);
+       }
 
-       d->persistent_reserved[replicas - 1] += sectors;
        return 0;
 }
 
-int bch2_trans_mark_reservation(struct btree_trans *trans,
-                               enum btree_id btree_id, unsigned level,
-                               struct bkey_s_c old,
-                               struct bkey_i *new,
-                               unsigned flags)
+int bch2_trigger_reservation(struct btree_trans *trans,
+                         enum btree_id btree_id, unsigned level,
+                         struct bkey_s_c old, struct bkey_s new,
+                         unsigned flags)
 {
-       return trigger_run_overwrite_then_insert(__trans_mark_reservation, trans, btree_id, level, old, new, flags);
+       return trigger_run_overwrite_then_insert(__trigger_reservation, trans, btree_id, level, old, new, flags);
 }
 
+/* Mark superblocks: */
+
 static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans,
                                    struct bch_dev *ca, size_t b,
                                    enum bch_data_type type,
index 379101d7e585b414de3757fc161a4c4f407b14a7..2c95cc5d86be661c6d6a0783d366d5d8b8b919d7 100644 (file)
@@ -302,6 +302,12 @@ u64 bch2_fs_sectors_used(struct bch_fs *, struct bch_fs_usage_online *);
 struct bch_fs_usage_short
 bch2_fs_usage_read_short(struct bch_fs *);
 
+void bch2_dev_usage_update(struct bch_fs *, struct bch_dev *,
+                          const struct bch_alloc_v4 *,
+                          const struct bch_alloc_v4 *, u64, bool);
+void bch2_dev_usage_update_m(struct bch_fs *, struct bch_dev *,
+                            struct bucket *, struct bucket *);
+
 /* key/bucket marking: */
 
 static inline struct bch_fs_usage *fs_usage_ptr(struct bch_fs *c,
@@ -316,6 +322,9 @@ static inline struct bch_fs_usage *fs_usage_ptr(struct bch_fs *c,
                            : c->usage[journal_seq & JOURNAL_BUF_MASK]);
 }
 
+int bch2_update_replicas(struct bch_fs *, struct bkey_s_c,
+                        struct bch_replicas_entry_v1 *, s64,
+                        unsigned, bool);
 int bch2_update_replicas_list(struct btree_trans *,
                         struct bch_replicas_entry_v1 *, s64);
 int bch2_update_cached_sectors_list(struct btree_trans *, unsigned, s64);
@@ -323,36 +332,30 @@ int bch2_replicas_deltas_realloc(struct btree_trans *, unsigned);
 
 void bch2_fs_usage_initialize(struct bch_fs *);
 
+int bch2_check_bucket_ref(struct btree_trans *, struct bkey_s_c,
+                         const struct bch_extent_ptr *,
+                         s64, enum bch_data_type, u8, u8, u32);
+
 int bch2_mark_metadata_bucket(struct bch_fs *, struct bch_dev *,
                              size_t, enum bch_data_type, unsigned,
                              struct gc_pos, unsigned);
 
-int bch2_mark_alloc(struct btree_trans *, enum btree_id, unsigned,
-                   struct bkey_s_c, struct bkey_s_c, unsigned);
-int bch2_mark_extent(struct btree_trans *, enum btree_id, unsigned,
-                    struct bkey_s_c, struct bkey_s_c, unsigned);
-int bch2_mark_stripe(struct btree_trans *, enum btree_id, unsigned,
-                    struct bkey_s_c, struct bkey_s_c, unsigned);
-int bch2_mark_reservation(struct btree_trans *, enum btree_id, unsigned,
-                         struct bkey_s_c, struct bkey_s_c, unsigned);
-
-int bch2_trans_mark_extent(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_i *, unsigned);
-int bch2_trans_mark_stripe(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_i *, unsigned);
-int bch2_trans_mark_reservation(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_i *, unsigned);
-#define mem_trigger_run_overwrite_then_insert(_fn, _trans, _btree_id, _level, _old, _new, _flags)\
+int bch2_trigger_extent(struct btree_trans *, enum btree_id, unsigned,
+                       struct bkey_s_c, struct bkey_s, unsigned);
+int bch2_trigger_reservation(struct btree_trans *, enum btree_id, unsigned,
+                         struct bkey_s_c, struct bkey_s, unsigned);
+
+#define trigger_run_overwrite_then_insert(_fn, _trans, _btree_id, _level, _old, _new, _flags)\
 ({                                                                                             \
        int ret = 0;                                                                            \
                                                                                                \
        if (_old.k->type)                                                                       \
                ret = _fn(_trans, _btree_id, _level, _old, _flags & ~BTREE_TRIGGER_INSERT);     \
        if (!ret && _new.k->type)                                                               \
-               ret = _fn(_trans, _btree_id, _level, _new, _flags & ~BTREE_TRIGGER_OVERWRITE);  \
+               ret = _fn(_trans, _btree_id, _level, _new.s_c, _flags & ~BTREE_TRIGGER_OVERWRITE);\
        ret;                                                                                    \
 })
 
-#define trigger_run_overwrite_then_insert(_fn, _trans, _btree_id, _level, _old, _new, _flags)  \
-       mem_trigger_run_overwrite_then_insert(_fn, _trans, _btree_id, _level, _old, bkey_i_to_s_c(_new), _flags)
-
 void bch2_trans_fs_usage_revert(struct btree_trans *, struct replicas_delta_list *);
 int bch2_trans_fs_usage_apply(struct btree_trans *, struct replicas_delta_list *);
 
index 22a52bc8406bb7a1828f54ba9defa303d47a698f..226b39c176673a374f50ab06ad5f6d3e0a4858d8 100644 (file)
 #include "replicas.h"
 #include "super.h"
 #include "super-io.h"
+#include "thread_with_file.h"
 
-#include <linux/anon_inodes.h>
 #include <linux/cdev.h>
 #include <linux/device.h>
-#include <linux/file.h>
 #include <linux/fs.h>
 #include <linux/ioctl.h>
-#include <linux/kthread.h>
 #include <linux/major.h>
-#include <linux/poll.h>
 #include <linux/sched/task.h>
 #include <linux/slab.h>
 #include <linux/uaccess.h>
@@ -31,65 +28,6 @@ static int copy_to_user_errcode(void __user *to, const void *from, unsigned long
        return copy_to_user(to, from, n) ? -EFAULT : 0;
 }
 
-struct thread_with_file {
-       struct task_struct      *task;
-       int                     ret;
-       bool                    done;
-};
-
-static void thread_with_file_exit(struct thread_with_file *thr)
-{
-       if (thr->task) {
-               kthread_stop(thr->task);
-               put_task_struct(thr->task);
-       }
-}
-
-__printf(4, 0)
-static int run_thread_with_file(struct thread_with_file *thr,
-                               const struct file_operations *fops,
-                               int (*fn)(void *), const char *fmt, ...)
-{
-       va_list args;
-       struct file *file = NULL;
-       int ret, fd = -1;
-       struct printbuf name = PRINTBUF;
-       unsigned fd_flags = O_RDONLY|O_CLOEXEC|O_NONBLOCK;
-
-       va_start(args, fmt);
-       prt_vprintf(&name, fmt, args);
-       va_end(args);
-
-       thr->ret = 0;
-       thr->task = kthread_create(fn, thr, name.buf);
-       ret = PTR_ERR_OR_ZERO(thr->task);
-       if (ret)
-               goto err;
-
-       ret = get_unused_fd_flags(fd_flags);
-       if (ret < 0)
-               goto err_stop_task;
-       fd = ret;
-
-       file = anon_inode_getfile(name.buf, fops, thr, fd_flags);
-       ret = PTR_ERR_OR_ZERO(file);
-       if (ret)
-               goto err_put_fd;
-
-       fd_install(fd, file);
-       get_task_struct(thr->task);
-       wake_up_process(thr->task);
-       printbuf_exit(&name);
-       return fd;
-err_put_fd:
-       put_unused_fd(fd);
-err_stop_task:
-       kthread_stop(thr->task);
-err:
-       printbuf_exit(&name);
-       return ret;
-}
-
 /* returns with ref on ca->ref */
 static struct bch_dev *bch2_device_lookup(struct bch_fs *c, u64 dev,
                                          unsigned flags)
@@ -200,132 +138,33 @@ static long bch2_ioctl_incremental(struct bch_ioctl_incremental __user *user_arg
 #endif
 
 struct fsck_thread {
-       struct thread_with_file thr;
-       struct printbuf         buf;
+       struct thread_with_stdio thr;
        struct bch_fs           *c;
        char                    **devs;
        size_t                  nr_devs;
        struct bch_opts         opts;
-
-       struct log_output       output;
-       DARRAY(char)            output2;
 };
 
-static void bch2_fsck_thread_free(struct fsck_thread *thr)
+static void bch2_fsck_thread_exit(struct thread_with_stdio *_thr)
 {
-       thread_with_file_exit(&thr->thr);
+       struct fsck_thread *thr = container_of(_thr, struct fsck_thread, thr);
        if (thr->devs)
                for (size_t i = 0; i < thr->nr_devs; i++)
                        kfree(thr->devs[i]);
-       darray_exit(&thr->output2);
-       printbuf_exit(&thr->output.buf);
        kfree(thr->devs);
        kfree(thr);
 }
 
-static int bch2_fsck_thread_release(struct inode *inode, struct file *file)
-{
-       struct fsck_thread *thr = container_of(file->private_data, struct fsck_thread, thr);
-
-       bch2_fsck_thread_free(thr);
-       return 0;
-}
-
-static bool fsck_thread_ready(struct fsck_thread *thr)
-{
-       return thr->output.buf.pos ||
-               thr->output2.nr ||
-               thr->thr.done;
-}
-
-static ssize_t bch2_fsck_thread_read(struct file *file, char __user *buf,
-                                    size_t len, loff_t *ppos)
-{
-       struct fsck_thread *thr = container_of(file->private_data, struct fsck_thread, thr);
-       size_t copied = 0, b;
-       int ret = 0;
-
-       if ((file->f_flags & O_NONBLOCK) &&
-           !fsck_thread_ready(thr))
-               return -EAGAIN;
-
-       ret = wait_event_interruptible(thr->output.wait,
-                       fsck_thread_ready(thr));
-       if (ret)
-               return ret;
-
-       if (thr->thr.done)
-               return 0;
-
-       while (len) {
-               ret = darray_make_room(&thr->output2, thr->output.buf.pos);
-               if (ret)
-                       break;
-
-               spin_lock_irq(&thr->output.lock);
-               b = min_t(size_t, darray_room(thr->output2), thr->output.buf.pos);
-
-               memcpy(&darray_top(thr->output2), thr->output.buf.buf, b);
-               memmove(thr->output.buf.buf,
-                       thr->output.buf.buf + b,
-                       thr->output.buf.pos - b);
-
-               thr->output2.nr += b;
-               thr->output.buf.pos -= b;
-               spin_unlock_irq(&thr->output.lock);
-
-               b = min(len, thr->output2.nr);
-               if (!b)
-                       break;
-
-               b -= copy_to_user(buf, thr->output2.data, b);
-               if (!b) {
-                       ret = -EFAULT;
-                       break;
-               }
-
-               copied  += b;
-               buf     += b;
-               len     -= b;
-
-               memmove(thr->output2.data,
-                       thr->output2.data + b,
-                       thr->output2.nr - b);
-               thr->output2.nr -= b;
-       }
-
-       return copied ?: ret;
-}
-
-static __poll_t bch2_fsck_thread_poll(struct file *file, struct poll_table_struct *wait)
-{
-       struct fsck_thread *thr = container_of(file->private_data, struct fsck_thread, thr);
-
-       poll_wait(file, &thr->output.wait, wait);
-
-       return fsck_thread_ready(thr)
-               ? EPOLLIN|EPOLLHUP
-               : 0;
-}
-
-static const struct file_operations fsck_thread_ops = {
-       .release        = bch2_fsck_thread_release,
-       .read           = bch2_fsck_thread_read,
-       .poll           = bch2_fsck_thread_poll,
-       .llseek         = no_llseek,
-};
-
 static int bch2_fsck_offline_thread_fn(void *arg)
 {
        struct fsck_thread *thr = container_of(arg, struct fsck_thread, thr);
        struct bch_fs *c = bch2_fs_open(thr->devs, thr->nr_devs, thr->opts);
 
-       thr->thr.ret = PTR_ERR_OR_ZERO(c);
-       if (!thr->thr.ret)
+       thr->thr.thr.ret = PTR_ERR_OR_ZERO(c);
+       if (!thr->thr.thr.ret)
                bch2_fs_stop(c);
 
-       thr->thr.done = true;
-       wake_up(&thr->output.wait);
+       thread_with_stdio_done(&thr->thr);
        return 0;
 }
 
@@ -354,11 +193,6 @@ static long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_a
 
        thr->opts = bch2_opts_empty();
        thr->nr_devs = arg.nr_devs;
-       thr->output.buf = PRINTBUF;
-       thr->output.buf.atomic++;
-       spin_lock_init(&thr->output.lock);
-       init_waitqueue_head(&thr->output.wait);
-       darray_init(&thr->output2);
 
        if (copy_from_user(devs, &user_arg->devs[0],
                           array_size(sizeof(user_arg->devs[0]), arg.nr_devs))) {
@@ -384,16 +218,15 @@ static long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_a
                        goto err;
        }
 
-       opt_set(thr->opts, log_output, (u64)(unsigned long)&thr->output);
+       opt_set(thr->opts, stdio, (u64)(unsigned long)&thr->thr.stdio);
 
-       ret = run_thread_with_file(&thr->thr,
-                                  &fsck_thread_ops,
-                                  bch2_fsck_offline_thread_fn,
-                                  "bch-fsck");
+       ret = bch2_run_thread_with_stdio(&thr->thr,
+                       bch2_fsck_thread_exit,
+                       bch2_fsck_offline_thread_fn);
 err:
        if (ret < 0) {
                if (thr)
-                       bch2_fsck_thread_free(thr);
+                       bch2_fsck_thread_exit(&thr->thr);
                pr_err("ret %s", bch2_err_str(ret));
        }
        kfree(devs);
@@ -592,7 +425,7 @@ static int bch2_data_job_release(struct inode *inode, struct file *file)
 {
        struct bch_data_ctx *ctx = container_of(file->private_data, struct bch_data_ctx, thr);
 
-       thread_with_file_exit(&ctx->thr);
+       bch2_thread_with_file_exit(&ctx->thr);
        kfree(ctx);
        return 0;
 }
@@ -642,10 +475,9 @@ static long bch2_ioctl_data(struct bch_fs *c,
        ctx->c = c;
        ctx->arg = arg;
 
-       ret = run_thread_with_file(&ctx->thr,
-                                  &bcachefs_data_ops,
-                                  bch2_data_thread,
-                                  "bch-data/%s", c->name);
+       ret = bch2_run_thread_with_file(&ctx->thr,
+                       &bcachefs_data_ops,
+                       bch2_data_thread);
        if (ret < 0)
                kfree(ctx);
        return ret;
@@ -936,24 +768,32 @@ static int bch2_fsck_online_thread_fn(void *arg)
        struct fsck_thread *thr = container_of(arg, struct fsck_thread, thr);
        struct bch_fs *c = thr->c;
 
-       c->output_filter = current;
-       c->output = &thr->output;
+       c->stdio_filter = current;
+       c->stdio = &thr->thr.stdio;
 
        /*
         * XXX: can we figure out a way to do this without mucking with c->opts?
         */
+       unsigned old_fix_errors = c->opts.fix_errors;
        if (opt_defined(thr->opts, fix_errors))
                c->opts.fix_errors = thr->opts.fix_errors;
+       else
+               c->opts.fix_errors = FSCK_FIX_ask;
+
        c->opts.fsck = true;
+       set_bit(BCH_FS_fsck_running, &c->flags);
 
        c->curr_recovery_pass = BCH_RECOVERY_PASS_check_alloc_info;
-       bch2_run_online_recovery_passes(c);
+       int ret = bch2_run_online_recovery_passes(c);
+
+       clear_bit(BCH_FS_fsck_running, &c->flags);
+       bch_err_fn(c, ret);
 
-       c->output = NULL;
-       c->output_filter = NULL;
+       c->stdio = NULL;
+       c->stdio_filter = NULL;
+       c->opts.fix_errors = old_fix_errors;
 
-       thr->thr.done = true;
-       wake_up(&thr->output.wait);
+       thread_with_stdio_done(&thr->thr);
 
        up(&c->online_fsck_mutex);
        bch2_ro_ref_put(c);
@@ -988,11 +828,6 @@ static long bch2_ioctl_fsck_online(struct bch_fs *c,
 
        thr->c = c;
        thr->opts = bch2_opts_empty();
-       thr->output.buf = PRINTBUF;
-       thr->output.buf.atomic++;
-       spin_lock_init(&thr->output.lock);
-       init_waitqueue_head(&thr->output.wait);
-       darray_init(&thr->output2);
 
        if (arg.opts) {
                char *optstr = strndup_user((char __user *)(unsigned long) arg.opts, 1 << 16);
@@ -1005,15 +840,14 @@ static long bch2_ioctl_fsck_online(struct bch_fs *c,
                        goto err;
        }
 
-       ret = run_thread_with_file(&thr->thr,
-                                  &fsck_thread_ops,
-                                  bch2_fsck_online_thread_fn,
-                                  "bch-fsck");
+       ret = bch2_run_thread_with_stdio(&thr->thr,
+                       bch2_fsck_thread_exit,
+                       bch2_fsck_online_thread_fn);
 err:
        if (ret < 0) {
                bch_err_fn(c, ret);
                if (thr)
-                       bch2_fsck_thread_free(thr);
+                       bch2_fsck_thread_exit(&thr->thr);
                up(&c->online_fsck_mutex);
                bch2_ro_ref_put(c);
        }
index d867ee620bc1f041e0a3c67fb8eba3ad28560285..4b340d13caace03b12f75e788316ad5af7e08d1c 100644 (file)
@@ -20,7 +20,7 @@ struct {                                                              \
 #define DARRAY(_type) DARRAY_PREALLOCATED(_type, 0)
 
 typedef DARRAY(char)   darray_char;
-typedef DARRAY(char *) darray_str;
+typedef DARRAY(char *) darray_str;
 
 int __bch2_darray_resize(darray_char *, size_t, size_t, gfp_t);
 
index de5bfc0d46844166b3543b4bf3ee1a7a841bbc3a..d6418948495f8392898178dd9b350b1829a24aae 100644 (file)
@@ -627,7 +627,7 @@ restart:
                prt_printf(&i->buf, "backtrace:");
                prt_newline(&i->buf);
                printbuf_indent_add(&i->buf, 2);
-               bch2_prt_task_backtrace(&i->buf, task);
+               bch2_prt_task_backtrace(&i->buf, task, 0);
                printbuf_indent_sub(&i->buf, 2);
                prt_newline(&i->buf);
 
@@ -930,8 +930,6 @@ void bch2_debug_exit(void)
 
 int __init bch2_debug_init(void)
 {
-       int ret = 0;
-
        bch_debug = debugfs_create_dir("bcachefs", NULL);
-       return ret;
+       return 0;
 }
index 1cd6ba8d0cce7ed7fade705180cea16837a49281..06a7df529b401c2f8665c17d66803b4649692bc9 100644 (file)
@@ -557,7 +557,7 @@ void bch2_target_to_text(struct printbuf *out, struct bch_fs *c, unsigned v)
                        : NULL;
 
                if (ca && percpu_ref_tryget(&ca->io_ref)) {
-                       prt_printf(out, "/dev/%pg", ca->disk_sb.bdev);
+                       prt_printf(out, "/dev/%s", ca->name);
                        percpu_ref_put(&ca->io_ref);
                } else if (ca) {
                        prt_printf(out, "offline device %u", t.dev);
index e89185a28e0899e2b0ff8a973ae3326209e2f6ce..d802bc63c8d0b4832bd8062ce827c8af180361e6 100644 (file)
@@ -3,6 +3,7 @@
 /* erasure coding */
 
 #include "bcachefs.h"
+#include "alloc_background.h"
 #include "alloc_foreground.h"
 #include "backpointers.h"
 #include "bkey_buf.h"
@@ -156,6 +157,306 @@ void bch2_stripe_to_text(struct printbuf *out, struct bch_fs *c,
        }
 }
 
+/* Triggers: */
+
+static int bch2_trans_mark_stripe_bucket(struct btree_trans *trans,
+                                        struct bkey_s_c_stripe s,
+                                        unsigned idx, bool deleting)
+{
+       struct bch_fs *c = trans->c;
+       const struct bch_extent_ptr *ptr = &s.v->ptrs[idx];
+       struct btree_iter iter;
+       struct bkey_i_alloc_v4 *a;
+       enum bch_data_type data_type = idx >= s.v->nr_blocks - s.v->nr_redundant
+               ? BCH_DATA_parity : 0;
+       s64 sectors = data_type ? le16_to_cpu(s.v->sectors) : 0;
+       int ret = 0;
+
+       if (deleting)
+               sectors = -sectors;
+
+       a = bch2_trans_start_alloc_update(trans, &iter, PTR_BUCKET_POS(c, ptr));
+       if (IS_ERR(a))
+               return PTR_ERR(a);
+
+       ret = bch2_check_bucket_ref(trans, s.s_c, ptr, sectors, data_type,
+                                   a->v.gen, a->v.data_type,
+                                   a->v.dirty_sectors);
+       if (ret)
+               goto err;
+
+       if (!deleting) {
+               if (bch2_trans_inconsistent_on(a->v.stripe ||
+                                              a->v.stripe_redundancy, trans,
+                               "bucket %llu:%llu gen %u data type %s dirty_sectors %u: multiple stripes using same bucket (%u, %llu)",
+                               iter.pos.inode, iter.pos.offset, a->v.gen,
+                               bch2_data_types[a->v.data_type],
+                               a->v.dirty_sectors,
+                               a->v.stripe, s.k->p.offset)) {
+                       ret = -EIO;
+                       goto err;
+               }
+
+               if (bch2_trans_inconsistent_on(data_type && a->v.dirty_sectors, trans,
+                               "bucket %llu:%llu gen %u data type %s dirty_sectors %u: data already in stripe bucket %llu",
+                               iter.pos.inode, iter.pos.offset, a->v.gen,
+                               bch2_data_types[a->v.data_type],
+                               a->v.dirty_sectors,
+                               s.k->p.offset)) {
+                       ret = -EIO;
+                       goto err;
+               }
+
+               a->v.stripe             = s.k->p.offset;
+               a->v.stripe_redundancy  = s.v->nr_redundant;
+               a->v.data_type          = BCH_DATA_stripe;
+       } else {
+               if (bch2_trans_inconsistent_on(a->v.stripe != s.k->p.offset ||
+                                              a->v.stripe_redundancy != s.v->nr_redundant, trans,
+                               "bucket %llu:%llu gen %u: not marked as stripe when deleting stripe %llu (got %u)",
+                               iter.pos.inode, iter.pos.offset, a->v.gen,
+                               s.k->p.offset, a->v.stripe)) {
+                       ret = -EIO;
+                       goto err;
+               }
+
+               a->v.stripe             = 0;
+               a->v.stripe_redundancy  = 0;
+               a->v.data_type          = alloc_data_type(a->v, BCH_DATA_user);
+       }
+
+       a->v.dirty_sectors += sectors;
+       if (data_type)
+               a->v.data_type = !deleting ? data_type : 0;
+
+       ret = bch2_trans_update(trans, &iter, &a->k_i, 0);
+       if (ret)
+               goto err;
+err:
+       bch2_trans_iter_exit(trans, &iter);
+       return ret;
+}
+
+static int mark_stripe_bucket(struct btree_trans *trans,
+                             struct bkey_s_c k,
+                             unsigned ptr_idx,
+                             unsigned flags)
+{
+       struct bch_fs *c = trans->c;
+       const struct bch_stripe *s = bkey_s_c_to_stripe(k).v;
+       unsigned nr_data = s->nr_blocks - s->nr_redundant;
+       bool parity = ptr_idx >= nr_data;
+       enum bch_data_type data_type = parity ? BCH_DATA_parity : BCH_DATA_stripe;
+       s64 sectors = parity ? le16_to_cpu(s->sectors) : 0;
+       const struct bch_extent_ptr *ptr = s->ptrs + ptr_idx;
+       struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
+       struct bucket old, new, *g;
+       struct printbuf buf = PRINTBUF;
+       int ret = 0;
+
+       BUG_ON(!(flags & BTREE_TRIGGER_GC));
+
+       /* * XXX doesn't handle deletion */
+
+       percpu_down_read(&c->mark_lock);
+       g = PTR_GC_BUCKET(ca, ptr);
+
+       if (g->dirty_sectors ||
+           (g->stripe && g->stripe != k.k->p.offset)) {
+               bch2_fs_inconsistent(c,
+                             "bucket %u:%zu gen %u: multiple stripes using same bucket\n%s",
+                             ptr->dev, PTR_BUCKET_NR(ca, ptr), g->gen,
+                             (bch2_bkey_val_to_text(&buf, c, k), buf.buf));
+               ret = -EINVAL;
+               goto err;
+       }
+
+       bucket_lock(g);
+       old = *g;
+
+       ret = bch2_check_bucket_ref(trans, k, ptr, sectors, data_type,
+                                   g->gen, g->data_type,
+                                   g->dirty_sectors);
+       if (ret)
+               goto err;
+
+       g->data_type = data_type;
+       g->dirty_sectors += sectors;
+
+       g->stripe               = k.k->p.offset;
+       g->stripe_redundancy    = s->nr_redundant;
+       new = *g;
+err:
+       bucket_unlock(g);
+       if (!ret)
+               bch2_dev_usage_update_m(c, ca, &old, &new);
+       percpu_up_read(&c->mark_lock);
+       printbuf_exit(&buf);
+       return ret;
+}
+
+int bch2_trigger_stripe(struct btree_trans *trans,
+                       enum btree_id btree_id, unsigned level,
+                       struct bkey_s_c old, struct bkey_s _new,
+                       unsigned flags)
+{
+       struct bkey_s_c new = _new.s_c;
+       struct bch_fs *c = trans->c;
+       u64 idx = new.k->p.offset;
+       const struct bch_stripe *old_s = old.k->type == KEY_TYPE_stripe
+               ? bkey_s_c_to_stripe(old).v : NULL;
+       const struct bch_stripe *new_s = new.k->type == KEY_TYPE_stripe
+               ? bkey_s_c_to_stripe(new).v : NULL;
+
+       if (flags & BTREE_TRIGGER_TRANSACTIONAL) {
+               /*
+                * If the pointers aren't changing, we don't need to do anything:
+                */
+               if (new_s && old_s &&
+                   new_s->nr_blocks    == old_s->nr_blocks &&
+                   new_s->nr_redundant == old_s->nr_redundant &&
+                   !memcmp(old_s->ptrs, new_s->ptrs,
+                           new_s->nr_blocks * sizeof(struct bch_extent_ptr)))
+                       return 0;
+
+               BUG_ON(new_s && old_s &&
+                      (new_s->nr_blocks        != old_s->nr_blocks ||
+                       new_s->nr_redundant     != old_s->nr_redundant));
+
+               if (new_s) {
+                       s64 sectors = le16_to_cpu(new_s->sectors);
+
+                       struct bch_replicas_padded r;
+                       bch2_bkey_to_replicas(&r.e, new);
+                       int ret = bch2_update_replicas_list(trans, &r.e, sectors * new_s->nr_redundant);
+                       if (ret)
+                               return ret;
+               }
+
+               if (old_s) {
+                       s64 sectors = -((s64) le16_to_cpu(old_s->sectors));
+
+                       struct bch_replicas_padded r;
+                       bch2_bkey_to_replicas(&r.e, old);
+                       int ret = bch2_update_replicas_list(trans, &r.e, sectors * old_s->nr_redundant);
+                       if (ret)
+                               return ret;
+               }
+
+               unsigned nr_blocks = new_s ? new_s->nr_blocks : old_s->nr_blocks;
+               for (unsigned i = 0; i < nr_blocks; i++) {
+                       if (new_s && old_s &&
+                           !memcmp(&new_s->ptrs[i],
+                                   &old_s->ptrs[i],
+                                   sizeof(new_s->ptrs[i])))
+                               continue;
+
+                       if (new_s) {
+                               int ret = bch2_trans_mark_stripe_bucket(trans,
+                                               bkey_s_c_to_stripe(new), i, false);
+                               if (ret)
+                                       return ret;
+                       }
+
+                       if (old_s) {
+                               int ret = bch2_trans_mark_stripe_bucket(trans,
+                                               bkey_s_c_to_stripe(old), i, true);
+                               if (ret)
+                                       return ret;
+                       }
+               }
+       }
+
+       if (!(flags & (BTREE_TRIGGER_TRANSACTIONAL|BTREE_TRIGGER_GC))) {
+               struct stripe *m = genradix_ptr(&c->stripes, idx);
+
+               if (!m) {
+                       struct printbuf buf1 = PRINTBUF;
+                       struct printbuf buf2 = PRINTBUF;
+
+                       bch2_bkey_val_to_text(&buf1, c, old);
+                       bch2_bkey_val_to_text(&buf2, c, new);
+                       bch_err_ratelimited(c, "error marking nonexistent stripe %llu while marking\n"
+                                           "old %s\n"
+                                           "new %s", idx, buf1.buf, buf2.buf);
+                       printbuf_exit(&buf2);
+                       printbuf_exit(&buf1);
+                       bch2_inconsistent_error(c);
+                       return -1;
+               }
+
+               if (!new_s) {
+                       bch2_stripes_heap_del(c, m, idx);
+
+                       memset(m, 0, sizeof(*m));
+               } else {
+                       m->sectors      = le16_to_cpu(new_s->sectors);
+                       m->algorithm    = new_s->algorithm;
+                       m->nr_blocks    = new_s->nr_blocks;
+                       m->nr_redundant = new_s->nr_redundant;
+                       m->blocks_nonempty = 0;
+
+                       for (unsigned i = 0; i < new_s->nr_blocks; i++)
+                               m->blocks_nonempty += !!stripe_blockcount_get(new_s, i);
+
+                       if (!old_s)
+                               bch2_stripes_heap_insert(c, m, idx);
+                       else
+                               bch2_stripes_heap_update(c, m, idx);
+               }
+       }
+
+       if (flags & BTREE_TRIGGER_GC) {
+               struct gc_stripe *m =
+                       genradix_ptr_alloc(&c->gc_stripes, idx, GFP_KERNEL);
+
+               if (!m) {
+                       bch_err(c, "error allocating memory for gc_stripes, idx %llu",
+                               idx);
+                       return -BCH_ERR_ENOMEM_mark_stripe;
+               }
+               /*
+                * This will be wrong when we bring back runtime gc: we should
+                * be unmarking the old key and then marking the new key
+                */
+               m->alive        = true;
+               m->sectors      = le16_to_cpu(new_s->sectors);
+               m->nr_blocks    = new_s->nr_blocks;
+               m->nr_redundant = new_s->nr_redundant;
+
+               for (unsigned i = 0; i < new_s->nr_blocks; i++)
+                       m->ptrs[i] = new_s->ptrs[i];
+
+               bch2_bkey_to_replicas(&m->r.e, new);
+
+               /*
+                * gc recalculates this field from stripe ptr
+                * references:
+                */
+               memset(m->block_sectors, 0, sizeof(m->block_sectors));
+
+               for (unsigned i = 0; i < new_s->nr_blocks; i++) {
+                       int ret = mark_stripe_bucket(trans, new, i, flags);
+                       if (ret)
+                               return ret;
+               }
+
+               int ret = bch2_update_replicas(c, new, &m->r.e,
+                                     ((s64) m->sectors * m->nr_redundant),
+                                     0, true);
+               if (ret) {
+                       struct printbuf buf = PRINTBUF;
+
+                       bch2_bkey_val_to_text(&buf, c, new);
+                       bch2_fs_fatal_error(c, "no replicas entry for %s", buf.buf);
+                       printbuf_exit(&buf);
+                       return ret;
+               }
+       }
+
+       return 0;
+}
+
 /* returns blocknr in stripe that we matched: */
 static const struct bch_extent_ptr *bkey_matches_stripe(struct bch_stripe *s,
                                                struct bkey_s_c k, unsigned *block)
index 7d0237c9819f1a42561f5ec81512e1c4278d12fd..f4369b02e805f0a24572a8cf87d18867c3d3301a 100644 (file)
@@ -12,13 +12,14 @@ int bch2_stripe_invalid(struct bch_fs *, struct bkey_s_c,
                        enum bkey_invalid_flags, struct printbuf *);
 void bch2_stripe_to_text(struct printbuf *, struct bch_fs *,
                         struct bkey_s_c);
+int bch2_trigger_stripe(struct btree_trans *, enum btree_id, unsigned,
+                       struct bkey_s_c, struct bkey_s, unsigned);
 
 #define bch2_bkey_ops_stripe ((struct bkey_ops) {      \
        .key_invalid    = bch2_stripe_invalid,          \
        .val_to_text    = bch2_stripe_to_text,          \
        .swab           = bch2_ptr_swab,                \
-       .trans_trigger  = bch2_trans_mark_stripe,       \
-       .atomic_trigger = bch2_mark_stripe,             \
+       .trigger        = bch2_trigger_stripe,          \
        .min_val_size   = 8,                            \
 })
 
index aa4f7f4925f6855c486221b0c26415a4bddc8aed..d32c8bebe46c32f7abc1a11ad49ee80752f2a623 100644 (file)
@@ -2,6 +2,7 @@
 #include "bcachefs.h"
 #include "error.h"
 #include "super.h"
+#include "thread_with_file.h"
 
 #define FSCK_ERR_RATELIMIT_NR  10
 
@@ -27,7 +28,7 @@ bool bch2_inconsistent_error(struct bch_fs *c)
 void bch2_topology_error(struct bch_fs *c)
 {
        set_bit(BCH_FS_topology_error, &c->flags);
-       if (test_bit(BCH_FS_fsck_done, &c->flags))
+       if (!test_bit(BCH_FS_fsck_running, &c->flags))
                bch2_inconsistent_error(c);
 }
 
@@ -69,40 +70,66 @@ enum ask_yn {
        YN_ALLYES,
 };
 
+static enum ask_yn parse_yn_response(char *buf)
+{
+       buf = strim(buf);
+
+       if (strlen(buf) == 1)
+               switch (buf[0]) {
+               case 'n':
+                       return YN_NO;
+               case 'y':
+                       return YN_YES;
+               case 'N':
+                       return YN_ALLNO;
+               case 'Y':
+                       return YN_ALLYES;
+               }
+       return -1;
+}
+
 #ifdef __KERNEL__
-#define bch2_fsck_ask_yn()     YN_NO
+static enum ask_yn bch2_fsck_ask_yn(struct bch_fs *c)
+{
+       struct stdio_redirect *stdio = c->stdio;
+
+       if (c->stdio_filter && c->stdio_filter != current)
+               stdio = NULL;
+
+       if (!stdio)
+               return YN_NO;
+
+       char buf[100];
+       int ret;
+
+       do {
+               bch2_print(c, " (y,n, or Y,N for all errors of this type) ");
+
+               int r = bch2_stdio_redirect_readline(stdio, buf, sizeof(buf) - 1);
+               if (r < 0)
+                       return YN_NO;
+               buf[r] = '\0';
+       } while ((ret = parse_yn_response(buf)) < 0);
+
+       return ret;
+}
 #else
 
 #include "tools-util.h"
 
-enum ask_yn bch2_fsck_ask_yn(void)
+static enum ask_yn bch2_fsck_ask_yn(struct bch_fs *c)
 {
        char *buf = NULL;
        size_t buflen = 0;
-       bool ret;
+       int ret;
 
-       while (true) {
+       do {
                fputs(" (y,n, or Y,N for all errors of this type) ", stdout);
                fflush(stdout);
 
                if (getline(&buf, &buflen, stdin) < 0)
                        die("error reading from standard input");
-
-               strim(buf);
-               if (strlen(buf) != 1)
-                       continue;
-
-               switch (buf[0]) {
-               case 'n':
-                       return YN_NO;
-               case 'y':
-                       return YN_YES;
-               case 'N':
-                       return YN_ALLNO;
-               case 'Y':
-                       return YN_ALLYES;
-               }
-       }
+       } while ((ret = parse_yn_response(buf)) < 0);
 
        free(buf);
        return ret;
@@ -114,7 +141,7 @@ static struct fsck_err_state *fsck_err_get(struct bch_fs *c, const char *fmt)
 {
        struct fsck_err_state *s;
 
-       if (test_bit(BCH_FS_fsck_done, &c->flags))
+       if (!test_bit(BCH_FS_fsck_running, &c->flags))
                return NULL;
 
        list_for_each_entry(s, &c->fsck_error_msgs, list)
@@ -152,7 +179,8 @@ int bch2_fsck_err(struct bch_fs *c,
        struct printbuf buf = PRINTBUF, *out = &buf;
        int ret = -BCH_ERR_fsck_ignore;
 
-       if (test_bit(err, c->sb.errors_silent))
+       if ((flags & FSCK_CAN_FIX) &&
+           test_bit(err, c->sb.errors_silent))
                return -BCH_ERR_fsck_fix;
 
        bch2_sb_error_count(c, err);
@@ -196,7 +224,7 @@ int bch2_fsck_err(struct bch_fs *c,
                prt_printf(out, bch2_log_msg(c, ""));
 #endif
 
-       if (test_bit(BCH_FS_fsck_done, &c->flags)) {
+       if (!test_bit(BCH_FS_fsck_running, &c->flags)) {
                if (c->opts.errors != BCH_ON_ERROR_continue ||
                    !(flags & (FSCK_CAN_FIX|FSCK_CAN_IGNORE))) {
                        prt_str(out, ", shutting down");
@@ -221,10 +249,13 @@ int bch2_fsck_err(struct bch_fs *c,
                        int ask;
 
                        prt_str(out, ": fix?");
-                       bch2_print_string_as_lines(KERN_ERR, out->buf);
+                       if (bch2_fs_stdio_redirect(c))
+                               bch2_print(c, "%s", out->buf);
+                       else
+                               bch2_print_string_as_lines(KERN_ERR, out->buf);
                        print = false;
 
-                       ask = bch2_fsck_ask_yn();
+                       ask = bch2_fsck_ask_yn(c);
 
                        if (ask >= YN_ALLNO && s)
                                s->fix = ask == YN_ALLNO
@@ -253,10 +284,14 @@ int bch2_fsck_err(struct bch_fs *c,
             !(flags & FSCK_CAN_IGNORE)))
                ret = -BCH_ERR_fsck_errors_not_fixed;
 
-       if (print)
-               bch2_print_string_as_lines(KERN_ERR, out->buf);
+       if (print) {
+               if (bch2_fs_stdio_redirect(c))
+                       bch2_print(c, "%s\n", out->buf);
+               else
+                       bch2_print_string_as_lines(KERN_ERR, out->buf);
+       }
 
-       if (!test_bit(BCH_FS_fsck_done, &c->flags) &&
+       if (test_bit(BCH_FS_fsck_running, &c->flags) &&
            (ret != -BCH_ERR_fsck_fix &&
             ret != -BCH_ERR_fsck_ignore))
                bch_err(c, "Unable to continue, halting");
index 77ae4476578b1a7721cceee22be4ded1b1db90a1..a855c94d43ddb4f770f69807401f6d9dd5f66cbf 100644 (file)
@@ -415,8 +415,7 @@ void bch2_btree_ptr_v2_compat(enum btree_id, unsigned, unsigned,
        .key_invalid    = bch2_btree_ptr_invalid,               \
        .val_to_text    = bch2_btree_ptr_to_text,               \
        .swab           = bch2_ptr_swab,                        \
-       .trans_trigger  = bch2_trans_mark_extent,               \
-       .atomic_trigger = bch2_mark_extent,                     \
+       .trigger        = bch2_trigger_extent,                  \
 })
 
 #define bch2_bkey_ops_btree_ptr_v2 ((struct bkey_ops) {                \
@@ -424,8 +423,7 @@ void bch2_btree_ptr_v2_compat(enum btree_id, unsigned, unsigned,
        .val_to_text    = bch2_btree_ptr_v2_to_text,            \
        .swab           = bch2_ptr_swab,                        \
        .compat         = bch2_btree_ptr_v2_compat,             \
-       .trans_trigger  = bch2_trans_mark_extent,               \
-       .atomic_trigger = bch2_mark_extent,                     \
+       .trigger        = bch2_trigger_extent,                  \
        .min_val_size   = 40,                                   \
 })
 
@@ -439,8 +437,7 @@ bool bch2_extent_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c);
        .swab           = bch2_ptr_swab,                        \
        .key_normalize  = bch2_extent_normalize,                \
        .key_merge      = bch2_extent_merge,                    \
-       .trans_trigger  = bch2_trans_mark_extent,               \
-       .atomic_trigger = bch2_mark_extent,                     \
+       .trigger        = bch2_trigger_extent,                  \
 })
 
 /* KEY_TYPE_reservation: */
@@ -454,8 +451,7 @@ bool bch2_reservation_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c);
        .key_invalid    = bch2_reservation_invalid,             \
        .val_to_text    = bch2_reservation_to_text,             \
        .key_merge      = bch2_reservation_merge,               \
-       .trans_trigger  = bch2_trans_mark_reservation,          \
-       .atomic_trigger = bch2_mark_reservation,                \
+       .trigger        = bch2_trigger_reservation,             \
        .min_val_size   = 8,                                    \
 })
 
index 4496cf91a4c17bcde4e4a934eb0475007ff1311c..1c1ea0f0c692a6fdd4c262ef184bbcdda32d154f 100644 (file)
@@ -166,10 +166,8 @@ int bch2_create_trans(struct btree_trans *trans,
                if (ret)
                        goto err;
 
-               if (c->sb.version >= bcachefs_metadata_version_inode_backpointers) {
-                       new_inode->bi_dir               = dir_u->bi_inum;
-                       new_inode->bi_dir_offset        = dir_offset;
-               }
+               new_inode->bi_dir               = dir_u->bi_inum;
+               new_inode->bi_dir_offset        = dir_offset;
        }
 
        inode_iter.flags &= ~BTREE_ITER_ALL_SNAPSHOTS;
@@ -228,10 +226,8 @@ int bch2_link_trans(struct btree_trans *trans,
        if (ret)
                goto err;
 
-       if (c->sb.version >= bcachefs_metadata_version_inode_backpointers) {
-               inode_u->bi_dir         = dir.inum;
-               inode_u->bi_dir_offset  = dir_offset;
-       }
+       inode_u->bi_dir         = dir.inum;
+       inode_u->bi_dir_offset  = dir_offset;
 
        ret =   bch2_inode_write(trans, &dir_iter, dir_u) ?:
                bch2_inode_write(trans, &inode_iter, inode_u);
@@ -414,21 +410,19 @@ int bch2_rename_trans(struct btree_trans *trans,
                        goto err;
        }
 
-       if (c->sb.version >= bcachefs_metadata_version_inode_backpointers) {
-               src_inode_u->bi_dir             = dst_dir_u->bi_inum;
-               src_inode_u->bi_dir_offset      = dst_offset;
+       src_inode_u->bi_dir             = dst_dir_u->bi_inum;
+       src_inode_u->bi_dir_offset      = dst_offset;
 
-               if (mode == BCH_RENAME_EXCHANGE) {
-                       dst_inode_u->bi_dir             = src_dir_u->bi_inum;
-                       dst_inode_u->bi_dir_offset      = src_offset;
-               }
+       if (mode == BCH_RENAME_EXCHANGE) {
+               dst_inode_u->bi_dir             = src_dir_u->bi_inum;
+               dst_inode_u->bi_dir_offset      = src_offset;
+       }
 
-               if (mode == BCH_RENAME_OVERWRITE &&
-                   dst_inode_u->bi_dir         == dst_dir_u->bi_inum &&
-                   dst_inode_u->bi_dir_offset  == src_offset) {
-                       dst_inode_u->bi_dir             = 0;
-                       dst_inode_u->bi_dir_offset      = 0;
-               }
+       if (mode == BCH_RENAME_OVERWRITE &&
+           dst_inode_u->bi_dir         == dst_dir_u->bi_inum &&
+           dst_inode_u->bi_dir_offset  == src_offset) {
+               dst_inode_u->bi_dir             = 0;
+               dst_inode_u->bi_dir_offset      = 0;
        }
 
        if (mode == BCH_RENAME_OVERWRITE) {
index 98bd5babab193bec842dce20b0783e6c958ac5bf..9fea89762df01dd64b6d8e6463b237d34a106c3a 100644 (file)
@@ -194,6 +194,16 @@ int bch2_fsync(struct file *file, loff_t start, loff_t end, int datasync)
        struct bch_fs *c = inode->v.i_sb->s_fs_info;
        int ret;
 
+       /*
+        * check if unlinked, disable/defer until relink
+        */
+
+       /*
+        * also: add a mode where a file is a tmpfile until fully,
+        * asynchronously written
+        */
+
+
        ret = file_write_and_wait_range(file, start, end);
        if (ret)
                goto out;
index de1617ec1b59cb86e5202521b8d169fe8932640a..4f0ecd60567570b7364cef517225ea0e3dfa5575 100644 (file)
@@ -870,8 +870,7 @@ static int check_inode(struct btree_trans *trans,
                return 0;
        }
 
-       if (u.bi_flags & BCH_INODE_unlinked &&
-           c->sb.version >= bcachefs_metadata_version_deleted_inodes) {
+       if (u.bi_flags & BCH_INODE_unlinked) {
                ret = check_inode_deleted_list(trans, k.k->p);
                if (ret < 0)
                        return ret;
@@ -1594,13 +1593,12 @@ static int check_dirent_target(struct btree_trans *trans,
                d = dirent_i_to_s_c(n);
        }
 
-       if (d.v->d_type == DT_SUBVOL &&
-           target->bi_parent_subvol != le32_to_cpu(d.v->d_parent_subvol) &&
-           (c->sb.version < bcachefs_metadata_version_subvol_dirent ||
-            fsck_err(c, dirent_d_parent_subvol_wrong,
-                     "dirent has wrong d_parent_subvol field: got %u, should be %u",
-                     le32_to_cpu(d.v->d_parent_subvol),
-                     target->bi_parent_subvol))) {
+       if (fsck_err_on(d.v->d_type == DT_SUBVOL &&
+                       target->bi_parent_subvol != le32_to_cpu(d.v->d_parent_subvol),
+                       c, dirent_d_parent_subvol_wrong,
+                       "dirent has wrong d_parent_subvol field: got %u, should be %u",
+                       le32_to_cpu(d.v->d_parent_subvol),
+                       target->bi_parent_subvol)) {
                n = bch2_trans_kmalloc(trans, bkey_bytes(d.k));
                ret = PTR_ERR_OR_ZERO(n);
                if (ret)
index c39844b8e596b41bd1a106718b3e756db0359bda..37dce96f48ac42d28b98d99e75a77b049e04de8f 100644 (file)
@@ -561,64 +561,46 @@ static inline bool bkey_is_deleted_inode(struct bkey_s_c k)
        return bkey_inode_flags(k) & BCH_INODE_unlinked;
 }
 
-int bch2_trans_mark_inode(struct btree_trans *trans,
-                         enum btree_id btree_id, unsigned level,
-                         struct bkey_s_c old,
-                         struct bkey_i *new,
-                         unsigned flags)
+int bch2_trigger_inode(struct btree_trans *trans,
+                      enum btree_id btree_id, unsigned level,
+                      struct bkey_s_c old,
+                      struct bkey_s new,
+                      unsigned flags)
 {
-       int nr = bkey_is_inode(&new->k) - bkey_is_inode(old.k);
-       bool old_deleted = bkey_is_deleted_inode(old);
-       bool new_deleted = bkey_is_deleted_inode(bkey_i_to_s_c(new));
+       s64 nr = bkey_is_inode(new.k) - bkey_is_inode(old.k);
 
-       if (nr) {
-               int ret = bch2_replicas_deltas_realloc(trans, 0);
-               struct replicas_delta_list *d = trans->fs_usage_deltas;
+       if (flags & BTREE_TRIGGER_TRANSACTIONAL) {
+               if (nr) {
+                       int ret = bch2_replicas_deltas_realloc(trans, 0);
+                       if (ret)
+                               return ret;
 
-               if (ret)
-                       return ret;
-
-               d->nr_inodes += nr;
-       }
+                       trans->fs_usage_deltas->nr_inodes += nr;
+               }
 
-       if (old_deleted != new_deleted) {
-               int ret = bch2_btree_bit_mod(trans, BTREE_ID_deleted_inodes, new->k.p, new_deleted);
-               if (ret)
-                       return ret;
+               bool old_deleted = bkey_is_deleted_inode(old);
+               bool new_deleted = bkey_is_deleted_inode(new.s_c);
+               if (old_deleted != new_deleted) {
+                       int ret = bch2_btree_bit_mod(trans, BTREE_ID_deleted_inodes, new.k->p, new_deleted);
+                       if (ret)
+                               return ret;
+               }
        }
 
-       return 0;
-}
+       if (!(flags & BTREE_TRIGGER_TRANSACTIONAL) && (flags & BTREE_TRIGGER_INSERT)) {
+               BUG_ON(!trans->journal_res.seq);
 
-int bch2_mark_inode(struct btree_trans *trans,
-                   enum btree_id btree_id, unsigned level,
-                   struct bkey_s_c old, struct bkey_s_c new,
-                   unsigned flags)
-{
-       struct bch_fs *c = trans->c;
-       struct bch_fs_usage *fs_usage;
-       u64 journal_seq = trans->journal_res.seq;
-
-       if (flags & BTREE_TRIGGER_INSERT) {
-               struct bch_inode_v3 *v = (struct bch_inode_v3 *) new.v;
-
-               BUG_ON(!journal_seq);
-               BUG_ON(new.k->type != KEY_TYPE_inode_v3);
-
-               v->bi_journal_seq = cpu_to_le64(journal_seq);
+               bkey_s_to_inode_v3(new).v->bi_journal_seq = cpu_to_le64(trans->journal_res.seq);
        }
 
        if (flags & BTREE_TRIGGER_GC) {
-               percpu_down_read(&c->mark_lock);
-               preempt_disable();
-
-               fs_usage = fs_usage_ptr(c, journal_seq, flags & BTREE_TRIGGER_GC);
-               fs_usage->nr_inodes += bkey_is_inode(new.k);
-               fs_usage->nr_inodes -= bkey_is_inode(old.k);
+               struct bch_fs *c = trans->c;
 
-               preempt_enable();
+               percpu_down_read(&c->mark_lock);
+               this_cpu_add(c->usage_gc->nr_inodes, nr);
                percpu_up_read(&c->mark_lock);
        }
+
        return 0;
 }
 
index 88818a332b1e5fcaa5fd9b350d958ef582c05161..b63f312581cfa5ea9975fae6fdcd2d1518d13d54 100644 (file)
@@ -17,32 +17,27 @@ int bch2_inode_v3_invalid(struct bch_fs *, struct bkey_s_c,
                          enum bkey_invalid_flags, struct printbuf *);
 void bch2_inode_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
 
-int bch2_trans_mark_inode(struct btree_trans *, enum btree_id, unsigned,
-                         struct bkey_s_c, struct bkey_i *, unsigned);
-int bch2_mark_inode(struct btree_trans *, enum btree_id, unsigned,
-                   struct bkey_s_c, struct bkey_s_c, unsigned);
+int bch2_trigger_inode(struct btree_trans *, enum btree_id, unsigned,
+                         struct bkey_s_c, struct bkey_s, unsigned);
 
 #define bch2_bkey_ops_inode ((struct bkey_ops) {       \
        .key_invalid    = bch2_inode_invalid,           \
        .val_to_text    = bch2_inode_to_text,           \
-       .trans_trigger  = bch2_trans_mark_inode,        \
-       .atomic_trigger = bch2_mark_inode,              \
+       .trigger        = bch2_trigger_inode,           \
        .min_val_size   = 16,                           \
 })
 
 #define bch2_bkey_ops_inode_v2 ((struct bkey_ops) {    \
        .key_invalid    = bch2_inode_v2_invalid,        \
        .val_to_text    = bch2_inode_to_text,           \
-       .trans_trigger  = bch2_trans_mark_inode,        \
-       .atomic_trigger = bch2_mark_inode,              \
+       .trigger        = bch2_trigger_inode,           \
        .min_val_size   = 32,                           \
 })
 
 #define bch2_bkey_ops_inode_v3 ((struct bkey_ops) {    \
        .key_invalid    = bch2_inode_v3_invalid,        \
        .val_to_text    = bch2_inode_to_text,           \
-       .trans_trigger  = bch2_trans_mark_inode,        \
-       .atomic_trigger = bch2_mark_inode,              \
+       .trigger        = bch2_trigger_inode,           \
        .min_val_size   = 48,                           \
 })
 
index 42cad83efb48337a2d4462ca6fd24d7946671b6c..93a24fef42148488cdddb391cd291dd0e0168063 100644 (file)
@@ -414,11 +414,11 @@ enum fsck_err_opts {
          OPT_BOOL(),                                                   \
          BCH2_NO_SB_OPT,               false,                          \
          NULL,         "Allocate the buckets_nouse bitmap")            \
-       x(log_output,                   u64,                            \
+       x(stdio,                        u64,                            \
          0,                                                            \
          OPT_UINT(0, S64_MAX),                                         \
          BCH2_NO_SB_OPT,               false,                          \
-         NULL,         "Pointer to a struct log_output")               \
+         NULL,         "Pointer to a struct stdio_redirect")           \
        x(project,                      u8,                             \
          OPT_INODE,                                                    \
          OPT_BOOL(),                                                   \
@@ -458,7 +458,13 @@ enum fsck_err_opts {
          OPT_UINT(0, BCH_REPLICAS_MAX),                                \
          BCH2_NO_SB_OPT,               1,                              \
          "n",          "Data written to this device will be considered\n"\
-                       "to have already been replicated n times")
+                       "to have already been replicated n times")      \
+       x(btree_node_prefetch,          u8,                             \
+         OPT_FS|OPT_MOUNT|OPT_RUNTIME,                                 \
+         OPT_BOOL(),                                                   \
+         BCH2_NO_SB_OPT,               true,                           \
+         NULL,         "BTREE_ITER_PREFETCH causes btree nodes to be\n"\
+         " prefetched sequentially")
 
 struct bch_opts {
 #define x(_name, _bits, ...)   unsigned _name##_defined:1;
index 187b0377bd40522ebb35235c64723f13bfaa5742..accf246c32330919869bccff32a1ecfcc6d97856 100644 (file)
@@ -1,8 +1,8 @@
 // SPDX-License-Identifier: LGPL-2.1+
 /* Copyright (C) 2022 Kent Overstreet */
 
-#include <linux/err.h>
 #include <linux/bitmap.h>
+#include <linux/err.h>
 #include <linux/export.h>
 #include <linux/kernel.h>
 #include <linux/slab.h>
index 3e49209db2dec37f9cd97aa6fdfd0fda61f479ca..e1f0da6a717e021b894ba39932275e8b2ed0b323 100644 (file)
@@ -575,7 +575,7 @@ u64 bch2_recovery_passes_from_stable(u64 v)
        return ret;
 }
 
-static u64 check_version_upgrade(struct bch_fs *c)
+static bool check_version_upgrade(struct bch_fs *c)
 {
        unsigned latest_compatible = bch2_latest_compatible_version(c->sb.version);
        unsigned latest_version = bcachefs_metadata_version_current;
@@ -624,10 +624,15 @@ static u64 check_version_upgrade(struct bch_fs *c)
                bch2_version_to_text(&buf, new_version);
                prt_newline(&buf);
 
-               u64 recovery_passes = bch2_upgrade_recovery_passes(c, old_version, new_version);
-               if (recovery_passes) {
+               struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
+               __le64 passes = ext->recovery_passes_required[0];
+               bch2_sb_set_upgrade(c, old_version, new_version);
+               passes = ext->recovery_passes_required[0] & ~passes;
+
+               if (passes) {
                        prt_str(&buf, "  running recovery passes: ");
-                       prt_bitflags(&buf, bch2_recovery_passes, recovery_passes);
+                       prt_bitflags(&buf, bch2_recovery_passes,
+                                    bch2_recovery_passes_from_stable(le64_to_cpu(passes)));
                }
 
                bch_info(c, "%s", buf.buf);
@@ -635,10 +640,6 @@ static u64 check_version_upgrade(struct bch_fs *c)
                bch2_sb_upgrade(c, new_version);
 
                printbuf_exit(&buf);
-
-               struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
-               ext->recovery_passes_required[0] |=
-                       cpu_to_le64(bch2_recovery_passes_to_stable(recovery_passes));
                return true;
        }
 
@@ -795,23 +796,17 @@ int bch2_fs_recovery(struct bch_fs *c)
                        prt_bitflags(&buf, bch2_recovery_passes, sb_passes);
                        bch_info(c, "%s", buf.buf);
                        printbuf_exit(&buf);
-                       c->recovery_passes_explicit |= sb_passes;
                }
 
-               if (bcachefs_metadata_version_current < c->sb.version) {
+               if (bch2_check_version_downgrade(c)) {
                        struct printbuf buf = PRINTBUF;
 
                        prt_str(&buf, "Version downgrade required:\n");
 
-                       u64 passes = ext->recovery_passes_required[0];
-                       ret = bch2_sb_set_downgrade(c,
+                       __le64 passes = ext->recovery_passes_required[0];
+                       bch2_sb_set_downgrade(c,
                                        BCH_VERSION_MINOR(bcachefs_metadata_version_current),
                                        BCH_VERSION_MINOR(c->sb.version));
-                       if (ret) {
-                               mutex_unlock(&c->sb_lock);
-                               goto err;
-                       }
-
                        passes = ext->recovery_passes_required[0] & ~passes;
                        if (passes) {
                                prt_str(&buf, "  running recovery passes: ");
@@ -821,8 +816,6 @@ int bch2_fs_recovery(struct bch_fs *c)
 
                        bch_info(c, "%s", buf.buf);
                        printbuf_exit(&buf);
-
-                       bch2_sb_maybe_downgrade(c);
                        write_sb = true;
                }
 
@@ -839,6 +832,9 @@ int bch2_fs_recovery(struct bch_fs *c)
        if (c->opts.fsck && IS_ENABLED(CONFIG_BCACHEFS_DEBUG))
                c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_topology);
 
+       if (c->opts.fsck)
+               set_bit(BCH_FS_fsck_running, &c->flags);
+
        ret = bch2_blacklist_table_initialize(c);
        if (ret) {
                bch_err(c, "error initializing blacklist table");
@@ -979,6 +975,8 @@ use_clean:
        if (ret)
                goto err;
 
+       clear_bit(BCH_FS_fsck_running, &c->flags);
+
        /* If we fixed errors, verify that fs is actually clean now: */
        if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) &&
            test_bit(BCH_FS_errors_fixed, &c->flags) &&
@@ -1073,7 +1071,6 @@ use_clean:
 
        ret = 0;
 out:
-       set_bit(BCH_FS_fsck_done, &c->flags);
        bch2_flush_fsck_errs(c);
 
        if (!c->opts.keep_journal &&
@@ -1109,7 +1106,7 @@ int bch2_fs_initialize(struct bch_fs *c)
        c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done);
        c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_bformat_overflow_done);
 
-       bch2_sb_maybe_downgrade(c);
+       bch2_check_version_downgrade(c);
 
        if (c->opts.version_upgrade != BCH_VERSION_UPGRADE_none) {
                bch2_sb_upgrade(c, bcachefs_metadata_version_current);
@@ -1120,7 +1117,6 @@ int bch2_fs_initialize(struct bch_fs *c)
 
        c->curr_recovery_pass = ARRAY_SIZE(recovery_pass_fns);
        set_bit(BCH_FS_may_go_rw, &c->flags);
-       set_bit(BCH_FS_fsck_done, &c->flags);
 
        for (unsigned i = 0; i < BTREE_ID_NR; i++)
                bch2_btree_root_alloc(c, i);
index 9f9c8a244c80aac3cd989706ae09d10f2c2c8737..b24b71bc4e60956917a7a90008357899e4325469 100644 (file)
@@ -34,15 +34,14 @@ int bch2_reflink_p_invalid(struct bch_fs *c, struct bkey_s_c k,
                           struct printbuf *err)
 {
        struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
+       int ret = 0;
 
-       if (c->sb.version >= bcachefs_metadata_version_reflink_p_fix &&
-           le64_to_cpu(p.v->idx) < le32_to_cpu(p.v->front_pad)) {
-               prt_printf(err, "idx < front_pad (%llu < %u)",
-                      le64_to_cpu(p.v->idx), le32_to_cpu(p.v->front_pad));
-               return -EINVAL;
-       }
-
-       return 0;
+       bkey_fsck_err_on(le64_to_cpu(p.v->idx) < le32_to_cpu(p.v->front_pad),
+                        c, err, reflink_p_front_pad_bad,
+                        "idx < front_pad (%llu < %u)",
+                        le64_to_cpu(p.v->idx), le32_to_cpu(p.v->front_pad));
+fsck_err:
+       return ret;
 }
 
 void bch2_reflink_p_to_text(struct printbuf *out, struct bch_fs *c,
@@ -74,7 +73,7 @@ bool bch2_reflink_p_merge(struct bch_fs *c, struct bkey_s _l, struct bkey_s_c _r
        return true;
 }
 
-static int trans_mark_reflink_p_segment(struct btree_trans *trans,
+static int trans_trigger_reflink_p_segment(struct btree_trans *trans,
                        struct bkey_s_c_reflink_p p,
                        u64 *idx, unsigned flags)
 {
@@ -93,7 +92,7 @@ static int trans_mark_reflink_p_segment(struct btree_trans *trans,
        if (ret)
                goto err;
 
-       refcount = bkey_refcount(k);
+       refcount = bkey_refcount(bkey_i_to_s(k));
        if (!refcount) {
                bch2_bkey_val_to_text(&buf, c, p.s_c);
                bch2_trans_inconsistent(trans,
@@ -141,47 +140,16 @@ err:
        return ret;
 }
 
-static int __trans_mark_reflink_p(struct btree_trans *trans,
-                               enum btree_id btree_id, unsigned level,
-                               struct bkey_s_c k, unsigned flags)
-{
-       struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
-       u64 idx, end_idx;
-       int ret = 0;
-
-       idx     = le64_to_cpu(p.v->idx) - le32_to_cpu(p.v->front_pad);
-       end_idx = le64_to_cpu(p.v->idx) + p.k->size +
-               le32_to_cpu(p.v->back_pad);
-
-       while (idx < end_idx && !ret)
-               ret = trans_mark_reflink_p_segment(trans, p, &idx, flags);
-       return ret;
-}
-
-int bch2_trans_mark_reflink_p(struct btree_trans *trans,
-                             enum btree_id btree_id, unsigned level,
-                             struct bkey_s_c old,
-                             struct bkey_i *new,
-                             unsigned flags)
-{
-       if (flags & BTREE_TRIGGER_INSERT) {
-               struct bch_reflink_p *v = &bkey_i_to_reflink_p(new)->v;
-
-               v->front_pad = v->back_pad = 0;
-       }
-
-       return trigger_run_overwrite_then_insert(__trans_mark_reflink_p, trans, btree_id, level, old, new, flags);
-}
-
-static s64 __bch2_mark_reflink_p(struct btree_trans *trans,
-                                struct bkey_s_c_reflink_p p,
-                                u64 start, u64 end,
-                                u64 *idx, unsigned flags, size_t r_idx)
+static s64 gc_trigger_reflink_p_segment(struct btree_trans *trans,
+                               struct bkey_s_c_reflink_p p,
+                               u64 *idx, unsigned flags, size_t r_idx)
 {
        struct bch_fs *c = trans->c;
        struct reflink_gc *r;
        int add = !(flags & BTREE_TRIGGER_OVERWRITE) ? 1 : -1;
-       u64 next_idx = end;
+       u64 start = le64_to_cpu(p.v->idx);
+       u64 end = le64_to_cpu(p.v->idx) + p.k->size;
+       u64 next_idx = end + le32_to_cpu(p.v->back_pad);
        s64 ret = 0;
        struct printbuf buf = PRINTBUF;
 
@@ -205,20 +173,24 @@ not_found:
                     "  missing range %llu-%llu",
                     (bch2_bkey_val_to_text(&buf, c, p.s_c), buf.buf),
                     *idx, next_idx)) {
-               struct bkey_i_error *new;
-
-               new = bch2_trans_kmalloc(trans, sizeof(*new));
-               ret = PTR_ERR_OR_ZERO(new);
+               struct bkey_i *update = bch2_bkey_make_mut_noupdate(trans, p.s_c);
+               ret = PTR_ERR_OR_ZERO(update);
                if (ret)
                        goto err;
 
-               bkey_init(&new->k);
-               new->k.type     = KEY_TYPE_error;
-               new->k.p                = bkey_start_pos(p.k);
-               new->k.p.offset += *idx - start;
-               bch2_key_resize(&new->k, next_idx - *idx);
-               ret = bch2_btree_insert_trans(trans, BTREE_ID_extents, &new->k_i,
-                                         BTREE_TRIGGER_NORUN);
+               if (next_idx <= start) {
+                       bkey_i_to_reflink_p(update)->v.front_pad = cpu_to_le32(start - next_idx);
+               } else if (*idx >= end) {
+                       bkey_i_to_reflink_p(update)->v.back_pad = cpu_to_le32(*idx - end);
+               } else {
+                       bkey_error_init(update);
+                       update->k.p             = p.k->p;
+                       update->k.p.offset      = next_idx;
+                       update->k.size          = next_idx - *idx;
+                       set_bkey_val_u64s(&update->k, 0);
+               }
+
+               ret = bch2_btree_insert_trans(trans, BTREE_ID_extents, update, BTREE_TRIGGER_NORUN);
        }
 
        *idx = next_idx;
@@ -228,50 +200,55 @@ fsck_err:
        return ret;
 }
 
-static int __mark_reflink_p(struct btree_trans *trans,
+static int __trigger_reflink_p(struct btree_trans *trans,
                            enum btree_id btree_id, unsigned level,
                            struct bkey_s_c k, unsigned flags)
 {
        struct bch_fs *c = trans->c;
        struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
-       struct reflink_gc *ref;
-       size_t l, r, m;
-       u64 idx = le64_to_cpu(p.v->idx), start = idx;
-       u64 end = le64_to_cpu(p.v->idx) + p.k->size;
        int ret = 0;
 
-       BUG_ON(!(flags & BTREE_TRIGGER_GC));
+       u64 idx = le64_to_cpu(p.v->idx) - le32_to_cpu(p.v->front_pad);
+       u64 end = le64_to_cpu(p.v->idx) + p.k->size + le32_to_cpu(p.v->back_pad);
 
-       if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_reflink_p_fix) {
-               idx -= le32_to_cpu(p.v->front_pad);
-               end += le32_to_cpu(p.v->back_pad);
+       if (flags & BTREE_TRIGGER_TRANSACTIONAL) {
+               while (idx < end && !ret)
+                       ret = trans_trigger_reflink_p_segment(trans, p, &idx, flags);
        }
 
-       l = 0;
-       r = c->reflink_gc_nr;
-       while (l < r) {
-               m = l + (r - l) / 2;
+       if (flags & BTREE_TRIGGER_GC) {
+               size_t l = 0, r = c->reflink_gc_nr;
 
-               ref = genradix_ptr(&c->reflink_gc_table, m);
-               if (ref->offset <= idx)
-                       l = m + 1;
-               else
-                       r = m;
-       }
+               while (l < r) {
+                       size_t m = l + (r - l) / 2;
+                       struct reflink_gc *ref = genradix_ptr(&c->reflink_gc_table, m);
+                       if (ref->offset <= idx)
+                               l = m + 1;
+                       else
+                               r = m;
+               }
 
-       while (idx < end && !ret)
-               ret = __bch2_mark_reflink_p(trans, p, start, end,
-                                           &idx, flags, l++);
+               while (idx < end && !ret)
+                       ret = gc_trigger_reflink_p_segment(trans, p, &idx, flags, l++);
+       }
 
        return ret;
 }
 
-int bch2_mark_reflink_p(struct btree_trans *trans,
-                       enum btree_id btree_id, unsigned level,
-                       struct bkey_s_c old, struct bkey_s_c new,
-                       unsigned flags)
+int bch2_trigger_reflink_p(struct btree_trans *trans,
+                          enum btree_id btree_id, unsigned level,
+                          struct bkey_s_c old,
+                          struct bkey_s new,
+                          unsigned flags)
 {
-       return mem_trigger_run_overwrite_then_insert(__mark_reflink_p, trans, btree_id, level, old, new, flags);
+       if ((flags & BTREE_TRIGGER_TRANSACTIONAL) &&
+           (flags & BTREE_TRIGGER_INSERT)) {
+               struct bch_reflink_p *v = bkey_s_to_reflink_p(new).v;
+
+               v->front_pad = v->back_pad = 0;
+       }
+
+       return trigger_run_overwrite_then_insert(__trigger_reflink_p, trans, btree_id, level, old, new, flags);
 }
 
 /* indirect extents */
@@ -305,32 +282,34 @@ bool bch2_reflink_v_merge(struct bch_fs *c, struct bkey_s _l, struct bkey_s_c _r
 }
 #endif
 
-static inline void check_indirect_extent_deleting(struct bkey_i *new, unsigned *flags)
+static inline void check_indirect_extent_deleting(struct bkey_s new, unsigned *flags)
 {
        if ((*flags & BTREE_TRIGGER_INSERT) && !*bkey_refcount(new)) {
-               new->k.type = KEY_TYPE_deleted;
-               new->k.size = 0;
-               set_bkey_val_u64s(&new->k, 0);
+               new.k->type = KEY_TYPE_deleted;
+               new.k->size = 0;
+               set_bkey_val_u64s(new.k, 0);
                *flags &= ~BTREE_TRIGGER_INSERT;
        }
 }
 
 int bch2_trans_mark_reflink_v(struct btree_trans *trans,
                              enum btree_id btree_id, unsigned level,
-                             struct bkey_s_c old, struct bkey_i *new,
+                             struct bkey_s_c old, struct bkey_s new,
                              unsigned flags)
 {
-       check_indirect_extent_deleting(new, &flags);
+       if ((flags & BTREE_TRIGGER_TRANSACTIONAL) &&
+           (flags & BTREE_TRIGGER_INSERT))
+               check_indirect_extent_deleting(new, &flags);
 
        if (old.k->type == KEY_TYPE_reflink_v &&
-           new->k.type == KEY_TYPE_reflink_v &&
-           old.k->u64s == new->k.u64s &&
+           new.k->type == KEY_TYPE_reflink_v &&
+           old.k->u64s == new.k->u64s &&
            !memcmp(bkey_s_c_to_reflink_v(old).v->start,
-                   bkey_i_to_reflink_v(new)->v.start,
-                   bkey_val_bytes(&new->k) - 8))
+                   bkey_s_to_reflink_v(new).v->start,
+                   bkey_val_bytes(new.k) - 8))
                return 0;
 
-       return bch2_trans_mark_extent(trans, btree_id, level, old, new, flags);
+       return bch2_trigger_extent(trans, btree_id, level, old, new, flags);
 }
 
 /* indirect inline data */
@@ -355,7 +334,7 @@ void bch2_indirect_inline_data_to_text(struct printbuf *out,
 
 int bch2_trans_mark_indirect_inline_data(struct btree_trans *trans,
                              enum btree_id btree_id, unsigned level,
-                             struct bkey_s_c old, struct bkey_i *new,
+                             struct bkey_s_c old, struct bkey_s new,
                              unsigned flags)
 {
        check_indirect_extent_deleting(new, &flags);
@@ -398,7 +377,7 @@ static int bch2_make_extent_indirect(struct btree_trans *trans,
 
        set_bkey_val_bytes(&r_v->k, sizeof(__le64) + bkey_val_bytes(&orig->k));
 
-       refcount        = bkey_refcount(r_v);
+       refcount        = bkey_refcount(bkey_i_to_s(r_v));
        *refcount       = 0;
        memcpy(refcount + 1, &orig->v, bkey_val_bytes(&orig->k));
 
index 6cc9c4a77265988081c21aa006ee63a64bfff521..8ee778ec0022a327145eb91ebefbcb38cc1240bf 100644 (file)
@@ -9,17 +9,14 @@ int bch2_reflink_p_invalid(struct bch_fs *, struct bkey_s_c,
 void bch2_reflink_p_to_text(struct printbuf *, struct bch_fs *,
                            struct bkey_s_c);
 bool bch2_reflink_p_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c);
-int bch2_trans_mark_reflink_p(struct btree_trans *, enum btree_id, unsigned,
-                             struct bkey_s_c, struct bkey_i *, unsigned);
-int bch2_mark_reflink_p(struct btree_trans *, enum btree_id, unsigned,
-                       struct bkey_s_c, struct bkey_s_c, unsigned);
+int bch2_trigger_reflink_p(struct btree_trans *, enum btree_id, unsigned,
+                          struct bkey_s_c, struct bkey_s, unsigned);
 
 #define bch2_bkey_ops_reflink_p ((struct bkey_ops) {           \
        .key_invalid    = bch2_reflink_p_invalid,               \
        .val_to_text    = bch2_reflink_p_to_text,               \
        .key_merge      = bch2_reflink_p_merge,                 \
-       .trans_trigger  = bch2_trans_mark_reflink_p,            \
-       .atomic_trigger = bch2_mark_reflink_p,                  \
+       .trigger        = bch2_trigger_reflink_p,               \
        .min_val_size   = 16,                                   \
 })
 
@@ -28,14 +25,13 @@ int bch2_reflink_v_invalid(struct bch_fs *, struct bkey_s_c,
 void bch2_reflink_v_to_text(struct printbuf *, struct bch_fs *,
                            struct bkey_s_c);
 int bch2_trans_mark_reflink_v(struct btree_trans *, enum btree_id, unsigned,
-                             struct bkey_s_c, struct bkey_i *, unsigned);
+                             struct bkey_s_c, struct bkey_s, unsigned);
 
 #define bch2_bkey_ops_reflink_v ((struct bkey_ops) {           \
        .key_invalid    = bch2_reflink_v_invalid,               \
        .val_to_text    = bch2_reflink_v_to_text,               \
        .swab           = bch2_ptr_swab,                        \
-       .trans_trigger  = bch2_trans_mark_reflink_v,            \
-       .atomic_trigger = bch2_mark_extent,                     \
+       .trigger        = bch2_trans_mark_reflink_v,            \
        .min_val_size   = 8,                                    \
 })
 
@@ -45,13 +41,13 @@ void bch2_indirect_inline_data_to_text(struct printbuf *,
                                struct bch_fs *, struct bkey_s_c);
 int bch2_trans_mark_indirect_inline_data(struct btree_trans *,
                                         enum btree_id, unsigned,
-                             struct bkey_s_c, struct bkey_i *,
+                             struct bkey_s_c, struct bkey_s,
                              unsigned);
 
 #define bch2_bkey_ops_indirect_inline_data ((struct bkey_ops) {        \
        .key_invalid    = bch2_indirect_inline_data_invalid,    \
        .val_to_text    = bch2_indirect_inline_data_to_text,    \
-       .trans_trigger  = bch2_trans_mark_indirect_inline_data, \
+       .trigger        = bch2_trans_mark_indirect_inline_data, \
        .min_val_size   = 8,                                    \
 })
 
@@ -67,13 +63,13 @@ static inline const __le64 *bkey_refcount_c(struct bkey_s_c k)
        }
 }
 
-static inline __le64 *bkey_refcount(struct bkey_i *k)
+static inline __le64 *bkey_refcount(struct bkey_s k)
 {
-       switch (k->k.type) {
+       switch (k.k->type) {
        case KEY_TYPE_reflink_v:
-               return &bkey_i_to_reflink_v(k)->v.refcount;
+               return &bkey_s_to_reflink_v(k).v->refcount;
        case KEY_TYPE_indirect_inline_data:
-               return &bkey_i_to_indirect_inline_data(k)->v.refcount;
+               return &bkey_s_to_indirect_inline_data(k).v->refcount;
        default:
                return NULL;
        }
index d2a92fb0d6fa068ff28b055a1282598e8999dbcb..441dcb1bf160e917d531d1a5ea955cf0238f0844 100644 (file)
 #include "sb-errors.h"
 #include "super-io.h"
 
+#define RECOVERY_PASS_ALL_FSCK         BIT_ULL(63)
+
 /*
- * Downgrade table:
- * When dowgrading past certain versions, we need to run certain recovery passes
- * and fix certain errors:
+ * Upgrade, downgrade tables - run certain recovery passes, fix certain errors
  *
  * x(version, recovery_passes, errors...)
  */
-
-#define DOWNGRADE_TABLE()                                      \
-       x(disk_accounting_v2,                                   \
-         BIT_ULL(BCH_RECOVERY_PASS_check_alloc_info),          \
-         BCH_FSCK_ERR_dev_usage_buckets_wrong)
-
-struct downgrade_entry {
+#define UPGRADE_TABLE()                                                \
+       x(backpointers,                                         \
+         RECOVERY_PASS_ALL_FSCK)                               \
+       x(inode_v3,                                             \
+         RECOVERY_PASS_ALL_FSCK)                               \
+       x(unwritten_extents,                                    \
+         RECOVERY_PASS_ALL_FSCK)                               \
+       x(bucket_gens,                                          \
+         BIT_ULL(BCH_RECOVERY_PASS_bucket_gens_init)|          \
+         RECOVERY_PASS_ALL_FSCK)                               \
+       x(lru_v2,                                               \
+         RECOVERY_PASS_ALL_FSCK)                               \
+       x(fragmentation_lru,                                    \
+         RECOVERY_PASS_ALL_FSCK)                               \
+       x(no_bps_in_alloc_keys,                                 \
+         RECOVERY_PASS_ALL_FSCK)                               \
+       x(snapshot_trees,                                       \
+         RECOVERY_PASS_ALL_FSCK)                               \
+       x(snapshot_skiplists,                                   \
+         BIT_ULL(BCH_RECOVERY_PASS_check_snapshots),           \
+         BCH_FSCK_ERR_snapshot_bad_depth,                      \
+         BCH_FSCK_ERR_snapshot_bad_skiplist)                   \
+       x(deleted_inodes,                                       \
+         BIT_ULL(BCH_RECOVERY_PASS_check_inodes),              \
+         BCH_FSCK_ERR_unlinked_inode_not_on_deleted_list)      \
+       x(rebalance_work,                                       \
+         BIT_ULL(BCH_RECOVERY_PASS_set_fs_needs_rebalance))
+
+#define DOWNGRADE_TABLE()
+
+struct upgrade_downgrade_entry {
        u64             recovery_passes;
        u16             version;
        u16             nr_errors;
        const u16       *errors;
 };
 
-#define x(ver, passes, ...) static const u16 ver_##errors[] = { __VA_ARGS__ };
+#define x(ver, passes, ...) static const u16 upgrade_##ver##_errors[] = { __VA_ARGS__ };
+UPGRADE_TABLE()
+#undef x
+
+static const struct upgrade_downgrade_entry upgrade_table[] = {
+#define x(ver, passes, ...) {                                  \
+       .recovery_passes        = passes,                       \
+       .version                = bcachefs_metadata_version_##ver,\
+       .nr_errors              = ARRAY_SIZE(upgrade_##ver##_errors),   \
+       .errors                 = upgrade_##ver##_errors,       \
+},
+UPGRADE_TABLE()
+#undef x
+};
+
+void bch2_sb_set_upgrade(struct bch_fs *c,
+                        unsigned old_version,
+                        unsigned new_version)
+{
+       lockdep_assert_held(&c->sb_lock);
+
+       struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
+
+       for (const struct upgrade_downgrade_entry *i = upgrade_table;
+            i < upgrade_table + ARRAY_SIZE(upgrade_table);
+            i++)
+               if (i->version > old_version && i->version <= new_version) {
+                       u64 passes = i->recovery_passes;
+
+                       if (passes & RECOVERY_PASS_ALL_FSCK)
+                               passes |= bch2_fsck_recovery_passes();
+                       passes &= ~RECOVERY_PASS_ALL_FSCK;
+
+                       ext->recovery_passes_required[0] |=
+                               cpu_to_le64(bch2_recovery_passes_to_stable(passes));
+
+                       for (const u16 *e = i->errors;
+                            e < i->errors + i->nr_errors;
+                            e++) {
+                               __set_bit(*e, c->sb.errors_silent);
+                               ext->errors_silent[*e / 64] |= cpu_to_le64(BIT_ULL(*e % 64));
+                       }
+               }
+}
+
+#define x(ver, passes, ...) static const u16 downgrade_##ver##_errors[] = { __VA_ARGS__ };
 DOWNGRADE_TABLE()
 #undef x
 
-static const struct downgrade_entry downgrade_table[] = {
+static const struct upgrade_downgrade_entry downgrade_table[] = {
 #define x(ver, passes, ...) {                                  \
        .recovery_passes        = passes,                       \
        .version                = bcachefs_metadata_version_##ver,\
-       .nr_errors              = ARRAY_SIZE(ver_##errors),     \
-       .errors                 = ver_##errors,                 \
+       .nr_errors              = ARRAY_SIZE(downgrade_##ver##_errors), \
+       .errors                 = downgrade_##ver##_errors,     \
 },
 DOWNGRADE_TABLE()
 #undef x
@@ -59,12 +128,6 @@ downgrade_entry_next_c(const struct bch_sb_field_downgrade_entry *e)
             (void *) &_i->errors[0] < vstruct_end(&(_d)->field);                       \
             _i = downgrade_entry_next_c(_i))
 
-static inline unsigned bch2_sb_field_downgrade_u64s(unsigned nr)
-{
-       return (sizeof(struct bch_sb_field_downgrade) +
-               sizeof(struct bch_sb_field_downgrade_entry) * nr) / sizeof(u64);
-}
-
 static int bch2_sb_downgrade_validate(struct bch_sb *sb, struct bch_sb_field *f,
                                      struct printbuf *err)
 {
@@ -127,7 +190,7 @@ int bch2_sb_downgrade_update(struct bch_fs *c)
        darray_char table = {};
        int ret = 0;
 
-       for (const struct downgrade_entry *src = downgrade_table;
+       for (const struct upgrade_downgrade_entry *src = downgrade_table;
             src < downgrade_table + ARRAY_SIZE(downgrade_table);
             src++) {
                if (BCH_VERSION_MAJOR(src->version) != BCH_VERSION_MAJOR(le16_to_cpu(c->disk_sb.sb->version)))
@@ -171,11 +234,11 @@ out:
        return ret;
 }
 
-int bch2_sb_set_downgrade(struct bch_fs *c, unsigned new_minor, unsigned old_minor)
+void bch2_sb_set_downgrade(struct bch_fs *c, unsigned new_minor, unsigned old_minor)
 {
        struct bch_sb_field_downgrade *d = bch2_sb_field_get(c->disk_sb.sb, downgrade);
        if (!d)
-               return 0;
+               return;
 
        struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
 
@@ -194,6 +257,4 @@ int bch2_sb_set_downgrade(struct bch_fs *c, unsigned new_minor, unsigned old_min
                        }
                }
        }
-
-       return 0;
 }
index 0703ad7e99e4d3321db9005df2b732c53fa259a9..57e6c916fc738b2605929eec5811844fd772f70d 100644 (file)
@@ -5,6 +5,7 @@
 extern const struct bch_sb_field_ops bch_sb_field_ops_downgrade;
 
 int bch2_sb_downgrade_update(struct bch_fs *);
-int bch2_sb_set_downgrade(struct bch_fs *, unsigned, unsigned);
+void bch2_sb_set_upgrade(struct bch_fs *, unsigned, unsigned);
+void bch2_sb_set_downgrade(struct bch_fs *, unsigned, unsigned);
 
 #endif /* _BCACHEFS_SB_DOWNGRADE_H */
index e7be1f9bdaabb39190e9d598683ae5fc171a14b6..c08aacdfd073c203e44a072363c94e89dd93eec8 100644 (file)
        x(dir_loop,                                             241)    \
        x(hash_table_key_duplicate,                             242)    \
        x(hash_table_key_wrong_offset,                          243)    \
-       x(unlinked_inode_not_on_deleted_list,                   244)
+       x(unlinked_inode_not_on_deleted_list,                   244)    \
+       x(reflink_p_front_pad_bad,                              245)
 
 enum bch_sb_error_id {
 #define x(t, n) BCH_FSCK_ERR_##t = n,
index 4c19a8096c1dfc4ced3b8ea2db21bedda3cd1352..a44a238bf8b5550023226844734424b1211c812a 100644 (file)
@@ -266,7 +266,7 @@ static void member_to_text(struct printbuf *out,
 
        prt_str(out, "Durability:");
        prt_tab(out);
-       prt_printf(out, "%llu", BCH_MEMBER_DURABILITY(&m));
+       prt_printf(out, "%llu", BCH_MEMBER_DURABILITY(&m) ? BCH_MEMBER_DURABILITY(&m) - 1 : 1);
        prt_newline(out);
 
        prt_printf(out, "Discard:");
index 96df4052ff7b93b8927daf7f8429b86fcf15a428..56af937523ff2a8deda0a5168f45a67533a57da5 100644 (file)
@@ -276,7 +276,7 @@ static void set_is_ancestor_bitmap(struct bch_fs *c, u32 id)
        mutex_unlock(&c->snapshot_table_lock);
 }
 
-int bch2_mark_snapshot(struct btree_trans *trans,
+static int __bch2_mark_snapshot(struct btree_trans *trans,
                       enum btree_id btree, unsigned level,
                       struct bkey_s_c old, struct bkey_s_c new,
                       unsigned flags)
@@ -330,6 +330,14 @@ err:
        return ret;
 }
 
+int bch2_mark_snapshot(struct btree_trans *trans,
+                      enum btree_id btree, unsigned level,
+                      struct bkey_s_c old, struct bkey_s new,
+                      unsigned flags)
+{
+       return __bch2_mark_snapshot(trans, btree, level, old, new.s_c, flags);
+}
+
 int bch2_snapshot_lookup(struct btree_trans *trans, u32 id,
                         struct bch_snapshot *s)
 {
@@ -806,11 +814,10 @@ static int check_snapshot(struct btree_trans *trans,
 
        real_depth = bch2_snapshot_depth(c, parent_id);
 
-       if (le32_to_cpu(s.depth) != real_depth &&
-           (c->sb.version_upgrade_complete < bcachefs_metadata_version_snapshot_skiplists ||
-            fsck_err(c, snapshot_bad_depth,
-                     "snapshot with incorrect depth field, should be %u:\n  %s",
-                     real_depth, (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))) {
+       if (fsck_err_on(le32_to_cpu(s.depth) != real_depth,
+                       c, snapshot_bad_depth,
+                       "snapshot with incorrect depth field, should be %u:\n  %s",
+                       real_depth, (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
                u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot);
                ret = PTR_ERR_OR_ZERO(u);
                if (ret)
@@ -824,11 +831,9 @@ static int check_snapshot(struct btree_trans *trans,
        if (ret < 0)
                goto err;
 
-       if (!ret &&
-           (c->sb.version_upgrade_complete < bcachefs_metadata_version_snapshot_skiplists ||
-            fsck_err(c, snapshot_bad_skiplist,
-                     "snapshot with bad skiplist field:\n  %s",
-                     (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))) {
+       if (fsck_err_on(!ret, c, snapshot_bad_skiplist,
+                       "snapshot with bad skiplist field:\n  %s",
+                       (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
                u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot);
                ret = PTR_ERR_OR_ZERO(u);
                if (ret)
@@ -1055,7 +1060,7 @@ static int create_snapids(struct btree_trans *trans, u32 parent, u32 tree,
                bubble_sort(n->v.skip, ARRAY_SIZE(n->v.skip), cmp_le32);
                SET_BCH_SNAPSHOT_SUBVOL(&n->v, true);
 
-               ret = bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0,
+               ret = __bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0,
                                         bkey_s_c_null, bkey_i_to_s_c(&n->k_i), 0);
                if (ret)
                        goto err;
@@ -1664,7 +1669,7 @@ int bch2_snapshots_read(struct bch_fs *c)
        int ret = bch2_trans_run(c,
                for_each_btree_key(trans, iter, BTREE_ID_snapshots,
                                   POS_MIN, 0, k,
-                       bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0, bkey_s_c_null, k, 0) ?:
+                       __bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0, bkey_s_c_null, k, 0) ?:
                        bch2_snapshot_set_equiv(trans, k) ?:
                        bch2_check_snapshot_needs_deletion(trans, k)) ?:
                for_each_btree_key(trans, iter, BTREE_ID_snapshots,
index 94f35b2cfbb3427ca83ab6d5b423ab9b640bed99..7c66ffc06385ddea63685298f691660d906055d5 100644 (file)
@@ -22,12 +22,12 @@ void bch2_snapshot_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
 int bch2_snapshot_invalid(struct bch_fs *, struct bkey_s_c,
                          enum bkey_invalid_flags, struct printbuf *);
 int bch2_mark_snapshot(struct btree_trans *, enum btree_id, unsigned,
-                      struct bkey_s_c, struct bkey_s_c, unsigned);
+                      struct bkey_s_c, struct bkey_s, unsigned);
 
 #define bch2_bkey_ops_snapshot ((struct bkey_ops) {            \
        .key_invalid    = bch2_snapshot_invalid,                \
        .val_to_text    = bch2_snapshot_to_text,                \
-       .atomic_trigger = bch2_mark_snapshot,                   \
+       .trigger        = bch2_mark_snapshot,                   \
        .min_val_size   = 24,                                   \
 })
 
index 7cbf496dcf99b046cc0e2add0548bdea389d6460..ea86921727b4e5abd34540c028979b76dda99434 100644 (file)
@@ -30,14 +30,12 @@ static const struct blk_holder_ops bch2_sb_handle_bdev_ops = {
 struct bch2_metadata_version {
        u16             version;
        const char      *name;
-       u64             recovery_passes;
 };
 
 static const struct bch2_metadata_version bch2_metadata_versions[] = {
-#define x(n, v, _recovery_passes) {            \
+#define x(n, v) {              \
        .version = v,                           \
        .name = #n,                             \
-       .recovery_passes = _recovery_passes,    \
 },
        BCH_METADATA_VERSIONS()
 #undef x
@@ -70,24 +68,6 @@ unsigned bch2_latest_compatible_version(unsigned v)
        return v;
 }
 
-u64 bch2_upgrade_recovery_passes(struct bch_fs *c,
-                                unsigned old_version,
-                                unsigned new_version)
-{
-       u64 ret = 0;
-
-       for (const struct bch2_metadata_version *i = bch2_metadata_versions;
-            i < bch2_metadata_versions + ARRAY_SIZE(bch2_metadata_versions);
-            i++)
-               if (i->version > old_version && i->version <= new_version) {
-                       if (i->recovery_passes & RECOVERY_PASS_ALL_FSCK)
-                               ret |= bch2_fsck_recovery_passes();
-                       ret |= i->recovery_passes;
-               }
-
-       return ret &= ~RECOVERY_PASS_ALL_FSCK;
-}
-
 const char * const bch2_sb_fields[] = {
 #define x(name, nr)    #name,
        BCH_SB_FIELDS()
@@ -190,8 +170,12 @@ int bch2_sb_realloc(struct bch_sb_handle *sb, unsigned u64s)
                u64 max_bytes = 512 << sb->sb->layout.sb_max_size_bits;
 
                if (new_bytes > max_bytes) {
-                       pr_err("%pg: superblock too big: want %zu but have %llu",
-                              sb->bdev, new_bytes, max_bytes);
+                       struct printbuf buf = PRINTBUF;
+
+                       prt_bdevname(&buf, sb->bdev);
+                       prt_printf(&buf, ": superblock too big: want %zu but have %llu", new_bytes, max_bytes);
+                       pr_err("%s", buf.buf);
+                       printbuf_exit(&buf);
                        return -BCH_ERR_ENOSPC_sb;
                }
        }
@@ -1095,8 +1079,10 @@ void __bch2_check_set_feature(struct bch_fs *c, unsigned feat)
 }
 
 /* Downgrade if superblock is at a higher version than currently supported: */
-void bch2_sb_maybe_downgrade(struct bch_fs *c)
+bool bch2_check_version_downgrade(struct bch_fs *c)
 {
+       bool ret = bcachefs_metadata_version_current < c->sb.version;
+
        lockdep_assert_held(&c->sb_lock);
 
        /*
@@ -1110,6 +1096,7 @@ void bch2_sb_maybe_downgrade(struct bch_fs *c)
        if (c->sb.version_min > bcachefs_metadata_version_current)
                c->disk_sb.sb->version_min = cpu_to_le16(bcachefs_metadata_version_current);
        c->disk_sb.sb->compat[0] &= cpu_to_le64((1ULL << BCH_COMPAT_NR) - 1);
+       return ret;
 }
 
 void bch2_sb_upgrade(struct bch_fs *c, unsigned new_version)
@@ -1200,8 +1187,8 @@ static int bch2_sb_field_validate(struct bch_sb *sb, struct bch_sb_field *f,
        return ret;
 }
 
-void bch2_sb_field_to_text(struct printbuf *out, struct bch_sb *sb,
-                          struct bch_sb_field *f)
+void __bch2_sb_field_to_text(struct printbuf *out, struct bch_sb *sb,
+                            struct bch_sb_field *f)
 {
        unsigned type = le32_to_cpu(f->type);
        const struct bch_sb_field_ops *ops = bch2_sb_field_type_ops(type);
@@ -1209,6 +1196,15 @@ void bch2_sb_field_to_text(struct printbuf *out, struct bch_sb *sb,
        if (!out->nr_tabstops)
                printbuf_tabstop_push(out, 32);
 
+       if (ops->to_text)
+               ops->to_text(out, sb, f);
+}
+
+void bch2_sb_field_to_text(struct printbuf *out, struct bch_sb *sb,
+                          struct bch_sb_field *f)
+{
+       unsigned type = le32_to_cpu(f->type);
+
        if (type < BCH_SB_FIELD_NR)
                prt_printf(out, "%s", bch2_sb_fields[type]);
        else
@@ -1217,11 +1213,7 @@ void bch2_sb_field_to_text(struct printbuf *out, struct bch_sb *sb,
        prt_printf(out, " (size %zu):", vstruct_bytes(f));
        prt_newline(out);
 
-       if (ops->to_text) {
-               printbuf_indent_add(out, 2);
-               ops->to_text(out, sb, f);
-               printbuf_indent_sub(out, 2);
-       }
+       __bch2_sb_field_to_text(out, sb, f);
 }
 
 void bch2_sb_layout_to_text(struct printbuf *out, struct bch_sb_layout *l)
index 1a8c2088c5c56ea93e9065a40e8041d8ba69deba..95e80e06316bf49873d64d4dc79cc766df0023a0 100644 (file)
@@ -19,10 +19,6 @@ static inline bool bch2_version_compatible(u16 version)
 void bch2_version_to_text(struct printbuf *, unsigned);
 unsigned bch2_latest_compatible_version(unsigned);
 
-u64 bch2_upgrade_recovery_passes(struct bch_fs *c,
-                                unsigned,
-                                unsigned);
-
 static inline size_t bch2_sb_field_bytes(struct bch_sb_field *f)
 {
        return le32_to_cpu(f->u64s) * sizeof(u64);
@@ -94,9 +90,11 @@ static inline void bch2_check_set_feature(struct bch_fs *c, unsigned feat)
                __bch2_check_set_feature(c, feat);
 }
 
-void bch2_sb_maybe_downgrade(struct bch_fs *);
+bool bch2_check_version_downgrade(struct bch_fs *);
 void bch2_sb_upgrade(struct bch_fs *, unsigned);
 
+void __bch2_sb_field_to_text(struct printbuf *, struct bch_sb *,
+                            struct bch_sb_field *);
 void bch2_sb_field_to_text(struct printbuf *, struct bch_sb *,
                           struct bch_sb_field *);
 void bch2_sb_layout_to_text(struct printbuf *, struct bch_sb_layout *);
index 0f3a924ca1f945caae19a86ae906cbfcf201a209..9dbc35940197f1c55c1bc48746bc23a3983ac203 100644 (file)
@@ -88,14 +88,11 @@ const char * const bch2_fs_flag_strs[] = {
 
 void __bch2_print(struct bch_fs *c, const char *fmt, ...)
 {
-       struct log_output *output = c->output;
-       va_list args;
-
-       if (c->output_filter && c->output_filter != current)
-               output = NULL;
+       struct stdio_redirect *stdio = bch2_fs_stdio_redirect(c);
 
+       va_list args;
        va_start(args, fmt);
-       if (likely(!output)) {
+       if (likely(!stdio)) {
                vprintk(fmt, args);
        } else {
                unsigned long flags;
@@ -103,11 +100,11 @@ void __bch2_print(struct bch_fs *c, const char *fmt, ...)
                if (fmt[0] == KERN_SOH[0])
                        fmt += 2;
 
-               spin_lock_irqsave(&output->lock, flags);
-               prt_vprintf(&output->buf, fmt, args);
-               spin_unlock_irqrestore(&output->lock, flags);
+               spin_lock_irqsave(&stdio->output_lock, flags);
+               prt_vprintf(&stdio->output_buf, fmt, args);
+               spin_unlock_irqrestore(&stdio->output_lock, flags);
 
-               wake_up(&output->wait);
+               wake_up(&stdio->output_wait);
        }
        va_end(args);
 }
@@ -724,7 +721,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
                goto out;
        }
 
-       c->output = (void *)(unsigned long) opts.log_output;
+       c->stdio = (void *)(unsigned long) opts.stdio;
 
        __module_get(THIS_MODULE);
 
@@ -871,7 +868,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
            !(c->copygc_wq = alloc_workqueue("bcachefs_copygc",
                                WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) ||
            !(c->io_complete_wq = alloc_workqueue("bcachefs_io",
-                               WQ_FREEZABLE|WQ_HIGHPRI|WQ_MEM_RECLAIM, 1)) ||
+                               WQ_FREEZABLE|WQ_HIGHPRI|WQ_MEM_RECLAIM, 512)) ||
            !(c->write_ref_wq = alloc_workqueue("bcachefs_write_ref",
                                WQ_FREEZABLE, 0)) ||
 #ifndef BCH_WRITE_REF_DEBUG
@@ -1086,17 +1083,22 @@ static int bch2_dev_in_fs(struct bch_sb_handle *fs,
            fs->sb->write_time != sb->sb->write_time) {
                struct printbuf buf = PRINTBUF;
 
-               prt_printf(&buf, "Split brain detected between %pg and %pg:",
-                          sb->bdev, fs->bdev);
+               prt_str(&buf, "Split brain detected between ");
+               prt_bdevname(&buf, sb->bdev);
+               prt_str(&buf, " and ");
+               prt_bdevname(&buf, fs->bdev);
+               prt_char(&buf, ':');
                prt_newline(&buf);
                prt_printf(&buf, "seq=%llu but write_time different, got", le64_to_cpu(sb->sb->seq));
                prt_newline(&buf);
 
-               prt_printf(&buf, "%pg ", fs->bdev);
+               prt_bdevname(&buf, fs->bdev);
+               prt_char(&buf, ' ');
                bch2_prt_datetime(&buf, le64_to_cpu(fs->sb->write_time));;
                prt_newline(&buf);
 
-               prt_printf(&buf, "%pg ", sb->bdev);
+               prt_bdevname(&buf, sb->bdev);
+               prt_char(&buf, ' ');
                bch2_prt_datetime(&buf, le64_to_cpu(sb->sb->write_time));;
                prt_newline(&buf);
 
@@ -1112,13 +1114,26 @@ static int bch2_dev_in_fs(struct bch_sb_handle *fs,
        u64 seq_from_member     = le64_to_cpu(sb->sb->seq);
 
        if (seq_from_fs && seq_from_fs < seq_from_member) {
-               pr_err("Split brain detected between %pg and %pg:\n"
-                      "%pg believes seq of %pg to be %llu, but %pg has %llu\n"
-                      "Not using %pg",
-                      sb->bdev, fs->bdev,
-                      fs->bdev, sb->bdev, seq_from_fs,
-                      sb->bdev, seq_from_member,
-                      sb->bdev);
+               struct printbuf buf = PRINTBUF;
+
+               prt_str(&buf, "Split brain detected between ");
+               prt_bdevname(&buf, sb->bdev);
+               prt_str(&buf, " and ");
+               prt_bdevname(&buf, fs->bdev);
+               prt_char(&buf, ':');
+               prt_newline(&buf);
+
+               prt_bdevname(&buf, fs->bdev);
+               prt_str(&buf, " believes seq of ");
+               prt_bdevname(&buf, sb->bdev);
+               prt_printf(&buf, " to be %llu, but ", seq_from_fs);
+               prt_bdevname(&buf, sb->bdev);
+               prt_printf(&buf, " has %llu\n", seq_from_member);
+               prt_str(&buf, "Not using ");
+               prt_bdevname(&buf, sb->bdev);
+
+               pr_err("%s", buf.buf);
+               printbuf_exit(&buf);
                return -BCH_ERR_device_splitbrain;
        }
 
@@ -1367,9 +1382,14 @@ static int bch2_dev_attach_bdev(struct bch_fs *c, struct bch_sb_handle *sb)
 
        bch2_dev_sysfs_online(c, ca);
 
+       struct printbuf name = PRINTBUF;
+       prt_bdevname(&name, ca->disk_sb.bdev);
+
        if (c->sb.nr_devices == 1)
-               snprintf(c->name, sizeof(c->name), "%pg", ca->disk_sb.bdev);
-       snprintf(ca->name, sizeof(ca->name), "%pg", ca->disk_sb.bdev);
+               strlcpy(c->name, name.buf, sizeof(c->name));
+       strlcpy(ca->name, name.buf, sizeof(ca->name));
+
+       printbuf_exit(&name);
 
        rebalance_wakeup(c);
        return 0;
diff --git a/libbcachefs/thread_with_file.c b/libbcachefs/thread_with_file.c
new file mode 100644 (file)
index 0000000..b1c867a
--- /dev/null
@@ -0,0 +1,299 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef NO_BCACHEFS_FS
+
+#include "bcachefs.h"
+#include "printbuf.h"
+#include "thread_with_file.h"
+
+#include <linux/anon_inodes.h>
+#include <linux/file.h>
+#include <linux/kthread.h>
+#include <linux/pagemap.h>
+#include <linux/poll.h>
+
+void bch2_thread_with_file_exit(struct thread_with_file *thr)
+{
+       if (thr->task) {
+               kthread_stop(thr->task);
+               put_task_struct(thr->task);
+       }
+}
+
+int bch2_run_thread_with_file(struct thread_with_file *thr,
+                             const struct file_operations *fops,
+                             int (*fn)(void *))
+{
+       struct file *file = NULL;
+       int ret, fd = -1;
+       unsigned fd_flags = O_CLOEXEC;
+
+       if (fops->read && fops->write)
+               fd_flags |= O_RDWR;
+       else if (fops->read)
+               fd_flags |= O_RDONLY;
+       else if (fops->write)
+               fd_flags |= O_WRONLY;
+
+       char name[TASK_COMM_LEN];
+       get_task_comm(name, current);
+
+       thr->ret = 0;
+       thr->task = kthread_create(fn, thr, "%s", name);
+       ret = PTR_ERR_OR_ZERO(thr->task);
+       if (ret)
+               return ret;
+
+       ret = get_unused_fd_flags(fd_flags);
+       if (ret < 0)
+               goto err;
+       fd = ret;
+
+       file = anon_inode_getfile(name, fops, thr, fd_flags);
+       ret = PTR_ERR_OR_ZERO(file);
+       if (ret)
+               goto err;
+
+       fd_install(fd, file);
+       get_task_struct(thr->task);
+       wake_up_process(thr->task);
+       return fd;
+err:
+       if (fd >= 0)
+               put_unused_fd(fd);
+       if (thr->task)
+               kthread_stop(thr->task);
+       return ret;
+}
+
+static inline bool thread_with_stdio_has_output(struct thread_with_stdio *thr)
+{
+       return thr->stdio.output_buf.pos ||
+               thr->output2.nr ||
+               thr->thr.done;
+}
+
+static ssize_t thread_with_stdio_read(struct file *file, char __user *buf,
+                                     size_t len, loff_t *ppos)
+{
+       struct thread_with_stdio *thr =
+               container_of(file->private_data, struct thread_with_stdio, thr);
+       size_t copied = 0, b;
+       int ret = 0;
+
+       if ((file->f_flags & O_NONBLOCK) &&
+           !thread_with_stdio_has_output(thr))
+               return -EAGAIN;
+
+       ret = wait_event_interruptible(thr->stdio.output_wait,
+               thread_with_stdio_has_output(thr));
+       if (ret)
+               return ret;
+
+       if (thr->thr.done)
+               return 0;
+
+       while (len) {
+               ret = darray_make_room(&thr->output2, thr->stdio.output_buf.pos);
+               if (ret)
+                       break;
+
+               spin_lock_irq(&thr->stdio.output_lock);
+               b = min_t(size_t, darray_room(thr->output2), thr->stdio.output_buf.pos);
+
+               memcpy(&darray_top(thr->output2), thr->stdio.output_buf.buf, b);
+               memmove(thr->stdio.output_buf.buf,
+                       thr->stdio.output_buf.buf + b,
+                       thr->stdio.output_buf.pos - b);
+
+               thr->output2.nr += b;
+               thr->stdio.output_buf.pos -= b;
+               spin_unlock_irq(&thr->stdio.output_lock);
+
+               b = min(len, thr->output2.nr);
+               if (!b)
+                       break;
+
+               b -= copy_to_user(buf, thr->output2.data, b);
+               if (!b) {
+                       ret = -EFAULT;
+                       break;
+               }
+
+               copied  += b;
+               buf     += b;
+               len     -= b;
+
+               memmove(thr->output2.data,
+                       thr->output2.data + b,
+                       thr->output2.nr - b);
+               thr->output2.nr -= b;
+       }
+
+       return copied ?: ret;
+}
+
+static int thread_with_stdio_release(struct inode *inode, struct file *file)
+{
+       struct thread_with_stdio *thr =
+               container_of(file->private_data, struct thread_with_stdio, thr);
+
+       bch2_thread_with_file_exit(&thr->thr);
+       printbuf_exit(&thr->stdio.input_buf);
+       printbuf_exit(&thr->stdio.output_buf);
+       darray_exit(&thr->output2);
+       thr->exit(thr);
+       return 0;
+}
+
+#define WRITE_BUFFER           4096
+
+static inline bool thread_with_stdio_has_input_space(struct thread_with_stdio *thr)
+{
+       return thr->stdio.input_buf.pos < WRITE_BUFFER || thr->thr.done;
+}
+
+static ssize_t thread_with_stdio_write(struct file *file, const char __user *ubuf,
+                                      size_t len, loff_t *ppos)
+{
+       struct thread_with_stdio *thr =
+               container_of(file->private_data, struct thread_with_stdio, thr);
+       struct printbuf *buf = &thr->stdio.input_buf;
+       size_t copied = 0;
+       ssize_t ret = 0;
+
+       while (len) {
+               if (thr->thr.done) {
+                       ret = -EPIPE;
+                       break;
+               }
+
+               size_t b = len - fault_in_readable(ubuf, len);
+               if (!b) {
+                       ret = -EFAULT;
+                       break;
+               }
+
+               spin_lock(&thr->stdio.input_lock);
+               if (buf->pos < WRITE_BUFFER)
+                       bch2_printbuf_make_room(buf, min(b, WRITE_BUFFER - buf->pos));
+               b = min(len, printbuf_remaining_size(buf));
+
+               if (b && !copy_from_user_nofault(&buf->buf[buf->pos], ubuf, b)) {
+                       ubuf += b;
+                       len -= b;
+                       copied += b;
+                       buf->pos += b;
+               }
+               spin_unlock(&thr->stdio.input_lock);
+
+               if (b) {
+                       wake_up(&thr->stdio.input_wait);
+               } else {
+                       if ((file->f_flags & O_NONBLOCK)) {
+                               ret = -EAGAIN;
+                               break;
+                       }
+
+                       ret = wait_event_interruptible(thr->stdio.input_wait,
+                                       thread_with_stdio_has_input_space(thr));
+                       if (ret)
+                               break;
+               }
+       }
+
+       return copied ?: ret;
+}
+
+static __poll_t thread_with_stdio_poll(struct file *file, struct poll_table_struct *wait)
+{
+       struct thread_with_stdio *thr =
+               container_of(file->private_data, struct thread_with_stdio, thr);
+
+       poll_wait(file, &thr->stdio.output_wait, wait);
+       poll_wait(file, &thr->stdio.input_wait, wait);
+
+       __poll_t mask = 0;
+
+       if (thread_with_stdio_has_output(thr))
+               mask |= EPOLLIN;
+       if (thread_with_stdio_has_input_space(thr))
+               mask |= EPOLLOUT;
+       if (thr->thr.done)
+               mask |= EPOLLHUP|EPOLLERR;
+       return mask;
+}
+
+static const struct file_operations thread_with_stdio_fops = {
+       .release        = thread_with_stdio_release,
+       .read           = thread_with_stdio_read,
+       .write          = thread_with_stdio_write,
+       .poll           = thread_with_stdio_poll,
+       .llseek         = no_llseek,
+};
+
+int bch2_run_thread_with_stdio(struct thread_with_stdio *thr,
+                              void (*exit)(struct thread_with_stdio *),
+                              int (*fn)(void *))
+{
+       thr->stdio.input_buf = PRINTBUF;
+       thr->stdio.input_buf.atomic++;
+       spin_lock_init(&thr->stdio.input_lock);
+       init_waitqueue_head(&thr->stdio.input_wait);
+
+       thr->stdio.output_buf = PRINTBUF;
+       thr->stdio.output_buf.atomic++;
+       spin_lock_init(&thr->stdio.output_lock);
+       init_waitqueue_head(&thr->stdio.output_wait);
+
+       darray_init(&thr->output2);
+       thr->exit = exit;
+
+       return bch2_run_thread_with_file(&thr->thr, &thread_with_stdio_fops, fn);
+}
+
+int bch2_stdio_redirect_read(struct stdio_redirect *stdio, char *buf, size_t len)
+{
+       wait_event(stdio->input_wait,
+                  stdio->input_buf.pos || stdio->done);
+
+       if (stdio->done)
+               return -1;
+
+       spin_lock(&stdio->input_lock);
+       int ret = min(len, stdio->input_buf.pos);
+       stdio->input_buf.pos -= ret;
+       memcpy(buf, stdio->input_buf.buf, ret);
+       memmove(stdio->input_buf.buf,
+               stdio->input_buf.buf + ret,
+               stdio->input_buf.pos);
+       spin_unlock(&stdio->input_lock);
+
+       wake_up(&stdio->input_wait);
+       return ret;
+}
+
+int bch2_stdio_redirect_readline(struct stdio_redirect *stdio, char *buf, size_t len)
+{
+       wait_event(stdio->input_wait,
+                  stdio->input_buf.pos || stdio->done);
+
+       if (stdio->done)
+               return -1;
+
+       spin_lock(&stdio->input_lock);
+       int ret = min(len, stdio->input_buf.pos);
+       char *n = memchr(stdio->input_buf.buf, '\n', ret);
+       if (n)
+               ret = min(ret, n + 1 - stdio->input_buf.buf);
+       stdio->input_buf.pos -= ret;
+       memcpy(buf, stdio->input_buf.buf, ret);
+       memmove(stdio->input_buf.buf,
+               stdio->input_buf.buf + ret,
+               stdio->input_buf.pos);
+       spin_unlock(&stdio->input_lock);
+
+       wake_up(&stdio->input_wait);
+       return ret;
+}
+
+#endif /* NO_BCACHEFS_FS */
diff --git a/libbcachefs/thread_with_file.h b/libbcachefs/thread_with_file.h
new file mode 100644 (file)
index 0000000..05879c5
--- /dev/null
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_THREAD_WITH_FILE_H
+#define _BCACHEFS_THREAD_WITH_FILE_H
+
+#include "thread_with_file_types.h"
+
+struct task_struct;
+
+struct thread_with_file {
+       struct task_struct      *task;
+       int                     ret;
+       bool                    done;
+};
+
+void bch2_thread_with_file_exit(struct thread_with_file *);
+int bch2_run_thread_with_file(struct thread_with_file *,
+                             const struct file_operations *,
+                             int (*fn)(void *));
+
+struct thread_with_stdio {
+       struct thread_with_file thr;
+       struct stdio_redirect   stdio;
+       DARRAY(char)            output2;
+       void                    (*exit)(struct thread_with_stdio *);
+};
+
+static inline void thread_with_stdio_done(struct thread_with_stdio *thr)
+{
+       thr->thr.done = true;
+       thr->stdio.done = true;
+       wake_up(&thr->stdio.input_wait);
+       wake_up(&thr->stdio.output_wait);
+}
+
+int bch2_run_thread_with_stdio(struct thread_with_stdio *,
+                              void (*exit)(struct thread_with_stdio *),
+                              int (*fn)(void *));
+int bch2_stdio_redirect_read(struct stdio_redirect *, char *, size_t);
+int bch2_stdio_redirect_readline(struct stdio_redirect *, char *, size_t);
+
+#endif /* _BCACHEFS_THREAD_WITH_FILE_H */
diff --git a/libbcachefs/thread_with_file_types.h b/libbcachefs/thread_with_file_types.h
new file mode 100644 (file)
index 0000000..90b5e64
--- /dev/null
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_THREAD_WITH_FILE_TYPES_H
+#define _BCACHEFS_THREAD_WITH_FILE_TYPES_H
+
+struct stdio_redirect {
+       spinlock_t              output_lock;
+       wait_queue_head_t       output_wait;
+       struct printbuf         output_buf;
+
+       spinlock_t              input_lock;
+       wait_queue_head_t       input_wait;
+       struct printbuf         input_buf;
+       bool                    done;
+};
+
+#endif /* _BCACHEFS_THREAD_WITH_FILE_TYPES_H */
index 427edb3e7cd6ec40fb6d57b29fe99e40b09996b5..c94876b3bb06e4d8bf0ba490421ead37d87e5569 100644 (file)
@@ -72,6 +72,27 @@ DECLARE_EVENT_CLASS(trans_str,
                  __entry->trans_fn, (void *) __entry->caller_ip, __get_str(str))
 );
 
+DECLARE_EVENT_CLASS(trans_str_nocaller,
+       TP_PROTO(struct btree_trans *trans, const char *str),
+       TP_ARGS(trans, str),
+
+       TP_STRUCT__entry(
+               __field(dev_t,          dev                     )
+               __array(char,           trans_fn, 32            )
+               __string(str,           str                     )
+       ),
+
+       TP_fast_assign(
+               __entry->dev            = trans->c->dev;
+               strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn));
+               __assign_str(str, str);
+       ),
+
+       TP_printk("%d,%d %s %s",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->trans_fn, __get_str(str))
+);
+
 DECLARE_EVENT_CLASS(btree_node_nofs,
        TP_PROTO(struct bch_fs *c, struct btree *b),
        TP_ARGS(c, b),
@@ -1243,11 +1264,10 @@ DEFINE_EVENT(transaction_restart_iter,  trans_restart_memory_allocation_failure,
        TP_ARGS(trans, caller_ip, path)
 );
 
-DEFINE_EVENT(trans_str, trans_restart_would_deadlock,
+DEFINE_EVENT(trans_str_nocaller, trans_restart_would_deadlock,
        TP_PROTO(struct btree_trans *trans,
-                unsigned long caller_ip,
                 const char *cycle),
-       TP_ARGS(trans, caller_ip, cycle)
+       TP_ARGS(trans, cycle)
 );
 
 DEFINE_EVENT(transaction_event,        trans_restart_would_deadlock_recursion_limit,
index 2e4c5d9606decbd3375d9124e50b3a2ef2f45135..c2ef7cddaa4fcb0e9de9df263aadd019cc7a4965 100644 (file)
@@ -267,7 +267,7 @@ void bch2_print_string_as_lines(const char *prefix, const char *lines)
        console_unlock();
 }
 
-int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *task)
+int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *task, unsigned skipnr)
 {
 #ifdef CONFIG_STACKTRACE
        unsigned nr_entries = 0;
@@ -282,7 +282,7 @@ int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *task)
                return -1;
 
        do {
-               nr_entries = stack_trace_save_tsk(task, stack->data, stack->size, 0);
+               nr_entries = stack_trace_save_tsk(task, stack->data, stack->size, skipnr + 1);
        } while (nr_entries == stack->size &&
                 !(ret = darray_make_room(stack, stack->size * 2)));
 
@@ -303,10 +303,10 @@ void bch2_prt_backtrace(struct printbuf *out, bch_stacktrace *stack)
        }
 }
 
-int bch2_prt_task_backtrace(struct printbuf *out, struct task_struct *task)
+int bch2_prt_task_backtrace(struct printbuf *out, struct task_struct *task, unsigned skipnr)
 {
        bch_stacktrace stack = { 0 };
-       int ret = bch2_save_backtrace(&stack, task);
+       int ret = bch2_save_backtrace(&stack, task, skipnr + 1);
 
        bch2_prt_backtrace(out, &stack);
        darray_exit(&stack);
index 4290e0a53b7563a4e6d912ce17083a947b6a3f4b..c75fc31915d3936d8c0a26949915534aac482b3a 100644 (file)
@@ -347,9 +347,18 @@ void bch2_prt_u64_binary(struct printbuf *, u64, unsigned);
 void bch2_print_string_as_lines(const char *prefix, const char *lines);
 
 typedef DARRAY(unsigned long) bch_stacktrace;
-int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *);
+int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *, unsigned);
 void bch2_prt_backtrace(struct printbuf *, bch_stacktrace *);
-int bch2_prt_task_backtrace(struct printbuf *, struct task_struct *);
+int bch2_prt_task_backtrace(struct printbuf *, struct task_struct *, unsigned);
+
+static inline void prt_bdevname(struct printbuf *out, struct block_device *bdev)
+{
+#ifdef __KERNEL__
+       prt_printf(out, "%pg", bdev);
+#else
+       prt_str(out, bdev->name);
+#endif
+}
 
 #define NR_QUANTILES   15
 #define QUANTILE_IDX(i)        inorder_to_eytzinger0(i, NR_QUANTILES)