git.sesse.net Git - bcachefs-tools-debian/commitdiff
Update bcachefs sources to feaca6edbd24 mean and variance: Promote to lib/math
author Kent Overstreet <kent.overstreet@linux.dev>
Mon, 27 Nov 2023 03:33:04 +0000 (22:33 -0500)
committer Kent Overstreet <kent.overstreet@linux.dev>
Tue, 28 Nov 2023 02:04:03 +0000 (21:04 -0500)
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
45 files changed:
.bcachefs_revision
cmd_fsck.c
libbcachefs/backpointers.c
libbcachefs/backpointers.h
libbcachefs/bcachefs.h
libbcachefs/bcachefs_format.h
libbcachefs/btree_cache.c
libbcachefs/btree_cache.h
libbcachefs/btree_gc.c
libbcachefs/btree_iter.c
libbcachefs/btree_journal_iter.c
libbcachefs/btree_key_cache.c
libbcachefs/btree_trans_commit.c
libbcachefs/btree_types.h
libbcachefs/btree_update.c
libbcachefs/btree_update.h
libbcachefs/btree_update_interior.c
libbcachefs/btree_write_buffer.c
libbcachefs/btree_write_buffer.h
libbcachefs/btree_write_buffer_types.h
libbcachefs/buckets.c
libbcachefs/chardev.c
libbcachefs/ec.c
libbcachefs/errcode.h
libbcachefs/error.c
libbcachefs/fs-io-buffered.c
libbcachefs/fsck.c
libbcachefs/inode.c
libbcachefs/journal.c
libbcachefs/journal.h
libbcachefs/journal_io.c
libbcachefs/journal_reclaim.c
libbcachefs/journal_reclaim.h
libbcachefs/journal_seq_blacklist.c
libbcachefs/journal_types.h
libbcachefs/lru.c
libbcachefs/move.c
libbcachefs/opts.h
libbcachefs/recovery.c
libbcachefs/snapshot.c
libbcachefs/super-io.c
libbcachefs/super.c
libbcachefs/super.h
libbcachefs/sysfs.c
libbcachefs/trace.h

index 97936a157a1fee58056954ef7b85a20208ac0115..e41bb401696b0f4682485ea5a0ad417fdf71e88c 100644 (file)
@@ -1 +1 @@
-8c94740b1bf8645d3398170f41c9c88b78332252
+feaca6edbd240bbd98d261097a97037c56a09eec
index 0954a83c0df854ecca6208acabb8973eae25b1a3..f7dcae98e518917d9fa3ba5636233111fb4aa8d9 100644 (file)
@@ -97,11 +97,11 @@ int cmd_fsck(int argc, char *argv[])
                exit(8);
        }
 
-       if (test_bit(BCH_FS_ERRORS_FIXED, &c->flags)) {
+       if (test_bit(BCH_FS_errors_fixed, &c->flags)) {
                fprintf(stderr, "%s: errors fixed\n", c->name);
                ret |= 1;
        }
-       if (test_bit(BCH_FS_ERROR, &c->flags)) {
+       if (test_bit(BCH_FS_error, &c->flags)) {
                fprintf(stderr, "%s: still has errors\n", c->name);
                ret |= 4;
        }
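
For reference, the exit(8), ret |= 1 and ret |= 4 above follow the conventional fsck(8) exit-status bitmask; a sketch of that convention (the macro names here are illustrative, not from this tree):

/* Conventional fsck(8) exit status bits, OR'd into the final code: */
#define FSCK_EXIT_OK			0	/* no errors found */
#define FSCK_EXIT_ERRORS_FIXED		1	/* errors were corrected */
#define FSCK_EXIT_ERRORS_REMAIN	4	/* errors left uncorrected */
#define FSCK_EXIT_OPERATIONAL_ERROR	8	/* operational error */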
index 4c8bcf23bb27194875191f0214a583b2340de3db..8c66333bcf272127400c4b7f8581c787b851d021 100644 (file)
@@ -136,15 +136,30 @@ static noinline int backpointer_mod_err(struct btree_trans *trans,
 }
 
 int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *trans,
-                               struct bkey_i_backpointer *bp_k,
+                               struct bpos bucket,
                                struct bch_backpointer bp,
                                struct bkey_s_c orig_k,
                                bool insert)
 {
        struct btree_iter bp_iter;
        struct bkey_s_c k;
+       struct bkey_i_backpointer *bp_k;
        int ret;
 
+       bp_k = bch2_trans_kmalloc_nomemzero(trans, sizeof(struct bkey_i_backpointer));
+       ret = PTR_ERR_OR_ZERO(bp_k);
+       if (ret)
+               return ret;
+
+       bkey_backpointer_init(&bp_k->k_i);
+       bp_k->k.p = bucket_pos_to_bp(trans->c, bucket, bp.bucket_offset);
+       bp_k->v = bp;
+
+       if (!insert) {
+               bp_k->k.type = KEY_TYPE_deleted;
+               set_bkey_val_u64s(&bp_k->k, 0);
+       }
+
        k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers,
                               bp_k->k.p,
                               BTREE_ITER_INTENT|
index ab866feeaf660f497cc58ddf73a2692ab32865ac..737e2396ade7ec44edf4f18738e286b5da3189bd 100644 (file)
@@ -63,7 +63,7 @@ static inline struct bpos bucket_pos_to_bp(const struct bch_fs *c,
        return ret;
 }
 
-int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *, struct bkey_i_backpointer *,
+int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *, struct bpos bucket,
                                struct bch_backpointer, struct bkey_s_c, bool);
 
 static inline int bch2_bucket_backpointer_mod(struct btree_trans *trans,
@@ -72,28 +72,21 @@ static inline int bch2_bucket_backpointer_mod(struct btree_trans *trans,
                                struct bkey_s_c orig_k,
                                bool insert)
 {
-       struct bch_fs *c = trans->c;
-       struct bkey_i_backpointer *bp_k;
-       int ret;
+       if (unlikely(bch2_backpointers_no_use_write_buffer))
+               return bch2_bucket_backpointer_mod_nowritebuffer(trans, bucket, bp, orig_k, insert);
 
-       bp_k = bch2_trans_kmalloc_nomemzero(trans, sizeof(struct bkey_i_backpointer));
-       ret = PTR_ERR_OR_ZERO(bp_k);
-       if (ret)
-               return ret;
+       struct bkey_i_backpointer bp_k;
 
-       bkey_backpointer_init(&bp_k->k_i);
-       bp_k->k.p = bucket_pos_to_bp(c, bucket, bp.bucket_offset);
-       bp_k->v = bp;
+       bkey_backpointer_init(&bp_k.k_i);
+       bp_k.k.p = bucket_pos_to_bp(trans->c, bucket, bp.bucket_offset);
+       bp_k.v = bp;
 
        if (!insert) {
-               bp_k->k.type = KEY_TYPE_deleted;
-               set_bkey_val_u64s(&bp_k->k, 0);
+               bp_k.k.type = KEY_TYPE_deleted;
+               set_bkey_val_u64s(&bp_k.k, 0);
        }
 
-       if (unlikely(bch2_backpointers_no_use_write_buffer))
-               return bch2_bucket_backpointer_mod_nowritebuffer(trans, bp_k, bp, orig_k, insert);
-
-       return bch2_trans_update_buffered(trans, BTREE_ID_backpointers, &bp_k->k_i);
+       return bch2_trans_update_buffered(trans, BTREE_ID_backpointers, &bp_k.k_i);
 }
 
 static inline enum bch_data_type bkey_ptr_data_type(enum btree_id btree_id, unsigned level,
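
A note on the backpointer change above: bch2_trans_update_buffered() copies the key into the btree write buffer, so the common path can now build the bkey_i_backpointer in a stack variable; only the bch2_backpointers_no_use_write_buffer debug path needs an allocation that outlives the call, which is why the bch2_trans_kmalloc_nomemzero() call moved into the _nowritebuffer variant. That slow path uses the usual transaction-memory idiom, extracted here as a sketch of the pattern seen in the hunk above:

/* Transaction memory lives until the transaction is reset, and
 * allocation failure comes back as an ERR_PTR: */
struct bkey_i_backpointer *bp_k =
	bch2_trans_kmalloc_nomemzero(trans, sizeof(*bp_k));
int ret = PTR_ERR_OR_ZERO(bp_k);
if (ret)
	return ret;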
index 295efeda12ff8eaaf43e6c88aa9aabea138fb548..2e9f4af3ad58072a7e199462b2c7b3dbcfebea1f 100644 (file)
@@ -406,7 +406,6 @@ BCH_DEBUG_PARAMS_DEBUG()
        x(blocked_journal_max_in_flight)        \
        x(blocked_allocate)                     \
        x(blocked_allocate_open_bucket)         \
-       x(blocked_write_buffer_full)            \
        x(nocow_lock_contended)
 
 enum bch_time_stats {
@@ -567,32 +566,38 @@ struct bch_dev {
        struct io_count __percpu *io_done;
 };
 
-enum {
-       /* startup: */
-       BCH_FS_STARTED,
-       BCH_FS_MAY_GO_RW,
-       BCH_FS_RW,
-       BCH_FS_WAS_RW,
-
-       /* shutdown: */
-       BCH_FS_STOPPING,
-       BCH_FS_EMERGENCY_RO,
-       BCH_FS_GOING_RO,
-       BCH_FS_WRITE_DISABLE_COMPLETE,
-       BCH_FS_CLEAN_SHUTDOWN,
-
-       /* fsck passes: */
-       BCH_FS_FSCK_DONE,
-       BCH_FS_INITIAL_GC_UNFIXED,      /* kill when we enumerate fsck errors */
-       BCH_FS_NEED_ANOTHER_GC,
-
-       BCH_FS_NEED_DELETE_DEAD_SNAPSHOTS,
-
-       /* errors: */
-       BCH_FS_ERROR,
-       BCH_FS_TOPOLOGY_ERROR,
-       BCH_FS_ERRORS_FIXED,
-       BCH_FS_ERRORS_NOT_FIXED,
+/*
+ * fsck_done - kill?
+ *
+ * replace with something more general from enumerated fsck passes/errors:
+ * initial_gc_unfixed
+ * error
+ * topology error
+ */
+
+#define BCH_FS_FLAGS()                 \
+       x(started)                      \
+       x(may_go_rw)                    \
+       x(rw)                           \
+       x(was_rw)                       \
+       x(stopping)                     \
+       x(emergency_ro)                 \
+       x(going_ro)                     \
+       x(write_disable_complete)       \
+       x(clean_shutdown)               \
+       x(fsck_done)                    \
+       x(initial_gc_unfixed)           \
+       x(need_another_gc)              \
+       x(need_delete_dead_snapshots)   \
+       x(error)                        \
+       x(topology_error)               \
+       x(errors_fixed)                 \
+       x(errors_not_fixed)
+
+enum bch_fs_flags {
+#define x(n)           BCH_FS_##n,
+       BCH_FS_FLAGS()
+#undef x
 };
 
 struct btree_debug {
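
The BCH_FS_FLAGS() list above is the usual bcachefs x-macro pattern: a single table expands into the enum, and the same table can be stringified for debug output. Roughly what the preprocessor produces, plus a sketch of the companion string table this pattern enables (in the style of bch2_btree_node_flags, seen later in this diff):

/* Expansion of the enum above: */
enum bch_fs_flags {
	BCH_FS_started,			/* = 0 */
	BCH_FS_may_go_rw,		/* = 1 */
	/* ... one enumerator per x() entry ... */
	BCH_FS_errors_not_fixed,
};

/* Reusing the same list for flag names (sketch): */
const char * const bch_fs_flag_strs[] = {
#define x(n)	#n,
	BCH_FS_FLAGS()
#undef x
	NULL
};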
@@ -1068,20 +1073,10 @@ static inline void bch2_write_ref_get(struct bch_fs *c, enum bch_write_ref ref)
 #endif
 }
 
-static inline bool __bch2_write_ref_tryget(struct bch_fs *c, enum bch_write_ref ref)
-{
-#ifdef BCH_WRITE_REF_DEBUG
-       return !test_bit(BCH_FS_GOING_RO, &c->flags) &&
-               atomic_long_inc_not_zero(&c->writes[ref]);
-#else
-       return percpu_ref_tryget(&c->writes);
-#endif
-}
-
 static inline bool bch2_write_ref_tryget(struct bch_fs *c, enum bch_write_ref ref)
 {
 #ifdef BCH_WRITE_REF_DEBUG
-       return !test_bit(BCH_FS_GOING_RO, &c->flags) &&
+       return !test_bit(BCH_FS_going_ro, &c->flags) &&
                atomic_long_inc_not_zero(&c->writes[ref]);
 #else
        return percpu_ref_tryget_live(&c->writes);
@@ -1100,7 +1095,7 @@ static inline void bch2_write_ref_put(struct bch_fs *c, enum bch_write_ref ref)
                if (atomic_long_read(&c->writes[i]))
                        return;
 
-       set_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags);
+       set_bit(BCH_FS_write_disable_complete, &c->flags);
        wake_up(&bch2_read_only_wait);
 #else
        percpu_ref_put(&c->writes);
index 967780072f69c18ad5be64cbee0a114e129a0c94..be0367f3785160f4f5076f10c24f931e692dd825 100644 (file)
@@ -1539,7 +1539,7 @@ struct bch_sb_field_disk_groups {
        x(move_extent_write,                            36)     \
        x(move_extent_finish,                           37)     \
        x(move_extent_fail,                             38)     \
-       x(move_extent_alloc_mem_fail,                   39)     \
+       x(move_extent_start_fail,                       39)     \
        x(copygc,                                       40)     \
        x(copygc_wait,                                  41)     \
        x(gc_gens_end,                                  42)     \
@@ -1576,7 +1576,9 @@ struct bch_sb_field_disk_groups {
        x(write_super,                                  73)     \
        x(trans_restart_would_deadlock_recursion_limit, 74)     \
        x(trans_restart_write_buffer_flush,             75)     \
-       x(trans_restart_split_race,                     76)
+       x(trans_restart_split_race,                     76)     \
+       x(write_buffer_flush_slowpath,                  77)     \
+       x(write_buffer_flush_sync,                      78)
 
 enum bch_persistent_counters {
 #define x(t, n, ...) BCH_COUNTER_##t,
@@ -2135,8 +2137,7 @@ static inline __u64 __bset_magic(struct bch_sb *sb)
        x(clock,                7)              \
        x(dev_usage,            8)              \
        x(log,                  9)              \
-       x(overwrite,            10)             \
-       x(write_buffer_keys,    11)
+       x(overwrite,            10)
 
 enum {
 #define x(f, nr)       BCH_JSET_ENTRY_##f      = nr,
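
Two on-disk-format details in the hunks above: persistent counters carry explicit numbers, so renaming move_extent_alloc_mem_fail to move_extent_start_fail keeps slot 39 while the two new write-buffer counters append at 77 and 78, and deleting the write_buffer_keys jset entry (11) simply leaves that number unused rather than renumbering overwrite (10). The x(f, nr) form is what pins each on-disk value; an illustrative fragment of the expansion:

/* The x-macro pins each jset entry type to its on-disk number: */
enum {
	BCH_JSET_ENTRY_overwrite	= 10,
	/* 11 (write_buffer_keys) is no longer defined */
};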
index 72dea90e12fa9d11b2d1ceb74edebecc3b78bc5e..47e7770d05831757d45c4aee9331f52a10c06e76 100644 (file)
 
 #include <linux/prefetch.h>
 #include <linux/sched/mm.h>
-#include <linux/seq_buf.h>
-
-#define BTREE_CACHE_NOT_FREED_INCREMENT(counter) \
-do {                                            \
-       if (shrinker_counter)                    \
-               bc->not_freed_##counter++;       \
-} while (0)
 
 const char * const bch2_btree_node_flags[] = {
 #define x(f)   #f,
@@ -208,7 +201,7 @@ static inline struct btree *btree_cache_find(struct btree_cache *bc,
  * this version is for btree nodes that have already been freed (we're not
  * reaping a real btree node)
  */
-static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush, bool shrinker_counter)
+static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush)
 {
        struct btree_cache *bc = &c->btree_cache;
        int ret = 0;
@@ -218,64 +211,38 @@ wait_on_io:
        if (b->flags & ((1U << BTREE_NODE_dirty)|
                        (1U << BTREE_NODE_read_in_flight)|
                        (1U << BTREE_NODE_write_in_flight))) {
-               if (!flush) {
-                       if (btree_node_dirty(b))
-                               BTREE_CACHE_NOT_FREED_INCREMENT(dirty);
-                       else if (btree_node_read_in_flight(b))
-                               BTREE_CACHE_NOT_FREED_INCREMENT(read_in_flight);
-                       else if (btree_node_write_in_flight(b))
-                               BTREE_CACHE_NOT_FREED_INCREMENT(write_in_flight);
+               if (!flush)
                        return -BCH_ERR_ENOMEM_btree_node_reclaim;
-               }
 
                /* XXX: waiting on IO with btree cache lock held */
                bch2_btree_node_wait_on_read(b);
                bch2_btree_node_wait_on_write(b);
        }
 
-       if (!six_trylock_intent(&b->c.lock)) {
-               BTREE_CACHE_NOT_FREED_INCREMENT(lock_intent);
+       if (!six_trylock_intent(&b->c.lock))
                return -BCH_ERR_ENOMEM_btree_node_reclaim;
-       }
 
-       if (!six_trylock_write(&b->c.lock)) {
-               BTREE_CACHE_NOT_FREED_INCREMENT(lock_write);
+       if (!six_trylock_write(&b->c.lock))
                goto out_unlock_intent;
-       }
 
        /* recheck under lock */
        if (b->flags & ((1U << BTREE_NODE_read_in_flight)|
                        (1U << BTREE_NODE_write_in_flight))) {
-               if (!flush) {
-                       if (btree_node_read_in_flight(b))
-                               BTREE_CACHE_NOT_FREED_INCREMENT(read_in_flight);
-                       else if (btree_node_write_in_flight(b))
-                               BTREE_CACHE_NOT_FREED_INCREMENT(write_in_flight);
+               if (!flush)
                        goto out_unlock;
-               }
                six_unlock_write(&b->c.lock);
                six_unlock_intent(&b->c.lock);
                goto wait_on_io;
        }
 
-       if (btree_node_noevict(b)) {
-               BTREE_CACHE_NOT_FREED_INCREMENT(noevict);
-               goto out_unlock;
-       }
-       if (btree_node_write_blocked(b)) {
-               BTREE_CACHE_NOT_FREED_INCREMENT(write_blocked);
-               goto out_unlock;
-       }
-       if (btree_node_will_make_reachable(b)) {
-               BTREE_CACHE_NOT_FREED_INCREMENT(will_make_reachable);
+       if (btree_node_noevict(b) ||
+           btree_node_write_blocked(b) ||
+           btree_node_will_make_reachable(b))
                goto out_unlock;
-       }
 
        if (btree_node_dirty(b)) {
-               if (!flush) {
-                       BTREE_CACHE_NOT_FREED_INCREMENT(dirty);
+               if (!flush)
                        goto out_unlock;
-               }
                /*
                 * Using the underscore version because we don't want to compact
                 * bsets after the write, since this node is about to be evicted
@@ -305,14 +272,14 @@ out_unlock_intent:
        goto out;
 }
 
-static int btree_node_reclaim(struct bch_fs *c, struct btree *b, bool shrinker_counter)
+static int btree_node_reclaim(struct bch_fs *c, struct btree *b)
 {
-       return __btree_node_reclaim(c, b, false, shrinker_counter);
+       return __btree_node_reclaim(c, b, false);
 }
 
 static int btree_node_write_and_reclaim(struct bch_fs *c, struct btree *b)
 {
-       return __btree_node_reclaim(c, b, true, false);
+       return __btree_node_reclaim(c, b, true);
 }
 
 static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
@@ -360,12 +327,11 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
                if (touched >= nr)
                        goto out;
 
-               if (!btree_node_reclaim(c, b, true)) {
+               if (!btree_node_reclaim(c, b)) {
                        btree_node_data_free(c, b);
                        six_unlock_write(&b->c.lock);
                        six_unlock_intent(&b->c.lock);
                        freed++;
-                       bc->freed++;
                }
        }
 restart:
@@ -374,11 +340,9 @@ restart:
 
                if (btree_node_accessed(b)) {
                        clear_btree_node_accessed(b);
-                       bc->not_freed_access_bit++;
-               } else if (!btree_node_reclaim(c, b, true)) {
+               } else if (!btree_node_reclaim(c, b)) {
                        freed++;
                        btree_node_data_free(c, b);
-                       bc->freed++;
 
                        bch2_btree_node_hash_remove(bc, b);
                        six_unlock_write(&b->c.lock);
@@ -428,17 +392,6 @@ static unsigned long bch2_btree_cache_count(struct shrinker *shrink,
        return btree_cache_can_free(bc);
 }
 
-static void bch2_btree_cache_shrinker_to_text(struct seq_buf *s, struct shrinker *shrink)
-{
-       struct bch_fs *c = shrink->private_data;
-       char *cbuf;
-       size_t buflen = seq_buf_get_buf(s, &cbuf);
-       struct printbuf out = PRINTBUF_EXTERN(cbuf, buflen);
-
-       bch2_btree_cache_to_text(&out, &c->btree_cache);
-       seq_buf_commit(s, out.pos);
-}
-
 void bch2_fs_btree_cache_exit(struct bch_fs *c)
 {
        struct btree_cache *bc = &c->btree_cache;
@@ -525,7 +478,6 @@ int bch2_fs_btree_cache_init(struct bch_fs *c)
        bc->shrink = shrink;
        shrink->count_objects   = bch2_btree_cache_count;
        shrink->scan_objects    = bch2_btree_cache_scan;
-       shrink->to_text         = bch2_btree_cache_shrinker_to_text;
        shrink->seeks           = 4;
        shrink->private_data    = c;
        shrinker_register(shrink);
@@ -599,7 +551,7 @@ static struct btree *btree_node_cannibalize(struct bch_fs *c)
        struct btree *b;
 
        list_for_each_entry_reverse(b, &bc->live, list)
-               if (!btree_node_reclaim(c, b, false))
+               if (!btree_node_reclaim(c, b))
                        return b;
 
        while (1) {
@@ -635,7 +587,7 @@ struct btree *bch2_btree_node_mem_alloc(struct btree_trans *trans, bool pcpu_rea
         * disk node. Check the freed list before allocating a new one:
         */
        list_for_each_entry(b, freed, list)
-               if (!btree_node_reclaim(c, b, false)) {
+               if (!btree_node_reclaim(c, b)) {
                        list_del_init(&b->list);
                        goto got_node;
                }
@@ -661,7 +613,7 @@ got_node:
         * the list. Check if there's any freed nodes there:
         */
        list_for_each_entry(b2, &bc->freeable, list)
-               if (!btree_node_reclaim(c, b2, false)) {
+               if (!btree_node_reclaim(c, b2)) {
                        swap(b->data, b2->data);
                        swap(b->aux_data, b2->aux_data);
                        btree_node_to_freedlist(bc, b2);
@@ -1257,21 +1209,9 @@ void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c, const struc
               stats.failed);
 }
 
-void bch2_btree_cache_to_text(struct printbuf *out, const struct btree_cache *bc)
+void bch2_btree_cache_to_text(struct printbuf *out, const struct bch_fs *c)
 {
-       prt_printf(out, "nr nodes:\t\t%u\n", bc->used);
-       prt_printf(out, "nr dirty:\t\t%u\n", atomic_read(&bc->dirty));
-       prt_printf(out, "cannibalize lock:\t%p\n", bc->alloc_lock);
-
-       prt_printf(out, "freed:\t\t\t\t%u\n", bc->freed);
-       prt_printf(out, "not freed, dirty:\t\t%u\n", bc->not_freed_dirty);
-       prt_printf(out, "not freed, write in flight:\t%u\n", bc->not_freed_write_in_flight);
-       prt_printf(out, "not freed, read in flight:\t%u\n", bc->not_freed_read_in_flight);
-       prt_printf(out, "not freed, lock intent failed:\t%u\n", bc->not_freed_lock_intent);
-       prt_printf(out, "not freed, lock write failed:\t%u\n", bc->not_freed_lock_write);
-       prt_printf(out, "not freed, access bit:\t\t%u\n", bc->not_freed_access_bit);
-       prt_printf(out, "not freed, no evict failed:\t%u\n", bc->not_freed_noevict);
-       prt_printf(out, "not freed, write blocked:\t%u\n", bc->not_freed_write_blocked);
-       prt_printf(out, "not freed, will make reachable:\t%u\n", bc->not_freed_will_make_reachable);
-
+       prt_printf(out, "nr nodes:\t\t%u\n", c->btree_cache.used);
+       prt_printf(out, "nr dirty:\t\t%u\n", atomic_read(&c->btree_cache.dirty));
+       prt_printf(out, "cannibalize lock:\t%p\n", c->btree_cache.alloc_lock);
 }
index bfe1d7482cbc8d5205ddb87edf801e251c6206a2..cfb80b201d61be9240ed659baa57a693d12b796a 100644 (file)
@@ -126,6 +126,6 @@ static inline struct btree *btree_node_root(struct bch_fs *c, struct btree *b)
 const char *bch2_btree_id_str(enum btree_id);
 void bch2_btree_pos_to_text(struct printbuf *, struct bch_fs *, const struct btree *);
 void bch2_btree_node_to_text(struct printbuf *, struct bch_fs *, const struct btree *);
-void bch2_btree_cache_to_text(struct printbuf *, const struct btree_cache *);
+void bch2_btree_cache_to_text(struct printbuf *, const struct bch_fs *);
 
 #endif /* _BCACHEFS_BTREE_CACHE_H */
index 90f5bcfa3c4f2acaf0fc8e2b96b03c04e8e9b920..70e4788074b9271fd0b5ba58066143527123143e 100644 (file)
@@ -108,7 +108,7 @@ static int bch2_gc_check_topology(struct bch_fs *c,
                                ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology);
                                goto err;
                        } else {
-                               set_bit(BCH_FS_INITIAL_GC_UNFIXED, &c->flags);
+                               set_bit(BCH_FS_initial_gc_unfixed, &c->flags);
                        }
                }
        }
@@ -134,7 +134,7 @@ static int bch2_gc_check_topology(struct bch_fs *c,
                        ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology);
                        goto err;
                } else {
-                       set_bit(BCH_FS_INITIAL_GC_UNFIXED, &c->flags);
+                       set_bit(BCH_FS_initial_gc_unfixed, &c->flags);
                }
        }
 
@@ -619,7 +619,7 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id
                                g->data_type            = 0;
                                g->dirty_sectors        = 0;
                                g->cached_sectors       = 0;
-                               set_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags);
+                               set_bit(BCH_FS_need_another_gc, &c->flags);
                        } else {
                                do_update = true;
                        }
@@ -664,7 +664,7 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id
                                 bch2_bkey_val_to_text(&buf, c, *k), buf.buf))) {
                        if (data_type == BCH_DATA_btree) {
                                g->data_type    = data_type;
-                               set_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags);
+                               set_bit(BCH_FS_need_another_gc, &c->flags);
                        } else {
                                do_update = true;
                        }
@@ -996,7 +996,7 @@ static int bch2_gc_btree_init_recurse(struct btree_trans *trans, struct btree *b
                                        /* Continue marking when opted to not
                                         * fix the error: */
                                        ret = 0;
-                                       set_bit(BCH_FS_INITIAL_GC_UNFIXED, &c->flags);
+                                       set_bit(BCH_FS_initial_gc_unfixed, &c->flags);
                                        continue;
                                }
                        } else if (ret) {
@@ -1847,7 +1847,7 @@ again:
 #endif
        c->gc_count++;
 
-       if (test_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags) ||
+       if (test_bit(BCH_FS_need_another_gc, &c->flags) ||
            (!iter && bch2_test_restart_gc)) {
                if (iter++ > 2) {
                        bch_info(c, "Unable to fix bucket gens, looping");
@@ -1859,7 +1859,7 @@ again:
                 * XXX: make sure gens we fixed got saved
                 */
                bch_info(c, "Second GC pass needed, restarting:");
-               clear_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags);
+               clear_bit(BCH_FS_need_another_gc, &c->flags);
                __gc_pos_set(c, gc_phase(GC_PHASE_NOT_RUNNING));
 
                bch2_gc_stripes_reset(c, metadata_only);
index a52fd206f8222858264b75420b3fcb43511934f3..bdc80808715b0a7db5a1aff4b68eec21c8608b51 100644 (file)
@@ -781,7 +781,7 @@ static int btree_path_prefetch(struct btree_trans *trans, struct btree_path *pat
        struct btree_node_iter node_iter = l->iter;
        struct bkey_packed *k;
        struct bkey_buf tmp;
-       unsigned nr = test_bit(BCH_FS_STARTED, &c->flags)
+       unsigned nr = test_bit(BCH_FS_started, &c->flags)
                ? (path->level > 1 ? 0 :  2)
                : (path->level > 1 ? 1 : 16);
        bool was_locked = btree_node_locked(path, path->level);
@@ -816,7 +816,7 @@ static int btree_path_prefetch_j(struct btree_trans *trans, struct btree_path *p
        struct bch_fs *c = trans->c;
        struct bkey_s_c k;
        struct bkey_buf tmp;
-       unsigned nr = test_bit(BCH_FS_STARTED, &c->flags)
+       unsigned nr = test_bit(BCH_FS_started, &c->flags)
                ? (path->level > 1 ? 0 :  2)
                : (path->level > 1 ? 1 : 16);
        bool was_locked = btree_node_locked(path, path->level);
index 7a5e0a893df924b35a6126829111c3240c847d1f..4c084ce493a4a16eb30821a712b5ac74bb8fca67 100644 (file)
@@ -177,7 +177,7 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id,
        struct journal_keys *keys = &c->journal_keys;
        size_t idx = bch2_journal_key_search(keys, id, level, k->k.p);
 
-       BUG_ON(test_bit(BCH_FS_RW, &c->flags));
+       BUG_ON(test_bit(BCH_FS_rw, &c->flags));
 
        if (idx < keys->size &&
            journal_key_cmp(&n, &keys->d[idx]) == 0) {
index e14e9b4cd0298b70df3428c2ff0e51774f986ad4..c64f8db0673387fc2f456914a0789de907a1b54b 100644 (file)
@@ -13,7 +13,6 @@
 #include "trace.h"
 
 #include <linux/sched/mm.h>
-#include <linux/seq_buf.h>
 
 static inline bool btree_uses_pcpu_readers(enum btree_id id)
 {
@@ -779,7 +778,7 @@ bool bch2_btree_insert_key_cached(struct btree_trans *trans,
        ck->valid = true;
 
        if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
-               EBUG_ON(test_bit(BCH_FS_CLEAN_SHUTDOWN, &c->flags));
+               EBUG_ON(test_bit(BCH_FS_clean_shutdown, &c->flags));
                set_bit(BKEY_CACHED_DIRTY, &ck->flags);
                atomic_long_inc(&c->btree_key_cache.nr_dirty);
 
@@ -1008,7 +1007,7 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
 
        if (atomic_long_read(&bc->nr_dirty) &&
            !bch2_journal_error(&c->journal) &&
-           test_bit(BCH_FS_WAS_RW, &c->flags))
+           test_bit(BCH_FS_was_rw, &c->flags))
                panic("btree key cache shutdown error: nr_dirty nonzero (%li)\n",
                      atomic_long_read(&bc->nr_dirty));
 
@@ -1029,18 +1028,6 @@ void bch2_fs_btree_key_cache_init_early(struct btree_key_cache *c)
        INIT_LIST_HEAD(&c->freed_nonpcpu);
 }
 
-static void bch2_btree_key_cache_shrinker_to_text(struct seq_buf *s, struct shrinker *shrink)
-{
-       struct bch_fs *c = shrink->private_data;
-       struct btree_key_cache *bc = &c->btree_key_cache;
-       char *cbuf;
-       size_t buflen = seq_buf_get_buf(s, &cbuf);
-       struct printbuf out = PRINTBUF_EXTERN(cbuf, buflen);
-
-       bch2_btree_key_cache_to_text(&out, bc);
-       seq_buf_commit(s, out.pos);
-}
-
 int bch2_fs_btree_key_cache_init(struct btree_key_cache *bc)
 {
        struct bch_fs *c = container_of(bc, struct bch_fs, btree_key_cache);
@@ -1064,7 +1051,6 @@ int bch2_fs_btree_key_cache_init(struct btree_key_cache *bc)
        shrink->seeks           = 0;
        shrink->count_objects   = bch2_btree_key_cache_count;
        shrink->scan_objects    = bch2_btree_key_cache_scan;
-       shrink->to_text         = bch2_btree_key_cache_shrinker_to_text;
        shrink->private_data    = c;
        shrinker_register(shrink);
        return 0;
index 7210d5c22c986a4bc9bbc2ca442fc535d4a914d2..336350bd904828088f0fdabdc1ae7c8d638ec382 100644 (file)
@@ -287,7 +287,7 @@ inline void bch2_btree_insert_key_leaf(struct btree_trans *trans,
        bch2_btree_add_journal_pin(c, b, journal_seq);
 
        if (unlikely(!btree_node_dirty(b))) {
-               EBUG_ON(test_bit(BCH_FS_CLEAN_SHUTDOWN, &c->flags));
+               EBUG_ON(test_bit(BCH_FS_clean_shutdown, &c->flags));
                set_btree_node_dirty_acct(c, b);
        }
 
@@ -659,6 +659,10 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
                i->k->k.needs_whiteout = false;
        }
 
+       if (trans->nr_wb_updates &&
+           trans->nr_wb_updates + c->btree_write_buffer.state.nr > c->btree_write_buffer.size)
+               return -BCH_ERR_btree_insert_need_flush_buffer;
+
        /*
         * Don't get journal reservation until after we know insert will
         * succeed:
@@ -693,6 +697,14 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
            bch2_trans_fs_usage_apply(trans, trans->fs_usage_deltas))
                return -BCH_ERR_btree_insert_need_mark_replicas;
 
+       if (trans->nr_wb_updates) {
+               EBUG_ON(flags & BCH_TRANS_COMMIT_no_journal_res);
+
+               ret = bch2_btree_insert_keys_write_buffer(trans);
+               if (ret)
+                       goto revert_fs_usage;
+       }
+
        h = trans->hooks;
        while (h) {
                ret = h->fn(trans, h);
@@ -754,7 +766,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
 
                trans_for_each_wb_update(trans, wb) {
                        entry = bch2_journal_add_entry(j, &trans->journal_res,
-                                              BCH_JSET_ENTRY_write_buffer_keys,
+                                              BCH_JSET_ENTRY_btree_keys,
                                               wb->btree, 0,
                                               wb->k.k.u64s);
                        bkey_copy((struct bkey_i *) entry->start, &wb->k);
@@ -938,6 +950,30 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags,
 
                ret = bch2_trans_relock(trans);
                break;
+       case -BCH_ERR_btree_insert_need_flush_buffer: {
+               struct btree_write_buffer *wb = &c->btree_write_buffer;
+
+               ret = 0;
+
+               if (wb->state.nr > wb->size * 3 / 4) {
+                       bch2_trans_unlock(trans);
+                       mutex_lock(&wb->flush_lock);
+
+                       if (wb->state.nr > wb->size * 3 / 4) {
+                               bch2_trans_begin(trans);
+                               ret = bch2_btree_write_buffer_flush_locked(trans);
+                               mutex_unlock(&wb->flush_lock);
+                               if (!ret) {
+                                       trace_and_count(c, trans_restart_write_buffer_flush, trans, _THIS_IP_);
+                                       ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_write_buffer_flush);
+                               }
+                       } else {
+                               mutex_unlock(&wb->flush_lock);
+                               ret = bch2_trans_relock(trans);
+                       }
+               }
+               break;
+       }
        default:
                BUG_ON(ret >= 0);
                break;
@@ -959,7 +995,7 @@ bch2_trans_commit_get_rw_cold(struct btree_trans *trans, unsigned flags)
        int ret;
 
        if (likely(!(flags & BCH_TRANS_COMMIT_lazy_rw)) ||
-           test_bit(BCH_FS_STARTED, &c->flags))
+           test_bit(BCH_FS_started, &c->flags))
                return -BCH_ERR_erofs_trans_commit;
 
        ret = drop_locks_do(trans, bch2_fs_read_write_early(c));
@@ -1024,7 +1060,7 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
                        return ret;
        }
 
-       if (unlikely(!test_bit(BCH_FS_MAY_GO_RW, &c->flags))) {
+       if (unlikely(!test_bit(BCH_FS_may_go_rw, &c->flags))) {
                ret = do_bch2_trans_commit_to_journal_replay(trans);
                goto out_reset;
        }
@@ -1036,7 +1072,21 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
                        goto out_reset;
        }
 
-       EBUG_ON(test_bit(BCH_FS_CLEAN_SHUTDOWN, &c->flags));
+       if (c->btree_write_buffer.state.nr > c->btree_write_buffer.size / 2 &&
+           mutex_trylock(&c->btree_write_buffer.flush_lock)) {
+               bch2_trans_begin(trans);
+               bch2_trans_unlock(trans);
+
+               ret = bch2_btree_write_buffer_flush_locked(trans);
+               mutex_unlock(&c->btree_write_buffer.flush_lock);
+               if (!ret) {
+                       trace_and_count(c, trans_restart_write_buffer_flush, trans, _THIS_IP_);
+                       ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_write_buffer_flush);
+               }
+               goto out;
+       }
+
+       EBUG_ON(test_bit(BCH_FS_clean_shutdown, &c->flags));
 
        trans->journal_u64s             = trans->extra_journal_entries.nr;
        trans->journal_transaction_names = READ_ONCE(c->opts.journal_transaction_names);
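
The write-buffer additions to the commit path above implement a two-threshold policy: __bch2_trans_commit() opportunistically flushes (under a trylock) once the buffer is more than half full, bch2_trans_commit_write_locked() fails with -BCH_ERR_btree_insert_need_flush_buffer when the transaction's buffered updates would overflow it, and the error path only takes the blocking flush once the buffer is past three quarters. A condensed restatement, with illustrative helper names not present in the source:

/* Illustrative restatement of the thresholds used above: */
static bool wb_opportunistic_flush(struct btree_write_buffer *wb)
{
	return wb->state.nr > wb->size / 2;	/* trylock, then flush */
}

static bool wb_would_overflow(struct btree_trans *trans,
			      struct btree_write_buffer *wb)
{
	return trans->nr_wb_updates &&
	       trans->nr_wb_updates + wb->state.nr > wb->size;
}

static bool wb_must_block_for_flush(struct btree_write_buffer *wb)
{
	return wb->state.nr > wb->size * 3 / 4;	/* take flush_lock */
}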
index 14983e778756f4be403a74718cabefd6d3eeb023..2326bceb34f8a81fccc69a5af799317bd0bdf052 100644 (file)
@@ -162,16 +162,6 @@ struct btree_cache {
        /* Number of elements in live + freeable lists */
        unsigned                used;
        unsigned                reserve;
-       unsigned                freed;
-       unsigned                not_freed_lock_intent;
-       unsigned                not_freed_lock_write;
-       unsigned                not_freed_dirty;
-       unsigned                not_freed_read_in_flight;
-       unsigned                not_freed_write_in_flight;
-       unsigned                not_freed_noevict;
-       unsigned                not_freed_write_blocked;
-       unsigned                not_freed_will_make_reachable;
-       unsigned                not_freed_access_bit;
        atomic_t                dirty;
        struct shrinker         *shrink;
 
index 1837f84845696dd8fa515a830ac97d2a26e36be5..ba42f578f8107591ec46f2c6c90d0b562179956b 100644 (file)
@@ -693,20 +693,6 @@ int bch2_btree_delete_at(struct btree_trans *trans,
        return bch2_btree_delete_extent_at(trans, iter, 0, update_flags);
 }
 
-int bch2_btree_delete_at_buffered(struct btree_trans *trans,
-                                 enum btree_id btree, struct bpos pos)
-{
-       struct bkey_i *k;
-
-       k = bch2_trans_kmalloc(trans, sizeof(*k));
-       if (IS_ERR(k))
-               return PTR_ERR(k);
-
-       bkey_init(&k->k);
-       k->k.p = pos;
-       return bch2_trans_update_buffered(trans, btree, k);
-}
-
 int bch2_btree_delete(struct btree_trans *trans,
                      enum btree_id btree, struct bpos pos,
                      unsigned update_flags)
@@ -811,19 +797,13 @@ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id,
 int bch2_btree_bit_mod(struct btree_trans *trans, enum btree_id btree,
                       struct bpos pos, bool set)
 {
-       struct bkey_i *k;
-       int ret = 0;
+       struct bkey_i k;
 
-       k = bch2_trans_kmalloc_nomemzero(trans, sizeof(*k));
-       ret = PTR_ERR_OR_ZERO(k);
-       if (unlikely(ret))
-               return ret;
-
-       bkey_init(&k->k);
-       k->k.type = set ? KEY_TYPE_set : KEY_TYPE_deleted;
-       k->k.p = pos;
+       bkey_init(&k.k);
+       k.k.type = set ? KEY_TYPE_set : KEY_TYPE_deleted;
+       k.k.p = pos;
 
-       return bch2_trans_update_buffered(trans, btree, k);
+       return bch2_trans_update_buffered(trans, btree, &k);
 }
 
 __printf(2, 0)
index 14a2315aa88e4267775c910f3119728f3f5579dc..fa19f3212b05a355cd62e52f02b1e79134c63fc5 100644 (file)
@@ -47,7 +47,6 @@ enum bch_trans_commit_flags {
 int bch2_btree_delete_extent_at(struct btree_trans *, struct btree_iter *,
                                unsigned, unsigned);
 int bch2_btree_delete_at(struct btree_trans *, struct btree_iter *, unsigned);
-int bch2_btree_delete_at_buffered(struct btree_trans *, enum btree_id, struct bpos);
 int bch2_btree_delete(struct btree_trans *, enum btree_id, struct bpos, unsigned);
 
 int bch2_btree_insert_nonextent(struct btree_trans *, enum btree_id,
@@ -65,6 +64,12 @@ int bch2_btree_delete_range(struct bch_fs *, enum btree_id,
 
 int bch2_btree_bit_mod(struct btree_trans *, enum btree_id, struct bpos, bool);
 
+static inline int bch2_btree_delete_at_buffered(struct btree_trans *trans,
+                                               enum btree_id btree, struct bpos pos)
+{
+       return bch2_btree_bit_mod(trans, btree, pos, false);
+}
+
 int __bch2_insert_snapshot_whiteouts(struct btree_trans *, enum btree_id,
                                     struct bpos, struct bpos);
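
Turning bch2_btree_delete_at_buffered() into the inline wrapper above makes the relationship explicit: a buffered delete is just bch2_btree_bit_mod() with set == false, i.e. a zero-value KEY_TYPE_deleted key pushed through the write buffer. Hypothetical usage against a bitmap-style btree:

/* Hypothetical: set or clear a position via the write buffer */
ret = bch2_btree_bit_mod(trans, BTREE_ID_deleted_inodes, pos, true);
ret = bch2_btree_bit_mod(trans, BTREE_ID_deleted_inodes, pos, false);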
 
index bfe4d7975bd8738e1af5cbfb4e33f5f8ac1bc9c8..68627061b787ac2fb440e6bfbd3a8caa70516857 100644 (file)
@@ -1082,8 +1082,12 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
                        break;
                }
 
+               /*
+                * Always check for space for two keys, even if we won't have to
+                * split at prior level - it might have been a merge instead:
+                */
                if (bch2_btree_node_insert_fits(c, path->l[update_level].b,
-                                       BKEY_BTREE_PTR_U64s_MAX * (1 + split)))
+                                               BKEY_BTREE_PTR_U64s_MAX * 2))
                        break;
 
                split = path->l[update_level].b->nr.live_u64s > BTREE_SPLIT_THRESHOLD(c);
@@ -2052,7 +2056,7 @@ void bch2_btree_node_rewrite_async(struct bch_fs *c, struct btree *b)
        a->seq          = b->data->keys.seq;
        INIT_WORK(&a->work, async_btree_node_rewrite_work);
 
-       if (unlikely(!test_bit(BCH_FS_MAY_GO_RW, &c->flags))) {
+       if (unlikely(!test_bit(BCH_FS_may_go_rw, &c->flags))) {
                mutex_lock(&c->pending_node_rewrites_lock);
                list_add(&a->list, &c->pending_node_rewrites);
                mutex_unlock(&c->pending_node_rewrites_lock);
@@ -2060,7 +2064,7 @@ void bch2_btree_node_rewrite_async(struct bch_fs *c, struct btree *b)
        }
 
        if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_node_rewrite)) {
-               if (test_bit(BCH_FS_STARTED, &c->flags)) {
+               if (test_bit(BCH_FS_started, &c->flags)) {
                        bch_err(c, "%s: error getting c->writes ref", __func__);
                        kfree(a);
                        return;
index d3c38d2c008cf42507706635ade392a90d71944c..6a19156804079d1a0538e2cba7e0f8a50ba0fdb8 100644 (file)
 #include "btree_write_buffer.h"
 #include "error.h"
 #include "journal.h"
-#include "journal_io.h"
 #include "journal_reclaim.h"
 
-#include <linux/prefetch.h>
+#include <linux/sort.h>
 
 static int bch2_btree_write_buffer_journal_flush(struct journal *,
                                struct journal_entry_pin *, u64);
 
-static int bch2_journal_keys_to_write_buffer(struct bch_fs *, struct journal_buf *);
-
-static inline bool __wb_key_cmp(const struct wb_key_ref *l, const struct wb_key_ref *r)
-{
-       return (cmp_int(l->hi, r->hi) ?:
-               cmp_int(l->mi, r->mi) ?:
-               cmp_int(l->lo, r->lo)) >= 0;
-}
-
-static inline bool wb_key_cmp(const struct wb_key_ref *l, const struct wb_key_ref *r)
-{
-#ifdef CONFIG_X86_64
-       int cmp;
-
-       asm("mov   (%[l]), %%rax;"
-           "sub   (%[r]), %%rax;"
-           "mov  8(%[l]), %%rax;"
-           "sbb  8(%[r]), %%rax;"
-           "mov 16(%[l]), %%rax;"
-           "sbb 16(%[r]), %%rax;"
-           : "=@ccae" (cmp)
-           : [l] "r" (l), [r] "r" (r)
-           : "rax", "cc");
-
-       EBUG_ON(cmp != __wb_key_cmp(l, r));
-       return cmp;
-#else
-       return __wb_key_cmp(l, r);
-#endif
-}
-
-/* Compare excluding idx, the low 24 bits: */
-static inline bool wb_key_eq(const void *_l, const void *_r)
+static int btree_write_buffered_key_cmp(const void *_l, const void *_r)
 {
-       const struct wb_key_ref *l = _l;
-       const struct wb_key_ref *r = _r;
+       const struct btree_write_buffered_key *l = _l;
+       const struct btree_write_buffered_key *r = _r;
 
-       return !((l->hi ^ r->hi)|
-                (l->mi ^ r->mi)|
-                ((l->lo >> 24) ^ (r->lo >> 24)));
+       return  cmp_int(l->btree, r->btree) ?:
+               bpos_cmp(l->k.k.p, r->k.k.p) ?:
+               cmp_int(l->journal_seq, r->journal_seq) ?:
+               cmp_int(l->journal_offset, r->journal_offset);
 }
 
-static noinline void wb_sort(struct wb_key_ref *base, size_t num)
+static int btree_write_buffered_journal_cmp(const void *_l, const void *_r)
 {
-       size_t n = num, a = num / 2;
-
-       if (!a)         /* num < 2 || size == 0 */
-               return;
-
-       for (;;) {
-               size_t b, c, d;
-
-               if (a)                  /* Building heap: sift down --a */
-                       --a;
-               else if (--n)           /* Sorting: Extract root to --n */
-                       swap(base[0], base[n]);
-               else                    /* Sort complete */
-                       break;
-
-               /*
-                * Sift element at "a" down into heap.  This is the
-                * "bottom-up" variant, which significantly reduces
-                * calls to cmp_func(): we find the sift-down path all
-                * the way to the leaves (one compare per level), then
-                * backtrack to find where to insert the target element.
-                *
-                * Because elements tend to sift down close to the leaves,
-                * this uses fewer compares than doing two per level
-                * on the way down.  (A bit more than half as many on
-                * average, 3/4 worst-case.)
-                */
-               for (b = a; c = 2*b + 1, (d = c + 1) < n;)
-                       b = wb_key_cmp(base + c, base + d) ? c : d;
-               if (d == n)             /* Special case last leaf with no sibling */
-                       b = c;
-
-               /* Now backtrack from "b" to the correct location for "a" */
-               while (b != a && wb_key_cmp(base + a, base + b))
-                       b = (b - 1) / 2;
-               c = b;                  /* Where "a" belongs */
-               while (b != a) {        /* Shift it into place */
-                       b = (b - 1) / 2;
-                       swap(base[b], base[c]);
-               }
-       }
-}
-
-static noinline int wb_flush_one_slowpath(struct btree_trans *trans,
-                                         struct btree_iter *iter,
-                                         struct btree_write_buffered_key *wb)
-{
-       bch2_btree_node_unlock_write(trans, iter->path, iter->path->l[0].b);
-
-       trans->journal_res.seq = wb->journal_seq;
+       const struct btree_write_buffered_key *l = _l;
+       const struct btree_write_buffered_key *r = _r;
 
-       return bch2_trans_update(trans, iter, &wb->k,
-                                BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
-               bch2_trans_commit(trans, NULL, NULL,
-                                 BCH_TRANS_COMMIT_no_enospc|
-                                 BCH_TRANS_COMMIT_no_check_rw|
-                                 BCH_TRANS_COMMIT_no_journal_res|
-                                 BCH_TRANS_COMMIT_journal_reclaim);
+       return  cmp_int(l->journal_seq, r->journal_seq);
 }
 
-static inline int wb_flush_one(struct btree_trans *trans, struct btree_iter *iter,
-                              struct btree_write_buffered_key *wb,
-                              bool *write_locked, size_t *fast)
+static int bch2_btree_write_buffer_flush_one(struct btree_trans *trans,
+                                            struct btree_iter *iter,
+                                            struct btree_write_buffered_key *wb,
+                                            unsigned commit_flags,
+                                            bool *write_locked,
+                                            size_t *fast)
 {
        struct bch_fs *c = trans->c;
        struct btree_path *path;
        int ret;
 
-       EBUG_ON(!wb->journal_seq);
-       EBUG_ON(!c->btree_write_buffer.flushing.pin.seq);
-       EBUG_ON(c->btree_write_buffer.flushing.pin.seq > wb->journal_seq);
-
        ret = bch2_btree_iter_traverse(iter);
        if (ret)
                return ret;
@@ -153,14 +66,46 @@ static inline int wb_flush_one(struct btree_trans *trans, struct btree_iter *ite
                *write_locked = true;
        }
 
-       if (unlikely(!bch2_btree_node_insert_fits(c, path->l[0].b, wb->k.k.u64s))) {
+       if (!bch2_btree_node_insert_fits(c, path->l[0].b, wb->k.k.u64s)) {
+               bch2_btree_node_unlock_write(trans, path, path->l[0].b);
                *write_locked = false;
-               return wb_flush_one_slowpath(trans, iter, wb);
+               goto trans_commit;
        }
 
        bch2_btree_insert_key_leaf(trans, path, &wb->k, wb->journal_seq);
        (*fast)++;
        return 0;
+trans_commit:
+       trans->journal_res.seq = wb->journal_seq;
+
+       return  bch2_trans_update(trans, iter, &wb->k,
+                                 BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
+               bch2_trans_commit(trans, NULL, NULL,
+                                 commit_flags|
+                                 BCH_TRANS_COMMIT_no_check_rw|
+                                 BCH_TRANS_COMMIT_no_enospc|
+                                 BCH_TRANS_COMMIT_no_journal_res|
+                                 BCH_TRANS_COMMIT_journal_reclaim);
+}
+
+static union btree_write_buffer_state btree_write_buffer_switch(struct btree_write_buffer *wb)
+{
+       union btree_write_buffer_state old, new;
+       u64 v = READ_ONCE(wb->state.v);
+
+       do {
+               old.v = new.v = v;
+
+               new.nr = 0;
+               new.idx++;
+       } while ((v = atomic64_cmpxchg_acquire(&wb->state.counter, old.v, new.v)) != old.v);
+
+       while (old.idx == 0 ? wb->state.ref0 : wb->state.ref1)
+               cpu_relax();
+
+       smp_mb();
+
+       return old;
 }
 
 /*
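
btree_write_buffer_switch() above depends on the entire buffer state (key count, active-buffer index, and two reader refcounts) being packed into a single 64-bit word, so one cmpxchg both zeroes nr and flips idx, and the spin on ref0/ref1 waits out writers still appending to the retired buffer. A rough sketch of the layout this implies; the field widths are illustrative, and the real definition lives in libbcachefs/btree_write_buffer_types.h (in the file list above):

/* Sketch of the packed state word used by btree_write_buffer_switch(): */
union btree_write_buffer_state {
	struct {
		u64	nr:23;		/* keys in the active buffer */
		u64	idx:1;		/* which of the two buffers is active */
		u64	ref0:20;	/* writers still using buffer 0 */
		u64	ref1:20;	/* writers still using buffer 1 */
	};
	u64		v;		/* raw value for cmpxchg loops */
	atomic64_t	counter;	/* same storage, atomic accessor */
};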
@@ -192,79 +137,31 @@ btree_write_buffered_insert(struct btree_trans *trans,
        return ret;
 }
 
-static void move_keys_from_inc_to_flushing(struct btree_write_buffer *wb)
-{
-       struct bch_fs *c = container_of(wb, struct bch_fs, btree_write_buffer);
-       struct journal *j = &c->journal;
-
-       if (!wb->inc.keys.nr)
-               return;
-
-       bch2_journal_pin_add(j, wb->inc.keys.data[0].journal_seq, &wb->flushing.pin,
-                            bch2_btree_write_buffer_journal_flush);
-
-       darray_resize(&wb->flushing.keys, min_t(size_t, 1U << 20, wb->flushing.keys.nr + wb->inc.keys.nr));
-       darray_resize(&wb->sorted, wb->flushing.keys.size);
-
-       if (!wb->flushing.keys.nr && wb->sorted.size >= wb->inc.keys.nr) {
-               swap(wb->flushing.keys, wb->inc.keys);
-               goto out;
-       }
-
-       size_t nr = min(darray_room(wb->flushing.keys),
-                       wb->sorted.size - wb->flushing.keys.nr);
-       nr = min(nr, wb->inc.keys.nr);
-
-       memcpy(&darray_top(wb->flushing.keys),
-              wb->inc.keys.data,
-              sizeof(wb->inc.keys.data[0]) * nr);
-
-       memmove(wb->inc.keys.data,
-               wb->inc.keys.data + nr,
-              sizeof(wb->inc.keys.data[0]) * (wb->inc.keys.nr - nr));
-
-       wb->flushing.keys.nr    += nr;
-       wb->inc.keys.nr         -= nr;
-out:
-       if (!wb->inc.keys.nr)
-               bch2_journal_pin_drop(j, &wb->inc.pin);
-       else
-               bch2_journal_pin_update(j, wb->inc.keys.data[0].journal_seq, &wb->inc.pin,
-                                       bch2_btree_write_buffer_journal_flush);
-
-       if (j->watermark) {
-               spin_lock(&j->lock);
-               bch2_journal_set_watermark(j);
-               spin_unlock(&j->lock);
-       }
-
-       BUG_ON(wb->sorted.size < wb->flushing.keys.nr);
-}
-
-static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
+int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
 {
        struct bch_fs *c = trans->c;
        struct journal *j = &c->journal;
        struct btree_write_buffer *wb = &c->btree_write_buffer;
-       struct wb_key_ref *i;
+       struct journal_entry_pin pin;
+       struct btree_write_buffered_key *i, *keys;
        struct btree_iter iter = { NULL };
-       size_t skipped = 0, fast = 0, slowpath = 0;
+       size_t nr = 0, skipped = 0, fast = 0, slowpath = 0;
        bool write_locked = false;
+       union btree_write_buffer_state s;
        int ret = 0;
 
-       bch2_trans_unlock(trans);
-       bch2_trans_begin(trans);
+       memset(&pin, 0, sizeof(pin));
 
-       mutex_lock(&wb->inc.lock);
-       move_keys_from_inc_to_flushing(wb);
-       mutex_unlock(&wb->inc.lock);
+       bch2_journal_pin_copy(j, &pin, &wb->journal_pin,
+                             bch2_btree_write_buffer_journal_flush);
+       bch2_journal_pin_drop(j, &wb->journal_pin);
 
-       for (size_t i = 0; i < wb->flushing.keys.nr; i++) {
-               wb->sorted.data[i].idx = i;
-               wb->sorted.data[i].btree = wb->flushing.keys.data[i].btree;
-               memcpy(&wb->sorted.data[i].pos, &wb->flushing.keys.data[i].k.k.p, sizeof(struct bpos));
-       }
-       wb->sorted.nr = wb->flushing.keys.nr;
+       s = btree_write_buffer_switch(wb);
+       keys = wb->keys[s.idx];
+       nr = s.nr;
+
+       if (race_fault())
+               goto slowpath;
 
        /*
         * We first sort so that we can detect and skip redundant updates, and
@@ -280,151 +177,111 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
         * If that happens, simply skip the key so we can optimistically insert
         * as many keys as possible in the fast path.
         */
-       wb_sort(wb->sorted.data, wb->sorted.nr);
-
-       darray_for_each(wb->sorted, i) {
-               struct btree_write_buffered_key *k = &wb->flushing.keys.data[i->idx];
-
-               for (struct wb_key_ref *n = i + 1; n < min(i + 4, &darray_top(wb->sorted)); n++)
-                       prefetch(&wb->flushing.keys.data[n->idx]);
-
-               BUG_ON(!k->journal_seq);
-
-               if (i + 1 < &darray_top(wb->sorted) &&
-                   wb_key_eq(i, i + 1)) {
-                       struct btree_write_buffered_key *n = &wb->flushing.keys.data[i[1].idx];
+       sort(keys, nr, sizeof(keys[0]),
+            btree_write_buffered_key_cmp, NULL);
 
+       for (i = keys; i < keys + nr; i++) {
+               if (i + 1 < keys + nr &&
+                   i[0].btree == i[1].btree &&
+                   bpos_eq(i[0].k.k.p, i[1].k.k.p)) {
                        skipped++;
-                       n->journal_seq = min_t(u64, n->journal_seq, k->journal_seq);
-                       k->journal_seq = 0;
+                       i->journal_seq = 0;
                        continue;
                }
 
                if (write_locked &&
-                   (iter.path->btree_id != k->btree ||
-                    bpos_gt(k->k.k.p, iter.path->l[0].b->key.k.p))) {
+                   (iter.path->btree_id != i->btree ||
+                    bpos_gt(i->k.k.p, iter.path->l[0].b->key.k.p))) {
                        bch2_btree_node_unlock_write(trans, iter.path, iter.path->l[0].b);
                        write_locked = false;
                }
 
-               if (!iter.path || iter.path->btree_id != k->btree) {
+               if (!iter.path || iter.path->btree_id != i->btree) {
                        bch2_trans_iter_exit(trans, &iter);
-                       bch2_trans_iter_init(trans, &iter, k->btree, k->k.k.p,
+                       bch2_trans_iter_init(trans, &iter, i->btree, i->k.k.p,
                                             BTREE_ITER_INTENT|BTREE_ITER_ALL_SNAPSHOTS);
                }
 
-               bch2_btree_iter_set_pos(&iter, k->k.k.p);
+               bch2_btree_iter_set_pos(&iter, i->k.k.p);
                iter.path->preserve = false;
 
                do {
-                       if (race_fault()) {
-                               ret = -BCH_ERR_journal_reclaim_would_deadlock;
-                               break;
-                       }
-
-                       ret = wb_flush_one(trans, &iter, k, &write_locked, &fast);
+                       ret = bch2_btree_write_buffer_flush_one(trans, &iter, i, 0,
+                                                               &write_locked, &fast);
                        if (!write_locked)
                                bch2_trans_begin(trans);
                } while (bch2_err_matches(ret, BCH_ERR_transaction_restart));
 
-               if (!ret) {
-                       k->journal_seq = 0;
-               } else if (ret == -BCH_ERR_journal_reclaim_would_deadlock) {
+               if (ret == -BCH_ERR_journal_reclaim_would_deadlock) {
                        slowpath++;
-                       ret = 0;
-               } else
+                       continue;
+               }
+               if (ret)
                        break;
+
+               i->journal_seq = 0;
        }
 
        if (write_locked)
                bch2_btree_node_unlock_write(trans, iter.path, iter.path->l[0].b);
        bch2_trans_iter_exit(trans, &iter);
 
-       if (ret)
-               goto err;
-
-       if (slowpath) {
-               /*
-                * Flush in the order they were present in the journal, so that
-                * we can release journal pins:
-                * The fastpath zapped the seq of keys that were successfully flushed so
-                * we can skip those here.
-                */
-               trace_write_buffer_flush_slowpath(trans, slowpath, wb->flushing.keys.nr);
-
-               struct btree_write_buffered_key *i;
-               darray_for_each(wb->flushing.keys, i) {
-                       if (!i->journal_seq)
-                               continue;
-
-                       bch2_journal_pin_update(j, i->journal_seq, &wb->flushing.pin,
-                                               bch2_btree_write_buffer_journal_flush);
-
-                       bch2_trans_begin(trans);
-
-                       ret = commit_do(trans, NULL, NULL,
-                                       BCH_WATERMARK_reclaim|
-                                       BCH_TRANS_COMMIT_no_check_rw|
-                                       BCH_TRANS_COMMIT_no_enospc|
-                                       BCH_TRANS_COMMIT_no_journal_res|
-                                       BCH_TRANS_COMMIT_journal_reclaim,
-                                       btree_write_buffered_insert(trans, i));
-                       if (ret)
-                               goto err;
-               }
-       }
-err:
+       trace_write_buffer_flush(trans, nr, skipped, fast, wb->size);
+
+       if (slowpath)
+               goto slowpath;
+
        bch2_fs_fatal_err_on(ret, c, "%s: insert error %s", __func__, bch2_err_str(ret));
-       trace_write_buffer_flush(trans, wb->flushing.keys.nr, skipped, fast, 0);
-       bch2_journal_pin_drop(j, &wb->flushing.pin);
-       wb->flushing.keys.nr = 0;
+out:
+       bch2_journal_pin_drop(j, &pin);
        return ret;
-}
+slowpath:
+       trace_and_count(c, write_buffer_flush_slowpath, trans, slowpath, nr);
 
-static int fetch_wb_keys_from_journal(struct bch_fs *c, u64 seq)
-{
-       struct journal *j = &c->journal;
-       struct journal_buf *buf;
-       int ret = 0;
+       /*
+        * Now sort the rest by journal seq and bump the journal pin as we go.
+        * The fastpath zapped the seq of keys that were successfully flushed so
+        * we can skip those here.
+        */
+       sort(keys, nr, sizeof(keys[0]),
+            btree_write_buffered_journal_cmp,
+            NULL);
 
-       mutex_lock(&j->buf_lock);
-       while ((buf = bch2_next_write_buffer_flush_journal_buf(j, seq)))
-               if (bch2_journal_keys_to_write_buffer(c, buf)) {
-                       ret = -ENOMEM;
+       for (i = keys; i < keys + nr; i++) {
+               if (!i->journal_seq)
+                       continue;
+
+               bch2_journal_pin_update(j, i->journal_seq, &pin,
+                             bch2_btree_write_buffer_journal_flush);
+
+               ret = commit_do(trans, NULL, NULL,
+                               BCH_WATERMARK_reclaim|
+                               BCH_TRANS_COMMIT_no_check_rw|
+                               BCH_TRANS_COMMIT_no_enospc|
+                               BCH_TRANS_COMMIT_no_journal_res|
+                               BCH_TRANS_COMMIT_journal_reclaim,
+                               btree_write_buffered_insert(trans, i));
+               if (bch2_fs_fatal_err_on(ret, c, "%s: insert error %s", __func__, bch2_err_str(ret)))
                        break;
-               }
-       mutex_unlock(&j->buf_lock);
+       }
 
-       return ret;
+       goto out;
 }
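
The comparator passed to sort() above is not part of this excerpt. A minimal
sketch, assuming it only needs to order buffered keys by journal sequence so
that pins can be released oldest-first:

	static int btree_write_buffered_journal_cmp(const void *_l, const void *_r)
	{
		const struct btree_write_buffered_key *l = _l, *r = _r;

		/* ascending journal_seq: flush in journal order */
		return cmp_int(l->journal_seq, r->journal_seq);
	}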
 
 int bch2_btree_write_buffer_flush_sync(struct btree_trans *trans)
 {
        struct bch_fs *c = trans->c;
-       struct btree_write_buffer *wb = &c->btree_write_buffer;
-       int ret = 0, fetch_from_journal_err;
-
-       trace_write_buffer_flush_sync(trans, _RET_IP_);
-retry:
-       bch2_trans_unlock(trans);
 
-       bch2_journal_block_reservations(&c->journal);
-       fetch_from_journal_err = fetch_wb_keys_from_journal(c, U64_MAX);
-       bch2_journal_unblock(&c->journal);
-
-       /*
-        * On memory allocation failure, bch2_btree_write_buffer_flush_locked()
-        * is not guaranteed to empty wb->inc:
-        */
-       mutex_lock(&wb->flushing.lock);
-       while (!ret &&
-              (wb->flushing.keys.nr || wb->inc.keys.nr))
-               ret = bch2_btree_write_buffer_flush_locked(trans);
-       mutex_unlock(&wb->flushing.lock);
+       if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_btree_write_buffer))
+               return -BCH_ERR_erofs_no_writes;
 
-       if (!ret && fetch_from_journal_err)
-               goto retry;
+       trace_and_count(c, write_buffer_flush_sync, trans, _RET_IP_);
 
+       bch2_trans_unlock(trans);
+       mutex_lock(&c->btree_write_buffer.flush_lock);
+       int ret = bch2_btree_write_buffer_flush_locked(trans);
+       mutex_unlock(&c->btree_write_buffer.flush_lock);
+       bch2_write_ref_put(c, BCH_WRITE_REF_btree_write_buffer);
        return ret;
 }
 
@@ -434,9 +291,9 @@ int bch2_btree_write_buffer_flush_nocheck_rw(struct btree_trans *trans)
        struct btree_write_buffer *wb = &c->btree_write_buffer;
        int ret = 0;
 
-       if (mutex_trylock(&wb->flushing.lock)) {
+       if (mutex_trylock(&wb->flush_lock)) {
                ret = bch2_btree_write_buffer_flush_locked(trans);
-               mutex_unlock(&wb->flushing.lock);
+               mutex_unlock(&wb->flush_lock);
        }
 
        return ret;
@@ -459,195 +316,85 @@ static int bch2_btree_write_buffer_journal_flush(struct journal *j,
 {
        struct bch_fs *c = container_of(j, struct bch_fs, journal);
        struct btree_write_buffer *wb = &c->btree_write_buffer;
-       int ret, fetch_from_journal_err;
 
-       do {
-               fetch_from_journal_err = fetch_wb_keys_from_journal(c, seq);
-
-               mutex_lock(&wb->flushing.lock);
-               ret = bch2_trans_run(c, bch2_btree_write_buffer_flush_locked(trans));
-               mutex_unlock(&wb->flushing.lock);
-       } while (!ret &&
-                (fetch_from_journal_err ||
-                 (wb->flushing.pin.seq && wb->flushing.pin.seq <= seq) ||
-                 (wb->inc.pin.seq && wb->inc.pin.seq <= seq)));
+       mutex_lock(&wb->flush_lock);
+       int ret = bch2_trans_run(c, bch2_btree_write_buffer_flush_locked(trans));
+       mutex_unlock(&wb->flush_lock);
 
        return ret;
 }
 
-static void bch2_btree_write_buffer_flush_work(struct work_struct *work)
+static inline u64 btree_write_buffer_ref(int idx)
 {
-       struct bch_fs *c = container_of(work, struct bch_fs, btree_write_buffer.flush_work);
-       struct btree_write_buffer *wb = &c->btree_write_buffer;
-       int ret;
-
-       mutex_lock(&wb->flushing.lock);
-       do {
-               ret = bch2_trans_run(c, bch2_btree_write_buffer_flush_locked(trans));
-       } while (!ret && bch2_btree_write_buffer_should_flush(c));
-       mutex_unlock(&wb->flushing.lock);
-
-       bch2_write_ref_put(c, BCH_WRITE_REF_btree_write_buffer);
+       return ((union btree_write_buffer_state) {
+               .ref0 = idx == 0,
+               .ref1 = idx == 1,
+       }).v;
 }
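
Because ref0 and ref1 occupy disjoint bitfields of the state word, the value
built here can be applied with a single 64-bit add or subtract. An
illustrative snippet (not from the patch):

	union btree_write_buffer_state s = { .v = 0 };

	s.v += btree_write_buffer_ref(0);	/* s.ref0 == 1, s.ref1 == 0 */
	s.v += btree_write_buffer_ref(1);	/* s.ref0 == 1, s.ref1 == 1 */
	s.v -= btree_write_buffer_ref(0);	/* back to s.ref0 == 0 */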
 
-int __bch2_journal_key_to_wb(struct bch_fs *c,
-                            struct journal_keys_to_wb *dst,
-                            enum btree_id btree, struct bkey_i *k)
+int bch2_btree_insert_keys_write_buffer(struct btree_trans *trans)
 {
+       struct bch_fs *c = trans->c;
        struct btree_write_buffer *wb = &c->btree_write_buffer;
-       int ret;
-retry:
-       ret = darray_make_room_gfp(&dst->wb->keys, 1, GFP_KERNEL);
-       if (!ret && dst->wb == &wb->flushing)
-               ret = darray_resize(&wb->sorted, wb->flushing.keys.size);
-
-       if (unlikely(ret)) {
-               if (dst->wb == &c->btree_write_buffer.flushing) {
-                       mutex_unlock(&dst->wb->lock);
-                       dst->wb = &c->btree_write_buffer.inc;
-                       bch2_journal_pin_add(&c->journal, dst->seq, &dst->wb->pin,
-                                            bch2_btree_write_buffer_journal_flush);
-                       goto retry;
-               }
-
-               return ret;
-       }
-
-       dst->room = darray_room(dst->wb->keys);
-       if (dst->wb == &wb->flushing)
-               dst->room = min(dst->room, wb->sorted.size - wb->flushing.keys.nr);
-       BUG_ON(!dst->room);
-       BUG_ON(!dst->seq);
-
-       struct btree_write_buffered_key *wb_k = &darray_top(dst->wb->keys);
-       wb_k->journal_seq       = dst->seq;
-       wb_k->btree             = btree;
-       bkey_copy(&wb_k->k, k);
-       dst->wb->keys.nr++;
-       dst->room--;
-       return 0;
-}
+       struct btree_write_buffered_key *i;
+       union btree_write_buffer_state old, new;
+       int ret = 0;
+       u64 v;
 
-void bch2_journal_keys_to_write_buffer_start(struct bch_fs *c, struct journal_keys_to_wb *dst, u64 seq)
-{
-       struct btree_write_buffer *wb = &c->btree_write_buffer;
+       trans_for_each_wb_update(trans, i) {
+               EBUG_ON(i->k.k.u64s > BTREE_WRITE_BUFERED_U64s_MAX);
 
-       if (mutex_trylock(&wb->flushing.lock)) {
-               mutex_lock(&wb->inc.lock);
-               move_keys_from_inc_to_flushing(wb);
+               i->journal_seq          = trans->journal_res.seq;
+               i->journal_offset       = trans->journal_res.offset;
+       }
 
-               /*
-                * Attempt to skip wb->inc, and add keys directly to
-                * wb->flushing, saving us a copy later:
-                */
+       preempt_disable();
+       v = READ_ONCE(wb->state.v);
+       do {
+               old.v = new.v = v;
 
-               if (!wb->inc.keys.nr) {
-                       dst->wb = &wb->flushing;
-               } else {
-                       mutex_unlock(&wb->flushing.lock);
-                       dst->wb = &wb->inc;
+               new.v += btree_write_buffer_ref(new.idx);
+               new.nr += trans->nr_wb_updates;
+               if (new.nr > wb->size) {
+                       ret = -BCH_ERR_btree_insert_need_flush_buffer;
+                       goto out;
                }
-       } else {
-               mutex_lock(&wb->inc.lock);
-               dst->wb = &wb->inc;
-       }
+       } while ((v = atomic64_cmpxchg_acquire(&wb->state.counter, old.v, new.v)) != old.v);
 
-       dst->room = darray_room(dst->wb->keys);
-       if (dst->wb == &wb->flushing)
-               dst->room = min(dst->room, wb->sorted.size - wb->flushing.keys.nr);
-       dst->seq = seq;
+       memcpy(wb->keys[new.idx] + old.nr,
+              trans->wb_updates,
+              sizeof(trans->wb_updates[0]) * trans->nr_wb_updates);
 
-       bch2_journal_pin_add(&c->journal, seq, &dst->wb->pin,
+       bch2_journal_pin_add(&c->journal, trans->journal_res.seq, &wb->journal_pin,
                             bch2_btree_write_buffer_journal_flush);
-}
 
-void bch2_journal_keys_to_write_buffer_end(struct bch_fs *c, struct journal_keys_to_wb *dst)
-{
-       struct btree_write_buffer *wb = &c->btree_write_buffer;
-
-       if (!dst->wb->keys.nr)
-               bch2_journal_pin_drop(&c->journal, &dst->wb->pin);
-
-       if (bch2_btree_write_buffer_should_flush(c) &&
-           __bch2_write_ref_tryget(c, BCH_WRITE_REF_btree_write_buffer) &&
-           !queue_work(system_unbound_wq, &c->btree_write_buffer.flush_work))
-               bch2_write_ref_put(c, BCH_WRITE_REF_btree_write_buffer);
-
-       if (dst->wb == &wb->flushing)
-               mutex_unlock(&wb->flushing.lock);
-       mutex_unlock(&wb->inc.lock);
-}
-
-static int bch2_journal_keys_to_write_buffer(struct bch_fs *c, struct journal_buf *buf)
-{
-       struct journal_keys_to_wb dst;
-       struct jset_entry *entry;
-       struct bkey_i *k;
-       int ret = 0;
-
-       bch2_journal_keys_to_write_buffer_start(c, &dst, le64_to_cpu(buf->data->seq));
-
-       for_each_jset_entry_type(entry, buf->data, BCH_JSET_ENTRY_write_buffer_keys) {
-               jset_entry_for_each_key(entry, k) {
-                       ret = bch2_journal_key_to_wb(c, &dst, entry->btree_id, k);
-                       if (ret)
-                               goto out;
-               }
-
-               entry->type = BCH_JSET_ENTRY_btree_keys;
-       }
-
-       buf->need_flush_to_write_buffer = false;
+       atomic64_sub_return_release(btree_write_buffer_ref(new.idx), &wb->state.counter);
 out:
-       bch2_journal_keys_to_write_buffer_end(c, &dst);
-       return ret;
-}
-
-static int wb_keys_resize(struct btree_write_buffer_keys *wb, size_t new_size)
-{
-       if (wb->keys.size >= new_size)
-               return 0;
-
-       if (!mutex_trylock(&wb->lock))
-               return -EINTR;
-
-       int ret = darray_resize(&wb->keys, new_size);
-       mutex_unlock(&wb->lock);
+       preempt_enable();
        return ret;
 }
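
Note the protocol here: the cmpxchg loop reserves trans->nr_wb_updates slots
and takes a reference on the active buffer in one atomic step, the memcpy()
fills the reserved slots, and the release-subtract publishes them;
preempt_disable() keeps that window short. The flush side's counterpart is not
in this hunk, but it is expected to flip state.idx (zeroing nr) and then wait
for the old buffer's references to drain before reading keys[old.idx]. A
hedged sketch of such a switch helper:

	static union btree_write_buffer_state
	btree_write_buffer_switch(struct btree_write_buffer *wb)
	{
		union btree_write_buffer_state old, new;
		u64 v = READ_ONCE(wb->state.v);

		do {
			old.v = new.v = v;
			new.nr = 0;
			new.idx++;	/* 1-bit field, so this flips buffers */
		} while ((v = atomic64_cmpxchg_acquire(&wb->state.counter,
						       old.v, new.v)) != old.v);

		/* wait for in-flight writers to the old buffer to drain */
		while (old.idx == 0 ? wb->state.ref0 : wb->state.ref1)
			cpu_relax();

		smp_mb();
		return old;
	}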
 
-int bch2_btree_write_buffer_resize(struct bch_fs *c, size_t new_size)
-{
-       struct btree_write_buffer *wb = &c->btree_write_buffer;
-
-       return wb_keys_resize(&wb->flushing, new_size) ?:
-               wb_keys_resize(&wb->inc, new_size);
-}
-
 void bch2_fs_btree_write_buffer_exit(struct bch_fs *c)
 {
        struct btree_write_buffer *wb = &c->btree_write_buffer;
 
-       BUG_ON((wb->inc.keys.nr || wb->flushing.keys.nr) &&
-              !bch2_journal_error(&c->journal));
+       BUG_ON(wb->state.nr && !bch2_journal_error(&c->journal));
 
-       darray_exit(&wb->sorted);
-       darray_exit(&wb->flushing.keys);
-       darray_exit(&wb->inc.keys);
+       kvfree(wb->keys[1]);
+       kvfree(wb->keys[0]);
 }
 
 int bch2_fs_btree_write_buffer_init(struct bch_fs *c)
 {
        struct btree_write_buffer *wb = &c->btree_write_buffer;
 
-       mutex_init(&wb->inc.lock);
-       mutex_init(&wb->flushing.lock);
-       INIT_WORK(&wb->flush_work, bch2_btree_write_buffer_flush_work);
+       mutex_init(&wb->flush_lock);
+       wb->size = c->opts.btree_write_buffer_size;
 
-       /* Will be resized by journal as needed: */
-       unsigned initial_size = 1 << 16;
+       wb->keys[0] = kvmalloc_array(wb->size, sizeof(*wb->keys[0]), GFP_KERNEL);
+       wb->keys[1] = kvmalloc_array(wb->size, sizeof(*wb->keys[1]), GFP_KERNEL);
+       if (!wb->keys[0] || !wb->keys[1])
+               return -BCH_ERR_ENOMEM_fs_btree_write_buffer_init;
 
-       return  darray_make_room(&wb->inc.keys, initial_size) ?:
-               darray_make_room(&wb->flushing.keys, initial_size) ?:
-               darray_make_room(&wb->sorted, initial_size);
+       return 0;
 }
index 1f645f529ed21bf7afba7b67bac3ff1b28df2b1b..dec2c9a8bab2a94cf358fb130e0f89de915884c9 100644 (file)
@@ -2,59 +2,13 @@
 #ifndef _BCACHEFS_BTREE_WRITE_BUFFER_H
 #define _BCACHEFS_BTREE_WRITE_BUFFER_H
 
-#include "bkey.h"
-
-static inline bool bch2_btree_write_buffer_should_flush(struct bch_fs *c)
-{
-       struct btree_write_buffer *wb = &c->btree_write_buffer;
-
-       return wb->inc.keys.nr + wb->flushing.keys.nr > wb->inc.keys.size / 4;
-}
-
-static inline bool bch2_btree_write_buffer_must_wait(struct bch_fs *c)
-{
-       struct btree_write_buffer *wb = &c->btree_write_buffer;
-
-       return wb->inc.keys.nr > wb->inc.keys.size * 3 / 4;
-}
-
-struct btree_trans;
-int bch2_btree_write_buffer_flush_sync(struct btree_trans *);
+int bch2_btree_write_buffer_flush_locked(struct btree_trans *);
 int bch2_btree_write_buffer_flush_nocheck_rw(struct btree_trans *);
+int bch2_btree_write_buffer_flush_sync(struct btree_trans *);
 int bch2_btree_write_buffer_tryflush(struct btree_trans *);
 
-struct journal_keys_to_wb {
-       struct btree_write_buffer_keys  *wb;
-       size_t                          room;
-       u64                             seq;
-};
-
-int __bch2_journal_key_to_wb(struct bch_fs *,
-                            struct journal_keys_to_wb *,
-                            enum btree_id, struct bkey_i *);
-
-static inline int bch2_journal_key_to_wb(struct bch_fs *c,
-                            struct journal_keys_to_wb *dst,
-                            enum btree_id btree, struct bkey_i *k)
-{
-       EBUG_ON(!dst->seq);
-
-       if (unlikely(!dst->room))
-               return __bch2_journal_key_to_wb(c, dst, btree, k);
-
-       struct btree_write_buffered_key *wb_k = &darray_top(dst->wb->keys);
-       wb_k->journal_seq       = dst->seq;
-       wb_k->btree             = btree;
-       bkey_copy(&wb_k->k, k);
-       dst->wb->keys.nr++;
-       dst->room--;
-       return 0;
-}
-
-void bch2_journal_keys_to_write_buffer_start(struct bch_fs *, struct journal_keys_to_wb *, u64);
-void bch2_journal_keys_to_write_buffer_end(struct bch_fs *, struct journal_keys_to_wb *);
+int bch2_btree_insert_keys_write_buffer(struct btree_trans *);
 
-int bch2_btree_write_buffer_resize(struct bch_fs *, size_t);
 void bch2_fs_btree_write_buffer_exit(struct bch_fs *);
 int bch2_fs_btree_write_buffer_init(struct bch_fs *);
 
index 9b9433de9c3686aa59255858e44411384219bafc..99993ba77aeab01a63470111e84db4c2ebc5afad 100644 (file)
@@ -2,56 +2,43 @@
 #ifndef _BCACHEFS_BTREE_WRITE_BUFFER_TYPES_H
 #define _BCACHEFS_BTREE_WRITE_BUFFER_TYPES_H
 
-#include "darray.h"
 #include "journal_types.h"
 
 #define BTREE_WRITE_BUFERED_VAL_U64s_MAX       4
 #define BTREE_WRITE_BUFERED_U64s_MAX   (BKEY_U64s + BTREE_WRITE_BUFERED_VAL_U64s_MAX)
 
-struct wb_key_ref {
-union {
-       struct {
-#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
-               unsigned                        idx:24;
-               u8                              pos[sizeof(struct bpos)];
-               enum btree_id                   btree:8;
-#else
-               enum btree_id                   btree:8;
-               u8                              pos[sizeof(struct bpos)];
-               unsigned                        idx:24;
-#endif
-       } __packed;
-       struct {
-#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
-               u64 lo;
-               u64 mi;
-               u64 hi;
-#else
-               u64 hi;
-               u64 mi;
-               u64 lo;
-#endif
-       };
-};
-};
-
 struct btree_write_buffered_key {
-       enum btree_id                   btree:8;
-       u64                             journal_seq:56;
+       u64                     journal_seq;
+       unsigned                journal_offset;
+       enum btree_id           btree;
        __BKEY_PADDED(k, BTREE_WRITE_BUFERED_VAL_U64s_MAX);
 };
 
-struct btree_write_buffer_keys {
-       DARRAY(struct btree_write_buffered_key) keys;
-       struct journal_entry_pin        pin;
-       struct mutex                    lock;
+union btree_write_buffer_state {
+       struct {
+               atomic64_t      counter;
+       };
+
+       struct {
+               u64             v;
+       };
+
+       struct {
+               u64                     nr:23;
+               u64                     idx:1;
+               u64                     ref0:20;
+               u64                     ref1:20;
+       };
 };
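
The four bitfields are sized to tile the 64-bit word exactly
(23 + 1 + 20 + 20 = 64), which is what lets a single atomic add update nr and
a refcount together; nr:23 also comfortably covers the option's upper bound of
(1U << 20) - 1 entries. A compile-time check one could add:

	static_assert(23 + 1 + 20 + 20 == 64,
		      "btree_write_buffer_state bitfields must fill the u64");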
 
 struct btree_write_buffer {
-       DARRAY(struct wb_key_ref)       sorted;
-       struct btree_write_buffer_keys  inc;
-       struct btree_write_buffer_keys  flushing;
-       struct work_struct              flush_work;
+       struct mutex                    flush_lock;
+       struct journal_entry_pin        journal_pin;
+
+       union btree_write_buffer_state  state;
+       size_t                          size;
+
+       struct btree_write_buffered_key *keys[2];
 };
 
 #endif /* _BCACHEFS_BTREE_WRITE_BUFFER_TYPES_H */
index 50eb6ba2f64b5eb88eecf43eb72716dc08ebb46d..312bd0c86623402d6be837df1fe8298a71dfd5c6 100644 (file)
@@ -334,7 +334,7 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
        preempt_enable();
 }
 
-struct bch_alloc_v4 bucket_m_to_alloc(struct bucket b)
+static inline struct bch_alloc_v4 bucket_m_to_alloc(struct bucket b)
 {
        return (struct bch_alloc_v4) {
                .gen            = b.gen,
@@ -346,13 +346,12 @@ struct bch_alloc_v4 bucket_m_to_alloc(struct bucket b)
 }
 
 static void bch2_dev_usage_update_m(struct bch_fs *c, struct bch_dev *ca,
-                                   struct bucket old, struct bucket new,
-                                   u64 journal_seq, bool gc)
+                                   struct bucket old, struct bucket new)
 {
        bch2_dev_usage_update(c, ca,
                              bucket_m_to_alloc(old),
                              bucket_m_to_alloc(new),
-                             journal_seq, gc);
+                             0, true);
 }
 
 static inline int __update_replicas(struct bch_fs *c,
@@ -658,7 +657,7 @@ int bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
 err:
        bucket_unlock(g);
        if (!ret)
-               bch2_dev_usage_update_m(c, ca, old, new, 0, true);
+               bch2_dev_usage_update_m(c, ca, old, new);
        percpu_up_read(&c->mark_lock);
        return ret;
 }
@@ -773,7 +772,6 @@ static int mark_stripe_bucket(struct btree_trans *trans,
                              unsigned flags)
 {
        struct bch_fs *c = trans->c;
-       u64 journal_seq = trans->journal_res.seq;
        const struct bch_stripe *s = bkey_s_c_to_stripe(k).v;
        unsigned nr_data = s->nr_blocks - s->nr_redundant;
        bool parity = ptr_idx >= nr_data;
@@ -820,7 +818,7 @@ static int mark_stripe_bucket(struct btree_trans *trans,
 err:
        bucket_unlock(g);
        if (!ret)
-               bch2_dev_usage_update_m(c, ca, old, new, journal_seq, true);
+               bch2_dev_usage_update_m(c, ca, old, new);
        percpu_up_read(&c->mark_lock);
        printbuf_exit(&buf);
        return ret;
@@ -843,8 +841,12 @@ static int __mark_pointer(struct btree_trans *trans,
                return ret;
 
        *dst_sectors += sectors;
-       *bucket_data_type = *dirty_sectors || *cached_sectors
-               ? ptr_data_type : 0;
+
+       if (!*dirty_sectors && !*cached_sectors)
+               *bucket_data_type = 0;
+       else if (*bucket_data_type != BCH_DATA_stripe)
+               *bucket_data_type = ptr_data_type;
+
        return 0;
 }
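
Restated as a hypothetical standalone helper, for clarity: a bucket with no
sectors loses its data type, and a bucket already typed BCH_DATA_stripe keeps
that type rather than being retyped to the incoming pointer's data type, which
the old ternary would have done:

	/* hypothetical restatement of the rule in __mark_pointer() above */
	static u8 new_bucket_data_type(u8 cur, u8 ptr_data_type,
				       u32 dirty_sectors, u32 cached_sectors)
	{
		if (!dirty_sectors && !cached_sectors)
			return 0;			/* empty: clear the type */
		if (cur == BCH_DATA_stripe)
			return cur;			/* stripe buckets stay stripe */
		return ptr_data_type;
	}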
 
@@ -855,7 +857,6 @@ static int bch2_mark_pointer(struct btree_trans *trans,
                             s64 sectors,
                             unsigned flags)
 {
-       u64 journal_seq = trans->journal_res.seq;
        struct bch_fs *c = trans->c;
        struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
        struct bucket old, new, *g;
@@ -882,7 +883,7 @@ static int bch2_mark_pointer(struct btree_trans *trans,
        new = *g;
        bucket_unlock(g);
        if (!ret)
-               bch2_dev_usage_update_m(c, ca, old, new, journal_seq, true);
+               bch2_dev_usage_update_m(c, ca, old, new);
        percpu_up_read(&c->mark_lock);
 
        return ret;
index 118f0c0c4e3049bb7efc91eabd531c1196e61b03..ba0436ae6b05833869fe9989585ffe74f13f82e4 100644 (file)
@@ -418,7 +418,7 @@ static long bch2_ioctl_fs_usage(struct bch_fs *c,
        unsigned i;
        int ret = 0;
 
-       if (!test_bit(BCH_FS_STARTED, &c->flags))
+       if (!test_bit(BCH_FS_started, &c->flags))
                return -EINVAL;
 
        if (get_user(replica_entries_bytes, &user_arg->replica_entries_bytes))
@@ -492,7 +492,7 @@ static long bch2_ioctl_dev_usage(struct bch_fs *c,
        struct bch_dev *ca;
        unsigned i;
 
-       if (!test_bit(BCH_FS_STARTED, &c->flags))
+       if (!test_bit(BCH_FS_started, &c->flags))
                return -EINVAL;
 
        if (copy_from_user(&arg, user_arg, sizeof(arg)))
@@ -533,7 +533,7 @@ static long bch2_ioctl_dev_usage_v2(struct bch_fs *c,
        struct bch_dev *ca;
        int ret = 0;
 
-       if (!test_bit(BCH_FS_STARTED, &c->flags))
+       if (!test_bit(BCH_FS_started, &c->flags))
                return -EINVAL;
 
        if (copy_from_user(&arg, user_arg, sizeof(arg)))
@@ -725,7 +725,7 @@ long bch2_fs_ioctl(struct bch_fs *c, unsigned cmd, void __user *arg)
                BCH_IOCTL(disk_get_idx, struct bch_ioctl_disk_get_idx);
        }
 
-       if (!test_bit(BCH_FS_STARTED, &c->flags))
+       if (!test_bit(BCH_FS_started, &c->flags))
                return -EINVAL;
 
        switch (cmd) {
index 2a02bf00b67fba12d6024b42291f301f60aa4253..bc8b556f19a90fbbd768f360b87b11e4bf7cf86b 100644 (file)
@@ -1005,7 +1005,7 @@ static int ec_stripe_update_extents(struct bch_fs *c, struct ec_stripe_buf *s)
        unsigned i, nr_data = v->nr_blocks - v->nr_redundant;
        int ret = 0;
 
-       ret = bch2_btree_write_buffer_flush_sync(trans);
+       ret = bch2_btree_write_buffer_flush_nocheck_rw(trans);
        if (ret)
                goto err;
 
@@ -1415,7 +1415,7 @@ __bch2_ec_stripe_head_get(struct btree_trans *trans,
        if (ret)
                return ERR_PTR(ret);
 
-       if (test_bit(BCH_FS_GOING_RO, &c->flags)) {
+       if (test_bit(BCH_FS_going_ro, &c->flags)) {
                h = ERR_PTR(-BCH_ERR_erofs_no_writes);
                goto found;
        }
index 4d35e5c6cd3e2cc3e3e2eb6c797a0c7e7ac2a046..e3e2be7922b926bc8c1407db7638daa1103f7365 100644 (file)
        x(BCH_ERR_btree_insert_fail,    btree_insert_need_mark_replicas)        \
        x(BCH_ERR_btree_insert_fail,    btree_insert_need_journal_res)          \
        x(BCH_ERR_btree_insert_fail,    btree_insert_need_journal_reclaim)      \
+       x(BCH_ERR_btree_insert_fail,    btree_insert_need_flush_buffer)         \
        x(0,                            backpointer_to_overwritten_btree_node)  \
        x(0,                            lock_fail_root_changed)                 \
        x(0,                            journal_reclaim_would_deadlock)         \
index 7b28d37922fd0e47d82ac1d27403f031cc577c7b..655e3ba9bfd2c09f221011ae16c38ba9f101e6ff 100644 (file)
@@ -7,7 +7,7 @@
 
 bool bch2_inconsistent_error(struct bch_fs *c)
 {
-       set_bit(BCH_FS_ERROR, &c->flags);
+       set_bit(BCH_FS_error, &c->flags);
 
        switch (c->opts.errors) {
        case BCH_ON_ERROR_continue:
@@ -26,8 +26,8 @@ bool bch2_inconsistent_error(struct bch_fs *c)
 
 void bch2_topology_error(struct bch_fs *c)
 {
-       set_bit(BCH_FS_TOPOLOGY_ERROR, &c->flags);
-       if (test_bit(BCH_FS_FSCK_DONE, &c->flags))
+       set_bit(BCH_FS_topology_error, &c->flags);
+       if (test_bit(BCH_FS_fsck_done, &c->flags))
                bch2_inconsistent_error(c);
 }
 
@@ -114,7 +114,7 @@ static struct fsck_err_state *fsck_err_get(struct bch_fs *c, const char *fmt)
 {
        struct fsck_err_state *s;
 
-       if (test_bit(BCH_FS_FSCK_DONE, &c->flags))
+       if (test_bit(BCH_FS_fsck_done, &c->flags))
                return NULL;
 
        list_for_each_entry(s, &c->fsck_error_msgs, list)
@@ -193,7 +193,7 @@ int bch2_fsck_err(struct bch_fs *c,
                prt_printf(out, bch2_log_msg(c, ""));
 #endif
 
-       if (test_bit(BCH_FS_FSCK_DONE, &c->flags)) {
+       if (test_bit(BCH_FS_fsck_done, &c->flags)) {
                if (c->opts.errors != BCH_ON_ERROR_continue ||
                    !(flags & (FSCK_CAN_FIX|FSCK_CAN_IGNORE))) {
                        prt_str(out, ", shutting down");
@@ -253,7 +253,7 @@ int bch2_fsck_err(struct bch_fs *c,
        if (print)
                bch2_print_string_as_lines(KERN_ERR, out->buf);
 
-       if (!test_bit(BCH_FS_FSCK_DONE, &c->flags) &&
+       if (!test_bit(BCH_FS_fsck_done, &c->flags) &&
            (ret != -BCH_ERR_fsck_fix &&
             ret != -BCH_ERR_fsck_ignore))
                bch_err(c, "Unable to continue, halting");
@@ -271,10 +271,10 @@ int bch2_fsck_err(struct bch_fs *c,
                bch2_inconsistent_error(c);
 
        if (ret == -BCH_ERR_fsck_fix) {
-               set_bit(BCH_FS_ERRORS_FIXED, &c->flags);
+               set_bit(BCH_FS_errors_fixed, &c->flags);
        } else {
-               set_bit(BCH_FS_ERRORS_NOT_FIXED, &c->flags);
-               set_bit(BCH_FS_ERROR, &c->flags);
+               set_bit(BCH_FS_errors_not_fixed, &c->flags);
+               set_bit(BCH_FS_error, &c->flags);
        }
 
        return ret;
index 52f0e7acda3d81ce043672b428db4432cdcebeb2..637a83e4d961bf7b623617421543d9148329ec6e 100644 (file)
@@ -638,7 +638,7 @@ do_io:
                /* Check for writing past i_size: */
                WARN_ONCE((bio_end_sector(&w->io->op.wbio.bio) << 9) >
                          round_up(i_size, block_bytes(c)) &&
-                         !test_bit(BCH_FS_EMERGENCY_RO, &c->flags),
+                         !test_bit(BCH_FS_emergency_ro, &c->flags),
                          "writing past i_size: %llu > %llu (unrounded %llu)\n",
                          bio_end_sector(&w->io->op.wbio.bio) << 9,
                          round_up(i_size, block_bytes(c)),
index b1c89a4821f5e176f4b7b96710f9fc8de0a236e5..8cf4bcf9be897185a9cd89527362f9739772e9a5 100644 (file)
@@ -448,7 +448,7 @@ static int snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s,
                                bch2_btree_id_str(btree_id),
                                pos.inode, pos.offset,
                                i->id, n.id, n.equiv);
-                       set_bit(BCH_FS_NEED_DELETE_DEAD_SNAPSHOTS, &c->flags);
+                       set_bit(BCH_FS_need_delete_dead_snapshots, &c->flags);
                        return bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_delete_dead_snapshots);
                }
        }
index a2c96f7c193f96bec34e041bfa0abbff5cf7620f..b861ab2e27bd9b322543fd978fdd29e19297b231 100644 (file)
@@ -1173,7 +1173,7 @@ again:
                        break;
 
                if (ret) {
-                       if (!test_bit(BCH_FS_RW, &c->flags)) {
+                       if (!test_bit(BCH_FS_rw, &c->flags)) {
                                bch2_trans_unlock(trans);
                                bch2_fs_lazy_rw(c);
                        }
index 86b148d9bea343ddc4c5bcf7bee26fdaf866209d..d5540c8568fffb7ee0af03cd6b514eabe979f119 100644 (file)
@@ -10,7 +10,6 @@
 #include "bkey_methods.h"
 #include "btree_gc.h"
 #include "btree_update.h"
-#include "btree_write_buffer.h"
 #include "buckets.h"
 #include "error.h"
 #include "journal.h"
@@ -148,7 +147,6 @@ void bch2_journal_buf_put_final(struct journal *j, u64 seq, bool write)
                bch2_journal_reclaim_fast(j);
        if (write)
                closure_call(&j->io, bch2_journal_write, c->io_complete_wq, NULL);
-       wake_up(&j->wait);
 }
 
 /*
@@ -332,7 +330,6 @@ static int journal_entry_open(struct journal *j)
        buf->must_flush = false;
        buf->separate_flush = false;
        buf->flush_time = 0;
-       buf->need_flush_to_write_buffer = true;
 
        memset(buf->data, 0, sizeof(*buf->data));
        buf->data->seq  = cpu_to_le64(journal_cur_seq(j));
@@ -769,75 +766,6 @@ void bch2_journal_block(struct journal *j)
        journal_quiesce(j);
 }
 
-/*
- * XXX: ideally this would not be closing the current journal entry, but
- * otherwise we do not have a way to avoid racing with res_get() - j->blocked
- * will race.
- */
-static bool journal_reservations_stopped(struct journal *j)
-{
-       union journal_res_state s;
-
-       journal_entry_close(j);
-
-       s.v = atomic64_read_acquire(&j->reservations.counter);
-
-       return  s.buf0_count == 0 &&
-               s.buf1_count == 0 &&
-               s.buf2_count == 0 &&
-               s.buf3_count == 0;
-}
-
-void bch2_journal_block_reservations(struct journal *j)
-{
-       spin_lock(&j->lock);
-       j->blocked++;
-       spin_unlock(&j->lock);
-
-       wait_event(j->wait, journal_reservations_stopped(j));
-}
-
-static struct journal_buf *__bch2_next_write_buffer_flush_journal_buf(struct journal *j, u64 max_seq)
-{
-       spin_lock(&j->lock);
-       max_seq = min(max_seq, journal_cur_seq(j));
-
-       for (u64 seq = journal_last_unwritten_seq(j);
-            seq <= max_seq;
-            seq++) {
-               unsigned idx = seq & JOURNAL_BUF_MASK;
-               struct journal_buf *buf = j->buf + idx;
-               union journal_res_state s;
-
-               if (!buf->need_flush_to_write_buffer)
-                       continue;
-
-               if (seq == journal_cur_seq(j))
-                       __journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL);
-
-               s.v = atomic64_read_acquire(&j->reservations.counter);
-
-               if (journal_state_count(s, idx)) {
-                       spin_unlock(&j->lock);
-                       return ERR_PTR(-EAGAIN);
-               }
-
-               spin_unlock(&j->lock);
-               return buf;
-       }
-
-       spin_unlock(&j->lock);
-       return NULL;
-}
-
-struct journal_buf *bch2_next_write_buffer_flush_journal_buf(struct journal *j, u64 max_seq)
-{
-       struct journal_buf *ret;
-
-       wait_event(j->wait, (ret = __bch2_next_write_buffer_flush_journal_buf(j, max_seq)) != ERR_PTR(-EAGAIN));
-       return ret;
-}
-
 /* allocate journal on a device: */
 
 static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
@@ -1289,7 +1217,6 @@ int bch2_fs_journal_init(struct journal *j)
        static struct lock_class_key res_key;
        unsigned i;
 
-       mutex_init(&j->buf_lock);
        spin_lock_init(&j->lock);
        spin_lock_init(&j->err_lock);
        init_waitqueue_head(&j->wait);
index b5185f97af0f12100300d6366e0228a62971b0dc..c85d01cf49484984d08d20a2159f84b2506f96a1 100644 (file)
@@ -259,7 +259,7 @@ static inline union journal_res_state journal_state_buf_put(struct journal *j, u
 {
        union journal_res_state s;
 
-       s.v = atomic64_sub_return_release(((union journal_res_state) {
+       s.v = atomic64_sub_return(((union journal_res_state) {
                                    .buf0_count = idx == 0,
                                    .buf1_count = idx == 1,
                                    .buf2_count = idx == 2,
@@ -427,8 +427,6 @@ static inline void bch2_journal_set_replay_done(struct journal *j)
 
 void bch2_journal_unblock(struct journal *);
 void bch2_journal_block(struct journal *);
-void bch2_journal_block_reservations(struct journal *);
-struct journal_buf *bch2_next_write_buffer_flush_journal_buf(struct journal *j, u64 max_seq);
 
 void __bch2_journal_debug_to_text(struct printbuf *, struct journal *);
 void bch2_journal_debug_to_text(struct printbuf *, struct journal *);
index c2a655235fb25e284dd698504334493d525a88f1..fe4565fcd67119dfec259dd1569e72a5b3adb818 100644 (file)
@@ -4,7 +4,6 @@
 #include "alloc_foreground.h"
 #include "btree_io.h"
 #include "btree_update_interior.h"
-#include "btree_write_buffer.h"
 #include "buckets.h"
 #include "checksum.h"
 #include "disk_groups.h"
@@ -722,22 +721,6 @@ static void journal_entry_overwrite_to_text(struct printbuf *out, struct bch_fs
        journal_entry_btree_keys_to_text(out, c, entry);
 }
 
-static int journal_entry_write_buffer_keys_validate(struct bch_fs *c,
-                               struct jset *jset,
-                               struct jset_entry *entry,
-                               unsigned version, int big_endian,
-                               enum bkey_invalid_flags flags)
-{
-       return journal_entry_btree_keys_validate(c, jset, entry,
-                               version, big_endian, READ);
-}
-
-static void journal_entry_write_buffer_keys_to_text(struct printbuf *out, struct bch_fs *c,
-                                           struct jset_entry *entry)
-{
-       journal_entry_btree_keys_to_text(out, c, entry);
-}
-
 struct jset_entry_ops {
        int (*validate)(struct bch_fs *, struct jset *,
                        struct jset_entry *, unsigned, int,
@@ -1518,8 +1501,6 @@ done:
 
 static void journal_buf_realloc(struct journal *j, struct journal_buf *buf)
 {
-       struct bch_fs *c = container_of(j, struct bch_fs, journal);
-
        /* we aren't holding j->lock: */
        unsigned new_size = READ_ONCE(j->buf_size_want);
        void *new_buf;
@@ -1527,11 +1508,6 @@ static void journal_buf_realloc(struct journal *j, struct journal_buf *buf)
        if (buf->buf_size >= new_size)
                return;
 
-       size_t btree_write_buffer_size = new_size / 64;
-
-       if (bch2_btree_write_buffer_resize(c, btree_write_buffer_size))
-               return;
-
        new_buf = kvpmalloc(new_size, GFP_NOFS|__GFP_NOWARN);
        if (!new_buf)
                return;
@@ -1621,7 +1597,6 @@ static CLOSURE_CALLBACK(journal_write_done)
        } while ((v = atomic64_cmpxchg(&j->reservations.counter,
                                       old.v, new.v)) != old.v);
 
-       bch2_journal_reclaim_fast(j);
        bch2_journal_space_available(j);
 
        track_event_change(&c->times[BCH_TIME_blocked_journal_max_in_flight],
@@ -1725,11 +1700,9 @@ static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w)
        struct bch_fs *c = container_of(j, struct bch_fs, journal);
        struct jset_entry *start, *end, *i, *next, *prev = NULL;
        struct jset *jset = w->data;
-       struct journal_keys_to_wb wb = { NULL };
        unsigned sectors, bytes, u64s;
-       unsigned long btree_roots_have = 0;
        bool validate_before_checksum = false;
-       u64 seq = le64_to_cpu(jset->seq);
+       unsigned long btree_roots_have = 0;
        int ret;
 
        /*
@@ -1757,28 +1730,9 @@ static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w)
                 * to c->btree_roots we have to get any missing btree roots and
                 * add them to this journal entry:
                 */
-               switch (i->type) {
-               case BCH_JSET_ENTRY_btree_root:
+               if (i->type == BCH_JSET_ENTRY_btree_root) {
                        bch2_journal_entry_to_btree_root(c, i);
                        __set_bit(i->btree_id, &btree_roots_have);
-                       break;
-               case BCH_JSET_ENTRY_write_buffer_keys:
-                       EBUG_ON(!w->need_flush_to_write_buffer);
-
-                       if (!wb.wb)
-                               bch2_journal_keys_to_write_buffer_start(c, &wb, seq);
-
-                       struct bkey_i *k;
-                       jset_entry_for_each_key(i, k) {
-                               ret = bch2_journal_key_to_wb(c, &wb, i->btree_id, k);
-                               if (ret) {
-                                       bch2_fs_fatal_error(c, "-ENOMEM flushing journal keys to btree write buffer");
-                                       bch2_journal_keys_to_write_buffer_end(c, &wb);
-                                       return ret;
-                               }
-                       }
-                       i->type = BCH_JSET_ENTRY_btree_keys;
-                       break;
                }
 
                /* Can we merge with previous entry? */
@@ -1801,10 +1755,6 @@ static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w)
                        memmove_u64s_down(prev, i, jset_u64s(u64s));
        }
 
-       if (wb.wb)
-               bch2_journal_keys_to_write_buffer_end(c, &wb);
-       w->need_flush_to_write_buffer = false;
-
        prev = prev ? vstruct_next(prev) : jset->start;
        jset->u64s = cpu_to_le32((u64 *) prev - jset->_data);
 
@@ -1812,7 +1762,8 @@ static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w)
 
        end     = bch2_btree_roots_to_journal_entries(c, end, btree_roots_have);
 
-       bch2_journal_super_entries_add_common(c, &end, seq);
+       bch2_journal_super_entries_add_common(c, &end,
+                               le64_to_cpu(jset->seq));
        u64s    = (u64 *) end - (u64 *) start;
        BUG_ON(u64s > j->entry_u64s_reserved);
 
@@ -1835,7 +1786,7 @@ static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w)
        SET_JSET_CSUM_TYPE(jset, bch2_meta_checksum_type(c));
 
        if (!JSET_NO_FLUSH(jset) && journal_entry_empty(jset))
-               j->last_empty_seq = seq;
+               j->last_empty_seq = le64_to_cpu(jset->seq);
 
        if (bch2_csum_type_is_encryption(JSET_CSUM_TYPE(jset)))
                validate_before_checksum = true;
@@ -1931,11 +1882,9 @@ CLOSURE_CALLBACK(bch2_journal_write)
        if (ret)
                goto err;
 
-       mutex_lock(&j->buf_lock);
        journal_buf_realloc(j, w);
 
        ret = bch2_journal_write_prep(j, w);
-       mutex_unlock(&j->buf_lock);
        if (ret)
                goto err;
 
index 2aa4c0c6bbba480da77dd41fe59843f0d4f1c0f9..658aaa2c3c6d753cc92f9a494466ddcef6181d31 100644 (file)
@@ -3,7 +3,6 @@
 #include "bcachefs.h"
 #include "btree_key_cache.h"
 #include "btree_update.h"
-#include "btree_write_buffer.h"
 #include "buckets.h"
 #include "errcode.h"
 #include "error.h"
@@ -51,23 +50,20 @@ unsigned bch2_journal_dev_buckets_available(struct journal *j,
        return available;
 }
 
-void bch2_journal_set_watermark(struct journal *j)
+static inline void journal_set_watermark(struct journal *j)
 {
        struct bch_fs *c = container_of(j, struct bch_fs, journal);
        bool low_on_space = j->space[journal_space_clean].total * 4 <=
                j->space[journal_space_total].total;
        bool low_on_pin = fifo_free(&j->pin) < j->pin.size / 4;
-       bool low_on_wb = bch2_btree_write_buffer_must_wait(c);
-       unsigned watermark = low_on_space || low_on_pin || low_on_wb
+       unsigned watermark = low_on_space || low_on_pin
                ? BCH_WATERMARK_reclaim
                : BCH_WATERMARK_stripe;
 
        if (track_event_change(&c->times[BCH_TIME_blocked_journal_low_on_space],
                               &j->low_on_space_start, low_on_space) ||
            track_event_change(&c->times[BCH_TIME_blocked_journal_low_on_pin],
-                              &j->low_on_pin_start, low_on_pin) ||
-           track_event_change(&c->times[BCH_TIME_blocked_write_buffer_full],
-                              &j->write_buffer_full_start, low_on_wb))
+                              &j->low_on_pin_start, low_on_pin))
                trace_and_count(c, journal_full, c);
 
        swap(watermark, j->watermark);
@@ -234,7 +230,7 @@ void bch2_journal_space_available(struct journal *j)
        else
                clear_bit(JOURNAL_MAY_SKIP_FLUSH, &j->flags);
 
-       bch2_journal_set_watermark(j);
+       journal_set_watermark(j);
 out:
        j->cur_entry_sectors    = !ret ? j->space[journal_space_discarded].next_entry : 0;
        j->cur_entry_error      = ret;
@@ -307,7 +303,6 @@ void bch2_journal_reclaim_fast(struct journal *j)
         * all btree nodes got written out
         */
        while (!fifo_empty(&j->pin) &&
-              j->pin.front <= j->seq_ondisk &&
               !atomic_read(&fifo_peek_front(&j->pin).count)) {
                j->pin.front++;
                popped = true;
index ec84c334528177e8c865ebdbf9b9d7e265270718..7b15d682a0f51d28c47f7d881edb1b08ca24d10c 100644 (file)
@@ -16,7 +16,6 @@ static inline void journal_reclaim_kick(struct journal *j)
 unsigned bch2_journal_dev_buckets_available(struct journal *,
                                            struct journal_device *,
                                            enum journal_space_from);
-void bch2_journal_set_watermark(struct journal *);
 void bch2_journal_space_available(struct journal *);
 
 static inline bool journal_pin_active(struct journal_entry_pin *pin)
index f9d9aa95bf3a64640d3d1e6012fc319ca7aad05e..0200e299cfbb9c210d144bb056f1e85a910fe70f 100644 (file)
@@ -267,7 +267,7 @@ retry:
 
                while (!(ret = PTR_ERR_OR_ZERO(b)) &&
                       b &&
-                      !test_bit(BCH_FS_STOPPING, &c->flags))
+                      !test_bit(BCH_FS_stopping, &c->flags))
                        b = bch2_btree_iter_next_node(&iter);
 
                if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
index 85c543af60e5143a3a8546650906b09242026ee8..2427cce64fed93388214c3de8b6446875eaf01b6 100644 (file)
@@ -36,7 +36,6 @@ struct journal_buf {
        bool                    noflush;        /* write has already been kicked off, and was noflush */
        bool                    must_flush;     /* something wants a flush */
        bool                    separate_flush;
-       bool                    need_flush_to_write_buffer;
 };
 
 /*
@@ -182,12 +181,6 @@ struct journal {
         */
        darray_u64              early_journal_entries;
 
-       /*
-        * Protects journal_buf->data when accessing without a journal
-        * reservation: for synchronization between the btree write buffer code
-        * and the journal write path:
-        */
-       struct mutex            buf_lock;
        /*
         * Two journal entries -- one is currently open for new entries, the
         * other is possibly being written out.
@@ -278,7 +271,6 @@ struct journal {
        u64                     low_on_space_start;
        u64                     low_on_pin_start;
        u64                     max_in_flight_start;
-       u64                     write_buffer_full_start;
 
        struct bch2_time_stats  *flush_write_time;
        struct bch2_time_stats  *noflush_write_time;
index 5340f2d0eebaa70e047417fd98169ff4c8df2110..82c08a987c69cc69d974d702dec9ded8906d0419 100644 (file)
@@ -123,9 +123,11 @@ int bch2_check_lru_key(struct btree_trans *trans,
        if (lru_k.k->type != KEY_TYPE_set ||
            lru_pos_time(lru_k.k->p) != idx) {
                if (!bpos_eq(*last_flushed_pos, lru_k.k->p)) {
-                       *last_flushed_pos = lru_k.k->p;
-                       ret = bch2_btree_write_buffer_flush_sync(trans) ?:
-                               -BCH_ERR_transaction_restart_write_buffer_flush;
+                       ret = bch2_btree_write_buffer_flush_sync(trans);
+                       if (!ret) {
+                               *last_flushed_pos = lru_k.k->p;
+                               ret = -BCH_ERR_transaction_restart_write_buffer_flush;
+                       }
                        goto out;
                }
 
index c5518a86627642addb83299fad23a5604e6184e6..db14ec3769287763bce168b13fb80723003b6352 100644 (file)
@@ -56,17 +56,6 @@ static void trace_move_extent_read2(struct bch_fs *c, struct bkey_s_c k)
        }
 }
 
-static void trace_move_extent_alloc_mem_fail2(struct bch_fs *c, struct bkey_s_c k)
-{
-       if (trace_move_extent_alloc_mem_fail_enabled()) {
-               struct printbuf buf = PRINTBUF;
-
-               bch2_bkey_val_to_text(&buf, c, k);
-               trace_move_extent_alloc_mem_fail(c, buf.buf);
-               printbuf_exit(&buf);
-       }
-}
-
 struct moving_io {
        struct list_head                read_list;
        struct list_head                io_list;
@@ -356,8 +345,16 @@ err:
        if (ret == -BCH_ERR_data_update_done)
                return 0;
 
-       this_cpu_inc(c->counters[BCH_COUNTER_move_extent_alloc_mem_fail]);
-       trace_move_extent_alloc_mem_fail2(c, k);
+       this_cpu_inc(c->counters[BCH_COUNTER_move_extent_start_fail]);
+       if (trace_move_extent_start_fail_enabled()) {
+               struct printbuf buf = PRINTBUF;
+
+               bch2_bkey_val_to_text(&buf, c, k);
+               prt_str(&buf, ": ");
+               prt_str(&buf, bch2_err_str(ret));
+               trace_move_extent_start_fail(c, buf.buf);
+               printbuf_exit(&buf);
+       }
        return ret;
 }
 
index b7f9990c58485d6416d45a138f0223f3d08fef17..8526f177450a56900c907a2e4cba3950fe5f9e00 100644 (file)
@@ -233,6 +233,11 @@ enum fsck_err_opts {
          OPT_BOOL(),                                                   \
          BCH2_NO_SB_OPT,               true,                           \
          NULL,         "Stash pointer to in memory btree node in btree ptr")\
+       x(btree_write_buffer_size, u32,                                 \
+         OPT_FS|OPT_MOUNT,                                             \
+         OPT_UINT(16, (1U << 20) - 1),                                 \
+         BCH2_NO_SB_OPT,               1U << 13,                       \
+         NULL,         "Number of btree write buffer entries")         \
        x(gc_reserve_percent,           u8,                             \
          OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME,                      \
          OPT_UINT(5, 21),                                              \
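
btree_write_buffer_size is an OPT_FS|OPT_MOUNT option with no superblock
field, so it is set per mount; the default of 1U << 13 gives 8192 entries, and
values are clamped to [16, (1U << 20) - 1]. Assuming the usual bcachefs
mount-option plumbing, usage would look something like:

	# illustrative: quadruple the write buffer for this mount
	mount -t bcachefs -o btree_write_buffer_size=32768 /dev/vda /mnt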
index 5f4f76e67cdda46755c9e16eb5e8b1720af98f19..3f8c3ba105a28dc74c1011a9519312f8e9465f52 100644 (file)
@@ -530,7 +530,7 @@ static int bch2_set_may_go_rw(struct bch_fs *c)
        move_gap(keys->d, keys->nr, keys->size, keys->gap, keys->nr);
        keys->gap = keys->nr;
 
-       set_bit(BCH_FS_MAY_GO_RW, &c->flags);
+       set_bit(BCH_FS_may_go_rw, &c->flags);
        if (keys->nr)
                return bch2_fs_read_write_early(c);
        return 0;
@@ -876,13 +876,13 @@ use_clean:
 
        /* If we fixed errors, verify that fs is actually clean now: */
        if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) &&
-           test_bit(BCH_FS_ERRORS_FIXED, &c->flags) &&
-           !test_bit(BCH_FS_ERRORS_NOT_FIXED, &c->flags) &&
-           !test_bit(BCH_FS_ERROR, &c->flags)) {
+           test_bit(BCH_FS_errors_fixed, &c->flags) &&
+           !test_bit(BCH_FS_errors_not_fixed, &c->flags) &&
+           !test_bit(BCH_FS_error, &c->flags)) {
                bch2_flush_fsck_errs(c);
 
                bch_info(c, "Fixed errors, running fsck a second time to verify fs is clean");
-               clear_bit(BCH_FS_ERRORS_FIXED, &c->flags);
+               clear_bit(BCH_FS_errors_fixed, &c->flags);
 
                c->curr_recovery_pass = BCH_RECOVERY_PASS_check_alloc_info;
 
@@ -890,13 +890,13 @@ use_clean:
                if (ret)
                        goto err;
 
-               if (test_bit(BCH_FS_ERRORS_FIXED, &c->flags) ||
-                   test_bit(BCH_FS_ERRORS_NOT_FIXED, &c->flags)) {
+               if (test_bit(BCH_FS_errors_fixed, &c->flags) ||
+                   test_bit(BCH_FS_errors_not_fixed, &c->flags)) {
                        bch_err(c, "Second fsck run was not clean");
-                       set_bit(BCH_FS_ERRORS_NOT_FIXED, &c->flags);
+                       set_bit(BCH_FS_errors_not_fixed, &c->flags);
                }
 
-               set_bit(BCH_FS_ERRORS_FIXED, &c->flags);
+               set_bit(BCH_FS_errors_fixed, &c->flags);
        }
 
        if (enabled_qtypes(c)) {
@@ -913,14 +913,14 @@ use_clean:
                write_sb = true;
        }
 
-       if (!test_bit(BCH_FS_ERROR, &c->flags)) {
+       if (!test_bit(BCH_FS_error, &c->flags)) {
                c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_alloc_info);
                write_sb = true;
        }
 
        if (c->opts.fsck &&
-           !test_bit(BCH_FS_ERROR, &c->flags) &&
-           !test_bit(BCH_FS_ERRORS_NOT_FIXED, &c->flags)) {
+           !test_bit(BCH_FS_error, &c->flags) &&
+           !test_bit(BCH_FS_errors_not_fixed, &c->flags)) {
                SET_BCH_SB_HAS_ERRORS(c->disk_sb.sb, 0);
                SET_BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb, 0);
                write_sb = true;
@@ -954,7 +954,7 @@ use_clean:
 
        ret = 0;
 out:
-       set_bit(BCH_FS_FSCK_DONE, &c->flags);
+       set_bit(BCH_FS_fsck_done, &c->flags);
        bch2_flush_fsck_errs(c);
 
        if (!c->opts.keep_journal &&
@@ -962,7 +962,7 @@ out:
                bch2_journal_keys_put_initial(c);
        kfree(clean);
 
-       if (!ret && test_bit(BCH_FS_NEED_DELETE_DEAD_SNAPSHOTS, &c->flags)) {
+       if (!ret && test_bit(BCH_FS_need_delete_dead_snapshots, &c->flags)) {
                bch2_fs_read_write_early(c);
                bch2_delete_dead_snapshots_async(c);
        }
@@ -1001,8 +1001,8 @@ int bch2_fs_initialize(struct bch_fs *c)
        mutex_unlock(&c->sb_lock);
 
        c->curr_recovery_pass = ARRAY_SIZE(recovery_pass_fns);
-       set_bit(BCH_FS_MAY_GO_RW, &c->flags);
-       set_bit(BCH_FS_FSCK_DONE, &c->flags);
+       set_bit(BCH_FS_may_go_rw, &c->flags);
+       set_bit(BCH_FS_fsck_done, &c->flags);
 
        for (i = 0; i < BTREE_ID_NR; i++)
                bch2_btree_root_alloc(c, i);
index b23550b4409814baa9010d637cbed885df6a4482..e473c788fd64fa2b0d27e2d3273cbd0d3ed131a5 100644 (file)
@@ -318,7 +318,7 @@ int bch2_mark_snapshot(struct btree_trans *trans,
                __set_is_ancestor_bitmap(c, id);
 
                if (BCH_SNAPSHOT_DELETED(s.v)) {
-                       set_bit(BCH_FS_NEED_DELETE_DEAD_SNAPSHOTS, &c->flags);
+                       set_bit(BCH_FS_need_delete_dead_snapshots, &c->flags);
                        if (c->curr_recovery_pass > BCH_RECOVERY_PASS_delete_dead_snapshots)
                                bch2_delete_dead_snapshots_async(c);
                }
@@ -1376,10 +1376,10 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
        u32 *i, id;
        int ret = 0;
 
-       if (!test_and_clear_bit(BCH_FS_NEED_DELETE_DEAD_SNAPSHOTS, &c->flags))
+       if (!test_and_clear_bit(BCH_FS_need_delete_dead_snapshots, &c->flags))
                return 0;
 
-       if (!test_bit(BCH_FS_STARTED, &c->flags)) {
+       if (!test_bit(BCH_FS_started, &c->flags)) {
                ret = bch2_fs_read_write_early(c);
                if (ret) {
                        bch_err_msg(c, ret, "deleting dead snapshots: error going rw");
@@ -1680,7 +1680,7 @@ static int bch2_check_snapshot_needs_deletion(struct btree_trans *trans, struct
        if (BCH_SNAPSHOT_DELETED(snap.v) ||
            bch2_snapshot_equiv(c, k.k->p.offset) != k.k->p.offset ||
            (ret = bch2_snapshot_needs_delete(trans, k)) > 0) {
-               set_bit(BCH_FS_NEED_DELETE_DEAD_SNAPSHOTS, &c->flags);
+               set_bit(BCH_FS_need_delete_dead_snapshots, &c->flags);
                return 0;
        }
 
index 512d5665786420dab9763995d3955441e3a91a3f..136c01403c95465c23488504c0f2e92798bd10e2 100644 (file)
@@ -916,9 +916,9 @@ int bch2_write_super(struct bch_fs *c)
 
        le64_add_cpu(&c->disk_sb.sb->seq, 1);
 
-       if (test_bit(BCH_FS_ERROR, &c->flags))
+       if (test_bit(BCH_FS_error, &c->flags))
                SET_BCH_SB_HAS_ERRORS(c->disk_sb.sb, 1);
-       if (test_bit(BCH_FS_TOPOLOGY_ERROR, &c->flags))
+       if (test_bit(BCH_FS_topology_error, &c->flags))
                SET_BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb, 1);
 
        SET_BCH_SB_BIG_ENDIAN(c->disk_sb.sb, CPU_BIG_ENDIAN);
index 552d55dd963f8525ee9ee127118ca2d1177847df..e7f186b45df103ad51662a1497efb92184a3ba61 100644 (file)
@@ -73,6 +73,13 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Kent Overstreet <kent.overstreet@gmail.com>");
 MODULE_DESCRIPTION("bcachefs filesystem");
 
+const char * const bch2_fs_flag_strs[] = {
+#define x(n)           #n,
+       BCH_FS_FLAGS()
+#undef x
+       NULL
+};
+
 #define KTYPE(type)                                                    \
 static const struct attribute_group type ## _group = {                 \
        .attrs = type ## _files                                         \
@@ -240,8 +247,8 @@ static void __bch2_fs_read_only(struct bch_fs *c)
                    journal_cur_seq(&c->journal));
 
        if (test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags) &&
-           !test_bit(BCH_FS_EMERGENCY_RO, &c->flags))
-               set_bit(BCH_FS_CLEAN_SHUTDOWN, &c->flags);
+           !test_bit(BCH_FS_emergency_ro, &c->flags))
+               set_bit(BCH_FS_clean_shutdown, &c->flags);
        bch2_fs_journal_stop(&c->journal);
 
        /*
@@ -256,19 +263,19 @@ static void bch2_writes_disabled(struct percpu_ref *writes)
 {
        struct bch_fs *c = container_of(writes, struct bch_fs, writes);
 
-       set_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags);
+       set_bit(BCH_FS_write_disable_complete, &c->flags);
        wake_up(&bch2_read_only_wait);
 }
 #endif
 
 void bch2_fs_read_only(struct bch_fs *c)
 {
-       if (!test_bit(BCH_FS_RW, &c->flags)) {
+       if (!test_bit(BCH_FS_rw, &c->flags)) {
                bch2_journal_reclaim_stop(&c->journal);
                return;
        }
 
-       BUG_ON(test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags));
+       BUG_ON(test_bit(BCH_FS_write_disable_complete, &c->flags));
 
        bch_verbose(c, "going read-only");
 
@@ -276,7 +283,7 @@ void bch2_fs_read_only(struct bch_fs *c)
         * Block new foreground-end write operations from starting - any new
         * writes will return -EROFS:
         */
-       set_bit(BCH_FS_GOING_RO, &c->flags);
+       set_bit(BCH_FS_going_ro, &c->flags);
 #ifndef BCH_WRITE_REF_DEBUG
        percpu_ref_kill(&c->writes);
 #else
@@ -296,36 +303,35 @@ void bch2_fs_read_only(struct bch_fs *c)
         * that going RO is complete:
         */
        wait_event(bch2_read_only_wait,
-                  test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags) ||
-                  test_bit(BCH_FS_EMERGENCY_RO, &c->flags));
+                  test_bit(BCH_FS_write_disable_complete, &c->flags) ||
+                  test_bit(BCH_FS_emergency_ro, &c->flags));
 
-       bool writes_disabled = test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags);
+       bool writes_disabled = test_bit(BCH_FS_write_disable_complete, &c->flags);
        if (writes_disabled)
                bch_verbose(c, "finished waiting for writes to stop");
 
        __bch2_fs_read_only(c);
 
        wait_event(bch2_read_only_wait,
-                  test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags));
+                  test_bit(BCH_FS_write_disable_complete, &c->flags));
 
        if (!writes_disabled)
                bch_verbose(c, "finished waiting for writes to stop");
 
-       clear_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags);
-       clear_bit(BCH_FS_GOING_RO, &c->flags);
-       clear_bit(BCH_FS_RW, &c->flags);
+       clear_bit(BCH_FS_write_disable_complete, &c->flags);
+       clear_bit(BCH_FS_going_ro, &c->flags);
+       clear_bit(BCH_FS_rw, &c->flags);
 
        if (!bch2_journal_error(&c->journal) &&
-           !test_bit(BCH_FS_ERROR, &c->flags) &&
-           !test_bit(BCH_FS_EMERGENCY_RO, &c->flags) &&
-           test_bit(BCH_FS_STARTED, &c->flags) &&
-           test_bit(BCH_FS_CLEAN_SHUTDOWN, &c->flags) &&
+           !test_bit(BCH_FS_error, &c->flags) &&
+           !test_bit(BCH_FS_emergency_ro, &c->flags) &&
+           test_bit(BCH_FS_started, &c->flags) &&
+           test_bit(BCH_FS_clean_shutdown, &c->flags) &&
            !c->opts.norecovery) {
                BUG_ON(c->journal.last_empty_seq != journal_cur_seq(&c->journal));
                BUG_ON(atomic_read(&c->btree_cache.dirty));
                BUG_ON(atomic_long_read(&c->btree_key_cache.nr_dirty));
-               BUG_ON(c->btree_write_buffer.inc.keys.nr);
-               BUG_ON(c->btree_write_buffer.flushing.keys.nr);
+               BUG_ON(c->btree_write_buffer.state.nr);
 
                bch_verbose(c, "marking filesystem clean");
                bch2_fs_mark_clean(c);
@@ -351,7 +357,7 @@ static void bch2_fs_read_only_async(struct bch_fs *c)
 
 bool bch2_fs_emergency_read_only(struct bch_fs *c)
 {
-       bool ret = !test_and_set_bit(BCH_FS_EMERGENCY_RO, &c->flags);
+       bool ret = !test_and_set_bit(BCH_FS_emergency_ro, &c->flags);
 
        bch2_journal_halt(&c->journal);
        bch2_fs_read_only_async(c);
@@ -392,12 +398,12 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
        unsigned i;
        int ret;
 
-       if (test_bit(BCH_FS_INITIAL_GC_UNFIXED, &c->flags)) {
+       if (test_bit(BCH_FS_initial_gc_unfixed, &c->flags)) {
                bch_err(c, "cannot go rw, unfixed btree errors");
                return -BCH_ERR_erofs_unfixed_errors;
        }
 
-       if (test_bit(BCH_FS_RW, &c->flags))
+       if (test_bit(BCH_FS_rw, &c->flags))
                return 0;
 
        if (c->opts.norecovery)
@@ -420,7 +426,7 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
        if (ret)
                goto err;
 
-       clear_bit(BCH_FS_CLEAN_SHUTDOWN, &c->flags);
+       clear_bit(BCH_FS_clean_shutdown, &c->flags);
 
        /*
         * First journal write must be a flush write: after a clean shutdown we
@@ -434,8 +440,8 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
                bch2_dev_allocator_add(c, ca);
        bch2_recalc_capacity(c);
 
-       set_bit(BCH_FS_RW, &c->flags);
-       set_bit(BCH_FS_WAS_RW, &c->flags);
+       set_bit(BCH_FS_rw, &c->flags);
+       set_bit(BCH_FS_was_rw, &c->flags);
 
 #ifndef BCH_WRITE_REF_DEBUG
        percpu_ref_reinit(&c->writes);
@@ -468,7 +474,7 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
        bch2_do_pending_node_rewrites(c);
        return 0;
 err:
-       if (test_bit(BCH_FS_RW, &c->flags))
+       if (test_bit(BCH_FS_rw, &c->flags))
                bch2_fs_read_only(c);
        else
                __bch2_fs_read_only(c);
@@ -568,7 +574,7 @@ void __bch2_fs_stop(struct bch_fs *c)
 
        bch_verbose(c, "shutting down");
 
-       set_bit(BCH_FS_STOPPING, &c->flags);
+       set_bit(BCH_FS_stopping, &c->flags);
 
        cancel_work_sync(&c->journal_seq_blacklist_gc_work);
 
@@ -960,7 +966,7 @@ int bch2_fs_start(struct bch_fs *c)
 
        down_write(&c->state_lock);
 
-       BUG_ON(test_bit(BCH_FS_STARTED, &c->flags));
+       BUG_ON(test_bit(BCH_FS_started, &c->flags));
 
        mutex_lock(&c->sb_lock);
 
@@ -995,12 +1001,12 @@ int bch2_fs_start(struct bch_fs *c)
                goto err;
        }
 
-       set_bit(BCH_FS_STARTED, &c->flags);
+       set_bit(BCH_FS_started, &c->flags);
 
        if (c->opts.read_only || c->opts.nochanges) {
                bch2_fs_read_only(c);
        } else {
-               ret = !test_bit(BCH_FS_RW, &c->flags)
+               ret = !test_bit(BCH_FS_rw, &c->flags)
                        ? bch2_fs_read_write(c)
                        : bch2_fs_read_write_late(c);
                if (ret)
index bf762df18012b1a1b463724d665551506fc74384..dada09331d2eb78e4f2e40841ed6f2de1d88f453 100644 (file)
@@ -8,6 +8,8 @@
 
 #include <linux/math64.h>
 
+extern const char * const bch2_fs_flag_strs[];
+
 struct bch_fs *bch2_dev_to_fs(dev_t);
 struct bch_fs *bch2_uuid_to_fs(__uuid_t);
 
@@ -37,8 +39,8 @@ int bch2_fs_read_write_early(struct bch_fs *);
  */
 static inline void bch2_fs_lazy_rw(struct bch_fs *c)
 {
-       if (!test_bit(BCH_FS_RW, &c->flags) &&
-           !test_bit(BCH_FS_WAS_RW, &c->flags))
+       if (!test_bit(BCH_FS_rw, &c->flags) &&
+           !test_bit(BCH_FS_was_rw, &c->flags))
                bch2_fs_read_write_early(c);
 }
 
index 7223418d3bb427d36921e957d59a584d7a9691e9..4a7c93bcf2f4929ae2d172a46a4313f07a854a6e 100644 (file)
@@ -145,6 +145,7 @@ rw_attribute(gc_gens_pos);
 
 read_attribute(uuid);
 read_attribute(minor);
+read_attribute(flags);
 read_attribute(bucket_size);
 read_attribute(first_bucket);
 read_attribute(nbuckets);
@@ -268,7 +269,7 @@ static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c
 
        memset(s, 0, sizeof(s));
 
-       if (!test_bit(BCH_FS_STARTED, &c->flags))
+       if (!test_bit(BCH_FS_started, &c->flags))
                return -EPERM;
 
        trans = bch2_trans_get(c);
@@ -384,6 +385,9 @@ SHOW(bch2_fs)
        sysfs_print(minor,                      c->minor);
        sysfs_printf(internal_uuid, "%pU",      c->sb.uuid.b);
 
+       if (attr == &sysfs_flags)
+               prt_bitflags(out, bch2_fs_flag_strs, c->flags);
+
        sysfs_hprint(btree_cache_size,          bch2_btree_cache_size(c));
 
        if (attr == &sysfs_btree_write_stats)
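
prt_bitflags() is bcachefs's printbuf helper for rendering a bitmask against a NULL-terminated name table. Roughly, as a simplified model (not the real implementation):

	/* Simplified model of prt_bitflags() semantics: print the name of
	 * each set bit, comma-separated, indexing the NULL-terminated
	 * string table by bit position.
	 */
	static void flags_to_text(struct printbuf *out,
				  const char * const strs[], u64 flags)
	{
		bool first = true;

		for (unsigned i = 0; strs[i]; i++)
			if (flags & BIT_ULL(i)) {
				if (!first)
					prt_char(out, ',');
				prt_str(out, strs[i]);
				first = false;
			}
	}

On a healthy mounted filesystem one would expect output along the lines of "started,rw,was_rw".
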
@@ -416,7 +420,7 @@ SHOW(bch2_fs)
                bch2_btree_updates_to_text(out, c);
 
        if (attr == &sysfs_btree_cache)
-               bch2_btree_cache_to_text(out, &c->btree_cache);
+               bch2_btree_cache_to_text(out, c);
 
        if (attr == &sysfs_btree_key_cache)
                bch2_btree_key_cache_to_text(out, &c->btree_key_cache);
@@ -497,12 +501,12 @@ STORE(bch2_fs)
 
        /* Debugging: */
 
-       if (!test_bit(BCH_FS_STARTED, &c->flags))
+       if (!test_bit(BCH_FS_started, &c->flags))
                return -EPERM;
 
        /* Debugging: */
 
-       if (!test_bit(BCH_FS_RW, &c->flags))
+       if (!test_bit(BCH_FS_rw, &c->flags))
                return -EROFS;
 
        if (attr == &sysfs_prune_cache) {
@@ -634,6 +638,7 @@ STORE(bch2_fs_internal)
 SYSFS_OPS(bch2_fs_internal);
 
 struct attribute *bch2_fs_internal_files[] = {
+       &sysfs_flags,
        &sysfs_journal_debug,
        &sysfs_btree_updates,
        &sysfs_btree_cache,
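
With read_attribute(flags) declared and the attribute registered in bch2_fs_internal_files[], the new file should surface under the filesystem's internal sysfs directory (by bcachefs convention, something like /sys/fs/bcachefs/<uuid>/internal/flags; the path is assumed, not shown in this diff), giving a live, human-readable view of c->flags.
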
index 4980cfdd13b355c8558adb83187b009c3cba2272..6eced95ce3748676f3c399470d10bcfb61d633e1 100644 (file)
@@ -775,9 +775,9 @@ TRACE_EVENT(move_extent_fail,
        TP_printk("%d:%d %s", MAJOR(__entry->dev), MINOR(__entry->dev), __get_str(msg))
 );
 
-DEFINE_EVENT(str, move_extent_alloc_mem_fail,
-       TP_PROTO(struct bch_fs *c, const char *k),
-       TP_ARGS(c, k)
+DEFINE_EVENT(str, move_extent_start_fail,
+       TP_PROTO(struct bch_fs *c, const char *str),
+       TP_ARGS(c, str)
 );
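
The rename from move_extent_alloc_mem_fail to move_extent_start_fail suggests the event now covers any failure to start a move, not just allocation failures. Since it reuses the "str" event class, call sites pass a preformatted string; a hypothetical call-site sketch (everything except the tracepoint name is illustrative):

	/* Hypothetical call site: format the key whose move failed to
	 * start into a printbuf and hand the string to the renamed
	 * tracepoint. 'k' is assumed to be the extent key in question.
	 */
	if (trace_move_extent_start_fail_enabled()) {
		struct printbuf buf = PRINTBUF;

		bch2_bkey_val_to_text(&buf, c, k);
		trace_move_extent_start_fail(c, buf.buf);
		printbuf_exit(&buf);
	}
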
 
 TRACE_EVENT(move_data,