git.sesse.net Git - bcachefs-tools-debian/commitdiff
Update bcachefs sources to 26c226917f bcachefs: Start/stop io clock hands in read...
author Kent Overstreet <kent.overstreet@gmail.com>
Sat, 17 Oct 2020 22:16:50 +0000 (18:16 -0400)
committer Kent Overstreet <kent.overstreet@gmail.com>
Sat, 17 Oct 2020 22:17:36 +0000 (18:17 -0400)
19 files changed:
.bcachefs_revision
cmd_migrate.c
libbcachefs/alloc_background.c
libbcachefs/alloc_background.h
libbcachefs/alloc_foreground.c
libbcachefs/btree_gc.c
libbcachefs/btree_types.h
libbcachefs/buckets.c
libbcachefs/buckets.h
libbcachefs/ec.c
libbcachefs/ec.h
libbcachefs/fs-io.c
libbcachefs/fs.c
libbcachefs/io.c
libbcachefs/io.h
libbcachefs/move.c
libbcachefs/recovery.c
libbcachefs/super.c
libbcachefs/super.h

index d4dc4eade46ee979b6afc002c4966c4409f9d268..59bf491a458edd91a279004ed6aa7bce7a2dd35b 100644 (file)
@@ -1 +1 @@
-0568ed488651273d01891c3481613dd652677edb
+26c226917f0455877387c1a325282e67e3283f54
index 998275a014f9c1cb1214bad6656a3f8744679c43..797c51e0ef540988947e7ba82bde3eb022a06a65 100644 (file)
@@ -596,8 +596,7 @@ static void copy_fs(struct bch_fs *c, int src_fd, const char *src_path,
        darray_free(s.extents);
        genradix_free(&s.hardlinks);
 
-       bool wrote;
-       bch2_alloc_write(c, false, &wrote);
+       bch2_alloc_write(c, false);
 }
 
 static void find_superblock_space(ranges extents, struct dev_opts *dev)
index 9aa0b42b26b671f483dd1969ae12c10835143721..54096e8311d35b8dff346fd8256d20e152d78bc0 100644 (file)
@@ -209,10 +209,25 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c,
 static int bch2_alloc_read_fn(struct bch_fs *c, enum btree_id id,
                              unsigned level, struct bkey_s_c k)
 {
-       if (!level)
-               bch2_mark_key(c, k, 0, 0, NULL, 0,
-                             BTREE_TRIGGER_ALLOC_READ|
-                             BTREE_TRIGGER_NOATOMIC);
+       struct bch_dev *ca;
+       struct bucket *g;
+       struct bkey_alloc_unpacked u;
+
+       if (level || k.k->type != KEY_TYPE_alloc)
+               return 0;
+
+       ca = bch_dev_bkey_exists(c, k.k->p.inode);
+       g = __bucket(ca, k.k->p.offset, 0);
+       u = bch2_alloc_unpack(k);
+
+       g->_mark.gen            = u.gen;
+       g->_mark.data_type      = u.data_type;
+       g->_mark.dirty_sectors  = u.dirty_sectors;
+       g->_mark.cached_sectors = u.cached_sectors;
+       g->io_time[READ]        = u.read_time;
+       g->io_time[WRITE]       = u.write_time;
+       g->oldest_gen           = u.oldest_gen;
+       g->gen_valid            = 1;
 
        return 0;
 }
@@ -223,8 +238,11 @@ int bch2_alloc_read(struct bch_fs *c, struct journal_keys *journal_keys)
        unsigned i;
        int ret = 0;
 
+       down_read(&c->gc_lock);
        ret = bch2_btree_and_journal_walk(c, journal_keys, BTREE_ID_ALLOC,
                                          NULL, bch2_alloc_read_fn);
+       up_read(&c->gc_lock);
+
        if (ret) {
                bch_err(c, "error reading alloc info: %i", ret);
                return ret;
@@ -253,12 +271,6 @@ int bch2_alloc_read(struct bch_fs *c, struct journal_keys *journal_keys)
        return 0;
 }
 
-enum alloc_write_ret {
-       ALLOC_WROTE,
-       ALLOC_NOWROTE,
-       ALLOC_END,
-};
-
 static int bch2_alloc_write_key(struct btree_trans *trans,
                                struct btree_iter *iter,
                                unsigned flags)
@@ -288,26 +300,17 @@ retry:
 
        old_u = bch2_alloc_unpack(k);
 
-       if (iter->pos.inode >= c->sb.nr_devices ||
-           !c->devs[iter->pos.inode])
-               return ALLOC_END;
-
        percpu_down_read(&c->mark_lock);
        ca      = bch_dev_bkey_exists(c, iter->pos.inode);
        ba      = bucket_array(ca);
 
-       if (iter->pos.offset >= ba->nbuckets) {
-               percpu_up_read(&c->mark_lock);
-               return ALLOC_END;
-       }
-
        g       = &ba->b[iter->pos.offset];
        m       = READ_ONCE(g->mark);
        new_u   = alloc_mem_to_key(g, m);
        percpu_up_read(&c->mark_lock);
 
        if (!bkey_alloc_unpacked_cmp(old_u, new_u))
-               return ALLOC_NOWROTE;
+               return 0;
 
        a = bkey_alloc_init(&alloc_key.k);
        a->k.p = iter->pos;
@@ -325,50 +328,55 @@ err:
        return ret;
 }
 
-int bch2_alloc_write(struct bch_fs *c, unsigned flags, bool *wrote)
+int bch2_dev_alloc_write(struct bch_fs *c, struct bch_dev *ca, unsigned flags)
 {
        struct btree_trans trans;
        struct btree_iter *iter;
-       struct bch_dev *ca;
-       unsigned i;
+       u64 first_bucket, nbuckets;
        int ret = 0;
 
+       percpu_down_read(&c->mark_lock);
+       first_bucket    = bucket_array(ca)->first_bucket;
+       nbuckets        = bucket_array(ca)->nbuckets;
+       percpu_up_read(&c->mark_lock);
+
        BUG_ON(BKEY_ALLOC_VAL_U64s_MAX > 8);
 
        bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
 
-       iter = bch2_trans_get_iter(&trans, BTREE_ID_ALLOC, POS_MIN,
+       iter = bch2_trans_get_iter(&trans, BTREE_ID_ALLOC,
+                                  POS(ca->dev_idx, first_bucket),
                                   BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
 
-       for_each_rw_member(ca, c, i) {
-               unsigned first_bucket;
+       while (iter->pos.offset < nbuckets) {
+               bch2_trans_cond_resched(&trans);
 
-               percpu_down_read(&c->mark_lock);
-               first_bucket = bucket_array(ca)->first_bucket;
-               percpu_up_read(&c->mark_lock);
+               ret = bch2_alloc_write_key(&trans, iter, flags);
+               if (ret)
+                       break;
+               bch2_btree_iter_next_slot(iter);
+       }
 
-               bch2_btree_iter_set_pos(iter, POS(i, first_bucket));
+       bch2_trans_exit(&trans);
 
-               while (1) {
-                       bch2_trans_cond_resched(&trans);
+       return ret;
+}
 
-                       ret = bch2_alloc_write_key(&trans, iter, flags);
-                       if (ret < 0 || ret == ALLOC_END)
-                               break;
-                       if (ret == ALLOC_WROTE)
-                               *wrote = true;
-                       bch2_btree_iter_next_slot(iter);
-               }
+int bch2_alloc_write(struct bch_fs *c, unsigned flags)
+{
+       struct bch_dev *ca;
+       unsigned i;
+       int ret = 0;
 
-               if (ret < 0) {
+       for_each_rw_member(ca, c, i) {
+               ret = bch2_dev_alloc_write(c, ca, flags);
+               if (ret) {
                        percpu_ref_put(&ca->io_ref);
                        break;
                }
        }
 
-       bch2_trans_exit(&trans);
-
-       return ret < 0 ? ret : 0;
+       return ret;
 }
 
 /* Bucket IO clocks: */
@@ -481,6 +489,53 @@ static void bch2_bucket_clock_init(struct bch_fs *c, int rw)
        mutex_init(&clock->lock);
 }
 
+int bch2_bucket_io_time_reset(struct btree_trans *trans, unsigned dev,
+                             size_t bucket_nr, int rw)
+{
+       struct bch_fs *c = trans->c;
+       struct bch_dev *ca = bch_dev_bkey_exists(c, dev);
+       struct btree_iter *iter;
+       struct bucket *g;
+       struct bkey_i_alloc *a;
+       struct bkey_alloc_unpacked u;
+       u16 *time;
+       int ret = 0;
+
+       iter = bch2_trans_get_iter(trans, BTREE_ID_ALLOC, POS(dev, bucket_nr),
+                                  BTREE_ITER_CACHED|
+                                  BTREE_ITER_CACHED_NOFILL|
+                                  BTREE_ITER_INTENT);
+       if (IS_ERR(iter))
+               return PTR_ERR(iter);
+
+       a = bch2_trans_kmalloc(trans, BKEY_ALLOC_U64s_MAX * 8);
+       ret = PTR_ERR_OR_ZERO(a);
+       if (ret)
+               goto out;
+
+       percpu_down_read(&c->mark_lock);
+       g = bucket(ca, bucket_nr);
+       u = alloc_mem_to_key(g, READ_ONCE(g->mark));
+       percpu_up_read(&c->mark_lock);
+
+       bkey_alloc_init(&a->k_i);
+       a->k.p = iter->pos;
+
+       time = rw == READ ? &u.read_time : &u.write_time;
+       if (*time == c->bucket_clock[rw].hand)
+               goto out;
+
+       *time = c->bucket_clock[rw].hand;
+
+       bch2_alloc_pack(a, u);
+
+       ret   = bch2_trans_update(trans, iter, &a->k_i, 0) ?:
+               bch2_trans_commit(trans, NULL, NULL, 0);
+out:
+       bch2_trans_iter_put(trans, iter);
+       return ret;
+}
+
 /* Background allocator thread: */
 
 /*
@@ -1259,18 +1314,6 @@ void bch2_recalc_capacity(struct bch_fs *c)
 
        c->bucket_size_max = bucket_size_max;
 
-       if (c->capacity) {
-               bch2_io_timer_add(&c->io_clock[READ],
-                                &c->bucket_clock[READ].rescale);
-               bch2_io_timer_add(&c->io_clock[WRITE],
-                                &c->bucket_clock[WRITE].rescale);
-       } else {
-               bch2_io_timer_del(&c->io_clock[READ],
-                                &c->bucket_clock[READ].rescale);
-               bch2_io_timer_del(&c->io_clock[WRITE],
-                                &c->bucket_clock[WRITE].rescale);
-       }
-
        /* Wake up case someone was waiting for buckets */
        closure_wake_up(&c->freelist_wait);
 }
index 4f462696b747a88f9f80fd6dc31b74e291cee517..870714ff16a7f496972bc1fec2c2fa564cd4c9f4 100644 (file)
@@ -28,6 +28,8 @@ struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c);
 void bch2_alloc_pack(struct bkey_i_alloc *,
                     const struct bkey_alloc_unpacked);
 
+int bch2_bucket_io_time_reset(struct btree_trans *, unsigned, size_t, int);
+
 static inline struct bkey_alloc_unpacked
 alloc_mem_to_key(struct bucket *g, struct bucket_mark m)
 {
@@ -93,7 +95,8 @@ void bch2_dev_allocator_quiesce(struct bch_fs *, struct bch_dev *);
 void bch2_dev_allocator_stop(struct bch_dev *);
 int bch2_dev_allocator_start(struct bch_dev *);
 
-int bch2_alloc_write(struct bch_fs *, unsigned, bool *);
+int bch2_dev_alloc_write(struct bch_fs *, struct bch_dev *, unsigned);
+int bch2_alloc_write(struct bch_fs *, unsigned);
 void bch2_fs_allocator_background_init(struct bch_fs *);
 
 #endif /* _BCACHEFS_ALLOC_BACKGROUND_H */
index 4a048828869bd91cccf0206f833edd6823914797..7a92e3d532548a2219985381e38f2c4081794b1b 100644 (file)
@@ -309,8 +309,6 @@ out:
                .dev    = ca->dev_idx,
        };
 
-       bucket_io_clock_reset(c, ca, bucket, READ);
-       bucket_io_clock_reset(c, ca, bucket, WRITE);
        spin_unlock(&ob->lock);
 
        if (c->blocked_allocate_open_bucket) {
index 2aa8140aec3297843e7775eb20c5f4d5f8ffa8e3..e8c1e752a25d63ec7fc32c5982d3ad3e32bc1f9c 100644 (file)
 
 static inline void __gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
 {
+       preempt_disable();
        write_seqcount_begin(&c->gc_pos_lock);
        c->gc_pos = new_pos;
        write_seqcount_end(&c->gc_pos_lock);
+       preempt_enable();
 }
 
 static inline void gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
@@ -568,6 +570,7 @@ static int bch2_gc_done(struct bch_fs *c,
                        fsck_err(c, _msg ": got %llu, should be %llu"   \
                                , ##__VA_ARGS__, dst->_f, src->_f);     \
                dst->_f = src->_f;                                      \
+               ret = 1;                                                \
        }
 #define copy_stripe_field(_f, _msg, ...)                               \
        if (dst->_f != src->_f) {                                       \
@@ -578,6 +581,7 @@ static int bch2_gc_done(struct bch_fs *c,
                                dst->_f, src->_f);                      \
                dst->_f = src->_f;                                      \
                dst->dirty = true;                                      \
+               ret = 1;                                                \
        }
 #define copy_bucket_field(_f)                                          \
        if (dst->b[b].mark._f != src->b[b].mark._f) {                   \
@@ -588,6 +592,7 @@ static int bch2_gc_done(struct bch_fs *c,
                                bch2_data_types[dst->b[b].mark.data_type],\
                                dst->b[b].mark._f, src->b[b].mark._f);  \
                dst->b[b]._mark._f = src->b[b].mark._f;                 \
+               ret = 1;                                                \
        }
 #define copy_dev_field(_f, _msg, ...)                                  \
        copy_field(_f, "dev %u has wrong " _msg, i, ##__VA_ARGS__)
@@ -1394,7 +1399,7 @@ static int bch2_gc_thread(void *arg)
 #else
                ret = bch2_gc_gens(c);
 #endif
-               if (ret)
+               if (ret < 0)
                        bch_err(c, "btree gc failed: %i", ret);
 
                debug_check_no_locks_held();
index 683b416ef4274bf25594b6ad37ccd9ac05a4de5b..c1717b7c8c38712a4278cfd32abb787d5e9951f4 100644 (file)
@@ -602,7 +602,6 @@ enum btree_trigger_flags {
 
        __BTREE_TRIGGER_GC,
        __BTREE_TRIGGER_BUCKET_INVALIDATE,
-       __BTREE_TRIGGER_ALLOC_READ,
        __BTREE_TRIGGER_NOATOMIC,
 };
 
@@ -614,7 +613,6 @@ enum btree_trigger_flags {
 
 #define BTREE_TRIGGER_GC               (1U << __BTREE_TRIGGER_GC)
 #define BTREE_TRIGGER_BUCKET_INVALIDATE        (1U << __BTREE_TRIGGER_BUCKET_INVALIDATE)
-#define BTREE_TRIGGER_ALLOC_READ       (1U << __BTREE_TRIGGER_ALLOC_READ)
 #define BTREE_TRIGGER_NOATOMIC         (1U << __BTREE_TRIGGER_NOATOMIC)
 
 static inline bool btree_node_type_needs_gc(enum btree_node_type type)
index 797114353aa22537dc6959dc65a814a6c4d6f925..c3fc3abbc0dc1ef368efccf73bd5661f9b3f7963 100644 (file)
@@ -254,6 +254,7 @@ void bch2_fs_usage_acc_to_base(struct bch_fs *c, unsigned idx)
 
        BUG_ON(idx >= 2);
 
+       preempt_disable();
        write_seqcount_begin(&c->usage_lock);
 
        acc_u64s_percpu((u64 *) c->usage_base,
@@ -261,6 +262,7 @@ void bch2_fs_usage_acc_to_base(struct bch_fs *c, unsigned idx)
        percpu_memset(c->usage[idx], 0, u64s * sizeof(u64));
 
        write_seqcount_end(&c->usage_lock);
+       preempt_enable();
 }
 
 void bch2_fs_usage_to_text(struct printbuf *out,
@@ -482,6 +484,7 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
                bch2_wake_allocator(ca);
 }
 
+__flatten
 void bch2_dev_usage_from_buckets(struct bch_fs *c)
 {
        struct bch_dev *ca;
@@ -755,8 +758,7 @@ static int bch2_mark_alloc(struct bch_fs *c,
                }
        }));
 
-       if (!(flags & BTREE_TRIGGER_ALLOC_READ))
-               bch2_dev_usage_update(c, ca, fs_usage, old_m, m, gc);
+       bch2_dev_usage_update(c, ca, fs_usage, old_m, m, gc);
 
        g->io_time[READ]        = u.read_time;
        g->io_time[WRITE]       = u.write_time;
index 653f6761862e9b797af43c6f2d17084df27231ac..a3873becbb70111b173b6c42e369e2bc5012027f 100644 (file)
@@ -58,12 +58,6 @@ static inline struct bucket *bucket(struct bch_dev *ca, size_t b)
        return __bucket(ca, b, false);
 }
 
-static inline void bucket_io_clock_reset(struct bch_fs *c, struct bch_dev *ca,
-                                        size_t b, int rw)
-{
-       bucket(ca, b)->io_time[rw] = c->bucket_clock[rw].hand;
-}
-
 static inline u16 bucket_last_io(struct bch_fs *c, struct bucket *g, int rw)
 {
        return c->bucket_clock[rw].hand - g->io_time[rw];
index 5514f65378ad720c355c1768486bfb2744f5835c..eac750ad2240f254b3690f386021d5140ae21f24 100644 (file)
@@ -1448,7 +1448,7 @@ static int __bch2_stripe_write_key(struct btree_trans *trans,
        return 0;
 }
 
-int bch2_stripes_write(struct bch_fs *c, unsigned flags, bool *wrote)
+int bch2_stripes_write(struct bch_fs *c, unsigned flags)
 {
        struct btree_trans trans;
        struct btree_iter *iter;
@@ -1476,8 +1476,6 @@ int bch2_stripes_write(struct bch_fs *c, unsigned flags, bool *wrote)
 
                if (ret)
                        break;
-
-               *wrote = true;
        }
 
        bch2_trans_exit(&trans);
@@ -1497,7 +1495,6 @@ static int bch2_stripes_read_fn(struct bch_fs *c, enum btree_id id,
 
                ret = __ec_stripe_mem_alloc(c, k.k->p.offset, GFP_KERNEL) ?:
                        bch2_mark_key(c, k, 0, 0, NULL, 0,
-                                     BTREE_TRIGGER_ALLOC_READ|
                                      BTREE_TRIGGER_NOATOMIC);
                if (ret)
                        return ret;
index f8fc3d616cd787d64d954dc2026cdd8e85c95d69..6db16cf768daa40c8c91b8e2523208c146bfeac7 100644 (file)
@@ -156,7 +156,7 @@ void bch2_ec_flush_new_stripes(struct bch_fs *);
 
 struct journal_keys;
 int bch2_stripes_read(struct bch_fs *, struct journal_keys *);
-int bch2_stripes_write(struct bch_fs *, unsigned, bool *);
+int bch2_stripes_write(struct bch_fs *, unsigned);
 
 int bch2_ec_mem_alloc(struct bch_fs *, bool);
 
index 60684380f191156cf399b7171985e5970baf6ca3..4ceeafcfa33c78d467450aab4f6a29b838bc643a 100644 (file)
@@ -868,7 +868,7 @@ retry:
                if (bkey_extent_is_allocation(k.k))
                        bch2_add_page_sectors(&rbio->bio, k);
 
-               bch2_read_extent(c, rbio, k, offset_into_extent, flags);
+               bch2_read_extent(trans, rbio, k, offset_into_extent, flags);
 
                if (flags & BCH_READ_LAST_FRAGMENT)
                        break;
index 5c80142e50ed940b93b533bdb6426631fe246b17..6a9820e83db77e35aa62648923744262b921f0eb 100644 (file)
@@ -1527,8 +1527,6 @@ got_sb:
        if (ret)
                goto err_put_super;
 
-       sb->s_bdi->congested_fn         = bch2_congested;
-       sb->s_bdi->congested_data       = c;
        sb->s_bdi->ra_pages             = VM_READAHEAD_PAGES;
 
        for_each_online_member(ca, c, i) {
index 5c9c3cf54edd1c70c80c857626428c428ffbed22..0a4b4eed465ced685aee2393c2af1c97efaec0dd 100644 (file)
@@ -7,6 +7,7 @@
  */
 
 #include "bcachefs.h"
+#include "alloc_background.h"
 #include "alloc_foreground.h"
 #include "bkey_on_stack.h"
 #include "bset.h"
@@ -1635,7 +1636,7 @@ retry:
                goto out;
        }
 
-       ret = __bch2_read_extent(c, rbio, bvec_iter, k, 0, failed, flags);
+       ret = __bch2_read_extent(&trans, rbio, bvec_iter, k, 0, failed, flags);
        if (ret == READ_RETRY)
                goto retry;
        if (ret)
@@ -1692,7 +1693,7 @@ retry:
                bytes = min(sectors, bvec_iter_sectors(bvec_iter)) << 9;
                swap(bvec_iter.bi_size, bytes);
 
-               ret = __bch2_read_extent(c, rbio, bvec_iter, k,
+               ret = __bch2_read_extent(&trans, rbio, bvec_iter, k,
                                offset_into_extent, failed, flags);
                switch (ret) {
                case READ_RETRY:
@@ -2020,11 +2021,12 @@ err:
        return ret;
 }
 
-int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig,
+int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
                       struct bvec_iter iter, struct bkey_s_c k,
                       unsigned offset_into_extent,
                       struct bch_io_failures *failed, unsigned flags)
 {
+       struct bch_fs *c = trans->c;
        struct extent_ptr_decoded pick;
        struct bch_read_bio *rbio = NULL;
        struct bch_dev *ca;
@@ -2192,9 +2194,9 @@ get_bio:
 
        bch2_increment_clock(c, bio_sectors(&rbio->bio), READ);
 
-       rcu_read_lock();
-       bucket_io_clock_reset(c, ca, PTR_BUCKET_NR(ca, &pick.ptr), READ);
-       rcu_read_unlock();
+       if (pick.ptr.cached)
+               bch2_bucket_io_time_reset(trans, pick.ptr.dev,
+                       PTR_BUCKET_NR(ca, &pick.ptr), READ);
 
        if (!(flags & (BCH_READ_IN_RETRY|BCH_READ_LAST_FRAGMENT))) {
                bio_inc_remaining(&orig->bio);
@@ -2336,7 +2338,7 @@ retry:
                if (rbio->bio.bi_iter.bi_size == bytes)
                        flags |= BCH_READ_LAST_FRAGMENT;
 
-               bch2_read_extent(c, rbio, k, offset_into_extent, flags);
+               bch2_read_extent(&trans, rbio, k, offset_into_extent, flags);
 
                if (flags & BCH_READ_LAST_FRAGMENT)
                        break;
index ded468d70f098353be3697caefc01bc8e00b6641..e6aac594f3e6a8e0267c0cad3aabde00059b0fae 100644 (file)
@@ -136,17 +136,17 @@ enum bch_read_flags {
        BCH_READ_IN_RETRY               = 1 << 7,
 };
 
-int __bch2_read_extent(struct bch_fs *, struct bch_read_bio *,
+int __bch2_read_extent(struct btree_trans *, struct bch_read_bio *,
                       struct bvec_iter, struct bkey_s_c, unsigned,
                       struct bch_io_failures *, unsigned);
 
-static inline void bch2_read_extent(struct bch_fs *c,
+static inline void bch2_read_extent(struct btree_trans *trans,
                                    struct bch_read_bio *rbio,
                                    struct bkey_s_c k,
                                    unsigned offset_into_extent,
                                    unsigned flags)
 {
-       __bch2_read_extent(c, rbio, rbio->bio.bi_iter, k,
+       __bch2_read_extent(trans, rbio, rbio->bio.bi_iter, k,
                           offset_into_extent, NULL, flags);
 }
 
index 1ffb14a22f94d7a52199e16f7396485e02c4a1f9..62dcac79ed068ab3ef7aeca4755bcef3af5f5a2f 100644 (file)
@@ -415,7 +415,7 @@ static void bch2_move_ctxt_wait_for_io(struct moving_context *ctxt)
                atomic_read(&ctxt->write_sectors) != sectors_pending);
 }
 
-static int bch2_move_extent(struct bch_fs *c,
+static int bch2_move_extent(struct btree_trans *trans,
                            struct moving_context *ctxt,
                            struct write_point_specifier wp,
                            struct bch_io_opts io_opts,
@@ -424,6 +424,7 @@ static int bch2_move_extent(struct bch_fs *c,
                            enum data_cmd data_cmd,
                            struct data_opts data_opts)
 {
+       struct bch_fs *c = trans->c;
        struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
        struct moving_io *io;
        const union bch_extent_entry *entry;
@@ -490,7 +491,7 @@ static int bch2_move_extent(struct bch_fs *c,
         * ctxt when doing wakeup
         */
        closure_get(&ctxt->cl);
-       bch2_read_extent(c, &io->rbio, k, 0,
+       bch2_read_extent(trans, &io->rbio, k, 0,
                         BCH_READ_NODECODE|
                         BCH_READ_LAST_FRAGMENT);
        return 0;
@@ -608,7 +609,7 @@ peek:
                k = bkey_i_to_s_c(sk.k);
                bch2_trans_unlock(&trans);
 
-               ret2 = bch2_move_extent(c, ctxt, wp, io_opts, btree_id, k,
+               ret2 = bch2_move_extent(&trans, ctxt, wp, io_opts, btree_id, k,
                                        data_cmd, data_opts);
                if (ret2) {
                        if (ret2 == -ENOMEM) {
index 6e829bf0a31f3f1d5254e023e05c53fcd43e94e4..d70fa968db50d95c5fa63ced0c8080e14434d903 100644 (file)
@@ -845,9 +845,11 @@ static int verify_superblock_clean(struct bch_fs *c,
        }
 
        mustfix_fsck_err_on(j->read_clock != clean->read_clock, c,
-                       "superblock read clock doesn't match journal after clean shutdown");
+                       "superblock read clock %u doesn't match journal %u after clean shutdown",
+                       clean->read_clock, j->read_clock);
        mustfix_fsck_err_on(j->write_clock != clean->write_clock, c,
-                       "superblock read clock doesn't match journal after clean shutdown");
+                       "superblock write clock %u doesn't match journal %u after clean shutdown",
+                       clean->write_clock, j->write_clock);
 
        for (i = 0; i < BTREE_ID_NR; i++) {
                char buf1[200], buf2[200];
@@ -961,7 +963,7 @@ int bch2_fs_recovery(struct bch_fs *c)
        const char *err = "cannot allocate memory";
        struct bch_sb_field_clean *clean = NULL;
        u64 journal_seq;
-       bool wrote = false, write_sb = false;
+       bool write_sb = false, need_write_alloc = false;
        int ret;
 
        if (c->sb.clean)
@@ -1090,8 +1092,10 @@ int bch2_fs_recovery(struct bch_fs *c)
                bch_info(c, "starting metadata mark and sweep");
                err = "error in mark and sweep";
                ret = bch2_gc(c, &c->journal_keys, true, true);
-               if (ret)
+               if (ret < 0)
                        goto err;
+               if (ret)
+                       need_write_alloc = true;
                bch_verbose(c, "mark and sweep done");
        }
 
@@ -1101,8 +1105,10 @@ int bch2_fs_recovery(struct bch_fs *c)
                bch_info(c, "starting mark and sweep");
                err = "error in mark and sweep";
                ret = bch2_gc(c, &c->journal_keys, true, false);
-               if (ret)
+               if (ret < 0)
                        goto err;
+               if (ret)
+                       need_write_alloc = true;
                bch_verbose(c, "mark and sweep done");
        }
 
@@ -1126,7 +1132,7 @@ int bch2_fs_recovery(struct bch_fs *c)
                goto err;
        bch_verbose(c, "journal replay done");
 
-       if (!c->opts.nochanges) {
+       if (need_write_alloc && !c->opts.nochanges) {
                /*
                 * note that even when filesystem was clean there might be work
                 * to do here, if we ran gc (because of fsck) which recalculated
@@ -1134,8 +1140,8 @@ int bch2_fs_recovery(struct bch_fs *c)
                 */
                bch_verbose(c, "writing allocation info");
                err = "error writing out alloc info";
-               ret = bch2_stripes_write(c, BTREE_INSERT_LAZY_RW, &wrote) ?:
-                       bch2_alloc_write(c, BTREE_INSERT_LAZY_RW, &wrote);
+               ret = bch2_stripes_write(c, BTREE_INSERT_LAZY_RW) ?:
+                       bch2_alloc_write(c, BTREE_INSERT_LAZY_RW);
                if (ret) {
                        bch_err(c, "error writing alloc info");
                        goto err;
@@ -1281,6 +1287,20 @@ int bch2_fs_initialize(struct bch_fs *c)
        bch2_fs_journal_start(&c->journal, 1, &journal);
        bch2_journal_set_replay_done(&c->journal);
 
+       err = "error going read-write";
+       ret = bch2_fs_read_write_early(c);
+       if (ret)
+               goto err;
+
+       /*
+        * Write out the superblock and journal buckets, now that we can do
+        * btree updates
+        */
+       err = "error writing alloc info";
+       ret = bch2_alloc_write(c, 0);
+       if (ret)
+               goto err;
+
        bch2_inode_init(c, &root_inode, 0, 0,
                        S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, 0, NULL);
        root_inode.bi_inum = BCACHEFS_ROOT_INO;
@@ -1289,7 +1309,7 @@ int bch2_fs_initialize(struct bch_fs *c)
        err = "error creating root directory";
        ret = bch2_btree_insert(c, BTREE_ID_INODES,
                                &packed_inode.inode.k_i,
-                               NULL, NULL, BTREE_INSERT_LAZY_RW);
+                               NULL, NULL, 0);
        if (ret)
                goto err;
 
index c873b67115f71733ed76ef80c4926d4bebd14e3c..7f301fa6e5a1dc4023708b2e4953337519da6460 100644 (file)
@@ -149,44 +149,6 @@ struct bch_fs *bch2_uuid_to_fs(uuid_le uuid)
        return c;
 }
 
-int bch2_congested(void *data, int bdi_bits)
-{
-       struct bch_fs *c = data;
-       struct backing_dev_info *bdi;
-       struct bch_dev *ca;
-       unsigned i;
-       int ret = 0;
-
-       rcu_read_lock();
-       if (bdi_bits & (1 << WB_sync_congested)) {
-               /* Reads - check all devices: */
-               for_each_readable_member(ca, c, i) {
-                       bdi = ca->disk_sb.bdev->bd_bdi;
-
-                       if (bdi_congested(bdi, bdi_bits)) {
-                               ret = 1;
-                               break;
-                       }
-               }
-       } else {
-               const struct bch_devs_mask *devs =
-                       bch2_target_to_mask(c, c->opts.foreground_target) ?:
-                       &c->rw_devs[BCH_DATA_user];
-
-               for_each_member_device_rcu(ca, c, i, devs) {
-                       bdi = ca->disk_sb.bdev->bd_bdi;
-
-                       if (bdi_congested(bdi, bdi_bits)) {
-                               ret = 1;
-                               break;
-                       }
-               }
-       }
-       rcu_read_unlock();
-
-       return ret;
-}
-
 /* Filesystem RO/RW: */
 
 /*
@@ -207,9 +169,7 @@ int bch2_congested(void *data, int bdi_bits)
 static void __bch2_fs_read_only(struct bch_fs *c)
 {
        struct bch_dev *ca;
-       bool wrote = false;
        unsigned i, clean_passes = 0;
-       int ret;
 
        bch2_rebalance_stop(c);
        bch2_copygc_stop(c);
@@ -228,20 +188,6 @@ static void __bch2_fs_read_only(struct bch_fs *c)
        if (!test_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags))
                goto nowrote_alloc;
 
-       bch_verbose(c, "writing alloc info");
-       /*
-        * This should normally just be writing the bucket read/write clocks:
-        */
-       ret = bch2_stripes_write(c, BTREE_INSERT_NOCHECK_RW, &wrote) ?:
-               bch2_alloc_write(c, BTREE_INSERT_NOCHECK_RW, &wrote);
-       bch_verbose(c, "writing alloc info complete");
-
-       if (ret && !test_bit(BCH_FS_EMERGENCY_RO, &c->flags))
-               bch2_fs_inconsistent(c, "error writing out alloc info %i", ret);
-
-       if (ret)
-               goto nowrote_alloc;
-
        bch_verbose(c, "flushing journal and stopping allocators");
 
        bch2_journal_flush_all_pins(&c->journal);
@@ -278,6 +224,9 @@ nowrote_alloc:
        for_each_member_device(ca, c, i)
                bch2_dev_allocator_stop(ca);
 
+       bch2_io_timer_del(&c->io_clock[READ], &c->bucket_clock[READ].rescale);
+       bch2_io_timer_del(&c->io_clock[WRITE], &c->bucket_clock[WRITE].rescale);
+
        clear_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags);
        clear_bit(BCH_FS_ALLOCATOR_STOPPING, &c->flags);
 
@@ -454,6 +403,9 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
                bch2_dev_allocator_add(c, ca);
        bch2_recalc_capacity(c);
 
+       bch2_io_timer_add(&c->io_clock[READ], &c->bucket_clock[READ].rescale);
+       bch2_io_timer_add(&c->io_clock[WRITE], &c->bucket_clock[WRITE].rescale);
+
        for_each_rw_member(ca, c, i) {
                ret = bch2_dev_allocator_start(ca);
                if (ret) {
@@ -1701,6 +1653,11 @@ have_slot:
        bch2_write_super(c);
        mutex_unlock(&c->sb_lock);
 
+       err = "alloc write failed";
+       ret = bch2_dev_alloc_write(c, ca, 0);
+       if (ret)
+               goto err;
+
        if (ca->mi.state == BCH_MEMBER_STATE_RW) {
                err = __bch2_dev_read_write(c, ca);
                if (err)
index 048ffec622af86e45e232a6aa28c7c9bf6a4e57b..02c81f3555c3559d7a7d2b2f65bce28ded807899 100644 (file)
@@ -199,7 +199,6 @@ static inline struct bch_devs_mask bch2_online_devs(struct bch_fs *c)
 
 struct bch_fs *bch2_bdev_to_fs(struct block_device *);
 struct bch_fs *bch2_uuid_to_fs(uuid_le);
-int bch2_congested(void *, int);
 
 bool bch2_dev_state_allowed(struct bch_fs *, struct bch_dev *,
                           enum bch_member_state, int);