git.sesse.net Git - bcachefs-tools-debian/commitdiff
Update bcachefs sources to 5242db9aec bcachefs: Fix bch2_check_fix_ptrs()
author Kent Overstreet <kent.overstreet@gmail.com>
Thu, 6 Jan 2022 00:39:57 +0000 (19:39 -0500)
committer Kent Overstreet <kent.overstreet@gmail.com>
Thu, 6 Jan 2022 00:39:57 +0000 (19:39 -0500)
.bcachefs_revision
libbcachefs/alloc_background.c
libbcachefs/alloc_background.h
libbcachefs/bcachefs.h
libbcachefs/btree_gc.c
libbcachefs/btree_iter.c
libbcachefs/btree_update.h
libbcachefs/buckets.c
libbcachefs/recovery.c

index 79a03365fc72df329f80df18aef5617a60d2a9fe..8226b3a6351d572c63a49990c2d0e07e5a63d9e8 100644 (file)
@@ -1 +1 @@
-50ac18afbb522a3103cecff9aaf9519d4eb5e908
+5242db9aec10220b6ee7162ba7bec173417348cf
index df340ebb01b65ef702e86d994f3dde525f24591e..688a53b4ca580f97b584fc4da0b3d338731a171b 100644 (file)
@@ -38,6 +38,15 @@ static const unsigned BCH_ALLOC_V1_FIELD_BYTES[] = {
 #undef x
 };
 
+struct bkey_alloc_buf {
+       struct bkey_i   k;
+       struct bch_alloc_v3 v;
+
+#define x(_name,  _bits)               + _bits / 8
+       u8              _pad[0 + BCH_ALLOC_FIELDS_V2()];
+#undef  x
+} __attribute__((packed, aligned(8)));
+
 /* Persistent alloc info: */
 
 static inline u64 alloc_field_v1_get(const struct bch_alloc *a,
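The bkey_alloc_buf definition moved into this file from alloc_background.h sizes its padding with an X-macro: each x(_name, _bits) entry in BCH_ALLOC_FIELDS_V2() expands to "+ _bits / 8", so _pad grows to the combined encoded size of every v2 field. A minimal standalone sketch of the same idiom, using an invented field list rather than the real BCH_ALLOC_FIELDS_V2():

/* Illustrative only: three made-up fields of 16, 16 and 8 bits. */
#define EXAMPLE_FIELDS()		\
	x(read_time,	16)		\
	x(write_time,	16)		\
	x(data_type,	8)

struct example_buf {
#define x(_name, _bits)		+ _bits / 8
	/* expands to pad[0 + 16/8 + 16/8 + 8/8], i.e. pad[5] */
	unsigned char	pad[0 EXAMPLE_FIELDS()];
#undef x
};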
@@ -244,25 +253,24 @@ struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c k)
        return ret;
 }
 
-struct bkey_alloc_buf *bch2_alloc_pack(struct btree_trans *trans,
-                                      const struct bkey_alloc_unpacked src)
+static void bch2_alloc_pack(struct bch_fs *c,
+                           struct bkey_alloc_buf *dst,
+                           const struct bkey_alloc_unpacked src)
 {
-       struct bkey_alloc_buf *dst;
-
-       dst = bch2_trans_kmalloc(trans, sizeof(struct bkey_alloc_buf));
-       if (!IS_ERR(dst))
-               bch2_alloc_pack_v3(dst, src);
-
-       return dst;
+       bch2_alloc_pack_v3(dst, src);
 }
 
 int bch2_alloc_write(struct btree_trans *trans, struct btree_iter *iter,
                     struct bkey_alloc_unpacked *u, unsigned trigger_flags)
 {
-       struct bkey_alloc_buf *a = bch2_alloc_pack(trans, *u);
+       struct bkey_alloc_buf *a;
+
+       a = bch2_trans_kmalloc(trans, sizeof(struct bkey_alloc_buf));
+       if (IS_ERR(a))
+               return PTR_ERR(a);
 
-       return PTR_ERR_OR_ZERO(a) ?:
-               bch2_trans_update(trans, iter, &a->k, trigger_flags);
+       bch2_alloc_pack(trans->c, a, *u);
+       return bch2_trans_update(trans, iter, &a->k, trigger_flags);
 }
 
 static unsigned bch_alloc_v1_val_u64s(const struct bch_alloc *a)
@@ -332,7 +340,7 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c,
 #undef  x
 }
 
-int bch2_alloc_read(struct bch_fs *c, bool gc, bool metadata_only)
+int bch2_alloc_read(struct bch_fs *c)
 {
        struct btree_trans trans;
        struct btree_iter iter;
@@ -343,43 +351,108 @@ int bch2_alloc_read(struct bch_fs *c, bool gc, bool metadata_only)
        int ret;
 
        bch2_trans_init(&trans, c, 0, 0);
+       down_read(&c->gc_lock);
 
        for_each_btree_key(&trans, iter, BTREE_ID_alloc, POS_MIN,
                           BTREE_ITER_PREFETCH, k, ret) {
+               if (!bkey_is_alloc(k.k))
+                       continue;
+
                ca = bch_dev_bkey_exists(c, k.k->p.inode);
-               g = __bucket(ca, k.k->p.offset, gc);
+               g = bucket(ca, k.k->p.offset);
                u = bch2_alloc_unpack(k);
 
-               if (!gc)
-                       *bucket_gen(ca, k.k->p.offset) = u.gen;
-
+               *bucket_gen(ca, k.k->p.offset) = u.gen;
                g->_mark.gen            = u.gen;
+               g->_mark.data_type      = u.data_type;
+               g->_mark.dirty_sectors  = u.dirty_sectors;
+               g->_mark.cached_sectors = u.cached_sectors;
+               g->_mark.stripe         = u.stripe != 0;
+               g->stripe               = u.stripe;
+               g->stripe_redundancy    = u.stripe_redundancy;
                g->io_time[READ]        = u.read_time;
                g->io_time[WRITE]       = u.write_time;
-               g->oldest_gen           = !gc ? u.oldest_gen : u.gen;
+               g->oldest_gen           = u.oldest_gen;
                g->gen_valid            = 1;
-
-               if (!gc ||
-                   (metadata_only &&
-                    (u.data_type == BCH_DATA_user ||
-                     u.data_type == BCH_DATA_cached ||
-                     u.data_type == BCH_DATA_parity))) {
-                       g->_mark.data_type      = u.data_type;
-                       g->_mark.dirty_sectors  = u.dirty_sectors;
-                       g->_mark.cached_sectors = u.cached_sectors;
-                       g->_mark.stripe         = u.stripe != 0;
-                       g->stripe               = u.stripe;
-                       g->stripe_redundancy    = u.stripe_redundancy;
-               }
-
        }
        bch2_trans_iter_exit(&trans, &iter);
 
+       up_read(&c->gc_lock);
        bch2_trans_exit(&trans);
 
-       if (ret)
+       if (ret) {
                bch_err(c, "error reading alloc info: %i", ret);
+               return ret;
+       }
+
+       return 0;
+}
+
+static int bch2_alloc_write_key(struct btree_trans *trans,
+                               struct btree_iter *iter,
+                               unsigned flags)
+{
+       struct bch_fs *c = trans->c;
+       struct bkey_s_c k;
+       struct bkey_alloc_unpacked old_u, new_u;
+       int ret;
+retry:
+       bch2_trans_begin(trans);
 
+       ret = bch2_btree_key_cache_flush(trans,
+                       BTREE_ID_alloc, iter->pos);
+       if (ret)
+               goto err;
+
+       k = bch2_btree_iter_peek_slot(iter);
+       ret = bkey_err(k);
+       if (ret)
+               goto err;
+
+       old_u   = bch2_alloc_unpack(k);
+       new_u   = alloc_mem_to_key(c, iter);
+
+       if (!bkey_alloc_unpacked_cmp(old_u, new_u))
+               return 0;
+
+       ret   = bch2_alloc_write(trans, iter, &new_u,
+                                 BTREE_TRIGGER_NORUN) ?:
+               bch2_trans_commit(trans, NULL, NULL,
+                               BTREE_INSERT_NOFAIL|flags);
+err:
+       if (ret == -EINTR)
+               goto retry;
+       return ret;
+}
+
+int bch2_alloc_write_all(struct bch_fs *c, unsigned flags)
+{
+       struct btree_trans trans;
+       struct btree_iter iter;
+       struct bch_dev *ca;
+       unsigned i;
+       int ret = 0;
+
+       bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
+       bch2_trans_iter_init(&trans, &iter, BTREE_ID_alloc, POS_MIN,
+                            BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
+
+       for_each_member_device(ca, c, i) {
+               bch2_btree_iter_set_pos(&iter,
+                       POS(ca->dev_idx, ca->mi.first_bucket));
+
+               while (iter.pos.offset < ca->mi.nbuckets) {
+                       ret = bch2_alloc_write_key(&trans, &iter, flags);
+                       if (ret) {
+                               percpu_ref_put(&ca->ref);
+                               goto err;
+                       }
+                       bch2_btree_iter_advance(&iter);
+               }
+       }
+err:
+       bch2_trans_iter_exit(&trans, &iter);
+       bch2_trans_exit(&trans);
        return ret;
 }
 
@@ -390,20 +463,19 @@ int bch2_bucket_io_time_reset(struct btree_trans *trans, unsigned dev,
 {
        struct bch_fs *c = trans->c;
        struct btree_iter iter;
-       struct bkey_s_c k;
        struct bkey_alloc_unpacked u;
        u64 *time, now;
        int ret = 0;
 
        bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, POS(dev, bucket_nr),
                             BTREE_ITER_CACHED|
+                            BTREE_ITER_CACHED_NOFILL|
                             BTREE_ITER_INTENT);
-       k = bch2_btree_iter_peek_slot(&iter);
-       ret = bkey_err(k);
+       ret = bch2_btree_iter_traverse(&iter);
        if (ret)
                goto out;
 
-       u = bch2_alloc_unpack(k);
+       u = alloc_mem_to_key(c, &iter);
 
        time = rw == READ ? &u.read_time : &u.write_time;
        now = atomic64_read(&c->io_clock[rw].now);
@@ -586,34 +658,56 @@ static size_t find_reclaimable_buckets(struct bch_fs *c, struct bch_dev *ca)
        return nr;
 }
 
+/*
+ * returns sequence number of most recent journal entry that updated this
+ * bucket:
+ */
+static u64 bucket_journal_seq(struct bch_fs *c, struct bucket_mark m)
+{
+       if (m.journal_seq_valid) {
+               u64 journal_seq = atomic64_read(&c->journal.seq);
+               u64 bucket_seq  = journal_seq;
+
+               bucket_seq &= ~((u64) U16_MAX);
+               bucket_seq |= m.journal_seq;
+
+               if (bucket_seq > journal_seq)
+                       bucket_seq -= 1 << 16;
+
+               return bucket_seq;
+       } else {
+               return 0;
+       }
+}
+
 static int bucket_invalidate_btree(struct btree_trans *trans,
-                                  struct bch_dev *ca, u64 b,
-                                  struct bkey_alloc_unpacked *u)
+                                  struct bch_dev *ca, u64 b)
 {
        struct bch_fs *c = trans->c;
+       struct bkey_alloc_unpacked u;
        struct btree_iter iter;
-       struct bkey_s_c k;
        int ret;
 
        bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc,
                             POS(ca->dev_idx, b),
                             BTREE_ITER_CACHED|
+                            BTREE_ITER_CACHED_NOFILL|
                             BTREE_ITER_INTENT);
 
-       k = bch2_btree_iter_peek_slot(&iter);
-       ret = bkey_err(k);
+       ret = bch2_btree_iter_traverse(&iter);
        if (ret)
                goto err;
 
-       *u = bch2_alloc_unpack(k);
-       u->gen++;
-       u->data_type            = 0;
-       u->dirty_sectors        = 0;
-       u->cached_sectors       = 0;
-       u->read_time            = atomic64_read(&c->io_clock[READ].now);
-       u->write_time           = atomic64_read(&c->io_clock[WRITE].now);
+       u = alloc_mem_to_key(c, &iter);
+
+       u.gen++;
+       u.data_type     = 0;
+       u.dirty_sectors = 0;
+       u.cached_sectors = 0;
+       u.read_time     = atomic64_read(&c->io_clock[READ].now);
+       u.write_time    = atomic64_read(&c->io_clock[WRITE].now);
 
-       ret = bch2_alloc_write(trans, &iter, u,
+       ret = bch2_alloc_write(trans, &iter, &u,
                               BTREE_TRIGGER_BUCKET_INVALIDATE);
 err:
        bch2_trans_iter_exit(trans, &iter);
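bucket_journal_seq() above rebuilds a full 64-bit journal sequence from the 16 low bits kept in the bucket mark: it splices m.journal_seq into the current journal sequence and, if the result would lie in the future, assumes those low bits predate the last 16-bit wrap and subtracts 1 << 16. A standalone restatement of that arithmetic on plain integers (the function name and example values are invented for illustration):

#include <stdint.h>

/* Sketch of the splice done by bucket_journal_seq(); not the kernel code
 * itself, just the same arithmetic on plain integers. */
static uint64_t splice_bucket_seq(uint64_t journal_seq, uint16_t bucket_lo16)
{
	uint64_t bucket_seq = journal_seq;

	bucket_seq &= ~(uint64_t) UINT16_MAX;	/* keep the high bits of the current seq */
	bucket_seq |= bucket_lo16;		/* splice in the bucket's low 16 bits */

	if (bucket_seq > journal_seq)		/* low bits are from before the last wrap */
		bucket_seq -= 1 << 16;

	return bucket_seq;
}

/*
 * e.g. journal_seq = 0x10005, bucket_lo16 = 0xfffe: the spliced value
 * 0x1fffe is ahead of 0x10005, so subtract 0x10000 -> 0xfffe.
 */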
@@ -623,23 +717,21 @@ err:
 static int bch2_invalidate_one_bucket(struct bch_fs *c, struct bch_dev *ca,
                                      u64 *journal_seq, unsigned flags)
 {
-       struct bkey_alloc_unpacked u;
+       struct bucket *g;
+       struct bucket_mark m;
        size_t b;
        int ret = 0;
 
-       /*
-        * If the read-only path is trying to shut down, we can't be generating
-        * new btree updates:
-        */
-       if (test_bit(BCH_FS_ALLOCATOR_STOPPING, &c->flags))
-               return 1;
-
        BUG_ON(!ca->alloc_heap.used ||
               !ca->alloc_heap.data[0].nr);
        b = ca->alloc_heap.data[0].bucket;
 
        /* first, put on free_inc and mark as owned by allocator: */
        percpu_down_read(&c->mark_lock);
+       g = bucket(ca, b);
+       m = READ_ONCE(g->mark);
+
+       BUG_ON(m.dirty_sectors);
 
        bch2_mark_alloc_bucket(c, ca, b, true);
 
@@ -648,15 +740,38 @@ static int bch2_invalidate_one_bucket(struct bch_fs *c, struct bch_dev *ca,
        BUG_ON(!fifo_push(&ca->free_inc, b));
        spin_unlock(&c->freelist_lock);
 
+       /*
+        * If we're not invalidating cached data, we only increment the bucket
+        * gen in memory here, the incremented gen will be updated in the btree
+        * by bch2_trans_mark_pointer():
+        */
+       if (!m.cached_sectors &&
+           !bucket_needs_journal_commit(m, c->journal.last_seq_ondisk)) {
+               BUG_ON(m.data_type);
+               bucket_cmpxchg(g, m, m.gen++);
+               *bucket_gen(ca, b) = m.gen;
+               percpu_up_read(&c->mark_lock);
+               goto out;
+       }
+
        percpu_up_read(&c->mark_lock);
 
+       /*
+        * If the read-only path is trying to shut down, we can't be generating
+        * new btree updates:
+        */
+       if (test_bit(BCH_FS_ALLOCATOR_STOPPING, &c->flags)) {
+               ret = 1;
+               goto out;
+       }
+
        ret = bch2_trans_do(c, NULL, journal_seq,
                            BTREE_INSERT_NOCHECK_RW|
                            BTREE_INSERT_NOFAIL|
                            BTREE_INSERT_JOURNAL_RESERVED|
                            flags,
-                           bucket_invalidate_btree(&trans, ca, b, &u));
-
+                           bucket_invalidate_btree(&trans, ca, b));
+out:
        if (!ret) {
                /* remove from alloc_heap: */
                struct alloc_heap_entry e, *top = ca->alloc_heap.data;
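The fast path added above bumps the bucket generation purely in memory: bucket_cmpxchg(g, m, m.gen++) retries a compare-and-swap on the packed bucket mark until it lands, and the incremented gen only reaches the btree later, when bch2_trans_mark_pointer() next updates that bucket. As an assumption-level sketch (the real bucket_cmpxchg() is defined elsewhere in the bcachefs sources and works on struct bucket_mark), the retry loop has roughly this shape:

#include <stdatomic.h>
#include <stdint.h>

/* Illustrative only: a CAS retry loop over a packed 64-bit word standing in
 * for the bucket mark; the gen is assumed to live in the low 8 bits here. */
static uint64_t bump_gen(_Atomic uint64_t *mark)
{
	uint64_t old = atomic_load(mark), new;

	do {
		new = (old & ~(uint64_t) 0xff) | ((old + 1) & 0xff);
	} while (!atomic_compare_exchange_weak(mark, &old, new));

	return new;
}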
@@ -672,7 +787,7 @@ static int bch2_invalidate_one_bucket(struct bch_fs *c, struct bch_dev *ca,
                 * bucket (i.e. deleting the last reference) before writing to
                 * this bucket again:
                 */
-               *journal_seq = max(*journal_seq, u.journal_seq);
+               *journal_seq = max(*journal_seq, bucket_journal_seq(c, m));
        } else {
                size_t b2;
 
index 98c7866e20b57ded9f8d629d8427d5966f97bfb5..86b64177b3d0bc2a378e7961aff478363ce0c210 100644 (file)
@@ -38,23 +38,40 @@ static inline bool bkey_alloc_unpacked_cmp(struct bkey_alloc_unpacked l,
        ;
 }
 
-struct bkey_alloc_buf {
-       struct bkey_i   k;
-       struct bch_alloc_v3 v;
-
-#define x(_name,  _bits)               + _bits / 8
-       u8              _pad[0 + BCH_ALLOC_FIELDS_V2()];
-#undef  x
-} __attribute__((packed, aligned(8)));
-
 struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c);
-struct bkey_alloc_buf *bch2_alloc_pack(struct btree_trans *,
-                                      const struct bkey_alloc_unpacked);
 int bch2_alloc_write(struct btree_trans *, struct btree_iter *,
                     struct bkey_alloc_unpacked *, unsigned);
 
 int bch2_bucket_io_time_reset(struct btree_trans *, unsigned, size_t, int);
 
+static inline struct bkey_alloc_unpacked
+alloc_mem_to_key(struct bch_fs *c, struct btree_iter *iter)
+{
+       struct bch_dev *ca;
+       struct bucket *g;
+       struct bkey_alloc_unpacked ret;
+
+       percpu_down_read(&c->mark_lock);
+       ca      = bch_dev_bkey_exists(c, iter->pos.inode);
+       g       = bucket(ca, iter->pos.offset);
+       ret     = (struct bkey_alloc_unpacked) {
+               .dev            = iter->pos.inode,
+               .bucket         = iter->pos.offset,
+               .gen            = g->mark.gen,
+               .oldest_gen     = g->oldest_gen,
+               .data_type      = g->mark.data_type,
+               .dirty_sectors  = g->mark.dirty_sectors,
+               .cached_sectors = g->mark.cached_sectors,
+               .read_time      = g->io_time[READ],
+               .write_time     = g->io_time[WRITE],
+               .stripe         = g->stripe,
+               .stripe_redundancy = g->stripe_redundancy,
+       };
+       percpu_up_read(&c->mark_lock);
+
+       return ret;
+}
+
 #define ALLOC_SCAN_BATCH(ca)           max_t(size_t, 1, (ca)->mi.nbuckets >> 9)
 
 const char *bch2_alloc_v1_invalid(const struct bch_fs *, struct bkey_s_c);
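The new alloc_mem_to_key() helper builds a bkey_alloc_unpacked from the in-memory bucket array under mark_lock, which is what lets callers in this commit open the alloc btree with BTREE_ITER_CACHED_NOFILL and skip reading the key back from the btree. A sketch of that caller pattern, mirroring bch2_bucket_io_time_reset() and bucket_invalidate_btree() in the alloc_background.c hunks above (the function name is invented and error paths are trimmed):

static int example_update_bucket(struct btree_trans *trans, unsigned dev, u64 b)
{
	struct btree_iter iter;
	struct bkey_alloc_unpacked u;
	int ret;

	bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, POS(dev, b),
			     BTREE_ITER_CACHED|
			     BTREE_ITER_CACHED_NOFILL|
			     BTREE_ITER_INTENT);
	ret = bch2_btree_iter_traverse(&iter);
	if (ret)
		goto out;

	/* current allocation info comes from the in-memory bucket, not the btree */
	u = alloc_mem_to_key(trans->c, &iter);

	/* ... modify u, then write it back ... */
	ret = bch2_alloc_write(trans, &iter, &u, 0);
out:
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}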
@@ -84,7 +101,7 @@ static inline bool bkey_is_alloc(const struct bkey *k)
                k->type == KEY_TYPE_alloc_v3;
 }
 
-int bch2_alloc_read(struct bch_fs *, bool, bool);
+int bch2_alloc_read(struct bch_fs *);
 
 static inline void bch2_wake_allocator(struct bch_dev *ca)
 {
@@ -122,6 +139,7 @@ void bch2_dev_allocator_quiesce(struct bch_fs *, struct bch_dev *);
 void bch2_dev_allocator_stop(struct bch_dev *);
 int bch2_dev_allocator_start(struct bch_dev *);
 
+int bch2_alloc_write_all(struct bch_fs *, unsigned);
 void bch2_fs_allocator_background_init(struct bch_fs *);
 
 #endif /* _BCACHEFS_ALLOC_BACKGROUND_H */
index 7b39a4191257a4d99fdb7a7baf6aade4cbcc850b..c64db2bfd2a5532f94affef96b53e384001a58aa 100644 (file)
@@ -534,6 +534,7 @@ enum {
        /* misc: */
        BCH_FS_NEED_ANOTHER_GC,
        BCH_FS_DELETED_NODES,
+       BCH_FS_NEED_ALLOC_WRITE,
        BCH_FS_REBUILD_REPLICAS,
        BCH_FS_HOLD_BTREE_WRITES,
 };
index 268ad74d539e0c3111e0ec662a44ce49fdf55471..a201052e8259191012d5761c1f18ee8dca2839b6 100644 (file)
@@ -9,7 +9,6 @@
 #include "alloc_foreground.h"
 #include "bkey_methods.h"
 #include "bkey_buf.h"
-#include "btree_key_cache.h"
 #include "btree_locking.h"
 #include "btree_update_interior.h"
 #include "btree_io.h"
@@ -534,6 +533,7 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id,
        bkey_for_each_ptr_decode(k->k, ptrs, p, entry) {
                struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
                struct bucket *g = PTR_GC_BUCKET(ca, &p.ptr);
+               struct bucket *g2 = PTR_BUCKET(ca, &p.ptr);
                enum bch_data_type data_type = bch2_bkey_ptr_data_type(*k, &entry->ptr);
 
                if (fsck_err_on(!g->gen_valid, c,
@@ -544,8 +544,9 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id,
                                p.ptr.gen,
                                (bch2_bkey_val_to_text(&PBUF(buf), c, *k), buf))) {
                        if (!p.ptr.cached) {
-                               g->_mark.gen            = p.ptr.gen;
-                               g->gen_valid            = true;
+                               g2->_mark.gen   = g->_mark.gen          = p.ptr.gen;
+                               g2->gen_valid   = g->gen_valid          = true;
+                               set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags);
                        } else {
                                do_update = true;
                        }
@@ -559,12 +560,13 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id,
                                p.ptr.gen, g->mark.gen,
                                (bch2_bkey_val_to_text(&PBUF(buf), c, *k), buf))) {
                        if (!p.ptr.cached) {
-                               g->_mark.gen            = p.ptr.gen;
-                               g->gen_valid            = true;
-                               g->_mark.data_type      = 0;
-                               g->_mark.dirty_sectors  = 0;
-                               g->_mark.cached_sectors = 0;
+                               g2->_mark.gen   = g->_mark.gen  = p.ptr.gen;
+                               g2->gen_valid   = g->gen_valid  = true;
+                               g2->_mark.data_type             = 0;
+                               g2->_mark.dirty_sectors         = 0;
+                               g2->_mark.cached_sectors        = 0;
                                set_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags);
+                               set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags);
                        } else {
                                do_update = true;
                        }
@@ -601,8 +603,9 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id,
                                bch2_data_types[data_type],
                                (bch2_bkey_val_to_text(&PBUF(buf), c, *k), buf))) {
                        if (data_type == BCH_DATA_btree) {
-                               g->_mark.data_type      = data_type;
-                               set_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags);
+                               g2->_mark.data_type     = g->_mark.data_type    = data_type;
+                               g2->gen_valid           = g->gen_valid          = true;
+                               set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags);
                        } else {
                                do_update = true;
                        }
@@ -1166,14 +1169,13 @@ static int bch2_gc_done(struct bch_fs *c,
        unsigned i, dev;
        int ret = 0;
 
-       percpu_down_write(&c->mark_lock);
-
 #define copy_field(_f, _msg, ...)                                      \
        if (dst->_f != src->_f) {                                       \
                if (verify)                                             \
                        fsck_err(c, _msg ": got %llu, should be %llu"   \
                                , ##__VA_ARGS__, dst->_f, src->_f);     \
                dst->_f = src->_f;                                      \
+               set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags);            \
        }
 #define copy_stripe_field(_f, _msg, ...)                               \
        if (dst->_f != src->_f) {                                       \
@@ -1183,6 +1185,18 @@ static int bch2_gc_done(struct bch_fs *c,
                                iter.pos, ##__VA_ARGS__,                \
                                dst->_f, src->_f);                      \
                dst->_f = src->_f;                                      \
+               set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags);            \
+       }
+#define copy_bucket_field(_f)                                          \
+       if (dst->b[b]._f != src->b[b]._f) {                             \
+               if (verify)                                             \
+                       fsck_err(c, "bucket %u:%zu gen %u data type %s has wrong " #_f  \
+                               ": got %u, should be %u", dev, b,       \
+                               dst->b[b].mark.gen,                     \
+                               bch2_data_types[dst->b[b].mark.data_type],\
+                               dst->b[b]._f, src->b[b]._f);            \
+               dst->b[b]._f = src->b[b]._f;                            \
+               set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags);            \
        }
 #define copy_dev_field(_f, _msg, ...)                                  \
        copy_field(_f, "dev %u has wrong " _msg, dev, ##__VA_ARGS__)
@@ -1193,18 +1207,36 @@ static int bch2_gc_done(struct bch_fs *c,
                bch2_fs_usage_acc_to_base(c, i);
 
        for_each_member_device(ca, c, dev) {
-               struct bch_dev_usage *dst = ca->usage_base;
-               struct bch_dev_usage *src = (void *)
-                       bch2_acc_percpu_u64s((void *) ca->usage_gc,
-                                            dev_usage_u64s());
-
-               copy_dev_field(buckets_ec,              "buckets_ec");
-               copy_dev_field(buckets_unavailable,     "buckets_unavailable");
-
-               for (i = 0; i < BCH_DATA_NR; i++) {
-                       copy_dev_field(d[i].buckets,    "%s buckets", bch2_data_types[i]);
-                       copy_dev_field(d[i].sectors,    "%s sectors", bch2_data_types[i]);
-                       copy_dev_field(d[i].fragmented, "%s fragmented", bch2_data_types[i]);
+               struct bucket_array *dst = __bucket_array(ca, 0);
+               struct bucket_array *src = __bucket_array(ca, 1);
+               size_t b;
+
+               for (b = 0; b < src->nbuckets; b++) {
+                       copy_bucket_field(_mark.gen);
+                       copy_bucket_field(_mark.data_type);
+                       copy_bucket_field(_mark.stripe);
+                       copy_bucket_field(_mark.dirty_sectors);
+                       copy_bucket_field(_mark.cached_sectors);
+                       copy_bucket_field(stripe_redundancy);
+                       copy_bucket_field(stripe);
+
+                       dst->b[b].oldest_gen = src->b[b].oldest_gen;
+               }
+
+               {
+                       struct bch_dev_usage *dst = ca->usage_base;
+                       struct bch_dev_usage *src = (void *)
+                               bch2_acc_percpu_u64s((void *) ca->usage_gc,
+                                                    dev_usage_u64s());
+
+                       copy_dev_field(buckets_ec,              "buckets_ec");
+                       copy_dev_field(buckets_unavailable,     "buckets_unavailable");
+
+                       for (i = 0; i < BCH_DATA_NR; i++) {
+                               copy_dev_field(d[i].buckets,    "%s buckets", bch2_data_types[i]);
+                               copy_dev_field(d[i].sectors,    "%s sectors", bch2_data_types[i]);
+                               copy_dev_field(d[i].fragmented, "%s fragmented", bch2_data_types[i]);
+                       }
                }
        };
 
@@ -1246,6 +1278,7 @@ static int bch2_gc_done(struct bch_fs *c,
 
 #undef copy_fs_field
 #undef copy_dev_field
+#undef copy_bucket_field
 #undef copy_stripe_field
 #undef copy_field
 fsck_err:
@@ -1253,8 +1286,6 @@ fsck_err:
                percpu_ref_put(&ca->ref);
        if (ret)
                bch_err(c, "%s: ret %i", __func__, ret);
-
-       percpu_up_write(&c->mark_lock);
        return ret;
 }
 
@@ -1277,6 +1308,15 @@ static int bch2_gc_start(struct bch_fs *c,
                BUG_ON(ca->buckets[1]);
                BUG_ON(ca->usage_gc);
 
+               ca->buckets[1] = kvpmalloc(sizeof(struct bucket_array) +
+                               ca->mi.nbuckets * sizeof(struct bucket),
+                               GFP_KERNEL|__GFP_ZERO);
+               if (!ca->buckets[1]) {
+                       percpu_ref_put(&ca->ref);
+                       bch_err(c, "error allocating ca->buckets[gc]");
+                       return -ENOMEM;
+               }
+
                ca->usage_gc = alloc_percpu(struct bch_dev_usage);
                if (!ca->usage_gc) {
                        bch_err(c, "error allocating ca->usage_gc");
@@ -1285,184 +1325,39 @@ static int bch2_gc_start(struct bch_fs *c,
                }
        }
 
-       return 0;
-}
-
-static int bch2_alloc_write_key(struct btree_trans *trans,
-                               struct btree_iter *iter,
-                               bool initial, bool metadata_only)
-{
-       struct bch_fs *c = trans->c;
-       struct bch_dev *ca = bch_dev_bkey_exists(c, iter->pos.inode);
-       struct bucket *g;
-       struct bkey_s_c k;
-       struct bkey_alloc_unpacked old_u, new_u, gc_u;
-       struct bkey_alloc_buf *a;
-       int ret;
+       percpu_down_write(&c->mark_lock);
 
        /*
-        * For this to be correct at runtime, we'll need to figure out a way for
-        * it to actually lock the key in the btree key cache:
+        * indicate to stripe code that we need to allocate for the gc stripes
+        * radix tree, too
         */
-
-       if (!initial) {
-               ret = bch2_btree_key_cache_flush(trans,
-                               BTREE_ID_alloc, iter->pos);
-               if (ret)
-                       return ret;
-       }
-
-       k = bch2_btree_iter_peek_slot(iter);
-       ret = bkey_err(k);
-       if (ret)
-               return ret;
-
-       old_u = new_u   = bch2_alloc_unpack(k);
-
-       percpu_down_read(&c->mark_lock);
-       g       = gc_bucket(ca, iter->pos.offset);
-       gc_u = (struct bkey_alloc_unpacked) {
-               .dev            = iter->pos.inode,
-               .bucket         = iter->pos.offset,
-               .gen            = g->mark.gen,
-               .oldest_gen     = g->oldest_gen,
-               .data_type      = g->mark.data_type,
-               .dirty_sectors  = g->mark.dirty_sectors,
-               .cached_sectors = g->mark.cached_sectors,
-               .read_time      = g->io_time[READ],
-               .write_time     = g->io_time[WRITE],
-               .stripe         = g->stripe,
-               .stripe_redundancy = g->stripe_redundancy,
-       };
-       percpu_up_read(&c->mark_lock);
-
-       if (metadata_only &&
-           gc_u.data_type != BCH_DATA_sb &&
-           gc_u.data_type != BCH_DATA_journal &&
-           gc_u.data_type != BCH_DATA_btree)
-               return 0;
-
-       if (!bkey_alloc_unpacked_cmp(old_u, gc_u) ||
-           gen_after(old_u.gen, gc_u.gen))
-               return 0;
-
-#define copy_bucket_field(_f)                                          \
-       if (fsck_err_on(new_u._f != gc_u._f, c,                         \
-                       "bucket %llu:%llu gen %u data type %s has wrong " #_f   \
-                       ": got %u, should be %u",                       \
-                       iter->pos.inode, iter->pos.offset,              \
-                       new_u.gen,                                      \
-                       bch2_data_types[new_u.data_type],               \
-                       new_u._f, gc_u._f))                             \
-               new_u._f = gc_u._f;                                     \
-
-       copy_bucket_field(gen);
-       copy_bucket_field(data_type);
-       copy_bucket_field(stripe);
-       copy_bucket_field(dirty_sectors);
-       copy_bucket_field(cached_sectors);
-       copy_bucket_field(stripe_redundancy);
-       copy_bucket_field(stripe);
-#undef copy_bucket_field
-
-       new_u.oldest_gen = gc_u.oldest_gen;
-
-       if (!bkey_alloc_unpacked_cmp(old_u, new_u))
-               return 0;
-
-       a = bch2_alloc_pack(trans, new_u);
-       if (IS_ERR(a))
-               return PTR_ERR(a);
-
-       ret = initial
-               ? bch2_journal_key_insert(c, BTREE_ID_alloc, 0, &a->k)
-               : bch2_trans_update(trans, iter, &a->k, BTREE_TRIGGER_NORUN);
-fsck_err:
-       return ret;
-}
-
-static int bch2_gc_alloc_done(struct bch_fs *c, bool initial, bool metadata_only)
-{
-       struct btree_trans trans;
-       struct btree_iter iter;
-       struct bkey_s_c k;
-       struct bch_dev *ca;
-       unsigned i;
-       int ret = 0;
-
-       bch2_trans_init(&trans, c, 0, 0);
+       gc_pos_set(c, gc_phase(GC_PHASE_START));
 
        for_each_member_device(ca, c, i) {
-               for_each_btree_key(&trans, iter, BTREE_ID_alloc,
-                                  POS(ca->dev_idx, ca->mi.first_bucket),
-                                  BTREE_ITER_SLOTS|
-                                  BTREE_ITER_PREFETCH, k, ret) {
-                       if (bkey_cmp(iter.pos, POS(ca->dev_idx, ca->mi.nbuckets)) >= 0)
-                               break;
+               struct bucket_array *dst = __bucket_array(ca, 1);
+               struct bucket_array *src = __bucket_array(ca, 0);
+               size_t b;
 
-                       ret = __bch2_trans_do(&trans, NULL, NULL,
-                                             BTREE_INSERT_LAZY_RW,
-                                       bch2_alloc_write_key(&trans, &iter,
-                                                            initial, metadata_only));
-                       if (ret)
-                               break;
-               }
-               bch2_trans_iter_exit(&trans, &iter);
+               dst->first_bucket       = src->first_bucket;
+               dst->nbuckets           = src->nbuckets;
 
-               if (ret) {
-                       bch_err(c, "error writing alloc info: %i", ret);
-                       percpu_ref_put(&ca->ref);
-                       break;
-               }
-       }
+               for (b = 0; b < src->nbuckets; b++) {
+                       struct bucket *d = &dst->b[b];
+                       struct bucket *s = &src->b[b];
 
-       bch2_trans_exit(&trans);
-       return ret;
-}
-
-static int bch2_gc_alloc_start(struct bch_fs *c, bool initial, bool metadata_only)
-{
-       struct bch_dev *ca;
-       unsigned i;
+                       d->_mark.gen = dst->b[b].oldest_gen = s->mark.gen;
+                       d->gen_valid = s->gen_valid;
 
-       for_each_member_device(ca, c, i) {
-               struct bucket_array *buckets = kvpmalloc(sizeof(struct bucket_array) +
-                               ca->mi.nbuckets * sizeof(struct bucket),
-                               GFP_KERNEL|__GFP_ZERO);
-               if (!buckets) {
-                       percpu_ref_put(&ca->ref);
-                       percpu_up_write(&c->mark_lock);
-                       bch_err(c, "error allocating ca->buckets[gc]");
-                       return -ENOMEM;
+                       if (metadata_only &&
+                           (s->mark.data_type == BCH_DATA_user ||
+                            s->mark.data_type == BCH_DATA_cached))
+                               d->_mark = s->mark;
                }
-
-               buckets->first_bucket   = ca->mi.first_bucket;
-               buckets->nbuckets       = ca->mi.nbuckets;
-               rcu_assign_pointer(ca->buckets[1], buckets);
        };
 
-       return bch2_alloc_read(c, true, metadata_only);
-}
-
-static void bch2_gc_alloc_reset(struct bch_fs *c, bool initial, bool metadata_only)
-{
-       struct bch_dev *ca;
-       unsigned i;
-
-       for_each_member_device(ca, c, i) {
-               struct bucket_array *buckets = __bucket_array(ca, true);
-               struct bucket *g;
+       percpu_up_write(&c->mark_lock);
 
-               for_each_bucket(g, buckets) {
-                       if (metadata_only &&
-                           (g->mark.data_type == BCH_DATA_user ||
-                            g->mark.data_type == BCH_DATA_cached ||
-                            g->mark.data_type == BCH_DATA_parity))
-                               continue;
-                       g->_mark.dirty_sectors = 0;
-                       g->_mark.cached_sectors = 0;
-               }
-       };
+       return 0;
 }
 
 static int bch2_gc_reflink_done(struct bch_fs *c, bool initial,
@@ -1535,55 +1430,6 @@ fsck_err:
        return ret;
 }
 
-static void bch2_gc_reflink_reset(struct bch_fs *c, bool initial,
-                                 bool metadata_only)
-{
-       struct genradix_iter iter;
-       struct reflink_gc *r;
-
-       genradix_for_each(&c->reflink_gc_table, iter, r)
-               r->refcount = 0;
-}
-
-static int bch2_gc_reflink_start(struct bch_fs *c, bool initial,
-                                bool metadata_only)
-{
-       struct btree_trans trans;
-       struct btree_iter iter;
-       struct bkey_s_c k;
-       struct reflink_gc *r;
-       int ret = 0;
-
-       if (metadata_only)
-               return 0;
-
-       bch2_trans_init(&trans, c, 0, 0);
-       c->reflink_gc_nr = 0;
-
-       for_each_btree_key(&trans, iter, BTREE_ID_reflink, POS_MIN,
-                          BTREE_ITER_PREFETCH, k, ret) {
-               const __le64 *refcount = bkey_refcount_c(k);
-
-               if (!refcount)
-                       continue;
-
-               r = genradix_ptr_alloc(&c->reflink_gc_table, c->reflink_gc_nr++,
-                                      GFP_KERNEL);
-               if (!r) {
-                       ret = -ENOMEM;
-                       break;
-               }
-
-               r->offset       = k.k->p.offset;
-               r->size         = k.k->size;
-               r->refcount     = 0;
-       }
-       bch2_trans_iter_exit(&trans, &iter);
-
-       bch2_trans_exit(&trans);
-       return ret;
-}
-
 static int bch2_gc_stripes_done(struct bch_fs *c, bool initial,
                                bool metadata_only)
 {
@@ -1647,10 +1493,43 @@ fsck_err:
        return ret;
 }
 
-static void bch2_gc_stripes_reset(struct bch_fs *c, bool initial,
-                               bool metadata_only)
+static int bch2_gc_reflink_start(struct bch_fs *c, bool initial,
+                                bool metadata_only)
 {
-       genradix_free(&c->gc_stripes);
+       struct btree_trans trans;
+       struct btree_iter iter;
+       struct bkey_s_c k;
+       struct reflink_gc *r;
+       int ret = 0;
+
+       if (metadata_only)
+               return 0;
+
+       bch2_trans_init(&trans, c, 0, 0);
+       c->reflink_gc_nr = 0;
+
+       for_each_btree_key(&trans, iter, BTREE_ID_reflink, POS_MIN,
+                          BTREE_ITER_PREFETCH, k, ret) {
+               const __le64 *refcount = bkey_refcount_c(k);
+
+               if (!refcount)
+                       continue;
+
+               r = genradix_ptr_alloc(&c->reflink_gc_table, c->reflink_gc_nr++,
+                                      GFP_KERNEL);
+               if (!r) {
+                       ret = -ENOMEM;
+                       break;
+               }
+
+               r->offset       = k.k->p.offset;
+               r->size         = k.k->size;
+               r->refcount     = 0;
+       }
+       bch2_trans_iter_exit(&trans, &iter);
+
+       bch2_trans_exit(&trans);
+       return ret;
 }
 
 /**
@@ -1686,14 +1565,11 @@ int bch2_gc(struct bch_fs *c, bool initial, bool metadata_only)
        /* flush interior btree updates: */
        closure_wait_event(&c->btree_interior_update_wait,
                           !bch2_btree_interior_updates_nr_pending(c));
-
+again:
        ret   = bch2_gc_start(c, metadata_only) ?:
-               bch2_gc_alloc_start(c, initial, metadata_only) ?:
                bch2_gc_reflink_start(c, initial, metadata_only);
        if (ret)
                goto out;
-again:
-       gc_pos_set(c, gc_phase(GC_PHASE_START));
 
        bch2_mark_superblocks(c);
 
@@ -1731,40 +1607,40 @@ again:
 
        if (test_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags) ||
            (!iter && bch2_test_restart_gc)) {
-               if (iter++ > 2) {
-                       bch_info(c, "Unable to fix bucket gens, looping");
-                       ret = -EINVAL;
-                       goto out;
-               }
-
                /*
                 * XXX: make sure gens we fixed got saved
                 */
-               bch_info(c, "Second GC pass needed, restarting:");
-               clear_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags);
-               __gc_pos_set(c, gc_phase(GC_PHASE_NOT_RUNNING));
+               if (iter++ <= 2) {
+                       bch_info(c, "Second GC pass needed, restarting:");
+                       clear_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags);
+                       __gc_pos_set(c, gc_phase(GC_PHASE_NOT_RUNNING));
+
+                       percpu_down_write(&c->mark_lock);
+                       bch2_gc_free(c);
+                       percpu_up_write(&c->mark_lock);
+                       /* flush fsck errors, reset counters */
+                       bch2_flush_fsck_errs(c);
 
-               bch2_gc_stripes_reset(c, initial, metadata_only);
-               bch2_gc_alloc_reset(c, initial, metadata_only);
-               bch2_gc_reflink_reset(c, initial, metadata_only);
+                       goto again;
+               }
 
-               /* flush fsck errors, reset counters */
-               bch2_flush_fsck_errs(c);
-               goto again;
+               bch_info(c, "Unable to fix bucket gens, looping");
+               ret = -EINVAL;
        }
 out:
        if (!ret) {
                bch2_journal_block(&c->journal);
 
-               ret   = bch2_gc_stripes_done(c, initial, metadata_only) ?:
-                       bch2_gc_reflink_done(c, initial, metadata_only) ?:
-                       bch2_gc_alloc_done(c, initial, metadata_only) ?:
+               percpu_down_write(&c->mark_lock);
+               ret   = bch2_gc_reflink_done(c, initial, metadata_only) ?:
+                       bch2_gc_stripes_done(c, initial, metadata_only) ?:
                        bch2_gc_done(c, initial, metadata_only);
 
                bch2_journal_unblock(&c->journal);
+       } else {
+               percpu_down_write(&c->mark_lock);
        }
 
-       percpu_down_write(&c->mark_lock);
        /* Indicates that gc is no longer in progress: */
        __gc_pos_set(c, gc_phase(GC_PHASE_NOT_RUNNING));
 
index db179013cfe82a866df5d69fb9dfad09588ed551..2ae4e523ff3b20192cc0d07992d5cc22c0a567ad 100644 (file)
@@ -2182,23 +2182,6 @@ inline bool bch2_btree_iter_rewind(struct btree_iter *iter)
        return ret;
 }
 
-static inline struct bkey_i *btree_trans_peek_updates(struct btree_trans *trans,
-                                                     enum btree_id btree_id,
-                                                     struct bpos pos)
-{
-       struct btree_insert_entry *i;
-
-       trans_for_each_update(trans, i)
-               if ((cmp_int(btree_id,  i->btree_id) ?:
-                    bpos_cmp(pos,      i->k->k.p)) <= 0) {
-                       if (btree_id == i->btree_id)
-                               return i->k;
-                       break;
-               }
-
-       return NULL;
-}
-
 static noinline
 struct bkey_i *__btree_trans_peek_journal(struct btree_trans *trans,
                                          struct btree_path *path)
index 90ea018d6eb5ab18be43e121f019a5b65374ea29..16ebf1a2b1f9977455491a303219b5eafbab7530 100644 (file)
@@ -135,4 +135,21 @@ static inline int bch2_trans_commit(struct btree_trans *trans,
             (_i) < (_trans)->updates + (_trans)->nr_updates;           \
             (_i)++)
 
+static inline struct bkey_i *btree_trans_peek_updates(struct btree_trans *trans,
+                                                     enum btree_id btree_id,
+                                                     struct bpos pos)
+{
+       struct btree_insert_entry *i;
+
+       trans_for_each_update(trans, i)
+               if ((cmp_int(btree_id,  i->btree_id) ?:
+                    bpos_cmp(pos,      i->k->k.p)) <= 0) {
+                       if (btree_id == i->btree_id)
+                               return i->k;
+                       break;
+               }
+
+       return NULL;
+}
+
 #endif /* _BCACHEFS_BTREE_UPDATE_H */
index fb0f64f084ea493e12340e0b7788f0105b3f3c2e..895ff2555662a4ae90626d246b4e6315e4284c53 100644 (file)
@@ -1459,22 +1459,24 @@ static int bch2_trans_start_alloc_update(struct btree_trans *trans, struct btree
 {
        struct bch_fs *c = trans->c;
        struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
-       struct bkey_s_c k;
+       struct bpos pos = POS(ptr->dev, PTR_BUCKET_NR(ca, ptr));
+       struct bkey_i *update = btree_trans_peek_updates(trans, BTREE_ID_alloc, pos);
        int ret;
 
-       bch2_trans_iter_init(trans, iter, BTREE_ID_alloc,
-                            POS(ptr->dev, PTR_BUCKET_NR(ca, ptr)),
-                            BTREE_ITER_WITH_UPDATES|
+       bch2_trans_iter_init(trans, iter, BTREE_ID_alloc, pos,
                             BTREE_ITER_CACHED|
+                            BTREE_ITER_CACHED_NOFILL|
                             BTREE_ITER_INTENT);
-       k = bch2_btree_iter_peek_slot(iter);
-       ret = bkey_err(k);
+       ret = bch2_btree_iter_traverse(iter);
        if (ret) {
                bch2_trans_iter_exit(trans, iter);
                return ret;
        }
 
-       *u = bch2_alloc_unpack(k);
+       *u = update && !bpos_cmp(update->k.p, pos)
+               ? bch2_alloc_unpack(bkey_i_to_s_c(update))
+               : alloc_mem_to_key(c, iter);
+
        return 0;
 }
 
index 7e4400cc02a9249381e5c03b6b7ff7776c4ee910..b818093eab39333265a9a4004b49bc6da0bee198 100644 (file)
@@ -1095,11 +1095,7 @@ use_clean:
 
        bch_verbose(c, "starting alloc read");
        err = "error reading allocation information";
-
-       down_read(&c->gc_lock);
-       ret = bch2_alloc_read(c, false, false);
-       up_read(&c->gc_lock);
-
+       ret = bch2_alloc_read(c);
        if (ret)
                goto err;
        bch_verbose(c, "alloc read done");
@@ -1157,6 +1153,23 @@ use_clean:
        if (c->opts.verbose || !c->sb.clean)
                bch_info(c, "journal replay done");
 
+       if (test_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags) &&
+           !c->opts.nochanges) {
+               /*
+                * note that even when filesystem was clean there might be work
+                * to do here, if we ran gc (because of fsck) which recalculated
+                * oldest_gen:
+                */
+               bch_verbose(c, "writing allocation info");
+               err = "error writing out alloc info";
+               ret = bch2_alloc_write_all(c, BTREE_INSERT_LAZY_RW);
+               if (ret) {
+                       bch_err(c, "error writing alloc info");
+                       goto err;
+               }
+               bch_verbose(c, "alloc write done");
+       }
+
        if (c->sb.version < bcachefs_metadata_version_snapshot_2) {
                bch2_fs_lazy_rw(c);