git.sesse.net Git - bcachefs-tools-debian/commitdiff
Update bcachefs sources to 9922afc8b6 bcachefs: Add repair code for out of order...
author Kent Overstreet <kent.overstreet@gmail.com>
Mon, 29 Mar 2021 04:21:38 +0000 (00:21 -0400)
committer Kent Overstreet <kent.overstreet@gmail.com>
Mon, 29 Mar 2021 04:22:49 +0000 (00:22 -0400)
.bcachefs_revision
libbcachefs/btree_io.c
libbcachefs/btree_key_cache.h
libbcachefs/btree_update_leaf.c
libbcachefs/fs-common.c
libbcachefs/fsck.c
libbcachefs/inode.c
libbcachefs/inode.h
libbcachefs/journal_reclaim.c
libbcachefs/move.c

index 385c19f6011e37294a39d6e408fbaff01de1ad6a..2e71c6c8922058e6dff46dfa86dcc55c6d1edf8f 100644 (file)
@@ -1 +1 @@
-18686af68412ebfad9c2adc6ee976ffdb9e1b886
+9922afc8b6d6227f4193feef6442f8c3d881f78c
index b43d4468ad8ec1b9a27c0d9c53df3d3bacff6859..7fbacd9e0a96be590a81dc528c5ee0129fb3699f 100644 (file)
@@ -578,6 +578,10 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
                mutex_unlock(&c->sb_lock);
        }
 
+       btree_err_on(BSET_SEPARATE_WHITEOUTS(i),
+                    BTREE_ERR_FATAL, c, ca, b, i,
+                    "BSET_SEPARATE_WHITEOUTS no longer supported");
+
        if (btree_err_on(b->written + sectors > c->opts.btree_node_size,
                         BTREE_ERR_FIXABLE, c, ca, b, i,
                         "bset past end of btree node")) {
@@ -660,14 +664,8 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b,
 {
        unsigned version = le16_to_cpu(i->version);
        struct bkey_packed *k, *prev = NULL;
-       bool seen_non_whiteout = false;
        int ret = 0;
 
-       if (!BSET_SEPARATE_WHITEOUTS(i)) {
-               seen_non_whiteout = true;
-               *whiteout_u64s = 0;
-       }
-
        for (k = i->start;
             k != vstruct_last(i);) {
                struct bkey_s u;
@@ -719,18 +717,7 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b,
                                    BSET_BIG_ENDIAN(i), write,
                                    &b->format, k);
 
-               /*
-                * with the separate whiteouts thing (used for extents), the
-                * second set of keys actually can have whiteouts too, so we
-                * can't solely go off bkey_deleted()...
-                */
-
-               if (!seen_non_whiteout &&
-                   (!bkey_deleted(k) ||
-                    (prev && bkey_iter_cmp(b, prev, k) > 0))) {
-                       *whiteout_u64s = k->_data - i->_data;
-                       seen_non_whiteout = true;
-               } else if (prev && bkey_iter_cmp(b, prev, k) > 0) {
+               if (prev && bkey_iter_cmp(b, prev, k) > 0) {
                        char buf1[80];
                        char buf2[80];
                        struct bkey up = bkey_unpack_key(b, prev);
@@ -739,10 +726,15 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b,
                        bch2_bkey_to_text(&PBUF(buf2), u.k);
 
                        bch2_dump_bset(c, b, i, 0);
-                       btree_err(BTREE_ERR_FATAL, c, NULL, b, i,
-                                 "keys out of order: %s > %s",
-                                 buf1, buf2);
-                       /* XXX: repair this */
+
+                       if (btree_err(BTREE_ERR_FIXABLE, c, NULL, b, i,
+                                     "keys out of order: %s > %s",
+                                     buf1, buf2)) {
+                               i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s);
+                               memmove_u64s_down(k, bkey_next(k),
+                                                 (u64 *) vstruct_end(i) - (u64 *) k);
+                               continue;
+                       }
                }
 
                prev = k;
index 02715cd258ab0e235557adb246d1639ab236b578..4e1e5a9c765614a4cdd3da686f8e657965bf3707 100644 (file)
@@ -1,6 +1,15 @@
 #ifndef _BCACHEFS_BTREE_KEY_CACHE_H
 #define _BCACHEFS_BTREE_KEY_CACHE_H
 
+/* Count of dirty key cache entries beyond a quarter of all cached keys, else 0 */
+static inline size_t bch2_nr_btree_keys_want_flush(struct bch_fs *c)
+{
+       size_t dirty = atomic_long_read(&c->btree_key_cache.nr_dirty);
+       size_t cached = atomic_long_read(&c->btree_key_cache.nr_keys);
+       ssize_t excess = dirty - cached / 4; /* signed: a shortfall goes negative */
+       return excess > 0 ? excess : 0;
+}
+
 static inline size_t bch2_nr_btree_keys_need_flush(struct bch_fs *c)
 {
        size_t nr_dirty = atomic_long_read(&c->btree_key_cache.nr_dirty);
index 67a2c65bc8fb0ae3bf3b0cb666773982c05652db..221a600496373026e2d48f645edf78937d189836 100644 (file)
@@ -1188,7 +1188,7 @@ retry:
                goto retry;
        }
 
-       bch2_trans_iter_put(trans, iter);
+       bch2_trans_iter_free(trans, iter);
        return ret;
 }
 
index 83c2168ce480a92169b68a22c82ea9a64a8a02dd..281a6135e59977f7d057c8b7958a85ec87435dfd 100644 (file)
@@ -36,7 +36,7 @@ int bch2_create_trans(struct btree_trans *trans, u64 dir_inum,
        if (!name)
                new_inode->bi_flags |= BCH_INODE_UNLINKED;
 
-       inode_iter = bch2_inode_create(trans, new_inode);
+       inode_iter = bch2_inode_create(trans, new_inode, U32_MAX);
        ret = PTR_ERR_OR_ZERO(inode_iter);
        if (ret)
                goto err;
@@ -80,6 +80,10 @@ int bch2_create_trans(struct btree_trans *trans, u64 dir_inum,
                new_inode->bi_dir_offset        = dir_offset;
        }
 
+       /* XXX use bch2_btree_iter_set_snapshot() */
+       inode_iter->snapshot = U32_MAX;
+       bch2_btree_iter_set_pos(inode_iter, SPOS(0, new_inode->bi_inum, U32_MAX));
+
        ret = bch2_inode_write(trans, inode_iter, new_inode);
 err:
        bch2_trans_iter_put(trans, inode_iter);
index 62788ae15eff3f9cf5078afaa93f9bd24f89f2db..acf128f06310ef06aab0d2efe123b3ca580e1fc6 100644 (file)
@@ -1361,6 +1361,7 @@ static int check_inode(struct btree_trans *trans,
                struct bkey_inode_buf p;
 
                bch2_inode_pack(c, &p, &u);
+               p.inode.k.p = iter->pos;
 
                ret = __bch2_trans_do(trans, NULL, NULL,
                                      BTREE_INSERT_NOFAIL|
index f1665ca85da694962c756b6b30b8490cf7b2aade..d4c328397156e9b102e9ce0424dcd6067be1581a 100644 (file)
@@ -471,12 +471,13 @@ static inline u32 bkey_generation(struct bkey_s_c k)
 }
 
 struct btree_iter *bch2_inode_create(struct btree_trans *trans,
-                                    struct bch_inode_unpacked *inode_u)
+                                    struct bch_inode_unpacked *inode_u,
+                                    u32 snapshot)
 {
        struct bch_fs *c = trans->c;
        struct btree_iter *iter = NULL;
        struct bkey_s_c k;
-       u64 min, max, start, *hint;
+       u64 min, max, start, pos, *hint;
        int ret;
 
        u64 cpu = raw_smp_processor_id();
@@ -493,39 +494,70 @@ struct btree_iter *bch2_inode_create(struct btree_trans *trans,
 
        if (start >= max || start < min)
                start = min;
+
+       pos = start;
+       iter = bch2_trans_get_iter(trans, BTREE_ID_inodes, POS(0, pos),
+                                  BTREE_ITER_ALL_SNAPSHOTS|
+                                  BTREE_ITER_INTENT);
 again:
-       for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, start),
-                          BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) {
-               if (bkey_cmp(iter->pos, POS(0, max)) > 0)
-                       break;
+       while ((k = bch2_btree_iter_peek(iter)).k &&
+              !(ret = bkey_err(k)) &&
+              bkey_cmp(k.k->p, POS(0, max)) < 0) {
+               while (pos < iter->pos.offset) {
+                       if (!bch2_btree_key_cache_find(c, BTREE_ID_inodes, POS(0, pos)))
+                               goto found_slot;
+
+                       pos++;
+               }
+
+               if (k.k->p.snapshot == snapshot &&
+                   k.k->type != KEY_TYPE_inode &&
+                   !bch2_btree_key_cache_find(c, BTREE_ID_inodes, SPOS(0, pos, snapshot))) {
+                       bch2_btree_iter_next(iter);
+                       continue;
+               }
 
                /*
-                * There's a potential cache coherency issue with the btree key
-                * cache code here - we're iterating over the btree, skipping
-                * that cache. We should never see an empty slot that isn't
-                * actually empty due to a pending update in the key cache
-                * because the update that creates the inode isn't done with a
-                * cached iterator, but - better safe than sorry, check the
-                * cache before using a slot:
+                * We don't need to iterate over keys in every snapshot once
+                * we've found just one:
                 */
-               if (k.k->type != KEY_TYPE_inode &&
-                   !bch2_btree_key_cache_find(c, BTREE_ID_inodes, iter->pos))
+               pos = iter->pos.offset + 1;
+               bch2_btree_iter_set_pos(iter, POS(0, pos));
+       }
+
+       while (!ret && pos < max) {
+               if (!bch2_btree_key_cache_find(c, BTREE_ID_inodes, POS(0, pos)))
                        goto found_slot;
+
+               pos++;
        }
 
-       bch2_trans_iter_put(trans, iter);
+       if (!ret && start == min)
+               ret = -ENOSPC;
 
-       if (ret)
+       if (ret) {
+               bch2_trans_iter_put(trans, iter);
                return ERR_PTR(ret);
-
-       if (start != min) {
-               /* Retry from start */
-               start = min;
-               goto again;
        }
 
-       return ERR_PTR(-ENOSPC);
+       /* Retry from start */
+       pos = start = min;
+       bch2_btree_iter_set_pos(iter, POS(0, pos));
+       goto again;
 found_slot:
+       bch2_btree_iter_set_pos(iter, SPOS(0, pos, snapshot));
+       k = bch2_btree_iter_peek_slot(iter);
+       ret = bkey_err(k);
+       if (ret) {
+               bch2_trans_iter_put(trans, iter);
+               return ERR_PTR(ret);
+       }
+
+       /* We may have raced while the iterator wasn't pointing at pos: */
+       if (k.k->type == KEY_TYPE_inode ||
+           bch2_btree_key_cache_find(c, BTREE_ID_inodes, k.k->p))
+               goto again;
+
        *hint                   = k.k->p.offset;
        inode_u->bi_inum        = k.k->p.offset;
        inode_u->bi_generation  = bkey_generation(k);
index 6bad6dfb79891ccf941ded70f125bb4d33b61bfb..23c322d9a85b0e64a0fea7717a0a5618b2d47a98 100644 (file)
@@ -70,7 +70,7 @@ void bch2_inode_init(struct bch_fs *, struct bch_inode_unpacked *,
                     struct bch_inode_unpacked *);
 
 struct btree_iter *bch2_inode_create(struct btree_trans *,
-                                    struct bch_inode_unpacked *);
+                                    struct bch_inode_unpacked *, u32);
 
 int bch2_inode_rm(struct bch_fs *, u64, bool);
 
index 4a5b50ed71b0dd46519915f57706eb2d1d89be59..93b5e07e05bc503f1608a729fb69891a8838d171 100644 (file)
@@ -602,7 +602,7 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct)
                if (fifo_free(&j->pin) <= 32)
                        min_nr = 1;
 
-               min_nr = max(min_nr, bch2_nr_btree_keys_need_flush(c));
+               min_nr = max(min_nr, bch2_nr_btree_keys_want_flush(c));
 
                trace_journal_reclaim_start(c,
                                min_nr,
index 732e2dbbea87678deafdd5eeb97dccbb5bb46d2d..c9e18491b18122a3cb3a17af07942d6016812463 100644 (file)
@@ -509,6 +509,32 @@ err:
        return ret;
 }
 
+/* Unpack the inode at exactly @pos; -ENOENT if absent, -EIO if not an inode */
+static int lookup_inode(struct btree_trans *trans, struct bpos pos,
+                       struct bch_inode_unpacked *inode)
+{
+       struct btree_iter *iter;
+       struct bkey_s_c k;
+       int ret;
+
+       iter = bch2_trans_get_iter(trans, BTREE_ID_inodes, pos,
+                                  BTREE_ITER_ALL_SNAPSHOTS);
+       k = bch2_btree_iter_peek(iter);
+       ret = bkey_err(k);
+       if (ret)
+               goto err;
+       /* peek can come back empty or with a key at a later position: */
+       if (!k.k || bkey_cmp(k.k->p, pos))
+               ret = -ENOENT;
+       else if (k.k->type != KEY_TYPE_inode)
+               ret = -EIO;
+       else
+               ret = bch2_inode_unpack(bkey_s_c_to_inode(k), inode);
+err:
+       bch2_trans_iter_put(trans, iter);
+       return ret;
+}
+
 static int __bch2_move_data(struct bch_fs *c,
                struct moving_context *ctxt,
                struct bch_ratelimit *rate,
@@ -566,7 +592,7 @@ static int __bch2_move_data(struct bch_fs *c,
                                try_to_freeze();
                        }
                } while (delay);
-peek:
+
                k = bch2_btree_iter_peek(iter);
 
                stats->pos = iter->pos;
@@ -586,14 +612,18 @@ peek:
                    cur_inum != k.k->p.inode) {
                        struct bch_inode_unpacked inode;
 
-                       /* don't hold btree locks while looking up inode: */
-                       bch2_trans_unlock(&trans);
-
                        io_opts = bch2_opts_to_inode_opts(c->opts);
-                       if (!bch2_inode_find_by_inum(c, k.k->p.inode, &inode))
+
+                       ret = lookup_inode(&trans,
+                                       SPOS(0, k.k->p.inode, k.k->p.snapshot),
+                                       &inode);
+                       if (ret == -EINTR)
+                               continue;
+
+                       if (!ret)
                                bch2_io_opts_apply(&io_opts, bch2_inode_opts_get(&inode));
+
                        cur_inum = k.k->p.inode;
-                       goto peek;
                }
 
                switch ((data_cmd = pred(c, arg, k, &io_opts, &data_opts))) {