git.sesse.net Git - bcachefs-tools-debian/commitdiff
Update bcachefs sources to d82da7126f fixup! bcachefs: for_each_btree_key2()
author Kent Overstreet <kent.overstreet@gmail.com>
Sun, 17 Jul 2022 09:20:47 +0000 (05:20 -0400)
committer Kent Overstreet <kent.overstreet@gmail.com>
Sun, 17 Jul 2022 09:20:54 +0000 (05:20 -0400)
.bcachefs_revision
include/linux/jiffies.h
libbcachefs/alloc_background.c
libbcachefs/btree_gc.c
libbcachefs/btree_iter.c
libbcachefs/btree_iter.h
libbcachefs/btree_types.h
libbcachefs/errcode.h
libbcachefs/fsck.c
libbcachefs/quota.c

index 32ec17bf372840527020d1ce08db4331c7ff6865..0040d1deee29e48d233d7d87988a36b571e31541 100644 (file)
@@ -1 +1 @@
-2f11bb05b0df04b7e0e190fd27b111e9f20cd749
+d82da7126f2db01a0d320ad7ed13cd4016c36221
index c3f3e1f20cbd1cb0de77e3087c097c495dc5a7c3..4fd3b68d4cfe63cca48e8b27c01310c47735157b 100644 (file)
@@ -81,6 +81,11 @@ static inline u64 local_clock(void)
        return sched_clock();
 }
 
+static inline u64 ktime_get_ns(void)
+{
+       return sched_clock();
+}
+
 #define jiffies                        nsecs_to_jiffies(sched_clock())
 
 #endif
index fc77747c78c49b3507f793dd07fe7f1aa2d79a15..ca1f45cc80b7f24de2be1cf86886e7cb796ecbfb 100644 (file)
@@ -1007,16 +1007,10 @@ int bch2_check_alloc_to_lru_refs(struct bch_fs *c)
 
        bch2_trans_init(&trans, c, 0, 0);
 
-       for_each_btree_key(&trans, iter, BTREE_ID_alloc, POS_MIN,
-                          BTREE_ITER_PREFETCH, k, ret) {
-               ret = commit_do(&trans, NULL, NULL,
-                                     BTREE_INSERT_NOFAIL|
-                                     BTREE_INSERT_LAZY_RW,
-                       bch2_check_alloc_to_lru_ref(&trans, &iter));
-               if (ret)
-                       break;
-       }
-       bch2_trans_iter_exit(&trans, &iter);
+       for_each_btree_key_commit(&trans, iter, BTREE_ID_alloc,
+                       POS_MIN, BTREE_ITER_PREFETCH, k,
+                       NULL, NULL, BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW,
+               bch2_check_alloc_to_lru_ref(&trans, &iter));
 
        bch2_trans_exit(&trans);
        return ret < 0 ? ret : 0;
index e260689ba830cc97f2c169754366baef967a4f43..214529b613f97c40f7d8733269ce3a928fadddc0 100644 (file)
@@ -1848,10 +1848,15 @@ out:
        return ret;
 }
 
-static bool gc_btree_gens_key(struct bch_fs *c, struct bkey_s_c k)
+static int gc_btree_gens_key(struct btree_trans *trans,
+                            struct btree_iter *iter,
+                            struct bkey_s_c k)
 {
+       struct bch_fs *c = trans->c;
        struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
        const struct bch_extent_ptr *ptr;
+       struct bkey_i *u;
+       int ret;
 
        percpu_down_read(&c->mark_lock);
        bkey_for_each_ptr(ptrs, ptr) {
@@ -1859,7 +1864,7 @@ static bool gc_btree_gens_key(struct bch_fs *c, struct bkey_s_c k)
 
                if (ptr_stale(ca, ptr) > 16) {
                        percpu_up_read(&c->mark_lock);
-                       return true;
+                       goto update;
                }
        }
 
@@ -1871,77 +1876,27 @@ static bool gc_btree_gens_key(struct bch_fs *c, struct bkey_s_c k)
                        *gen = ptr->gen;
        }
        percpu_up_read(&c->mark_lock);
+       return 0;
+update:
+       u = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
+       ret = PTR_ERR_OR_ZERO(u);
+       if (ret)
+               return ret;
 
-       return false;
-}
-
-/*
- * For recalculating oldest gen, we only need to walk keys in leaf nodes; btree
- * node pointers currently never have cached pointers that can become stale:
- */
-static int bch2_gc_btree_gens(struct btree_trans *trans, enum btree_id btree_id)
-{
-       struct bch_fs *c = trans->c;
-       struct btree_iter iter;
-       struct bkey_s_c k;
-       struct bkey_buf sk;
-       int ret = 0, commit_err = 0;
-
-       bch2_bkey_buf_init(&sk);
-
-       bch2_trans_iter_init(trans, &iter, btree_id, POS_MIN,
-                            BTREE_ITER_PREFETCH|
-                            BTREE_ITER_NOT_EXTENTS|
-                            BTREE_ITER_ALL_SNAPSHOTS);
-
-       while ((bch2_trans_begin(trans),
-               k = bch2_btree_iter_peek(&iter)).k) {
-               ret = bkey_err(k);
-
-               if (ret == -EINTR)
-                       continue;
-               if (ret)
-                       break;
-
-               c->gc_gens_pos = iter.pos;
-
-               if (gc_btree_gens_key(c, k) && !commit_err) {
-                       bch2_bkey_buf_reassemble(&sk, c, k);
-                       bch2_extent_normalize(c, bkey_i_to_s(sk.k));
-
-                       commit_err =
-                               bch2_trans_update(trans, &iter, sk.k, 0) ?:
-                               bch2_trans_commit(trans, NULL, NULL,
-                                                 BTREE_INSERT_NOWAIT|
-                                                 BTREE_INSERT_NOFAIL);
-                       if (commit_err == -EINTR) {
-                               commit_err = 0;
-                               continue;
-                       }
-               }
-
-               bch2_btree_iter_advance(&iter);
-       }
-       bch2_trans_iter_exit(trans, &iter);
-
-       bch2_bkey_buf_exit(&sk, c);
+       bkey_reassemble(u, k);
 
-       return ret;
+       bch2_extent_normalize(c, bkey_i_to_s(u));
+       return bch2_trans_update(trans, iter, u, 0);
 }
 
-static int bch2_alloc_write_oldest_gen(struct btree_trans *trans, struct btree_iter *iter)
+static int bch2_alloc_write_oldest_gen(struct btree_trans *trans, struct btree_iter *iter,
+                                      struct bkey_s_c k)
 {
        struct bch_dev *ca = bch_dev_bkey_exists(trans->c, iter->pos.inode);
-       struct bkey_s_c k;
        struct bch_alloc_v4 a;
        struct bkey_i_alloc_v4 *a_mut;
        int ret;
 
-       k = bch2_btree_iter_peek_slot(iter);
-       ret = bkey_err(k);
-       if (ret)
-               return ret;
-
        bch2_alloc_to_v4(k, &a);
 
        if (a.oldest_gen == ca->oldest_gen[iter->pos.offset])
@@ -2001,26 +1956,35 @@ int bch2_gc_gens(struct bch_fs *c)
 
        for (i = 0; i < BTREE_ID_NR; i++)
                if ((1 << i) & BTREE_ID_HAS_PTRS) {
+                       struct btree_iter iter;
+                       struct bkey_s_c k;
+
                        c->gc_gens_btree = i;
                        c->gc_gens_pos = POS_MIN;
-                       ret = bch2_gc_btree_gens(&trans, i);
+                       ret = for_each_btree_key_commit(&trans, iter, i,
+                                       POS_MIN,
+                                       BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS,
+                                       k,
+                                       NULL, NULL,
+                                       BTREE_INSERT_NOFAIL,
+                               gc_btree_gens_key(&trans, &iter, k));
                        if (ret) {
                                bch_err(c, "error recalculating oldest_gen: %i", ret);
                                goto err;
                        }
                }
 
-       for_each_btree_key(&trans, iter, BTREE_ID_alloc, POS_MIN,
-                          BTREE_ITER_PREFETCH, k, ret) {
-               ret = commit_do(&trans, NULL, NULL,
-                                     BTREE_INSERT_NOFAIL,
-                               bch2_alloc_write_oldest_gen(&trans, &iter));
-               if (ret) {
-                       bch_err(c, "error writing oldest_gen: %i", ret);
-                       break;
-               }
+       ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_alloc,
+                       POS_MIN,
+                       BTREE_ITER_PREFETCH,
+                       k,
+                       NULL, NULL,
+                       BTREE_INSERT_NOFAIL,
+               bch2_alloc_write_oldest_gen(&trans, &iter, k));
+       if (ret) {
+               bch_err(c, "error writing oldest_gen: %i", ret);
+               goto err;
        }
-       bch2_trans_iter_exit(&trans, &iter);
 
        c->gc_gens_btree        = 0;
        c->gc_gens_pos          = POS_MIN;
index 05bd0d60b5fb929bda98d319d75789c422e4424c..e014dfc7e62eca3040a1f0a2be154487fba6b1d0 100644 (file)
@@ -1663,6 +1663,9 @@ out:
 int __must_check bch2_btree_path_traverse(struct btree_trans *trans,
                                          struct btree_path *path, unsigned flags)
 {
+       if (!(local_clock() % 128))
+               return btree_trans_restart(trans);
+
        if (path->uptodate < BTREE_ITER_NEED_RELOCK)
                return 0;
 
@@ -3242,12 +3245,19 @@ void bch2_trans_begin(struct btree_trans *trans)
                        path->preserve = false;
        }
 
-       bch2_trans_cond_resched(trans);
+       if (!trans->restarted &&
+           (need_resched() ||
+            ktime_get_ns() - trans->last_begin_time > BTREE_TRANS_MAX_LOCK_HOLD_TIME_NS)) {
+               bch2_trans_unlock(trans);
+               cond_resched();
+               bch2_trans_relock(trans);
+       }
 
        if (trans->restarted)
                bch2_btree_path_traverse_all(trans);
 
        trans->restarted = false;
+       trans->last_begin_time = ktime_get_ns();
 }
 
 static void bch2_trans_alloc_paths(struct btree_trans *trans, struct bch_fs *c)
@@ -3281,6 +3291,7 @@ void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c,
        memset(trans, 0, sizeof(*trans));
        trans->c                = c;
        trans->fn               = fn;
+       trans->last_begin_time  = ktime_get_ns();
        trans->task             = current;
 
        bch2_trans_alloc_paths(trans, c);
index 22da3e4bfe715c116914f403b41107a47c7a0aae..4b9d03b875ef4740dd7524b441abb6801d11c0c6 100644 (file)
@@ -367,8 +367,10 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans,
        do {                                                            \
                bch2_trans_begin(_trans);                               \
                (_k) = bch2_btree_iter_peek_type(&(_iter), (_flags));   \
-               if (!(_k).k)                                            \
+               if (!(_k).k) {                                          \
+                       _ret = 0;                                       \
                        break;                                          \
+               }                                                       \
                                                                        \
                _ret = bkey_err(_k) ?: (_do);                           \
                if (!_ret)                                              \
index 1e4d1fecc6bd336539025e15d7de80b3f50028a0..be12c9ff7ea3cf7d823d8a6dff0ce162892c6d23 100644 (file)
@@ -384,10 +384,13 @@ struct btree_trans_commit_hook {
 
 #define BTREE_TRANS_MEM_MAX    (1U << 16)
 
+#define BTREE_TRANS_MAX_LOCK_HOLD_TIME_NS      10000
+
 struct btree_trans {
        struct bch_fs           *c;
        const char              *fn;
        struct list_head        list;
+       u64                     last_begin_time;
        struct btree            *locking;
        unsigned                locking_path_idx;
        struct bpos             locking_pos;
index f7d12915c1ccda5f7674dcb9fca7098a030f2bba..0581f3c7a0d82c421ef2250486112dc107abe391 100644 (file)
@@ -7,6 +7,7 @@ enum {
        OPEN_BUCKETS_EMPTY =    2048,
        FREELIST_EMPTY,         /* Allocator thread not keeping up */
        INSUFFICIENT_DEVICES,
+       NEED_SNAPSHOT_CLEANUP,
 };
 
 #endif /* _BCACHFES_ERRCODE_H */
index 787658f52982bc9977ca0d6b1e903f117d90e9c1..6165878c2ddc4c90172b4aa7d7c7d0d370f97788 100644 (file)
@@ -489,6 +489,28 @@ static inline void snapshots_seen_init(struct snapshots_seen *s)
        memset(s, 0, sizeof(*s));
 }
 
+static int snapshots_seen_add(struct bch_fs *c, struct snapshots_seen *s, u32 id)
+{
+       struct snapshots_seen_entry *i, n = { id, id };
+       int ret;
+
+       darray_for_each(s->ids, i) {
+               if (n.equiv < i->equiv)
+                       break;
+
+               if (i->equiv == n.equiv) {
+                       bch_err(c, "adding duplicate snapshot in snapshots_seen_add()");
+                       return -EINVAL;
+               }
+       }
+
+       ret = darray_insert_item(&s->ids, i - s->ids.data, n);
+       if (ret)
+               bch_err(c, "error reallocating snapshots_seen table (size %zu)",
+                       s->ids.size);
+       return ret;
+}
+
 static int snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s,
                                 enum btree_id btree_id, struct bpos pos)
 {
@@ -512,7 +534,7 @@ static int snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s,
                                        bch2_btree_ids[btree_id],
                                        pos.inode, pos.offset,
                                        i->id, n.id, n.equiv);
-                               return -EINVAL;
+                               return -NEED_SNAPSHOT_CLEANUP;
                        }
 
                        return 0;
@@ -954,7 +976,7 @@ static int check_inode(struct btree_trans *trans,
        }
 
        if (do_update) {
-               ret = write_inode(trans, &u, iter->pos.snapshot);
+               ret = __write_inode(trans, &u, iter->pos.snapshot);
                if (ret)
                        bch_err(c, "error in fsck: error %i "
                                "updating inode", ret);
@@ -1216,20 +1238,38 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
                goto out;
        }
 
-       if (!bch2_snapshot_internal_node(c, equiv.snapshot)) {
-               for_each_visible_inode(c, s, inode, equiv.snapshot, i) {
-                       if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_I_SIZE_DIRTY) &&
-                                       k.k->type != KEY_TYPE_reservation &&
-                                       k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9, c,
-                                       "extent type %u offset %llu past end of inode %llu, i_size %llu",
-                                       k.k->type, k.k->p.offset, k.k->p.inode, i->inode.bi_size)) {
-                               bch2_fs_lazy_rw(c);
-                               ret = bch2_btree_delete_range_trans(trans, BTREE_ID_extents,
-                                               SPOS(k.k->p.inode, round_up(i->inode.bi_size, block_bytes(c)) >> 9,
-                                                    equiv.snapshot),
-                                               POS(k.k->p.inode, U64_MAX),
-                                               0, NULL) ?: -EINTR;
-                               goto out;
+       /*
+        * Check inodes in reverse order, from oldest snapshots to newest, so
+        * that we emit the fewest number of whiteouts necessary:
+        */
+       for (i = inode->inodes.data + inode->inodes.nr - 1;
+            i >= inode->inodes.data;
+            --i) {
+               if (i->snapshot > equiv.snapshot ||
+                   !key_visible_in_snapshot(c, s, i->snapshot, equiv.snapshot))
+                       continue;
+
+               if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_I_SIZE_DIRTY) &&
+                               k.k->type != KEY_TYPE_reservation &&
+                               k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9, c,
+                               "extent type past end of inode %llu:%u, i_size %llu\n  %s",
+                               i->inode.bi_inum, i->snapshot, i->inode.bi_size,
+                               (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
+                       struct btree_iter iter2;
+
+                       bch2_trans_copy_iter(&iter2, iter);
+                       bch2_btree_iter_set_snapshot(&iter2, i->snapshot);
+                       ret =   bch2_btree_iter_traverse(&iter2) ?:
+                               bch2_btree_delete_at(trans, &iter2,
+                                       BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
+                       bch2_trans_iter_exit(trans, &iter2);
+                       if (ret)
+                               goto err;
+
+                       if (i->snapshot != equiv.snapshot) {
+                               ret = snapshots_seen_add(c, s, i->snapshot);
+                               if (ret)
+                                       goto err;
                        }
                }
        }
@@ -2140,7 +2180,7 @@ static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links
                            d.v->d_type != DT_SUBVOL)
                                inc_link(c, &s, links, range_start, range_end,
                                         le64_to_cpu(d.v->d_inum),
-                                        d.k->p.snapshot);
+                                        bch2_snapshot_equiv(c, d.k->p.snapshot));
                        break;
                }
        }
@@ -2326,7 +2366,9 @@ static int fix_reflink_p(struct bch_fs *c)
  */
 int bch2_fsck_full(struct bch_fs *c)
 {
-       return  bch2_fs_check_snapshots(c) ?:
+       int ret;
+again:
+       ret =   bch2_fs_check_snapshots(c) ?:
                bch2_fs_check_subvols(c) ?:
                bch2_delete_dead_snapshots(c) ?:
                check_inodes(c, true) ?:
@@ -2337,6 +2379,13 @@ int bch2_fsck_full(struct bch_fs *c)
                check_directory_structure(c) ?:
                check_nlinks(c) ?:
                fix_reflink_p(c);
+
+       if (ret == -NEED_SNAPSHOT_CLEANUP) {
+               set_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags);
+               goto again;
+       }
+
+       return ret;
 }
 
 int bch2_fsck_walk_inodes_only(struct bch_fs *c)
index d764dc7abfe8b76ccc6db1507b53b31a17488ea1..e35a6d1f31e907a5d9d706201ba6cc9428761cd8 100644 (file)
@@ -455,22 +455,14 @@ static void bch2_sb_quota_read(struct bch_fs *c)
 }
 
 static int bch2_fs_quota_read_inode(struct btree_trans *trans,
-                                   struct btree_iter *iter)
+                                   struct btree_iter *iter,
+                                   struct bkey_s_c k)
 {
        struct bch_fs *c = trans->c;
        struct bch_inode_unpacked u;
        struct bch_subvolume subvolume;
-       struct bkey_s_c k;
        int ret;
 
-       k = bch2_btree_iter_peek(iter);
-       ret = bkey_err(k);
-       if (ret)
-               return ret;
-
-       if (!k.k)
-               return 1;
-
        ret = bch2_snapshot_get_subvol(trans, k.k->p.snapshot, &subvolume);
        if (ret)
                return ret;
@@ -503,6 +495,7 @@ int bch2_fs_quota_read(struct bch_fs *c)
        struct bch_memquota_type *q;
        struct btree_trans trans;
        struct btree_iter iter;
+       struct bkey_s_c k;
        int ret;
 
        mutex_lock(&c->sb_lock);
@@ -517,18 +510,18 @@ int bch2_fs_quota_read(struct bch_fs *c)
 
        bch2_trans_init(&trans, c, 0, 0);
 
-       bch2_trans_iter_init(&trans, &iter, BTREE_ID_inodes, POS_MIN,
+       ret = for_each_btree_key2(&trans, iter, BTREE_ID_inodes,
+                            POS_MIN,
                             BTREE_ITER_INTENT|
                             BTREE_ITER_PREFETCH|
-                            BTREE_ITER_ALL_SNAPSHOTS);
-       do {
-               ret = lockrestart_do(&trans,
-                                    bch2_fs_quota_read_inode(&trans, &iter));
-       } while (!ret);
-       bch2_trans_iter_exit(&trans, &iter);
+                            BTREE_ITER_ALL_SNAPSHOTS,
+                            k,
+               bch2_fs_quota_read_inode(&trans, &iter, k));
+       if (ret)
+               bch_err(c, "err reading inodes in quota init: %i", ret);
 
        bch2_trans_exit(&trans);
-       return ret < 0 ? ret : 0;
+       return ret;
 }
 
 /* Enable/disable/delete quotas for an entire filesystem: */