From: Kent Overstreet Date: Sun, 17 Jul 2022 09:20:47 +0000 (-0400) Subject: Update bcachefs sources to d82da7126f fixup! bcachefs: for_each_btree_key2() X-Git-Url: https://git.sesse.net/?a=commitdiff_plain;h=79d39bd7cec19ea03356652d7add5e99b4da3a97;p=bcachefs-tools-debian Update bcachefs sources to d82da7126f fixup! bcachefs: for_each_btree_key2() --- diff --git a/.bcachefs_revision b/.bcachefs_revision index 32ec17b..0040d1d 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -2f11bb05b0df04b7e0e190fd27b111e9f20cd749 +d82da7126f2db01a0d320ad7ed13cd4016c36221 diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index c3f3e1f..4fd3b68 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -81,6 +81,11 @@ static inline u64 local_clock(void) return sched_clock(); } +static inline u64 ktime_get_ns(void) +{ + return sched_clock(); +} + #define jiffies nsecs_to_jiffies(sched_clock()) #endif diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c index fc77747..ca1f45c 100644 --- a/libbcachefs/alloc_background.c +++ b/libbcachefs/alloc_background.c @@ -1007,16 +1007,10 @@ int bch2_check_alloc_to_lru_refs(struct bch_fs *c) bch2_trans_init(&trans, c, 0, 0); - for_each_btree_key(&trans, iter, BTREE_ID_alloc, POS_MIN, - BTREE_ITER_PREFETCH, k, ret) { - ret = commit_do(&trans, NULL, NULL, - BTREE_INSERT_NOFAIL| - BTREE_INSERT_LAZY_RW, - bch2_check_alloc_to_lru_ref(&trans, &iter)); - if (ret) - break; - } - bch2_trans_iter_exit(&trans, &iter); + for_each_btree_key_commit(&trans, iter, BTREE_ID_alloc, + POS_MIN, BTREE_ITER_PREFETCH, k, + NULL, NULL, BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW, + bch2_check_alloc_to_lru_ref(&trans, &iter)); bch2_trans_exit(&trans); return ret < 0 ? ret : 0; diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c index e260689..214529b 100644 --- a/libbcachefs/btree_gc.c +++ b/libbcachefs/btree_gc.c @@ -1848,10 +1848,15 @@ out: return ret; } -static bool gc_btree_gens_key(struct bch_fs *c, struct bkey_s_c k) +static int gc_btree_gens_key(struct btree_trans *trans, + struct btree_iter *iter, + struct bkey_s_c k) { + struct bch_fs *c = trans->c; struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const struct bch_extent_ptr *ptr; + struct bkey_i *u; + int ret; percpu_down_read(&c->mark_lock); bkey_for_each_ptr(ptrs, ptr) { @@ -1859,7 +1864,7 @@ static bool gc_btree_gens_key(struct bch_fs *c, struct bkey_s_c k) if (ptr_stale(ca, ptr) > 16) { percpu_up_read(&c->mark_lock); - return true; + goto update; } } @@ -1871,77 +1876,27 @@ static bool gc_btree_gens_key(struct bch_fs *c, struct bkey_s_c k) *gen = ptr->gen; } percpu_up_read(&c->mark_lock); + return 0; +update: + u = bch2_trans_kmalloc(trans, bkey_bytes(k.k)); + ret = PTR_ERR_OR_ZERO(u); + if (ret) + return ret; - return false; -} - -/* - * For recalculating oldest gen, we only need to walk keys in leaf nodes; btree - * node pointers currently never have cached pointers that can become stale: - */ -static int bch2_gc_btree_gens(struct btree_trans *trans, enum btree_id btree_id) -{ - struct bch_fs *c = trans->c; - struct btree_iter iter; - struct bkey_s_c k; - struct bkey_buf sk; - int ret = 0, commit_err = 0; - - bch2_bkey_buf_init(&sk); - - bch2_trans_iter_init(trans, &iter, btree_id, POS_MIN, - BTREE_ITER_PREFETCH| - BTREE_ITER_NOT_EXTENTS| - BTREE_ITER_ALL_SNAPSHOTS); - - while ((bch2_trans_begin(trans), - k = bch2_btree_iter_peek(&iter)).k) { - ret = bkey_err(k); - - if (ret == -EINTR) - continue; - if (ret) - break; - - c->gc_gens_pos = iter.pos; - - if (gc_btree_gens_key(c, k) && !commit_err) { - bch2_bkey_buf_reassemble(&sk, c, k); - bch2_extent_normalize(c, bkey_i_to_s(sk.k)); - - commit_err = - bch2_trans_update(trans, &iter, sk.k, 0) ?: - bch2_trans_commit(trans, NULL, NULL, - BTREE_INSERT_NOWAIT| - BTREE_INSERT_NOFAIL); - if (commit_err == -EINTR) { - commit_err = 0; - continue; - } - } - - bch2_btree_iter_advance(&iter); - } - bch2_trans_iter_exit(trans, &iter); - - bch2_bkey_buf_exit(&sk, c); + bkey_reassemble(u, k); - return ret; + bch2_extent_normalize(c, bkey_i_to_s(u)); + return bch2_trans_update(trans, iter, u, 0); } -static int bch2_alloc_write_oldest_gen(struct btree_trans *trans, struct btree_iter *iter) +static int bch2_alloc_write_oldest_gen(struct btree_trans *trans, struct btree_iter *iter, + struct bkey_s_c k) { struct bch_dev *ca = bch_dev_bkey_exists(trans->c, iter->pos.inode); - struct bkey_s_c k; struct bch_alloc_v4 a; struct bkey_i_alloc_v4 *a_mut; int ret; - k = bch2_btree_iter_peek_slot(iter); - ret = bkey_err(k); - if (ret) - return ret; - bch2_alloc_to_v4(k, &a); if (a.oldest_gen == ca->oldest_gen[iter->pos.offset]) @@ -2001,26 +1956,35 @@ int bch2_gc_gens(struct bch_fs *c) for (i = 0; i < BTREE_ID_NR; i++) if ((1 << i) & BTREE_ID_HAS_PTRS) { + struct btree_iter iter; + struct bkey_s_c k; + c->gc_gens_btree = i; c->gc_gens_pos = POS_MIN; - ret = bch2_gc_btree_gens(&trans, i); + ret = for_each_btree_key_commit(&trans, iter, i, + POS_MIN, + BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, + k, + NULL, NULL, + BTREE_INSERT_NOFAIL, + gc_btree_gens_key(&trans, &iter, k)); if (ret) { bch_err(c, "error recalculating oldest_gen: %i", ret); goto err; } } - for_each_btree_key(&trans, iter, BTREE_ID_alloc, POS_MIN, - BTREE_ITER_PREFETCH, k, ret) { - ret = commit_do(&trans, NULL, NULL, - BTREE_INSERT_NOFAIL, - bch2_alloc_write_oldest_gen(&trans, &iter)); - if (ret) { - bch_err(c, "error writing oldest_gen: %i", ret); - break; - } + ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_alloc, + POS_MIN, + BTREE_ITER_PREFETCH, + k, + NULL, NULL, + BTREE_INSERT_NOFAIL, + bch2_alloc_write_oldest_gen(&trans, &iter, k)); + if (ret) { + bch_err(c, "error writing oldest_gen: %i", ret); + goto err; } - bch2_trans_iter_exit(&trans, &iter); c->gc_gens_btree = 0; c->gc_gens_pos = POS_MIN; diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c index 05bd0d6..e014dfc 100644 --- a/libbcachefs/btree_iter.c +++ b/libbcachefs/btree_iter.c @@ -1663,6 +1663,9 @@ out: int __must_check bch2_btree_path_traverse(struct btree_trans *trans, struct btree_path *path, unsigned flags) { + if (!(local_clock() % 128)) + return btree_trans_restart(trans); + if (path->uptodate < BTREE_ITER_NEED_RELOCK) return 0; @@ -3242,12 +3245,19 @@ void bch2_trans_begin(struct btree_trans *trans) path->preserve = false; } - bch2_trans_cond_resched(trans); + if (!trans->restarted && + (need_resched() || + ktime_get_ns() - trans->last_begin_time > BTREE_TRANS_MAX_LOCK_HOLD_TIME_NS)) { + bch2_trans_unlock(trans); + cond_resched(); + bch2_trans_relock(trans); + } if (trans->restarted) bch2_btree_path_traverse_all(trans); trans->restarted = false; + trans->last_begin_time = ktime_get_ns(); } static void bch2_trans_alloc_paths(struct btree_trans *trans, struct bch_fs *c) @@ -3281,6 +3291,7 @@ void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, memset(trans, 0, sizeof(*trans)); trans->c = c; trans->fn = fn; + trans->last_begin_time = ktime_get_ns(); trans->task = current; bch2_trans_alloc_paths(trans, c); diff --git a/libbcachefs/btree_iter.h b/libbcachefs/btree_iter.h index 22da3e4..4b9d03b 100644 --- a/libbcachefs/btree_iter.h +++ b/libbcachefs/btree_iter.h @@ -367,8 +367,10 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans, do { \ bch2_trans_begin(_trans); \ (_k) = bch2_btree_iter_peek_type(&(_iter), (_flags)); \ - if (!(_k).k) \ + if (!(_k).k) { \ + _ret = 0; \ break; \ + } \ \ _ret = bkey_err(_k) ?: (_do); \ if (!_ret) \ diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h index 1e4d1fe..be12c9f 100644 --- a/libbcachefs/btree_types.h +++ b/libbcachefs/btree_types.h @@ -384,10 +384,13 @@ struct btree_trans_commit_hook { #define BTREE_TRANS_MEM_MAX (1U << 16) +#define BTREE_TRANS_MAX_LOCK_HOLD_TIME_NS 10000 + struct btree_trans { struct bch_fs *c; const char *fn; struct list_head list; + u64 last_begin_time; struct btree *locking; unsigned locking_path_idx; struct bpos locking_pos; diff --git a/libbcachefs/errcode.h b/libbcachefs/errcode.h index f7d1291..0581f3c 100644 --- a/libbcachefs/errcode.h +++ b/libbcachefs/errcode.h @@ -7,6 +7,7 @@ enum { OPEN_BUCKETS_EMPTY = 2048, FREELIST_EMPTY, /* Allocator thread not keeping up */ INSUFFICIENT_DEVICES, + NEED_SNAPSHOT_CLEANUP, }; #endif /* _BCACHFES_ERRCODE_H */ diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c index 787658f..6165878 100644 --- a/libbcachefs/fsck.c +++ b/libbcachefs/fsck.c @@ -489,6 +489,28 @@ static inline void snapshots_seen_init(struct snapshots_seen *s) memset(s, 0, sizeof(*s)); } +static int snapshots_seen_add(struct bch_fs *c, struct snapshots_seen *s, u32 id) +{ + struct snapshots_seen_entry *i, n = { id, id }; + int ret; + + darray_for_each(s->ids, i) { + if (n.equiv < i->equiv) + break; + + if (i->equiv == n.equiv) { + bch_err(c, "adding duplicate snapshot in snapshots_seen_add()"); + return -EINVAL; + } + } + + ret = darray_insert_item(&s->ids, i - s->ids.data, n); + if (ret) + bch_err(c, "error reallocating snapshots_seen table (size %zu)", + s->ids.size); + return ret; +} + static int snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s, enum btree_id btree_id, struct bpos pos) { @@ -512,7 +534,7 @@ static int snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s, bch2_btree_ids[btree_id], pos.inode, pos.offset, i->id, n.id, n.equiv); - return -EINVAL; + return -NEED_SNAPSHOT_CLEANUP; } return 0; @@ -954,7 +976,7 @@ static int check_inode(struct btree_trans *trans, } if (do_update) { - ret = write_inode(trans, &u, iter->pos.snapshot); + ret = __write_inode(trans, &u, iter->pos.snapshot); if (ret) bch_err(c, "error in fsck: error %i " "updating inode", ret); @@ -1216,20 +1238,38 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter, goto out; } - if (!bch2_snapshot_internal_node(c, equiv.snapshot)) { - for_each_visible_inode(c, s, inode, equiv.snapshot, i) { - if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_I_SIZE_DIRTY) && - k.k->type != KEY_TYPE_reservation && - k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9, c, - "extent type %u offset %llu past end of inode %llu, i_size %llu", - k.k->type, k.k->p.offset, k.k->p.inode, i->inode.bi_size)) { - bch2_fs_lazy_rw(c); - ret = bch2_btree_delete_range_trans(trans, BTREE_ID_extents, - SPOS(k.k->p.inode, round_up(i->inode.bi_size, block_bytes(c)) >> 9, - equiv.snapshot), - POS(k.k->p.inode, U64_MAX), - 0, NULL) ?: -EINTR; - goto out; + /* + * Check inodes in reverse order, from oldest snapshots to newest, so + * that we emit the fewest number of whiteouts necessary: + */ + for (i = inode->inodes.data + inode->inodes.nr - 1; + i >= inode->inodes.data; + --i) { + if (i->snapshot > equiv.snapshot || + !key_visible_in_snapshot(c, s, i->snapshot, equiv.snapshot)) + continue; + + if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_I_SIZE_DIRTY) && + k.k->type != KEY_TYPE_reservation && + k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9, c, + "extent type past end of inode %llu:%u, i_size %llu\n %s", + i->inode.bi_inum, i->snapshot, i->inode.bi_size, + (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { + struct btree_iter iter2; + + bch2_trans_copy_iter(&iter2, iter); + bch2_btree_iter_set_snapshot(&iter2, i->snapshot); + ret = bch2_btree_iter_traverse(&iter2) ?: + bch2_btree_delete_at(trans, &iter2, + BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); + bch2_trans_iter_exit(trans, &iter2); + if (ret) + goto err; + + if (i->snapshot != equiv.snapshot) { + ret = snapshots_seen_add(c, s, i->snapshot); + if (ret) + goto err; } } } @@ -2140,7 +2180,7 @@ static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links d.v->d_type != DT_SUBVOL) inc_link(c, &s, links, range_start, range_end, le64_to_cpu(d.v->d_inum), - d.k->p.snapshot); + bch2_snapshot_equiv(c, d.k->p.snapshot)); break; } } @@ -2326,7 +2366,9 @@ static int fix_reflink_p(struct bch_fs *c) */ int bch2_fsck_full(struct bch_fs *c) { - return bch2_fs_check_snapshots(c) ?: + int ret; +again: + ret = bch2_fs_check_snapshots(c) ?: bch2_fs_check_subvols(c) ?: bch2_delete_dead_snapshots(c) ?: check_inodes(c, true) ?: @@ -2337,6 +2379,13 @@ int bch2_fsck_full(struct bch_fs *c) check_directory_structure(c) ?: check_nlinks(c) ?: fix_reflink_p(c); + + if (ret == -NEED_SNAPSHOT_CLEANUP) { + set_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags); + goto again; + } + + return ret; } int bch2_fsck_walk_inodes_only(struct bch_fs *c) diff --git a/libbcachefs/quota.c b/libbcachefs/quota.c index d764dc7..e35a6d1 100644 --- a/libbcachefs/quota.c +++ b/libbcachefs/quota.c @@ -455,22 +455,14 @@ static void bch2_sb_quota_read(struct bch_fs *c) } static int bch2_fs_quota_read_inode(struct btree_trans *trans, - struct btree_iter *iter) + struct btree_iter *iter, + struct bkey_s_c k) { struct bch_fs *c = trans->c; struct bch_inode_unpacked u; struct bch_subvolume subvolume; - struct bkey_s_c k; int ret; - k = bch2_btree_iter_peek(iter); - ret = bkey_err(k); - if (ret) - return ret; - - if (!k.k) - return 1; - ret = bch2_snapshot_get_subvol(trans, k.k->p.snapshot, &subvolume); if (ret) return ret; @@ -503,6 +495,7 @@ int bch2_fs_quota_read(struct bch_fs *c) struct bch_memquota_type *q; struct btree_trans trans; struct btree_iter iter; + struct bkey_s_c k; int ret; mutex_lock(&c->sb_lock); @@ -517,18 +510,18 @@ int bch2_fs_quota_read(struct bch_fs *c) bch2_trans_init(&trans, c, 0, 0); - bch2_trans_iter_init(&trans, &iter, BTREE_ID_inodes, POS_MIN, + ret = for_each_btree_key2(&trans, iter, BTREE_ID_inodes, + POS_MIN, BTREE_ITER_INTENT| BTREE_ITER_PREFETCH| - BTREE_ITER_ALL_SNAPSHOTS); - do { - ret = lockrestart_do(&trans, - bch2_fs_quota_read_inode(&trans, &iter)); - } while (!ret); - bch2_trans_iter_exit(&trans, &iter); + BTREE_ITER_ALL_SNAPSHOTS, + k, + bch2_fs_quota_read_inode(&trans, &iter, k)); + if (ret) + bch_err(c, "err reading inodes in quota init: %i", ret); bch2_trans_exit(&trans); - return ret < 0 ? ret : 0; + return ret; } /* Enable/disable/delete quotas for an entire filesystem: */