From 55142cd0b5ef2a2150d4708dad0c3fd54a3ffd39 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 22 Jun 2021 20:45:30 -0400 Subject: [PATCH] Update bcachefs sources to ca3cfad39f fixup! bcachefs: Improve iter->should_be_locked --- .bcachefs_revision | 2 +- Kbuild.include | 6 +- libbcachefs/bcachefs.h | 1 + libbcachefs/btree_gc.c | 70 ++-- libbcachefs/btree_io.c | 39 +- libbcachefs/btree_iter.c | 69 ++-- libbcachefs/btree_iter.h | 17 +- libbcachefs/btree_types.h | 6 +- libbcachefs/btree_update_interior.c | 6 +- libbcachefs/btree_update_leaf.c | 77 ++-- libbcachefs/buckets.c | 581 +++++++++++++--------------- libbcachefs/buckets.h | 23 +- libbcachefs/ec.c | 6 +- libbcachefs/error.c | 1 + libbcachefs/error.h | 1 + libbcachefs/extent_update.c | 4 + libbcachefs/extents.h | 11 + libbcachefs/fs-common.c | 3 +- libbcachefs/fs-io.c | 50 +-- libbcachefs/fs-io.h | 3 +- libbcachefs/fs.c | 19 +- libbcachefs/fs.h | 4 + libbcachefs/fsck.c | 14 +- libbcachefs/inode.c | 17 +- libbcachefs/journal.c | 2 +- libbcachefs/recovery.c | 15 +- libbcachefs/reflink.c | 6 +- libbcachefs/super-io.c | 2 +- libbcachefs/super.c | 8 +- libbcachefs/tests.c | 9 +- libbcachefs/util.c | 6 +- libbcachefs/util.h | 5 +- linux/six.c | 2 - 33 files changed, 518 insertions(+), 567 deletions(-) diff --git a/.bcachefs_revision b/.bcachefs_revision index 14415fb..57211d2 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -b00cf89c56077d5e91c134d066ba2b45bc3136d7 +ca3cfad39f915257eecda93599c8b434ce80e0b2 diff --git a/Kbuild.include b/Kbuild.include index 08e0111..509e085 100644 --- a/Kbuild.include +++ b/Kbuild.include @@ -141,13 +141,9 @@ cc-ifversion = $(shell [ $(CONFIG_GCC_VERSION)0 $(1) $(2)000 ] && echo $(3) || e # Usage: KBUILD_LDFLAGS += $(call ld-option, -X, -Y) ld-option = $(call try-run, $(LD) $(KBUILD_LDFLAGS) $(1) -v,$(1),$(2),$(3)) -# ld-version -# Note this is mainly for HJ Lu's 3 number binutil versions -ld-version = $(shell $(LD) --version | $(srctree)/scripts/ld-version.sh) - # ld-ifversion # Usage: $(call ld-ifversion, -ge, 22252, y) -ld-ifversion = $(shell [ $(ld-version) $(1) $(2) ] && echo $(3) || echo $(4)) +ld-ifversion = $(shell [ $(CONFIG_LD_VERSION)0 $(1) $(2)0 ] && echo $(3) || echo $(4)) ###### diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h index 8be95d8..125cf09 100644 --- a/libbcachefs/bcachefs.h +++ b/libbcachefs/bcachefs.h @@ -497,6 +497,7 @@ enum { BCH_FS_ALLOCATOR_STOPPING, BCH_FS_INITIAL_GC_DONE, BCH_FS_INITIAL_GC_UNFIXED, + BCH_FS_TOPOLOGY_REPAIR_DONE, BCH_FS_BTREE_INTERIOR_REPLAY_DONE, BCH_FS_FSCK_DONE, BCH_FS_STARTED, diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c index 8af6d32..5c03eb9 100644 --- a/libbcachefs/btree_gc.c +++ b/libbcachefs/btree_gc.c @@ -86,12 +86,17 @@ static int bch2_gc_check_topology(struct bch_fs *c, if (bpos_cmp(expected_start, bp->v.min_key)) { bch2_topology_error(c); - if (fsck_err(c, "btree node with incorrect min_key at btree %s level %u:\n" - " prev %s\n" - " cur %s", - bch2_btree_ids[b->c.btree_id], b->c.level, - buf1, - (bch2_bkey_val_to_text(&PBUF(buf2), c, bkey_i_to_s_c(cur.k)), buf2))) { + if (__fsck_err(c, + FSCK_CAN_FIX| + FSCK_CAN_IGNORE| + FSCK_NO_RATELIMIT, + "btree node with incorrect min_key at btree %s level %u:\n" + " prev %s\n" + " cur %s", + bch2_btree_ids[b->c.btree_id], b->c.level, + buf1, + (bch2_bkey_val_to_text(&PBUF(buf2), c, bkey_i_to_s_c(cur.k)), buf2)) && + !test_bit(BCH_FS_TOPOLOGY_REPAIR_DONE, &c->flags)) { bch_info(c, "Halting mark and sweep to start topology repair pass"); return FSCK_ERR_START_TOPOLOGY_REPAIR; } else { @@ -103,12 +108,17 @@ static int bch2_gc_check_topology(struct bch_fs *c, if (is_last && bpos_cmp(cur.k->k.p, node_end)) { bch2_topology_error(c); - if (fsck_err(c, "btree node with incorrect max_key at btree %s level %u:\n" - " %s\n" - " expected %s", - bch2_btree_ids[b->c.btree_id], b->c.level, - (bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(cur.k)), buf1), - (bch2_bpos_to_text(&PBUF(buf2), node_end), buf2))) { + if (__fsck_err(c, + FSCK_CAN_FIX| + FSCK_CAN_IGNORE| + FSCK_NO_RATELIMIT, + "btree node with incorrect max_key at btree %s level %u:\n" + " %s\n" + " expected %s", + bch2_btree_ids[b->c.btree_id], b->c.level, + (bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(cur.k)), buf1), + (bch2_bpos_to_text(&PBUF(buf2), node_end), buf2)) && + !test_bit(BCH_FS_TOPOLOGY_REPAIR_DONE, &c->flags)) { bch_info(c, "Halting mark and sweep to start topology repair pass"); return FSCK_ERR_START_TOPOLOGY_REPAIR; } else { @@ -286,6 +296,7 @@ static int bch2_btree_repair_topology_recurse(struct bch_fs *c, struct btree *b) if (!b->c.level) return 0; again: + prev = NULL; have_child = dropped_children = false; bch2_bkey_buf_init(&prev_k); bch2_bkey_buf_init(&cur_k); @@ -310,7 +321,7 @@ again: ret = bch2_journal_key_delete(c, b->c.btree_id, b->c.level, cur_k.k->k.p); if (ret) - goto err; + break; continue; } @@ -328,19 +339,24 @@ again: ret = bch2_journal_key_delete(c, b->c.btree_id, b->c.level, cur_k.k->k.p); if (ret) - goto err; + break; continue; } if (prev) six_unlock_read(&prev->c.lock); + prev = NULL; if (ret == DROP_PREV_NODE) { bch2_btree_node_evict(c, prev_k.k); ret = bch2_journal_key_delete(c, b->c.btree_id, b->c.level, prev_k.k->k.p); if (ret) - goto err; + break; + + bch2_btree_and_journal_iter_exit(&iter); + bch2_bkey_buf_exit(&prev_k, c); + bch2_bkey_buf_exit(&cur_k, c); goto again; } else if (ret) break; @@ -640,6 +656,7 @@ static int bch2_gc_mark_key(struct bch_fs *c, enum btree_id btree_id, struct bkey_ptrs_c ptrs; const struct bch_extent_ptr *ptr; unsigned flags = + BTREE_TRIGGER_INSERT| BTREE_TRIGGER_GC| (initial ? BTREE_TRIGGER_NOATOMIC : 0); int ret = 0; @@ -681,7 +698,7 @@ static int bch2_gc_mark_key(struct bch_fs *c, enum btree_id btree_id, *max_stale = max(*max_stale, ptr_stale(ca, ptr)); } - bch2_mark_key(c, *k, 0, k->k->size, NULL, 0, flags); + bch2_mark_key(c, *k, flags); fsck_err: err: if (ret) @@ -854,11 +871,16 @@ static int bch2_gc_btree_init_recurse(struct bch_fs *c, struct btree *b, if (ret == -EIO) { bch2_topology_error(c); - if (fsck_err(c, "Unreadable btree node at btree %s level %u:\n" - " %s", - bch2_btree_ids[b->c.btree_id], - b->c.level - 1, - (bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(cur.k)), buf))) { + if (__fsck_err(c, + FSCK_CAN_FIX| + FSCK_CAN_IGNORE| + FSCK_NO_RATELIMIT, + "Unreadable btree node at btree %s level %u:\n" + " %s", + bch2_btree_ids[b->c.btree_id], + b->c.level - 1, + (bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(cur.k)), buf)) && + !test_bit(BCH_FS_TOPOLOGY_REPAIR_DONE, &c->flags)) { ret = FSCK_ERR_START_TOPOLOGY_REPAIR; bch_info(c, "Halting mark and sweep to start topology repair pass"); goto fsck_err; @@ -1052,8 +1074,7 @@ static void bch2_mark_pending_btree_node_frees(struct bch_fs *c) for_each_pending_btree_node_free(c, as, d) if (d->index_update_done) bch2_mark_key(c, bkey_i_to_s_c(&d->key), - 0, 0, NULL, 0, - BTREE_TRIGGER_GC); + BTREE_TRIGGER_INSERT|BTREE_TRIGGER_GC); mutex_unlock(&c->btree_interior_update_lock); } @@ -1558,11 +1579,14 @@ again: if (ret) goto out; bch_info(c, "topology repair pass done"); + + set_bit(BCH_FS_TOPOLOGY_REPAIR_DONE, &c->flags); } ret = bch2_gc_btrees(c, initial, metadata_only); if (ret == FSCK_ERR_START_TOPOLOGY_REPAIR && + !test_bit(BCH_FS_TOPOLOGY_REPAIR_DONE, &c->flags) && !test_bit(BCH_FS_INITIAL_GC_DONE, &c->flags)) { set_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags); ret = 0; diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c index 47cfd8a..f2ccb5f 100644 --- a/libbcachefs/btree_io.c +++ b/libbcachefs/btree_io.c @@ -1179,31 +1179,27 @@ static void btree_node_read_all_replicas_done(struct closure *cl) container_of(cl, struct btree_node_read_all, cl); struct bch_fs *c = ra->c; struct btree *b = ra->b; - bool have_good_copy = false; bool dump_bset_maps = false; bool have_retry = false; - int ret = 0, write = READ; + int ret = 0, best = -1, write = READ; unsigned i, written, written2; __le64 seq = b->key.k.type == KEY_TYPE_btree_ptr_v2 ? bkey_i_to_btree_ptr_v2(&b->key)->v.seq : 0; for (i = 0; i < ra->nr; i++) { + struct btree_node *bn = ra->buf[i]; + if (ra->err[i]) continue; - if (!have_good_copy) { - memcpy(b->data, ra->buf[i], btree_bytes(c)); - have_good_copy = true; - written = btree_node_sectors_written(c, b->data); - } + if (le64_to_cpu(bn->magic) != bset_magic(c) || + (seq && seq != bn->keys.seq)) + continue; - /* Try to get the right btree node: */ - if (have_good_copy && - seq && - b->data->keys.seq != seq && - ((struct btree_node *) ra->buf[i])->keys.seq == seq) { - memcpy(b->data, ra->buf[i], btree_bytes(c)); - written = btree_node_sectors_written(c, b->data); + if (best < 0) { + best = i; + written = btree_node_sectors_written(c, bn); + continue; } written2 = btree_node_sectors_written(c, ra->buf[i]); @@ -1213,14 +1209,14 @@ static void btree_node_read_all_replicas_done(struct closure *cl) btree_err_on(btree_node_has_extra_bsets(c, written2, ra->buf[i]), BTREE_ERR_FIXABLE, c, NULL, b, NULL, "found bset signature after last bset") || - btree_err_on(memcmp(b->data, ra->buf[i], written << 9), + btree_err_on(memcmp(ra->buf[best], ra->buf[i], written << 9), BTREE_ERR_FIXABLE, c, NULL, b, NULL, "btree node replicas content mismatch")) dump_bset_maps = true; if (written2 > written) { written = written2; - memcpy(b->data, ra->buf[i], btree_bytes(c)); + best = i; } } fsck_err: @@ -1273,9 +1269,14 @@ fsck_err: } } - if (have_good_copy) - bch2_btree_node_read_done(c, NULL, b, false); - else + if (best >= 0) { + memcpy(b->data, ra->buf[best], btree_bytes(c)); + ret = bch2_btree_node_read_done(c, NULL, b, false); + } else { + ret = -1; + } + + if (ret) set_btree_node_read_error(b); for (i = 0; i < ra->nr; i++) { diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c index 64ceea4..c356032 100644 --- a/libbcachefs/btree_iter.c +++ b/libbcachefs/btree_iter.c @@ -1808,35 +1808,54 @@ struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *iter) struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) { - struct bpos search_key = btree_iter_search_key(iter); + struct bpos search_key; struct bkey_s_c k; int ret; - EBUG_ON(btree_iter_type(iter) != BTREE_ITER_KEYS); + EBUG_ON(btree_iter_type(iter) != BTREE_ITER_KEYS && + btree_iter_type(iter) != BTREE_ITER_CACHED); bch2_btree_iter_verify(iter); bch2_btree_iter_verify_entry_exit(iter); - btree_iter_set_search_pos(iter, search_key); - /* extents can't span inode numbers: */ if ((iter->flags & BTREE_ITER_IS_EXTENTS) && - iter->pos.offset == KEY_OFFSET_MAX) { + unlikely(iter->pos.offset == KEY_OFFSET_MAX)) { if (iter->pos.inode == KEY_INODE_MAX) return bkey_s_c_null; bch2_btree_iter_set_pos(iter, bpos_nosnap_successor(iter->pos)); } + search_key = btree_iter_search_key(iter); + btree_iter_set_search_pos(iter, search_key); + ret = btree_iter_traverse(iter); if (unlikely(ret)) return bkey_s_c_err(ret); - if (!(iter->flags & BTREE_ITER_IS_EXTENTS)) { - struct bkey_i *next_update = btree_trans_peek_updates(iter, search_key); + if (btree_iter_type(iter) == BTREE_ITER_CACHED || + !(iter->flags & BTREE_ITER_IS_EXTENTS)) { + struct bkey_i *next_update; + struct bkey_cached *ck; - k = btree_iter_level_peek_all(iter, &iter->l[0]); - EBUG_ON(k.k && bkey_deleted(k.k) && bkey_cmp(k.k->p, iter->pos) == 0); + switch (btree_iter_type(iter)) { + case BTREE_ITER_KEYS: + k = btree_iter_level_peek_all(iter, &iter->l[0]); + EBUG_ON(k.k && bkey_deleted(k.k) && bkey_cmp(k.k->p, iter->pos) == 0); + break; + case BTREE_ITER_CACHED: + ck = (void *) iter->l[0].b; + EBUG_ON(iter->btree_id != ck->key.btree_id || + bkey_cmp(iter->pos, ck->key.pos)); + BUG_ON(!ck->valid); + k = bkey_i_to_s_c(ck->k); + break; + case BTREE_ITER_NODES: + BUG(); + } + + next_update = btree_trans_peek_updates(iter, search_key); if (next_update && (!k.k || bpos_cmp(next_update->k.p, k.k->p) <= 0)) { iter->k = next_update->k; @@ -1913,34 +1932,6 @@ struct bkey_s_c bch2_btree_iter_prev_slot(struct btree_iter *iter) return bch2_btree_iter_peek_slot(iter); } -struct bkey_s_c bch2_btree_iter_peek_cached(struct btree_iter *iter) -{ - struct bkey_i *next_update; - struct bkey_cached *ck; - int ret; - - EBUG_ON(btree_iter_type(iter) != BTREE_ITER_CACHED); - bch2_btree_iter_verify(iter); - - next_update = btree_trans_peek_updates(iter, iter->pos); - if (next_update && !bpos_cmp(next_update->k.p, iter->pos)) - return bkey_i_to_s_c(next_update); - - ret = btree_iter_traverse(iter); - if (unlikely(ret)) - return bkey_s_c_err(ret); - - ck = (void *) iter->l[0].b; - - EBUG_ON(iter->btree_id != ck->key.btree_id || - bkey_cmp(iter->pos, ck->key.pos)); - BUG_ON(!ck->valid); - - iter->should_be_locked = true; - - return bkey_i_to_s_c(ck->k); -} - static inline void bch2_btree_iter_init(struct btree_trans *trans, struct btree_iter *iter, enum btree_id btree_id) { @@ -2304,9 +2295,11 @@ void bch2_trans_reset(struct btree_trans *trans, unsigned flags) { struct btree_iter *iter; - trans_for_each_iter(trans, iter) + trans_for_each_iter(trans, iter) { iter->flags &= ~(BTREE_ITER_KEEP_UNTIL_COMMIT| BTREE_ITER_SET_POS_AFTER_COMMIT); + iter->should_be_locked = false; + } bch2_trans_unlink_iters(trans); diff --git a/libbcachefs/btree_iter.h b/libbcachefs/btree_iter.h index ba98cfe..6efea28 100644 --- a/libbcachefs/btree_iter.h +++ b/libbcachefs/btree_iter.h @@ -160,8 +160,6 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *); struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *); struct bkey_s_c bch2_btree_iter_prev_slot(struct btree_iter *); -struct bkey_s_c bch2_btree_iter_peek_cached(struct btree_iter *); - bool bch2_btree_iter_advance(struct btree_iter *); bool bch2_btree_iter_rewind(struct btree_iter *); @@ -178,6 +176,12 @@ static inline void bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos iter->should_be_locked = false; } +static inline void bch2_btree_iter_set_pos_to_extent_start(struct btree_iter *iter) +{ + BUG_ON(!(iter->flags & BTREE_ITER_IS_EXTENTS)); + iter->pos = bkey_start_pos(&iter->k); +} + static inline struct btree_iter *btree_iter_child(struct btree_iter *iter) { return iter->child_idx == U8_MAX ? NULL @@ -224,12 +228,9 @@ static inline int bch2_trans_cond_resched(struct btree_trans *trans) static inline struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, unsigned flags) { - if ((flags & BTREE_ITER_TYPE) == BTREE_ITER_CACHED) - return bch2_btree_iter_peek_cached(iter); - else - return flags & BTREE_ITER_SLOTS - ? bch2_btree_iter_peek_slot(iter) - : bch2_btree_iter_peek(iter); + return flags & BTREE_ITER_SLOTS + ? bch2_btree_iter_peek_slot(iter) + : bch2_btree_iter_peek(iter); } static inline struct bkey_s_c __bch2_btree_iter_next(struct btree_iter *iter, diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h index 982a3b1..a923757 100644 --- a/libbcachefs/btree_types.h +++ b/libbcachefs/btree_types.h @@ -644,7 +644,6 @@ enum btree_trigger_flags { __BTREE_TRIGGER_INSERT, __BTREE_TRIGGER_OVERWRITE, - __BTREE_TRIGGER_OVERWRITE_SPLIT, __BTREE_TRIGGER_GC, __BTREE_TRIGGER_BUCKET_INVALIDATE, @@ -655,12 +654,15 @@ enum btree_trigger_flags { #define BTREE_TRIGGER_INSERT (1U << __BTREE_TRIGGER_INSERT) #define BTREE_TRIGGER_OVERWRITE (1U << __BTREE_TRIGGER_OVERWRITE) -#define BTREE_TRIGGER_OVERWRITE_SPLIT (1U << __BTREE_TRIGGER_OVERWRITE_SPLIT) #define BTREE_TRIGGER_GC (1U << __BTREE_TRIGGER_GC) #define BTREE_TRIGGER_BUCKET_INVALIDATE (1U << __BTREE_TRIGGER_BUCKET_INVALIDATE) #define BTREE_TRIGGER_NOATOMIC (1U << __BTREE_TRIGGER_NOATOMIC) +#define BTREE_TRIGGER_WANTS_OLD_AND_NEW \ + ((1U << KEY_TYPE_stripe)| \ + (1U << KEY_TYPE_inode)) + static inline bool btree_node_type_needs_gc(enum btree_node_type type) { return BTREE_NODE_TYPE_HAS_TRIGGERS & (1U << type); diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c index 2d8093d..cd21459 100644 --- a/libbcachefs/btree_update_interior.c +++ b/libbcachefs/btree_update_interior.c @@ -511,7 +511,7 @@ static int btree_update_nodes_written_trans(struct btree_trans *trans, ret = bch2_trans_mark_key(trans, bkey_s_c_null, bkey_i_to_s_c(k), - 0, 0, BTREE_TRIGGER_INSERT); + BTREE_TRIGGER_INSERT); if (ret) return ret; } @@ -520,7 +520,7 @@ static int btree_update_nodes_written_trans(struct btree_trans *trans, ret = bch2_trans_mark_key(trans, bkey_i_to_s_c(k), bkey_s_c_null, - 0, 0, BTREE_TRIGGER_OVERWRITE); + BTREE_TRIGGER_OVERWRITE); if (ret) return ret; } @@ -937,6 +937,8 @@ bch2_btree_update_start(struct btree_iter *iter, unsigned level, int journal_flags = 0; int ret = 0; + BUG_ON(!iter->should_be_locked); + if (flags & BTREE_INSERT_JOURNAL_RESERVED) journal_flags |= JOURNAL_RES_GET_RESERVED; diff --git a/libbcachefs/btree_update_leaf.c b/libbcachefs/btree_update_leaf.c index 7939fbb..fa08470 100644 --- a/libbcachefs/btree_update_leaf.c +++ b/libbcachefs/btree_update_leaf.c @@ -362,7 +362,7 @@ static noinline void bch2_trans_mark_gc(struct btree_trans *trans) BUG_ON(btree_iter_type(i->iter) == BTREE_ITER_CACHED); if (gc_visited(c, gc_pos_btree_node(i->iter->l[0].b))) - bch2_mark_update(trans, i->iter, i->k, NULL, + bch2_mark_update(trans, i->iter, i->k, i->trigger_flags|BTREE_TRIGGER_GC); } } @@ -468,7 +468,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, trans_for_each_update(trans, i) if (BTREE_NODE_TYPE_HAS_MEM_TRIGGERS & (1U << i->bkey_type)) bch2_mark_update(trans, i->iter, i->k, - NULL, i->trigger_flags); + i->trigger_flags); if (marking && trans->fs_usage_deltas) bch2_trans_fs_usage_apply(trans, trans->fs_usage_deltas); @@ -771,28 +771,6 @@ bch2_trans_commit_get_rw_cold(struct btree_trans *trans) return 0; } -static int __btree_delete_at(struct btree_trans *trans, enum btree_id btree_id, - struct bpos pos, unsigned trigger_flags) -{ - struct btree_iter *iter; - struct bkey_i *update; - int ret; - - update = bch2_trans_kmalloc(trans, sizeof(struct bkey)); - if ((ret = PTR_ERR_OR_ZERO(update))) - return ret; - - bkey_init(&update->k); - update->k.p = pos; - - iter = bch2_trans_get_iter(trans, btree_id, pos, - BTREE_ITER_NOT_EXTENTS| - BTREE_ITER_INTENT); - bch2_trans_update(trans, iter, update, trigger_flags); - bch2_trans_iter_put(trans, iter); - return 0; -} - static int extent_handle_overwrites(struct btree_trans *trans, struct btree_insert_entry *i) { @@ -812,8 +790,6 @@ static int extent_handle_overwrites(struct btree_trans *trans, goto out; if (bch2_bkey_maybe_mergable(k.k, &i->k->k)) { - struct bpos l_pos = k.k->p; - update = bch2_trans_kmalloc(trans, bkey_bytes(k.k)); if ((ret = PTR_ERR_OR_ZERO(update))) goto out; @@ -821,8 +797,11 @@ static int extent_handle_overwrites(struct btree_trans *trans, bkey_reassemble(update, k); if (bch2_bkey_merge(c, bkey_i_to_s(update), bkey_i_to_s_c(i->k))) { - ret = __btree_delete_at(trans, i->btree_id, l_pos, - i->trigger_flags); + update_iter = bch2_trans_copy_iter(trans, iter); + ret = bch2_btree_delete_at(trans, update_iter, + i->trigger_flags); + bch2_trans_iter_put(trans, update_iter); + if (ret) goto out; @@ -857,13 +836,20 @@ static int extent_handle_overwrites(struct btree_trans *trans, update_iter = bch2_trans_get_iter(trans, i->btree_id, update->k.p, BTREE_ITER_NOT_EXTENTS| BTREE_ITER_INTENT); + ret = bch2_btree_iter_traverse(update_iter); + if (ret) + goto out; + bch2_trans_update(trans, update_iter, update, i->trigger_flags); bch2_trans_iter_put(trans, update_iter); } if (bkey_cmp(k.k->p, i->k->k.p) <= 0) { - ret = __btree_delete_at(trans, i->btree_id, k.k->p, - i->trigger_flags); + update_iter = bch2_trans_copy_iter(trans, iter); + ret = bch2_btree_delete_at(trans, update_iter, + i->trigger_flags); + bch2_trans_iter_put(trans, update_iter); + if (ret) goto out; } @@ -876,12 +862,7 @@ static int extent_handle_overwrites(struct btree_trans *trans, bkey_reassemble(update, k); bch2_cut_front(i->k->k.p, update); - update_iter = bch2_trans_get_iter(trans, i->btree_id, update->k.p, - BTREE_ITER_NOT_EXTENTS| - BTREE_ITER_INTENT); - bch2_trans_update(trans, update_iter, update, - i->trigger_flags); - bch2_trans_iter_put(trans, update_iter); + bch2_trans_update(trans, iter, update, i->trigger_flags); goto out; } next: @@ -1042,6 +1023,7 @@ int bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter, int ret = 0; BUG_ON(trans->nr_updates >= BTREE_ITER_MAX); + BUG_ON(!iter->should_be_locked); #ifdef CONFIG_BCACHEFS_DEBUG trans_for_each_update(trans, i) @@ -1105,7 +1087,8 @@ int __bch2_btree_insert(struct btree_trans *trans, iter = bch2_trans_get_iter(trans, id, bkey_start_pos(&k->k), BTREE_ITER_INTENT); - ret = bch2_trans_update(trans, iter, k, 0); + ret = bch2_btree_iter_traverse(iter) ?: + bch2_trans_update(trans, iter, k, 0); bch2_trans_iter_put(trans, iter); return ret; } @@ -1127,16 +1110,17 @@ int bch2_btree_insert(struct bch_fs *c, enum btree_id id, } int bch2_btree_delete_at(struct btree_trans *trans, - struct btree_iter *iter, unsigned flags) + struct btree_iter *iter, unsigned trigger_flags) { - struct bkey_i k; + struct bkey_i *k; - bkey_init(&k.k); - k.k.p = iter->pos; + k = bch2_trans_kmalloc(trans, sizeof(*k)); + if (IS_ERR(k)) + return PTR_ERR(k); - return bch2_trans_update(trans, iter, &k, 0) ?: - bch2_trans_commit(trans, NULL, NULL, - BTREE_INSERT_NOFAIL|flags); + bkey_init(&k->k); + k->k.p = iter->pos; + return bch2_trans_update(trans, iter, k, trigger_flags); } int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id, @@ -1149,13 +1133,12 @@ int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id, iter = bch2_trans_get_iter(trans, id, start, BTREE_ITER_INTENT); retry: - while ((k = bch2_btree_iter_peek(iter)).k && + while ((bch2_trans_begin(trans), + (k = bch2_btree_iter_peek(iter)).k) && !(ret = bkey_err(k)) && bkey_cmp(iter->pos, end) < 0) { struct bkey_i delete; - bch2_trans_begin(trans); - bkey_init(&delete.k); /* diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c index 20862a4..76945e5 100644 --- a/libbcachefs/buckets.c +++ b/libbcachefs/buckets.c @@ -351,17 +351,16 @@ static inline void account_bucket(struct bch_fs_usage *fs_usage, } static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca, - struct bch_fs_usage *fs_usage, struct bucket_mark old, struct bucket_mark new, u64 journal_seq, bool gc) { + struct bch_fs_usage *fs_usage; struct bch_dev_usage *u; percpu_rwsem_assert_held(&c->mark_lock); preempt_disable(); - if (!fs_usage) - fs_usage = fs_usage_ptr(c, journal_seq, gc); + fs_usage = fs_usage_ptr(c, journal_seq, gc); u = dev_usage_ptr(ca, journal_seq, gc); if (bucket_type(old)) @@ -390,30 +389,48 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca, bch2_wake_allocator(ca); } +static inline int __update_replicas(struct bch_fs *c, + struct bch_fs_usage *fs_usage, + struct bch_replicas_entry *r, + s64 sectors) +{ + int idx = bch2_replicas_entry_idx(c, r); + + if (idx < 0) + return -1; + + fs_usage_data_type_to_base(fs_usage, r->data_type, sectors); + fs_usage->replicas[idx] += sectors; + return 0; +} + static inline int update_replicas(struct bch_fs *c, - struct bch_fs_usage *fs_usage, - struct bch_replicas_entry *r, - s64 sectors) + struct bch_replicas_entry *r, s64 sectors, + unsigned journal_seq, bool gc) { + struct bch_fs_usage __percpu *fs_usage; int idx = bch2_replicas_entry_idx(c, r); if (idx < 0) return -1; + preempt_disable(); + fs_usage = fs_usage_ptr(c, journal_seq, gc); fs_usage_data_type_to_base(fs_usage, r->data_type, sectors); fs_usage->replicas[idx] += sectors; + preempt_enable(); return 0; } static inline int update_cached_sectors(struct bch_fs *c, - struct bch_fs_usage *fs_usage, - unsigned dev, s64 sectors) + unsigned dev, s64 sectors, + unsigned journal_seq, bool gc) { struct bch_replicas_padded r; bch2_replicas_entry_cached(&r.e, dev); - return update_replicas(c, fs_usage, &r.e, sectors); + return update_replicas(c, &r.e, sectors, journal_seq, gc); } static struct replicas_delta_list * @@ -507,7 +524,6 @@ void bch2_mark_alloc_bucket(struct bch_fs *c, struct bch_dev *ca, static int bch2_mark_alloc(struct bch_fs *c, struct bkey_s_c old, struct bkey_s_c new, - struct bch_fs_usage *fs_usage, u64 journal_seq, unsigned flags) { bool gc = flags & BTREE_TRIGGER_GC; @@ -549,7 +565,7 @@ static int bch2_mark_alloc(struct bch_fs *c, } })); - bch2_dev_usage_update(c, ca, fs_usage, old_m, m, journal_seq, gc); + bch2_dev_usage_update(c, ca, old_m, m, journal_seq, gc); g->io_time[READ] = u.read_time; g->io_time[WRITE] = u.write_time; @@ -565,8 +581,8 @@ static int bch2_mark_alloc(struct bch_fs *c, if ((flags & BTREE_TRIGGER_BUCKET_INVALIDATE) && old_m.cached_sectors) { - if (update_cached_sectors(c, fs_usage, ca->dev_idx, - -old_m.cached_sectors)) { + if (update_cached_sectors(c, ca->dev_idx, -old_m.cached_sectors, + journal_seq, gc)) { bch2_fs_fatal_error(c, "bch2_mark_alloc(): no replicas entry while updating cached sectors"); return -1; } @@ -617,8 +633,7 @@ static int __bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca, old.dirty_sectors, sectors); if (c) - bch2_dev_usage_update(c, ca, fs_usage_ptr(c, 0, gc), - old, new, 0, gc); + bch2_dev_usage_update(c, ca, old, new, 0, gc); return 0; } @@ -637,54 +652,20 @@ void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca, if (b >= ca->mi.nbuckets) return; - preempt_disable(); - if (likely(c)) { do_mark_fn(__bch2_mark_metadata_bucket, c, pos, flags, ca, b, type, sectors); } else { __bch2_mark_metadata_bucket(c, ca, b, type, sectors, 0); } - - preempt_enable(); } -static s64 disk_sectors_scaled(unsigned n, unsigned d, unsigned sectors) +static s64 ptr_disk_sectors(s64 sectors, struct extent_ptr_decoded p) { - return DIV_ROUND_UP(sectors * n, d); -} - -static s64 __ptr_disk_sectors_delta(unsigned old_size, - unsigned offset, s64 delta, - unsigned flags, - unsigned n, unsigned d) -{ - BUG_ON(!n || !d); - - if (flags & BTREE_TRIGGER_OVERWRITE_SPLIT) { - BUG_ON(offset + -delta > old_size); - - return -disk_sectors_scaled(n, d, old_size) + - disk_sectors_scaled(n, d, offset) + - disk_sectors_scaled(n, d, old_size - offset + delta); - } else if (flags & BTREE_TRIGGER_OVERWRITE) { - BUG_ON(offset + -delta > old_size); - - return -disk_sectors_scaled(n, d, old_size) + - disk_sectors_scaled(n, d, old_size + delta); - } else { - return disk_sectors_scaled(n, d, delta); - } -} - -static s64 ptr_disk_sectors_delta(struct extent_ptr_decoded p, - unsigned offset, s64 delta, - unsigned flags) -{ - return __ptr_disk_sectors_delta(p.crc.live_size, - offset, delta, flags, - p.crc.compressed_size, - p.crc.uncompressed_size); + return p.crc.compression_type + ? DIV_ROUND_UP(sectors * p.crc.compressed_size, + p.crc.uncompressed_size) + : sectors; } static int check_bucket_ref(struct bch_fs *c, struct bkey_s_c k, @@ -763,7 +744,6 @@ static int check_bucket_ref(struct bch_fs *c, struct bkey_s_c k, static int mark_stripe_bucket(struct bch_fs *c, struct bkey_s_c k, unsigned ptr_idx, - struct bch_fs_usage *fs_usage, u64 journal_seq, unsigned flags) { const struct bch_stripe *s = bkey_s_c_to_stripe(k).v; @@ -805,7 +785,7 @@ static int mark_stripe_bucket(struct bch_fs *c, struct bkey_s_c k, g->stripe = k.k->p.offset; g->stripe_redundancy = s->nr_redundant; - bch2_dev_usage_update(c, ca, fs_usage, old, new, journal_seq, gc); + bch2_dev_usage_update(c, ca, old, new, journal_seq, gc); return 0; } @@ -834,7 +814,6 @@ static int __mark_pointer(struct bch_fs *c, struct bkey_s_c k, static int bch2_mark_pointer(struct bch_fs *c, struct bkey_s_c k, struct extent_ptr_decoded p, s64 sectors, enum bch_data_type data_type, - struct bch_fs_usage *fs_usage, u64 journal_seq, unsigned flags) { bool gc = flags & BTREE_TRIGGER_GC; @@ -872,7 +851,7 @@ static int bch2_mark_pointer(struct bch_fs *c, struct bkey_s_c k, old.v.counter, new.v.counter)) != old.v.counter); - bch2_dev_usage_update(c, ca, fs_usage, old, new, journal_seq, gc); + bch2_dev_usage_update(c, ca, old, new, journal_seq, gc); BUG_ON(!gc && bucket_became_unavailable(old, new)); @@ -882,8 +861,8 @@ static int bch2_mark_pointer(struct bch_fs *c, struct bkey_s_c k, static int bch2_mark_stripe_ptr(struct bch_fs *c, struct bch_extent_stripe_ptr p, enum bch_data_type data_type, - struct bch_fs_usage *fs_usage, - s64 sectors, unsigned flags) + s64 sectors, + unsigned journal_seq, unsigned flags) { bool gc = flags & BTREE_TRIGGER_GC; struct bch_replicas_padded r; @@ -918,40 +897,46 @@ static int bch2_mark_stripe_ptr(struct bch_fs *c, spin_unlock(&c->ec_stripes_heap_lock); r.e.data_type = data_type; - update_replicas(c, fs_usage, &r.e, sectors); + update_replicas(c, &r.e, sectors, journal_seq, gc); return 0; } static int bch2_mark_extent(struct bch_fs *c, struct bkey_s_c old, struct bkey_s_c new, - unsigned offset, s64 sectors, - enum bch_data_type data_type, - struct bch_fs_usage *fs_usage, unsigned journal_seq, unsigned flags) { + bool gc = flags & BTREE_TRIGGER_GC; struct bkey_s_c k = flags & BTREE_TRIGGER_INSERT ? new : old; struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const union bch_extent_entry *entry; struct extent_ptr_decoded p; struct bch_replicas_padded r; + enum bch_data_type data_type = bkey_is_btree_ptr(k.k) + ? BCH_DATA_btree + : BCH_DATA_user; + s64 sectors = bkey_is_btree_ptr(k.k) + ? c->opts.btree_node_size + : k.k->size; s64 dirty_sectors = 0; bool stale; int ret; + BUG_ON((flags & (BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE)) == + (BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE)); + + if (flags & BTREE_TRIGGER_OVERWRITE) + sectors = -sectors; + r.e.data_type = data_type; r.e.nr_devs = 0; r.e.nr_required = 1; - BUG_ON(!sectors); - bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { - s64 disk_sectors = data_type == BCH_DATA_btree - ? sectors - : ptr_disk_sectors_delta(p, offset, sectors, flags); + s64 disk_sectors = ptr_disk_sectors(sectors, p); ret = bch2_mark_pointer(c, k, p, disk_sectors, data_type, - fs_usage, journal_seq, flags); + journal_seq, flags); if (ret < 0) return ret; @@ -959,8 +944,8 @@ static int bch2_mark_extent(struct bch_fs *c, if (p.ptr.cached) { if (!stale) - if (update_cached_sectors(c, fs_usage, p.ptr.dev, - disk_sectors)) { + if (update_cached_sectors(c, p.ptr.dev, disk_sectors, + journal_seq, gc)) { bch2_fs_fatal_error(c, "bch2_mark_extent(): no replicas entry while updating cached sectors"); return -1; @@ -970,7 +955,7 @@ static int bch2_mark_extent(struct bch_fs *c, r.e.devs[r.e.nr_devs++] = p.ptr.dev; } else { ret = bch2_mark_stripe_ptr(c, p.ec, data_type, - fs_usage, disk_sectors, flags); + disk_sectors, journal_seq, flags); if (ret) return ret; @@ -984,7 +969,7 @@ static int bch2_mark_extent(struct bch_fs *c, } if (r.e.nr_devs) { - if (update_replicas(c, fs_usage, &r.e, dirty_sectors)) { + if (update_replicas(c, &r.e, dirty_sectors, journal_seq, gc)) { char buf[200]; bch2_bkey_val_to_text(&PBUF(buf), c, k); @@ -997,9 +982,8 @@ static int bch2_mark_extent(struct bch_fs *c, } static int bch2_mark_stripe(struct bch_fs *c, - struct bkey_s_c old, struct bkey_s_c new, - struct bch_fs_usage *fs_usage, - u64 journal_seq, unsigned flags) + struct bkey_s_c old, struct bkey_s_c new, + u64 journal_seq, unsigned flags) { bool gc = flags & BTREE_TRIGGER_GC; size_t idx = new.k->p.offset; @@ -1060,14 +1044,14 @@ static int bch2_mark_stripe(struct bch_fs *c, m->blocks_nonempty = 0; for (i = 0; i < new_s->nr_blocks; i++) { - ret = mark_stripe_bucket(c, new, i, fs_usage, - journal_seq, flags); + ret = mark_stripe_bucket(c, new, i, journal_seq, flags); if (ret) return ret; } - if (update_replicas(c, fs_usage, &m->r.e, - ((s64) m->sectors * m->nr_redundant))) { + if (update_replicas(c, &m->r.e, + ((s64) m->sectors * m->nr_redundant), + journal_seq, gc)) { char buf[200]; bch2_bkey_val_to_text(&PBUF(buf), c, new); @@ -1079,13 +1063,47 @@ static int bch2_mark_stripe(struct bch_fs *c, return 0; } -static int __bch2_mark_reflink_p(struct bch_fs *c, - struct bkey_s_c_reflink_p p, - u64 idx, unsigned sectors, - unsigned front_frag, - unsigned back_frag, - unsigned flags, - size_t *r_idx) +static int bch2_mark_inode(struct bch_fs *c, + struct bkey_s_c old, struct bkey_s_c new, + u64 journal_seq, unsigned flags) +{ + struct bch_fs_usage __percpu *fs_usage; + + preempt_disable(); + fs_usage = fs_usage_ptr(c, journal_seq, flags & BTREE_TRIGGER_GC); + fs_usage->nr_inodes += new.k->type == KEY_TYPE_inode; + fs_usage->nr_inodes -= old.k->type == KEY_TYPE_inode; + preempt_enable(); + return 0; +} + +static int bch2_mark_reservation(struct bch_fs *c, + struct bkey_s_c old, struct bkey_s_c new, + u64 journal_seq, unsigned flags) +{ + struct bkey_s_c k = flags & BTREE_TRIGGER_INSERT ? new : old; + struct bch_fs_usage __percpu *fs_usage; + unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas; + s64 sectors = (s64) k.k->size; + + if (flags & BTREE_TRIGGER_OVERWRITE) + sectors = -sectors; + sectors *= replicas; + + preempt_disable(); + fs_usage = fs_usage_ptr(c, journal_seq, flags & BTREE_TRIGGER_GC); + replicas = clamp_t(unsigned, replicas, 1, + ARRAY_SIZE(fs_usage->persistent_reserved)); + + fs_usage->reserved += sectors; + fs_usage->persistent_reserved[replicas - 1] += sectors; + preempt_enable(); + + return 0; +} + +static s64 __bch2_mark_reflink_p(struct bch_fs *c, struct bkey_s_c_reflink_p p, + u64 idx, unsigned flags, size_t *r_idx) { struct reflink_gc *r; int add = !(flags & BTREE_TRIGGER_OVERWRITE) ? 1 : -1; @@ -1096,7 +1114,7 @@ static int __bch2_mark_reflink_p(struct bch_fs *c, r = genradix_ptr(&c->reflink_gc_table, *r_idx); BUG_ON(!r); - if (r->offset > idx) + if (idx < r->offset) break; (*r_idx)++; } @@ -1104,7 +1122,7 @@ static int __bch2_mark_reflink_p(struct bch_fs *c, BUG_ON((s64) r->refcount + add < 0); r->refcount += add; - return min_t(u64, sectors, r->offset - idx); + return r->offset - idx; not_found: bch2_fs_inconsistent(c, "%llu:%llu len %u points to nonexistent indirect extent %llu", @@ -1114,22 +1132,19 @@ not_found: } static int bch2_mark_reflink_p(struct bch_fs *c, - struct bkey_s_c_reflink_p p, unsigned offset, - s64 sectors, unsigned flags) + struct bkey_s_c old, struct bkey_s_c new, + u64 journal_seq, unsigned flags) { - u64 idx = le64_to_cpu(p.v->idx) + offset; + struct bkey_s_c k = flags & BTREE_TRIGGER_INSERT ? new : old; + struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); struct reflink_gc *ref; size_t l, r, m; - unsigned front_frag, back_frag; + u64 idx = le64_to_cpu(p.v->idx); + unsigned sectors = p.k->size; s64 ret = 0; - if (sectors < 0) - sectors = -sectors; - - BUG_ON(offset + sectors > p.k->size); - - front_frag = offset; - back_frag = offset + sectors; + BUG_ON((flags & (BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE)) == + (BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE)); l = 0; r = c->reflink_gc_nr; @@ -1144,11 +1159,11 @@ static int bch2_mark_reflink_p(struct bch_fs *c, } while (sectors) { - ret = __bch2_mark_reflink_p(c, p, idx, sectors, - front_frag, back_frag, flags, &l); + ret = __bch2_mark_reflink_p(c, p, idx, flags, &l); if (ret < 0) return ret; + ret = min_t(s64, ret, sectors); idx += ret; sectors -= ret; } @@ -1159,99 +1174,55 @@ static int bch2_mark_reflink_p(struct bch_fs *c, static int bch2_mark_key_locked(struct bch_fs *c, struct bkey_s_c old, struct bkey_s_c new, - unsigned offset, s64 sectors, - struct bch_fs_usage *fs_usage, u64 journal_seq, unsigned flags) { struct bkey_s_c k = flags & BTREE_TRIGGER_INSERT ? new : old; - int ret = 0; BUG_ON(!(flags & (BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE))); - preempt_disable(); - - if (!fs_usage || (flags & BTREE_TRIGGER_GC)) - fs_usage = fs_usage_ptr(c, journal_seq, - flags & BTREE_TRIGGER_GC); - switch (k.k->type) { case KEY_TYPE_alloc: case KEY_TYPE_alloc_v2: - ret = bch2_mark_alloc(c, old, new, fs_usage, journal_seq, flags); - break; + return bch2_mark_alloc(c, old, new, journal_seq, flags); case KEY_TYPE_btree_ptr: case KEY_TYPE_btree_ptr_v2: - sectors = !(flags & BTREE_TRIGGER_OVERWRITE) - ? c->opts.btree_node_size - : -c->opts.btree_node_size; - - ret = bch2_mark_extent(c, old, new, offset, sectors, - BCH_DATA_btree, fs_usage, journal_seq, flags); - break; case KEY_TYPE_extent: case KEY_TYPE_reflink_v: - ret = bch2_mark_extent(c, old, new, offset, sectors, - BCH_DATA_user, fs_usage, journal_seq, flags); - break; + return bch2_mark_extent(c, old, new, journal_seq, flags); case KEY_TYPE_stripe: - ret = bch2_mark_stripe(c, old, new, fs_usage, journal_seq, flags); - break; + return bch2_mark_stripe(c, old, new, journal_seq, flags); case KEY_TYPE_inode: - fs_usage->nr_inodes += new.k->type == KEY_TYPE_inode; - fs_usage->nr_inodes -= old.k->type == KEY_TYPE_inode; - break; - case KEY_TYPE_reservation: { - unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas; - - sectors *= replicas; - replicas = clamp_t(unsigned, replicas, 1, - ARRAY_SIZE(fs_usage->persistent_reserved)); - - fs_usage->reserved += sectors; - fs_usage->persistent_reserved[replicas - 1] += sectors; - break; - } + return bch2_mark_inode(c, old, new, journal_seq, flags); + case KEY_TYPE_reservation: + return bch2_mark_reservation(c, old, new, journal_seq, flags); case KEY_TYPE_reflink_p: - ret = bch2_mark_reflink_p(c, bkey_s_c_to_reflink_p(k), - offset, sectors, flags); - break; + return bch2_mark_reflink_p(c, old, new, journal_seq, flags); + default: + return 0; } - - preempt_enable(); - - return ret; } -int bch2_mark_key(struct bch_fs *c, struct bkey_s_c new, - unsigned offset, s64 sectors, - struct bch_fs_usage *fs_usage, - u64 journal_seq, unsigned flags) +int bch2_mark_key(struct bch_fs *c, struct bkey_s_c new, unsigned flags) { - struct bkey deleted; + struct bkey deleted = KEY(0, 0, 0); struct bkey_s_c old = (struct bkey_s_c) { &deleted, NULL }; int ret; - bkey_init(&deleted); - percpu_down_read(&c->mark_lock); - ret = bch2_mark_key_locked(c, old, new, offset, sectors, - fs_usage, journal_seq, - BTREE_TRIGGER_INSERT|flags); + ret = bch2_mark_key_locked(c, old, new, 0, flags); percpu_up_read(&c->mark_lock); return ret; } -int bch2_mark_update(struct btree_trans *trans, - struct btree_iter *iter, - struct bkey_i *new, - struct bch_fs_usage *fs_usage, - unsigned flags) +int bch2_mark_update(struct btree_trans *trans, struct btree_iter *iter, + struct bkey_i *new, unsigned flags) { struct bch_fs *c = trans->c; + struct bkey _deleted = KEY(0, 0, 0); + struct bkey_s_c deleted = (struct bkey_s_c) { &_deleted, NULL }; struct bkey_s_c old; - struct bkey unpacked; - int ret = 0; + int iter_flags, ret; if (unlikely(flags & BTREE_TRIGGER_NORUN)) return 0; @@ -1259,87 +1230,36 @@ int bch2_mark_update(struct btree_trans *trans, if (!btree_node_type_needs_gc(iter->btree_id)) return 0; - bkey_init(&unpacked); - old = (struct bkey_s_c) { &unpacked, NULL }; + if (likely(!(iter->flags & BTREE_ITER_CACHED_NOFILL))) { + iter_flags = iter->flags & BTREE_ITER_WITH_UPDATES; + iter->flags &= ~BTREE_ITER_WITH_UPDATES; - if (!btree_node_type_is_extents(iter->btree_id)) { - /* iterators should be uptodate, shouldn't get errors here: */ - if (btree_iter_type(iter) != BTREE_ITER_CACHED) { - old = bch2_btree_iter_peek_slot(iter); - BUG_ON(bkey_err(old)); - } else { - struct bkey_cached *ck = (void *) iter->l[0].b; + old = bch2_btree_iter_peek_slot(iter); + iter->flags |= iter_flags; - if (ck->valid) - old = bkey_i_to_s_c(ck->k); - } + ret = bkey_err(old); + if (ret) + return ret; + } else { + /* + * If BTREE_ITER_CACHED_NOFILL was used, we better not be + * running triggers that do anything on removal (alloc btree): + */ + old = deleted; + } - if (old.k->type == new->k.type) { - bch2_mark_key_locked(c, old, bkey_i_to_s_c(new), 0, 0, - fs_usage, trans->journal_res.seq, + if (old.k->type == new->k.type && + ((1U << old.k->type) & BTREE_TRIGGER_WANTS_OLD_AND_NEW)) { + ret = bch2_mark_key_locked(c, old, bkey_i_to_s_c(new), + trans->journal_res.seq, BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE|flags); - - } else { - bch2_mark_key_locked(c, old, bkey_i_to_s_c(new), 0, 0, - fs_usage, trans->journal_res.seq, - BTREE_TRIGGER_INSERT|flags); - bch2_mark_key_locked(c, old, bkey_i_to_s_c(new), 0, 0, - fs_usage, trans->journal_res.seq, - BTREE_TRIGGER_OVERWRITE|flags); - } } else { - struct btree_iter *copy; - - BUG_ON(btree_iter_type(iter) == BTREE_ITER_CACHED); - bch2_mark_key_locked(c, old, bkey_i_to_s_c(new), - 0, new->k.size, - fs_usage, trans->journal_res.seq, - BTREE_TRIGGER_INSERT|flags); - - copy = bch2_trans_copy_iter(trans, iter); - - for_each_btree_key_continue(copy, 0, old, ret) { - unsigned offset = 0; - s64 sectors = -((s64) old.k->size); - - flags |= BTREE_TRIGGER_OVERWRITE; - - if (bkey_cmp(new->k.p, bkey_start_pos(old.k)) <= 0) - break; - - switch (bch2_extent_overlap(&new->k, old.k)) { - case BCH_EXTENT_OVERLAP_ALL: - offset = 0; - sectors = -((s64) old.k->size); - break; - case BCH_EXTENT_OVERLAP_BACK: - offset = bkey_start_offset(&new->k) - - bkey_start_offset(old.k); - sectors = bkey_start_offset(&new->k) - - old.k->p.offset; - break; - case BCH_EXTENT_OVERLAP_FRONT: - offset = 0; - sectors = bkey_start_offset(old.k) - - new->k.p.offset; - break; - case BCH_EXTENT_OVERLAP_MIDDLE: - offset = bkey_start_offset(&new->k) - - bkey_start_offset(old.k); - sectors = -((s64) new->k.size); - flags |= BTREE_TRIGGER_OVERWRITE_SPLIT; - break; - } - - BUG_ON(sectors >= 0); - - ret = bch2_mark_key_locked(c, old, bkey_i_to_s_c(new), - offset, sectors, fs_usage, - trans->journal_res.seq, flags) ?: 1; - if (ret <= 0) - break; - } - bch2_trans_iter_put(trans, copy); + ret = bch2_mark_key_locked(c, deleted, bkey_i_to_s_c(new), + trans->journal_res.seq, + BTREE_TRIGGER_INSERT|flags) ?: + bch2_mark_key_locked(c, old, deleted, + trans->journal_res.seq, + BTREE_TRIGGER_OVERWRITE|flags); } return ret; @@ -1416,7 +1336,7 @@ void bch2_trans_fs_usage_apply(struct btree_trans *trans, added += d->delta; } - BUG_ON(update_replicas(c, dst, &d->r, d->delta)); + BUG_ON(__update_replicas(c, dst, &d->r, d->delta)); } dst->nr_inodes += deltas->nr_inodes; @@ -1433,7 +1353,14 @@ void bch2_trans_fs_usage_apply(struct btree_trans *trans, */ should_not_have_added = added - (s64) disk_res_sectors; if (unlikely(should_not_have_added > 0)) { - atomic64_sub(should_not_have_added, &c->sectors_available); + u64 old, new, v = atomic64_read(&c->sectors_available); + + do { + old = v; + new = max_t(s64, 0, old - should_not_have_added); + } while ((v = atomic64_cmpxchg(&c->sectors_available, + old, new)) != old); + added -= should_not_have_added; warn = true; } @@ -1598,31 +1525,38 @@ err: } static int bch2_trans_mark_extent(struct btree_trans *trans, - struct bkey_s_c k, unsigned offset, - s64 sectors, unsigned flags, - enum bch_data_type data_type) + struct bkey_s_c k, unsigned flags) { + struct bch_fs *c = trans->c; struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const union bch_extent_entry *entry; struct extent_ptr_decoded p; struct bch_replicas_padded r; + enum bch_data_type data_type = bkey_is_btree_ptr(k.k) + ? BCH_DATA_btree + : BCH_DATA_user; + s64 sectors = bkey_is_btree_ptr(k.k) + ? c->opts.btree_node_size + : k.k->size; s64 dirty_sectors = 0; bool stale; int ret; + BUG_ON((flags & (BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE)) == + (BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE)); + + if (flags & BTREE_TRIGGER_OVERWRITE) + sectors = -sectors; + r.e.data_type = data_type; r.e.nr_devs = 0; r.e.nr_required = 1; - BUG_ON(!sectors); - bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { - s64 disk_sectors = data_type == BCH_DATA_btree - ? sectors - : ptr_disk_sectors_delta(p, offset, sectors, flags); + s64 disk_sectors = ptr_disk_sectors(sectors, p); - ret = bch2_trans_mark_pointer(trans, k, p, disk_sectors, - data_type); + ret = bch2_trans_mark_pointer(trans, k, p, + disk_sectors, data_type); if (ret < 0) return ret; @@ -1758,10 +1692,49 @@ static int bch2_trans_mark_stripe(struct btree_trans *trans, return ret; } +static int bch2_trans_mark_inode(struct btree_trans *trans, + struct bkey_s_c old, + struct bkey_s_c new, + unsigned flags) +{ + int nr = (new.k->type == KEY_TYPE_inode) - + (old.k->type == KEY_TYPE_inode); + + if (nr) { + struct replicas_delta_list *d = + replicas_deltas_realloc(trans, 0); + d->nr_inodes += nr; + } + + return 0; +} + +static int bch2_trans_mark_reservation(struct btree_trans *trans, + struct bkey_s_c k, unsigned flags) +{ + unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas; + s64 sectors = (s64) k.k->size; + struct replicas_delta_list *d; + + BUG_ON((flags & (BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE)) == + (BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE)); + + if (flags & BTREE_TRIGGER_OVERWRITE) + sectors = -sectors; + sectors *= replicas; + + d = replicas_deltas_realloc(trans, 0); + + replicas = clamp_t(unsigned, replicas, 1, + ARRAY_SIZE(d->persistent_reserved)); + + d->persistent_reserved[replicas - 1] += sectors; + return 0; +} + static int __bch2_trans_mark_reflink_p(struct btree_trans *trans, struct bkey_s_c_reflink_p p, - u64 idx, unsigned sectors, - unsigned flags) + u64 idx, unsigned flags) { struct bch_fs *c = trans->c; struct btree_iter *iter; @@ -1779,8 +1752,6 @@ static int __bch2_trans_mark_reflink_p(struct btree_trans *trans, if (ret) goto err; - sectors = min_t(u64, sectors, k.k->p.offset - idx); - n = bch2_trans_kmalloc(trans, bkey_bytes(k.k)); ret = PTR_ERR_OR_ZERO(n); if (ret) @@ -1806,34 +1777,31 @@ static int __bch2_trans_mark_reflink_p(struct btree_trans *trans, set_bkey_val_u64s(&n->k, 0); } - bch2_btree_iter_set_pos(iter, bkey_start_pos(k.k)); + bch2_btree_iter_set_pos_to_extent_start(iter); ret = bch2_trans_update(trans, iter, n, 0); if (ret) goto err; - ret = sectors; + ret = k.k->p.offset - idx; err: bch2_trans_iter_put(trans, iter); return ret; } static int bch2_trans_mark_reflink_p(struct btree_trans *trans, - struct bkey_s_c_reflink_p p, unsigned offset, - s64 sectors, unsigned flags) + struct bkey_s_c k, unsigned flags) { - u64 idx = le64_to_cpu(p.v->idx) + offset; + struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); + u64 idx = le64_to_cpu(p.v->idx); + unsigned sectors = p.k->size; s64 ret = 0; - if (sectors < 0) - sectors = -sectors; - - BUG_ON(offset || sectors != p.k->size); - while (sectors) { - ret = __bch2_trans_mark_reflink_p(trans, p, idx, sectors, flags); + ret = __bch2_trans_mark_reflink_p(trans, p, idx, flags); if (ret < 0) return ret; + ret = min_t(s64, ret, sectors); idx += ret; sectors -= ret; } @@ -1841,59 +1809,27 @@ static int bch2_trans_mark_reflink_p(struct btree_trans *trans, return 0; } -int bch2_trans_mark_key(struct btree_trans *trans, - struct bkey_s_c old, - struct bkey_s_c new, - unsigned offset, s64 sectors, unsigned flags) +int bch2_trans_mark_key(struct btree_trans *trans, struct bkey_s_c old, + struct bkey_s_c new, unsigned flags) { - struct bch_fs *c = trans->c; struct bkey_s_c k = flags & BTREE_TRIGGER_INSERT ? new : old; - struct replicas_delta_list *d; BUG_ON(!(flags & (BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE))); switch (k.k->type) { case KEY_TYPE_btree_ptr: case KEY_TYPE_btree_ptr_v2: - sectors = !(flags & BTREE_TRIGGER_OVERWRITE) - ? c->opts.btree_node_size - : -c->opts.btree_node_size; - - return bch2_trans_mark_extent(trans, k, offset, sectors, - flags, BCH_DATA_btree); case KEY_TYPE_extent: case KEY_TYPE_reflink_v: - return bch2_trans_mark_extent(trans, k, offset, sectors, - flags, BCH_DATA_user); + return bch2_trans_mark_extent(trans, k, flags); case KEY_TYPE_stripe: return bch2_trans_mark_stripe(trans, old, new, flags); - case KEY_TYPE_inode: { - int nr = (new.k->type == KEY_TYPE_inode) - - (old.k->type == KEY_TYPE_inode); - - if (nr) { - d = replicas_deltas_realloc(trans, 0); - d->nr_inodes += nr; - } - - return 0; - } - case KEY_TYPE_reservation: { - unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas; - - d = replicas_deltas_realloc(trans, 0); - - sectors *= replicas; - replicas = clamp_t(unsigned, replicas, 1, - ARRAY_SIZE(d->persistent_reserved)); - - d->persistent_reserved[replicas - 1] += sectors; - return 0; - } + case KEY_TYPE_inode: + return bch2_trans_mark_inode(trans, old, new, flags); + case KEY_TYPE_reservation: + return bch2_trans_mark_reservation(trans, k, flags); case KEY_TYPE_reflink_p: - return bch2_trans_mark_reflink_p(trans, - bkey_s_c_to_reflink_p(k), - offset, sectors, flags); + return bch2_trans_mark_reflink_p(trans, k, flags); default: return 0; } @@ -1904,8 +1840,10 @@ int bch2_trans_mark_update(struct btree_trans *trans, struct bkey_i *new, unsigned flags) { - struct bkey_s_c old; - int ret; + struct bkey _deleted = KEY(0, 0, 0); + struct bkey_s_c deleted = (struct bkey_s_c) { &_deleted, NULL }; + struct bkey_s_c old; + int iter_flags, ret; if (unlikely(flags & BTREE_TRIGGER_NORUN)) return 0; @@ -1913,26 +1851,33 @@ int bch2_trans_mark_update(struct btree_trans *trans, if (!btree_node_type_needs_gc(iter->btree_id)) return 0; - if (btree_iter_type(iter) != BTREE_ITER_CACHED) { + + if (likely(!(iter->flags & BTREE_ITER_CACHED_NOFILL))) { + iter_flags = iter->flags & BTREE_ITER_WITH_UPDATES; + iter->flags &= ~BTREE_ITER_WITH_UPDATES; + old = bch2_btree_iter_peek_slot(iter); + iter->flags |= iter_flags; + ret = bkey_err(old); if (ret) return ret; } else { - struct bkey_cached *ck = (void *) iter->l[0].b; - - BUG_ON(!ck->valid); - old = bkey_i_to_s_c(ck->k); + /* + * If BTREE_ITER_CACHED_NOFILL was used, we better not be + * running triggers that do anything on removal (alloc btree): + */ + old = deleted; } if (old.k->type == new->k.type && - !btree_node_type_is_extents(iter->btree_id)) { - ret = bch2_trans_mark_key(trans, old, bkey_i_to_s_c(new), 0, 0, + ((1U << old.k->type) & BTREE_TRIGGER_WANTS_OLD_AND_NEW)) { + ret = bch2_trans_mark_key(trans, old, bkey_i_to_s_c(new), BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE|flags); } else { - ret = bch2_trans_mark_key(trans, old, bkey_i_to_s_c(new), 0, new->k.size, + ret = bch2_trans_mark_key(trans, deleted, bkey_i_to_s_c(new), BTREE_TRIGGER_INSERT|flags) ?: - bch2_trans_mark_key(trans, old, bkey_i_to_s_c(new), 0, -((s64) old.k->size), + bch2_trans_mark_key(trans, old, deleted, BTREE_TRIGGER_OVERWRITE|flags); } diff --git a/libbcachefs/buckets.h b/libbcachefs/buckets.h index 04a2a93..0f544b6 100644 --- a/libbcachefs/buckets.h +++ b/libbcachefs/buckets.h @@ -125,20 +125,6 @@ static inline u8 ptr_stale(struct bch_dev *ca, return gen_after(ptr_bucket_mark(ca, ptr).gen, ptr->gen); } -static inline s64 __ptr_disk_sectors(struct extent_ptr_decoded p, - unsigned live_size) -{ - return live_size && p.crc.compression_type - ? max(1U, DIV_ROUND_UP(live_size * p.crc.compressed_size, - p.crc.uncompressed_size)) - : live_size; -} - -static inline s64 ptr_disk_sectors(struct extent_ptr_decoded p) -{ - return __ptr_disk_sectors(p, p.crc.live_size); -} - /* bucket gc marks */ static inline unsigned bucket_sectors_used(struct bucket_mark mark) @@ -240,14 +226,13 @@ void bch2_mark_metadata_bucket(struct bch_fs *, struct bch_dev *, size_t, enum bch_data_type, unsigned, struct gc_pos, unsigned); -int bch2_mark_key(struct bch_fs *, struct bkey_s_c, unsigned, - s64, struct bch_fs_usage *, u64, unsigned); +int bch2_mark_key(struct bch_fs *, struct bkey_s_c, unsigned); int bch2_mark_update(struct btree_trans *, struct btree_iter *, - struct bkey_i *, struct bch_fs_usage *, unsigned); + struct bkey_i *, unsigned); -int bch2_trans_mark_key(struct btree_trans *, struct bkey_s_c, struct bkey_s_c, - unsigned, s64, unsigned); +int bch2_trans_mark_key(struct btree_trans *, struct bkey_s_c, + struct bkey_s_c, unsigned); int bch2_trans_mark_update(struct btree_trans *, struct btree_iter *iter, struct bkey_i *insert, unsigned); void bch2_trans_fs_usage_apply(struct btree_trans *, struct replicas_delta_list *); diff --git a/libbcachefs/ec.c b/libbcachefs/ec.c index 48f9232..328e042 100644 --- a/libbcachefs/ec.c +++ b/libbcachefs/ec.c @@ -863,7 +863,8 @@ static int ec_stripe_update_ptrs(struct bch_fs *c, extent_stripe_ptr_add(e, s, ec_ptr, block); bch2_btree_iter_set_pos(iter, bkey_start_pos(&sk.k->k)); - ret = bch2_trans_update(&trans, iter, sk.k, 0) ?: + ret = bch2_btree_iter_traverse(iter) ?: + bch2_trans_update(&trans, iter, sk.k, 0) ?: bch2_trans_commit(&trans, NULL, NULL, BTREE_INSERT_NOFAIL); if (ret == -EINTR) @@ -1633,7 +1634,8 @@ static int bch2_stripes_read_fn(struct bch_fs *c, struct bkey_s_c k) if (k.k->type == KEY_TYPE_stripe) ret = __ec_stripe_mem_alloc(c, k.k->p.offset, GFP_KERNEL) ?: - bch2_mark_key(c, k, 0, 0, NULL, 0, + bch2_mark_key(c, k, + BTREE_TRIGGER_INSERT| BTREE_TRIGGER_NOATOMIC); return ret; diff --git a/libbcachefs/error.c b/libbcachefs/error.c index 90c3b98..2cea694 100644 --- a/libbcachefs/error.c +++ b/libbcachefs/error.c @@ -111,6 +111,7 @@ found: list_move(&s->list, &c->fsck_errors); s->nr++; if (c->opts.ratelimit_errors && + !(flags & FSCK_NO_RATELIMIT) && s->nr >= FSCK_ERR_RATELIMIT_NR) { if (s->nr == FSCK_ERR_RATELIMIT_NR) suppressing = true; diff --git a/libbcachefs/error.h b/libbcachefs/error.h index d8cd19b..9869382 100644 --- a/libbcachefs/error.h +++ b/libbcachefs/error.h @@ -104,6 +104,7 @@ struct fsck_err_state { #define FSCK_CAN_FIX (1 << 0) #define FSCK_CAN_IGNORE (1 << 1) #define FSCK_NEED_FSCK (1 << 2) +#define FSCK_NO_RATELIMIT (1 << 3) __printf(3, 4) __cold enum fsck_err_ret bch2_fsck_err(struct bch_fs *, diff --git a/libbcachefs/extent_update.c b/libbcachefs/extent_update.c index ef4aaf1..4a8dd08 100644 --- a/libbcachefs/extent_update.c +++ b/libbcachefs/extent_update.c @@ -104,6 +104,10 @@ int bch2_extent_atomic_end(struct btree_iter *iter, unsigned nr_iters = 0; int ret; + ret = bch2_btree_iter_traverse(iter); + if (ret) + return ret; + *end = insert->k.p; /* extent_update_to_keys(): */ diff --git a/libbcachefs/extents.h b/libbcachefs/extents.h index 3f6224f..43cef0a 100644 --- a/libbcachefs/extents.h +++ b/libbcachefs/extents.h @@ -426,6 +426,17 @@ void bch2_extent_crc_append(struct bkey_i *, /* Generic code for keys with pointers: */ +static inline bool bkey_is_btree_ptr(const struct bkey *k) +{ + switch (k->type) { + case KEY_TYPE_btree_ptr: + case KEY_TYPE_btree_ptr_v2: + return true; + default: + return false; + } +} + static inline bool bkey_extent_is_direct_data(const struct bkey *k) { switch (k->type) { diff --git a/libbcachefs/fs-common.c b/libbcachefs/fs-common.c index 00a63fe..60c5443 100644 --- a/libbcachefs/fs-common.c +++ b/libbcachefs/fs-common.c @@ -85,7 +85,8 @@ int bch2_create_trans(struct btree_trans *trans, u64 dir_inum, inode_iter->snapshot = U32_MAX; bch2_btree_iter_set_pos(inode_iter, SPOS(0, new_inode->bi_inum, U32_MAX)); - ret = bch2_inode_write(trans, inode_iter, new_inode); + ret = bch2_btree_iter_traverse(inode_iter) ?: + bch2_inode_write(trans, inode_iter, new_inode); err: bch2_trans_iter_put(trans, inode_iter); bch2_trans_iter_put(trans, dir_iter); diff --git a/libbcachefs/fs-io.c b/libbcachefs/fs-io.c index 4ec3360..0fbfa62 100644 --- a/libbcachefs/fs-io.c +++ b/libbcachefs/fs-io.c @@ -2028,7 +2028,9 @@ ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter) } bio = bio_alloc_bioset(GFP_KERNEL, - iov_iter_npages(iter, BIO_MAX_VECS), + iov_iter_is_bvec(iter) + ? 0 + : iov_iter_npages(iter, BIO_MAX_VECS), &c->dio_write_bioset); dio = container_of(bio, struct dio_write, op.wbio.bio); init_completion(&dio->done); @@ -2252,11 +2254,11 @@ static int bch2_truncate_page(struct bch_inode_info *inode, loff_t from) from, round_up(from, PAGE_SIZE)); } -static int bch2_extend(struct bch_inode_info *inode, +static int bch2_extend(struct user_namespace *mnt_userns, + struct bch_inode_info *inode, struct bch_inode_unpacked *inode_u, struct iattr *iattr) { - struct bch_fs *c = inode->v.i_sb->s_fs_info; struct address_space *mapping = inode->v.i_mapping; int ret; @@ -2270,25 +2272,15 @@ static int bch2_extend(struct bch_inode_info *inode, return ret; truncate_setsize(&inode->v, iattr->ia_size); - /* ATTR_MODE will never be set here, ns argument isn't needed: */ - setattr_copy(NULL, &inode->v, iattr); - - mutex_lock(&inode->ei_update_lock); - ret = bch2_write_inode_size(c, inode, inode->v.i_size, - ATTR_MTIME|ATTR_CTIME); - mutex_unlock(&inode->ei_update_lock); - return ret; + return bch2_setattr_nonsize(mnt_userns, inode, iattr); } static int bch2_truncate_finish_fn(struct bch_inode_info *inode, struct bch_inode_unpacked *bi, void *p) { - struct bch_fs *c = inode->v.i_sb->s_fs_info; - bi->bi_flags &= ~BCH_INODE_I_SIZE_DIRTY; - bi->bi_mtime = bi->bi_ctime = bch2_current_time(c); return 0; } @@ -2302,7 +2294,8 @@ static int bch2_truncate_start_fn(struct bch_inode_info *inode, return 0; } -int bch2_truncate(struct bch_inode_info *inode, struct iattr *iattr) +int bch2_truncate(struct user_namespace *mnt_userns, + struct bch_inode_info *inode, struct iattr *iattr) { struct bch_fs *c = inode->v.i_sb->s_fs_info; struct address_space *mapping = inode->v.i_mapping; @@ -2313,6 +2306,18 @@ int bch2_truncate(struct bch_inode_info *inode, struct iattr *iattr) s64 i_sectors_delta = 0; int ret = 0; + /* + * Don't update timestamps if we're not doing anything: + */ + if (iattr->ia_size == inode->v.i_size) + return 0; + + if (!(iattr->ia_valid & ATTR_MTIME)) + ktime_get_coarse_real_ts64(&iattr->ia_mtime); + if (!(iattr->ia_valid & ATTR_CTIME)) + ktime_get_coarse_real_ts64(&iattr->ia_ctime); + iattr->ia_valid |= ATTR_MTIME|ATTR_CTIME; + inode_dio_wait(&inode->v); bch2_pagecache_block_get(&inode->ei_pagecache_lock); @@ -2342,10 +2347,12 @@ int bch2_truncate(struct bch_inode_info *inode, struct iattr *iattr) inode->v.i_size < inode_u.bi_size); if (iattr->ia_size > inode->v.i_size) { - ret = bch2_extend(inode, &inode_u, iattr); + ret = bch2_extend(mnt_userns, inode, &inode_u, iattr); goto err; } + iattr->ia_valid &= ~ATTR_SIZE; + ret = bch2_truncate_page(inode, iattr->ia_size); if (unlikely(ret)) goto err; @@ -2389,13 +2396,11 @@ int bch2_truncate(struct bch_inode_info *inode, struct iattr *iattr) if (unlikely(ret)) goto err; - /* ATTR_MODE will never be set here, ns argument isn't needed: */ - setattr_copy(NULL, &inode->v, iattr); - mutex_lock(&inode->ei_update_lock); - ret = bch2_write_inode(c, inode, bch2_truncate_finish_fn, NULL, - ATTR_MTIME|ATTR_CTIME); + ret = bch2_write_inode(c, inode, bch2_truncate_finish_fn, NULL, 0); mutex_unlock(&inode->ei_update_lock); + + ret = bch2_setattr_nonsize(mnt_userns, inode, iattr); err: bch2_pagecache_block_put(&inode->ei_pagecache_lock); return ret; @@ -2611,7 +2616,8 @@ reassemble: BUG_ON(ret); } - ret = bch2_trans_update(&trans, del, &delete, trigger_flags) ?: + ret = bch2_btree_iter_traverse(del) ?: + bch2_trans_update(&trans, del, &delete, trigger_flags) ?: bch2_trans_update(&trans, dst, copy.k, trigger_flags) ?: bch2_trans_commit(&trans, &disk_res, &inode->ei_journal_seq, diff --git a/libbcachefs/fs-io.h b/libbcachefs/fs-io.h index 2537a3d..b24efea 100644 --- a/libbcachefs/fs-io.h +++ b/libbcachefs/fs-io.h @@ -31,7 +31,8 @@ ssize_t bch2_write_iter(struct kiocb *, struct iov_iter *); int bch2_fsync(struct file *, loff_t, loff_t, int); -int bch2_truncate(struct bch_inode_info *, struct iattr *); +int bch2_truncate(struct user_namespace *, + struct bch_inode_info *, struct iattr *); long bch2_fallocate_dispatch(struct file *, int, loff_t, loff_t); loff_t bch2_remap_file_range(struct file *, loff_t, struct file *, diff --git a/libbcachefs/fs.c b/libbcachefs/fs.c index 07e1edc..d213305 100644 --- a/libbcachefs/fs.c +++ b/libbcachefs/fs.c @@ -647,10 +647,10 @@ err: return ret; } -void bch2_setattr_copy(struct user_namespace *mnt_userns, - struct bch_inode_info *inode, - struct bch_inode_unpacked *bi, - struct iattr *attr) +static void bch2_setattr_copy(struct user_namespace *mnt_userns, + struct bch_inode_info *inode, + struct bch_inode_unpacked *bi, + struct iattr *attr) { struct bch_fs *c = inode->v.i_sb->s_fs_info; unsigned int ia_valid = attr->ia_valid; @@ -660,6 +660,9 @@ void bch2_setattr_copy(struct user_namespace *mnt_userns, if (ia_valid & ATTR_GID) bi->bi_gid = from_kgid(mnt_userns, attr->ia_gid); + if (ia_valid & ATTR_SIZE) + bi->bi_size = attr->ia_size; + if (ia_valid & ATTR_ATIME) bi->bi_atime = timespec_to_bch2_time(c, attr->ia_atime); if (ia_valid & ATTR_MTIME) @@ -680,9 +683,9 @@ void bch2_setattr_copy(struct user_namespace *mnt_userns, } } -static int bch2_setattr_nonsize(struct user_namespace *mnt_userns, - struct bch_inode_info *inode, - struct iattr *attr) +int bch2_setattr_nonsize(struct user_namespace *mnt_userns, + struct bch_inode_info *inode, + struct iattr *attr) { struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch_qid qid; @@ -806,7 +809,7 @@ static int bch2_setattr(struct user_namespace *mnt_userns, return ret; return iattr->ia_valid & ATTR_SIZE - ? bch2_truncate(inode, iattr) + ? bch2_truncate(mnt_userns, inode, iattr) : bch2_setattr_nonsize(mnt_userns, inode, iattr); } diff --git a/libbcachefs/fs.h b/libbcachefs/fs.h index 2d82ed7..36cc6ba 100644 --- a/libbcachefs/fs.h +++ b/libbcachefs/fs.h @@ -167,6 +167,10 @@ void bch2_inode_update_after_write(struct bch_fs *, int __must_check bch2_write_inode(struct bch_fs *, struct bch_inode_info *, inode_set_fn, void *, unsigned); +int bch2_setattr_nonsize(struct user_namespace *, + struct bch_inode_info *, + struct iattr *); + void bch2_vfs_exit(void); int bch2_vfs_init(void); diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c index 89a130d..7ea1a41 100644 --- a/libbcachefs/fsck.c +++ b/libbcachefs/fsck.c @@ -78,7 +78,8 @@ static int __write_inode(struct btree_trans *trans, bch2_trans_get_iter(trans, BTREE_ID_inodes, SPOS(0, inode->bi_inum, snapshot), BTREE_ITER_INTENT); - int ret = bch2_inode_write(trans, inode_iter, inode); + int ret = bch2_btree_iter_traverse(inode_iter) ?: + bch2_inode_write(trans, inode_iter, inode); bch2_trans_iter_put(trans, inode_iter); return ret; } @@ -305,7 +306,8 @@ static int hash_redo_key(struct btree_trans *trans, bkey_init(&delete->k); delete->k.p = k_iter->pos; - return bch2_trans_update(trans, k_iter, delete, 0) ?: + return bch2_btree_iter_traverse(k_iter) ?: + bch2_trans_update(trans, k_iter, delete, 0) ?: bch2_hash_set(trans, desc, hash_info, k_iter->pos.inode, tmp, 0); } @@ -491,6 +493,7 @@ static int check_inode(struct btree_trans *trans, ret = __bch2_trans_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL| BTREE_INSERT_LAZY_RW, + bch2_btree_iter_traverse(iter) ?: bch2_inode_write(trans, iter, &u)); if (ret) bch_err(c, "error in fsck: error %i " @@ -562,7 +565,8 @@ static int fix_overlapping_extent(struct btree_trans *trans, BTREE_ITER_INTENT|BTREE_ITER_NOT_EXTENTS); BUG_ON(iter->flags & BTREE_ITER_IS_EXTENTS); - ret = bch2_trans_update(trans, iter, u, BTREE_TRIGGER_NORUN) ?: + ret = bch2_btree_iter_traverse(iter) ?: + bch2_trans_update(trans, iter, u, BTREE_TRIGGER_NORUN) ?: bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL| BTREE_INSERT_LAZY_RW); @@ -761,7 +765,7 @@ retry: mode_to_type(w.inode.bi_mode), (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf))) { - ret = lockrestart_do(&trans, + ret = __bch2_trans_do(&trans, NULL, NULL, 0, bch2_btree_delete_at(&trans, iter, 0)); if (ret) goto err; @@ -886,6 +890,7 @@ retry: ret = __bch2_trans_do(&trans, NULL, NULL, BTREE_INSERT_NOFAIL| BTREE_INSERT_LAZY_RW, + bch2_btree_iter_traverse(iter) ?: bch2_trans_update(&trans, iter, &n->k_i, 0)); kfree(n); if (ret) @@ -1338,6 +1343,7 @@ static int check_nlinks_update_hardlinks(struct bch_fs *c, ret = __bch2_trans_do(&trans, NULL, NULL, BTREE_INSERT_NOFAIL| BTREE_INSERT_LAZY_RW, + bch2_btree_iter_traverse(iter) ?: bch2_inode_write(&trans, iter, &u)); if (ret) bch_err(c, "error in fsck: error %i updating inode", ret); diff --git a/libbcachefs/inode.c b/libbcachefs/inode.c index 17d8eb5..59edb4c 100644 --- a/libbcachefs/inode.c +++ b/libbcachefs/inode.c @@ -302,7 +302,7 @@ struct btree_iter *bch2_inode_peek(struct btree_trans *trans, iter = bch2_trans_get_iter(trans, BTREE_ID_inodes, POS(0, inum), BTREE_ITER_CACHED|flags); - k = bch2_btree_iter_peek_cached(iter); + k = bch2_btree_iter_peek_slot(iter); ret = bkey_err(k); if (ret) goto err; @@ -600,15 +600,12 @@ int bch2_inode_rm(struct bch_fs *c, u64 inode_nr, bool cached) retry: bch2_trans_begin(&trans); - if (cached) { - iter = bch2_trans_get_iter(&trans, BTREE_ID_inodes, POS(0, inode_nr), - BTREE_ITER_CACHED|BTREE_ITER_INTENT); - k = bch2_btree_iter_peek_cached(iter); - } else { - iter = bch2_trans_get_iter(&trans, BTREE_ID_inodes, POS(0, inode_nr), - BTREE_ITER_SLOTS|BTREE_ITER_INTENT); - k = bch2_btree_iter_peek_slot(iter); - } + iter = bch2_trans_get_iter(&trans, BTREE_ID_inodes, POS(0, inode_nr), + (cached + ? BTREE_ITER_CACHED + : BTREE_ITER_SLOTS)| + BTREE_ITER_INTENT); + k = bch2_btree_iter_peek_slot(iter); ret = bkey_err(k); if (ret) diff --git a/libbcachefs/journal.c b/libbcachefs/journal.c index d714779..ac4071f 100644 --- a/libbcachefs/journal.c +++ b/libbcachefs/journal.c @@ -1071,7 +1071,7 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq, bch2_journal_space_available(j); spin_unlock(&j->lock); - return 0; + return bch2_journal_reclaim_start(j); } /* init/exit: */ diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c index f324141..c6fa4ca 100644 --- a/libbcachefs/recovery.c +++ b/libbcachefs/recovery.c @@ -509,16 +509,8 @@ static int __bch2_journal_replay_key(struct btree_trans *trans, iter = bch2_trans_get_node_iter(trans, id, k->k.p, BTREE_MAX_DEPTH, level, - BTREE_ITER_INTENT); - - /* - * iter->flags & BTREE_ITER_IS_EXTENTS triggers the update path to run - * extent_handle_overwrites() and extent_update_to_keys() - but we don't - * want that here, journal replay is supposed to treat extents like - * regular keys: - */ - BUG_ON(iter->flags & BTREE_ITER_IS_EXTENTS); - + BTREE_ITER_INTENT| + BTREE_ITER_NOT_EXTENTS); ret = bch2_btree_iter_traverse(iter) ?: bch2_trans_update(trans, iter, k, BTREE_TRIGGER_NORUN); bch2_trans_iter_put(trans, iter); @@ -546,7 +538,8 @@ static int __bch2_alloc_replay_key(struct btree_trans *trans, struct bkey_i *k) BTREE_ITER_CACHED| BTREE_ITER_CACHED_NOFILL| BTREE_ITER_INTENT); - ret = bch2_trans_update(trans, iter, k, BTREE_TRIGGER_NORUN); + ret = bch2_btree_iter_traverse(iter) ?: + bch2_trans_update(trans, iter, k, BTREE_TRIGGER_NORUN); bch2_trans_iter_put(trans, iter); return ret; } diff --git a/libbcachefs/reflink.c b/libbcachefs/reflink.c index ba70081..ebf3912 100644 --- a/libbcachefs/reflink.c +++ b/libbcachefs/reflink.c @@ -142,7 +142,7 @@ static int bch2_make_extent_indirect(struct btree_trans *trans, goto err; /* rewind iter to start of hole, if necessary: */ - bch2_btree_iter_set_pos(reflink_iter, bkey_start_pos(k.k)); + bch2_btree_iter_set_pos_to_extent_start(reflink_iter); r_v = bch2_trans_kmalloc(trans, sizeof(__le64) + bkey_bytes(&orig->k)); ret = PTR_ERR_OR_ZERO(r_v); @@ -257,11 +257,11 @@ s64 bch2_remap_range(struct bch_fs *c, } if (src_k.k->type != KEY_TYPE_reflink_p) { + bch2_btree_iter_set_pos_to_extent_start(src_iter); + bch2_bkey_buf_reassemble(&new_src, c, src_k); src_k = bkey_i_to_s_c(new_src.k); - bch2_btree_iter_set_pos(src_iter, bkey_start_pos(src_k.k)); - ret = bch2_make_extent_indirect(&trans, src_iter, new_src.k); if (ret) diff --git a/libbcachefs/super-io.c b/libbcachefs/super-io.c index 9778851..c771b92 100644 --- a/libbcachefs/super-io.c +++ b/libbcachefs/super-io.c @@ -680,7 +680,7 @@ static void write_one_super(struct bch_fs *c, struct bch_dev *ca, unsigned idx) sb->offset = sb->layout.sb_offset[idx]; - SET_BCH_SB_CSUM_TYPE(sb, c->opts.metadata_checksum); + SET_BCH_SB_CSUM_TYPE(sb, bch2_csum_opt_to_type(c->opts.metadata_checksum, false)); sb->csum = csum_vstruct(c, BCH_SB_CSUM_TYPE(sb), null_nonce(), sb); diff --git a/libbcachefs/super.c b/libbcachefs/super.c index 2a570eb..13a5ca7 100644 --- a/libbcachefs/super.c +++ b/libbcachefs/super.c @@ -269,7 +269,7 @@ static void bch2_writes_disabled(struct percpu_ref *writes) void bch2_fs_read_only(struct bch_fs *c) { if (!test_bit(BCH_FS_RW, &c->flags)) { - BUG_ON(c->journal.reclaim_thread); + bch2_journal_reclaim_stop(&c->journal); return; } @@ -431,12 +431,6 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) for_each_rw_member(ca, c, i) bch2_wake_allocator(ca); - ret = bch2_journal_reclaim_start(&c->journal); - if (ret) { - bch_err(c, "error starting journal reclaim: %i", ret); - return ret; - } - if (!early) { ret = bch2_fs_read_write_late(c); if (ret) diff --git a/libbcachefs/tests.c b/libbcachefs/tests.c index 63f4a83..59f34b4 100644 --- a/libbcachefs/tests.c +++ b/libbcachefs/tests.c @@ -54,14 +54,16 @@ static int test_delete(struct bch_fs *c, u64 nr) } pr_info("deleting once"); - ret = bch2_btree_delete_at(&trans, iter, 0); + ret = __bch2_trans_do(&trans, NULL, NULL, 0, + bch2_btree_delete_at(&trans, iter, 0)); if (ret) { bch_err(c, "delete error (first) in test_delete: %i", ret); goto err; } pr_info("deleting twice"); - ret = bch2_btree_delete_at(&trans, iter, 0); + ret = __bch2_trans_do(&trans, NULL, NULL, 0, + bch2_btree_delete_at(&trans, iter, 0)); if (ret) { bch_err(c, "delete error (second) in test_delete: %i", ret); goto err; @@ -101,7 +103,8 @@ static int test_delete_written(struct bch_fs *c, u64 nr) bch2_journal_flush_all_pins(&c->journal); - ret = bch2_btree_delete_at(&trans, iter, 0); + ret = __bch2_trans_do(&trans, NULL, NULL, 0, + bch2_btree_delete_at(&trans, iter, 0)); if (ret) { bch_err(c, "delete error in test_delete_written: %i", ret); goto err; diff --git a/libbcachefs/util.c b/libbcachefs/util.c index e3ad26e..463260c 100644 --- a/libbcachefs/util.c +++ b/libbcachefs/util.c @@ -887,13 +887,9 @@ void eytzinger0_find_test(void) */ u64 *bch2_acc_percpu_u64s(u64 __percpu *p, unsigned nr) { - u64 *ret; + u64 *ret = this_cpu_ptr(p); int cpu; - preempt_disable(); - ret = this_cpu_ptr(p); - preempt_enable(); - for_each_possible_cpu(cpu) { u64 *i = per_cpu_ptr(p, cpu); diff --git a/libbcachefs/util.h b/libbcachefs/util.h index c69b05d..84ef4d6 100644 --- a/libbcachefs/util.h +++ b/libbcachefs/util.h @@ -712,10 +712,7 @@ static inline void percpu_u64_set(u64 __percpu *dst, u64 src) for_each_possible_cpu(cpu) *per_cpu_ptr(dst, cpu) = 0; - - preempt_disable(); - *this_cpu_ptr(dst) = src; - preempt_enable(); + this_cpu_write(*dst, src); } static inline void acc_u64s(u64 *acc, const u64 *src, unsigned nr) diff --git a/linux/six.c b/linux/six.c index c4ae4e0..fca1208 100644 --- a/linux/six.c +++ b/linux/six.c @@ -142,8 +142,6 @@ static __always_inline bool do_six_trylock_type(struct six_lock *lock, union six_lock_state old, new; bool ret; u64 v; - old.v = 0; - new.v = 0; EBUG_ON(type == SIX_LOCK_write && lock->owner != current); EBUG_ON(type == SIX_LOCK_write && (lock->state.seq & 1)); -- 2.39.2