X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libbcachefs%2Fbtree_update_leaf.c;h=7faf98fd2f641acd3117d7a9596fd9b3c656542d;hb=5bc48bd428303aabe19c196a47d1d89a605397be;hp=1d94fb94a570122d7e0fc2aa6ab7fad33e7bfe21;hpb=dbb99e492daa3ffe623685f3871dfcb97c01cd4f;p=bcachefs-tools-debian diff --git a/libbcachefs/btree_update_leaf.c b/libbcachefs/btree_update_leaf.c index 1d94fb9..7faf98f 100644 --- a/libbcachefs/btree_update_leaf.c +++ b/libbcachefs/btree_update_leaf.c @@ -23,11 +23,10 @@ static inline bool same_leaf_as_prev(struct btree_trans *trans, struct btree_insert_entry *i) { - return i != trans->updates && - i[0].iter->l[0].b == i[-1].iter->l[0].b; + return i != trans->updates2 && + iter_l(i[0].iter)->b == iter_l(i[-1].iter)->b; } - inline void bch2_btree_node_lock_for_insert(struct bch_fs *c, struct btree *b, struct btree_iter *iter) { @@ -59,8 +58,14 @@ bool bch2_btree_bset_insert_key(struct btree_iter *iter, EBUG_ON(btree_node_just_written(b)); EBUG_ON(bset_written(b, btree_bset_last(b))); EBUG_ON(bkey_deleted(&insert->k) && bkey_val_u64s(&insert->k)); - EBUG_ON(bkey_cmp(bkey_start_pos(&insert->k), b->data->min_key) < 0 || - bkey_cmp(insert->k.p, b->data->max_key) > 0); + EBUG_ON(bkey_cmp(b->data->min_key, POS_MIN) && + bkey_cmp(bkey_start_pos(&insert->k), + bkey_predecessor(b->data->min_key)) < 0); + EBUG_ON(bkey_cmp(insert->k.p, b->data->min_key) < 0); + EBUG_ON(bkey_cmp(insert->k.p, b->data->max_key) > 0); + EBUG_ON(insert->k.u64s > + bch_btree_keys_u64s_remaining(iter->trans->c, b)); + EBUG_ON(iter->flags & BTREE_ITER_IS_EXTENTS); k = bch2_btree_node_iter_peek_all(node_iter, b); if (k && bkey_cmp_packed(b, k, &insert->k)) @@ -79,7 +84,7 @@ bool bch2_btree_bset_insert_key(struct btree_iter *iter, k->type = KEY_TYPE_deleted; if (k->needs_whiteout) - push_whiteout(iter->trans->c, b, k); + push_whiteout(iter->trans->c, b, insert->k.p); k->needs_whiteout = false; if (k >= btree_bset_last(b)->start) { @@ -143,6 +148,17 @@ static void btree_node_flush1(struct journal *j, struct journal_entry_pin *pin, return __btree_node_flush(j, pin, 1, seq); } +inline void bch2_btree_add_journal_pin(struct bch_fs *c, + struct btree *b, u64 seq) +{ + struct btree_write *w = btree_current_write(b); + + bch2_journal_pin_add(&c->journal, seq, &w->journal, + btree_node_write_idx(b) == 0 + ? btree_node_flush0 + : btree_node_flush1); +} + static inline void __btree_journal_key(struct btree_trans *trans, enum btree_id btree_id, struct bkey_i *insert) @@ -164,16 +180,14 @@ static inline void __btree_journal_key(struct btree_trans *trans, *trans->journal_seq = seq; } -void bch2_btree_journal_key(struct btree_trans *trans, - struct btree_iter *iter, - struct bkey_i *insert) +static void bch2_btree_journal_key(struct btree_trans *trans, + struct btree_iter *iter, + struct bkey_i *insert) { struct bch_fs *c = trans->c; struct journal *j = &c->journal; - struct btree *b = iter->l[0].b; - struct btree_write *w = btree_current_write(b); + struct btree *b = iter_l(iter)->b; - EBUG_ON(iter->level || b->level); EBUG_ON(trans->journal_res.ref != !(trans->flags & BTREE_INSERT_JOURNAL_REPLAY)); @@ -183,35 +197,15 @@ void bch2_btree_journal_key(struct btree_trans *trans, cpu_to_le64(trans->journal_res.seq); } - if (unlikely(!journal_pin_active(&w->journal))) { - u64 seq = likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY)) + bch2_btree_add_journal_pin(c, b, + likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY)) ? trans->journal_res.seq - : j->replay_journal_seq; - - bch2_journal_pin_add(j, seq, &w->journal, - btree_node_write_idx(b) == 0 - ? btree_node_flush0 - : btree_node_flush1); - } + : j->replay_journal_seq); if (unlikely(!btree_node_dirty(b))) set_btree_node_dirty(b); } -static void bch2_insert_fixup_key(struct btree_trans *trans, - struct btree_iter *iter, - struct bkey_i *insert) -{ - struct btree_iter_level *l = &iter->l[0]; - - EBUG_ON(iter->level); - EBUG_ON(insert->k.u64s > - bch_btree_keys_u64s_remaining(trans->c, l->b)); - - if (likely(bch2_btree_bset_insert_key(iter, l->b, &l->iter, insert))) - bch2_btree_journal_key(trans, iter, insert); -} - /** * btree_insert_key - insert a key one key into a leaf node */ @@ -220,7 +214,7 @@ static void btree_insert_key_leaf(struct btree_trans *trans, struct bkey_i *insert) { struct bch_fs *c = trans->c; - struct btree *b = iter->l[0].b; + struct btree *b = iter_l(iter)->b; struct bset_tree *t = bset_tree_last(b); int old_u64s = bset_u64s(t); int old_live_u64s = b->nr.live_u64s; @@ -228,10 +222,8 @@ static void btree_insert_key_leaf(struct btree_trans *trans, insert->k.needs_whiteout = false; - if (!btree_node_is_extents(b)) - bch2_insert_fixup_key(trans, iter, insert); - else - bch2_insert_fixup_extent(trans, iter, insert); + if (likely(bch2_btree_bset_insert_key(iter, b, &iter_l(iter)->iter, insert))) + bch2_btree_journal_key(trans, iter, insert); live_u64s_added = (int) b->nr.live_u64s - old_live_u64s; u64s_added = (int) bset_u64s(t) - old_u64s; @@ -256,14 +248,10 @@ static inline void btree_insert_entry_checks(struct btree_trans *trans, { struct bch_fs *c = trans->c; - BUG_ON(iter->level); - BUG_ON(bkey_cmp(bkey_start_pos(&insert->k), iter->pos)); - EBUG_ON((iter->flags & BTREE_ITER_IS_EXTENTS) && - bkey_cmp(insert->k.p, iter->l[0].b->key.k.p) > 0); - + BUG_ON(bkey_cmp(insert->k.p, iter->pos)); BUG_ON(debug_check_bkeys(c) && - !bkey_deleted(&insert->k) && - bch2_bkey_invalid(c, bkey_i_to_s_c(insert), iter->btree_id)); + bch2_bkey_invalid(c, bkey_i_to_s_c(insert), + __btree_node_type(iter->level, iter->btree_id))); } static noinline int @@ -309,15 +297,22 @@ btree_key_can_insert(struct btree_trans *trans, unsigned *u64s) { struct bch_fs *c = trans->c; - struct btree *b = iter->l[0].b; + struct btree *b = iter_l(iter)->b; static enum btree_insert_ret ret; if (unlikely(btree_node_fake(b))) return BTREE_INSERT_BTREE_NODE_FULL; - ret = !btree_node_is_extents(b) + /* + * old bch2_extent_sort_fix_overlapping() algorithm won't work with new + * style extent updates: + */ + if (unlikely(btree_node_old_extent_overwrite(b))) + return BTREE_INSERT_BTREE_NODE_FULL; + + ret = !(iter->flags & BTREE_ITER_IS_EXTENTS) ? BTREE_INSERT_OK - : bch2_extent_can_insert(trans, iter, insert, u64s); + : bch2_extent_can_insert(trans, iter, insert); if (ret) return ret; @@ -357,7 +352,7 @@ static noinline void bch2_trans_mark_gc(struct btree_trans *trans) struct btree_insert_entry *i; trans_for_each_update(trans, i) - if (gc_visited(c, gc_pos_btree_node(i->iter->l[0].b))) + if (gc_visited(c, gc_pos_btree_node(iter_l(i->iter)->b))) bch2_mark_update(trans, i->iter, i->k, NULL, i->trigger_flags|BTREE_TRIGGER_GC); } @@ -386,7 +381,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, prefetch(&trans->c->journal.flags); - trans_for_each_update(trans, i) { + trans_for_each_update2(trans, i) { /* Multiple inserts might go to same leaf: */ if (!same_leaf_as_prev(trans, i)) u64s = 0; @@ -425,10 +420,10 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, if (!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY)) { if (journal_seq_verify(c)) - trans_for_each_update(trans, i) + trans_for_each_update2(trans, i) i->k->k.version.lo = trans->journal_res.seq; else if (inject_invalid_keys(c)) - trans_for_each_update(trans, i) + trans_for_each_update2(trans, i) i->k->k.version = MAX_VERSION; } @@ -451,7 +446,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, if (unlikely(c->gc_pos.phase)) bch2_trans_mark_gc(trans); - trans_for_each_update(trans, i) + trans_for_each_update2(trans, i) do_btree_insert_one(trans, i->iter, i->k); err: if (marking) { @@ -472,8 +467,8 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, struct btree_iter *iter; int ret; - trans_for_each_update(trans, i) - BUG_ON(!btree_node_intent_locked(i->iter, 0)); + trans_for_each_update2(trans, i) + BUG_ON(!btree_node_intent_locked(i->iter, i->iter->level)); ret = bch2_journal_preres_get(&trans->c->journal, &trans->journal_preres, trans->journal_preres_u64s, @@ -500,20 +495,20 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, } if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) - trans_for_each_update(trans, i) + trans_for_each_update2(trans, i) btree_insert_entry_checks(trans, i->iter, i->k); bch2_btree_trans_verify_locks(trans); - trans_for_each_update(trans, i) + trans_for_each_update2(trans, i) if (!same_leaf_as_prev(trans, i)) bch2_btree_node_lock_for_insert(trans->c, - i->iter->l[0].b, i->iter); + iter_l(i->iter)->b, i->iter); ret = bch2_trans_commit_write_locked(trans, stopped_at); - trans_for_each_update(trans, i) + trans_for_each_update2(trans, i) if (!same_leaf_as_prev(trans, i)) - bch2_btree_node_unlock_write_inlined(i->iter->l[0].b, + bch2_btree_node_unlock_write_inlined(iter_l(i->iter)->b, i->iter); /* @@ -528,14 +523,14 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, if (trans->flags & BTREE_INSERT_NOUNLOCK) trans->nounlock = true; - trans_for_each_update(trans, i) + trans_for_each_update2(trans, i) if (!same_leaf_as_prev(trans, i)) bch2_foreground_maybe_merge(trans->c, i->iter, 0, trans->flags); trans->nounlock = false; - trans_for_each_update(trans, i) + trans_for_each_update2(trans, i) bch2_btree_iter_downgrade(i->iter); return 0; @@ -658,6 +653,135 @@ bch2_trans_commit_get_rw_cold(struct btree_trans *trans) return 0; } +static void bch2_trans_update2(struct btree_trans *trans, + struct btree_iter *iter, + struct bkey_i *insert) +{ + struct btree_insert_entry *i, n = (struct btree_insert_entry) { + .iter = iter, .k = insert + }; + + btree_insert_entry_checks(trans, n.iter, n.k); + + BUG_ON(iter->uptodate > BTREE_ITER_NEED_PEEK); + + EBUG_ON(trans->nr_updates2 >= trans->nr_iters); + + iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT; + + trans_for_each_update2(trans, i) { + if (btree_iter_cmp(n.iter, i->iter) == 0) { + *i = n; + return; + } + + if (btree_iter_cmp(n.iter, i->iter) <= 0) + break; + } + + array_insert_item(trans->updates2, trans->nr_updates2, + i - trans->updates2, n); +} + +static int extent_update_to_keys(struct btree_trans *trans, + struct btree_iter *orig_iter, + struct bkey_i *insert) +{ + struct btree_iter *iter; + + if (bkey_deleted(&insert->k)) + return 0; + + iter = bch2_trans_copy_iter(trans, orig_iter); + if (IS_ERR(iter)) + return PTR_ERR(iter); + + iter->flags |= BTREE_ITER_INTENT; + __bch2_btree_iter_set_pos(iter, insert->k.p, false); + bch2_trans_update2(trans, iter, insert); + bch2_trans_iter_put(trans, iter); + return 0; +} + +static int extent_handle_overwrites(struct btree_trans *trans, + enum btree_id btree_id, + struct bpos start, struct bpos end) +{ + struct btree_iter *iter = NULL, *update_iter; + struct bkey_i *update; + struct bkey_s_c k; + int ret = 0; + + iter = bch2_trans_get_iter(trans, btree_id, start, BTREE_ITER_INTENT); + ret = PTR_ERR_OR_ZERO(iter); + if (ret) + return ret; + + k = bch2_btree_iter_peek_with_updates(iter); + + while (k.k && !(ret = bkey_err(k))) { + if (bkey_cmp(end, bkey_start_pos(k.k)) <= 0) + break; + + if (bkey_cmp(bkey_start_pos(k.k), start) < 0) { + update_iter = bch2_trans_copy_iter(trans, iter); + if ((ret = PTR_ERR_OR_ZERO(update_iter))) + goto err; + + update = bch2_trans_kmalloc(trans, bkey_bytes(k.k)); + if ((ret = PTR_ERR_OR_ZERO(update))) + goto err; + + bkey_reassemble(update, k); + bch2_cut_back(start, update); + + __bch2_btree_iter_set_pos(update_iter, update->k.p, false); + bch2_trans_update2(trans, update_iter, update); + bch2_trans_iter_put(trans, update_iter); + } + + if (bkey_cmp(k.k->p, end) > 0) { + update_iter = bch2_trans_copy_iter(trans, iter); + if ((ret = PTR_ERR_OR_ZERO(update_iter))) + goto err; + + update = bch2_trans_kmalloc(trans, bkey_bytes(k.k)); + if ((ret = PTR_ERR_OR_ZERO(update))) + goto err; + + bkey_reassemble(update, k); + bch2_cut_front(end, update); + + __bch2_btree_iter_set_pos(update_iter, update->k.p, false); + bch2_trans_update2(trans, update_iter, update); + bch2_trans_iter_put(trans, update_iter); + } else { + update_iter = bch2_trans_copy_iter(trans, iter); + if ((ret = PTR_ERR_OR_ZERO(update_iter))) + goto err; + + update = bch2_trans_kmalloc(trans, sizeof(struct bkey)); + if ((ret = PTR_ERR_OR_ZERO(update))) + goto err; + + update->k = *k.k; + set_bkey_val_u64s(&update->k, 0); + update->k.type = KEY_TYPE_deleted; + update->k.size = 0; + + __bch2_btree_iter_set_pos(update_iter, update->k.p, false); + bch2_trans_update2(trans, update_iter, update); + bch2_trans_iter_put(trans, update_iter); + } + + k = bch2_btree_iter_next_with_updates(iter); + } +err: + if (!IS_ERR_OR_NULL(iter)) + bch2_trans_iter_put(trans, iter); + return ret; +} + int __bch2_trans_commit(struct btree_trans *trans) { struct btree_insert_entry *i = NULL; @@ -727,7 +851,36 @@ int __bch2_trans_commit(struct btree_trans *trans) } } while (trans_trigger_run); + /* Turn extents updates into keys: */ + trans_for_each_update(trans, i) + if (i->iter->flags & BTREE_ITER_IS_EXTENTS) { + struct bpos start = bkey_start_pos(&i->k->k); + + while (i + 1 < trans->updates + trans->nr_updates && + i[0].iter->btree_id == i[1].iter->btree_id && + !bkey_cmp(i[0].k->k.p, bkey_start_pos(&i[1].k->k))) + i++; + + ret = extent_handle_overwrites(trans, i->iter->btree_id, + start, i->k->k.p); + if (ret) + goto out; + } + trans_for_each_update(trans, i) { + if (i->iter->flags & BTREE_ITER_IS_EXTENTS) { + ret = extent_update_to_keys(trans, i->iter, i->k); + if (ret) + goto out; + } else { + bch2_trans_update2(trans, i->iter, i->k); + } + } + + trans_for_each_update2(trans, i) { + BUG_ON(i->iter->uptodate > BTREE_ITER_NEED_PEEK); + BUG_ON(i->iter->locks_want < 1); + u64s = jset_u64s(i->k->k.u64s); if (0) trans->journal_preres_u64s += u64s; @@ -776,7 +929,10 @@ int bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter, .trigger_flags = flags, .iter = iter, .k = k }; - EBUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&k->k))); + EBUG_ON(bkey_cmp(iter->pos, + (iter->flags & BTREE_ITER_IS_EXTENTS) + ? bkey_start_pos(&k->k) + : k->k.p)); iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT;