X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libbcachefs%2Fextent_update.c;h=21af6fb8cecff150908724c238f434bc54a9dd6d;hb=f3976e3733e97a0e96f5fc6349d1e82b25116043;hp=846d77dc253039798a60f7ff36cb5f0ec812316c;hpb=304691592738dc272f4150107b54a53ab43fc8be;p=bcachefs-tools-debian diff --git a/libbcachefs/extent_update.c b/libbcachefs/extent_update.c index 846d77d..21af6fb 100644 --- a/libbcachefs/extent_update.c +++ b/libbcachefs/extent_update.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" -#include "bkey_on_stack.h" #include "btree_update.h" #include "btree_update_interior.h" #include "buckets.h" @@ -16,17 +15,26 @@ static unsigned bch2_bkey_nr_alloc_ptrs(struct bkey_s_c k) { struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const union bch_extent_entry *entry; - unsigned ret = 0; + unsigned ret = 0, lru = 0; bkey_extent_entry_for_each(ptrs, entry) { switch (__extent_entry_type(entry)) { case BCH_EXTENT_ENTRY_ptr: + /* Might also be updating LRU btree */ + if (entry->ptr.cached) + lru++; + + fallthrough; case BCH_EXTENT_ENTRY_stripe_ptr: ret++; } } - return ret; + /* + * Updating keys in the alloc btree may also update keys in the + * freespace or discard btrees: + */ + return lru + ret * 2; } static int count_iters_for_insert(struct btree_trans *trans, @@ -34,10 +42,14 @@ static int count_iters_for_insert(struct btree_trans *trans, unsigned offset, struct bpos *end, unsigned *nr_iters, - unsigned max_iters, - bool overwrite) + unsigned max_iters) { - int ret = 0; + int ret = 0, ret2 = 0; + + if (*nr_iters >= max_iters) { + *end = bpos_min(*end, k.k->p); + ret = 1; + } switch (k.k->type) { case KEY_TYPE_extent: @@ -55,514 +67,107 @@ static int count_iters_for_insert(struct btree_trans *trans, u64 idx = le64_to_cpu(p.v->idx); unsigned sectors = bpos_min(*end, p.k->p).offset - bkey_start_offset(p.k); - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c r_k; - for_each_btree_key(trans, iter, - BTREE_ID_REFLINK, POS(0, idx + offset), - BTREE_ITER_SLOTS, r_k, ret) { - if (bkey_cmp(bkey_start_pos(r_k.k), - POS(0, idx + sectors)) >= 0) + for_each_btree_key_norestart(trans, iter, + BTREE_ID_reflink, POS(0, idx + offset), + BTREE_ITER_SLOTS, r_k, ret2) { + if (bkey_ge(bkey_start_pos(r_k.k), POS(0, idx + sectors))) break; + /* extent_update_to_keys(), for the reflink_v update */ + *nr_iters += 1; + *nr_iters += 1 + bch2_bkey_nr_alloc_ptrs(r_k); if (*nr_iters >= max_iters) { struct bpos pos = bkey_start_pos(k.k); - pos.offset += r_k.k->p.offset - idx; + pos.offset += min_t(u64, k.k->size, + r_k.k->p.offset - idx); *end = bpos_min(*end, pos); ret = 1; break; } } + bch2_trans_iter_exit(trans, &iter); - bch2_trans_iter_put(trans, iter); break; } } - return ret; + return ret2 ?: ret; } #define EXTENT_ITERS_MAX (BTREE_ITER_MAX / 3) -int bch2_extent_atomic_end(struct btree_iter *iter, +int bch2_extent_atomic_end(struct btree_trans *trans, + struct btree_iter *iter, struct bkey_i *insert, struct bpos *end) { - struct btree_trans *trans = iter->trans; - struct btree *b; - struct btree_node_iter node_iter; - struct bkey_packed *_k; - unsigned nr_iters = 0; + struct btree_iter copy; + struct bkey_s_c k; + unsigned nr_iters = 0; int ret; ret = bch2_btree_iter_traverse(iter); if (ret) return ret; - b = iter->l[0].b; - node_iter = iter->l[0].iter; - - BUG_ON(bkey_cmp(bkey_start_pos(&insert->k), b->data->min_key) < 0); + *end = insert->k.p; - *end = bpos_min(insert->k.p, b->key.k.p); + /* extent_update_to_keys(): */ + nr_iters += 1; ret = count_iters_for_insert(trans, 
bkey_i_to_s_c(insert), 0, end,
-				   &nr_iters, EXTENT_ITERS_MAX / 2, false);
+				   &nr_iters, EXTENT_ITERS_MAX / 2);
 	if (ret < 0)
 		return ret;
 
-	while ((_k = bch2_btree_node_iter_peek_filter(&node_iter, b,
-						      KEY_TYPE_discard))) {
-		struct bkey	unpacked;
-		struct bkey_s_c	k = bkey_disassemble(b, _k, &unpacked);
-		unsigned	offset = 0;
+	bch2_trans_copy_iter(&copy, iter);
 
-		if (bkey_cmp(bkey_start_pos(k.k), *end) >= 0)
-			break;
+	for_each_btree_key_upto_continue_norestart(copy, insert->k.p, 0, k, ret) {
+		unsigned offset = 0;
 
-		if (bkey_cmp(bkey_start_pos(&insert->k),
-			     bkey_start_pos(k.k)) > 0)
+		if (bkey_gt(bkey_start_pos(&insert->k), bkey_start_pos(k.k)))
 			offset = bkey_start_offset(&insert->k) -
 				bkey_start_offset(k.k);
 
+		/* extent_handle_overwrites(): */
+		switch (bch2_extent_overlap(&insert->k, k.k)) {
+		case BCH_EXTENT_OVERLAP_ALL:
+		case BCH_EXTENT_OVERLAP_FRONT:
+			nr_iters += 1;
+			break;
+		case BCH_EXTENT_OVERLAP_BACK:
+		case BCH_EXTENT_OVERLAP_MIDDLE:
+			nr_iters += 2;
+			break;
+		}
+
 		ret = count_iters_for_insert(trans, k, offset, end,
-					     &nr_iters, EXTENT_ITERS_MAX, true);
+					     &nr_iters, EXTENT_ITERS_MAX);
 		if (ret)
 			break;
-
-		bch2_btree_node_iter_advance(&node_iter, b);
 	}
 
+	bch2_trans_iter_exit(trans, &copy);
 	return ret < 0 ? ret : 0;
 }
 
-int bch2_extent_trim_atomic(struct bkey_i *k, struct btree_iter *iter)
+int bch2_extent_trim_atomic(struct btree_trans *trans,
+			    struct btree_iter *iter,
+			    struct bkey_i *k)
 {
 	struct bpos end;
 	int ret;
 
-	ret = bch2_extent_atomic_end(iter, k, &end);
+	ret = bch2_extent_atomic_end(trans, iter, k, &end);
 	if (ret)
 		return ret;
 
 	bch2_cut_back(end, k);
 	return 0;
 }
-
-int bch2_extent_is_atomic(struct bkey_i *k, struct btree_iter *iter)
-{
-	struct bpos end;
-	int ret;
-
-	ret = bch2_extent_atomic_end(iter, k, &end);
-	if (ret)
-		return ret;
-
-	return !bkey_cmp(end, k->k.p);
-}
-
-enum btree_insert_ret
-bch2_extent_can_insert(struct btree_trans *trans,
-		       struct btree_iter *iter,
-		       struct bkey_i *insert,
-		       unsigned *u64s)
-{
-	struct btree_iter_level *l = &iter->l[0];
-	struct btree_node_iter node_iter = l->iter;
-	struct bkey_packed *_k;
-	struct bkey unpacked;
-	int sectors;
-
-	while ((_k = bch2_btree_node_iter_peek_filter(&node_iter, l->b,
-						      KEY_TYPE_discard))) {
-		struct bkey_s_c k = bkey_disassemble(l->b, _k, &unpacked);
-		enum bch_extent_overlap overlap =
-			bch2_extent_overlap(&insert->k, k.k);
-
-		if (bkey_cmp(bkey_start_pos(k.k), insert->k.p) >= 0)
-			break;
-
-		overlap = bch2_extent_overlap(&insert->k, k.k);
-
-		/*
-		 * If we're overwriting an existing extent, we may need to emit
-		 * a whiteout - unless we're inserting a new extent at the same
-		 * position:
-		 */
-		if (k.k->needs_whiteout &&
-		    (!bkey_whiteout(&insert->k) ||
-		     bkey_cmp(k.k->p, insert->k.p)))
-			*u64s += BKEY_U64s;
-
-		/*
-		 * If we're partially overwriting an existing extent which has
-		 * been written out to disk, we'll need to emit a new version of
-		 * that extent:
-		 */
-		if (bkey_written(l->b, _k) &&
-		    overlap != BCH_EXTENT_OVERLAP_ALL)
-			*u64s += _k->u64s;
-
-		/* And we may be splitting an existing extent: */
-		if (overlap == BCH_EXTENT_OVERLAP_MIDDLE)
-			*u64s += _k->u64s;
-
-		if (overlap == BCH_EXTENT_OVERLAP_MIDDLE &&
-		    (sectors = bch2_bkey_sectors_compressed(k))) {
-			int flags = trans->flags & BTREE_INSERT_NOFAIL
-				? 
BCH_DISK_RESERVATION_NOFAIL : 0; - - switch (bch2_disk_reservation_add(trans->c, - trans->disk_res, - sectors, flags)) { - case 0: - break; - case -ENOSPC: - return BTREE_INSERT_ENOSPC; - default: - BUG(); - } - } - - if (overlap == BCH_EXTENT_OVERLAP_FRONT || - overlap == BCH_EXTENT_OVERLAP_MIDDLE) - break; - - bch2_btree_node_iter_advance(&node_iter, l->b); - } - - return BTREE_INSERT_OK; -} - -static void verify_extent_nonoverlapping(struct bch_fs *c, - struct btree *b, - struct btree_node_iter *_iter, - struct bkey_i *insert) -{ -#ifdef CONFIG_BCACHEFS_DEBUG - struct btree_node_iter iter; - struct bkey_packed *k; - struct bkey uk; - - if (!expensive_debug_checks(c)) - return; - - iter = *_iter; - k = bch2_btree_node_iter_prev_filter(&iter, b, KEY_TYPE_discard); - BUG_ON(k && - (uk = bkey_unpack_key(b, k), - bkey_cmp(uk.p, bkey_start_pos(&insert->k)) > 0)); - - iter = *_iter; - k = bch2_btree_node_iter_peek_filter(&iter, b, KEY_TYPE_discard); -#if 0 - BUG_ON(k && - (uk = bkey_unpack_key(b, k), - bkey_cmp(insert->k.p, bkey_start_pos(&uk))) > 0); -#else - if (k && - (uk = bkey_unpack_key(b, k), - bkey_cmp(insert->k.p, bkey_start_pos(&uk))) > 0) { - char buf1[100]; - char buf2[100]; - - bch2_bkey_to_text(&PBUF(buf1), &insert->k); - bch2_bkey_to_text(&PBUF(buf2), &uk); - - bch2_dump_btree_node(b); - panic("insert > next :\n" - "insert %s\n" - "next %s\n", - buf1, buf2); - } -#endif - -#endif -} - -static void extent_bset_insert(struct bch_fs *c, struct btree_iter *iter, - struct bkey_i *insert) -{ - struct btree_iter_level *l = &iter->l[0]; - struct bkey_packed *k = - bch2_btree_node_iter_bset_pos(&l->iter, l->b, bset_tree_last(l->b)); - - BUG_ON(insert->k.u64s > bch_btree_keys_u64s_remaining(c, l->b)); - - EBUG_ON(bkey_deleted(&insert->k) || !insert->k.size); - verify_extent_nonoverlapping(c, l->b, &l->iter, insert); - - if (debug_check_bkeys(c)) - bch2_bkey_debugcheck(c, l->b, bkey_i_to_s_c(insert)); - - bch2_bset_insert(l->b, &l->iter, k, insert, 0); - bch2_btree_node_iter_fix(iter, l->b, &l->iter, k, 0, k->u64s); -} - -static void pack_push_whiteout(struct bch_fs *c, struct btree *b, - struct bpos pos) -{ - struct bkey_packed k; - - if (!bkey_pack_pos(&k, pos, b)) { - struct bkey_i tmp; - - bkey_init(&tmp.k); - tmp.k.p = pos; - bkey_copy(&k, &tmp); - } - - k.needs_whiteout = true; - push_whiteout(c, b, &k); -} - -static void -extent_drop(struct bch_fs *c, struct btree_iter *iter, - struct bkey_packed *_k, struct bkey_s k) -{ - struct btree_iter_level *l = &iter->l[0]; - - if (!bkey_whiteout(k.k)) - btree_account_key_drop(l->b, _k); - - k.k->size = 0; - k.k->type = KEY_TYPE_deleted; - - if (!btree_node_old_extent_overwrite(l->b) && - k.k->needs_whiteout) { - pack_push_whiteout(c, l->b, k.k->p); - k.k->needs_whiteout = false; - } - - if (_k >= btree_bset_last(l->b)->start) { - unsigned u64s = _k->u64s; - - bch2_bset_delete(l->b, _k, _k->u64s); - bch2_btree_node_iter_fix(iter, l->b, &l->iter, _k, u64s, 0); - } else { - extent_save(l->b, _k, k.k); - bch2_btree_iter_fix_key_modified(iter, l->b, _k); - } -} - -static void -extent_squash(struct bch_fs *c, struct btree_iter *iter, - struct bkey_i *insert, - struct bkey_packed *_k, struct bkey_s k, - enum bch_extent_overlap overlap) -{ - struct btree_iter_level *l = &iter->l[0]; - struct bkey_on_stack tmp, split; - - bkey_on_stack_init(&tmp); - bkey_on_stack_init(&split); - - if (!btree_node_old_extent_overwrite(l->b)) { - if (!bkey_whiteout(&insert->k) && - !bkey_cmp(k.k->p, insert->k.p)) { - insert->k.needs_whiteout = k.k->needs_whiteout; - 
k.k->needs_whiteout = false; - } - } else { - insert->k.needs_whiteout |= k.k->needs_whiteout; - } - - switch (overlap) { - case BCH_EXTENT_OVERLAP_FRONT: - if (bkey_written(l->b, _k)) { - bkey_on_stack_reassemble(&tmp, c, k.s_c); - bch2_cut_front(insert->k.p, tmp.k); - - /* - * needs_whiteout was propagated to new version of @k, - * @tmp: - */ - if (!btree_node_old_extent_overwrite(l->b)) - k.k->needs_whiteout = false; - - extent_drop(c, iter, _k, k); - extent_bset_insert(c, iter, tmp.k); - } else { - btree_keys_account_val_delta(l->b, _k, - bch2_cut_front_s(insert->k.p, k)); - - extent_save(l->b, _k, k.k); - /* - * No need to call bset_fix_invalidated_key, start of - * extent changed but extents are indexed by where they - * end - */ - bch2_btree_iter_fix_key_modified(iter, l->b, _k); - } - break; - case BCH_EXTENT_OVERLAP_BACK: - if (bkey_written(l->b, _k)) { - bkey_on_stack_reassemble(&tmp, c, k.s_c); - bch2_cut_back(bkey_start_pos(&insert->k), tmp.k); - - /* - * @tmp has different position than @k, needs_whiteout - * should not be propagated: - */ - if (!btree_node_old_extent_overwrite(l->b)) - tmp.k->k.needs_whiteout = false; - - extent_drop(c, iter, _k, k); - extent_bset_insert(c, iter, tmp.k); - } else { - /* - * position of @k is changing, emit a whiteout if - * needs_whiteout is set: - */ - if (!btree_node_old_extent_overwrite(l->b) && - k.k->needs_whiteout) { - pack_push_whiteout(c, l->b, k.k->p); - k.k->needs_whiteout = false; - } - - btree_keys_account_val_delta(l->b, _k, - bch2_cut_back_s(bkey_start_pos(&insert->k), k)); - extent_save(l->b, _k, k.k); - - bch2_bset_fix_invalidated_key(l->b, _k); - bch2_btree_node_iter_fix(iter, l->b, &l->iter, - _k, _k->u64s, _k->u64s); - } - break; - case BCH_EXTENT_OVERLAP_ALL: - extent_drop(c, iter, _k, k); - break; - case BCH_EXTENT_OVERLAP_MIDDLE: - bkey_on_stack_reassemble(&split, c, k.s_c); - bch2_cut_back(bkey_start_pos(&insert->k), split.k); - - if (!btree_node_old_extent_overwrite(l->b)) - split.k->k.needs_whiteout = false; - - /* this is identical to BCH_EXTENT_OVERLAP_FRONT: */ - if (bkey_written(l->b, _k)) { - bkey_on_stack_reassemble(&tmp, c, k.s_c); - bch2_cut_front(insert->k.p, tmp.k); - - if (!btree_node_old_extent_overwrite(l->b)) - k.k->needs_whiteout = false; - - extent_drop(c, iter, _k, k); - extent_bset_insert(c, iter, tmp.k); - } else { - btree_keys_account_val_delta(l->b, _k, - bch2_cut_front_s(insert->k.p, k)); - - extent_save(l->b, _k, k.k); - bch2_btree_iter_fix_key_modified(iter, l->b, _k); - } - - extent_bset_insert(c, iter, split.k); - break; - } - - bkey_on_stack_exit(&split, c); - bkey_on_stack_exit(&tmp, c); -} - -/** - * bch_extent_insert_fixup - insert a new extent and deal with overlaps - * - * this may result in not actually doing the insert, or inserting some subset - * of the insert key. For cmpxchg operations this is where that logic lives. - * - * All subsets of @insert that need to be inserted are inserted using - * bch2_btree_insert_and_journal(). If @b or @res fills up, this function - * returns false, setting @iter->pos for the prefix of @insert that actually got - * inserted. - * - * BSET INVARIANTS: this function is responsible for maintaining all the - * invariants for bsets of extents in memory. things get really hairy with 0 - * size extents - * - * within one bset: - * - * bkey_start_pos(bkey_next(k)) >= k - * or bkey_start_offset(bkey_next(k)) >= k->offset - * - * i.e. strict ordering, no overlapping extents. - * - * multiple bsets (i.e. 
full btree node): - * - * ∀ k, j - * k.size != 0 ∧ j.size != 0 → - * ¬ (k > bkey_start_pos(j) ∧ k < j) - * - * i.e. no two overlapping keys _of nonzero size_ - * - * We can't realistically maintain this invariant for zero size keys because of - * the key merging done in bch2_btree_insert_key() - for two mergeable keys k, j - * there may be another 0 size key between them in another bset, and it will - * thus overlap with the merged key. - * - * In addition, the end of iter->pos indicates how much has been processed. - * If the end of iter->pos is not the same as the end of insert, then - * key insertion needs to continue/be retried. - */ -void bch2_insert_fixup_extent(struct btree_trans *trans, - struct btree_iter *iter, - struct bkey_i *insert) -{ - struct bch_fs *c = trans->c; - struct btree_iter_level *l = &iter->l[0]; - struct btree_node_iter node_iter = l->iter; - bool do_update = !bkey_whiteout(&insert->k); - struct bkey_packed *_k; - struct bkey unpacked; - - EBUG_ON(iter->level); - EBUG_ON(!insert->k.size); - EBUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&insert->k))); - - while ((_k = bch2_btree_node_iter_peek_filter(&l->iter, l->b, - KEY_TYPE_discard))) { - struct bkey_s k = __bkey_disassemble(l->b, _k, &unpacked); - enum bch_extent_overlap overlap = - bch2_extent_overlap(&insert->k, k.k); - - if (bkey_cmp(bkey_start_pos(k.k), insert->k.p) >= 0) - break; - - if (!bkey_whiteout(k.k)) - do_update = true; - - if (!do_update) { - struct bpos cur_end = bpos_min(insert->k.p, k.k->p); - - bch2_cut_front(cur_end, insert); - bch2_btree_iter_set_pos_same_leaf(iter, cur_end); - } else { - extent_squash(c, iter, insert, _k, k, overlap); - } - - node_iter = l->iter; - - if (overlap == BCH_EXTENT_OVERLAP_FRONT || - overlap == BCH_EXTENT_OVERLAP_MIDDLE) - break; - } - - l->iter = node_iter; - bch2_btree_iter_set_pos_same_leaf(iter, insert->k.p); - - if (do_update) { - if (insert->k.type == KEY_TYPE_deleted) - insert->k.type = KEY_TYPE_discard; - - if (!bkey_whiteout(&insert->k) || - btree_node_old_extent_overwrite(l->b)) - extent_bset_insert(c, iter, insert); - - bch2_btree_journal_key(trans, iter, insert); - } - - bch2_cut_front(insert->k.p, insert); -}
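
Usage context (illustrative, not part of this patch): after this change,
bch2_extent_trim_atomic() takes the btree_trans explicitly and clamps an
extent update to what bch2_extent_atomic_end() says fits in one atomic
transaction, i.e. at most EXTENT_ITERS_MAX (BTREE_ITER_MAX / 3) btree
iterators once the reflink_v updates and the per-pointer
alloc/freespace/discard/LRU updates counted by bch2_bkey_nr_alloc_ptrs()
are accounted for. Below is a minimal caller sketch, loosely modeled on
existing users such as bch2_fpunch_at(); example_punch_range() is a
hypothetical name, the sketch assumes the post-patch iterator API
(bch2_btree_iter_peek_upto() and friends), and transaction-restart and
disk-reservation handling are omitted for brevity.

#include "bcachefs.h"
#include "btree_iter.h"
#include "btree_update.h"
#include "extent_update.h"

/*
 * Illustrative sketch (hypothetical, not part of this patch): delete the
 * extents in [start, end), committing in chunks that
 * bch2_extent_trim_atomic() has verified fit in one atomic transaction.
 */
static int example_punch_range(struct btree_trans *trans,
			       struct bpos start, struct bpos end)
{
	struct btree_iter iter;
	struct bkey_s_c k;
	int ret = 0;

	bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, start,
			     BTREE_ITER_INTENT);

	while (k = bch2_btree_iter_peek_upto(&iter, end),
	       !(ret = bkey_err(k)) && k.k) {
		struct bkey_i delete;

		/* Build a deletion covering the extent we peeked at: */
		bkey_init(&delete.k);
		delete.k.p = iter.pos;
		bch2_key_resize(&delete.k, k.k->p.offset - iter.pos.offset);
		bch2_cut_back(end, &delete);

		/*
		 * Trim the deletion to an atomic unit, then update and
		 * commit; only the trimmed prefix is applied per commit
		 * (restart handling omitted):
		 */
		ret =   bch2_extent_trim_atomic(trans, &iter, &delete) ?:
			bch2_trans_update(trans, &iter, &delete, 0) ?:
			bch2_trans_commit(trans, NULL, NULL, 0);
		if (ret)
			break;
	}

	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

Each successful commit applies only the trimmed prefix of the deletion; the
loop then re-peeks and continues with the remainder. This is the pattern the
nr_iters accounting above exists to support: it bounds how many iterators a
single transaction can need before the caller commits and starts another.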