X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libbcachefs%2Fbtree_update_interior.c;h=9dca694b6ee3b09c0b8027fb5354d01c687c9bff;hb=ad031731727d4239351afeebf675f069a289beb3;hp=c8c3382f48c7e31ded2872dbf34af9057306cff9;hpb=f06b01e9eacca7cd23679ee92f3d082c9352263f;p=bcachefs-tools-debian diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c index c8c3382..9dca694 100644 --- a/libbcachefs/btree_update_interior.c +++ b/libbcachefs/btree_update_interior.c @@ -23,8 +23,9 @@ #include static void bch2_btree_insert_node(struct btree_update *, struct btree_trans *, - struct btree_iter *, struct btree *, + struct btree_path *, struct btree *, struct keylist *, unsigned); +static void bch2_btree_update_add_new_node(struct btree_update *, struct btree *); /* Debug code: */ @@ -152,38 +153,26 @@ static void __btree_node_free(struct bch_fs *c, struct btree *b) clear_btree_node_noevict(b); - bch2_btree_node_hash_remove(&c->btree_cache, b); - mutex_lock(&c->btree_cache.lock); list_move(&b->list, &c->btree_cache.freeable); mutex_unlock(&c->btree_cache.lock); } -void bch2_btree_node_free_never_inserted(struct bch_fs *c, struct btree *b) +static void bch2_btree_node_free_inmem(struct btree_trans *trans, + struct btree *b) { - struct open_buckets ob = b->ob; + struct bch_fs *c = trans->c; + struct btree_path *path; - b->ob.nr = 0; + trans_for_each_path(trans, path) + BUG_ON(path->l[b->c.level].b == b && + path->l[b->c.level].lock_seq == b->c.lock.state.seq); - clear_btree_node_dirty(c, b); + six_lock_write(&b->c.lock, NULL, NULL); - btree_node_lock_type(c, b, SIX_LOCK_write); + bch2_btree_node_hash_remove(&c->btree_cache, b); __btree_node_free(c, b); - six_unlock_write(&b->c.lock); - - bch2_open_buckets_put(c, &ob); -} -void bch2_btree_node_free_inmem(struct bch_fs *c, struct btree *b, - struct btree_iter *iter) -{ - struct btree_iter *linked; - - trans_for_each_iter(iter->trans, linked) - BUG_ON(linked->l[b->c.level].b == b); - - six_lock_write(&b->c.lock, NULL, NULL); - __btree_node_free(c, b); six_unlock_write(&b->c.lock); six_unlock_intent(&b->c.lock); } @@ -234,12 +223,12 @@ retry: if (IS_ERR(wp)) return ERR_CAST(wp); - if (wp->sectors_free < c->opts.btree_node_size) { + if (wp->sectors_free < btree_sectors(c)) { struct open_bucket *ob; unsigned i; open_bucket_for_each(c, &wp->ptrs, ob, i) - if (ob->sectors_free < c->opts.btree_node_size) + if (ob->sectors_free < btree_sectors(c)) ob->sectors_free = 0; bch2_alloc_sectors_done(c, wp); @@ -247,7 +236,7 @@ retry: } bkey_btree_ptr_v2_init(&tmp.k); - bch2_alloc_sectors_append_ptrs(c, wp, &tmp.k, c->opts.btree_node_size); + bch2_alloc_sectors_append_ptrs(c, wp, &tmp.k, btree_sectors(c)); bch2_open_bucket_get(c, wp, &ob); bch2_alloc_sectors_done(c, wp); @@ -466,15 +455,23 @@ static void bch2_btree_update_free(struct btree_update *as) bch2_disk_reservation_put(c, &as->disk_res); bch2_btree_reserve_put(as); + bch2_time_stats_update(&c->times[BCH_TIME_btree_interior_update_total], + as->start_time); + mutex_lock(&c->btree_interior_update_lock); list_del(&as->unwritten_list); list_del(&as->list); - mutex_unlock(&c->btree_interior_update_lock); closure_debug_destroy(&as->cl); mempool_free(as, &c->btree_interior_update_pool); + /* + * Have to do the wakeup with btree_interior_update_lock still held, + * since being on btree_interior_update_list is our ref on @c: + */ closure_wake_up(&c->btree_interior_update_wait); + + mutex_unlock(&c->btree_interior_update_lock); } static void btree_update_will_delete_key(struct btree_update *as, @@ -773,7 +770,7 @@ static void btree_update_updated_root(struct btree_update *as, struct btree *b) * And it adds @b to the list of @as's new nodes, so that we can update sector * counts in bch2_btree_update_nodes_written: */ -void bch2_btree_update_add_new_node(struct btree_update *as, struct btree *b) +static void bch2_btree_update_add_new_node(struct btree_update *as, struct btree *b) { struct bch_fs *c = as->c; @@ -827,7 +824,7 @@ found: closure_put(&as->cl); } -void bch2_btree_update_get_open_buckets(struct btree_update *as, struct btree *b) +static void bch2_btree_update_get_open_buckets(struct btree_update *as, struct btree *b) { while (b->ob.nr) as->open_buckets[as->nr_open_buckets++] = @@ -839,7 +836,7 @@ void bch2_btree_update_get_open_buckets(struct btree_update *as, struct btree *b * nodes and thus outstanding btree_updates - redirect @b's * btree_updates to point to this btree_update: */ -void bch2_btree_interior_update_will_free_node(struct btree_update *as, +static void bch2_btree_interior_update_will_free_node(struct btree_update *as, struct btree *b) { struct bch_fs *c = as->c; @@ -911,8 +908,11 @@ void bch2_btree_interior_update_will_free_node(struct btree_update *as, as->nr_old_nodes++; } -void bch2_btree_update_done(struct btree_update *as) +static void bch2_btree_update_done(struct btree_update *as) { + struct bch_fs *c = as->c; + u64 start_time = as->start_time; + BUG_ON(as->mode == BTREE_INTERIOR_NO_UPDATE); if (as->took_gc_lock) @@ -923,22 +923,25 @@ void bch2_btree_update_done(struct btree_update *as) continue_at(&as->cl, btree_update_set_nodes_written, as->c->btree_interior_update_worker); + + bch2_time_stats_update(&c->times[BCH_TIME_btree_interior_update_foreground], + start_time); } -struct btree_update * -bch2_btree_update_start(struct btree_iter *iter, unsigned level, - unsigned nr_nodes, unsigned flags) +static struct btree_update * +bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, + unsigned level, unsigned nr_nodes, unsigned flags) { - struct btree_trans *trans = iter->trans; struct bch_fs *c = trans->c; struct btree_update *as; struct closure cl; + u64 start_time = local_clock(); int disk_res_flags = (flags & BTREE_INSERT_NOFAIL) ? BCH_DISK_RESERVATION_NOFAIL : 0; int journal_flags = 0; int ret = 0; - BUG_ON(!iter->should_be_locked); + BUG_ON(!path->should_be_locked); if (flags & BTREE_INSERT_JOURNAL_RESERVED) journal_flags |= JOURNAL_RES_GET_RESERVED; @@ -950,11 +953,11 @@ retry: * XXX: figure out how far we might need to split, * instead of locking/reserving all the way to the root: */ - if (!bch2_btree_iter_upgrade(iter, U8_MAX)) { + if (!bch2_btree_path_upgrade(trans, path, U8_MAX)) { trace_trans_restart_iter_upgrade(trans->ip, _RET_IP_, - iter->btree_id, - &iter->real_pos); - return ERR_PTR(-EINTR); + path->btree_id, &path->pos); + ret = btree_trans_restart(trans); + return ERR_PTR(ret); } if (flags & BTREE_INSERT_GC_LOCK_HELD) @@ -972,9 +975,10 @@ retry: memset(as, 0, sizeof(*as)); closure_init(&as->cl, NULL); as->c = c; + as->start_time = start_time; as->mode = BTREE_INTERIOR_NO_UPDATE; as->took_gc_lock = !(flags & BTREE_INSERT_GC_LOCK_HELD); - as->btree_id = iter->btree_id; + as->btree_id = path->btree_id; INIT_LIST_HEAD(&as->list); INIT_LIST_HEAD(&as->unwritten_list); INIT_LIST_HEAD(&as->write_blocked_list); @@ -1025,7 +1029,7 @@ retry: } ret = bch2_disk_reservation_get(c, &as->disk_res, - nr_nodes * c->opts.btree_node_size, + nr_nodes * btree_sectors(c), c->opts.metadata_replicas, disk_res_flags); if (ret) @@ -1092,8 +1096,10 @@ static void bch2_btree_set_root_inmem(struct bch_fs *c, struct btree *b) * is nothing new to be done. This just guarantees that there is a * journal write. */ -static void bch2_btree_set_root(struct btree_update *as, struct btree *b, - struct btree_iter *iter) +static void bch2_btree_set_root(struct btree_update *as, + struct btree_trans *trans, + struct btree_path *path, + struct btree *b) { struct bch_fs *c = as->c; struct btree *old; @@ -1108,7 +1114,7 @@ static void bch2_btree_set_root(struct btree_update *as, struct btree *b, * Ensure no one is using the old root while we switch to the * new root: */ - bch2_btree_node_lock_write(old, iter); + bch2_btree_node_lock_write(trans, path, old); bch2_btree_set_root_inmem(c, b); @@ -1121,15 +1127,17 @@ static void bch2_btree_set_root(struct btree_update *as, struct btree *b, * an intent lock on the new root, and any updates that would * depend on the new root would have to update the new root. */ - bch2_btree_node_unlock_write(old, iter); + bch2_btree_node_unlock_write(trans, path, old); } /* Interior node updates: */ -static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b, - struct btree_iter *iter, - struct bkey_i *insert, - struct btree_node_iter *node_iter) +static void bch2_insert_fixup_btree_ptr(struct btree_update *as, + struct btree_trans *trans, + struct btree_path *path, + struct btree *b, + struct btree_node_iter *node_iter, + struct bkey_i *insert) { struct bch_fs *c = as->c; struct bkey_packed *k; @@ -1161,15 +1169,18 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b bkey_iter_pos_cmp(b, k, &insert->k.p) < 0) bch2_btree_node_iter_advance(node_iter, b); - bch2_btree_bset_insert_key(iter, b, node_iter, insert); + bch2_btree_bset_insert_key(trans, path, b, node_iter, insert); set_btree_node_dirty(c, b); set_btree_node_need_write(b); } static void -__bch2_btree_insert_keys_interior(struct btree_update *as, struct btree *b, - struct btree_iter *iter, struct keylist *keys, - struct btree_node_iter node_iter) +__bch2_btree_insert_keys_interior(struct btree_update *as, + struct btree_trans *trans, + struct btree_path *path, + struct btree *b, + struct btree_node_iter node_iter, + struct keylist *keys) { struct bkey_i *insert = bch2_keylist_front(keys); struct bkey_packed *k; @@ -1181,8 +1192,8 @@ __bch2_btree_insert_keys_interior(struct btree_update *as, struct btree *b, ; while (!bch2_keylist_empty(keys)) { - bch2_insert_fixup_btree_ptr(as, b, iter, - bch2_keylist_front(keys), &node_iter); + bch2_insert_fixup_btree_ptr(as, trans, path, b, + &node_iter, bch2_keylist_front(keys)); bch2_keylist_pop_front(keys); } } @@ -1192,8 +1203,7 @@ __bch2_btree_insert_keys_interior(struct btree_update *as, struct btree *b, * node) */ static struct btree *__btree_split_node(struct btree_update *as, - struct btree *n1, - struct btree_iter *iter) + struct btree *n1) { struct bkey_format_state s; size_t nr_packed = 0, nr_unpacked = 0; @@ -1308,8 +1318,10 @@ static struct btree *__btree_split_node(struct btree_update *as, * nodes that were coalesced, and thus in the middle of a child node post * coalescing: */ -static void btree_split_insert_keys(struct btree_update *as, struct btree *b, - struct btree_iter *iter, +static void btree_split_insert_keys(struct btree_update *as, + struct btree_trans *trans, + struct btree_path *path, + struct btree *b, struct keylist *keys) { struct btree_node_iter node_iter; @@ -1319,7 +1331,7 @@ static void btree_split_insert_keys(struct btree_update *as, struct btree *b, bch2_btree_node_iter_init(&node_iter, b, &k->k.p); - __bch2_btree_insert_keys_interior(as, b, iter, keys, node_iter); + __bch2_btree_insert_keys_interior(as, trans, path, b, node_iter, keys); /* * We can't tolerate whiteouts here - with whiteouts there can be @@ -1349,18 +1361,17 @@ static void btree_split_insert_keys(struct btree_update *as, struct btree *b, btree_node_interior_verify(as->c, b); } -static void btree_split(struct btree_update *as, - struct btree_trans *trans, struct btree_iter *iter, - struct btree *b, struct keylist *keys, - unsigned flags) +static void btree_split(struct btree_update *as, struct btree_trans *trans, + struct btree_path *path, struct btree *b, + struct keylist *keys, unsigned flags) { struct bch_fs *c = as->c; - struct btree *parent = btree_node_parent(iter, b); + struct btree *parent = btree_node_parent(path, b); struct btree *n1, *n2 = NULL, *n3 = NULL; u64 start_time = local_clock(); BUG_ON(!parent && (b != btree_node_root(c, b))); - BUG_ON(!btree_node_intent_locked(iter, btree_node_root(c, b)->c.level)); + BUG_ON(!btree_node_intent_locked(path, btree_node_root(c, b)->c.level)); bch2_btree_interior_update_will_free_node(as, b); @@ -1368,12 +1379,12 @@ static void btree_split(struct btree_update *as, bch2_btree_update_add_new_node(as, n1); if (keys) - btree_split_insert_keys(as, n1, iter, keys); + btree_split_insert_keys(as, trans, path, n1, keys); if (bset_u64s(&n1->set[0]) > BTREE_SPLIT_THRESHOLD(c)) { trace_btree_split(c, b); - n2 = __btree_split_node(as, n1, iter); + n2 = __btree_split_node(as, n1); bch2_btree_build_aux_trees(n2); bch2_btree_build_aux_trees(n1); @@ -1398,7 +1409,7 @@ static void btree_split(struct btree_update *as, n3->sib_u64s[0] = U16_MAX; n3->sib_u64s[1] = U16_MAX; - btree_split_insert_keys(as, n3, iter, &as->parent_keys); + btree_split_insert_keys(as, trans, path, n3, &as->parent_keys); bch2_btree_node_write(c, n3, SIX_LOCK_intent); } @@ -1418,12 +1429,12 @@ static void btree_split(struct btree_update *as, if (parent) { /* Split a non root node */ - bch2_btree_insert_node(as, trans, iter, parent, &as->parent_keys, flags); + bch2_btree_insert_node(as, trans, path, parent, &as->parent_keys, flags); } else if (n3) { - bch2_btree_set_root(as, n3, iter); + bch2_btree_set_root(as, trans, path, n3); } else { /* Root filled up but didn't need to be split */ - bch2_btree_set_root(as, n1, iter); + bch2_btree_set_root(as, trans, path, n1); } bch2_btree_update_get_open_buckets(as, n1); @@ -1432,15 +1443,14 @@ static void btree_split(struct btree_update *as, if (n3) bch2_btree_update_get_open_buckets(as, n3); - /* Successful split, update the iterator to point to the new nodes: */ + /* Successful split, update the path to point to the new nodes: */ six_lock_increment(&b->c.lock, SIX_LOCK_intent); - bch2_btree_iter_node_drop(iter, b); if (n3) - bch2_btree_iter_node_replace(iter, n3); + bch2_trans_node_add(trans, n3); if (n2) - bch2_btree_iter_node_replace(iter, n2); - bch2_btree_iter_node_replace(iter, n1); + bch2_trans_node_add(trans, n2); + bch2_trans_node_add(trans, n1); /* * The old node must be freed (in memory) _before_ unlocking the new @@ -1448,7 +1458,7 @@ static void btree_split(struct btree_update *as, * node after another thread has locked and updated the new node, thus * seeing stale data: */ - bch2_btree_node_free_inmem(c, b, iter); + bch2_btree_node_free_inmem(trans, b); if (n3) six_unlock_intent(&n3->c.lock); @@ -1456,26 +1466,32 @@ static void btree_split(struct btree_update *as, six_unlock_intent(&n2->c.lock); six_unlock_intent(&n1->c.lock); - bch2_btree_trans_verify_locks(trans); + bch2_trans_verify_locks(trans); - bch2_time_stats_update(&c->times[BCH_TIME_btree_node_split], + bch2_time_stats_update(&c->times[n2 + ? BCH_TIME_btree_node_split + : BCH_TIME_btree_node_compact], start_time); } static void -bch2_btree_insert_keys_interior(struct btree_update *as, struct btree *b, - struct btree_iter *iter, struct keylist *keys) +bch2_btree_insert_keys_interior(struct btree_update *as, + struct btree_trans *trans, + struct btree_path *path, + struct btree *b, + struct keylist *keys) { - struct btree_iter *linked; + struct btree_path *linked; - __bch2_btree_insert_keys_interior(as, b, iter, keys, iter->l[b->c.level].iter); + __bch2_btree_insert_keys_interior(as, trans, path, b, + path->l[b->c.level].iter, keys); btree_update_updated_node(as, b); - trans_for_each_iter_with_node(iter->trans, b, linked) + trans_for_each_path_with_node(trans, b, linked) bch2_btree_node_iter_peek(&linked->l[b->c.level].iter, b); - bch2_btree_trans_verify_iters(iter->trans, b); + bch2_trans_verify_paths(trans); } /** @@ -1490,10 +1506,9 @@ bch2_btree_insert_keys_interior(struct btree_update *as, struct btree *b, * If a split occurred, this function will return early. This can only happen * for leaf nodes -- inserts into interior nodes have to be atomic. */ -static void bch2_btree_insert_node(struct btree_update *as, - struct btree_trans *trans, struct btree_iter *iter, - struct btree *b, struct keylist *keys, - unsigned flags) +static void bch2_btree_insert_node(struct btree_update *as, struct btree_trans *trans, + struct btree_path *path, struct btree *b, + struct keylist *keys, unsigned flags) { struct bch_fs *c = as->c; int old_u64s = le16_to_cpu(btree_bset_last(b)->u64s); @@ -1501,21 +1516,21 @@ static void bch2_btree_insert_node(struct btree_update *as, int live_u64s_added, u64s_added; lockdep_assert_held(&c->gc_lock); - BUG_ON(!btree_node_intent_locked(iter, btree_node_root(c, b)->c.level)); + BUG_ON(!btree_node_intent_locked(path, btree_node_root(c, b)->c.level)); BUG_ON(!b->c.level); BUG_ON(!as || as->b); bch2_verify_keylist_sorted(keys); - bch2_btree_node_lock_for_insert(trans, iter, b); + bch2_btree_node_lock_for_insert(trans, path, b); if (!bch2_btree_node_insert_fits(c, b, bch2_keylist_u64s(keys))) { - bch2_btree_node_unlock_write(b, iter); + bch2_btree_node_unlock_write(trans, path, b); goto split; } btree_node_interior_verify(c, b); - bch2_btree_insert_keys_interior(as, b, iter, keys); + bch2_btree_insert_keys_interior(as, trans, path, b, keys); live_u64s_added = (int) b->nr.live_u64s - old_live_u64s; u64s_added = (int) le16_to_cpu(btree_bset_last(b)->u64s) - old_u64s; @@ -1527,48 +1542,48 @@ static void bch2_btree_insert_node(struct btree_update *as, if (u64s_added > live_u64s_added && bch2_maybe_compact_whiteouts(c, b)) - bch2_btree_iter_reinit_node(iter, b); + bch2_trans_node_reinit_iter(trans, b); - bch2_btree_node_unlock_write(b, iter); + bch2_btree_node_unlock_write(trans, path, b); btree_node_interior_verify(c, b); return; split: - btree_split(as, trans, iter, b, keys, flags); + btree_split(as, trans, path, b, keys, flags); } int bch2_btree_split_leaf(struct btree_trans *trans, - struct btree_iter *iter, + struct btree_path *path, unsigned flags) { struct bch_fs *c = trans->c; - struct btree *b = iter_l(iter)->b; + struct btree *b = path_l(path)->b; struct btree_update *as; unsigned l; int ret = 0; - as = bch2_btree_update_start(iter, iter->level, + as = bch2_btree_update_start(trans, path, path->level, btree_update_reserve_required(c, b), flags); if (IS_ERR(as)) return PTR_ERR(as); - btree_split(as, trans, iter, b, NULL, flags); + btree_split(as, trans, path, b, NULL, flags); bch2_btree_update_done(as); - for (l = iter->level + 1; btree_iter_node(iter, l) && !ret; l++) - ret = bch2_foreground_maybe_merge(trans, iter, l, flags); + for (l = path->level + 1; btree_path_node(path, l) && !ret; l++) + ret = bch2_foreground_maybe_merge(trans, path, l, flags); return ret; } int __bch2_foreground_maybe_merge(struct btree_trans *trans, - struct btree_iter *iter, + struct btree_path *path, unsigned level, unsigned flags, enum btree_node_sibling sib) { struct bch_fs *c = trans->c; - struct btree_iter *sib_iter = NULL; + struct btree_path *sib_path = NULL; struct btree_update *as; struct bkey_format_state new_s; struct bkey_format new_f; @@ -1576,39 +1591,36 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans, struct btree *b, *m, *n, *prev, *next, *parent; struct bpos sib_pos; size_t sib_u64s; - int ret = 0, ret2 = 0; - -retry: - ret = bch2_btree_iter_traverse(iter); - if (ret) - return ret; + u64 start_time = local_clock(); + int ret = 0; - BUG_ON(!iter->should_be_locked); - BUG_ON(!btree_node_locked(iter, level)); + BUG_ON(!path->should_be_locked); + BUG_ON(!btree_node_locked(path, level)); - b = iter->l[level].b; + b = path->l[level].b; if ((sib == btree_prev_sib && !bpos_cmp(b->data->min_key, POS_MIN)) || (sib == btree_next_sib && !bpos_cmp(b->data->max_key, SPOS_MAX))) { b->sib_u64s[sib] = U16_MAX; - goto out; + return 0; } sib_pos = sib == btree_prev_sib ? bpos_predecessor(b->data->min_key) : bpos_successor(b->data->max_key); - sib_iter = bch2_trans_get_node_iter(trans, iter->btree_id, - sib_pos, U8_MAX, level, - BTREE_ITER_INTENT); - ret = bch2_btree_iter_traverse(sib_iter); + sib_path = bch2_path_get(trans, path->btree_id, sib_pos, + U8_MAX, level, BTREE_ITER_INTENT, _THIS_IP_); + ret = bch2_btree_path_traverse(trans, sib_path, false); if (ret) goto err; - m = sib_iter->l[level].b; + sib_path->should_be_locked = true; + + m = sib_path->l[level].b; - if (btree_node_parent(iter, b) != - btree_node_parent(sib_iter, m)) { + if (btree_node_parent(path, b) != + btree_node_parent(sib_path, m)) { b->sib_u64s[sib] = U16_MAX; goto out; } @@ -1659,8 +1671,8 @@ retry: if (b->sib_u64s[sib] > c->btree_foreground_merge_threshold) goto out; - parent = btree_node_parent(iter, b); - as = bch2_btree_update_start(iter, level, + parent = btree_node_parent(path, b); + as = bch2_btree_update_start(trans, path, level, btree_update_reserve_required(c, parent) + 1, flags| BTREE_INSERT_NOFAIL| @@ -1696,47 +1708,34 @@ retry: bch2_keylist_add(&as->parent_keys, &delete); bch2_keylist_add(&as->parent_keys, &n->key); - bch2_btree_insert_node(as, trans, iter, parent, &as->parent_keys, flags); + bch2_trans_verify_paths(trans); + + bch2_btree_insert_node(as, trans, path, parent, &as->parent_keys, flags); + + bch2_trans_verify_paths(trans); bch2_btree_update_get_open_buckets(as, n); six_lock_increment(&b->c.lock, SIX_LOCK_intent); six_lock_increment(&m->c.lock, SIX_LOCK_intent); - bch2_btree_iter_node_drop(iter, b); - bch2_btree_iter_node_drop(iter, m); - bch2_btree_iter_node_replace(iter, n); + bch2_trans_node_add(trans, n); - bch2_btree_trans_verify_iters(trans, n); + bch2_trans_verify_paths(trans); - bch2_btree_node_free_inmem(c, b, iter); - bch2_btree_node_free_inmem(c, m, iter); + bch2_btree_node_free_inmem(trans, b); + bch2_btree_node_free_inmem(trans, m); six_unlock_intent(&n->c.lock); bch2_btree_update_done(as); -out: - bch2_btree_trans_verify_locks(trans); - bch2_trans_iter_free(trans, sib_iter); - /* - * Don't downgrade locks here: we're called after successful insert, - * and the caller will downgrade locks after a successful insert - * anyways (in case e.g. a split was required first) - * - * And we're also called when inserting into interior nodes in the - * split path, and downgrading to read locks in there is potentially - * confusing: - */ - return ret ?: ret2; + bch2_time_stats_update(&c->times[BCH_TIME_btree_node_merge], start_time); +out: err: - bch2_trans_iter_put(trans, sib_iter); - sib_iter = NULL; - - if (ret == -EINTR && bch2_trans_relock(trans)) - goto retry; - - goto out; + bch2_path_put(trans, sib_path, true); + bch2_trans_verify_locks(trans); + return ret; } /** @@ -1744,32 +1743,23 @@ err: */ int bch2_btree_node_rewrite(struct btree_trans *trans, struct btree_iter *iter, - __le64 seq, unsigned flags) + struct btree *b, + unsigned flags) { struct bch_fs *c = trans->c; - struct btree *b, *n, *parent; + struct btree *n, *parent; struct btree_update *as; int ret; flags |= BTREE_INSERT_NOFAIL; -retry: - ret = bch2_btree_iter_traverse(iter); - if (ret) - goto out; - b = bch2_btree_iter_peek_node(iter); - if (!b || b->data->keys.seq != seq) - goto out; - - parent = btree_node_parent(iter, b); - as = bch2_btree_update_start(iter, b->c.level, + parent = btree_node_parent(iter->path, b); + as = bch2_btree_update_start(trans, iter->path, b->c.level, (parent ? btree_update_reserve_required(c, parent) : 0) + 1, flags); ret = PTR_ERR_OR_ZERO(as); - if (ret == -EINTR) - goto retry; if (ret) { trace_btree_gc_rewrite_node_fail(c, b); goto out; @@ -1789,23 +1779,22 @@ retry: if (parent) { bch2_keylist_add(&as->parent_keys, &n->key); - bch2_btree_insert_node(as, trans, iter, parent, + bch2_btree_insert_node(as, trans, iter->path, parent, &as->parent_keys, flags); } else { - bch2_btree_set_root(as, n, iter); + bch2_btree_set_root(as, trans, iter->path, n); } bch2_btree_update_get_open_buckets(as, n); six_lock_increment(&b->c.lock, SIX_LOCK_intent); - bch2_btree_iter_node_drop(iter, b); - bch2_btree_iter_node_replace(iter, n); - bch2_btree_node_free_inmem(c, b, iter); + bch2_trans_node_add(trans, n); + bch2_btree_node_free_inmem(trans, b); six_unlock_intent(&n->c.lock); bch2_btree_update_done(as); out: - bch2_btree_iter_downgrade(iter); + bch2_btree_path_downgrade(iter->path); return ret; } @@ -1818,20 +1807,38 @@ struct async_btree_rewrite { __le64 seq; }; +static int async_btree_node_rewrite_trans(struct btree_trans *trans, + struct async_btree_rewrite *a) +{ + struct btree_iter iter; + struct btree *b; + int ret; + + bch2_trans_node_iter_init(trans, &iter, a->btree_id, a->pos, + BTREE_MAX_DEPTH, a->level, 0); + b = bch2_btree_iter_peek_node(&iter); + ret = PTR_ERR_OR_ZERO(b); + if (ret) + goto out; + + if (!b || b->data->keys.seq != a->seq) + goto out; + + ret = bch2_btree_node_rewrite(trans, &iter, b, 0); +out : + bch2_trans_iter_exit(trans, &iter); + + return ret; +} + void async_btree_node_rewrite_work(struct work_struct *work) { struct async_btree_rewrite *a = container_of(work, struct async_btree_rewrite, work); struct bch_fs *c = a->c; - struct btree_trans trans; - struct btree_iter *iter; - bch2_trans_init(&trans, c, 0, 0); - iter = bch2_trans_get_node_iter(&trans, a->btree_id, a->pos, - BTREE_MAX_DEPTH, a->level, 0); - bch2_btree_node_rewrite(&trans, iter, a->seq, 0); - bch2_trans_iter_put(&trans, iter); - bch2_trans_exit(&trans); + bch2_trans_do(c, NULL, NULL, 0, + async_btree_node_rewrite_trans(&trans, a)); percpu_ref_put(&c->writes); kfree(a); } @@ -1869,7 +1876,7 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans, bool skip_triggers) { struct bch_fs *c = trans->c; - struct btree_iter *iter2 = NULL; + struct btree_iter iter2 = { NULL }; struct btree *parent; u64 journal_entries[BKEY_BTREE_PTR_U64s_MAX]; int ret; @@ -1897,19 +1904,23 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans, BUG_ON(ret); } - parent = btree_node_parent(iter, b); + parent = btree_node_parent(iter->path, b); if (parent) { - iter2 = bch2_trans_copy_iter(trans, iter); + bch2_trans_copy_iter(&iter2, iter); + + iter2.path = bch2_btree_path_make_mut(trans, iter2.path, + iter2.flags & BTREE_ITER_INTENT, + _THIS_IP_); - BUG_ON(iter2->level != b->c.level); - BUG_ON(bpos_cmp(iter2->pos, new_key->k.p)); + BUG_ON(iter2.path->level != b->c.level); + BUG_ON(bpos_cmp(iter2.path->pos, new_key->k.p)); - btree_node_unlock(iter2, iter2->level); - iter2->l[iter2->level].b = BTREE_ITER_NO_NODE_UP; - iter2->level++; + btree_node_unlock(iter2.path, iter2.path->level); + path_l(iter2.path)->b = BTREE_ITER_NO_NODE_UP; + iter2.path->level++; - ret = bch2_btree_iter_traverse(iter2) ?: - bch2_trans_update(trans, iter2, new_key, BTREE_TRIGGER_NORUN); + ret = bch2_btree_iter_traverse(&iter2) ?: + bch2_trans_update(trans, &iter2, new_key, BTREE_TRIGGER_NORUN); if (ret) goto err; } else { @@ -1931,7 +1942,7 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans, if (ret) goto err; - bch2_btree_node_lock_write(b, iter); + bch2_btree_node_lock_write(trans, iter->path, b); if (new_hash) { mutex_lock(&c->btree_cache.lock); @@ -1946,9 +1957,9 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans, bkey_copy(&b->key, new_key); } - bch2_btree_node_unlock_write(b, iter); + bch2_btree_node_unlock_write(trans, iter->path, b); out: - bch2_trans_iter_put(trans, iter2); + bch2_trans_iter_exit(trans, &iter2); return ret; err: if (new_hash) { @@ -1965,9 +1976,16 @@ int bch2_btree_node_update_key(struct btree_trans *trans, struct btree_iter *ite { struct bch_fs *c = trans->c; struct btree *new_hash = NULL; + struct btree_path *path = iter->path; struct closure cl; int ret = 0; + if (!btree_node_intent_locked(path, b->c.level) && + !bch2_btree_path_upgrade(trans, path, b->c.level + 1)) { + btree_trans_restart(trans); + return -EINTR; + } + closure_init_stack(&cl); /* @@ -1986,8 +2004,10 @@ int bch2_btree_node_update_key(struct btree_trans *trans, struct btree_iter *ite new_hash = bch2_btree_node_mem_alloc(c); } + path->intent_ref++; ret = __bch2_btree_node_update_key(trans, iter, b, new_hash, new_key, skip_triggers); + --path->intent_ref; if (new_hash) { mutex_lock(&c->btree_cache.lock); @@ -2006,18 +2026,18 @@ int bch2_btree_node_update_key_get_iter(struct btree_trans *trans, struct btree *b, struct bkey_i *new_key, bool skip_triggers) { - struct btree_iter *iter; + struct btree_iter iter; int ret; - iter = bch2_trans_get_node_iter(trans, b->c.btree_id, b->key.k.p, - BTREE_MAX_DEPTH, b->c.level, - BTREE_ITER_INTENT); - ret = bch2_btree_iter_traverse(iter); + bch2_trans_node_iter_init(trans, &iter, b->c.btree_id, b->key.k.p, + BTREE_MAX_DEPTH, b->c.level, + BTREE_ITER_INTENT); + ret = bch2_btree_iter_traverse(&iter); if (ret) goto out; /* has node been freed? */ - if (iter->l[b->c.level].b != b) { + if (iter.path->l[b->c.level].b != b) { /* node has been freed: */ BUG_ON(!btree_node_dying(b)); goto out; @@ -2025,9 +2045,9 @@ int bch2_btree_node_update_key_get_iter(struct btree_trans *trans, BUG_ON(!btree_node_hashed(b)); - ret = bch2_btree_node_update_key(trans, iter, b, new_key, skip_triggers); + ret = bch2_btree_node_update_key(trans, &iter, b, new_key, skip_triggers); out: - bch2_trans_iter_put(trans, iter); + bch2_trans_iter_exit(trans, &iter); return ret; }