+ if (path1->btree_id == path2->btree_id &&
+ path1->locks_want < path2->locks_want)
+ __bch2_btree_path_upgrade(trans, path1, path2->locks_want);
+ else if (!path1->locks_want && path2->locks_want)
+ __bch2_btree_path_upgrade(trans, path1, 1);
+ }
+
+ bch2_trans_unlock(trans);
+ cond_resched();
+
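+	/*
+	 * Editorial note (assumption, not from this patch's text): -ENOMEM
+	 * here means a btree node allocation failed; taking the cannibalize
+	 * lock lets us free up a cached node, waiting on the closure if
+	 * necessary:
+	 */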
+ if (unlikely(ret == -ENOMEM)) {
+ struct closure cl;
+
+ closure_init_stack(&cl);
+
+ do {
+ ret = bch2_btree_cache_cannibalize_lock(c, &cl);
+ closure_sync(&cl);
+ } while (ret);
+ }
+
+ if (unlikely(ret == -EIO))
+ goto out;
+
+ BUG_ON(ret && ret != -EINTR);
+
+ /* Now, redo traversals in correct order: */
+ i = 0;
+ while (i < trans->nr_sorted) {
+ path = trans->paths + trans->sorted[i];
+
+ EBUG_ON(!(trans->paths_allocated & (1ULL << path->idx)));
+
+ ret = btree_path_traverse_one(trans, path, 0, _THIS_IP_);
+ if (ret)
+ goto retry_all;
+
+ EBUG_ON(!(trans->paths_allocated & (1ULL << path->idx)));
+
+ if (path->nodes_locked ||
+ !btree_path_node(path, path->level))
+ i++;
+ }
+
+ /*
+ * BTREE_ITER_NEED_RELOCK is ok here - if we called bch2_trans_unlock()
+ * and relock(), relock() won't relock since path->should_be_locked
+ * isn't set yet, which is all fine
+ */
+ trans_for_each_path(trans, path)
+ BUG_ON(path->uptodate >= BTREE_ITER_NEED_TRAVERSE);
+out:
+ bch2_btree_cache_cannibalize_unlock(c);
+
+ trans->in_traverse_all = false;
+
+ trace_trans_traverse_all(trans->ip, trace_ip);
+ return ret;
+}
+
+static int bch2_btree_path_traverse_all(struct btree_trans *trans)
+{
+ return __btree_path_traverse_all(trans, 0, _RET_IP_);
+}
+
+static inline bool btree_path_good_node(struct btree_trans *trans,
+ struct btree_path *path,
+ unsigned l, int check_pos)
+{
+ if (!is_btree_node(path, l) ||
+ !bch2_btree_node_relock(trans, path, l))
+ return false;
+
+ if (check_pos < 0 && btree_path_pos_before_node(path, path->l[l].b))
+ return false;
+ if (check_pos > 0 && btree_path_pos_after_node(path, path->l[l].b))
+ return false;
+ return true;
+}
+
+static inline unsigned btree_path_up_until_good_node(struct btree_trans *trans,
+ struct btree_path *path,
+ int check_pos)
+{
+ unsigned i, l = path->level;
+
+ while (btree_path_node(path, l) &&
+ !btree_path_good_node(trans, path, l, check_pos)) {
+ btree_node_unlock(path, l);
+ path->l[l].b = BTREE_ITER_NO_NODE_UP;
+ l++;
+ }
+
+ /* If we need intent locks, take them too: */
+ for (i = l + 1;
+ i < path->locks_want && btree_path_node(path, i);
+ i++)
+ if (!bch2_btree_node_relock(trans, path, i))
+ while (l <= i) {
+ btree_node_unlock(path, l);
+ path->l[l].b = BTREE_ITER_NO_NODE_UP;
+ l++;
+ }
+
+ return l;
+}
+
+/*
+ * This is the main state machine for walking down the btree - walks down to a
+ * specified depth
+ *
+ * Returns 0 on success, -EIO on error (error reading in a btree node).
+ *
+ * On error, caller (peek_node()/peek_key()) must return NULL; the error is
+ * stashed in the iterator and returned from bch2_trans_exit().
+ */
+static int btree_path_traverse_one(struct btree_trans *trans,
+ struct btree_path *path,
+ unsigned flags,
+ unsigned long trace_ip)
+{
+ unsigned depth_want = path->level;
+ int ret = 0;
+
+ if (unlikely(trans->restarted)) {
+ ret = -EINTR;
+ goto out;
+ }
+
+ /*
+ * Ensure we obey path->should_be_locked: if it's set, we can't unlock
+ * and re-traverse the path without a transaction restart:
+ */
+ if (path->should_be_locked) {
+ ret = bch2_btree_path_relock(trans, path, trace_ip) ? 0 : -EINTR;
+ goto out;
+ }
+
+ if (path->cached) {
+ ret = bch2_btree_path_traverse_cached(trans, path, flags);
+ goto out;
+ }
+
+ if (unlikely(path->level >= BTREE_MAX_DEPTH))
+ goto out;
+
+ path->level = btree_path_up_until_good_node(trans, path, 0);
+
+ /*
+	 * Note: path->l[path->level].b may be temporarily NULL here - that
+	 * would indicate to other code that we got to the end of the btree;
+	 * here it indicates that relocking the root failed - it's critical that
+ * btree_path_lock_root() comes next and that it can't fail
+ */
+ while (path->level > depth_want) {
+ ret = btree_path_node(path, path->level)
+ ? btree_path_down(trans, path, flags, trace_ip)
+ : btree_path_lock_root(trans, path, depth_want, trace_ip);
+ if (unlikely(ret)) {
+ if (ret == 1) {
+ /*
+ * No nodes at this level - got to the end of
+ * the btree:
+ */
+ ret = 0;
+ goto out;
+ }
+
+ __bch2_btree_path_unlock(path);
+ path->level = depth_want;
+
+ if (ret == -EIO)
+ path->l[path->level].b =
+ BTREE_ITER_NO_NODE_ERROR;
+ else
+ path->l[path->level].b =
+ BTREE_ITER_NO_NODE_DOWN;
+ goto out;
+ }
+ }
+
+ path->uptodate = BTREE_ITER_UPTODATE;
+out:
+ BUG_ON((ret == -EINTR) != !!trans->restarted);
+ bch2_btree_path_verify(trans, path);
+ return ret;
+}
+
+static int __btree_path_traverse_all(struct btree_trans *, int, unsigned long);
+
+int __must_check bch2_btree_path_traverse(struct btree_trans *trans,
+ struct btree_path *path, unsigned flags)
+{
+ if (path->uptodate < BTREE_ITER_NEED_RELOCK)
+ return 0;
+
+ return bch2_trans_cond_resched(trans) ?:
+ btree_path_traverse_one(trans, path, flags, _RET_IP_);
+}
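+
+/*
+ * Illustrative sketch of a typical caller (hypothetical, not part of this
+ * patch): traverse the path, propagating errors up so -EINTR can restart the
+ * transaction, then use the now-locked path:
+ *
+ *	ret = bch2_btree_path_traverse(trans, path, flags);
+ *	if (ret)
+ *		return ret;
+ *	k = bch2_btree_path_peek_slot(path, &u);
+ */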
+
+static void btree_path_copy(struct btree_trans *trans, struct btree_path *dst,
+ struct btree_path *src)
+{
+ unsigned i;
+
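+	/*
+	 * Copy everything from @pos onwards; the bookkeeping fields before it
+	 * (idx, reference counts) identify dst and must be preserved:
+	 */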
+ memcpy(&dst->pos, &src->pos,
+ sizeof(struct btree_path) - offsetof(struct btree_path, pos));
+
+ for (i = 0; i < BTREE_MAX_DEPTH; i++)
+ if (btree_node_locked(dst, i))
+ six_lock_increment(&dst->l[i].b->c.lock,
+ __btree_lock_want(dst, i));
+
+ btree_path_check_sort(trans, dst, 0);
+}
+
+static struct btree_path *btree_path_clone(struct btree_trans *trans, struct btree_path *src,
+ bool intent)
+{
+ struct btree_path *new = btree_path_alloc(trans, src);
+
+ btree_path_copy(trans, new, src);
+ __btree_path_get(new, intent);
+ return new;
+}
+
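+/*
+ * Returns a path the caller has exclusive ownership of and is free to modify:
+ * if @path is shared (ref > 1) or flagged for preservation, a clone is made
+ * and returned instead.
+ */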
+inline struct btree_path * __must_check
+bch2_btree_path_make_mut(struct btree_trans *trans,
+ struct btree_path *path, bool intent,
+ unsigned long ip)
+{
+ if (path->ref > 1 || path->preserve) {
+ __btree_path_put(path, intent);
+ path = btree_path_clone(trans, path, intent);
+ path->preserve = false;
+#ifdef CONFIG_BCACHEFS_DEBUG
+ path->ip_allocated = ip;
+#endif
+ btree_trans_verify_sorted(trans);
+ }
+
+ return path;
+}
+
+static struct btree_path * __must_check
+btree_path_set_pos(struct btree_trans *trans,
+ struct btree_path *path, struct bpos new_pos,
+ bool intent, unsigned long ip)
+{
+ int cmp = bpos_cmp(new_pos, path->pos);
+ unsigned l = path->level;
+
+ EBUG_ON(trans->restarted);
+ EBUG_ON(!path->ref);
+
+ if (!cmp)
+ return path;
+
+ path = bch2_btree_path_make_mut(trans, path, intent, ip);
+
+ path->pos = new_pos;
+ path->should_be_locked = false;
+
+ btree_path_check_sort(trans, path, cmp);
+
+ if (unlikely(path->cached)) {
+ btree_node_unlock(path, 0);
+ path->l[0].b = BTREE_ITER_NO_NODE_CACHED;
+ btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
+ goto out;
+ }
+
+ l = btree_path_up_until_good_node(trans, path, cmp);
+
+ if (btree_path_node(path, l)) {
+ /*
+ * We might have to skip over many keys, or just a few: try
+ * advancing the node iterator, and if we have to skip over too
+ * many keys just reinit it (or if we're rewinding, since that
+ * is expensive).
+ */
+ if (cmp < 0 ||
+ !btree_path_advance_to_pos(path, &path->l[l], 8))
+ __btree_path_level_init(path, l);
+ }
+
+ if (l != path->level) {
+ btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
+ __bch2_btree_path_unlock(path);
+ }
+out:
+ bch2_btree_path_verify(trans, path);
+ return path;
+}
+
+/* Btree path: main interface: */
+
+static struct btree_path *have_path_at_pos(struct btree_trans *trans, struct btree_path *path)
+{
+ struct btree_path *next;
+
+ next = prev_btree_path(trans, path);
+ if (next && !btree_path_cmp(next, path))
+ return next;
+
+ next = next_btree_path(trans, path);
+ if (next && !btree_path_cmp(next, path))
+ return next;
+
+ return NULL;
+}
+
+static struct btree_path *have_node_at_pos(struct btree_trans *trans, struct btree_path *path)
+{
+ struct btree_path *next;
+
+ next = prev_btree_path(trans, path);
+ if (next && next->level == path->level && path_l(next)->b == path_l(path)->b)
+ return next;
+
+ next = next_btree_path(trans, path);
+ if (next && next->level == path->level && path_l(next)->b == path_l(path)->b)
+ return next;
+
+ return NULL;
+}
+
+static inline void __bch2_path_free(struct btree_trans *trans, struct btree_path *path)
+{
+ __bch2_btree_path_unlock(path);
+ btree_path_list_remove(trans, path);
+ trans->paths_allocated &= ~(1ULL << path->idx);
+}
+
+void bch2_path_put(struct btree_trans *trans, struct btree_path *path, bool intent)
+{
+ struct btree_path *dup;
+
+ EBUG_ON(trans->paths + path->idx != path);
+ EBUG_ON(!path->ref);
+
+ if (!__btree_path_put(path, intent))
+ return;
+
+ /*
+ * Perhaps instead we should check for duplicate paths in traverse_all:
+ */
+ if (path->preserve &&
+ (dup = have_path_at_pos(trans, path))) {
+ dup->preserve = true;
+ path->preserve = false;
+ goto free;
+ }
+
+ if (!path->preserve &&
+ (dup = have_node_at_pos(trans, path)))
+ goto free;
+ return;
+free:
+ if (path->should_be_locked &&
+ !btree_node_locked(dup, path->level))
+ return;
+
+ dup->should_be_locked |= path->should_be_locked;
+ __bch2_path_free(trans, path);
+}
+
+noinline __cold
+void bch2_dump_trans_paths_updates(struct btree_trans *trans)
+{
+ struct btree_path *path;
+ struct btree_insert_entry *i;
+ unsigned idx;
+ char buf1[300], buf2[300];
+
+ btree_trans_verify_sorted(trans);
+
+ trans_for_each_path_inorder(trans, path, idx)
+ printk(KERN_ERR "path: idx %u ref %u:%u%s%s btree %s pos %s locks %u %pS\n",
+ path->idx, path->ref, path->intent_ref,
+ path->should_be_locked ? " S" : "",
+ path->preserve ? " P" : "",
+ bch2_btree_ids[path->btree_id],
+ (bch2_bpos_to_text(&PBUF(buf1), path->pos), buf1),
+ path->nodes_locked,
+#ifdef CONFIG_BCACHEFS_DEBUG
+ (void *) path->ip_allocated
+#else
+ NULL
+#endif
+ );
+
+ trans_for_each_update(trans, i) {
+ struct bkey u;
+ struct bkey_s_c old = bch2_btree_path_peek_slot(i->path, &u);
+
+ printk(KERN_ERR "update: btree %s %pS\n old %s\n new %s",
+ bch2_btree_ids[i->btree_id],
+ (void *) i->ip_allocated,
+ (bch2_bkey_val_to_text(&PBUF(buf1), trans->c, old), buf1),
+ (bch2_bkey_val_to_text(&PBUF(buf2), trans->c, bkey_i_to_s_c(i->k)), buf2));
+ }
+}
+
+static struct btree_path *btree_path_alloc(struct btree_trans *trans,
+ struct btree_path *pos)
+{
+ struct btree_path *path;
+ unsigned idx;
+
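+	/*
+	 * Check that all BTREE_ITER_MAX path slots are in use; the shift is
+	 * split in two so the mask is still well defined when
+	 * BTREE_ITER_MAX == 64:
+	 */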
+ if (unlikely(trans->paths_allocated ==
+ ~((~0ULL << 1) << (BTREE_ITER_MAX - 1)))) {
+ bch2_dump_trans_paths_updates(trans);
+		panic("trans path overflow\n");
+ }
+
+ idx = __ffs64(~trans->paths_allocated);
+ trans->paths_allocated |= 1ULL << idx;
+
+ path = &trans->paths[idx];
+
+ path->idx = idx;
+ path->ref = 0;
+ path->intent_ref = 0;
+ path->nodes_locked = 0;
+ path->nodes_intent_locked = 0;
+
+ btree_path_list_add(trans, pos, path);
+ return path;
+}
+
+struct btree_path *bch2_path_get(struct btree_trans *trans, bool cached,
+ enum btree_id btree_id, struct bpos pos,
+ unsigned locks_want, unsigned level,
+ bool intent, unsigned long ip)
+{
+ struct btree_path *path, *path_pos = NULL;
+ int i;
+
+ BUG_ON(trans->restarted);
+
+ trans_for_each_path_inorder(trans, path, i) {
+ if (__btree_path_cmp(path,
+ btree_id,
+ cached,
+ pos,
+ level) > 0)
+ break;
+
+ path_pos = path;
+ }
+
+ if (path_pos &&
+ path_pos->cached == cached &&
+ path_pos->btree_id == btree_id &&
+ path_pos->level == level) {
+ __btree_path_get(path_pos, intent);
+ path = btree_path_set_pos(trans, path_pos, pos, intent, ip);
+ path->preserve = true;
+ } else {
+ path = btree_path_alloc(trans, path_pos);
+ path_pos = NULL;
+
+ __btree_path_get(path, intent);
+ path->pos = pos;
+ path->btree_id = btree_id;
+ path->cached = cached;
+ path->preserve = true;
+ path->uptodate = BTREE_ITER_NEED_TRAVERSE;
+ path->should_be_locked = false;
+ path->level = level;
+ path->locks_want = locks_want;
+ path->nodes_locked = 0;
+ path->nodes_intent_locked = 0;
+ for (i = 0; i < ARRAY_SIZE(path->l); i++)
+ path->l[i].b = BTREE_ITER_NO_NODE_INIT;
+#ifdef CONFIG_BCACHEFS_DEBUG
+ path->ip_allocated = ip;
+#endif
+ btree_trans_verify_sorted(trans);
+ }
+
+ if (path->intent_ref)
+ locks_want = max(locks_want, level + 1);
+
+ /*
+ * If the path has locks_want greater than requested, we don't downgrade
+	 * it here - after a transaction restart triggered because a btree node
+	 * split needed to upgrade locks, we might be putting/getting the
+	 * iterator again.
+ * Downgrading iterators only happens via bch2_trans_downgrade(), after
+ * a successful transaction commit.
+ */
+
+ locks_want = min(locks_want, BTREE_MAX_DEPTH);
+ if (locks_want > path->locks_want) {
+ path->locks_want = locks_want;
+ btree_path_get_locks(trans, path, true, _THIS_IP_);
+ }
+
+ return path;
+}
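+
+/*
+ * Illustrative sketch (hypothetical caller, not part of this patch): paths
+ * are refcounted, so each bch2_path_get() is paired with a bch2_path_put()
+ * with matching @intent:
+ *
+ *	path = bch2_path_get(trans, false, BTREE_ID_extents, pos,
+ *			     0, 0, false, _RET_IP_);
+ *	ret = bch2_btree_path_traverse(trans, path, 0);
+ *	...
+ *	bch2_path_put(trans, path, false);
+ */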
+
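+/*
+ * Returns the key at the path's current position, or, if there is no key
+ * there, a deleted key at path->pos synthesized in @u. The path must be
+ * traversed (uptodate); for cached paths the key comes from the key cache
+ * entry.
+ */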
+inline struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *path, struct bkey *u)
+{
+ struct bkey_s_c k;
+
+ BUG_ON(path->uptodate != BTREE_ITER_UPTODATE);
+
+ if (!path->cached) {
+ struct btree_path_level *l = path_l(path);
+ struct bkey_packed *_k =
+ bch2_btree_node_iter_peek_all(&l->iter, l->b);
+
+ k = _k ? bkey_disassemble(l->b, _k, u) : bkey_s_c_null;
+
+ EBUG_ON(k.k && bkey_deleted(k.k) && bpos_cmp(k.k->p, path->pos) == 0);
+
+ if (!k.k || bpos_cmp(path->pos, k.k->p))
+ goto hole;
+ } else {
+ struct bkey_cached *ck = (void *) path->l[0].b;
+
+ EBUG_ON(path->btree_id != ck->key.btree_id ||
+ bkey_cmp(path->pos, ck->key.pos));
+
+ /* BTREE_ITER_CACHED_NOFILL? */
+ if (unlikely(!ck->valid))
+ goto hole;
+
+ k = bkey_i_to_s_c(ck->k);
+ }
+
+ return k;
+hole:
+ bkey_init(u);
+ u->p = path->pos;
+ return (struct bkey_s_c) { u, NULL };
+}
+
+/* Btree iterators: */
+
+int __must_check
+__bch2_btree_iter_traverse(struct btree_iter *iter)
+{
+ return bch2_btree_path_traverse(iter->trans, iter->path, iter->flags);
+}
+
+int __must_check
+bch2_btree_iter_traverse(struct btree_iter *iter)
+{
+ int ret;
+
+ iter->path = btree_path_set_pos(iter->trans, iter->path,
+ btree_iter_search_key(iter),
+ iter->flags & BTREE_ITER_INTENT,
+ btree_iter_ip_allocated(iter));
+
+ ret = bch2_btree_path_traverse(iter->trans, iter->path, iter->flags);
+ if (ret)
+ return ret;
+
+ iter->path->should_be_locked = true;
+ return 0;
+}
+
+/* Iterate across nodes (leaf and interior nodes) */
+
+struct btree *bch2_btree_iter_peek_node(struct btree_iter *iter)
+{
+ struct btree_trans *trans = iter->trans;
+ struct btree *b = NULL;
+ int ret;
+
+ EBUG_ON(iter->path->cached);
+ bch2_btree_iter_verify(iter);
+
+ ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
+ if (ret)
+ goto err;
+
+ b = btree_path_node(iter->path, iter->path->level);
+ if (!b)
+ goto out;
+
+ BUG_ON(bpos_cmp(b->key.k.p, iter->pos) < 0);
+
+ bkey_init(&iter->k);
+ iter->k.p = iter->pos = b->key.k.p;
+
+ iter->path = btree_path_set_pos(trans, iter->path, b->key.k.p,
+ iter->flags & BTREE_ITER_INTENT,
+ btree_iter_ip_allocated(iter));
+ iter->path->should_be_locked = true;
+ BUG_ON(iter->path->uptodate);
+out:
+ bch2_btree_iter_verify_entry_exit(iter);
+ bch2_btree_iter_verify(iter);
+
+ return b;
+err:
+ b = ERR_PTR(ret);
+ goto out;
+}
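+
+/*
+ * Illustrative only (hypothetical caller, not part of this patch): walking
+ * all nodes in a btree pairs peek_node() with next_node():
+ *
+ *	for (b = bch2_btree_iter_peek_node(&iter);
+ *	     !IS_ERR_OR_NULL(b);
+ *	     b = bch2_btree_iter_next_node(&iter))
+ *		...
+ */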
+
+struct btree *bch2_btree_iter_next_node(struct btree_iter *iter)
+{
+ struct btree_trans *trans = iter->trans;
+ struct btree_path *path = iter->path;
+ struct btree *b = NULL;
+ unsigned l;
+ int ret;
+
+ BUG_ON(trans->restarted);
+ EBUG_ON(iter->path->cached);
+ bch2_btree_iter_verify(iter);
+
+ /* already at end? */
+ if (!btree_path_node(path, path->level))
+ return NULL;
+
+ /* got to end? */
+ if (!btree_path_node(path, path->level + 1)) {
+ btree_node_unlock(path, path->level);
+ path->l[path->level].b = BTREE_ITER_NO_NODE_UP;
+ path->level++;
+ return NULL;
+ }
+
+ if (!bch2_btree_node_relock(trans, path, path->level + 1)) {
+ __bch2_btree_path_unlock(path);
+ path->l[path->level].b = BTREE_ITER_NO_NODE_GET_LOCKS;
+ path->l[path->level + 1].b = BTREE_ITER_NO_NODE_GET_LOCKS;
+ btree_trans_restart(trans);
+ ret = -EINTR;
+ goto err;
+ }
+
+ b = btree_path_node(path, path->level + 1);
+
+ if (!bpos_cmp(iter->pos, b->key.k.p)) {
+ btree_node_unlock(path, path->level);
+ path->l[path->level].b = BTREE_ITER_NO_NODE_UP;
+ path->level++;
+ } else {
+ /*
+ * Haven't gotten to the end of the parent node: go back down to
+ * the next child node
+ */
+ path = iter->path =
+ btree_path_set_pos(trans, path, bpos_successor(iter->pos),
+ iter->flags & BTREE_ITER_INTENT,
+ btree_iter_ip_allocated(iter));
+
+ path->level = iter->min_depth;
+
+ for (l = path->level + 1; l < BTREE_MAX_DEPTH; l++)
+ if (btree_lock_want(path, l) == BTREE_NODE_UNLOCKED)
+ btree_node_unlock(path, l);
+
+ btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
+ bch2_btree_iter_verify(iter);
+
+ ret = bch2_btree_path_traverse(trans, path, iter->flags);
+ if (ret)
+ goto err;
+
+ b = path->l[path->level].b;
+ }
+
+ bkey_init(&iter->k);
+ iter->k.p = iter->pos = b->key.k.p;
+
+ iter->path = btree_path_set_pos(trans, iter->path, b->key.k.p,
+ iter->flags & BTREE_ITER_INTENT,
+ btree_iter_ip_allocated(iter));
+ iter->path->should_be_locked = true;
+ BUG_ON(iter->path->uptodate);
+out:
+ bch2_btree_iter_verify_entry_exit(iter);
+ bch2_btree_iter_verify(iter);
+
+ return b;
+err:
+ b = ERR_PTR(ret);
+ goto out;
+}
+
+/* Iterate across keys (in leaf nodes only) */
+
+inline bool bch2_btree_iter_advance(struct btree_iter *iter)
+{
+ struct bpos pos = iter->k.p;
+ bool ret = (iter->flags & BTREE_ITER_ALL_SNAPSHOTS
+ ? bpos_cmp(pos, SPOS_MAX)
+ : bkey_cmp(pos, SPOS_MAX)) != 0;
+
+ if (ret && !(iter->flags & BTREE_ITER_IS_EXTENTS))
+ pos = bkey_successor(iter, pos);
+ bch2_btree_iter_set_pos(iter, pos);
+ return ret;
+}
+
+inline bool bch2_btree_iter_rewind(struct btree_iter *iter)
+{
+ struct bpos pos = bkey_start_pos(&iter->k);
+ bool ret = (iter->flags & BTREE_ITER_ALL_SNAPSHOTS
+ ? bpos_cmp(pos, POS_MIN)
+ : bkey_cmp(pos, POS_MIN)) != 0;
+
+ if (ret && !(iter->flags & BTREE_ITER_IS_EXTENTS))
+ pos = bkey_predecessor(iter, pos);
+ bch2_btree_iter_set_pos(iter, pos);
+ return ret;
+}
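+
+/*
+ * Illustrative only (not part of this patch): advance() drives the usual
+ * iteration loop - peek a key, use it, advance past it:
+ *
+ *	while ((k = bch2_btree_iter_peek(&iter)).k &&
+ *	       !(ret = bkey_err(k))) {
+ *		...
+ *		bch2_btree_iter_advance(&iter);
+ *	}
+ */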
+
+/**
+ * bch2_btree_iter_peek: returns first key greater than or equal to iterator's
+ * current position
+ */
+struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
+{
+ struct btree_trans *trans = iter->trans;
+ struct bpos search_key = btree_iter_search_key(iter);
+ struct bkey_i *next_update;
+ struct bkey_s_c k;
+ int ret, cmp;
+
+ EBUG_ON(iter->path->cached || iter->path->level);
+ bch2_btree_iter_verify(iter);
+ bch2_btree_iter_verify_entry_exit(iter);
+
+ while (1) {
+ iter->path = btree_path_set_pos(trans, iter->path, search_key,
+ iter->flags & BTREE_ITER_INTENT,
+ btree_iter_ip_allocated(iter));
+
+ ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
+ if (unlikely(ret)) {
+ /* ensure that iter->k is consistent with iter->pos: */
+ bch2_btree_iter_set_pos(iter, iter->pos);
+ k = bkey_s_c_err(ret);
+ goto out;
+ }
+
+ next_update = iter->flags & BTREE_ITER_WITH_UPDATES
+ ? btree_trans_peek_updates(trans, iter->btree_id, search_key)
+ : NULL;
+ k = btree_path_level_peek_all(trans->c, &iter->path->l[0], &iter->k);
+
+		/* In the btree, deleted keys sort before non-deleted: */
+ if (k.k && bkey_deleted(k.k) &&
+ (!next_update ||
+ bpos_cmp(k.k->p, next_update->k.p) <= 0)) {
+ search_key = k.k->p;
+ continue;
+ }
+
+ if (next_update &&
+ bpos_cmp(next_update->k.p,
+ k.k ? k.k->p : iter->path->l[0].b->key.k.p) <= 0) {
+ iter->k = next_update->k;
+ k = bkey_i_to_s_c(next_update);
+ }
+
+ if (likely(k.k)) {
+ /*
+ * We can never have a key in a leaf node at POS_MAX, so
+ * we don't have to check these successor() calls:
+ */
+ if ((iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) &&
+ !bch2_snapshot_is_ancestor(trans->c,
+ iter->snapshot,
+ k.k->p.snapshot)) {
+ search_key = bpos_successor(k.k->p);
+ continue;
+ }
+
+ if (bkey_whiteout(k.k) &&
+ !(iter->flags & BTREE_ITER_ALL_SNAPSHOTS)) {
+ search_key = bkey_successor(iter, k.k->p);
+ continue;
+ }
+
+ break;
+ } else if (likely(bpos_cmp(iter->path->l[0].b->key.k.p, SPOS_MAX))) {
+ /* Advance to next leaf node: */
+ search_key = bpos_successor(iter->path->l[0].b->key.k.p);
+ } else {
+ /* End of btree: */
+ bch2_btree_iter_set_pos(iter, SPOS_MAX);
+ k = bkey_s_c_null;
+ goto out;