+ struct btree_path *new = btree_path_alloc(trans, src);
+
+ btree_path_copy(trans, new, src);
+ __btree_path_get(new, intent);
+ return new;
+}
+
+inline struct btree_path * __must_check
+bch2_btree_path_make_mut(struct btree_trans *trans,
+ struct btree_path *path, bool intent,
+ unsigned long ip)
+{
+ if (path->ref > 1 || path->preserve) {
+ __btree_path_put(path, intent);
+ path = btree_path_clone(trans, path, intent);
+ path->preserve = false;
+#ifdef CONFIG_BCACHEFS_DEBUG
+ path->ip_allocated = ip;
+#endif
+ btree_trans_verify_sorted(trans);
+ }
+
+ path->should_be_locked = false;
+ return path;
+}
+
+struct btree_path * __must_check
+bch2_btree_path_set_pos(struct btree_trans *trans,
+ struct btree_path *path, struct bpos new_pos,
+ bool intent, unsigned long ip)
+{
+ int cmp = bpos_cmp(new_pos, path->pos);
+ unsigned l = path->level;
+
+ EBUG_ON(trans->restarted);
+ EBUG_ON(!path->ref);
+
+ if (!cmp)
+ return path;
+
+ path = bch2_btree_path_make_mut(trans, path, intent, ip);
+
+ path->pos = new_pos;
+
+ bch2_btree_path_check_sort(trans, path, cmp);
+
+ if (unlikely(path->cached)) {
+ btree_node_unlock(path, 0);
+ path->l[0].b = BTREE_ITER_NO_NODE_CACHED;
+ btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
+ goto out;
+ }
+
+ l = btree_path_up_until_good_node(trans, path, cmp);
+
+ if (btree_path_node(path, l)) {
+ BUG_ON(!btree_node_locked(path, l));
+ /*
+ * We might have to skip over many keys, or just a few: try
+ * advancing the node iterator, and if we have to skip over too
+ * many keys just reinit it (or if we're rewinding, since that
+ * is expensive).
+ */
+ if (cmp < 0 ||
+ !btree_path_advance_to_pos(path, &path->l[l], 8))
+ __btree_path_level_init(path, l);
+ }
+
+ if (l != path->level) {
+ btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
+ __bch2_btree_path_unlock(path);
+ }
+out:
+ bch2_btree_path_verify(trans, path);
+ return path;
+}
+
+/* Btree path: main interface: */
+
+static struct btree_path *have_path_at_pos(struct btree_trans *trans, struct btree_path *path)
+{
+ struct btree_path *next;
+
+ next = prev_btree_path(trans, path);
+ if (next && !btree_path_cmp(next, path))
+ return next;
+
+ next = next_btree_path(trans, path);
+ if (next && !btree_path_cmp(next, path))
+ return next;
+
+ return NULL;
+}
+
+static struct btree_path *have_node_at_pos(struct btree_trans *trans, struct btree_path *path)
+{
+ struct btree_path *next;
+
+ next = prev_btree_path(trans, path);
+ if (next && next->level == path->level && path_l(next)->b == path_l(path)->b)
+ return next;
+
+ next = next_btree_path(trans, path);
+ if (next && next->level == path->level && path_l(next)->b == path_l(path)->b)
+ return next;
+
+ return NULL;
+}
+
+static inline void __bch2_path_free(struct btree_trans *trans, struct btree_path *path)
+{
+ __bch2_btree_path_unlock(path);
+ btree_path_list_remove(trans, path);
+ trans->paths_allocated &= ~(1ULL << path->idx);
+}
+
+void bch2_path_put(struct btree_trans *trans, struct btree_path *path, bool intent)
+{
+ struct btree_path *dup;
+
+ EBUG_ON(trans->paths + path->idx != path);
+ EBUG_ON(!path->ref);
+
+ if (!__btree_path_put(path, intent))
+ return;
+
+ /*
+ * Perhaps instead we should check for duplicate paths in traverse_all:
+ */
+ if (path->preserve &&
+ (dup = have_path_at_pos(trans, path))) {
+ dup->preserve = true;
+ path->preserve = false;
+ goto free;
+ }
+
+ if (!path->preserve &&
+ (dup = have_node_at_pos(trans, path)))
+ goto free;
+ return;
+free:
+ if (path->should_be_locked &&
+ !btree_node_locked(dup, path->level))
+ return;
+
+ dup->should_be_locked |= path->should_be_locked;
+ __bch2_path_free(trans, path);
+}
+
+void bch2_trans_updates_to_text(struct printbuf *buf, struct btree_trans *trans)
+{
+ struct btree_insert_entry *i;
+
+ pr_buf(buf, "transaction updates for %s journal seq %llu",
+ trans->fn, trans->journal_res.seq);
+ pr_newline(buf);
+ pr_indent_push(buf, 2);
+
+ trans_for_each_update(trans, i) {
+ struct bkey_s_c old = { &i->old_k, i->old_v };
+
+ pr_buf(buf, "update: btree %s %pS",
+ bch2_btree_ids[i->btree_id],
+ (void *) i->ip_allocated);
+ pr_newline(buf);
+
+ pr_buf(buf, " old ");
+ bch2_bkey_val_to_text(buf, trans->c, old);
+ pr_newline(buf);
+
+ pr_buf(buf, " new ");
+ bch2_bkey_val_to_text(buf, trans->c, bkey_i_to_s_c(i->k));
+ pr_newline(buf);
+ }
+
+ pr_indent_pop(buf, 2);
+}
+
+noinline __cold
+void bch2_dump_trans_updates(struct btree_trans *trans)
+{
+ struct printbuf buf = PRINTBUF;
+
+ bch2_trans_updates_to_text(&buf, trans);
+ bch_err(trans->c, "%s", buf.buf);
+ printbuf_exit(&buf);
+}
+
+noinline __cold
+void bch2_dump_trans_paths_updates(struct btree_trans *trans)
+{
+ struct btree_path *path;
+ struct printbuf buf = PRINTBUF;
+ unsigned idx;
+
+ trans_for_each_path_inorder(trans, path, idx) {
+ printbuf_reset(&buf);
+
+ bch2_bpos_to_text(&buf, path->pos);
+
+ printk(KERN_ERR "path: idx %u ref %u:%u%s%s btree=%s l=%u pos %s locks %u %pS\n",
+ path->idx, path->ref, path->intent_ref,
+ path->should_be_locked ? " S" : "",
+ path->preserve ? " P" : "",
+ bch2_btree_ids[path->btree_id],
+ path->level,
+ buf.buf,
+ path->nodes_locked,
+#ifdef CONFIG_BCACHEFS_DEBUG
+ (void *) path->ip_allocated
+#else
+ NULL
+#endif
+ );
+ }
+
+ printbuf_exit(&buf);
+
+ bch2_dump_trans_updates(trans);
+}
+
+static struct btree_path *btree_path_alloc(struct btree_trans *trans,
+ struct btree_path *pos)
+{
+ struct btree_path *path;
+ unsigned idx;
+
+ if (unlikely(trans->paths_allocated ==
+ ~((~0ULL << 1) << (BTREE_ITER_MAX - 1)))) {
+ bch2_dump_trans_paths_updates(trans);
+ panic("trans path oveflow\n");
+ }
+
+ idx = __ffs64(~trans->paths_allocated);
+ trans->paths_allocated |= 1ULL << idx;
+
+ path = &trans->paths[idx];
+
+ path->idx = idx;
+ path->ref = 0;
+ path->intent_ref = 0;
+ path->nodes_locked = 0;
+ path->nodes_intent_locked = 0;
+
+ btree_path_list_add(trans, pos, path);
+ return path;
+}
+
+struct btree_path *bch2_path_get(struct btree_trans *trans,
+ enum btree_id btree_id, struct bpos pos,
+ unsigned locks_want, unsigned level,
+ unsigned flags, unsigned long ip)
+{
+ struct btree_path *path, *path_pos = NULL;
+ bool cached = flags & BTREE_ITER_CACHED;
+ bool intent = flags & BTREE_ITER_INTENT;
+ int i;
+
+ BUG_ON(trans->restarted);
+ btree_trans_verify_sorted(trans);
+ bch2_trans_verify_locks(trans);
+
+ trans_for_each_path_inorder(trans, path, i) {
+ if (__btree_path_cmp(path,
+ btree_id,
+ cached,
+ pos,
+ level) > 0)
+ break;
+
+ path_pos = path;
+ }
+
+ if (path_pos &&
+ path_pos->cached == cached &&
+ path_pos->btree_id == btree_id &&
+ path_pos->level == level) {
+ __btree_path_get(path_pos, intent);
+ path = bch2_btree_path_set_pos(trans, path_pos, pos, intent, ip);
+ } else {
+ path = btree_path_alloc(trans, path_pos);
+ path_pos = NULL;
+
+ __btree_path_get(path, intent);
+ path->pos = pos;
+ path->btree_id = btree_id;
+ path->cached = cached;
+ path->uptodate = BTREE_ITER_NEED_TRAVERSE;
+ path->should_be_locked = false;
+ path->level = level;
+ path->locks_want = locks_want;
+ path->nodes_locked = 0;
+ path->nodes_intent_locked = 0;
+ for (i = 0; i < ARRAY_SIZE(path->l); i++)
+ path->l[i].b = BTREE_ITER_NO_NODE_INIT;
+#ifdef CONFIG_BCACHEFS_DEBUG
+ path->ip_allocated = ip;
+#endif
+ btree_trans_verify_sorted(trans);
+ }
+
+ if (!(flags & BTREE_ITER_NOPRESERVE))
+ path->preserve = true;
+
+ if (path->intent_ref)
+ locks_want = max(locks_want, level + 1);
+
+ /*
+ * If the path has locks_want greater than requested, we don't downgrade
+ * it here - on transaction restart because btree node split needs to
+ * upgrade locks, we might be putting/getting the iterator again.
+ * Downgrading iterators only happens via bch2_trans_downgrade(), after
+ * a successful transaction commit.
+ */
+
+ locks_want = min(locks_want, BTREE_MAX_DEPTH);
+ if (locks_want > path->locks_want) {
+ path->locks_want = locks_want;
+ btree_path_get_locks(trans, path, true);
+ }
+
+ return path;
+}
+
+inline struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *path, struct bkey *u)
+{
+
+ struct bkey_s_c k;
+
+ if (!path->cached) {
+ struct btree_path_level *l = path_l(path);
+ struct bkey_packed *_k;
+
+ EBUG_ON(path->uptodate != BTREE_ITER_UPTODATE);
+
+ _k = bch2_btree_node_iter_peek_all(&l->iter, l->b);
+ k = _k ? bkey_disassemble(l->b, _k, u) : bkey_s_c_null;
+
+ EBUG_ON(k.k && bkey_deleted(k.k) && bpos_cmp(k.k->p, path->pos) == 0);
+
+ if (!k.k || bpos_cmp(path->pos, k.k->p))
+ goto hole;
+ } else {
+ struct bkey_cached *ck = (void *) path->l[0].b;
+
+ EBUG_ON(ck &&
+ (path->btree_id != ck->key.btree_id ||
+ bkey_cmp(path->pos, ck->key.pos)));
+
+ /* BTREE_ITER_CACHED_NOFILL|BTREE_ITER_CACHED_NOCREATE? */
+ if (unlikely(!ck || !ck->valid))
+ return bkey_s_c_null;
+
+ EBUG_ON(path->uptodate != BTREE_ITER_UPTODATE);
+
+ *u = ck->k->k;
+ k = bkey_i_to_s_c(ck->k);
+ }
+
+ return k;
+hole:
+ bkey_init(u);
+ u->p = path->pos;
+ return (struct bkey_s_c) { u, NULL };
+}
+
+/* Btree iterators: */
+
+int __must_check
+__bch2_btree_iter_traverse(struct btree_iter *iter)
+{
+ return bch2_btree_path_traverse(iter->trans, iter->path, iter->flags);