- return iter->btree_id == b->c.btree_id &&
- !btree_iter_pos_before_node(iter, b) &&
- !btree_iter_pos_after_node(iter, b);
-}
-
-/* Btree node locking: */
-
-void bch2_btree_node_unlock_write(struct btree *b, struct btree_iter *iter)
-{
- bch2_btree_node_unlock_write_inlined(b, iter);
-}
-
-void __bch2_btree_node_lock_write(struct btree *b, struct btree_iter *iter)
-{
- struct btree_iter *linked;
- unsigned readers = 0;
-
- EBUG_ON(!btree_node_intent_locked(iter, b->c.level));
-
- trans_for_each_iter(iter->trans, linked)
- if (linked->l[b->c.level].b == b &&
- btree_node_read_locked(linked, b->c.level))
- readers++;
-
- /*
- * Must drop our read locks before calling six_lock_write():
- * six_lock_write() blocks until the reader count goes to 0, so
- * our own read locks would deadlock us if they stayed counted.
- * This is safe because we have the node intent locked:
- */
- atomic64_sub(__SIX_VAL(read_lock, readers),
- &b->c.lock.state.counter);
- btree_node_lock_type(iter->trans->c, b, SIX_LOCK_write);
- atomic64_add(__SIX_VAL(read_lock, readers),
- &b->c.lock.state.counter);
-}
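-
-/*
- * A note on the atomics above (a sketch of the mechanism, not a spec of
- * the six lock internals): __SIX_VAL(read_lock, n) is understood here
- * to build the u64 counter delta representing n read locks, so we
- * subtract our own readers, let six_lock_write() wait only for readers
- * from other threads, then restore the count once the write lock is
- * held.
- */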
-
-bool __bch2_btree_node_relock(struct btree_iter *iter, unsigned level)
-{
- struct btree *b = btree_iter_node(iter, level);
- int want = __btree_lock_want(iter, level);
-
- if (!is_btree_node(iter, level))
- return false;
-
- if (race_fault())
- return false;
-
- if (six_relock_type(&b->c.lock, want, iter->l[level].lock_seq) ||
- (btree_node_lock_seq_matches(iter, b, level) &&
- btree_node_lock_increment(iter->trans, b, level, want))) {
- mark_btree_node_locked(iter, level, want);
- return true;
- }
-
- return false;
-}
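-
-/*
- * Roughly how relocking works: six locks carry a sequence number that
- * is bumped when the lock is write locked, so six_relock_type()
- * succeeds only if the node hasn't been modified since we stashed
- * iter->l[level].lock_seq. Failing that, if the seq still matches and
- * another iterator in this transaction holds the lock at the right
- * type, btree_node_lock_increment() just takes another reference on it.
- */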
-
-static bool bch2_btree_node_upgrade(struct btree_iter *iter, unsigned level)
-{
- struct btree *b = iter->l[level].b;
-
- EBUG_ON(btree_lock_want(iter, level) != BTREE_NODE_INTENT_LOCKED);
-
- if (!is_btree_node(iter, level))
- return false;
-
- if (btree_node_intent_locked(iter, level))
- return true;
-
- if (race_fault())
- return false;
-
- if (btree_node_locked(iter, level)
- ? six_lock_tryupgrade(&b->c.lock)
- : six_relock_type(&b->c.lock, SIX_LOCK_intent, iter->l[level].lock_seq))
- goto success;
-
- if (btree_node_lock_seq_matches(iter, b, level) &&
- btree_node_lock_increment(iter->trans, b, level, BTREE_NODE_INTENT_LOCKED)) {
- btree_node_unlock(iter, level);
- goto success;
- }
-
- return false;
-success:
- mark_btree_node_intent_locked(iter, level);
- return true;
-}
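-
-/*
- * In other words, an upgrade can succeed three ways: we hold a read
- * lock and six_lock_tryupgrade() converts it to intent; we hold no lock
- * but the seq still matches, so we can relock directly as intent; or
- * another iterator in this transaction already has the node intent
- * locked and we piggyback on it with btree_node_lock_increment(),
- * dropping whatever stale lock we held.
- */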
-
-static inline bool btree_iter_get_locks(struct btree_iter *iter,
- bool upgrade, bool trace)
-{
- unsigned l = iter->level;
- int fail_idx = -1;
-
- do {
- if (!btree_iter_node(iter, l))
- break;
-
- if (!(upgrade
- ? bch2_btree_node_upgrade(iter, l)
- : bch2_btree_node_relock(iter, l))) {
- if (trace)
- (upgrade
- ? trace_node_upgrade_fail
- : trace_node_relock_fail)(l, iter->l[l].lock_seq,
- is_btree_node(iter, l)
- ? 0
- : (unsigned long) iter->l[l].b,
- is_btree_node(iter, l)
- ? iter->l[l].b->c.lock.state.seq
- : 0);
-
- fail_idx = l;
- btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
- }
-
- l++;
- } while (l < iter->locks_want);
-
- /*
- * When we fail to get a lock, we have to ensure that any child nodes
- * can't be relocked, so that bch2_btree_iter_traverse() has to walk
- * back up to the node we failed to relock:
- */
- while (fail_idx >= 0) {
- btree_node_unlock(iter, fail_idx);
- iter->l[fail_idx].b = BTREE_ITER_NO_NODE_GET_LOCKS;
- --fail_idx;
- }
-
- if (iter->uptodate == BTREE_ITER_NEED_RELOCK)
- iter->uptodate = BTREE_ITER_NEED_PEEK;
-
- bch2_btree_trans_verify_locks(iter->trans);
-
- return iter->uptodate < BTREE_ITER_NEED_RELOCK;
-}
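-
-/*
- * btree_iter_get_locks() is thus the common walk behind both relock and
- * upgrade: work from iter->level up to iter->locks_want, re-acquiring
- * (or upgrading) each node's lock, and on failure clear the levels
- * below the failure point so the next traversal must descend through
- * the node we couldn't relock.
- */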
-
-static struct bpos btree_node_pos(struct btree_bkey_cached_common *_b,
- enum btree_iter_type type)
-{
- return type != BTREE_ITER_CACHED
- ? container_of(_b, struct btree, c)->key.k.p
- : container_of(_b, struct bkey_cached, c)->key.pos;
-}
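-
-/*
- * For cached iterators, l[0].b actually points at a bkey_cached from
- * the key cache rather than a btree node; both embed the same
- * btree_bkey_cached_common header, which is what makes the
- * container_of() above work.
- */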
-
-/* Slowpath: */
-bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
- unsigned level, struct btree_iter *iter,
- enum six_lock_type type,
- six_lock_should_sleep_fn should_sleep_fn, void *p,
- unsigned long ip)
-{
- struct btree_trans *trans = iter->trans;
- struct btree_iter *linked, *deadlock_iter = NULL;
- u64 start_time = local_clock();
- unsigned reason = 9;
- bool ret;
-
- /* Check if it's safe to block: */
- trans_for_each_iter(trans, linked) {
- if (!linked->nodes_locked)
- continue;
-
- /*
- * Can't block taking an intent lock if we have _any_ nodes read
- * locked:
- *
- * - Our read lock blocks another thread with an intent lock on
- * the same node from getting a write lock, and thus from
- * dropping its intent lock
- *
- * - And the other thread may have multiple nodes intent locked:
- * both the node we want to intent lock, and the node we
- * already have read locked - deadlock:
- */
- if (type == SIX_LOCK_intent &&
- linked->nodes_locked != linked->nodes_intent_locked) {
- deadlock_iter = linked;
- reason = 1;
- }
-
- if (linked->btree_id != iter->btree_id) {
- if (linked->btree_id > iter->btree_id) {
- deadlock_iter = linked;
- reason = 3;
- }
- continue;
- }
-
- /*
- * Within the same btree, cached iterators come before
- * non-cached iterators:
- */
- if (btree_iter_is_cached(linked) != btree_iter_is_cached(iter)) {
- if (btree_iter_is_cached(iter)) {
- deadlock_iter = linked;
- reason = 4;
- }
- continue;
- }
-
- /*
- * Interior nodes must be locked before their descendants: if
- * another iterator has locked a possible descendant of the node
- * we're about to lock, it must have the ancestors locked too:
- */
- if (level > __fls(linked->nodes_locked)) {
- deadlock_iter = linked;
- reason = 5;
- }
-
- /* Must lock btree nodes in key order: */
- if (btree_node_locked(linked, level) &&
- bpos_cmp(pos, btree_node_pos((void *) linked->l[level].b,
- btree_iter_type(linked))) <= 0) {
- deadlock_iter = linked;
- reason = 7;
- BUG_ON(trans->in_traverse_all);
- }
- }
-
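- /*
- * To summarize the ordering enforced above: iterators lock in
- * ascending btree id order, cached before non-cached within the
- * same btree, ancestors before descendants, and nodes within a
- * level in ascending key order; the reason code records which rule
- * would have been violated, for the tracepoint below.
- */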
- if (unlikely(deadlock_iter)) {
- trace_trans_restart_would_deadlock(iter->trans->ip, ip,
- trans->in_traverse_all, reason,
- deadlock_iter->btree_id,
- btree_iter_type(deadlock_iter),
- &deadlock_iter->real_pos,
- iter->btree_id,
- btree_iter_type(iter),
- &pos);
- return false;
- }
-
- if (six_trylock_type(&b->c.lock, type))
- return true;
-
-#ifdef CONFIG_BCACHEFS_DEBUG
- trans->locking_iter_idx = iter->idx;
- trans->locking_pos = pos;
- trans->locking_btree_id = iter->btree_id;
- trans->locking_level = level;
- trans->locking = b;
-#endif
-
- ret = six_lock_type(&b->c.lock, type, should_sleep_fn, p) == 0;
-
-#ifdef CONFIG_BCACHEFS_DEBUG
- trans->locking = NULL;
-#endif
- if (ret)
- bch2_time_stats_update(&trans->c->times[lock_to_time_stat(type)],
- start_time);
- return ret;
-}
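-
-/*
- * The return convention: true means the lock is held (possibly after
- * sleeping, with the wait accounted to the lock type's time stats);
- * false means taking the lock could deadlock given the ordering rules
- * above, and the caller is expected to restart the transaction.
- */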
-
-/* Btree iterator locking: */
-
-#ifdef CONFIG_BCACHEFS_DEBUG
-static void bch2_btree_iter_verify_locks(struct btree_iter *iter)
-{
- unsigned l;
-
- if (!(iter->trans->iters_linked & (1ULL << iter->idx))) {
- BUG_ON(iter->nodes_locked);
- return;
- }
-
- for (l = 0; is_btree_node(iter, l); l++) {
- if (iter->uptodate >= BTREE_ITER_NEED_RELOCK &&
- !btree_node_locked(iter, l))
- continue;
-
- BUG_ON(btree_lock_want(iter, l) !=
- btree_node_locked_type(iter, l));
- }
-}
-
-void bch2_btree_trans_verify_locks(struct btree_trans *trans)
-{
- struct btree_iter *iter;
-
- trans_for_each_iter(trans, iter)
- bch2_btree_iter_verify_locks(iter);
-}
-#else
-static inline void bch2_btree_iter_verify_locks(struct btree_iter *iter) {}
-#endif
-
-__flatten
-bool bch2_btree_iter_relock(struct btree_iter *iter, bool trace)
-{
- return btree_iter_get_locks(iter, false, trace);
-}
-
-bool __bch2_btree_iter_upgrade(struct btree_iter *iter,
- unsigned new_locks_want)
-{
- struct btree_iter *linked;
-
- EBUG_ON(iter->locks_want >= new_locks_want);
-
- iter->locks_want = new_locks_want;
-
- if (btree_iter_get_locks(iter, true, true))
- return true;
-
- /*
- * XXX: this is ugly - we'd prefer to not be mucking with other
- * iterators in the btree_trans here.
- *
- * On failure to upgrade the iterator, setting iter->locks_want and
- * calling get_locks() is sufficient to make bch2_btree_iter_traverse()
- * get the locks we want on transaction restart.
- *
- * But if this iterator was a clone, then what we did to it won't be
- * preserved across a transaction restart.
- *
- * Possibly we could add an iterator field for the parent iterator when
- * an iterator is a copy - for now, we'll just upgrade any other
- * iterators with the same btree id.
- *
- * The code below used to be needed to ensure ancestor nodes get locked
- * before interior nodes - now that's handled by
- * bch2_btree_iter_traverse_all().
- */
- trans_for_each_iter(iter->trans, linked)
- if (linked != iter &&
- btree_iter_type(linked) == btree_iter_type(iter) &&
- linked->btree_id == iter->btree_id &&
- linked->locks_want < new_locks_want) {
- linked->locks_want = new_locks_want;
- btree_iter_get_locks(linked, true, false);
- }
-
- return false;
-}
-
-void __bch2_btree_iter_downgrade(struct btree_iter *iter,
- unsigned new_locks_want)
-{
- unsigned l;
-
- EBUG_ON(iter->locks_want < new_locks_want);
-
- iter->locks_want = new_locks_want;
-
- while (iter->nodes_locked &&
- (l = __fls(iter->nodes_locked)) >= iter->locks_want) {
- if (l > iter->level) {
- btree_node_unlock(iter, l);
- } else {
- if (btree_node_intent_locked(iter, l)) {
- six_lock_downgrade(&iter->l[l].b->c.lock);
- iter->nodes_intent_locked ^= 1 << l;
- }
- break;
- }
- }
-
- bch2_btree_trans_verify_locks(iter->trans);
-}
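-
-/*
- * That is: levels above locks_want are unlocked outright, while at the
- * iterator's own level an intent lock is downgraded to a read lock with
- * six_lock_downgrade() rather than dropped, since the iterator still
- * needs read access to the node.
- */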
-
-void bch2_trans_downgrade(struct btree_trans *trans)
-{
- struct btree_iter *iter;
-
- trans_for_each_iter(trans, iter)
- bch2_btree_iter_downgrade(iter);
-}
-
-/* Btree transaction locking: */
-
-bool bch2_trans_relock(struct btree_trans *trans)
-{
- struct btree_iter *iter;
-
- trans_for_each_iter(trans, iter)
- if (!bch2_btree_iter_relock(iter, true)) {
- trace_trans_restart_relock(trans->ip);
- return false;
- }
- return true;
-}
-
-void bch2_trans_unlock(struct btree_trans *trans)
-{
- struct btree_iter *iter;
-
- trans_for_each_iter(trans, iter)
- __bch2_btree_iter_unlock(iter);