(upgrade
? trace_node_upgrade_fail
: trace_node_relock_fail)(iter->trans->ip, trace_ip,
+ btree_iter_type(iter) == BTREE_ITER_CACHED,
iter->btree_id, &iter->real_pos,
l, iter->l[l].lock_seq,
is_btree_node(iter, l)
? 0
: (unsigned long) iter->l[l].b,
is_btree_node(iter, l)
? iter->l[l].b->c.lock.state.seq
: 0);
-
fail_idx = l;
btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
}
}
if (unlikely(deadlock_iter)) {
- trace_trans_restart_would_deadlock(iter->trans->ip, ip,
+ trace_trans_restart_would_deadlock(trans->ip, ip,
trans->in_traverse_all, reason,
deadlock_iter->btree_id,
btree_iter_type(deadlock_iter),
iter->btree_id,
btree_iter_type(iter),
&pos);
+ btree_trans_restart(trans);
return false;
}
static inline void bch2_btree_iter_verify_locks(struct btree_iter *iter) {}
#endif
+/*
+ * Only for btree_cache.c - only relocks intent locks
+ */
+bool bch2_btree_iter_relock_intent(struct btree_iter *iter)
+{
+ unsigned l;
+
+ for (l = iter->level;
+ l < iter->locks_want && btree_iter_node(iter, l);
+ l++) {
+ if (!bch2_btree_node_relock(iter, l)) {
+ trace_node_relock_fail(iter->trans->ip, _RET_IP_,
+ btree_iter_type(iter) == BTREE_ITER_CACHED,
+ iter->btree_id, &iter->real_pos,
+ l, iter->l[l].lock_seq,
+ is_btree_node(iter, l)
+ ? 0
+ : (unsigned long) iter->l[l].b,
+ is_btree_node(iter, l)
+ ? iter->l[l].b->c.lock.state.seq
+ : 0);
+ btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
+ btree_trans_restart(iter->trans);
+ return false;
+ }
+ }
+
+ return true;
+}
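+/*
+ * Illustrative sketch, not part of the patch: the intended
+ * btree_cache.c caller pattern, assuming a hypothetical helper
+ * read_node_maybe_blocking() that can drop locks while waiting on
+ * node IO. Only the intent locks need to survive - read locks are
+ * cheap to retake via a full traverse:
+ *
+ *	b = read_node_maybe_blocking(c, iter, k, level);
+ *	if (!IS_ERR_OR_NULL(b) &&
+ *	    !bch2_btree_iter_relock_intent(iter))
+ *		return ERR_PTR(-EINTR);	// trans->restarted is now set
+ */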
+
__flatten
bool bch2_btree_iter_relock(struct btree_iter *iter, unsigned long trace_ip)
{
- return btree_iter_get_locks(iter, false, trace_ip);
+ bool ret = btree_iter_get_locks(iter, false, trace_ip);
+
+ if (!ret)
+ btree_trans_restart(iter->trans);
+ return ret;
}
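/*
 * Illustrative caller sketch, not part of the patch: since a relock
 * failure now calls btree_trans_restart() itself, callers can simply
 * propagate -EINTR without restarting the transaction by hand:
 *
 *	if (!bch2_btree_iter_relock(iter, trace_ip))
 *		return -EINTR;	// trans->restarted already set
 */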
bool __bch2_btree_iter_upgrade(struct btree_iter *iter,
btree_iter_get_locks(linked, true, _THIS_IP_);
}
+ if (iter->should_be_locked)
+ btree_trans_restart(iter->trans);
return false;
}
/* Btree transaction locking: */
-static inline bool btree_iter_should_be_locked(struct btree_trans *trans,
- struct btree_iter *iter)
+static inline bool btree_iter_should_be_locked(struct btree_iter *iter)
{
return (iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT) ||
iter->should_be_locked;
{
struct btree_iter *iter;
+ if (unlikely(trans->restarted))
+ return false;
+
trans_for_each_iter(trans, iter)
- if (!bch2_btree_iter_relock(iter, _RET_IP_) &&
- btree_iter_should_be_locked(trans, iter)) {
+ if (btree_iter_should_be_locked(iter) &&
+ !bch2_btree_iter_relock(iter, _RET_IP_)) {
trace_trans_restart_relock(trans->ip, _RET_IP_,
iter->btree_id, &iter->real_pos);
+ BUG_ON(!trans->restarted);
return false;
}
return true;
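/*
 * Assumed caller sketch (not from the patch): once trans->restarted
 * is set, bch2_trans_relock() refuses to relock anything, forcing the
 * error back up to the top-level retry loop:
 *
 *	if (!bch2_trans_relock(trans))
 *		return -EINTR;	// must unwind to bch2_trans_begin()
 */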
return b == *rootp ? 0 : -1;
}
-static inline int btree_iter_lock_root(struct btree_iter *iter,
+static inline int btree_iter_lock_root(struct btree_trans *trans,
+ struct btree_iter *iter,
unsigned depth_want,
unsigned long trace_ip)
{
- struct bch_fs *c = iter->trans->c;
+ struct bch_fs *c = trans->c;
struct btree *b, **rootp = &c->btree_roots[iter->btree_id].b;
enum six_lock_type lock_type;
unsigned i;
if (unlikely(!btree_node_lock(b, SPOS_MAX, iter->level,
iter, lock_type,
lock_root_check_fn, rootp,
- trace_ip)))
- return -EINTR;
+ trace_ip))) {
+ if (trans->restarted)
+ return -EINTR;
+ continue;
+ }
if (likely(b == READ_ONCE(*rootp) &&
b->c.level == iter->level &&
}
noinline
-static void btree_iter_prefetch(struct btree_iter *iter)
+static int btree_iter_prefetch(struct btree_iter *iter)
{
struct bch_fs *c = iter->trans->c;
struct btree_iter_level *l = &iter->l[iter->level];
? (iter->level > 1 ? 0 : 2)
: (iter->level > 1 ? 1 : 16);
bool was_locked = btree_node_locked(iter, iter->level);
+ int ret = 0;
bch2_bkey_buf_init(&tmp);
- while (nr) {
+ while (nr && !ret) {
if (!bch2_btree_node_relock(iter, iter->level))
break;
break;
bch2_bkey_buf_unpack(&tmp, c, l->b, k);
- bch2_btree_node_prefetch(c, iter, tmp.k, iter->btree_id,
- iter->level - 1);
+ ret = bch2_btree_node_prefetch(c, iter, tmp.k, iter->btree_id,
+ iter->level - 1);
}
if (!was_locked)
btree_node_unlock(iter, iter->level);
bch2_bkey_buf_exit(&tmp, c);
+ return ret;
}
static noinline void btree_node_mem_ptr_set(struct btree_iter *iter,
btree_node_unlock(iter, plevel);
}
-static __always_inline int btree_iter_down(struct btree_iter *iter,
+static __always_inline int btree_iter_down(struct btree_trans *trans,
+ struct btree_iter *iter,
unsigned long trace_ip)
{
- struct bch_fs *c = iter->trans->c;
+ struct bch_fs *c = trans->c;
struct btree_iter_level *l = &iter->l[iter->level];
struct btree *b;
unsigned level = iter->level - 1;
bch2_bkey_buf_unpack(&tmp, c, l->b,
bch2_btree_node_iter_peek(&l->iter, l->b));
- b = bch2_btree_node_get(c, iter, tmp.k, level, lock_type, trace_ip);
+ b = bch2_btree_node_get(trans, iter, tmp.k, level, lock_type, trace_ip);
ret = PTR_ERR_OR_ZERO(b);
if (unlikely(ret))
goto err;
btree_node_mem_ptr_set(iter, level + 1, b);
if (iter->flags & BTREE_ITER_PREFETCH)
- btree_iter_prefetch(iter);
+ ret = btree_iter_prefetch(iter);
if (btree_node_read_locked(iter, level + 1))
btree_node_unlock(iter, level + 1);
struct btree_iter *iter;
u8 sorted[BTREE_ITER_MAX];
int i, nr_sorted = 0;
- bool relock_fail;
if (trans->in_traverse_all)
return -EINTR;
trans->in_traverse_all = true;
retry_all:
+ trans->restarted = false;
+
nr_sorted = 0;
- relock_fail = false;
trans_for_each_iter(trans, iter) {
- if (!bch2_btree_iter_relock(iter, _THIS_IP_))
- relock_fail = true;
sorted[nr_sorted++] = iter->idx;
- }
-
- if (!relock_fail) {
- trans->in_traverse_all = false;
- return 0;
+ iter->should_be_locked = false;
}
#define btree_iter_cmp_by_idx(_l, _r) \
if (ret)
goto retry_all;
}
-
- if (hweight64(trans->iters_live) > 1)
- ret = -EINTR;
- else
- trans_for_each_iter(trans, iter)
- if (iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT) {
- ret = -EINTR;
- break;
- }
out:
bch2_btree_cache_cannibalize_unlock(c);
return ret;
}
-int bch2_btree_iter_traverse_all(struct btree_trans *trans)
+static int bch2_btree_iter_traverse_all(struct btree_trans *trans)
{
return __btree_iter_traverse_all(trans, 0, _RET_IP_);
}
static int btree_iter_traverse_one(struct btree_iter *iter,
unsigned long trace_ip)
{
+ struct btree_trans *trans = iter->trans;
unsigned l, depth_want = iter->level;
int ret = 0;
+ /*
+ * Ensure we obey iter->should_be_locked: if it's set, we can't unlock
+ * and re-traverse the iterator without a transaction restart:
+ */
+ if (iter->should_be_locked) {
+ ret = bch2_btree_iter_relock(iter, trace_ip) ? 0 : -EINTR;
+ goto out;
+ }
+
if (btree_iter_type(iter) == BTREE_ITER_CACHED) {
ret = bch2_btree_iter_traverse_cached(iter);
goto out;
*/
while (iter->level > depth_want) {
ret = btree_iter_node(iter, iter->level)
- ? btree_iter_down(iter, trace_ip)
- : btree_iter_lock_root(iter, depth_want, trace_ip);
+ ? btree_iter_down(trans, iter, trace_ip)
+ : btree_iter_lock_root(trans, iter, depth_want, trace_ip);
if (unlikely(ret)) {
if (ret == 1) {
/*
iter->uptodate = BTREE_ITER_NEED_PEEK;
out:
- trace_iter_traverse(iter->trans->ip, trace_ip,
+ BUG_ON((ret == -EINTR) != !!trans->restarted);
+ trace_iter_traverse(trans->ip, trace_ip,
+ btree_iter_type(iter) == BTREE_ITER_CACHED,
iter->btree_id, &iter->real_pos, ret);
bch2_btree_iter_verify(iter);
return ret;
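/*
 * Illustration of the should_be_locked contract enforced in
 * btree_iter_traverse_one() above (assumed caller, not part of the
 * patch): once an iterator has returned a key it is marked
 * should_be_locked, so a later traverse that cannot relock it must
 * fail with -EINTR and trans->restarted set, rather than silently
 * re-traversing from a different position:
 *
 *	k = bch2_btree_iter_peek(iter);	// marks iter should_be_locked
 *	ret = btree_iter_traverse_one(iter, _RET_IP_);
 *	BUG_ON(ret == -EINTR && !iter->trans->restarted);
 */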
ret = bch2_trans_cond_resched(trans) ?:
btree_iter_traverse_one(iter, _RET_IP_);
- if (unlikely(ret))
+ if (unlikely(ret) && hweight64(trans->iters_linked) == 1) {
ret = __btree_iter_traverse_all(trans, ret, _RET_IP_);
+ BUG_ON(ret == -EINTR);
+ }
return ret;
}
int cmp = bpos_cmp(new_pos, iter->real_pos);
unsigned l = iter->level;
+ EBUG_ON(iter->trans->restarted);
+
if (!cmp)
goto out;
struct btree_iter *iter, *best = NULL;
struct bpos real_pos, pos_min = POS_MIN;
+ EBUG_ON(trans->restarted);
+
if ((flags & BTREE_ITER_TYPE) != BTREE_ITER_NODES &&
btree_node_type_is_extents(btree_id) &&
!(flags & BTREE_ITER_NOT_EXTENTS) &&
if (old_bytes) {
trace_trans_restart_mem_realloced(trans->ip, _RET_IP_, new_bytes);
+ btree_trans_restart(trans);
return ERR_PTR(-EINTR);
}
}
}
/**
- * bch2_trans_reset() - reset a transaction after a interrupted attempt
+ * bch2_trans_begin() - reset a transaction after an interrupted attempt
* @trans: transaction to reset
- * @flags: transaction reset flags.
*
* While iterating over or updating nodes, an attempt to lock a btree
* node may return EINTR when the trylock fails. When this occurs
- * bch2_trans_reset() or bch2_trans_begin() should be called and the
- * transaction retried.
- *
- * Transaction reset flags include:
- *
- * - TRANS_RESET_NOUNLOCK - Do not attempt to unlock and reschedule the
- * transaction.
- * - TRANS_RESET_NOTRAVERSE - Do not traverse all linked iters.
+ * bch2_trans_begin() should be called and the transaction retried.
*/
-void bch2_trans_reset(struct btree_trans *trans, unsigned flags)
+void bch2_trans_begin(struct btree_trans *trans)
{
struct btree_iter *iter;
- trans_for_each_iter(trans, iter) {
+ trans_for_each_iter(trans, iter)
iter->flags &= ~(BTREE_ITER_KEEP_UNTIL_COMMIT|
BTREE_ITER_SET_POS_AFTER_COMMIT);
- iter->should_be_locked = false;
- }
+ /*
+ * XXX: we shouldn't be doing this if the transaction was restarted, but
+ * currently we still overflow transaction iterators if we do that
+ */
bch2_trans_unlink_iters(trans);
-
trans->iters_touched &= trans->iters_live;
trans->extra_journal_res = 0;
(void *) &trans->fs_usage_deltas->memset_start);
}
- if (!(flags & TRANS_RESET_NOUNLOCK))
- bch2_trans_cond_resched(trans);
+ bch2_trans_cond_resched(trans);
- if (!(flags & TRANS_RESET_NOTRAVERSE) &&
- trans->iters_linked)
+ if (trans->restarted)
bch2_btree_iter_traverse_all(trans);
+
+ trans->restarted = false;
}
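/*
 * Minimal usage sketch for the retry loop described in the
 * bch2_trans_begin() comment above (do_op() stands in for any
 * transaction body that returns -EINTR when the transaction is
 * restarted):
 *
 *	do {
 *		bch2_trans_begin(&trans);
 *		ret = do_op(&trans);
 *	} while (ret == -EINTR);
 */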
static void bch2_trans_alloc_iters(struct btree_trans *trans, struct bch_fs *c)