BUG_ON(!(iter->flags & __BTREE_ITER_ALL_SNAPSHOTS) &&
(iter->flags & BTREE_ITER_ALL_SNAPSHOTS) &&
- !btree_type_has_snapshots(iter->btree_id));
+ !btree_type_has_snapshot_field(iter->btree_id));
if (iter->update_path)
bch2_btree_path_verify(trans, iter->update_path);
bch2_bpos_to_text(&buf, pos);
panic("not locked: %s %s%s\n",
- bch2_btree_ids[id], buf.buf,
+ bch2_btree_id_str(id), buf.buf,
key_cache ? " cached" : "");
}
struct btree_node_iter node_iter = l->iter;
struct bkey_packed *k;
struct bkey_buf tmp;
- unsigned nr = test_bit(BCH_FS_STARTED, &c->flags)
+ unsigned nr = test_bit(BCH_FS_started, &c->flags)
? (path->level > 1 ? 0 : 2)
: (path->level > 1 ? 1 : 16);
bool was_locked = btree_node_locked(path, path->level);
struct bch_fs *c = trans->c;
struct bkey_s_c k;
struct bkey_buf tmp;
- unsigned nr = test_bit(BCH_FS_STARTED, &c->flags)
+ unsigned nr = test_bit(BCH_FS_started, &c->flags)
? (path->level > 1 ? 0 : 2)
: (path->level > 1 ? 1 : 16);
bool was_locked = btree_node_locked(path, path->level);
if (unlikely(ret))
goto out;
+ if (unlikely(!trans->srcu_held))
+ bch2_trans_srcu_lock(trans);
+
/*
* Ensure we obey path->should_be_locked: if it's set, we can't unlock
* and re-traverse the path without a transaction restart:
struct btree_path *path, struct bpos new_pos,
bool intent, unsigned long ip, int cmp)
{
- unsigned level = path->level;
-
bch2_trans_verify_not_in_restart(trans);
EBUG_ON(!path->ref);
goto out;
}
- level = btree_path_up_until_good_node(trans, path, cmp);
+ unsigned level = btree_path_up_until_good_node(trans, path, cmp);
if (btree_path_node(path, level)) {
struct btree_path_level *l = &path->l[level];
struct bkey_s_c old = { &i->old_k, i->old_v };
prt_printf(buf, "update: btree=%s cached=%u %pS",
- bch2_btree_ids[i->btree_id],
+ bch2_btree_id_str(i->btree_id),
i->cached,
(void *) i->ip_allocated);
prt_newline(buf);
trans_for_each_wb_update(trans, wb) {
prt_printf(buf, "update: btree=%s wb=1 %pS",
- bch2_btree_ids[wb->btree],
+ bch2_btree_id_str(wb->btree),
(void *) i->ip_allocated);
prt_newline(buf);
path->idx, path->ref, path->intent_ref,
path->preserve ? 'P' : ' ',
path->should_be_locked ? 'S' : ' ',
- bch2_btree_ids[path->btree_id],
+ bch2_btree_id_str(path->btree_id),
path->level);
bch2_bpos_to_text(out, path->pos);
trans->nr_max_paths = hweight64(trans->paths_allocated);
}
+noinline __cold /* restart @trans with BCH_ERR_transaction_restart_too_many_iters, tracing and counting the event */
+int __bch2_btree_trans_too_many_iters(struct btree_trans *trans)
+{
+ if (trace_trans_restart_too_many_iters_enabled()) { /* the path dump is only built when this tracepoint is enabled */
+ struct printbuf buf = PRINTBUF;
+
+ bch2_trans_paths_to_text(&buf, trans); /* text form of the transaction's paths goes into the trace event */
+ trace_trans_restart_too_many_iters(trans, _THIS_IP_, buf.buf);
+ printbuf_exit(&buf); /* buf is not used past this point */
+ }
+
+ count_event(trans->c, trans_restart_too_many_iters); /* counted whether or not the tracepoint is enabled */
+
+ return btree_trans_restart(trans, BCH_ERR_transaction_restart_too_many_iters); /* caller gets whatever btree_trans_restart() returns */
+}
+
static noinline void btree_path_overflow(struct btree_trans *trans)
{
bch2_dump_trans_paths_updates(trans);
path->ref = 0;
path->intent_ref = 0;
path->nodes_locked = 0;
+ path->alloc_seq++;
btree_path_list_add(trans, pos, path);
trans->paths_sorted = false;
locks_want = min(locks_want, BTREE_MAX_DEPTH);
if (locks_want > path->locks_want)
- bch2_btree_path_upgrade_noupgrade_sibs(trans, path, locks_want);
+ bch2_btree_path_upgrade_noupgrade_sibs(trans, path, locks_want, NULL);
return path;
}
inline bool bch2_btree_iter_advance(struct btree_iter *iter)
{
- if (likely(!(iter->flags & BTREE_ITER_ALL_LEVELS))) {
- struct bpos pos = iter->k.p;
- bool ret = !(iter->flags & BTREE_ITER_ALL_SNAPSHOTS
- ? bpos_eq(pos, SPOS_MAX)
- : bkey_eq(pos, SPOS_MAX));
-
- if (ret && !(iter->flags & BTREE_ITER_IS_EXTENTS))
- pos = bkey_successor(iter, pos);
- bch2_btree_iter_set_pos(iter, pos);
- return ret;
- } else {
- if (!btree_path_node(iter->path, iter->path->level))
- return true;
+ struct bpos pos = iter->k.p; /* advance relative to iter->k.p */
+ bool ret = !(iter->flags & BTREE_ITER_ALL_SNAPSHOTS
+ ? bpos_eq(pos, SPOS_MAX)
+ : bkey_eq(pos, SPOS_MAX)); /* ret is false only when pos already compares equal to SPOS_MAX (bpos_eq with ALL_SNAPSHOTS, bkey_eq otherwise) */
- iter->advanced = true;
- return false;
- }
+ if (ret && !(iter->flags & BTREE_ITER_IS_EXTENTS)) /* extent iterators keep pos as-is; others step to the successor */
+ pos = bkey_successor(iter, pos);
+ bch2_btree_iter_set_pos(iter, pos); /* the position is (re)set even when ret is false */
+ return ret; /* true if the iterator was not already at SPOS_MAX */
}
inline bool bch2_btree_iter_rewind(struct btree_iter *iter)
struct btree_iter *iter,
struct bpos end_pos)
{
- struct bkey_i *k;
-
- if (bpos_lt(iter->path->pos, iter->journal_pos))
- iter->journal_idx = 0;
-
- k = bch2_journal_keys_peek_upto(trans->c, iter->btree_id,
- iter->path->level,
- iter->path->pos,
- end_pos,
- &iter->journal_idx);
-
- iter->journal_pos = k ? k->k.p : end_pos;
- return k;
+ return bch2_journal_keys_peek_upto(trans->c, iter->btree_id,
+ iter->path->level,
+ iter->path->pos,
+ end_pos,
+ &iter->journal_idx);
}
static noinline
struct bpos iter_pos;
int ret;
- EBUG_ON(iter->flags & BTREE_ITER_ALL_LEVELS);
EBUG_ON((iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) && bkey_eq(end, POS_MAX));
if (iter->update_path) {
goto out_no_locked;
}
-/**
- * bch2_btree_iter_peek_all_levels() - returns the first key greater than or
- * equal to iterator's current position, returning keys from every level of the
- * btree. For keys at different levels of the btree that compare equal, the key
- * from the lower level (leaf) is returned first.
- * @iter: iterator to peek from
- *
- * Returns: key if found, or an error extractable with bkey_err().
- */
-struct bkey_s_c bch2_btree_iter_peek_all_levels(struct btree_iter *iter)
-{
- struct btree_trans *trans = iter->trans;
- struct bkey_s_c k;
- int ret;
-
- EBUG_ON(iter->path->cached);
- bch2_btree_iter_verify(iter);
- BUG_ON(iter->path->level < iter->min_depth);
- BUG_ON(!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS));
- EBUG_ON(!(iter->flags & BTREE_ITER_ALL_LEVELS));
-
- while (1) {
- iter->path = bch2_btree_path_set_pos(trans, iter->path, iter->pos,
- iter->flags & BTREE_ITER_INTENT,
- btree_iter_ip_allocated(iter));
-
- ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
- if (unlikely(ret)) {
- /* ensure that iter->k is consistent with iter->pos: */
- bch2_btree_iter_set_pos(iter, iter->pos);
- k = bkey_s_c_err(ret);
- goto out_no_locked;
- }
-
- /* Already at end? */
- if (!btree_path_node(iter->path, iter->path->level)) {
- k = bkey_s_c_null;
- goto out_no_locked;
- }
-
- k = btree_path_level_peek_all(trans->c,
- &iter->path->l[iter->path->level], &iter->k);
-
- /* Check if we should go up to the parent node: */
- if (!k.k ||
- (iter->advanced &&
- bpos_eq(path_l(iter->path)->b->key.k.p, iter->pos))) {
- iter->pos = path_l(iter->path)->b->key.k.p;
- btree_path_set_level_up(trans, iter->path);
- iter->advanced = false;
- continue;
- }
-
- /*
- * Check if we should go back down to a leaf:
- * If we're not in a leaf node, we only return the current key
- * if it exactly matches iter->pos - otherwise we first have to
- * go back to the leaf:
- */
- if (iter->path->level != iter->min_depth &&
- (iter->advanced ||
- !k.k ||
- !bpos_eq(iter->pos, k.k->p))) {
- btree_path_set_level_down(trans, iter->path, iter->min_depth);
- iter->pos = bpos_successor(iter->pos);
- iter->advanced = false;
- continue;
- }
-
- /* Check if we should go to the next key: */
- if (iter->path->level == iter->min_depth &&
- iter->advanced &&
- k.k &&
- bpos_eq(iter->pos, k.k->p)) {
- iter->pos = bpos_successor(iter->pos);
- iter->advanced = false;
- continue;
- }
-
- if (iter->advanced &&
- iter->path->level == iter->min_depth &&
- !bpos_eq(k.k->p, iter->pos))
- iter->advanced = false;
-
- BUG_ON(iter->advanced);
- BUG_ON(!k.k);
- break;
- }
-
- iter->pos = k.k->p;
- btree_path_set_should_be_locked(iter->path);
-out_no_locked:
- bch2_btree_iter_verify(iter);
-
- return k;
-}
-
/**
* bch2_btree_iter_next() - returns first key greater than iterator's current
* position
bch2_btree_iter_verify(iter);
bch2_btree_iter_verify_entry_exit(iter);
- EBUG_ON(iter->flags & BTREE_ITER_ALL_LEVELS);
EBUG_ON(iter->path->level && (iter->flags & BTREE_ITER_WITH_KEY_CACHE));
/* extents can't span inode numbers: */
return p;
}
-static noinline void bch2_trans_reset_srcu_lock(struct btree_trans *trans)
+static inline void check_srcu_held_too_long(struct btree_trans *trans) /* WARN when the srcu read lock is held and was taken more than 10 * HZ jiffies ago */
{
- struct bch_fs *c = trans->c;
- struct btree_path *path;
+ WARN(trans->srcu_held && time_after(jiffies, trans->srcu_lock_time + HZ * 10),
+ "btree trans held srcu lock (delaying memory reclaim) for %lu seconds",
+ (jiffies - trans->srcu_lock_time) / HZ); /* elapsed jiffies converted to whole seconds for the message */
+}
- trans_for_each_path(trans, path)
- if (path->cached && !btree_node_locked(path, 0))
- path->l[0].b = ERR_PTR(-BCH_ERR_no_btree_node_srcu_reset);
+void bch2_trans_srcu_unlock(struct btree_trans *trans) /* drop the srcu read lock if this transaction holds it; no-op otherwise */
+{
+ if (trans->srcu_held) {
+ struct bch_fs *c = trans->c;
+ struct btree_path *path;
- srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx);
- trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
- trans->srcu_lock_time = jiffies;
+ trans_for_each_path(trans, path) /* NOTE(review): presumably unlocked cached nodes may be freed once srcu is dropped - confirm */
+ if (path->cached && !btree_node_locked(path, 0))
+ path->l[0].b = ERR_PTR(-BCH_ERR_no_btree_node_srcu_reset); /* replace the level-0 node pointer with an error pointer */
+
+ check_srcu_held_too_long(trans); /* checked before unlocking, while srcu_held is still true */
+ srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx);
+ trans->srcu_held = false; /* lets bch2_trans_srcu_lock() take the lock again later */
+ }
+}
+
+void bch2_trans_srcu_lock(struct btree_trans *trans) /* take the srcu read lock unless this transaction already holds it */
+{
+ if (!trans->srcu_held) {
+ trans->srcu_idx = srcu_read_lock(&trans->c->btree_trans_barrier); /* index is saved for the matching srcu_read_unlock() */
+ trans->srcu_lock_time = jiffies; /* timestamp that check_srcu_held_too_long() compares against */
+ trans->srcu_held = true;
+ }
}
/**
}
trans->last_begin_time = now;
- if (unlikely(time_after(jiffies, trans->srcu_lock_time + msecs_to_jiffies(10))))
- bch2_trans_reset_srcu_lock(trans);
+ if (unlikely(trans->srcu_held &&
+ time_after(jiffies, trans->srcu_lock_time + msecs_to_jiffies(10))))
+ bch2_trans_srcu_unlock(trans);
trans->last_begin_ip = _RET_IP_;
if (trans->restarted) {
trans->fn_idx = fn_idx;
trans->locking_wait.task = current;
trans->journal_replay_not_finished =
- !test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags);
+ unlikely(!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags)) &&
+ atomic_inc_not_zero(&c->journal_keys.ref);
closure_init_stack(&trans->ref);
s = btree_trans_stats(trans);
trans->wb_updates_size = s->wb_updates_size;
}
- trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
+ trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
trans->srcu_lock_time = jiffies;
+ trans->srcu_held = true;
if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG_TRANSACTIONS)) {
struct btree_trans *pos;
trans_for_each_path(trans, path)
if (path->ref)
printk(KERN_ERR " btree %s %pS\n",
- bch2_btree_ids[path->btree_id],
+ bch2_btree_id_str(path->btree_id),
(void *) path->ip_allocated);
/* Be noisy about this: */
bch2_fatal_error(c);
check_btree_paths_leaked(trans);
- srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx);
-
- bch2_journal_preres_put(&c->journal, &trans->journal_preres);
+ if (trans->srcu_held) {
+ check_srcu_held_too_long(trans);
+ srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx);
+ }
kfree(trans->extra_journal_entries.data);
kfree(trans->fs_usage_deltas);
}
+ if (unlikely(trans->journal_replay_not_finished))
+ bch2_journal_keys_put(c);
+
if (trans->mem_bytes == BTREE_TRANS_MEM_MAX)
mempool_free(trans->mem, &c->btree_trans_mem_pool);
else
prt_tab(out);
prt_printf(out, "%px %c l=%u %s:", b, b->cached ? 'c' : 'b',
- b->level, bch2_btree_ids[b->btree_id]);
+ b->level, bch2_btree_id_str(b->btree_id));
bch2_bpos_to_text(out, btree_node_pos(b));
prt_tab(out);
struct btree_path *path;
struct btree_bkey_cached_common *b;
static char lock_types[] = { 'r', 'i', 'w' };
+ struct task_struct *task = READ_ONCE(trans->locking_wait.task);
unsigned l, idx;
if (!out->nr_tabstops) {
printbuf_tabstop_push(out, 32);
}
- prt_printf(out, "%i %s\n", trans->locking_wait.task->pid, trans->fn);
+ prt_printf(out, "%i %s\n", task ? task->pid : 0, trans->fn);
trans_for_each_path_safe(trans, path, idx) {
if (!path->nodes_locked)
path->idx,
path->cached ? 'c' : 'b',
path->level,
- bch2_btree_ids[path->btree_id]);
+ bch2_btree_id_str(path->btree_id));
bch2_bpos_to_text(out, path->pos);
prt_newline(out);