X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libbcachefs%2Fbtree_locking.c;h=1eca320e7574ef251c2f62a5463fb93da7530f7c;hb=e3e7f67b3ed89f5b3158142c29e66bb98f868ce2;hp=0032d0eb05a59c15a520467b86b7d971a2005785;hpb=1ee7dc7a55273d34358a0ee525a9e823c999ffe6;p=bcachefs-tools-debian

diff --git a/libbcachefs/btree_locking.c b/libbcachefs/btree_locking.c
index 0032d0e..1eca320 100644
--- a/libbcachefs/btree_locking.c
+++ b/libbcachefs/btree_locking.c
@@ -6,30 +6,25 @@
 
 static struct lock_class_key bch2_btree_node_lock_key;
 
-void bch2_btree_lock_init(struct btree_bkey_cached_common *b)
+void bch2_btree_lock_init(struct btree_bkey_cached_common *b,
+			  enum six_lock_init_flags flags)
 {
-	__six_lock_init(&b->lock, "b->c.lock", &bch2_btree_node_lock_key);
+	__six_lock_init(&b->lock, "b->c.lock", &bch2_btree_node_lock_key, flags);
+	lockdep_set_novalidate_class(&b->lock);
 }
 
 #ifdef CONFIG_LOCKDEP
 void bch2_assert_btree_nodes_not_locked(void)
 {
+#if 0
+	//Re-enable when lock_class_is_held() is merged:
 	BUG_ON(lock_class_is_held(&bch2_btree_node_lock_key));
+#endif
 }
 #endif
 
 /* Btree node locking: */
 
-static inline void six_lock_readers_add(struct six_lock *lock, int nr)
-{
-	if (lock->readers)
-		this_cpu_add(*lock->readers, nr);
-	else if (nr > 0)
-		atomic64_add(__SIX_VAL(read_lock, nr), &lock->state.counter);
-	else
-		atomic64_sub(__SIX_VAL(read_lock, -nr), &lock->state.counter);
-}
-
 struct six_lock_count bch2_btree_node_lock_counts(struct btree_trans *trans,
 						  struct btree_path *skip,
 						  struct btree_bkey_cached_common *b,
@@ -117,10 +112,8 @@ static noinline void lock_graph_pop_all(struct lock_graph *g)
 		lock_graph_up(g);
 }
 
-static void lock_graph_down(struct lock_graph *g, struct btree_trans *trans)
+static void __lock_graph_down(struct lock_graph *g, struct btree_trans *trans)
 {
-	closure_get(&trans->ref);
-
 	g->g[g->nr++] = (struct trans_waiting_for_lock) {
 		.trans		= trans,
 		.node_want	= trans->locking,
@@ -128,6 +121,12 @@ static void lock_graph_down(struct lock_graph *g, struct btree_trans *trans)
 	};
 }
 
+static void lock_graph_down(struct lock_graph *g, struct btree_trans *trans)
+{
+	closure_get(&trans->ref);
+	__lock_graph_down(g, trans);
+}
+
 static bool lock_graph_remove_non_waiters(struct lock_graph *g)
 {
 	struct trans_waiting_for_lock *i;
@@ -143,10 +142,28 @@ static bool lock_graph_remove_non_waiters(struct lock_graph *g)
 	return false;
 }
 
+static void trace_would_deadlock(struct lock_graph *g, struct btree_trans *trans,
+				 unsigned long ip)
+{
+	struct bch_fs *c = trans->c;
+
+	count_event(c, trans_restart_would_deadlock);
+
+	if (trace_trans_restart_would_deadlock_enabled()) {
+		struct printbuf buf = PRINTBUF;
+
+		buf.atomic++;
+		print_cycle(&buf, g);
+
+		trace_trans_restart_would_deadlock(trans, ip, buf.buf);
+		printbuf_exit(&buf);
+	}
+}
+
 static int abort_lock(struct lock_graph *g, struct trans_waiting_for_lock *i)
 {
 	if (i == g->g) {
-		trace_and_count(i->trans->c, trans_restart_would_deadlock, i->trans, _RET_IP_);
+		trace_would_deadlock(g, i->trans, _RET_IP_);
 		return btree_trans_restart(i->trans, BCH_ERR_transaction_restart_would_deadlock);
 	} else {
 		i->trans->lock_must_abort = true;
@@ -228,10 +245,14 @@ static int lock_graph_descend(struct lock_graph *g, struct btree_trans *trans,
 	struct trans_waiting_for_lock *i;
 
 	for (i = g->g; i < g->g + g->nr; i++)
-		if (i->trans == trans)
+		if (i->trans == trans) {
+			closure_put(&trans->ref);
 			return break_cycle(g, cycle);
+		}
 
 	if (g->nr == ARRAY_SIZE(g->g)) {
+		closure_put(&trans->ref);
+
 		if (orig_trans->lock_may_not_fail)
 			return 0;
 
@@ -245,7 +266,7 @@ static int lock_graph_descend(struct lock_graph *g, struct btree_trans *trans,
 		return btree_trans_restart(orig_trans, BCH_ERR_transaction_restart_deadlock_recursion_limit);
 	}
 
-	lock_graph_down(g, trans);
+	__lock_graph_down(g, trans);
 	return 0;
 }
 
@@ -260,17 +281,19 @@ int bch2_check_for_deadlock(struct btree_trans *trans, struct printbuf *cycle)
 	struct trans_waiting_for_lock *top;
 	struct btree_bkey_cached_common *b;
 	struct btree_path *path;
+	unsigned path_idx;
 	int ret;
 
+	g.nr = 0;
+
 	if (trans->lock_must_abort) {
 		if (cycle)
 			return -1;
 
-		trace_and_count(trans->c, trans_restart_would_deadlock, trans, _RET_IP_);
+		trace_would_deadlock(&g, trans, _RET_IP_);
 		return btree_trans_restart(trans, BCH_ERR_transaction_restart_would_deadlock);
 	}
 
-	g.nr = 0;
 	lock_graph_down(&g, trans);
 next:
 	if (!g.nr)
@@ -278,12 +301,12 @@ next:
 
 	top = &g.g[g.nr - 1];
 
-	trans_for_each_path_from(top->trans, path, top->path_idx) {
+	trans_for_each_path_safe_from(top->trans, path, path_idx, top->path_idx) {
 		if (!path->nodes_locked)
 			continue;
 
-		if (top->path_idx != path->idx) {
-			top->path_idx		= path->idx;
+		if (path_idx != top->path_idx) {
+			top->path_idx		= path_idx;
 			top->level		= 0;
 			top->lock_start_time	= 0;
 		}
@@ -339,9 +362,10 @@ next:
 			    !lock_type_conflicts(lock_held, trans->locking_wait.lock_want))
 				continue;
 
-			ret = lock_graph_descend(&g, trans, cycle);
+			closure_get(&trans->ref);
 			raw_spin_unlock(&b->lock.wait_lock);
 
+			ret = lock_graph_descend(&g, trans, cycle);
 			if (ret)
 				return ret;
 			goto next;
@@ -383,16 +407,51 @@ int __bch2_btree_node_lock_write(struct btree_trans *trans, struct btree_path *p
 	six_lock_readers_add(&b->lock, readers);
 
 	if (ret)
-		mark_btree_node_locked_noreset(path, b->level, SIX_LOCK_intent);
+		mark_btree_node_locked_noreset(path, b->level, BTREE_NODE_INTENT_LOCKED);
 
 	return ret;
 }
 
+void bch2_btree_node_lock_write_nofail(struct btree_trans *trans,
+				       struct btree_path *path,
+				       struct btree_bkey_cached_common *b)
+{
+	struct btree_path *linked;
+	unsigned i;
+	int ret;
+
+	/*
+	 * XXX BIG FAT NOTICE
+	 *
+	 * Drop all read locks before taking a write lock:
+	 *
+	 * This is a hack, because bch2_btree_node_lock_write_nofail() is a
+	 * hack - but by dropping read locks first, this should never fail, and
+	 * we only use this in code paths where whatever read locks we've
+	 * already taken are no longer needed:
+	 */
+
+	trans_for_each_path(trans, linked) {
+		if (!linked->nodes_locked)
+			continue;
+
+		for (i = 0; i < BTREE_MAX_DEPTH; i++)
+			if (btree_node_read_locked(linked, i)) {
+				btree_node_unlock(trans, linked, i);
+				btree_path_set_dirty(linked, BTREE_ITER_NEED_RELOCK);
+			}
+	}
+
+	ret = __btree_node_lock_write(trans, path, b, true);
+	BUG_ON(ret);
+}
+
 /* relock */
 
 static inline bool btree_path_get_locks(struct btree_trans *trans,
 					struct btree_path *path,
-					bool upgrade)
+					bool upgrade,
+					struct get_locks_fail *f)
 {
 	unsigned l = path->level;
 	int fail_idx = -1;
@@ -403,8 +462,14 @@ static inline bool btree_path_get_locks(struct btree_trans *trans,
 
 		if (!(upgrade
 		      ? bch2_btree_node_upgrade(trans, path, l)
-		      : bch2_btree_node_relock(trans, path, l)))
-			fail_idx = l;
+		      : bch2_btree_node_relock(trans, path, l))) {
+			fail_idx = l;
+
+			if (f) {
+				f->l	= l;
+				f->b	= path->l[l].b;
+			}
+		}
 
 		l++;
 	} while (l < path->locks_want);
@@ -513,7 +578,7 @@ bool bch2_btree_node_upgrade(struct btree_trans *trans,
 		trace_and_count(trans->c, btree_path_upgrade_fail, trans, _RET_IP_, path, level);
 	return false;
 success:
-	mark_btree_node_locked_noreset(path, level, SIX_LOCK_intent);
+	mark_btree_node_locked_noreset(path, level, BTREE_NODE_INTENT_LOCKED);
 	return true;
 }
 
@@ -545,7 +610,9 @@ __flatten
 bool bch2_btree_path_relock_norestart(struct btree_trans *trans,
 			struct btree_path *path, unsigned long trace_ip)
 {
-	return btree_path_get_locks(trans, path, false);
+	struct get_locks_fail f;
+
+	return btree_path_get_locks(trans, path, false, &f);
 }
 
 int __bch2_btree_path_relock(struct btree_trans *trans,
@@ -559,31 +626,26 @@ int __bch2_btree_path_relock(struct btree_trans *trans,
 	return 0;
 }
 
-__flatten
-bool bch2_btree_path_upgrade_norestart(struct btree_trans *trans,
-			struct btree_path *path, unsigned long trace_ip)
-{
-	return btree_path_get_locks(trans, path, true);
-}
-
 bool bch2_btree_path_upgrade_noupgrade_sibs(struct btree_trans *trans,
 			       struct btree_path *path,
-			       unsigned new_locks_want)
+			       unsigned new_locks_want,
+			       struct get_locks_fail *f)
 {
 	EBUG_ON(path->locks_want >= new_locks_want);
 
 	path->locks_want = new_locks_want;
 
-	return btree_path_get_locks(trans, path, true);
+	return btree_path_get_locks(trans, path, true, f);
 }
 
 bool __bch2_btree_path_upgrade(struct btree_trans *trans,
 			       struct btree_path *path,
-			       unsigned new_locks_want)
+			       unsigned new_locks_want,
+			       struct get_locks_fail *f)
 {
 	struct btree_path *linked;
 
-	if (bch2_btree_path_upgrade_noupgrade_sibs(trans, path, new_locks_want))
+	if (bch2_btree_path_upgrade_noupgrade_sibs(trans, path, new_locks_want, f))
 		return true;
 
 	/*
@@ -612,7 +674,7 @@ bool __bch2_btree_path_upgrade(struct btree_trans *trans,
 		    linked->btree_id == path->btree_id &&
 		    linked->locks_want < new_locks_want) {
 			linked->locks_want = new_locks_want;
-			btree_path_get_locks(trans, linked, true);
+			btree_path_get_locks(trans, linked, true, NULL);
 		}
 
 	return false;
@@ -622,7 +684,10 @@ void __bch2_btree_path_downgrade(struct btree_trans *trans,
 				 struct btree_path *path,
 				 unsigned new_locks_want)
 {
-	unsigned l;
+	unsigned l, old_locks_want = path->locks_want;
+
+	if (trans->restarted)
+		return;
 
 	EBUG_ON(path->locks_want < new_locks_want);
 
@@ -635,13 +700,16 @@ void __bch2_btree_path_downgrade(struct btree_trans *trans,
 		} else {
 			if (btree_node_intent_locked(path, l)) {
 				six_lock_downgrade(&path->l[l].b->c.lock);
-				mark_btree_node_locked_noreset(path, l, SIX_LOCK_read);
+				mark_btree_node_locked_noreset(path, l, BTREE_NODE_READ_LOCKED);
 			}
 			break;
 		}
 	}
 
 	bch2_btree_path_verify_locks(path);
+
+	path->downgrade_seq++;
+	trace_path_downgrade(trans, _RET_IP_, path, old_locks_want);
 }
 
 /* Btree transaction locking: */
@@ -650,6 +718,9 @@ void bch2_trans_downgrade(struct btree_trans *trans)
 {
 	struct btree_path *path;
 
+	if (trans->restarted)
+		return;
+
 	trans_for_each_path(trans, path)
 		bch2_btree_path_downgrade(trans, path);
 }
@@ -685,19 +756,26 @@ int bch2_trans_relock_notrace(struct btree_trans *trans)
 	return 0;
 }
 
+void bch2_trans_unlock_noassert(struct btree_trans *trans)
+{
+	struct btree_path *path;
+
+	trans_for_each_path(trans, path)
+		__bch2_btree_path_unlock(trans, path);
+}
+
 void bch2_trans_unlock(struct btree_trans *trans)
 {
 	struct btree_path *path;
 
 	trans_for_each_path(trans, path)
 		__bch2_btree_path_unlock(trans, path);
+}
 
-	/*
-	 * bch2_gc_btree_init_recurse() doesn't use btree iterators for walking
-	 * btree nodes, it implements its own walking:
-	 */
-	if (!trans->is_initial_gc)
-		bch2_assert_btree_nodes_not_locked();
+void bch2_trans_unlock_long(struct btree_trans *trans)
+{
+	bch2_trans_unlock(trans);
+	bch2_trans_srcu_unlock(trans);
 }
 
 bool bch2_trans_locked(struct btree_trans *trans)
@@ -713,11 +791,8 @@ bool bch2_trans_locked(struct btree_trans *trans)
 int __bch2_trans_mutex_lock(struct btree_trans *trans,
 			    struct mutex *lock)
 {
-	int ret;
+	int ret = drop_locks_do(trans, (mutex_lock(lock), 0));
 
-	bch2_trans_unlock(trans);
-	mutex_lock(lock);
-	ret = bch2_trans_relock(trans);
 	if (ret)
 		mutex_unlock(lock);
 	return ret;
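
Note on the final hunk: __bch2_trans_mutex_lock() now uses drop_locks_do() instead of the open-coded unlock/lock/relock sequence. Judging only from the lines removed here, drop_locks_do() is presumably a small wrapper along the following lines (a sketch for illustration, not necessarily the exact macro in btree_locking.h):

	/*
	 * Sketch of the drop_locks_do() pattern inferred from the removed code:
	 * drop all btree locks held by the transaction, evaluate the expression,
	 * then try to retake the locks; a transaction-restart error from
	 * bch2_trans_relock() is passed back to the caller.
	 */
	#define drop_locks_do(_trans, _do)				\
	({								\
		bch2_trans_unlock(_trans);				\
		(_do) ?: bch2_trans_relock(_trans);			\
	})

In __bch2_trans_mutex_lock() the expression is (mutex_lock(lock), 0): the comma operator makes the blocking mutex_lock() evaluate to 0 ("no error"), so the result of drop_locks_do() is whatever bch2_trans_relock() returns, and the unchanged tail of the function drops the mutex again if the relock failed with a restart error.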