X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libbcachefs%2Fsix.h;h=394da423c28e511f4e0d733708941c9385104b80;hb=63c8f14756921c1d1d6a99082a679b92aef288c1;hp=999c49db23715b616e679c37c5cea84e90ba9b7a;hpb=bb6f4111fbfe2550eb7b583586e732a473ba62f0;p=bcachefs-tools-debian diff --git a/libbcachefs/six.h b/libbcachefs/six.h index 999c49d..394da42 100644 --- a/libbcachefs/six.h +++ b/libbcachefs/six.h @@ -1,59 +1,126 @@ -#ifndef _BCACHEFS_SIX_H -#define _BCACHEFS_SIX_H +/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Shared/intent/exclusive locks: sleepable read/write locks, much like rw - * semaphores, except with a third intermediate state, intent. Basic operations - * are: +#ifndef _LINUX_SIX_H +#define _LINUX_SIX_H + +/** + * DOC: SIX locks overview * - * six_lock_read(&foo->lock); - * six_unlock_read(&foo->lock); + * Shared/intent/exclusive locks: sleepable read/write locks, like rw semaphores + * but with an additional state: read/shared, intent, exclusive/write * - * six_lock_intent(&foo->lock); - * six_unlock_intent(&foo->lock); + * The purpose of the intent state is to allow for greater concurrency on tree + * structures without deadlocking. In general, a read can't be upgraded to a + * write lock without deadlocking, so an operation that updates multiple nodes + * will have to take write locks for the full duration of the operation. * - * six_lock_write(&foo->lock); - * six_unlock_write(&foo->lock); + * But by adding an intent state, which is exclusive with other intent locks but + * not with readers, we can take intent locks at the start of the operation, + * and then take write locks only for the actual update to each individual + * node, without deadlocking.
* - * Intent locks block other intent locks, but do not block read locks, and you - * must have an intent lock held before taking a write lock, like so: + * Example usage: + * six_lock_read(&foo->lock); + * six_unlock_read(&foo->lock); * - * six_lock_intent(&foo->lock); - * six_lock_write(&foo->lock); - * six_unlock_write(&foo->lock); - * six_unlock_intent(&foo->lock); + * An intent lock must be held before taking a write lock: + * six_lock_intent(&foo->lock); + * six_lock_write(&foo->lock); + * six_unlock_write(&foo->lock); + * six_unlock_intent(&foo->lock); * * Other operations: - * * six_trylock_read() * six_trylock_intent() * six_trylock_write() * - * six_lock_downgrade(): convert from intent to read - * six_lock_tryupgrade(): attempt to convert from read to intent - * - * Locks also embed a sequence number, which is incremented when the lock is - * locked or unlocked for write. The current sequence number can be grabbed - * while a lock is held from lock->state.seq; then, if you drop the lock you can - * use six_relock_(read|intent_write)(lock, seq) to attempt to retake the lock - * iff it hasn't been locked for write in the meantime. - * - * There are also operations that take the lock type as a parameter, where the - * type is one of SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write: - * - * six_lock_type(lock, type) - * six_unlock_type(lock, type) - * six_relock(lock, type, seq) - * six_trylock_type(lock, type) - * six_trylock_convert(lock, from, to) - * - * A lock may be held multiple types by the same thread (for read or intent, - * not write) - up to SIX_LOCK_MAX_RECURSE. However, the six locks code does - * _not_ implement the actual recursive checks itself though - rather, if your - * code (e.g. 
btree iterator code) knows that the current thread already has a - * lock held, and for the correct type, six_lock_increment() may be used to - * bump up the counter for that type - the only effect is that one more call to - * unlock will be required before the lock is unlocked. + * six_lock_downgrade() convert from intent to read + * six_lock_tryupgrade() attempt to convert from read to intent, may fail + * + * There are also interfaces that take the lock type as an enum: + * + * six_lock_type(&foo->lock, SIX_LOCK_read); + * six_trylock_convert(&foo->lock, SIX_LOCK_read, SIX_LOCK_intent); + * six_lock_type(&foo->lock, SIX_LOCK_write); + * six_unlock_type(&foo->lock, SIX_LOCK_write); + * six_unlock_type(&foo->lock, SIX_LOCK_intent); + * + * Lock sequence numbers - unlock(), relock(): + * + * Locks embed sequence numbers, which are incremented on write lock/unlock. + * This allows locks to be dropped and then retaken iff the state they protect + * hasn't changed; this makes it much easier to avoid holding locks while e.g. + * doing IO or allocating memory. + * + * Example usage: + * six_lock_read(&foo->lock); + * u32 seq = six_lock_seq(&foo->lock); + * six_unlock_read(&foo->lock); + * + * some_operation_that_may_block(); + * + * if (six_relock_read(&foo->lock, seq)) { ... } + * + * If the relock operation succeeds, it is as if the lock was never unlocked. + * + * Reentrancy: + * + * Six locks are not by themselves reentrant, but have counters for both the + * read and intent states that can be used to provide reentrancy by an upper + * layer that tracks held locks. If a lock is known to already be held in the + * read or intent state, six_lock_increment() can be used to bump the "lock + * held in this state" counter, increasing the number of unlock calls that + * will be required to fully unlock it.
+ * + * Example usage: + * six_lock_read(&foo->lock); + * six_lock_increment(&foo->lock, SIX_LOCK_read); + * six_unlock_read(&foo->lock); + * six_unlock_read(&foo->lock); + * foo->lock is now fully unlocked. + * + * Since the intent state supersedes read, it's legal to increment the read + * counter when holding an intent lock, but not the reverse. + * + * A lock may only be held once for write: six_lock_increment(.., SIX_LOCK_write) + * is not legal. + * + * should_sleep_fn: + * + * There is a six_lock() variant that takes a function pointer that is called + * immediately prior to schedule() when blocking, and may return an error to + * abort. + * + * One possible use for this feature is when objects being locked are part of + * a cache and may be reused, and lock ordering is based on a property of the + * object that will change when the object is reused - i.e. logical key order. + * + * If looking up an object in the cache may race with object reuse, and lock + * ordering is required to prevent deadlock, object reuse may change the + * correct lock order for that object and cause a deadlock. should_sleep_fn + * can be used to check if the object is still the object we want and avoid + * this deadlock. + * + * Wait list entry interface: + * + * There is a six_lock() variant, six_lock_waiter(), that takes a pointer to a + * wait list entry. By embedding six_lock_waiter into another object, and by + * traversing lock waitlists, it is then possible for an upper layer to + * implement full cycle detection for deadlock avoidance. + * + * should_sleep_fn should be used for invoking the cycle detector, walking the + * graph of held locks to check for a deadlock. The upper layer must track + * held locks for each thread, and each thread's held locks must be reachable + * from its six_lock_waiter object.
+ * + * six_lock_waiter() will add the wait object to the waitlist before re-trying + * taking the lock, and before calling should_sleep_fn, and the wait object will not + * be removed from the waitlist until either the lock has been successfully + * acquired, or we aborted because should_sleep_fn returned an error. + * + * Also, six_lock_waiter contains a timestamp, and waiters on a waitlist will + * have timestamps in strictly ascending order - this is so the timestamp can + * be used as a cursor for lock graph traversal. */ #include @@ -61,43 +128,6 @@ #include #include -#include "util.h" - -#define SIX_LOCK_SEPARATE_LOCKFNS - -union six_lock_state { - struct { - atomic64_t counter; - }; - - struct { - u64 v; - }; - - struct { - /* for waitlist_bitnr() */ - unsigned long l; - }; - - struct { - unsigned read_lock:26; - unsigned intent_lock:3; - unsigned waiters:3; - /* - * seq works much like in seqlocks: it's incremented every time - * we lock and unlock for write. - * - * If it's odd write lock is held, even unlocked. - * - * Thus readers can unlock, and then lock again later iff it - * hasn't been modified in the meantime.
- */ - u32 seq; - }; -}; - -#define SIX_LOCK_MAX_RECURSE ((1 << 3) - 1) - enum six_lock_type { SIX_LOCK_read, SIX_LOCK_intent, @@ -105,112 +135,233 @@ enum six_lock_type { }; struct six_lock { - union six_lock_state state; + atomic_t state; + u32 seq; + unsigned intent_lock_recurse; struct task_struct *owner; + unsigned __percpu *readers; struct optimistic_spin_queue osq; - raw_spinlock_t wait_lock; - struct list_head wait_list[2]; + struct list_head wait_list; #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map dep_map; #endif }; -static __always_inline void __six_lock_init(struct six_lock *lock, - const char *name, - struct lock_class_key *key) -{ - atomic64_set(&lock->state.counter, 0); - raw_spin_lock_init(&lock->wait_lock); - INIT_LIST_HEAD(&lock->wait_list[SIX_LOCK_read]); - INIT_LIST_HEAD(&lock->wait_list[SIX_LOCK_intent]); -#ifdef CONFIG_DEBUG_LOCK_ALLOC - debug_check_no_locks_freed((void *) lock, sizeof(*lock)); - lockdep_init_map(&lock->dep_map, name, key, 0); -#endif -} +struct six_lock_waiter { + struct list_head list; + struct task_struct *task; + enum six_lock_type lock_want; + bool lock_acquired; + u64 start_time; +}; + +typedef int (*six_lock_should_sleep_fn)(struct six_lock *lock, void *); + +void six_lock_exit(struct six_lock *lock); + +enum six_lock_init_flags { + SIX_LOCK_INIT_PCPU = 1U << 0, +}; -#define six_lock_init(lock) \ +void __six_lock_init(struct six_lock *lock, const char *name, + struct lock_class_key *key, enum six_lock_init_flags flags); + +/** + * six_lock_init - initialize a six lock + * @lock: lock to initialize + * @flags: optional flags, i.e. 
SIX_LOCK_INIT_PCPU + */ +#define six_lock_init(lock, flags) \ do { \ static struct lock_class_key __key; \ \ - __six_lock_init((lock), #lock, &__key); \ + __six_lock_init((lock), #lock, &__key, flags); \ } while (0) -#define __SIX_VAL(field, _v) (((union six_lock_state) { .field = _v }).v) - -#ifdef SIX_LOCK_SEPARATE_LOCKFNS +/** + * six_lock_seq - obtain current lock sequence number + * @lock: six_lock to obtain sequence number for + * + * @lock should be held for read or intent, and not write + * + * By saving the lock sequence number, we can unlock @lock and then (typically + * after some blocking operation) attempt to relock it: the relock will succeed + * if the sequence number hasn't changed, meaning no write locks have been taken + * and state corresponding to what @lock protects is still valid. + */ +static inline u32 six_lock_seq(const struct six_lock *lock) +{ + return lock->seq; +} -#define __SIX_LOCK(type) \ -bool six_trylock_##type(struct six_lock *); \ -bool six_relock_##type(struct six_lock *, u32); \ -void six_lock_##type(struct six_lock *); \ -void six_unlock_##type(struct six_lock *); +bool six_trylock_ip(struct six_lock *lock, enum six_lock_type type, unsigned long ip); -__SIX_LOCK(read) -__SIX_LOCK(intent) -__SIX_LOCK(write) -#undef __SIX_LOCK +/** + * six_trylock_type - attempt to take a six lock without blocking + * @lock: lock to take + * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write + * + * Return: true on success, false on failure. + */ +static inline bool six_trylock_type(struct six_lock *lock, enum six_lock_type type) +{ + return six_trylock_ip(lock, type, _THIS_IP_); +} -#define SIX_LOCK_DISPATCH(type, fn, ...) 
\ - switch (type) { \ - case SIX_LOCK_read: \ - return fn##_read(__VA_ARGS__); \ - case SIX_LOCK_intent: \ - return fn##_intent(__VA_ARGS__); \ - case SIX_LOCK_write: \ - return fn##_write(__VA_ARGS__); \ - default: \ - BUG(); \ - } +int six_lock_ip_waiter(struct six_lock *lock, enum six_lock_type type, + struct six_lock_waiter *wait, + six_lock_should_sleep_fn should_sleep_fn, void *p, + unsigned long ip); -static inline bool six_trylock_type(struct six_lock *lock, enum six_lock_type type) +/** + * six_lock_waiter - take a lock, with full waitlist interface + * @lock: lock to take + * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write + * @wait: pointer to wait object, which will be added to lock's waitlist + * @should_sleep_fn: callback run after adding to waitlist, immediately prior + * to scheduling + * @p: passed through to @should_sleep_fn + * + * This is a convenience wrapper around six_lock_ip_waiter(), see that function + * for full documentation. + * + * Return: 0 on success, or the return code from @should_sleep_fn on failure. + */ +static inline int six_lock_waiter(struct six_lock *lock, enum six_lock_type type, + struct six_lock_waiter *wait, + six_lock_should_sleep_fn should_sleep_fn, void *p) { - SIX_LOCK_DISPATCH(type, six_trylock, lock); + return six_lock_ip_waiter(lock, type, wait, should_sleep_fn, p, _THIS_IP_); } -static inline bool six_relock_type(struct six_lock *lock, enum six_lock_type type, - unsigned seq) +/** + * six_lock_ip - take a six lock + * @lock: lock to take + * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write + * @should_sleep_fn: callback run after adding to waitlist, immediately prior + * to scheduling + * @p: passed through to @should_sleep_fn + * @ip: ip parameter for lockdep/lockstat, i.e. _THIS_IP_ + * + * Return: 0 on success, or the return code from @should_sleep_fn on failure.
+ */ +static inline int six_lock_ip(struct six_lock *lock, enum six_lock_type type, + six_lock_should_sleep_fn should_sleep_fn, void *p, + unsigned long ip) { - SIX_LOCK_DISPATCH(type, six_relock, lock, seq); + struct six_lock_waiter wait; + + return six_lock_ip_waiter(lock, type, &wait, should_sleep_fn, p, ip); } -static inline void six_lock_type(struct six_lock *lock, enum six_lock_type type) +/** + * six_lock_type - take a six lock + * @lock: lock to take + * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write + * @should_sleep_fn: callback run after adding to waitlist, immediately prior + * to scheduling + * @p: passed through to @should_sleep_fn + * + * Return: 0 on success, or the return code from @should_sleep_fn on failure. + */ +static inline int six_lock_type(struct six_lock *lock, enum six_lock_type type, + six_lock_should_sleep_fn should_sleep_fn, void *p) { - SIX_LOCK_DISPATCH(type, six_lock, lock); + struct six_lock_waiter wait; + + return six_lock_ip_waiter(lock, type, &wait, should_sleep_fn, p, _THIS_IP_); } -static inline void six_unlock_type(struct six_lock *lock, enum six_lock_type type) +bool six_relock_ip(struct six_lock *lock, enum six_lock_type type, + unsigned seq, unsigned long ip); + +/** + * six_relock_type - attempt to re-take a lock that was held previously + * @lock: lock to take + * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write + * @seq: lock sequence number obtained from six_lock_seq() while lock was + * held previously + * + * Return: true on success, false on failure.
+ */ +static inline bool six_relock_type(struct six_lock *lock, enum six_lock_type type, + unsigned seq) { - SIX_LOCK_DISPATCH(type, six_unlock, lock); + return six_relock_ip(lock, type, seq, _THIS_IP_); } -#else +void six_unlock_ip(struct six_lock *lock, enum six_lock_type type, unsigned long ip); -bool six_trylock_type(struct six_lock *, enum six_lock_type); -bool six_relock_type(struct six_lock *, enum six_lock_type, unsigned); -void six_lock_type(struct six_lock *, enum six_lock_type); -void six_unlock_type(struct six_lock *, enum six_lock_type); +/** + * six_unlock_type - drop a six lock + * @lock: lock to unlock + * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write + * + * When a lock is held multiple times (because six_lock_increment() was used), + * this decrements the 'lock held' counter by one. + * + * For example: + * six_lock_read(&foo->lock); read count 1 + * six_lock_increment(&foo->lock, SIX_LOCK_read); read count 2 + * six_unlock_type(&foo->lock, SIX_LOCK_read); read count 1 + * six_unlock_type(&foo->lock, SIX_LOCK_read); read count 0 + */ +static inline void six_unlock_type(struct six_lock *lock, enum six_lock_type type) +{ + six_unlock_ip(lock, type, _THIS_IP_); +} #define __SIX_LOCK(type) \ -static __always_inline bool six_trylock_##type(struct six_lock *lock) \ +static inline bool six_trylock_ip_##type(struct six_lock *lock, unsigned long ip)\ +{ \ + return six_trylock_ip(lock, SIX_LOCK_##type, ip); \ +} \ + \ +static inline bool six_trylock_##type(struct six_lock *lock) \ +{ \ + return six_trylock_ip(lock, SIX_LOCK_##type, _THIS_IP_); \ +} \ + \ +static inline int six_lock_ip_waiter_##type(struct six_lock *lock, \ + struct six_lock_waiter *wait, \ + six_lock_should_sleep_fn should_sleep_fn, void *p,\ + unsigned long ip) \ +{ \ + return six_lock_ip_waiter(lock, SIX_LOCK_##type, wait, should_sleep_fn, p, ip);\ +} \ + \ +static inline int six_lock_ip_##type(struct six_lock *lock, \ + six_lock_should_sleep_fn should_sleep_fn, void *p, \ +
unsigned long ip) \ { \ - return six_trylock_type(lock, SIX_LOCK_##type); \ + return six_lock_ip(lock, SIX_LOCK_##type, should_sleep_fn, p, ip);\ } \ \ -static __always_inline bool six_relock_##type(struct six_lock *lock, u32 seq)\ +static inline bool six_relock_ip_##type(struct six_lock *lock, u32 seq, unsigned long ip)\ { \ - return six_relock_type(lock, SIX_LOCK_##type, seq); \ + return six_relock_ip(lock, SIX_LOCK_##type, seq, ip); \ } \ \ -static __always_inline void six_lock_##type(struct six_lock *lock) \ +static inline bool six_relock_##type(struct six_lock *lock, u32 seq) \ { \ - six_lock_type(lock, SIX_LOCK_##type); \ + return six_relock_ip(lock, SIX_LOCK_##type, seq, _THIS_IP_); \ } \ \ -static __always_inline void six_unlock_##type(struct six_lock *lock) \ +static inline int six_lock_##type(struct six_lock *lock, \ + six_lock_should_sleep_fn fn, void *p)\ { \ - six_unlock_type(lock, SIX_LOCK_##type); \ + return six_lock_ip_##type(lock, fn, p, _THIS_IP_); \ +} \ + \ +static inline void six_unlock_ip_##type(struct six_lock *lock, unsigned long ip) \ +{ \ + six_unlock_ip(lock, SIX_LOCK_##type, ip); \ +} \ + \ +static inline void six_unlock_##type(struct six_lock *lock) \ +{ \ + six_unlock_ip(lock, SIX_LOCK_##type, _THIS_IP_); \ } __SIX_LOCK(read) @@ -218,8 +369,6 @@ __SIX_LOCK(intent) __SIX_LOCK(write) #undef __SIX_LOCK -#endif - void six_lock_downgrade(struct six_lock *); bool six_lock_tryupgrade(struct six_lock *); bool six_trylock_convert(struct six_lock *, enum six_lock_type, @@ -227,4 +376,13 @@ bool six_trylock_convert(struct six_lock *, enum six_lock_type, void six_lock_increment(struct six_lock *, enum six_lock_type); -#endif /* _BCACHEFS_SIX_H */ +void six_lock_wakeup_all(struct six_lock *); + +struct six_lock_count { + unsigned n[3]; +}; + +struct six_lock_count six_lock_counts(struct six_lock *); +void six_lock_readers_add(struct six_lock *, int); + +#endif /* _LINUX_SIX_H */