]> git.sesse.net Git - bcachefs-tools-debian/blobdiff - libbcachefs/six.h
Disable pristine-tar option in gbp.conf, since there is no pristine-tar branch.
[bcachefs-tools-debian] / libbcachefs / six.h
index 999c49db23715b616e679c37c5cea84e90ba9b7a..68d46fd7f3912a8101ecee24e8aab9722dbf07c1 100644 (file)
-#ifndef _BCACHEFS_SIX_H
-#define _BCACHEFS_SIX_H
+/* SPDX-License-Identifier: GPL-2.0 */
 
-/*
- * Shared/intent/exclusive locks: sleepable read/write locks, much like rw
- * semaphores, except with a third intermediate state, intent. Basic operations
- * are:
+#ifndef _LINUX_SIX_H
+#define _LINUX_SIX_H
+
+/**
+ * DOC: SIX locks overview
  *
- * six_lock_read(&foo->lock);
- * six_unlock_read(&foo->lock);
+ * Shared/intent/exclusive locks: sleepable read/write locks, like rw semaphores
+ * but with an additional state: read/shared, intent, exclusive/write
  *
- * six_lock_intent(&foo->lock);
- * six_unlock_intent(&foo->lock);
+ * The purpose of the intent state is to allow for greater concurrency on tree
+ * structures without deadlocking. In general, a read can't be upgraded to a
+ * write lock without deadlocking, so an operation that updates multiple nodes
+ * will have to take write locks for the full duration of the operation.
  *
- * six_lock_write(&foo->lock);
- * six_unlock_write(&foo->lock);
+ * But by adding an intent state, which is exclusive with other intent locks but
+ * not with readers, we can take intent locks at the start of the operation,
+ * and then take write locks only for the actual update to each individual
+ * node, without deadlocking.
  *
- * Intent locks block other intent locks, but do not block read locks, and you
- * must have an intent lock held before taking a write lock, like so:
+ * Example usage:
+ *   six_lock_read(&foo->lock);
+ *   six_unlock_read(&foo->lock);
  *
- * six_lock_intent(&foo->lock);
- * six_lock_write(&foo->lock);
- * six_unlock_write(&foo->lock);
- * six_unlock_intent(&foo->lock);
+ * An intent lock must be held before taking a write lock:
+ *   six_lock_intent(&foo->lock);
+ *   six_lock_write(&foo->lock);
+ *   six_unlock_write(&foo->lock);
+ *   six_unlock_intent(&foo->lock);
  *
  * Other operations:
- *
  *   six_trylock_read()
  *   six_trylock_intent()
  *   six_trylock_write()
  *
- *   six_lock_downgrade():     convert from intent to read
- *   six_lock_tryupgrade():    attempt to convert from read to intent
- *
- * Locks also embed a sequence number, which is incremented when the lock is
- * locked or unlocked for write. The current sequence number can be grabbed
- * while a lock is held from lock->state.seq; then, if you drop the lock you can
- * use six_relock_(read|intent_write)(lock, seq) to attempt to retake the lock
- * iff it hasn't been locked for write in the meantime.
- *
- * There are also operations that take the lock type as a parameter, where the
- * type is one of SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write:
- *
- *   six_lock_type(lock, type)
- *   six_unlock_type(lock, type)
- *   six_relock(lock, type, seq)
- *   six_trylock_type(lock, type)
- *   six_trylock_convert(lock, from, to)
- *
- * A lock may be held multiple types by the same thread (for read or intent,
- * not write) - up to SIX_LOCK_MAX_RECURSE. However, the six locks code does
- * _not_ implement the actual recursive checks itself though - rather, if your
- * code (e.g. btree iterator code) knows that the current thread already has a
- * lock held, and for the correct type, six_lock_increment() may be used to
- * bump up the counter for that type - the only effect is that one more call to
- * unlock will be required before the lock is unlocked.
+ *   six_lock_downgrade()      convert from intent to read
+ *   six_lock_tryupgrade()     attempt to convert from read to intent, may fail
+ *
+ * There are also interfaces that take the lock type as an enum:
+ *
+ *   six_lock_type(&foo->lock, SIX_LOCK_read);
+ *   six_trylock_convert(&foo->lock, SIX_LOCK_read, SIX_LOCK_intent);
+ *   six_lock_type(&foo->lock, SIX_LOCK_write);
+ *   six_unlock_type(&foo->lock, SIX_LOCK_write);
+ *   six_unlock_type(&foo->lock, SIX_LOCK_intent);
+ *
+ * Lock sequence numbers - unlock(), relock():
+ *
+ *   Locks embed sequence numbers, which are incremented on write lock/unlock.
+ *   This allows locks to be dropped and then retaken iff the state they protect
+ *   hasn't changed; this makes it much easier to avoid holding locks while e.g.
+ *   doing IO or allocating memory.
+ *
+ *   Example usage:
+ *     six_lock_read(&foo->lock);
+ *     u32 seq = six_lock_seq(&foo->lock);
+ *     six_unlock_read(&foo->lock);
+ *
+ *     some_operation_that_may_block();
+ *
+ *     if (six_relock_read(&foo->lock, seq)) { ... }
+ *
+ *   If the relock operation succeeds, it is as if the lock was never unlocked.
+ *
+ * Reentrancy:
+ *
+ *   Six locks are not by themselves reentrant, but have counters for both the
+ *   read and intent states that can be used to provide reentrancy by an upper
+ *   layer that tracks held locks. If a lock is known to already be held in the
+ *   read or intent state, six_lock_increment() can be used to bump the "lock
+ *   held in this state" counter, increasing the number of unlock calls that
+ *   will be required to fully unlock it.
+ *
+ *   Example usage:
+ *     six_lock_read(&foo->lock);
+ *     six_lock_increment(&foo->lock, SIX_LOCK_read);
+ *     six_unlock_read(&foo->lock);
+ *     six_unlock_read(&foo->lock);
+ *   foo->lock is now fully unlocked.
+ *
+ *   Since the intent state supersedes read, it's legal to increment the read
+ *   counter when holding an intent lock, but not the reverse.
+ *
+ *   A lock may only be held once for write: six_lock_increment(.., SIX_LOCK_write)
+ *   is not legal.
+ *
+ * should_sleep_fn:
+ *
+ *   There is a six_lock() variant that takes a function pointer that is called
+ *   immediately prior to schedule() when blocking, and may return an error to
+ *   abort.
+ *
+ *   One possible use for this feature is when objects being locked are part of
+ *   a cache and may be reused, and lock ordering is based on a property of the
+ *   object that will change when the object is reused - i.e. logical key order.
+ *
+ *   If looking up an object in the cache may race with object reuse, and lock
+ *   ordering is required to prevent deadlock, object reuse may change the
+ *   correct lock order for that object and cause a deadlock. should_sleep_fn
+ *   can be used to check if the object is still the object we want and avoid
+ *   this deadlock.
+ *
+ * Wait list entry interface:
+ *
+ *   There is a six_lock() variant, six_lock_waiter(), that takes a pointer to a
+ *   wait list entry. By embedding six_lock_waiter into another object, and by
+ *   traversing lock waitlists, it is then possible for an upper layer to
+ *   implement full cycle detection for deadlock avoidance.
+ *
+ *   should_sleep_fn should be used for invoking the cycle detector, walking the
+ *   graph of held locks to check for a deadlock. The upper layer must track
+ *   held locks for each thread, and each thread's held locks must be reachable
+ *   from its six_lock_waiter object.
+ *
+ *   six_lock_waiter() will add the wait object to the waitlist before re-trying
+ *   the lock, and before calling should_sleep_fn, and the wait object will not
+ *   be removed from the waitlist until either the lock has been successfully
+ *   acquired, or we aborted because should_sleep_fn returned an error.
+ *
+ *   Also, six_lock_waiter contains a timestamp, and waiters on a waitlist will
+ *   have timestamps in strictly ascending order - this is so the timestamp can
+ *   be used as a cursor for lock graph traversal.
  */
 
 #include <linux/lockdep.h>
-#include <linux/osq_lock.h>
 #include <linux/sched.h>
 #include <linux/types.h>
 
-#include "util.h"
-
-#define SIX_LOCK_SEPARATE_LOCKFNS
-
-union six_lock_state {
-       struct {
-               atomic64_t      counter;
-       };
-
-       struct {
-               u64             v;
-       };
-
-       struct {
-               /* for waitlist_bitnr() */
-               unsigned long   l;
-       };
-
-       struct {
-               unsigned        read_lock:26;
-               unsigned        intent_lock:3;
-               unsigned        waiters:3;
-               /*
-                * seq works much like in seqlocks: it's incremented every time
-                * we lock and unlock for write.
-                *
-                * If it's odd write lock is held, even unlocked.
-                *
-                * Thus readers can unlock, and then lock again later iff it
-                * hasn't been modified in the meantime.
-                */
-               u32             seq;
-       };
-};
-
-#define SIX_LOCK_MAX_RECURSE   ((1 << 3) - 1)
-
 enum six_lock_type {
        SIX_LOCK_read,
        SIX_LOCK_intent,
@@ -105,112 +134,232 @@ enum six_lock_type {
 };
 
 struct six_lock {
-       union six_lock_state    state;
+       atomic_t                state;
+       u32                     seq;
+       unsigned                intent_lock_recurse;
        struct task_struct      *owner;
-       struct optimistic_spin_queue osq;
-
+       unsigned __percpu       *readers;
        raw_spinlock_t          wait_lock;
-       struct list_head        wait_list[2];
+       struct list_head        wait_list;
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
        struct lockdep_map      dep_map;
 #endif
 };
 
-static __always_inline void __six_lock_init(struct six_lock *lock,
-                                           const char *name,
-                                           struct lock_class_key *key)
-{
-       atomic64_set(&lock->state.counter, 0);
-       raw_spin_lock_init(&lock->wait_lock);
-       INIT_LIST_HEAD(&lock->wait_list[SIX_LOCK_read]);
-       INIT_LIST_HEAD(&lock->wait_list[SIX_LOCK_intent]);
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-       debug_check_no_locks_freed((void *) lock, sizeof(*lock));
-       lockdep_init_map(&lock->dep_map, name, key, 0);
-#endif
-}
+struct six_lock_waiter {
+       struct list_head        list;
+       struct task_struct      *task;
+       enum six_lock_type      lock_want;
+       bool                    lock_acquired;
+       u64                     start_time;
+};
+
+typedef int (*six_lock_should_sleep_fn)(struct six_lock *lock, void *);
+
+void six_lock_exit(struct six_lock *lock);
+
+enum six_lock_init_flags {
+       SIX_LOCK_INIT_PCPU      = 1U << 0,
+};
 
-#define six_lock_init(lock)                                            \
+void __six_lock_init(struct six_lock *lock, const char *name,
+                    struct lock_class_key *key, enum six_lock_init_flags flags);
+
+/**
+ * six_lock_init - initialize a six lock
+ * @lock:      lock to initialize
+ * @flags:     optional flags, i.e. SIX_LOCK_INIT_PCPU
+ */
+#define six_lock_init(lock, flags)                                     \
 do {                                                                   \
        static struct lock_class_key __key;                             \
                                                                        \
-       __six_lock_init((lock), #lock, &__key);                         \
+       __six_lock_init((lock), #lock, &__key, flags);                  \
 } while (0)
 
-#define __SIX_VAL(field, _v)   (((union six_lock_state) { .field = _v }).v)
-
-#ifdef SIX_LOCK_SEPARATE_LOCKFNS
+/**
+ * six_lock_seq - obtain current lock sequence number
+ * @lock:      six_lock to obtain sequence number for
+ *
+ * @lock should be held for read or intent, and not write
+ *
+ * By saving the lock sequence number, we can unlock @lock and then (typically
+ * after some blocking operation) attempt to relock it: the relock will succeed
+ * if the sequence number hasn't changed, meaning no write locks have been taken
+ * and state corresponding to what @lock protects is still valid.
+ */
+static inline u32 six_lock_seq(const struct six_lock *lock)
+{
+       return lock->seq;
+}
 
-#define __SIX_LOCK(type)                                               \
-bool six_trylock_##type(struct six_lock *);                            \
-bool six_relock_##type(struct six_lock *, u32);                                \
-void six_lock_##type(struct six_lock *);                               \
-void six_unlock_##type(struct six_lock *);
+bool six_trylock_ip(struct six_lock *lock, enum six_lock_type type, unsigned long ip);
 
-__SIX_LOCK(read)
-__SIX_LOCK(intent)
-__SIX_LOCK(write)
-#undef __SIX_LOCK
+/**
+ * six_trylock_type - attempt to take a six lock without blocking
+ * @lock:      lock to take
+ * @type:      SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
+ *
+ * Return: true on success, false on failure.
+ */
+static inline bool six_trylock_type(struct six_lock *lock, enum six_lock_type type)
+{
+       return six_trylock_ip(lock, type, _THIS_IP_);
+}
 
-#define SIX_LOCK_DISPATCH(type, fn, ...)                       \
-       switch (type) {                                         \
-       case SIX_LOCK_read:                                     \
-               return fn##_read(__VA_ARGS__);                  \
-       case SIX_LOCK_intent:                                   \
-               return fn##_intent(__VA_ARGS__);                \
-       case SIX_LOCK_write:                                    \
-               return fn##_write(__VA_ARGS__);                 \
-       default:                                                \
-               BUG();                                          \
-       }
+int six_lock_ip_waiter(struct six_lock *lock, enum six_lock_type type,
+                      struct six_lock_waiter *wait,
+                      six_lock_should_sleep_fn should_sleep_fn, void *p,
+                      unsigned long ip);
 
-static inline bool six_trylock_type(struct six_lock *lock, enum six_lock_type type)
+/**
+ * six_lock_waiter - take a lock, with full waitlist interface
+ * @lock:      lock to take
+ * @type:      SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
+ * @wait:      pointer to wait object, which will be added to lock's waitlist
+ * @should_sleep_fn: callback run after adding to waitlist, immediately prior
+ *             to scheduling
+ * @p:         passed through to @should_sleep_fn
+ *
+ * This is a convenience wrapper around six_lock_ip_waiter(), see that function
+ * for full documentation.
+ *
+ * Return: 0 on success, or the return code from @should_sleep_fn on failure.
+ */
+static inline int six_lock_waiter(struct six_lock *lock, enum six_lock_type type,
+                                 struct six_lock_waiter *wait,
+                                 six_lock_should_sleep_fn should_sleep_fn, void *p)
 {
-       SIX_LOCK_DISPATCH(type, six_trylock, lock);
+       return six_lock_ip_waiter(lock, type, wait, should_sleep_fn, p, _THIS_IP_);
 }
 
-static inline bool six_relock_type(struct six_lock *lock, enum six_lock_type type,
-                    unsigned seq)
+/**
+ * six_lock_ip - take a six lock
+ * @lock:      lock to take
+ * @type:      SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
+ * @should_sleep_fn: callback run after adding to waitlist, immediately prior
+ *             to scheduling
+ * @p:         passed through to @should_sleep_fn
+ * @ip:                ip parameter for lockdep/lockstat, i.e. _THIS_IP_
+ *
+ * Return: 0 on success, or the return code from @should_sleep_fn on failure.
+ */
+static inline int six_lock_ip(struct six_lock *lock, enum six_lock_type type,
+                             six_lock_should_sleep_fn should_sleep_fn, void *p,
+                             unsigned long ip)
 {
-       SIX_LOCK_DISPATCH(type, six_relock, lock, seq);
+       struct six_lock_waiter wait;
+
+       return six_lock_ip_waiter(lock, type, &wait, should_sleep_fn, p, ip);
 }
 
-static inline void six_lock_type(struct six_lock *lock, enum six_lock_type type)
+/**
+ * six_lock_type - take a six lock
+ * @lock:      lock to take
+ * @type:      SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
+ * @should_sleep_fn: callback run after adding to waitlist, immediately prior
+ *             to scheduling
+ * @p:         passed through to @should_sleep_fn
+ *
+ * Return: 0 on success, or the return code from @should_sleep_fn on failure.
+ */
+static inline int six_lock_type(struct six_lock *lock, enum six_lock_type type,
+                               six_lock_should_sleep_fn should_sleep_fn, void *p)
 {
-       SIX_LOCK_DISPATCH(type, six_lock, lock);
+       struct six_lock_waiter wait;
+
+       return six_lock_ip_waiter(lock, type, &wait, should_sleep_fn, p, _THIS_IP_);
 }
 
-static inline void six_unlock_type(struct six_lock *lock, enum six_lock_type type)
+bool six_relock_ip(struct six_lock *lock, enum six_lock_type type,
+                  unsigned seq, unsigned long ip);
+
+/**
+ * six_relock_type - attempt to re-take a lock that was held previously
+ * @lock:      lock to take
+ * @type:      SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
+ * @seq:       lock sequence number obtained from six_lock_seq() while lock was
+ *             held previously
+ *
+ * Return: true on success, false on failure.
+ */
+static inline bool six_relock_type(struct six_lock *lock, enum six_lock_type type,
+                                  unsigned seq)
 {
-       SIX_LOCK_DISPATCH(type, six_unlock, lock);
+       return six_relock_ip(lock, type, seq, _THIS_IP_);
 }
 
-#else
+void six_unlock_ip(struct six_lock *lock, enum six_lock_type type, unsigned long ip);
 
-bool six_trylock_type(struct six_lock *, enum six_lock_type);
-bool six_relock_type(struct six_lock *, enum six_lock_type, unsigned);
-void six_lock_type(struct six_lock *, enum six_lock_type);
-void six_unlock_type(struct six_lock *, enum six_lock_type);
+/**
+ * six_unlock_type - drop a six lock
+ * @lock:      lock to unlock
+ * @type:      SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
+ *
+ * When a lock is held multiple times (because six_lock_increment() was used),
+ * this decrements the 'lock held' counter by one.
+ *
+ * For example:
+ * six_lock_read(&foo->lock);                          read count 1
+ * six_lock_increment(&foo->lock, SIX_LOCK_read);      read count 2
+ * six_unlock_type(&foo->lock, SIX_LOCK_read);         read count 1
+ * six_unlock_type(&foo->lock, SIX_LOCK_read);         read count 0
+ */
+static inline void six_unlock_type(struct six_lock *lock, enum six_lock_type type)
+{
+       six_unlock_ip(lock, type, _THIS_IP_);
+}
 
 #define __SIX_LOCK(type)                                               \
-static __always_inline bool six_trylock_##type(struct six_lock *lock)  \
+static inline bool six_trylock_ip_##type(struct six_lock *lock, unsigned long ip)\
+{                                                                      \
+       return six_trylock_ip(lock, SIX_LOCK_##type, ip);               \
+}                                                                      \
+                                                                       \
+static inline bool six_trylock_##type(struct six_lock *lock)           \
+{                                                                      \
+       return six_trylock_ip(lock, SIX_LOCK_##type, _THIS_IP_);        \
+}                                                                      \
+                                                                       \
+static inline int six_lock_ip_waiter_##type(struct six_lock *lock,     \
+                          struct six_lock_waiter *wait,                \
+                          six_lock_should_sleep_fn should_sleep_fn, void *p,\
+                          unsigned long ip)                            \
+{                                                                      \
+       return six_lock_ip_waiter(lock, SIX_LOCK_##type, wait, should_sleep_fn, p, ip);\
+}                                                                      \
+                                                                       \
+static inline int six_lock_ip_##type(struct six_lock *lock,            \
+                   six_lock_should_sleep_fn should_sleep_fn, void *p,  \
+                   unsigned long ip)                                   \
 {                                                                      \
-       return six_trylock_type(lock, SIX_LOCK_##type);                 \
+       return six_lock_ip(lock, SIX_LOCK_##type, should_sleep_fn, p, ip);\
 }                                                                      \
                                                                        \
-static __always_inline bool six_relock_##type(struct six_lock *lock, u32 seq)\
+static inline bool six_relock_ip_##type(struct six_lock *lock, u32 seq, unsigned long ip)\
 {                                                                      \
-       return six_relock_type(lock, SIX_LOCK_##type, seq);             \
+       return six_relock_ip(lock, SIX_LOCK_##type, seq, ip);           \
 }                                                                      \
                                                                        \
-static __always_inline void six_lock_##type(struct six_lock *lock)     \
+static inline bool six_relock_##type(struct six_lock *lock, u32 seq)   \
 {                                                                      \
-       six_lock_type(lock, SIX_LOCK_##type);                           \
+       return six_relock_ip(lock, SIX_LOCK_##type, seq, _THIS_IP_);    \
 }                                                                      \
                                                                        \
-static __always_inline void six_unlock_##type(struct six_lock *lock)   \
+static inline int six_lock_##type(struct six_lock *lock,               \
+                                 six_lock_should_sleep_fn fn, void *p)\
 {                                                                      \
-       six_unlock_type(lock, SIX_LOCK_##type);                         \
+       return six_lock_ip_##type(lock, fn, p, _THIS_IP_);              \
+}                                                                      \
+                                                                       \
+static inline void six_unlock_ip_##type(struct six_lock *lock, unsigned long ip)       \
+{                                                                      \
+       six_unlock_ip(lock, SIX_LOCK_##type, ip);                       \
+}                                                                      \
+                                                                       \
+static inline void six_unlock_##type(struct six_lock *lock)            \
+{                                                                      \
+       six_unlock_ip(lock, SIX_LOCK_##type, _THIS_IP_);                \
 }
 
 __SIX_LOCK(read)
@@ -218,8 +367,6 @@ __SIX_LOCK(intent)
 __SIX_LOCK(write)
 #undef __SIX_LOCK
 
-#endif
-
 void six_lock_downgrade(struct six_lock *);
 bool six_lock_tryupgrade(struct six_lock *);
 bool six_trylock_convert(struct six_lock *, enum six_lock_type,
@@ -227,4 +374,13 @@ bool six_trylock_convert(struct six_lock *, enum six_lock_type,
 
 void six_lock_increment(struct six_lock *, enum six_lock_type);
 
-#endif /* _BCACHEFS_SIX_H */
+void six_lock_wakeup_all(struct six_lock *);
+
+struct six_lock_count {
+       unsigned n[3];
+};
+
+struct six_lock_count six_lock_counts(struct six_lock *);
+void six_lock_readers_add(struct six_lock *, int);
+
+#endif /* _LINUX_SIX_H */