X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libbcachefs%2Fsix.c;h=458a1de0a6e39c89bb6840be729a4fc3f2a52ec5;hb=9799b119c34d7be1ee96d143209cfe5fc543d92a;hp=c60a67307773327e8c5d6e4e915e4f11c43c0d83;hpb=c35fbbc025c6099969befb4dfaf065215cf40cf3;p=bcachefs-tools-debian diff --git a/libbcachefs/six.c b/libbcachefs/six.c index c60a673..458a1de 100644 --- a/libbcachefs/six.c +++ b/libbcachefs/six.c @@ -1,158 +1,350 @@ +// SPDX-License-Identifier: GPL-2.0 +#include #include +#include #include #include #include +#include #include +#include +#include + +#include #include "six.h" -#define six_acquire(l, t) lock_acquire(l, 0, t, 0, 0, NULL, _RET_IP_) -#define six_release(l) lock_release(l, 0, _RET_IP_) +#ifdef DEBUG +#define EBUG_ON(cond) BUG_ON(cond) +#else +#define EBUG_ON(cond) do {} while (0) +#endif + +#define six_acquire(l, t, r, ip) lock_acquire(l, 0, t, r, 1, NULL, ip) +#define six_release(l, ip) lock_release(l, ip) -#define __SIX_LOCK_HELD_read __SIX_VAL(read_lock, ~0) -#define __SIX_LOCK_HELD_intent __SIX_VAL(intent_lock, ~0) -#define __SIX_LOCK_HELD_write __SIX_VAL(seq, 1) +static void do_six_unlock_type(struct six_lock *lock, enum six_lock_type type); + +#define SIX_LOCK_HELD_read_OFFSET 0 +#define SIX_LOCK_HELD_read ~(~0U << 26) +#define SIX_LOCK_HELD_intent (1U << 26) +#define SIX_LOCK_HELD_write (1U << 27) +#define SIX_LOCK_WAITING_read (1U << (28 + SIX_LOCK_read)) +#define SIX_LOCK_WAITING_write (1U << (28 + SIX_LOCK_write)) +#define SIX_LOCK_NOSPIN (1U << 31) struct six_lock_vals { /* Value we add to the lock in order to take the lock: */ - u64 lock_val; + u32 lock_val; /* If the lock has this value (used as a mask), taking the lock fails: */ - u64 lock_fail; - - /* Value we add to the lock in order to release the lock: */ - u64 unlock_val; + u32 lock_fail; /* Mask that indicates lock is held for this type: */ - u64 held_mask; + u32 held_mask; /* Waitlist we wakeup when releasing the lock: */ enum six_lock_type unlock_wakeup; }; -#define LOCK_VALS { \ - [SIX_LOCK_read] = { \ - .lock_val = __SIX_VAL(read_lock, 1), \ - .lock_fail = __SIX_LOCK_HELD_write, \ - .unlock_val = -__SIX_VAL(read_lock, 1), \ - .held_mask = __SIX_LOCK_HELD_read, \ - .unlock_wakeup = SIX_LOCK_write, \ - }, \ - [SIX_LOCK_intent] = { \ - .lock_val = __SIX_VAL(intent_lock, 1), \ - .lock_fail = __SIX_LOCK_HELD_intent, \ - .unlock_val = -__SIX_VAL(intent_lock, 1), \ - .held_mask = __SIX_LOCK_HELD_intent, \ - .unlock_wakeup = SIX_LOCK_intent, \ - }, \ - [SIX_LOCK_write] = { \ - .lock_val = __SIX_VAL(seq, 1), \ - .lock_fail = __SIX_LOCK_HELD_read, \ - .unlock_val = __SIX_VAL(seq, 1), \ - .held_mask = __SIX_LOCK_HELD_write, \ - .unlock_wakeup = SIX_LOCK_read, \ - }, \ -} +static const struct six_lock_vals l[] = { + [SIX_LOCK_read] = { + .lock_val = 1U << SIX_LOCK_HELD_read_OFFSET, + .lock_fail = SIX_LOCK_HELD_write, + .held_mask = SIX_LOCK_HELD_read, + .unlock_wakeup = SIX_LOCK_write, + }, + [SIX_LOCK_intent] = { + .lock_val = SIX_LOCK_HELD_intent, + .lock_fail = SIX_LOCK_HELD_intent, + .held_mask = SIX_LOCK_HELD_intent, + .unlock_wakeup = SIX_LOCK_intent, + }, + [SIX_LOCK_write] = { + .lock_val = SIX_LOCK_HELD_write, + .lock_fail = SIX_LOCK_HELD_read, + .held_mask = SIX_LOCK_HELD_write, + .unlock_wakeup = SIX_LOCK_read, + }, +}; -static void six_set_owner(struct six_lock *lock, enum six_lock_type type) +static inline void six_set_bitmask(struct six_lock *lock, u32 mask) { - if (type == SIX_LOCK_intent) - lock->owner = current; + if ((atomic_read(&lock->state) & mask) != mask) + atomic_or(mask, &lock->state); } 
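/*
 * Illustrative sketch (not part of the patch): how the packed u32 state word
 * defined above decomposes. six_state_read_count() and six_state_held() are
 * hypothetical helpers, shown only to clarify the bit layout: bits 0-25 hold
 * the read count, bit 26 the intent bit, bit 27 the write bit, bits 28-29 the
 * per-type waiting bits and bit 31 the nospin flag.
 */
static inline unsigned six_state_read_count(u32 state)
{
	return (state & SIX_LOCK_HELD_read) >> SIX_LOCK_HELD_read_OFFSET;
}

static inline bool six_state_held(u32 state, enum six_lock_type type)
{
	return state & l[type].held_mask;
}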
-static void six_clear_owner(struct six_lock *lock, enum six_lock_type type)
+static inline void six_clear_bitmask(struct six_lock *lock, u32 mask)
 {
-	if (type == SIX_LOCK_intent)
-		lock->owner = NULL;
+	if (atomic_read(&lock->state) & mask)
+		atomic_and(~mask, &lock->state);
 }
 
-static inline bool __six_trylock_type(struct six_lock *lock,
-				      enum six_lock_type type)
+static inline void six_set_owner(struct six_lock *lock, enum six_lock_type type,
+				 u32 old, struct task_struct *owner)
 {
-	const struct six_lock_vals l[] = LOCK_VALS;
-	union six_lock_state old;
-	u64 v = READ_ONCE(lock->state.v);
+	if (type != SIX_LOCK_intent)
+		return;
 
-	do {
-		old.v = v;
+	if (!(old & SIX_LOCK_HELD_intent)) {
+		EBUG_ON(lock->owner);
+		lock->owner = owner;
+	} else {
+		EBUG_ON(lock->owner != current);
+	}
+}
 
-		EBUG_ON(type == SIX_LOCK_write &&
-			((old.v & __SIX_LOCK_HELD_write) ||
-			 !(old.v & __SIX_LOCK_HELD_intent)));
+static inline unsigned pcpu_read_count(struct six_lock *lock)
+{
+	unsigned read_count = 0;
+	int cpu;
 
-		if (old.v & l[type].lock_fail)
-			return false;
-	} while ((v = atomic64_cmpxchg_acquire(&lock->state.counter,
-				old.v,
-				old.v + l[type].lock_val)) != old.v);
-	return true;
+	for_each_possible_cpu(cpu)
+		read_count += *per_cpu_ptr(lock->readers, cpu);
+	return read_count;
 }
 
-bool six_trylock_type(struct six_lock *lock, enum six_lock_type type)
+/*
+ * __do_six_trylock() - main trylock routine
+ *
+ * Returns 1 on success, 0 on failure
+ *
+ * In percpu reader mode, a failed trylock may cause a spurious trylock failure
+ * for another thread taking the competing lock type, and we may have to do a
+ * wakeup: when a wakeup is required, we return -1 - wakeup_type.
+ */
+static int __do_six_trylock(struct six_lock *lock, enum six_lock_type type,
+			    struct task_struct *task, bool try)
 {
-	bool ret = __six_trylock_type(lock, type);
+	int ret;
+	u32 old;
+
+	EBUG_ON(type == SIX_LOCK_write && lock->owner != task);
+	EBUG_ON(type == SIX_LOCK_write &&
+		(try != !(atomic_read(&lock->state) & SIX_LOCK_HELD_write)));
+
+	/*
+	 * Percpu reader mode:
+	 *
+	 * The basic idea behind this algorithm is that you can implement a lock
+	 * between two threads without any atomics, just memory barriers:
+	 *
+	 * For two threads you'll need two variables, one variable for "thread a
+	 * has the lock" and another for "thread b has the lock".
+	 *
+	 * To take the lock, a thread sets its variable indicating that it holds
+	 * the lock, then issues a full memory barrier, then reads from the
+	 * other thread's variable to check if the other thread thinks it has
+	 * the lock. If we raced, we backoff and retry/sleep.
+	 *
+	 * Failure to take the lock may cause a spurious trylock failure in
+	 * another thread, because we temporarily set the lock to indicate that
+	 * we held it. This would be a problem for a thread in six_lock(), when
+	 * they are calling trylock after adding themself to the waitlist and
+	 * prior to sleeping.
+	 *
+	 * Therefore, if we fail to get the lock, and there were waiters of the
+	 * type we conflict with, we will have to issue a wakeup.
+	 *
+	 * Since we may be called under wait_lock (and by the wakeup code
+	 * itself), we return that the wakeup has to be done instead of doing it
+	 * here.
+ */ + if (type == SIX_LOCK_read && lock->readers) { + preempt_disable(); + this_cpu_inc(*lock->readers); /* signal that we own lock */ + + smp_mb(); + + old = atomic_read(&lock->state); + ret = !(old & l[type].lock_fail); - if (ret) { - six_acquire(&lock->dep_map, 1); - six_set_owner(lock, type); + this_cpu_sub(*lock->readers, !ret); + preempt_enable(); + + if (!ret && (old & SIX_LOCK_WAITING_write)) + ret = -1 - SIX_LOCK_write; + } else if (type == SIX_LOCK_write && lock->readers) { + if (try) { + atomic_add(SIX_LOCK_HELD_write, &lock->state); + smp_mb__after_atomic(); + } + + ret = !pcpu_read_count(lock); + + if (try && !ret) { + old = atomic_sub_return(SIX_LOCK_HELD_write, &lock->state); + if (old & SIX_LOCK_WAITING_read) + ret = -1 - SIX_LOCK_read; + } + } else { + old = atomic_read(&lock->state); + do { + ret = !(old & l[type].lock_fail); + if (!ret || (type == SIX_LOCK_write && !try)) { + smp_mb(); + break; + } + } while (!atomic_try_cmpxchg_acquire(&lock->state, &old, old + l[type].lock_val)); + + EBUG_ON(ret && !(atomic_read(&lock->state) & l[type].held_mask)); } + if (ret > 0) + six_set_owner(lock, type, old, task); + + EBUG_ON(type == SIX_LOCK_write && try && ret <= 0 && + (atomic_read(&lock->state) & SIX_LOCK_HELD_write)); + return ret; } -bool six_relock_type(struct six_lock *lock, enum six_lock_type type, - unsigned seq) +static void __six_lock_wakeup(struct six_lock *lock, enum six_lock_type lock_type) { - const struct six_lock_vals l[] = LOCK_VALS; - union six_lock_state old; - u64 v = READ_ONCE(lock->state.v); + struct six_lock_waiter *w, *next; + struct task_struct *task; + bool saw_one; + int ret; +again: + ret = 0; + saw_one = false; + raw_spin_lock(&lock->wait_lock); - do { - old.v = v; + list_for_each_entry_safe(w, next, &lock->wait_list, list) { + if (w->lock_want != lock_type) + continue; - if (old.seq != seq || old.v & l[type].lock_fail) - return false; - } while ((v = atomic64_cmpxchg_acquire(&lock->state.counter, - old.v, - old.v + l[type].lock_val)) != old.v); + if (saw_one && lock_type != SIX_LOCK_read) + goto unlock; + saw_one = true; + + ret = __do_six_trylock(lock, lock_type, w->task, false); + if (ret <= 0) + goto unlock; + + /* + * Similar to percpu_rwsem_wake_function(), we need to guard + * against the wakee noticing w->lock_acquired, returning, and + * then exiting before we do the wakeup: + */ + task = get_task_struct(w->task); + __list_del(w->list.prev, w->list.next); + /* + * The release barrier here ensures the ordering of the + * __list_del before setting w->lock_acquired; @w is on the + * stack of the thread doing the waiting and will be reused + * after it sees w->lock_acquired with no other locking: + * pairs with smp_load_acquire() in six_lock_slowpath() + */ + smp_store_release(&w->lock_acquired, true); + wake_up_process(task); + put_task_struct(task); + } + + six_clear_bitmask(lock, SIX_LOCK_WAITING_read << lock_type); +unlock: + raw_spin_unlock(&lock->wait_lock); + + if (ret < 0) { + lock_type = -ret - 1; + goto again; + } +} - six_acquire(&lock->dep_map, 1); - six_set_owner(lock, type); +__always_inline +static void six_lock_wakeup(struct six_lock *lock, u32 state, + enum six_lock_type lock_type) +{ + if (lock_type == SIX_LOCK_write && (state & SIX_LOCK_HELD_read)) + return; + + if (!(state & (SIX_LOCK_WAITING_read << lock_type))) + return; + + __six_lock_wakeup(lock, lock_type); +} + +__always_inline +static bool do_six_trylock(struct six_lock *lock, enum six_lock_type type, bool try) +{ + int ret; + + ret = __do_six_trylock(lock, type, 
current, try); + if (ret < 0) + __six_lock_wakeup(lock, -ret - 1); + + return ret > 0; +} + +/** + * six_trylock_ip - attempt to take a six lock without blocking + * @lock: lock to take + * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write + * @ip: ip parameter for lockdep/lockstat, i.e. _THIS_IP_ + * + * Return: true on success, false on failure. + */ +bool six_trylock_ip(struct six_lock *lock, enum six_lock_type type, unsigned long ip) +{ + if (!do_six_trylock(lock, type, true)) + return false; + + if (type != SIX_LOCK_write) + six_acquire(&lock->dep_map, 1, type == SIX_LOCK_read, ip); return true; } +EXPORT_SYMBOL_GPL(six_trylock_ip); + +/** + * six_relock_ip - attempt to re-take a lock that was held previously + * @lock: lock to take + * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write + * @seq: lock sequence number obtained from six_lock_seq() while lock was + * held previously + * @ip: ip parameter for lockdep/lockstat, i.e. _THIS_IP_ + * + * Return: true on success, false on failure. + */ +bool six_relock_ip(struct six_lock *lock, enum six_lock_type type, + unsigned seq, unsigned long ip) +{ + if (six_lock_seq(lock) != seq || !six_trylock_ip(lock, type, ip)) + return false; -struct six_lock_waiter { - struct list_head list; - struct task_struct *task; -}; + if (six_lock_seq(lock) != seq) { + six_unlock_ip(lock, type, ip); + return false; + } + + return true; +} +EXPORT_SYMBOL_GPL(six_relock_ip); -/* This is probably up there with the more evil things I've done */ -#define waitlist_bitnr(id) ilog2((((union six_lock_state) { .waiters = 1 << (id) }).l)) +#ifdef CONFIG_LOCK_SPIN_ON_OWNER -static inline int six_can_spin_on_owner(struct six_lock *lock) +static inline bool six_can_spin_on_owner(struct six_lock *lock) { struct task_struct *owner; - int retval = 1; + bool ret; if (need_resched()) - return 0; + return false; rcu_read_lock(); owner = READ_ONCE(lock->owner); - if (owner) - retval = owner->on_cpu; + ret = !owner || owner_on_cpu(owner); rcu_read_unlock(); - /* - * if lock->owner is not set, the mutex owner may have just acquired - * it and not set the owner yet or the mutex has been released. 
- */ - return retval; + + return ret; } -static bool six_spin_on_owner(struct six_lock *lock, struct task_struct *owner) +static inline bool six_spin_on_owner(struct six_lock *lock, + struct task_struct *owner, + u64 end_time) { bool ret = true; + unsigned loop = 0; rcu_read_lock(); while (lock->owner == owner) { @@ -164,7 +356,13 @@ static bool six_spin_on_owner(struct six_lock *lock, struct task_struct *owner) */ barrier(); - if (!owner->on_cpu || need_resched()) { + if (!owner_on_cpu(owner) || need_resched()) { + ret = false; + break; + } + + if (!(++loop & 0xf) && (time_after64(sched_clock(), end_time))) { + six_set_bitmask(lock, SIX_LOCK_NOSPIN); ret = false; break; } @@ -176,9 +374,10 @@ static bool six_spin_on_owner(struct six_lock *lock, struct task_struct *owner) return ret; } -static bool six_optimistic_spin(struct six_lock *lock, enum six_lock_type type) +static inline bool six_optimistic_spin(struct six_lock *lock, enum six_lock_type type) { struct task_struct *task = current; + u64 end_time; if (type == SIX_LOCK_write) return false; @@ -190,6 +389,8 @@ static bool six_optimistic_spin(struct six_lock *lock, enum six_lock_type type) if (!osq_lock(&lock->osq)) goto fail; + end_time = sched_clock() + 10 * NSEC_PER_USEC; + while (1) { struct task_struct *owner; @@ -198,10 +399,10 @@ static bool six_optimistic_spin(struct six_lock *lock, enum six_lock_type type) * release the lock or go to sleep. */ owner = READ_ONCE(lock->owner); - if (owner && !six_spin_on_owner(lock, owner)) + if (owner && !six_spin_on_owner(lock, owner, end_time)) break; - if (__six_trylock_type(lock, type)) { + if (do_six_trylock(lock, type, false)) { osq_unlock(&lock->osq); preempt_enable(); return true; @@ -240,160 +441,477 @@ fail: return false; } -void six_lock_type(struct six_lock *lock, enum six_lock_type type) +#else /* CONFIG_LOCK_SPIN_ON_OWNER */ + +static inline bool six_optimistic_spin(struct six_lock *lock, enum six_lock_type type) +{ + return false; +} + +#endif + +noinline +static int six_lock_slowpath(struct six_lock *lock, enum six_lock_type type, + struct six_lock_waiter *wait, + six_lock_should_sleep_fn should_sleep_fn, void *p, + unsigned long ip) { - const struct six_lock_vals l[] = LOCK_VALS; - union six_lock_state old, new; - struct six_lock_waiter wait; - u64 v; + int ret = 0; - six_acquire(&lock->dep_map, 0); + if (type == SIX_LOCK_write) { + EBUG_ON(atomic_read(&lock->state) & SIX_LOCK_HELD_write); + atomic_add(SIX_LOCK_HELD_write, &lock->state); + smp_mb__after_atomic(); + } - if (__six_trylock_type(lock, type)) - goto done; + trace_contention_begin(lock, 0); + lock_contended(&lock->dep_map, ip); if (six_optimistic_spin(lock, type)) - goto done; + goto out; + + wait->task = current; + wait->lock_want = type; + wait->lock_acquired = false; - lock_contended(&lock->dep_map, _RET_IP_); + raw_spin_lock(&lock->wait_lock); + six_set_bitmask(lock, SIX_LOCK_WAITING_read << type); + /* + * Retry taking the lock after taking waitlist lock, in case we raced + * with an unlock: + */ + ret = __do_six_trylock(lock, type, current, false); + if (ret <= 0) { + wait->start_time = local_clock(); - INIT_LIST_HEAD(&wait.list); - wait.task = current; + if (!list_empty(&lock->wait_list)) { + struct six_lock_waiter *last = + list_last_entry(&lock->wait_list, + struct six_lock_waiter, list); + + if (time_before_eq64(wait->start_time, last->start_time)) + wait->start_time = last->start_time + 1; + } + + list_add_tail(&wait->list, &lock->wait_list); + } + raw_spin_unlock(&lock->wait_lock); + + if (unlikely(ret > 
0)) { + ret = 0; + goto out; + } + + if (unlikely(ret < 0)) { + __six_lock_wakeup(lock, -ret - 1); + ret = 0; + } while (1) { set_current_state(TASK_UNINTERRUPTIBLE); - if (list_empty_careful(&wait.list)) { + + /* + * Ensures that writes to the waitlist entry happen after we see + * wait->lock_acquired: pairs with the smp_store_release in + * __six_lock_wakeup + */ + if (smp_load_acquire(&wait->lock_acquired)) + break; + + ret = should_sleep_fn ? should_sleep_fn(lock, p) : 0; + if (unlikely(ret)) { + bool acquired; + + /* + * If should_sleep_fn() returns an error, we are + * required to return that error even if we already + * acquired the lock - should_sleep_fn() might have + * modified external state (e.g. when the deadlock cycle + * detector in bcachefs issued a transaction restart) + */ raw_spin_lock(&lock->wait_lock); - list_add_tail(&wait.list, &lock->wait_list[type]); + acquired = wait->lock_acquired; + if (!acquired) + list_del(&wait->list); raw_spin_unlock(&lock->wait_lock); - } - v = READ_ONCE(lock->state.v); - do { - new.v = old.v = v; - - if (!(old.v & l[type].lock_fail)) - new.v += l[type].lock_val; - else if (!(new.waiters & (1 << type))) - new.waiters |= 1 << type; - else - break; /* waiting bit already set */ - } while ((v = atomic64_cmpxchg_acquire(&lock->state.counter, - old.v, new.v)) != old.v); - - if (!(old.v & l[type].lock_fail)) + if (unlikely(acquired)) + do_six_unlock_type(lock, type); break; + } schedule(); } __set_current_state(TASK_RUNNING); - - if (!list_empty_careful(&wait.list)) { - raw_spin_lock(&lock->wait_lock); - list_del_init(&wait.list); - raw_spin_unlock(&lock->wait_lock); +out: + if (ret && type == SIX_LOCK_write) { + six_clear_bitmask(lock, SIX_LOCK_HELD_write); + six_lock_wakeup(lock, atomic_read(&lock->state), SIX_LOCK_read); } -done: - lock_acquired(&lock->dep_map, _RET_IP_); - six_set_owner(lock, type); + trace_contention_end(lock, 0); + + return ret; } -static inline void six_lock_wakeup(struct six_lock *lock, - union six_lock_state state, - unsigned waitlist_id) +/** + * six_lock_ip_waiter - take a lock, with full waitlist interface + * @lock: lock to take + * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write + * @wait: pointer to wait object, which will be added to lock's waitlist + * @should_sleep_fn: callback run after adding to waitlist, immediately prior + * to scheduling + * @p: passed through to @should_sleep_fn + * @ip: ip parameter for lockdep/lockstat, i.e. _THIS_IP_ + * + * This is the most general six_lock() variant, with parameters to support full + * cycle detection for deadlock avoidance. + * + * The code calling this function must implement tracking of held locks, and the + * @wait object should be embedded into the struct that tracks held locks - + * which must also be accessible in a thread-safe way. + * + * @should_sleep_fn should invoke the cycle detector; it should walk each + * lock's waiters, and for each waiter recursively walk their held locks. + * + * When this function must block, @wait will be added to @lock's waitlist before + * calling trylock, and before calling @should_sleep_fn, and @wait will not be + * removed from the lock waitlist until the lock has been successfully acquired, + * or we abort. + * + * @wait.start_time will be monotonically increasing for any given waitlist, and + * thus may be used as a loop cursor. + * + * Return: 0 on success, or the return code from @should_sleep_fn on failure. 
+ */
+int six_lock_ip_waiter(struct six_lock *lock, enum six_lock_type type,
+		       struct six_lock_waiter *wait,
+		       six_lock_should_sleep_fn should_sleep_fn, void *p,
+		       unsigned long ip)
 {
-	struct list_head *wait_list = &lock->wait_list[waitlist_id];
-	struct six_lock_waiter *w, *next;
+	int ret;
 
-	if (waitlist_id == SIX_LOCK_write && state.read_lock)
-		return;
+	wait->start_time = 0;
 
-	if (!(state.waiters & (1 << waitlist_id)))
-		return;
+	if (type != SIX_LOCK_write)
+		six_acquire(&lock->dep_map, 0, type == SIX_LOCK_read, ip);
 
-	clear_bit(waitlist_bitnr(waitlist_id),
-		  (unsigned long *) &lock->state.v);
+	ret = do_six_trylock(lock, type, true) ? 0
+		: six_lock_slowpath(lock, type, wait, should_sleep_fn, p, ip);
 
-	raw_spin_lock(&lock->wait_lock);
+	if (ret && type != SIX_LOCK_write)
+		six_release(&lock->dep_map, ip);
+	if (!ret)
+		lock_acquired(&lock->dep_map, ip);
 
-	list_for_each_entry_safe(w, next, wait_list, list) {
-		list_del_init(&w->list);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(six_lock_ip_waiter);
 
-		if (wake_up_process(w->task) &&
-		    waitlist_id != SIX_LOCK_read) {
-			if (!list_empty(wait_list))
-				set_bit(waitlist_bitnr(waitlist_id),
-					(unsigned long *) &lock->state.v);
-			break;
-		}
+__always_inline
+static void do_six_unlock_type(struct six_lock *lock, enum six_lock_type type)
+{
+	u32 state;
+
+	if (type == SIX_LOCK_intent)
+		lock->owner = NULL;
+
+	if (type == SIX_LOCK_read &&
+	    lock->readers) {
+		smp_mb(); /* unlock barrier */
+		this_cpu_dec(*lock->readers);
+		smp_mb(); /* between unlocking and checking for waiters */
+		state = atomic_read(&lock->state);
+	} else {
+		u32 v = l[type].lock_val;
+
+		if (type != SIX_LOCK_read)
+			v += atomic_read(&lock->state) & SIX_LOCK_NOSPIN;
+
+		EBUG_ON(!(atomic_read(&lock->state) & l[type].held_mask));
+		state = atomic_sub_return_release(v, &lock->state);
 	}
 
-	raw_spin_unlock(&lock->wait_lock);
+	six_lock_wakeup(lock, state, l[type].unlock_wakeup);
 }
 
-void six_unlock_type(struct six_lock *lock, enum six_lock_type type)
+/**
+ * six_unlock_ip - drop a six lock
+ * @lock: lock to unlock
+ * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
+ * @ip: ip parameter for lockdep/lockstat, i.e. _THIS_IP_
+ *
+ * When a lock is held multiple times (because six_lock_increment() was used),
+ * this decrements the 'lock held' counter by one.
+ *
+ * For example:
+ * six_lock_read(&foo->lock);				read count 1
+ * six_lock_increment(&foo->lock, SIX_LOCK_read);	read count 2
+ * six_lock_unlock(&foo->lock, SIX_LOCK_read);		read count 1
+ * six_lock_unlock(&foo->lock, SIX_LOCK_read);		read count 0
+ */
+void six_unlock_ip(struct six_lock *lock, enum six_lock_type type, unsigned long ip)
 {
-	const struct six_lock_vals l[] = LOCK_VALS;
-	union six_lock_state state;
-
-	six_clear_owner(lock, type);
-
-	EBUG_ON(!(lock->state.v & l[type].held_mask));
 	EBUG_ON(type == SIX_LOCK_write &&
-		!(lock->state.v & __SIX_LOCK_HELD_intent));
+		!(atomic_read(&lock->state) & SIX_LOCK_HELD_intent));
+	EBUG_ON((type == SIX_LOCK_write ||
+		 type == SIX_LOCK_intent) &&
+		lock->owner != current);
+
+	if (type != SIX_LOCK_write)
+		six_release(&lock->dep_map, ip);
+	else
+		lock->seq++;
+
+	if (type == SIX_LOCK_intent &&
+	    lock->intent_lock_recurse) {
+		--lock->intent_lock_recurse;
+		return;
+	}
 
-	state.v = atomic64_add_return_release(l[type].unlock_val,
-					      &lock->state.counter);
-	six_release(&lock->dep_map);
-	six_lock_wakeup(lock, state, l[type].unlock_wakeup);
+	do_six_unlock_type(lock, type);
 }
+EXPORT_SYMBOL_GPL(six_unlock_ip);
 
-bool six_trylock_convert(struct six_lock *lock,
-			 enum six_lock_type from,
-			 enum six_lock_type to)
+/**
+ * six_lock_downgrade - convert an intent lock to a read lock
+ * @lock: lock to downgrade
+ *
+ * @lock will have read count incremented and intent count decremented
+ */
+void six_lock_downgrade(struct six_lock *lock)
 {
-	const struct six_lock_vals l[] = LOCK_VALS;
-	union six_lock_state old, new;
-	u64 v = READ_ONCE(lock->state.v);
+	six_lock_increment(lock, SIX_LOCK_read);
+	six_unlock_intent(lock);
+}
+EXPORT_SYMBOL_GPL(six_lock_downgrade);
+
+/**
+ * six_lock_tryupgrade - attempt to convert read lock to an intent lock
+ * @lock: lock to upgrade
+ *
+ * On success, @lock will have intent count incremented and read count
+ * decremented
+ *
+ * Return: true on success, false on failure
+ */
+bool six_lock_tryupgrade(struct six_lock *lock)
+{
+	u32 old = atomic_read(&lock->state), new;
 
 	do {
-		new.v = old.v = v;
-		new.v += l[from].unlock_val;
+		new = old;
 
-		if (new.v & l[to].lock_fail)
+		if (new & SIX_LOCK_HELD_intent)
 			return false;
-	} while ((v = atomic64_cmpxchg_acquire(&lock->state.counter,
-				old.v,
-				new.v + l[to].lock_val)) != old.v);
 
-	six_clear_owner(lock, from);
-	six_set_owner(lock, to);
+		if (!lock->readers) {
+			EBUG_ON(!(new & SIX_LOCK_HELD_read));
+			new -= l[SIX_LOCK_read].lock_val;
+		}
+
+		new |= SIX_LOCK_HELD_intent;
+	} while (!atomic_try_cmpxchg_acquire(&lock->state, &old, new));
+
+	if (lock->readers)
+		this_cpu_dec(*lock->readers);
 
-	six_lock_wakeup(lock, new, l[from].unlock_wakeup);
+	six_set_owner(lock, SIX_LOCK_intent, old, current);
 
 	return true;
 }
+EXPORT_SYMBOL_GPL(six_lock_tryupgrade);
+
+/**
+ * six_trylock_convert - attempt to convert a held lock from one type to another
+ * @lock: lock to upgrade
+ * @from: SIX_LOCK_read or SIX_LOCK_intent
+ * @to: SIX_LOCK_read or SIX_LOCK_intent
+ *
+ * On success, @lock will have intent count incremented and read count
+ * decremented
+ *
+ * Return: true on success, false on failure
+ */
+bool six_trylock_convert(struct six_lock *lock,
+			 enum six_lock_type from,
+			 enum six_lock_type to)
+{
+	EBUG_ON(to == SIX_LOCK_write || from == SIX_LOCK_write);
 
-/*
- * Increment read/intent lock count, assuming we already have it read or intent
- * locked:
+	if (to == from)
+		return true;
+
+	if (to == SIX_LOCK_read) {
+		six_lock_downgrade(lock);
+		return true;
+	} else {
+		return six_lock_tryupgrade(lock);
+	}
+}
+EXPORT_SYMBOL_GPL(six_trylock_convert);
+
+/**
+ * six_lock_increment - increase held lock count on a lock that is already held
+ * @lock: lock to increment
+ * @type: SIX_LOCK_read or SIX_LOCK_intent
+ *
+ * @lock must already be held, with a lock type that is greater than or equal to
+ * @type
+ *
+ * A corresponding six_unlock_type() call will be required for @lock to be fully
+ * unlocked.
  */
 void six_lock_increment(struct six_lock *lock, enum six_lock_type type)
 {
-	const struct six_lock_vals l[] = LOCK_VALS;
-
-	EBUG_ON(type == SIX_LOCK_write);
-	six_acquire(&lock->dep_map, 0);
+	six_acquire(&lock->dep_map, 0, type == SIX_LOCK_read, _RET_IP_);
 
 	/* XXX: assert already locked, and that we don't overflow: */
 
-	atomic64_add(l[type].lock_val, &lock->state.counter);
+	switch (type) {
+	case SIX_LOCK_read:
+		if (lock->readers) {
+			this_cpu_inc(*lock->readers);
+		} else {
+			EBUG_ON(!(atomic_read(&lock->state) &
+				  (SIX_LOCK_HELD_read|
+				   SIX_LOCK_HELD_intent)));
+			atomic_add(l[type].lock_val, &lock->state);
+		}
+		break;
+	case SIX_LOCK_intent:
+		EBUG_ON(!(atomic_read(&lock->state) & SIX_LOCK_HELD_intent));
+		lock->intent_lock_recurse++;
+		break;
+	case SIX_LOCK_write:
+		BUG();
+		break;
+	}
 }
+EXPORT_SYMBOL_GPL(six_lock_increment);
+
+/**
+ * six_lock_wakeup_all - wake up all waiters on @lock
+ * @lock: lock to wake up waiters for
+ *
+ * Waking up waiters will cause them to re-run should_sleep_fn, which may then
+ * abort the lock operation.
+ *
+ * This function is never needed in a bug-free program; it's only useful in
+ * debug code, e.g. to determine if a cycle detector is at fault.
+ */
+void six_lock_wakeup_all(struct six_lock *lock)
+{
+	u32 state = atomic_read(&lock->state);
+	struct six_lock_waiter *w;
 
-/* Convert from intent to read: */
-void six_lock_downgrade(struct six_lock *lock)
+	six_lock_wakeup(lock, state, SIX_LOCK_read);
+	six_lock_wakeup(lock, state, SIX_LOCK_intent);
+	six_lock_wakeup(lock, state, SIX_LOCK_write);
+
+	raw_spin_lock(&lock->wait_lock);
+	list_for_each_entry(w, &lock->wait_list, list)
+		wake_up_process(w->task);
+	raw_spin_unlock(&lock->wait_lock);
+}
+EXPORT_SYMBOL_GPL(six_lock_wakeup_all);
+
+/**
+ * six_lock_counts - return held lock counts, for each lock type
+ * @lock: lock to return counters for
+ *
+ * Return: the number of times a lock is held for read, intent and write.
+ */
+struct six_lock_count six_lock_counts(struct six_lock *lock)
 {
-	six_lock_increment(lock, SIX_LOCK_read);
-	six_unlock_intent(lock);
+	struct six_lock_count ret;
+
+	ret.n[SIX_LOCK_read] = !lock->readers
+		? atomic_read(&lock->state) & SIX_LOCK_HELD_read
+		: pcpu_read_count(lock);
+	ret.n[SIX_LOCK_intent] = !!(atomic_read(&lock->state) & SIX_LOCK_HELD_intent) +
+		lock->intent_lock_recurse;
+	ret.n[SIX_LOCK_write] = !!(atomic_read(&lock->state) & SIX_LOCK_HELD_write);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(six_lock_counts);
+
+/**
+ * six_lock_readers_add - directly manipulate reader count of a lock
+ * @lock: lock to add/subtract readers for
+ * @nr: reader count to add/subtract
+ *
+ * When an upper layer is implementing lock reentrancy, we may have both read
+ * and intent locks on the same lock.
+ *
+ * When we need to take a write lock, the read locks will cause self-deadlock,
+ * because six locks themselves do not track which read locks are held by the
+ * current thread and which are held by a different thread - they do no
+ * per-thread tracking of held locks.
+ *
+ * The upper layer that is tracking held locks may however, if trylock() has
+ * failed, count up its own read locks, subtract them, take the write lock, and
+ * then re-add them.
+ *
+ * As in any other situation when taking a write lock, @lock must be held for
+ * intent one (or more) times, so @lock will never be left unlocked.
+ */
+void six_lock_readers_add(struct six_lock *lock, int nr)
+{
+	if (lock->readers) {
+		this_cpu_add(*lock->readers, nr);
+	} else {
+		EBUG_ON((int) (atomic_read(&lock->state) & SIX_LOCK_HELD_read) + nr < 0);
+		/* reader count starts at bit 0 */
+		atomic_add(nr, &lock->state);
+	}
+}
+EXPORT_SYMBOL_GPL(six_lock_readers_add);
+
+/**
+ * six_lock_exit - release resources held by a lock prior to freeing
+ * @lock: lock to exit
+ *
+ * When a lock was initialized in percpu mode (SIX_LOCK_INIT_PCPU), this is
+ * required to free the percpu read counts.
+ */
+void six_lock_exit(struct six_lock *lock)
+{
+	WARN_ON(lock->readers && pcpu_read_count(lock));
+	WARN_ON(atomic_read(&lock->state) & SIX_LOCK_HELD_read);
+
+	free_percpu(lock->readers);
+	lock->readers = NULL;
+}
+EXPORT_SYMBOL_GPL(six_lock_exit);
+
+void __six_lock_init(struct six_lock *lock, const char *name,
+		     struct lock_class_key *key, enum six_lock_init_flags flags)
+{
+	atomic_set(&lock->state, 0);
+	raw_spin_lock_init(&lock->wait_lock);
+	INIT_LIST_HEAD(&lock->wait_list);
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	debug_check_no_locks_freed((void *) lock, sizeof(*lock));
+	lockdep_init_map(&lock->dep_map, name, key, 0);
+#endif
+
+	/*
+	 * Don't assume that we have real percpu variables available in
+	 * userspace:
+	 */
+#ifdef __KERNEL__
+	if (flags & SIX_LOCK_INIT_PCPU) {
+		/*
+		 * We don't return an error here on memory allocation failure
+		 * since percpu is an optimization, and locks will work with the
+		 * same semantics in non-percpu mode: callers can check for
+		 * failure if they wish by checking lock->readers, but generally
+		 * will not want to treat it as an error.
+		 */
+		lock->readers = alloc_percpu(unsigned);
+	}
+#endif
 }
+EXPORT_SYMBOL_GPL(__six_lock_init);
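
Usage sketch (not part of the patch): a minimal example of how the shared/intent/exclusive lock types compose after this change, using the six_lock_read()/six_lock_intent()/six_lock_write() and six_unlock_*() wrappers declared in six.h. struct foo and its helpers are hypothetical, and six_lock_init() is assumed to take the init-flags argument that __six_lock_init() above accepts; NULL is passed for should_sleep_fn, so the lock calls cannot fail here.

#include "six.h"

struct foo {
	struct six_lock	lock;
	u64		value;
};

static void foo_init(struct foo *f)
{
	six_lock_init(&f->lock, 0);	/* 0: no SIX_LOCK_INIT_PCPU */
	f->value = 0;
}

static u64 foo_read_value(struct foo *f)
{
	u64 v;

	/* read is shared: any number of readers may hold it concurrently */
	six_lock_read(&f->lock, NULL, NULL);
	v = f->value;
	six_unlock_read(&f->lock);
	return v;
}

static void foo_update_value(struct foo *f, u64 new)
{
	/* intent excludes other intent/write holders, but not readers */
	six_lock_intent(&f->lock, NULL, NULL);
	/* write requires intent to already be held; it excludes readers */
	six_lock_write(&f->lock, NULL, NULL);
	f->value = new;
	six_unlock_write(&f->lock);
	six_unlock_intent(&f->lock);
}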