// SPDX-License-Identifier: GPL-2.0

#include <linux/export.h>
#include <linux/log2.h>
#include <linux/percpu.h>
#include <linux/preempt.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/sched/clock.h>
#include <linux/sched/rt.h>
#include <linux/six.h>
#include <linux/slab.h>

#include <trace/events/lock.h>
#ifdef DEBUG
#define EBUG_ON(cond)		BUG_ON(cond)
#else
#define EBUG_ON(cond)		do {} while (0)
#endif
#define six_acquire(l, t, r, ip)	lock_acquire(l, 0, t, r, 1, NULL, ip)
#define six_release(l, ip)		lock_release(l, ip)
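/*
 * Thin wrappers around lockdep's lock_acquire()/lock_release().  Callers skip
 * these for SIX_LOCK_write: the write lock is only ever taken while already
 * holding intent, so it is not annotated as a separate lockdep lock.
 */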
static void do_six_unlock_type(struct six_lock *lock, enum six_lock_type type);
struct six_lock_vals {
	/* Value we add to the lock in order to take the lock: */
	u64			lock_val;

	/* If the lock has this value (used as a mask), taking the lock fails: */
	u64			lock_fail;

	/* Value we add to the lock in order to release the lock: */
	u64			unlock_val;

	/* Mask that indicates lock is held for this type: */
	u64			held_mask;

	/* Waitlist we wakeup when releasing the lock: */
	enum six_lock_type	unlock_wakeup;
};
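/*
 * Per-lock-type constants, indexed by enum six_lock_type.  All lock state
 * lives in a single 64 bit word (union six_lock_state), so taking or
 * releasing a lock is an atomic add of the appropriate value, and "can this
 * lock type be taken right now" is a mask test against the same word.
 */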
#define __SIX_LOCK_HELD_read	__SIX_VAL(read_lock, ~0)
#define __SIX_LOCK_HELD_intent	__SIX_VAL(intent_lock, ~0)
#define __SIX_LOCK_HELD_write	__SIX_VAL(seq, 1)
#define LOCK_VALS {							\
	[SIX_LOCK_read] = {						\
		.lock_val	= __SIX_VAL(read_lock, 1),		\
		.lock_fail	= __SIX_LOCK_HELD_write + __SIX_VAL(write_locking, 1),\
		.unlock_val	= -__SIX_VAL(read_lock, 1),		\
		.held_mask	= __SIX_LOCK_HELD_read,			\
		.unlock_wakeup	= SIX_LOCK_write,			\
	},								\
	[SIX_LOCK_intent] = {						\
		.lock_val	= __SIX_VAL(intent_lock, 1),		\
		.lock_fail	= __SIX_LOCK_HELD_intent,		\
		.unlock_val	= -__SIX_VAL(intent_lock, 1),		\
		.held_mask	= __SIX_LOCK_HELD_intent,		\
		.unlock_wakeup	= SIX_LOCK_intent,			\
	},								\
	[SIX_LOCK_write] = {						\
		.lock_val	= __SIX_VAL(seq, 1),			\
		.lock_fail	= __SIX_LOCK_HELD_read,			\
		.unlock_val	= __SIX_VAL(seq, 1),			\
		.held_mask	= __SIX_LOCK_HELD_write,		\
		.unlock_wakeup	= SIX_LOCK_read,			\
	},								\
}
static inline void six_set_owner(struct six_lock *lock, enum six_lock_type type,
				 union six_lock_state old,
				 struct task_struct *owner)
{
	if (type != SIX_LOCK_intent)
		return;

	if (!old.intent_lock) {
		EBUG_ON(lock->owner);
		lock->owner = owner;
	} else {
		EBUG_ON(lock->owner != current);
	}
}
static inline unsigned pcpu_read_count(struct six_lock *lock)
{
	unsigned read_count = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		read_count += *per_cpu_ptr(lock->readers, cpu);
	return read_count;
}
/* This is probably up there with the more evil things I've done */
#define waitlist_bitnr(id) ilog2((((union six_lock_state) { .waiters = 1 << (id) }).l))
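/*
 * __do_six_trylock_type(): attempt to take @type on behalf of @task.
 *
 * Return values:
 *   > 0	lock was acquired
 *     0	lock was not acquired
 *   < 0	lock was not acquired, and we may have caused a spurious
 *		failure for waiters of lock type (-return - 1); the caller
 *		must issue a wakeup for that waitlist.
 */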
static int __do_six_trylock_type(struct six_lock *lock,
				 enum six_lock_type type,
				 struct task_struct *task,
				 bool try)
{
	const struct six_lock_vals l[] = LOCK_VALS;
	union six_lock_state old, new;
	int ret;
	u64 v;

	EBUG_ON(type == SIX_LOCK_write && lock->owner != task);
	EBUG_ON(type == SIX_LOCK_write && (lock->state.seq & 1));
	EBUG_ON(type == SIX_LOCK_write && (try != !(lock->state.write_locking)));
	/*
	 * Percpu reader mode:
	 *
	 * The basic idea behind this algorithm is that you can implement a lock
	 * between two threads without any atomics, just memory barriers:
	 *
	 * For two threads you'll need two variables, one variable for "thread a
	 * has the lock" and another for "thread b has the lock".
	 *
	 * To take the lock, a thread sets its variable indicating that it holds
	 * the lock, then issues a full memory barrier, then reads from the
	 * other thread's variable to check if the other thread thinks it has
	 * the lock. If we raced, we backoff and retry/sleep.
	 */
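	/*
	 * As a rough illustration of that two-thread scheme (not the code
	 * below, which generalizes it to one counter per CPU vs. one writer):
	 *
	 *	bool a_held, b_held;
	 *
	 *	// thread a:
	 *	a_held = true;
	 *	smp_mb();
	 *	if (READ_ONCE(b_held))
	 *		a_held = false;	// lost the race; back off and retry
	 */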
	if (type == SIX_LOCK_read && lock->readers) {
		preempt_disable();
		this_cpu_inc(*lock->readers); /* signal that we own lock */

		smp_mb();

		old.v = READ_ONCE(lock->state.v);
		ret = !(old.v & l[type].lock_fail);

		this_cpu_sub(*lock->readers, !ret);
		preempt_enable();

		/*
		 * If we failed because a writer was trying to take the
		 * lock, issue a wakeup because we might have caused a
		 * spurious trylock failure:
		 */
		if (old.write_locking)
			ret = -1 - SIX_LOCK_write;
	} else if (type == SIX_LOCK_write && lock->readers) {
		if (try) {
			atomic64_add(__SIX_VAL(write_locking, 1),
				     &lock->state.counter);
			smp_mb__after_atomic();
		} else if (!(lock->state.waiters & (1 << SIX_LOCK_write))) {
			atomic64_add(__SIX_VAL(waiters, 1 << SIX_LOCK_write),
				     &lock->state.counter);
			/*
			 * pairs with barrier after unlock and before checking
			 * for readers in unlock path
			 */
			smp_mb__after_atomic();
		}

		ret = !pcpu_read_count(lock);

		/*
		 * On success, we increment lock->seq; also we clear
		 * write_locking unless we failed from the lock path:
		 */
		v = 0;
		if (ret)
			v += __SIX_VAL(seq, 1);
		if (ret || try)
			v -= __SIX_VAL(write_locking, 1);

		if (try && !ret) {
			old.v = atomic64_add_return(v, &lock->state.counter);
			if (old.waiters & (1 << SIX_LOCK_read))
				ret = -1 - SIX_LOCK_read;
		} else {
			atomic64_add(v, &lock->state.counter);
		}
	} else {
		v = READ_ONCE(lock->state.v);
		do {
			new.v = old.v = v;

			if (!(old.v & l[type].lock_fail)) {
				new.v += l[type].lock_val;

				if (type == SIX_LOCK_write)
					new.write_locking = 0;
			} else if (!try && !(new.waiters & (1 << type)))
				new.waiters |= 1 << type;
			else
				break; /* waiting bit already set */
		} while ((v = atomic64_cmpxchg_acquire(&lock->state.counter,
					old.v, new.v)) != old.v);

		ret = !(old.v & l[type].lock_fail);

		EBUG_ON(ret && !(lock->state.v & l[type].held_mask));
	}

	if (ret > 0)
		six_set_owner(lock, type, old, task);

	EBUG_ON(type == SIX_LOCK_write && (try || ret > 0) && (lock->state.write_locking));

	return ret;
}
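/*
 * Walk the wait list and hand the lock to as many waiters of @lock_type as
 * possible: every waiter for a read lock, but only the first waiter for
 * intent or write.  If granting a lock spuriously failed waiters of another
 * type, loop around and process that waitlist too.
 */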
static inline void __six_lock_wakeup(struct six_lock *lock, enum six_lock_type lock_type)
{
	struct six_lock_waiter *w, *next;
	struct task_struct *task;
	bool saw_one;
	int ret;
again:
	ret = 0;
	saw_one = false;
	raw_spin_lock(&lock->wait_lock);

	list_for_each_entry_safe(w, next, &lock->wait_list, list) {
		if (w->lock_want != lock_type)
			continue;

		if (saw_one && lock_type != SIX_LOCK_read)
			goto unlock;
		saw_one = true;

		ret = __do_six_trylock_type(lock, lock_type, w->task, false);
		if (ret <= 0)
			goto unlock;

		__list_del(w->list.prev, w->list.next);
		task = w->task;
		/*
		 * Do no writes to @w besides setting lock_acquired - otherwise
		 * we would need a memory barrier:
		 */
		barrier();
		w->lock_acquired = true;
		wake_up_process(task);
	}

	clear_bit(waitlist_bitnr(lock_type), (unsigned long *) &lock->state.v);
unlock:
	raw_spin_unlock(&lock->wait_lock);

	if (ret < 0) {
		lock_type = -ret - 1;
		goto again;
	}
}
static inline void six_lock_wakeup(struct six_lock *lock,
				   union six_lock_state state,
				   enum six_lock_type lock_type)
{
	if (lock_type == SIX_LOCK_write && state.read_lock)
		return;

	if (!(state.waiters & (1 << lock_type)))
		return;

	__six_lock_wakeup(lock, lock_type);
}
static bool do_six_trylock_type(struct six_lock *lock,
				enum six_lock_type type,
				bool try)
{
	int ret;

	ret = __do_six_trylock_type(lock, type, current, try);
	if (ret < 0)
		__six_lock_wakeup(lock, -ret - 1);

	return ret > 0;
}
__always_inline __flatten
static bool __six_trylock_type(struct six_lock *lock, enum six_lock_type type,
			       unsigned long ip)
{
	if (!do_six_trylock_type(lock, type, true))
		return false;

	if (type != SIX_LOCK_write)
		six_acquire(&lock->dep_map, 1, type == SIX_LOCK_read, ip);
	return true;
}
__always_inline __flatten
static bool __six_relock_type(struct six_lock *lock, enum six_lock_type type,
			      unsigned seq, unsigned long ip)
{
	const struct six_lock_vals l[] = LOCK_VALS;
	union six_lock_state old;
	u64 v;

	EBUG_ON(type == SIX_LOCK_write);

	if (type == SIX_LOCK_read &&
	    lock->readers) {
		bool ret;

		preempt_disable();
		this_cpu_inc(*lock->readers);

		smp_mb();

		old.v = READ_ONCE(lock->state.v);
		ret = !(old.v & l[type].lock_fail) && old.seq == seq;

		this_cpu_sub(*lock->readers, !ret);
		preempt_enable();

		/*
		 * Similar to the lock path, we may have caused a spurious write
		 * lock fail and need to issue a wakeup:
		 */
		if (old.write_locking)
			six_lock_wakeup(lock, old, SIX_LOCK_write);

		if (ret)
			six_acquire(&lock->dep_map, 1, type == SIX_LOCK_read, ip);

		return ret;
	}

	v = READ_ONCE(lock->state.v);
	do {
		old.v = v;

		if (old.seq != seq || old.v & l[type].lock_fail)
			return false;
	} while ((v = atomic64_cmpxchg_acquire(&lock->state.counter,
				old.v,
				old.v + l[type].lock_val)) != old.v);

	six_set_owner(lock, type, old, current);
	if (type != SIX_LOCK_write)
		six_acquire(&lock->dep_map, 1, type == SIX_LOCK_read, ip);
	return true;
}
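/*
 * Optimistic spinning: rather than sleeping immediately, spin for a short
 * while waiting for the current (intent) owner to release the lock, as long
 * as that owner is still running on a CPU.
 */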
#ifdef CONFIG_LOCK_SPIN_ON_OWNER

static inline bool six_can_spin_on_owner(struct six_lock *lock)
{
	struct task_struct *owner;
	bool ret;

	if (need_resched())
		return false;

	rcu_read_lock();
	owner = READ_ONCE(lock->owner);
	ret = !owner || owner_on_cpu(owner);
	rcu_read_unlock();

	return ret;
}
static inline void six_set_nospin(struct six_lock *lock)
{
	union six_lock_state old, new;
	u64 v = READ_ONCE(lock->state.v);

	do {
		new.v = old.v = v;
		new.nospin = true;
	} while ((v = atomic64_cmpxchg(&lock->state.counter, old.v, new.v)) != old.v);
}
static inline bool six_spin_on_owner(struct six_lock *lock,
				     struct task_struct *owner,
				     u64 end_time)
{
	bool ret = true;
	unsigned loop = 0;

	rcu_read_lock();
	while (lock->owner == owner) {
		/*
		 * Ensure we emit the owner->on_cpu, dereference _after_
		 * checking lock->owner still matches owner. If that fails,
		 * owner might point to freed memory. If it still matches,
		 * the rcu_read_lock() ensures the memory stays valid.
		 */
		barrier();

		if (!owner_on_cpu(owner) || need_resched()) {
			ret = false;
			break;
		}

		if (!(++loop & 0xf) && (time_after64(sched_clock(), end_time))) {
			six_set_nospin(lock);
			ret = false;
			break;
		}

		cpu_relax();
	}
	rcu_read_unlock();

	return ret;
}
static inline bool six_optimistic_spin(struct six_lock *lock, enum six_lock_type type)
{
	struct task_struct *task = current;
	u64 end_time;

	if (type == SIX_LOCK_write)
		return false;

	preempt_disable();
	if (!six_can_spin_on_owner(lock))
		goto fail;

	if (!osq_lock(&lock->osq))
		goto fail;

	end_time = sched_clock() + 10 * NSEC_PER_USEC;

	while (1) {
		struct task_struct *owner;

		/*
		 * If there's an owner, wait for it to either
		 * release the lock or go to sleep.
		 */
		owner = READ_ONCE(lock->owner);
		if (owner && !six_spin_on_owner(lock, owner, end_time))
			break;

		if (do_six_trylock_type(lock, type, false)) {
			osq_unlock(&lock->osq);
			preempt_enable();
			return true;
		}

		/*
		 * When there's no owner, we might have preempted between the
		 * owner acquiring the lock and setting the owner field. If
		 * we're an RT task that will live-lock because we won't let
		 * the owner complete.
		 */
		if (!owner && (need_resched() || rt_task(task)))
			break;

		/*
		 * The cpu_relax() call is a compiler barrier which forces
		 * everything in this loop to be re-loaded. We don't need
		 * memory barriers as we'll eventually observe the right
		 * values at the cost of a few extra spins.
		 */
		cpu_relax();
	}

	osq_unlock(&lock->osq);
fail:
	preempt_enable();

	/*
	 * If we fell out of the spin path because of need_resched(),
	 * reschedule now, before we try-lock again. This avoids getting
	 * scheduled out right after we obtained the lock.
	 */
	if (need_resched())
		schedule();

	return false;
}
#else /* CONFIG_LOCK_SPIN_ON_OWNER */

static inline bool six_optimistic_spin(struct six_lock *lock, enum six_lock_type type)
{
	return false;
}

#endif
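/*
 * Slow path: the lock couldn't be taken immediately.  Optionally spin, then
 * add ourselves to the wait list and sleep until a lock holder hands us the
 * lock (wait->lock_acquired) or should_sleep_fn tells us to give up.
 */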
noinline
static int __six_lock_type_slowpath(struct six_lock *lock, enum six_lock_type type,
				    struct six_lock_waiter *wait,
				    six_lock_should_sleep_fn should_sleep_fn, void *p,
				    unsigned long ip)
{
	union six_lock_state old;
	int ret = 0;

	if (type == SIX_LOCK_write) {
		EBUG_ON(lock->state.write_locking);
		atomic64_add(__SIX_VAL(write_locking, 1), &lock->state.counter);
		smp_mb__after_atomic();
	}

	trace_contention_begin(lock, 0);
	lock_contended(&lock->dep_map, ip);

	if (six_optimistic_spin(lock, type))
		goto out;

	wait->task		= current;
	wait->lock_want		= type;
	wait->lock_acquired	= false;

	raw_spin_lock(&lock->wait_lock);
	if (!(lock->state.waiters & (1 << type)))
		set_bit(waitlist_bitnr(type), (unsigned long *) &lock->state.v);
	/*
	 * Retry taking the lock after taking waitlist lock, have raced with an
	 * unlock:
	 */
	ret = __do_six_trylock_type(lock, type, current, false);
	if (ret <= 0) {
		wait->start_time = local_clock();

		if (!list_empty(&lock->wait_list)) {
			struct six_lock_waiter *last =
				list_last_entry(&lock->wait_list,
					struct six_lock_waiter, list);

			if (time_before_eq64(wait->start_time, last->start_time))
				wait->start_time = last->start_time + 1;
		}

		list_add_tail(&wait->list, &lock->wait_list);
	}
	raw_spin_unlock(&lock->wait_lock);
	if (unlikely(ret > 0)) {
		ret = 0;
		goto out;
	}

	if (unlikely(ret < 0)) {
		__six_lock_wakeup(lock, -ret - 1);
		ret = 0;
	}
	while (1) {
		set_current_state(TASK_UNINTERRUPTIBLE);

		if (wait->lock_acquired)
			break;

		ret = should_sleep_fn ? should_sleep_fn(lock, p) : 0;
		if (unlikely(ret)) {
			raw_spin_lock(&lock->wait_lock);
			if (!wait->lock_acquired)
				list_del(&wait->list);
			raw_spin_unlock(&lock->wait_lock);

			if (wait->lock_acquired)
				do_six_unlock_type(lock, type);
			break;
		}

		schedule();
	}

	__set_current_state(TASK_RUNNING);
out:
	if (ret && type == SIX_LOCK_write && lock->state.write_locking) {
		old.v = atomic64_sub_return(__SIX_VAL(write_locking, 1),
					    &lock->state.counter);
		six_lock_wakeup(lock, old, SIX_LOCK_read);
	}
	trace_contention_end(lock, 0);

	return ret;
}
__always_inline __flatten
static int __six_lock_type_waiter(struct six_lock *lock, enum six_lock_type type,
				  struct six_lock_waiter *wait,
				  six_lock_should_sleep_fn should_sleep_fn, void *p,
				  unsigned long ip)
{
	int ret;

	wait->start_time = 0;

	if (type != SIX_LOCK_write)
		six_acquire(&lock->dep_map, 0, type == SIX_LOCK_read, ip);

	ret = do_six_trylock_type(lock, type, true) ? 0
		: __six_lock_type_slowpath(lock, type, wait, should_sleep_fn, p, ip);

	if (ret && type != SIX_LOCK_write)
		six_release(&lock->dep_map, ip);
	if (!ret)
		lock_acquired(&lock->dep_map, ip);

	return ret;
}
__always_inline
static int __six_lock_type(struct six_lock *lock, enum six_lock_type type,
			   six_lock_should_sleep_fn should_sleep_fn, void *p,
			   unsigned long ip)
{
	struct six_lock_waiter wait;

	return __six_lock_type_waiter(lock, type, &wait, should_sleep_fn, p, ip);
}
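/*
 * Drop a lock of @type: clear the owner, then either decrement our percpu
 * read counter or subtract unlock_val from the state word, and finally wake
 * the waitlist that this unlock may unblock.
 */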
__always_inline __flatten
static void do_six_unlock_type(struct six_lock *lock, enum six_lock_type type)
{
	const struct six_lock_vals l[] = LOCK_VALS;
	union six_lock_state state;

	if (type == SIX_LOCK_intent)
		lock->owner = NULL;

	if (type == SIX_LOCK_read &&
	    lock->readers) {
		smp_mb(); /* unlock barrier */
		this_cpu_dec(*lock->readers);
		smp_mb(); /* between unlocking and checking for waiters */
		state.v = READ_ONCE(lock->state.v);
	} else {
		u64 v = l[type].unlock_val;

		if (type != SIX_LOCK_read)
			v -= lock->state.v & __SIX_VAL(nospin, 1);

		EBUG_ON(!(lock->state.v & l[type].held_mask));
		state.v = atomic64_add_return_release(v, &lock->state.counter);
	}

	six_lock_wakeup(lock, state, l[type].unlock_wakeup);
}
__always_inline __flatten
static void __six_unlock_type(struct six_lock *lock, enum six_lock_type type,
			      unsigned long ip)
{
	EBUG_ON(type == SIX_LOCK_write &&
		!(lock->state.v & __SIX_LOCK_HELD_intent));
	EBUG_ON((type == SIX_LOCK_write ||
		 type == SIX_LOCK_intent) &&
		lock->owner != current);

	if (type != SIX_LOCK_write)
		six_release(&lock->dep_map, ip);

	if (type == SIX_LOCK_intent &&
	    lock->intent_lock_recurse) {
		--lock->intent_lock_recurse;
		return;
	}

	do_six_unlock_type(lock, type);
}
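/*
 * Stamp out the per-type public entry points (six_trylock_ip_read(),
 * six_lock_ip_intent(), six_unlock_ip_write(), ...) for each of the three
 * lock types:
 */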
#define __SIX_LOCK(type)						\
bool six_trylock_ip_##type(struct six_lock *lock, unsigned long ip)	\
{									\
	return __six_trylock_type(lock, SIX_LOCK_##type, ip);		\
}									\
EXPORT_SYMBOL_GPL(six_trylock_ip_##type);				\
									\
bool six_relock_ip_##type(struct six_lock *lock, u32 seq, unsigned long ip)\
{									\
	return __six_relock_type(lock, SIX_LOCK_##type, seq, ip);	\
}									\
EXPORT_SYMBOL_GPL(six_relock_ip_##type);				\
									\
int six_lock_ip_##type(struct six_lock *lock,				\
		       six_lock_should_sleep_fn should_sleep_fn, void *p,\
		       unsigned long ip)				\
{									\
	return __six_lock_type(lock, SIX_LOCK_##type, should_sleep_fn, p, ip);\
}									\
EXPORT_SYMBOL_GPL(six_lock_ip_##type);					\
									\
int six_lock_ip_waiter_##type(struct six_lock *lock,			\
			      struct six_lock_waiter *wait,		\
			      six_lock_should_sleep_fn should_sleep_fn, void *p,\
			      unsigned long ip)				\
{									\
	return __six_lock_type_waiter(lock, SIX_LOCK_##type, wait, should_sleep_fn, p, ip);\
}									\
EXPORT_SYMBOL_GPL(six_lock_ip_waiter_##type);				\
									\
void six_unlock_ip_##type(struct six_lock *lock, unsigned long ip)	\
{									\
	__six_unlock_type(lock, SIX_LOCK_##type, ip);			\
}									\
EXPORT_SYMBOL_GPL(six_unlock_ip_##type);

__SIX_LOCK(read)
__SIX_LOCK(intent)
__SIX_LOCK(write)

#undef __SIX_LOCK
/* Convert from intent to read: */
void six_lock_downgrade(struct six_lock *lock)
{
	six_lock_increment(lock, SIX_LOCK_read);
	six_unlock_intent(lock);
}
EXPORT_SYMBOL_GPL(six_lock_downgrade);
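/*
 * Attempt to convert a read lock to an intent lock; fails if another thread
 * already holds intent.
 */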
bool six_lock_tryupgrade(struct six_lock *lock)
{
	union six_lock_state old, new;
	u64 v = READ_ONCE(lock->state.v);

	do {
		new.v = old.v = v;

		if (new.intent_lock)
			return false;

		if (!lock->readers) {
			EBUG_ON(!new.read_lock);
			new.read_lock--;
		}

		new.intent_lock = 1;
	} while ((v = atomic64_cmpxchg_acquire(&lock->state.counter,
				old.v, new.v)) != old.v);

	if (lock->readers)
		this_cpu_dec(*lock->readers);

	six_set_owner(lock, SIX_LOCK_intent, old, current);

	return true;
}
EXPORT_SYMBOL_GPL(six_lock_tryupgrade);
bool six_trylock_convert(struct six_lock *lock,
			 enum six_lock_type from,
			 enum six_lock_type to)
{
	EBUG_ON(to == SIX_LOCK_write || from == SIX_LOCK_write);

	if (to == from)
		return true;

	if (to == SIX_LOCK_read) {
		six_lock_downgrade(lock);
		return true;
	} else {
		return six_lock_tryupgrade(lock);
	}
}
EXPORT_SYMBOL_GPL(six_trylock_convert);
/*
 * Increment read/intent lock count, assuming we already have it read or intent
 * locked:
 */
void six_lock_increment(struct six_lock *lock, enum six_lock_type type)
{
	const struct six_lock_vals l[] = LOCK_VALS;

	six_acquire(&lock->dep_map, 0, type == SIX_LOCK_read, _RET_IP_);

	/* XXX: assert already locked, and that we don't overflow: */

	switch (type) {
	case SIX_LOCK_read:
		if (lock->readers)
			this_cpu_inc(*lock->readers);
		else {
			EBUG_ON(!lock->state.read_lock &&
				!lock->state.intent_lock);
			atomic64_add(l[type].lock_val, &lock->state.counter);
		}
		break;
	case SIX_LOCK_intent:
		EBUG_ON(!lock->state.intent_lock);
		lock->intent_lock_recurse++;
		break;
	case SIX_LOCK_write:
		BUG();
		break;
	}
}
EXPORT_SYMBOL_GPL(six_lock_increment);
void six_lock_wakeup_all(struct six_lock *lock)
{
	union six_lock_state state = lock->state;
	struct six_lock_waiter *w;

	six_lock_wakeup(lock, state, SIX_LOCK_read);
	six_lock_wakeup(lock, state, SIX_LOCK_intent);
	six_lock_wakeup(lock, state, SIX_LOCK_write);

	raw_spin_lock(&lock->wait_lock);
	list_for_each_entry(w, &lock->wait_list, list)
		wake_up_process(w->task);
	raw_spin_unlock(&lock->wait_lock);
}
EXPORT_SYMBOL_GPL(six_lock_wakeup_all);
void six_lock_pcpu_free(struct six_lock *lock)
{
	BUG_ON(lock->readers && pcpu_read_count(lock));
	BUG_ON(lock->state.read_lock);

	free_percpu(lock->readers);
	lock->readers = NULL;
}
EXPORT_SYMBOL_GPL(six_lock_pcpu_free);
void six_lock_pcpu_alloc(struct six_lock *lock)
{
#ifdef __KERNEL__
	if (!lock->readers)
		lock->readers = alloc_percpu(unsigned);
#endif
}
EXPORT_SYMBOL_GPL(six_lock_pcpu_alloc);
/*
 * Returns lock held counts, for both read and intent
 */
struct six_lock_count six_lock_counts(struct six_lock *lock)
{
	struct six_lock_count ret;

	ret.n[SIX_LOCK_read]	= 0;
	ret.n[SIX_LOCK_intent]	= lock->state.intent_lock + lock->intent_lock_recurse;
	ret.n[SIX_LOCK_write]	= lock->state.seq & 1;

	if (!lock->readers)
		ret.n[SIX_LOCK_read] += lock->state.read_lock;
	else {
		int cpu;

		for_each_possible_cpu(cpu)
			ret.n[SIX_LOCK_read] += *per_cpu_ptr(lock->readers, cpu);
	}

	return ret;
}
EXPORT_SYMBOL_GPL(six_lock_counts);