// SPDX-License-Identifier: GPL-2.0

#include <linux/export.h>
#include <linux/log2.h>
#include <linux/percpu.h>
#include <linux/preempt.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/sched/clock.h>
#include <linux/sched/rt.h>
#include <linux/six.h>
#include <linux/slab.h>

#include <trace/events/lock.h>

#ifdef DEBUG
#define EBUG_ON(cond)		BUG_ON(cond)
#else
#define EBUG_ON(cond)		do {} while (0)
#endif

#define six_acquire(l, t, r, ip)	lock_acquire(l, 0, t, r, 1, NULL, ip)
#define six_release(l, ip)		lock_release(l, ip)

static void do_six_unlock_type(struct six_lock *lock, enum six_lock_type type);

struct six_lock_vals {
	/* Value we add to the lock in order to take the lock: */
	u64			lock_val;

	/* If the lock has this value (used as a mask), taking the lock fails: */
	u64			lock_fail;

	/* Value we add to the lock in order to release the lock: */
	u64			unlock_val;

	/* Mask that indicates lock is held for this type: */
	u64			held_mask;

	/* Waitlist we wakeup when releasing the lock: */
	enum six_lock_type	unlock_wakeup;
};

#define __SIX_LOCK_HELD_read	__SIX_VAL(read_lock, ~0)
#define __SIX_LOCK_HELD_intent	__SIX_VAL(intent_lock, ~0)
#define __SIX_LOCK_HELD_write	__SIX_VAL(seq, 1)

#define LOCK_VALS {							\
	[SIX_LOCK_read] = {						\
		.lock_val	= __SIX_VAL(read_lock, 1),		\
		.lock_fail	= __SIX_LOCK_HELD_write + __SIX_VAL(write_locking, 1),\
		.unlock_val	= -__SIX_VAL(read_lock, 1),		\
		.held_mask	= __SIX_LOCK_HELD_read,			\
		.unlock_wakeup	= SIX_LOCK_write,			\
	},								\
	[SIX_LOCK_intent] = {						\
		.lock_val	= __SIX_VAL(intent_lock, 1),		\
		.lock_fail	= __SIX_LOCK_HELD_intent,		\
		.unlock_val	= -__SIX_VAL(intent_lock, 1),		\
		.held_mask	= __SIX_LOCK_HELD_intent,		\
		.unlock_wakeup	= SIX_LOCK_intent,			\
	},								\
	[SIX_LOCK_write] = {						\
		.lock_val	= __SIX_VAL(seq, 1),			\
		.lock_fail	= __SIX_LOCK_HELD_read,			\
		.unlock_val	= __SIX_VAL(seq, 1),			\
		.held_mask	= __SIX_LOCK_HELD_write,		\
		.unlock_wakeup	= SIX_LOCK_read,			\
	},								\
}
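
/*
 * How the table above is consumed (illustrative sketch only -- the real logic,
 * including waiter bits and the percpu-reader fast path, is in
 * __do_six_trylock_type() below):
 *
 *	old.v = READ_ONCE(lock->state.v);
 *	if (old.v & l[type].lock_fail)
 *		return false;			// conflicting holder
 *	atomic64_cmpxchg_acquire(&lock->state.counter,
 *				 old.v, old.v + l[type].lock_val);
 */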

static inline void six_set_owner(struct six_lock *lock, enum six_lock_type type,
				 union six_lock_state old,
				 struct task_struct *owner)
{
	if (type != SIX_LOCK_intent)
		return;

	if (!old.intent_lock) {
		EBUG_ON(lock->owner);
		lock->owner = owner;
	} else {
		EBUG_ON(lock->owner != current);
	}
}

static inline unsigned pcpu_read_count(struct six_lock *lock)
{
	unsigned read_count = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		read_count += *per_cpu_ptr(lock->readers, cpu);
	return read_count;
}

/* This is probably up there with the more evil things I've done */
#define waitlist_bitnr(id) ilog2((((union six_lock_state) { .waiters = 1 << (id) }).l))
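
/*
 * That is, waitlist_bitnr(id) evaluates to the bit offset of the waiter bit
 * for lock type @id within the 64-bit lock word, so the result can be passed
 * directly to set_bit()/clear_bit() on &lock->state.v (see the waitlist
 * handling below).
 */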

static int __do_six_trylock_type(struct six_lock *lock,
				 enum six_lock_type type,
				 struct task_struct *task,
				 bool try)
{
	const struct six_lock_vals l[] = LOCK_VALS;
	union six_lock_state old, new;
	int ret;
	u64 v;

	EBUG_ON(type == SIX_LOCK_write && lock->owner != task);
	EBUG_ON(type == SIX_LOCK_write && (lock->state.seq & 1));
	EBUG_ON(type == SIX_LOCK_write && (try != !(lock->state.write_locking)));

	/*
	 * Percpu reader mode:
	 *
	 * The basic idea behind this algorithm is that you can implement a lock
	 * between two threads without any atomics, just memory barriers:
	 *
	 * For two threads you'll need two variables, one variable for "thread a
	 * has the lock" and another for "thread b has the lock".
	 *
	 * To take the lock, a thread sets its variable indicating that it holds
	 * the lock, then issues a full memory barrier, then reads from the
	 * other thread's variable to check if the other thread thinks it has
	 * the lock. If we raced, we backoff and retry/sleep.
	 */
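
	/*
	 * Illustrative sketch of that two-thread protocol, with hypothetical
	 * variables (the code below generalizes the idea to one counter per
	 * cpu plus the shared lock word):
	 *
	 *	WRITE_ONCE(i_have_it, 1);
	 *	smp_mb();
	 *	if (READ_ONCE(other_has_it))
	 *		WRITE_ONCE(i_have_it, 0);	// raced: back off and retry/sleep
	 *	else
	 *		...				// lock acquired
	 */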

	if (type == SIX_LOCK_read && lock->readers) {
		preempt_disable();
		this_cpu_inc(*lock->readers); /* signal that we own lock */

		smp_mb();

		old.v = READ_ONCE(lock->state.v);
		ret = !(old.v & l[type].lock_fail);

		this_cpu_sub(*lock->readers, !ret);
		preempt_enable();

		/*
		 * If we failed because a writer was trying to take the
		 * lock, issue a wakeup because we might have caused a
		 * spurious trylock failure:
		 */
#if 0
		/*
		 * This code should be sufficient, but we're seeing unexplained
		 * lost wakeups:
		 */
		if (old.write_locking)
			ret = -1 - SIX_LOCK_write;
#else
		if (!ret)
			ret = -1 - SIX_LOCK_write;
#endif
	} else if (type == SIX_LOCK_write && lock->readers) {
		if (try) {
			atomic64_add(__SIX_VAL(write_locking, 1),
				     &lock->state.counter);
			smp_mb__after_atomic();
		} else if (!(lock->state.waiters & (1 << SIX_LOCK_write))) {
			atomic64_add(__SIX_VAL(waiters, 1 << SIX_LOCK_write),
				     &lock->state.counter);
			/*
			 * pairs with barrier after unlock and before checking
			 * for readers in unlock path
			 */
			smp_mb__after_atomic();
		}

		ret = !pcpu_read_count(lock);

		/*
		 * On success, we increment lock->seq; also we clear
		 * write_locking unless we failed from the lock path:
		 */
		v = 0;
		if (ret)
			v += __SIX_VAL(seq, 1);
		if (ret || try)
			v -= __SIX_VAL(write_locking, 1);

		if (try && !ret) {
			old.v = atomic64_add_return(v, &lock->state.counter);
			if (old.waiters & (1 << SIX_LOCK_read))
				ret = -1 - SIX_LOCK_read;
		} else {
			atomic64_add(v, &lock->state.counter);
		}
	} else {
		v = READ_ONCE(lock->state.v);
		do {
			new.v = old.v = v;

			if (!(old.v & l[type].lock_fail)) {
				new.v += l[type].lock_val;

				if (type == SIX_LOCK_write)
					new.write_locking = 0;
			} else if (!try && !(new.waiters & (1 << type)))
				new.waiters |= 1 << type;
			else
				break; /* waiting bit already set */
		} while ((v = atomic64_cmpxchg_acquire(&lock->state.counter,
					old.v, new.v)) != old.v);

		ret = !(old.v & l[type].lock_fail);

		EBUG_ON(ret && !(lock->state.v & l[type].held_mask));
	}

	if (ret > 0)
		six_set_owner(lock, type, old, task);

	EBUG_ON(type == SIX_LOCK_write && (try || ret > 0) && (lock->state.write_locking));

	return ret;
}

static inline void __six_lock_wakeup(struct six_lock *lock, enum six_lock_type lock_type)
{
	struct six_lock_waiter *w, *next;
	struct task_struct *task;
	bool saw_one;
	int ret;
again:
	ret = 0;
	saw_one = false;
	raw_spin_lock(&lock->wait_lock);

	list_for_each_entry_safe(w, next, &lock->wait_list, list) {
		if (w->lock_want != lock_type)
			continue;

		if (saw_one && lock_type != SIX_LOCK_read)
			goto unlock;
		saw_one = true;

		ret = __do_six_trylock_type(lock, lock_type, w->task, false);
		if (ret <= 0)
			goto unlock;

		__list_del(w->list.prev, w->list.next);
		task = w->task;
		/*
		 * Do no writes to @w besides setting lock_acquired - otherwise
		 * we would need a memory barrier:
		 */
		barrier();
		w->lock_acquired = true;
		wake_up_process(task);
	}

	clear_bit(waitlist_bitnr(lock_type), (unsigned long *) &lock->state.v);
unlock:
	raw_spin_unlock(&lock->wait_lock);

	if (ret < 0) {
		lock_type = -ret - 1;
		goto again;
	}
}

static inline void six_lock_wakeup(struct six_lock *lock,
				   union six_lock_state state,
				   enum six_lock_type lock_type)
{
	if (lock_type == SIX_LOCK_write && state.read_lock)
		return;

	if (!(state.waiters & (1 << lock_type)))
		return;

	__six_lock_wakeup(lock, lock_type);
}

static bool do_six_trylock_type(struct six_lock *lock,
				enum six_lock_type type,
				bool try)
{
	int ret;

	ret = __do_six_trylock_type(lock, type, current, try);
	if (ret < 0)
		__six_lock_wakeup(lock, -ret - 1);

	return ret > 0;
}

__always_inline __flatten
static bool __six_trylock_type(struct six_lock *lock, enum six_lock_type type,
			       unsigned long ip)
{
	if (!do_six_trylock_type(lock, type, true))
		return false;

	if (type != SIX_LOCK_write)
		six_acquire(&lock->dep_map, 1, type == SIX_LOCK_read, ip);
	return true;
}

__always_inline __flatten
static bool __six_relock_type(struct six_lock *lock, enum six_lock_type type,
			      unsigned seq, unsigned long ip)
{
	const struct six_lock_vals l[] = LOCK_VALS;
	union six_lock_state old;
	u64 v;

	EBUG_ON(type == SIX_LOCK_write);

	if (type == SIX_LOCK_read &&
	    lock->readers) {
		bool ret;

		preempt_disable();
		this_cpu_inc(*lock->readers);

		smp_mb();

		old.v = READ_ONCE(lock->state.v);
		ret = !(old.v & l[type].lock_fail) && old.seq == seq;

		this_cpu_sub(*lock->readers, !ret);
		preempt_enable();

		/*
		 * Similar to the lock path, we may have caused a spurious write
		 * lock fail and need to issue a wakeup:
		 */
		if (ret)
			six_acquire(&lock->dep_map, 1, type == SIX_LOCK_read, ip);
		else
			six_lock_wakeup(lock, old, SIX_LOCK_write);

		return ret;
	}

	v = READ_ONCE(lock->state.v);
	do {
		old.v = v;

		if (old.seq != seq || old.v & l[type].lock_fail)
			return false;
	} while ((v = atomic64_cmpxchg_acquire(&lock->state.counter,
				old.v,
				old.v + l[type].lock_val)) != old.v);

	six_set_owner(lock, type, old, current);
	if (type != SIX_LOCK_write)
		six_acquire(&lock->dep_map, 1, type == SIX_LOCK_read, ip);
	return true;
}

#ifdef CONFIG_LOCK_SPIN_ON_OWNER

static inline bool six_can_spin_on_owner(struct six_lock *lock)
{
	struct task_struct *owner;
	bool ret;

	if (need_resched())
		return false;

	rcu_read_lock();
	owner = READ_ONCE(lock->owner);
	ret = !owner || owner_on_cpu(owner);
	rcu_read_unlock();

	return ret;
}

static inline void six_set_nospin(struct six_lock *lock)
{
	union six_lock_state old, new;
	u64 v = READ_ONCE(lock->state.v);

	do {
		new.v = old.v = v;
		new.nospin = true;
	} while ((v = atomic64_cmpxchg(&lock->state.counter, old.v, new.v)) != old.v);
}

static inline bool six_spin_on_owner(struct six_lock *lock,
				     struct task_struct *owner,
				     u64 end_time)
{
	bool ret = true;
	unsigned loop = 0;

	rcu_read_lock();
	while (lock->owner == owner) {
		/*
		 * Ensure we emit the owner->on_cpu, dereference _after_
		 * checking lock->owner still matches owner. If that fails,
		 * owner might point to freed memory. If it still matches,
		 * the rcu_read_lock() ensures the memory stays valid.
		 */
		barrier();

		if (!owner_on_cpu(owner) || need_resched()) {
			ret = false;
			break;
		}

		if (!(++loop & 0xf) && (time_after64(sched_clock(), end_time))) {
			six_set_nospin(lock);
			ret = false;
			break;
		}

		cpu_relax();
	}
	rcu_read_unlock();

	return ret;
}

static inline bool six_optimistic_spin(struct six_lock *lock, enum six_lock_type type)
{
	struct task_struct *task = current;
	u64 end_time;

	if (type == SIX_LOCK_write)
		return false;

	preempt_disable();
	if (!six_can_spin_on_owner(lock))
		goto fail;

	if (!osq_lock(&lock->osq))
		goto fail;

	end_time = sched_clock() + 10 * NSEC_PER_USEC;

	while (1) {
		struct task_struct *owner;

		/*
		 * If there's an owner, wait for it to either
		 * release the lock or go to sleep.
		 */
		owner = READ_ONCE(lock->owner);
		if (owner && !six_spin_on_owner(lock, owner, end_time))
			break;

		if (do_six_trylock_type(lock, type, false)) {
			osq_unlock(&lock->osq);
			preempt_enable();
			return true;
		}

		/*
		 * When there's no owner, we might have preempted between the
		 * owner acquiring the lock and setting the owner field. If
		 * we're an RT task that will live-lock because we won't let
		 * the owner complete.
		 */
		if (!owner && (need_resched() || rt_task(task)))
			break;

		/*
		 * The cpu_relax() call is a compiler barrier which forces
		 * everything in this loop to be re-loaded. We don't need
		 * memory barriers as we'll eventually observe the right
		 * values at the cost of a few extra spins.
		 */
		cpu_relax();
	}

	osq_unlock(&lock->osq);
fail:
	preempt_enable();

	/*
	 * If we fell out of the spin path because of need_resched(),
	 * reschedule now, before we try-lock again. This avoids getting
	 * scheduled out right after we obtained the lock.
	 */
	if (need_resched())
		schedule();

	return false;
}

#else /* CONFIG_LOCK_SPIN_ON_OWNER */

static inline bool six_optimistic_spin(struct six_lock *lock, enum six_lock_type type)
{
	return false;
}

#endif

static int __six_lock_type_slowpath(struct six_lock *lock, enum six_lock_type type,
				    struct six_lock_waiter *wait,
				    six_lock_should_sleep_fn should_sleep_fn, void *p,
				    unsigned long ip)
{
	union six_lock_state old;
	int ret = 0;

	if (type == SIX_LOCK_write) {
		EBUG_ON(lock->state.write_locking);
		atomic64_add(__SIX_VAL(write_locking, 1), &lock->state.counter);
		smp_mb__after_atomic();
	}

	trace_contention_begin(lock, 0);
	lock_contended(&lock->dep_map, ip);

	if (six_optimistic_spin(lock, type))
		goto out;

	wait->task		= current;
	wait->lock_want		= type;
	wait->lock_acquired	= false;

	raw_spin_lock(&lock->wait_lock);
	if (!(lock->state.waiters & (1 << type)))
		set_bit(waitlist_bitnr(type), (unsigned long *) &lock->state.v);
	/*
	 * Retry taking the lock after taking waitlist lock, have raced with an
	 * unlock:
	 */
	ret = __do_six_trylock_type(lock, type, current, false);
	if (ret <= 0) {
		wait->start_time = local_clock();

		if (!list_empty(&lock->wait_list)) {
			struct six_lock_waiter *last =
				list_last_entry(&lock->wait_list,
					struct six_lock_waiter, list);

			if (time_before_eq64(wait->start_time, last->start_time))
				wait->start_time = last->start_time + 1;
		}

		list_add_tail(&wait->list, &lock->wait_list);
	}
	raw_spin_unlock(&lock->wait_lock);

	if (unlikely(ret > 0)) {
		ret = 0;
		goto out;
	}

	if (unlikely(ret < 0)) {
		__six_lock_wakeup(lock, -ret - 1);
		ret = 0;
	}

	while (1) {
		set_current_state(TASK_UNINTERRUPTIBLE);

		if (wait->lock_acquired)
			break;

		ret = should_sleep_fn ? should_sleep_fn(lock, p) : 0;
		if (unlikely(ret)) {
			raw_spin_lock(&lock->wait_lock);
			if (!wait->lock_acquired)
				list_del(&wait->list);
			raw_spin_unlock(&lock->wait_lock);

			if (wait->lock_acquired)
				do_six_unlock_type(lock, type);
			break;
		}

		schedule();
	}

	__set_current_state(TASK_RUNNING);
out:
	if (ret && type == SIX_LOCK_write && lock->state.write_locking) {
		old.v = atomic64_sub_return(__SIX_VAL(write_locking, 1),
					    &lock->state.counter);
		six_lock_wakeup(lock, old, SIX_LOCK_read);
	}
	trace_contention_end(lock, 0);

	return ret;
}

__always_inline __flatten
static int __six_lock_type_waiter(struct six_lock *lock, enum six_lock_type type,
				  struct six_lock_waiter *wait,
				  six_lock_should_sleep_fn should_sleep_fn, void *p,
				  unsigned long ip)
{
	int ret;

	wait->start_time = 0;

	if (type != SIX_LOCK_write)
		six_acquire(&lock->dep_map, 0, type == SIX_LOCK_read, ip);

	ret = do_six_trylock_type(lock, type, true) ? 0
		: __six_lock_type_slowpath(lock, type, wait, should_sleep_fn, p, ip);

	if (ret && type != SIX_LOCK_write)
		six_release(&lock->dep_map, ip);
	if (!ret)
		lock_acquired(&lock->dep_map, ip);

	return ret;
}

static int __six_lock_type(struct six_lock *lock, enum six_lock_type type,
			   six_lock_should_sleep_fn should_sleep_fn, void *p,
			   unsigned long ip)
{
	struct six_lock_waiter wait;

	return __six_lock_type_waiter(lock, type, &wait, should_sleep_fn, p, ip);
}

__always_inline __flatten
static void do_six_unlock_type(struct six_lock *lock, enum six_lock_type type)
{
	const struct six_lock_vals l[] = LOCK_VALS;
	union six_lock_state state;

	if (type == SIX_LOCK_intent)
		lock->owner = NULL;

	if (type == SIX_LOCK_read &&
	    lock->readers) {
		smp_mb(); /* unlock barrier */
		this_cpu_dec(*lock->readers);
		smp_mb(); /* between unlocking and checking for waiters */
		state.v = READ_ONCE(lock->state.v);
	} else {
		u64 v = l[type].unlock_val;

		if (type != SIX_LOCK_read)
			v -= lock->state.v & __SIX_VAL(nospin, 1);

		EBUG_ON(!(lock->state.v & l[type].held_mask));
		state.v = atomic64_add_return_release(v, &lock->state.counter);
	}

	six_lock_wakeup(lock, state, l[type].unlock_wakeup);
}

__always_inline __flatten
static void __six_unlock_type(struct six_lock *lock, enum six_lock_type type,
			      unsigned long ip)
{
	EBUG_ON(type == SIX_LOCK_write &&
		!(lock->state.v & __SIX_LOCK_HELD_intent));
	EBUG_ON((type == SIX_LOCK_write ||
		 type == SIX_LOCK_intent) &&
		lock->owner != current);

	if (type != SIX_LOCK_write)
		six_release(&lock->dep_map, ip);

	if (type == SIX_LOCK_intent &&
	    lock->intent_lock_recurse) {
		--lock->intent_lock_recurse;
		return;
	}

	do_six_unlock_type(lock, type);
}

#define __SIX_LOCK(type)						\
bool six_trylock_ip_##type(struct six_lock *lock, unsigned long ip)	\
{									\
	return __six_trylock_type(lock, SIX_LOCK_##type, ip);		\
}									\
EXPORT_SYMBOL_GPL(six_trylock_ip_##type);				\
									\
bool six_relock_ip_##type(struct six_lock *lock, u32 seq, unsigned long ip)\
{									\
	return __six_relock_type(lock, SIX_LOCK_##type, seq, ip);	\
}									\
EXPORT_SYMBOL_GPL(six_relock_ip_##type);				\
									\
int six_lock_ip_##type(struct six_lock *lock,				\
		       six_lock_should_sleep_fn should_sleep_fn, void *p,\
		       unsigned long ip)				\
{									\
	return __six_lock_type(lock, SIX_LOCK_##type, should_sleep_fn, p, ip);\
}									\
EXPORT_SYMBOL_GPL(six_lock_ip_##type);					\
									\
int six_lock_ip_waiter_##type(struct six_lock *lock,			\
			      struct six_lock_waiter *wait,		\
			      six_lock_should_sleep_fn should_sleep_fn, void *p,\
			      unsigned long ip)				\
{									\
	return __six_lock_type_waiter(lock, SIX_LOCK_##type, wait, should_sleep_fn, p, ip);\
}									\
EXPORT_SYMBOL_GPL(six_lock_ip_waiter_##type);				\
									\
void six_unlock_ip_##type(struct six_lock *lock, unsigned long ip)	\
{									\
	__six_unlock_type(lock, SIX_LOCK_##type, ip);			\
}									\
EXPORT_SYMBOL_GPL(six_unlock_ip_##type);

__SIX_LOCK(read)
__SIX_LOCK(intent)
__SIX_LOCK(write)

#undef __SIX_LOCK
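
/*
 * Example use of the generated API (illustrative only; "foo" and its embedded
 * six_lock are hypothetical).  A write lock may only be taken by a thread
 * that already holds the intent lock:
 *
 *	six_lock_ip_intent(&foo->lock, NULL, NULL, _THIS_IP_);
 *	six_lock_ip_write(&foo->lock, NULL, NULL, _THIS_IP_);
 *	... modify foo ...
 *	six_unlock_ip_write(&foo->lock, _THIS_IP_);
 *	six_unlock_ip_intent(&foo->lock, _THIS_IP_);
 *
 * should_sleep_fn and p may be NULL when the caller has no reason to abort
 * the wait.
 */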

/* Convert from intent to read: */
void six_lock_downgrade(struct six_lock *lock)
{
	six_lock_increment(lock, SIX_LOCK_read);
	six_unlock_intent(lock);
}
EXPORT_SYMBOL_GPL(six_lock_downgrade);

bool six_lock_tryupgrade(struct six_lock *lock)
{
	union six_lock_state old, new;
	u64 v = READ_ONCE(lock->state.v);

	do {
		new.v = old.v = v;

		if (new.intent_lock)
			return false;

		if (!lock->readers) {
			EBUG_ON(!new.read_lock);
			new.read_lock--;
		}

		new.intent_lock = 1;
	} while ((v = atomic64_cmpxchg_acquire(&lock->state.counter,
				old.v, new.v)) != old.v);

	if (lock->readers)
		this_cpu_dec(*lock->readers);

	six_set_owner(lock, SIX_LOCK_intent, old, current);

	return true;
}
EXPORT_SYMBOL_GPL(six_lock_tryupgrade);
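
/*
 * Illustrative pattern (hypothetical caller): try to upgrade in place, and if
 * another thread already holds intent, drop the read lock and block for
 * intent instead:
 *
 *	if (!six_lock_tryupgrade(&foo->lock)) {
 *		six_unlock_ip_read(&foo->lock, _THIS_IP_);
 *		six_lock_ip_intent(&foo->lock, NULL, NULL, _THIS_IP_);
 *	}
 */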

bool six_trylock_convert(struct six_lock *lock,
			 enum six_lock_type from,
			 enum six_lock_type to)
{
	EBUG_ON(to == SIX_LOCK_write || from == SIX_LOCK_write);

	if (to == from)
		return true;

	if (to == SIX_LOCK_read) {
		six_lock_downgrade(lock);
		return true;
	} else {
		return six_lock_tryupgrade(lock);
	}
}
EXPORT_SYMBOL_GPL(six_trylock_convert);

/*
 * Increment read/intent lock count, assuming we already have it read or intent
 * locked:
 */
void six_lock_increment(struct six_lock *lock, enum six_lock_type type)
{
	const struct six_lock_vals l[] = LOCK_VALS;

	six_acquire(&lock->dep_map, 0, type == SIX_LOCK_read, _RET_IP_);

	/* XXX: assert already locked, and that we don't overflow: */

	switch (type) {
	case SIX_LOCK_read:
		if (lock->readers) {
			this_cpu_inc(*lock->readers);
		} else {
			EBUG_ON(!lock->state.read_lock &&
				!lock->state.intent_lock);
			atomic64_add(l[type].lock_val, &lock->state.counter);
		}
		break;
	case SIX_LOCK_intent:
		EBUG_ON(!lock->state.intent_lock);
		lock->intent_lock_recurse++;
		break;
	case SIX_LOCK_write:
		BUG();
		break;
	}
}
EXPORT_SYMBOL_GPL(six_lock_increment);
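
/*
 * Illustrative use (hypothetical caller): take an additional read reference
 * on a lock we already hold, to be dropped later with a matching unlock:
 *
 *	six_lock_increment(&foo->lock, SIX_LOCK_read);
 *	...
 *	six_unlock_ip_read(&foo->lock, _THIS_IP_);
 */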

void six_lock_wakeup_all(struct six_lock *lock)
{
	union six_lock_state state = lock->state;
	struct six_lock_waiter *w;

	six_lock_wakeup(lock, state, SIX_LOCK_read);
	six_lock_wakeup(lock, state, SIX_LOCK_intent);
	six_lock_wakeup(lock, state, SIX_LOCK_write);

	raw_spin_lock(&lock->wait_lock);
	list_for_each_entry(w, &lock->wait_list, list)
		wake_up_process(w->task);
	raw_spin_unlock(&lock->wait_lock);
}
EXPORT_SYMBOL_GPL(six_lock_wakeup_all);

void six_lock_pcpu_free(struct six_lock *lock)
{
	BUG_ON(lock->readers && pcpu_read_count(lock));
	BUG_ON(lock->state.read_lock);

	free_percpu(lock->readers);
	lock->readers = NULL;
}
EXPORT_SYMBOL_GPL(six_lock_pcpu_free);

void six_lock_pcpu_alloc(struct six_lock *lock)
{
#ifdef __KERNEL__
	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
		lock->readers = alloc_percpu(unsigned);
#endif
}
EXPORT_SYMBOL_GPL(six_lock_pcpu_alloc);
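
/*
 * Typical use (illustrative; assumes the six_lock_init() helper from the
 * header): switch a long-lived, read-mostly lock into percpu reader mode
 * right after initialization, and tear it down before freeing:
 *
 *	six_lock_init(&foo->lock);
 *	six_lock_pcpu_alloc(&foo->lock);
 *	...
 *	six_lock_pcpu_free(&foo->lock);
 */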

/*
 * Returns lock held counts, for both read and intent
 */
struct six_lock_count six_lock_counts(struct six_lock *lock)
{
	struct six_lock_count ret;

	ret.n[SIX_LOCK_read]   = !lock->readers
		? lock->state.read_lock
		: pcpu_read_count(lock);
	ret.n[SIX_LOCK_intent] = lock->state.intent_lock + lock->intent_lock_recurse;
	ret.n[SIX_LOCK_write]  = lock->state.seq & 1;

	return ret;
}
EXPORT_SYMBOL_GPL(six_lock_counts);
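
/*
 * Example (illustrative, hypothetical caller): assert that at least an intent
 * lock is held before modifying a protected structure:
 *
 *	EBUG_ON(!six_lock_counts(&foo->lock).n[SIX_LOCK_intent]);
 */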