// SPDX-License-Identifier: GPL-2.0

#include <linux/export.h>
#include <linux/log2.h>
#include <linux/percpu.h>
#include <linux/preempt.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/sched/rt.h>
#include <linux/six.h>

#ifdef DEBUG
#define EBUG_ON(cond)		BUG_ON(cond)
#else
#define EBUG_ON(cond)		do {} while (0)
#endif

#define six_acquire(l, t)	lock_acquire(l, 0, t, 0, 0, NULL, _RET_IP_)
#define six_release(l)		lock_release(l, _RET_IP_)

struct six_lock_vals {
	/* Value we add to the lock in order to take the lock: */
	u64			lock_val;

	/* If the lock has this value (used as a mask), taking the lock fails: */
	u64			lock_fail;

	/* Value we add to the lock in order to release the lock: */
	u64			unlock_val;

	/* Mask that indicates lock is held for this type: */
	u64			held_mask;

	/* Waitlist we wakeup when releasing the lock: */
	enum six_lock_type	unlock_wakeup;
};

#define __SIX_LOCK_HELD_read	__SIX_VAL(read_lock, ~0)
#define __SIX_LOCK_HELD_intent	__SIX_VAL(intent_lock, ~0)
#define __SIX_LOCK_HELD_write	__SIX_VAL(seq, 1)
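
/*
 * Note that __SIX_LOCK_HELD_write is just the low bit of the sequence number:
 * taking the write lock adds __SIX_VAL(seq, 1) (making seq odd) and releasing
 * it adds __SIX_VAL(seq, 1) again, so an even sequence number means "not write
 * locked" and seq advances once per write cycle.
 */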

#define LOCK_VALS {							\
	[SIX_LOCK_read] = {						\
		.lock_val	= __SIX_VAL(read_lock, 1),		\
		.lock_fail	= __SIX_LOCK_HELD_write + __SIX_VAL(write_locking, 1),\
		.unlock_val	= -__SIX_VAL(read_lock, 1),		\
		.held_mask	= __SIX_LOCK_HELD_read,			\
		.unlock_wakeup	= SIX_LOCK_write,			\
	},								\
	[SIX_LOCK_intent] = {						\
		.lock_val	= __SIX_VAL(intent_lock, 1),		\
		.lock_fail	= __SIX_LOCK_HELD_intent,		\
		.unlock_val	= -__SIX_VAL(intent_lock, 1),		\
		.held_mask	= __SIX_LOCK_HELD_intent,		\
		.unlock_wakeup	= SIX_LOCK_intent,			\
	},								\
	[SIX_LOCK_write] = {						\
		.lock_val	= __SIX_VAL(seq, 1),			\
		.lock_fail	= __SIX_LOCK_HELD_read,			\
		.unlock_val	= __SIX_VAL(seq, 1),			\
		.held_mask	= __SIX_LOCK_HELD_write,		\
		.unlock_wakeup	= SIX_LOCK_read,			\
	},								\
}

static inline void six_set_owner(struct six_lock *lock, enum six_lock_type type,
				 union six_lock_state old)
{
	if (type != SIX_LOCK_intent)
		return;

	if (!old.intent_lock) {
		EBUG_ON(lock->owner);
		lock->owner = current;
	} else {
		EBUG_ON(lock->owner != current);
	}
}

static inline unsigned pcpu_read_count(struct six_lock *lock)
{
	unsigned read_count = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		read_count += *per_cpu_ptr(lock->readers, cpu);
	return read_count;
}

struct six_lock_waiter {
	struct list_head	list;
	struct task_struct	*task;
};

/* This is probably up there with the more evil things I've done */
#define waitlist_bitnr(id) ilog2((((union six_lock_state) { .waiters = 1 << (id) }).l))
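/*
 * i.e. build a state word with only that waitlist's waiter bit set, then take
 * ilog2() of the raw 64-bit value to recover its bit number - so the result
 * tracks wherever the waiters field happens to sit in union six_lock_state
 * (defined in six.h).
 */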

static inline void six_lock_wakeup(struct six_lock *lock,
				   union six_lock_state state,
				   unsigned waitlist_id)
{
	if (waitlist_id == SIX_LOCK_write) {
		if (state.write_locking && !state.read_lock) {
			struct task_struct *p = READ_ONCE(lock->owner);

			if (p)
				wake_up_process(p);
		}
	} else {
		struct list_head *wait_list = &lock->wait_list[waitlist_id];
		struct six_lock_waiter *w, *next;

		if (!(state.waiters & (1 << waitlist_id)))
			return;

		clear_bit(waitlist_bitnr(waitlist_id),
			  (unsigned long *) &lock->state.v);

		raw_spin_lock(&lock->wait_lock);

		list_for_each_entry_safe(w, next, wait_list, list) {
			list_del_init(&w->list);

			if (wake_up_process(w->task) &&
			    waitlist_id != SIX_LOCK_read) {
				if (!list_empty(wait_list))
					set_bit(waitlist_bitnr(waitlist_id),
						(unsigned long *) &lock->state.v);
				break;
			}
		}

		raw_spin_unlock(&lock->wait_lock);
	}
}

static __always_inline bool do_six_trylock_type(struct six_lock *lock,
						enum six_lock_type type,
						bool try)
{
	const struct six_lock_vals l[] = LOCK_VALS;
	union six_lock_state old, new;
	bool ret;
	u64 v;

	EBUG_ON(type == SIX_LOCK_write && lock->owner != current);
	EBUG_ON(type == SIX_LOCK_write && (lock->state.seq & 1));

	EBUG_ON(type == SIX_LOCK_write && (try != !(lock->state.write_locking)));

	/*
	 * Percpu reader mode:
	 *
	 * The basic idea behind this algorithm is that you can implement a lock
	 * between two threads without any atomics, just memory barriers:
	 *
	 * For two threads you'll need two variables, one variable for "thread a
	 * has the lock" and another for "thread b has the lock".
	 *
	 * To take the lock, a thread sets its variable indicating that it holds
	 * the lock, then issues a full memory barrier, then reads from the
	 * other thread's variable to check if the other thread thinks it has
	 * the lock. If we raced, we backoff and retry/sleep.
	 */
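	/*
	 * Illustrative sketch of that two-variable scheme, from thread a's
	 * point of view (a_locked/b_locked are hypothetical flags, not fields
	 * of struct six_lock):
	 *
	 *	WRITE_ONCE(a_locked, true);
	 *	smp_mb();
	 *	if (!READ_ONCE(b_locked))
	 *		return true;			// we own the lock
	 *	WRITE_ONCE(a_locked, false);		// raced: back off, retry/sleep
	 *
	 * Below, the per-cpu counter lock->readers plays the part of the
	 * per-thread flag on the read side, and the write_locking bit in
	 * lock->state plays it on the write side.
	 */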

	if (type == SIX_LOCK_read && lock->readers) {
retry:
		preempt_disable();
		this_cpu_inc(*lock->readers); /* signal that we own lock */

		smp_mb();

		old.v = READ_ONCE(lock->state.v);
		ret = !(old.v & l[type].lock_fail);

		this_cpu_sub(*lock->readers, !ret);
		preempt_enable();

		/*
		 * If we failed because a writer was trying to take the
		 * lock, issue a wakeup because we might have caused a
		 * spurious trylock failure:
		 */
		if (old.write_locking) {
			struct task_struct *p = READ_ONCE(lock->owner);

			if (p)
				wake_up_process(p);
		}

		/*
		 * If we failed from the lock path and the waiting bit wasn't
		 * set, set it:
		 */
		if (!try && !ret) {
			v = old.v;
			do {
				new.v = old.v = v;

				if (!(old.v & l[type].lock_fail))
					goto retry;

				if (new.waiters & (1 << type))
					break;

				new.waiters |= 1 << type;
			} while ((v = atomic64_cmpxchg(&lock->state.counter,
						       old.v, new.v)) != old.v);
		}
	} else if (type == SIX_LOCK_write && lock->readers) {
		if (try) {
			atomic64_add(__SIX_VAL(write_locking, 1),
				     &lock->state.counter);
			smp_mb__after_atomic();
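			/*
			 * Pairs with the smp_mb() after this_cpu_inc() in the
			 * percpu read path above: either we observe their
			 * reader count below, or they observe write_locking
			 * and fail their trylock.
			 */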
		}

		ret = !pcpu_read_count(lock);

		/*
		 * On success, we increment lock->seq; also we clear
		 * write_locking unless we failed from the lock path:
		 */
		v = 0;
		if (ret)
			v += __SIX_VAL(seq, 1);
		if (ret || !try)
			v -= __SIX_VAL(write_locking, 1);

		if (try && !ret) {
			old.v = atomic64_add_return(v, &lock->state.counter);
			six_lock_wakeup(lock, old, SIX_LOCK_read);
		} else {
			atomic64_add(v, &lock->state.counter);
		}
	} else {
		v = READ_ONCE(lock->state.v);
		do {
			new.v = old.v = v;

			if (!(old.v & l[type].lock_fail)) {
				new.v += l[type].lock_val;

				if (type == SIX_LOCK_write)
					new.write_locking = 0;
			} else if (!try && type != SIX_LOCK_write &&
				   !(new.waiters & (1 << type)))
				new.waiters |= 1 << type;
			else
				break; /* waiting bit already set */
		} while ((v = atomic64_cmpxchg_acquire(&lock->state.counter,
					old.v, new.v)) != old.v);

		ret = !(old.v & l[type].lock_fail);
	}

	if (ret)
		six_set_owner(lock, type, old);

	EBUG_ON(ret && !(lock->state.v & l[type].held_mask));
	EBUG_ON(type == SIX_LOCK_write && (try || ret) && (lock->state.write_locking));

	return ret;
}

__always_inline __flatten
static bool __six_trylock_type(struct six_lock *lock, enum six_lock_type type)
{
	if (!do_six_trylock_type(lock, type, true))
		return false;

	if (type != SIX_LOCK_write)
		six_acquire(&lock->dep_map, 1);
	return true;
}

__always_inline __flatten
static bool __six_relock_type(struct six_lock *lock, enum six_lock_type type,
			      unsigned seq)
{
	const struct six_lock_vals l[] = LOCK_VALS;
	union six_lock_state old;
	u64 v;

	EBUG_ON(type == SIX_LOCK_write);

	if (type == SIX_LOCK_read &&
	    lock->readers) {
		bool ret;

		preempt_disable();
		this_cpu_inc(*lock->readers);

		smp_mb();

		old.v = READ_ONCE(lock->state.v);
		ret = !(old.v & l[type].lock_fail) && old.seq == seq;

		this_cpu_sub(*lock->readers, !ret);
		preempt_enable();

		/*
		 * Similar to the lock path, we may have caused a spurious write
		 * lock fail and need to issue a wakeup:
		 */
		if (old.write_locking) {
			struct task_struct *p = READ_ONCE(lock->owner);

			if (p)
				wake_up_process(p);
		}

		if (ret)
			six_acquire(&lock->dep_map, 1);

		return ret;
	}

	v = READ_ONCE(lock->state.v);
	do {
		old.v = v;

		if (old.seq != seq || old.v & l[type].lock_fail)
			return false;
	} while ((v = atomic64_cmpxchg_acquire(&lock->state.counter,
				old.v,
				old.v + l[type].lock_val)) != old.v);

	six_set_owner(lock, type, old);
	if (type != SIX_LOCK_write)
		six_acquire(&lock->dep_map, 1);
	return true;
}

#ifdef CONFIG_LOCK_SPIN_ON_OWNER

static inline int six_can_spin_on_owner(struct six_lock *lock)
{
	struct task_struct *owner;
	int retval = 1;

	if (need_resched())
		return 0;

	rcu_read_lock();
	owner = READ_ONCE(lock->owner);
	if (owner)
		retval = owner->on_cpu;
	rcu_read_unlock();
	/*
	 * if lock->owner is not set, the mutex owner may have just acquired
	 * it and not set the owner yet or the mutex has been released.
	 */
	return retval;
}

static inline bool six_spin_on_owner(struct six_lock *lock,
				     struct task_struct *owner)
{
	bool ret = true;

	rcu_read_lock();
	while (lock->owner == owner) {
		/*
		 * Ensure we emit the owner->on_cpu, dereference _after_
		 * checking lock->owner still matches owner. If that fails,
		 * owner might point to freed memory. If it still matches,
		 * the rcu_read_lock() ensures the memory stays valid.
		 */
		barrier();

		if (!owner->on_cpu || need_resched()) {
			ret = false;
			break;
		}

		cpu_relax();
	}
	rcu_read_unlock();

	return ret;
}

static inline bool six_optimistic_spin(struct six_lock *lock, enum six_lock_type type)
{
	struct task_struct *task = current;

	if (type == SIX_LOCK_write)
		return false;

	preempt_disable();
	if (!six_can_spin_on_owner(lock))
		goto fail;

	if (!osq_lock(&lock->osq))
		goto fail;

	while (1) {
		struct task_struct *owner;

		/*
		 * If there's an owner, wait for it to either
		 * release the lock or go to sleep.
		 */
		owner = READ_ONCE(lock->owner);
		if (owner && !six_spin_on_owner(lock, owner))
			break;

		if (do_six_trylock_type(lock, type, false)) {
			osq_unlock(&lock->osq);
			preempt_enable();
			return true;
		}

		/*
		 * When there's no owner, we might have preempted between the
		 * owner acquiring the lock and setting the owner field. If
		 * we're an RT task that will live-lock because we won't let
		 * the owner complete.
		 */
		if (!owner && (need_resched() || rt_task(task)))
			break;

		/*
		 * The cpu_relax() call is a compiler barrier which forces
		 * everything in this loop to be re-loaded. We don't need
		 * memory barriers as we'll eventually observe the right
		 * values at the cost of a few extra spins.
		 */
		cpu_relax();
	}

	osq_unlock(&lock->osq);
fail:
	preempt_enable();

	/*
	 * If we fell out of the spin path because of need_resched(),
	 * reschedule now, before we try-lock again. This avoids getting
	 * scheduled out right after we obtained the lock.
	 */
	if (need_resched())
		schedule();

	return false;
}

#else /* CONFIG_LOCK_SPIN_ON_OWNER */

static inline bool six_optimistic_spin(struct six_lock *lock, enum six_lock_type type)
{
	return false;
}

#endif

noinline
static int __six_lock_type_slowpath(struct six_lock *lock, enum six_lock_type type,
				    six_lock_should_sleep_fn should_sleep_fn, void *p)
{
	union six_lock_state old;
	struct six_lock_waiter wait;
	int ret = 0;

	if (type == SIX_LOCK_write) {
		EBUG_ON(lock->state.write_locking);
		atomic64_add(__SIX_VAL(write_locking, 1), &lock->state.counter);
		smp_mb__after_atomic();
	}

	ret = should_sleep_fn ? should_sleep_fn(lock, p) : 0;
	if (ret)
		goto out_before_sleep;

	if (six_optimistic_spin(lock, type))
		goto out_before_sleep;

	lock_contended(&lock->dep_map, _RET_IP_);

	INIT_LIST_HEAD(&wait.list);
	wait.task = current;

	while (1) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (type == SIX_LOCK_write)
			EBUG_ON(lock->owner != current);
		else if (list_empty_careful(&wait.list)) {
			raw_spin_lock(&lock->wait_lock);
			list_add_tail(&wait.list, &lock->wait_list[type]);
			raw_spin_unlock(&lock->wait_lock);
		}

		if (do_six_trylock_type(lock, type, false))
			break;

		ret = should_sleep_fn ? should_sleep_fn(lock, p) : 0;
		if (ret)
			break;

		schedule();
	}

	__set_current_state(TASK_RUNNING);

	if (!list_empty_careful(&wait.list)) {
		raw_spin_lock(&lock->wait_lock);
		list_del_init(&wait.list);
		raw_spin_unlock(&lock->wait_lock);
	}
out_before_sleep:
	if (ret && type == SIX_LOCK_write) {
		old.v = atomic64_sub_return(__SIX_VAL(write_locking, 1),
					    &lock->state.counter);
		six_lock_wakeup(lock, old, SIX_LOCK_read);
	}

	return ret;
}

__always_inline
static int __six_lock_type(struct six_lock *lock, enum six_lock_type type,
			   six_lock_should_sleep_fn should_sleep_fn, void *p)
{
	int ret;

	if (type != SIX_LOCK_write)
		six_acquire(&lock->dep_map, 0);

	ret = do_six_trylock_type(lock, type, true) ? 0
		: __six_lock_type_slowpath(lock, type, should_sleep_fn, p);

	if (ret && type != SIX_LOCK_write)
		six_release(&lock->dep_map);
	if (!ret)
		lock_acquired(&lock->dep_map, _RET_IP_);

	return ret;
}

__always_inline __flatten
static void __six_unlock_type(struct six_lock *lock, enum six_lock_type type)
{
	const struct six_lock_vals l[] = LOCK_VALS;
	union six_lock_state state;

	EBUG_ON(type == SIX_LOCK_write &&
		!(lock->state.v & __SIX_LOCK_HELD_intent));

	if (type != SIX_LOCK_write)
		six_release(&lock->dep_map);

	if (type == SIX_LOCK_intent) {
		EBUG_ON(lock->owner != current);

		if (lock->intent_lock_recurse) {
			--lock->intent_lock_recurse;
			return;
		}

		lock->owner = NULL;
	}

	if (type == SIX_LOCK_read &&
	    lock->readers) {
		smp_mb(); /* unlock barrier */
		this_cpu_dec(*lock->readers);
		state.v = READ_ONCE(lock->state.v);
	} else {
		EBUG_ON(!(lock->state.v & l[type].held_mask));
		state.v = atomic64_add_return_release(l[type].unlock_val,
						      &lock->state.counter);
	}

	six_lock_wakeup(lock, state, l[type].unlock_wakeup);
}

#define __SIX_LOCK(type)						\
bool six_trylock_##type(struct six_lock *lock)				\
{									\
	return __six_trylock_type(lock, SIX_LOCK_##type);		\
}									\
EXPORT_SYMBOL_GPL(six_trylock_##type);					\
									\
bool six_relock_##type(struct six_lock *lock, u32 seq)			\
{									\
	return __six_relock_type(lock, SIX_LOCK_##type, seq);		\
}									\
EXPORT_SYMBOL_GPL(six_relock_##type);					\
									\
int six_lock_##type(struct six_lock *lock,				\
		    six_lock_should_sleep_fn should_sleep_fn, void *p)	\
{									\
	return __six_lock_type(lock, SIX_LOCK_##type, should_sleep_fn, p);\
}									\
EXPORT_SYMBOL_GPL(six_lock_##type);					\
									\
void six_unlock_##type(struct six_lock *lock)				\
{									\
	__six_unlock_type(lock, SIX_LOCK_##type);			\
}									\
EXPORT_SYMBOL_GPL(six_unlock_##type);

__SIX_LOCK(read)
__SIX_LOCK(intent)
__SIX_LOCK(write)

#undef __SIX_LOCK
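
/*
 * The macro above stamps out the public entry points: six_trylock_read(),
 * six_lock_intent(), six_unlock_write(), etc.  six_relock_*() re-takes a lock
 * only if lock->state.seq still matches the caller's saved value.  A minimal
 * usage sketch (struct foo and f are illustrative, not part of this API):
 *
 *	struct foo { struct six_lock lock; } *f = ...;
 *
 *	six_lock_intent(&f->lock, NULL, NULL);	// may block; should_sleep_fn may be NULL
 *	six_lock_write(&f->lock, NULL, NULL);	// write requires intent to already be held
 *	six_unlock_write(&f->lock);
 *	six_unlock_intent(&f->lock);
 */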

/* Convert from intent to read: */
void six_lock_downgrade(struct six_lock *lock)
{
	six_lock_increment(lock, SIX_LOCK_read);
	six_unlock_intent(lock);
}
EXPORT_SYMBOL_GPL(six_lock_downgrade);

bool six_lock_tryupgrade(struct six_lock *lock)
{
	union six_lock_state old, new;
	u64 v = READ_ONCE(lock->state.v);

	do {
		new.v = old.v = v;

		if (new.intent_lock)
			return false;

		if (!lock->readers) {
			EBUG_ON(!new.read_lock);
			new.read_lock--;
		}

		new.intent_lock = 1;
	} while ((v = atomic64_cmpxchg_acquire(&lock->state.counter,
				old.v, new.v)) != old.v);

	if (lock->readers)
		this_cpu_dec(*lock->readers);

	six_set_owner(lock, SIX_LOCK_intent, old);

	return true;
}
EXPORT_SYMBOL_GPL(six_lock_tryupgrade);

bool six_trylock_convert(struct six_lock *lock,
			 enum six_lock_type from,
			 enum six_lock_type to)
{
	EBUG_ON(to == SIX_LOCK_write || from == SIX_LOCK_write);

	if (to == from)
		return true;

	if (to == SIX_LOCK_read) {
		six_lock_downgrade(lock);
		return true;
	} else {
		return six_lock_tryupgrade(lock);
	}
}
EXPORT_SYMBOL_GPL(six_trylock_convert);

/*
 * Increment read/intent lock count, assuming we already have it read or intent
 * locked:
 */
void six_lock_increment(struct six_lock *lock, enum six_lock_type type)
{
	const struct six_lock_vals l[] = LOCK_VALS;

	six_acquire(&lock->dep_map, 0);

	/* XXX: assert already locked, and that we don't overflow: */

	switch (type) {
	case SIX_LOCK_read:
		if (lock->readers) {
			this_cpu_inc(*lock->readers);
		} else {
			EBUG_ON(!lock->state.read_lock &&
				!lock->state.intent_lock);
			atomic64_add(l[type].lock_val, &lock->state.counter);
		}
		break;
	case SIX_LOCK_intent:
		EBUG_ON(!lock->state.intent_lock);
		lock->intent_lock_recurse++;
		break;
	case SIX_LOCK_write:
		BUG();
		break;
	}
}
EXPORT_SYMBOL_GPL(six_lock_increment);

void six_lock_wakeup_all(struct six_lock *lock)
{
	struct six_lock_waiter *w;

	raw_spin_lock(&lock->wait_lock);

	list_for_each_entry(w, &lock->wait_list[0], list)
		wake_up_process(w->task);
	list_for_each_entry(w, &lock->wait_list[1], list)
		wake_up_process(w->task);

	raw_spin_unlock(&lock->wait_lock);
}
EXPORT_SYMBOL_GPL(six_lock_wakeup_all);

void six_lock_pcpu_free(struct six_lock *lock)
{
	BUG_ON(lock->readers && pcpu_read_count(lock));
	BUG_ON(lock->state.read_lock);

	free_percpu(lock->readers);
	lock->readers = NULL;
}
EXPORT_SYMBOL_GPL(six_lock_pcpu_free);

void six_lock_pcpu_alloc(struct six_lock *lock)
{
	BUG_ON(lock->readers && pcpu_read_count(lock));
	BUG_ON(lock->state.read_lock);

	if (!lock->readers)
		lock->readers = alloc_percpu(unsigned);
}
EXPORT_SYMBOL_GPL(six_lock_pcpu_alloc);
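
/*
 * Note: switching a lock between atomic and percpu reader counting is only
 * safe while nobody holds it for read - hence the BUG_ON()s above. In
 * practice that means doing it right after the lock is initialized, before
 * it can be contended.
 */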