1 // SPDX-License-Identifier: GPL-2.0
3 #include <linux/export.h>
4 #include <linux/log2.h>
5 #include <linux/percpu.h>
6 #include <linux/preempt.h>
7 #include <linux/rcupdate.h>
8 #include <linux/sched.h>
9 #include <linux/sched/clock.h>
10 #include <linux/sched/rt.h>
11 #include <linux/six.h>
12 #include <linux/slab.h>
15 #define EBUG_ON(cond) BUG_ON(cond)
17 #define EBUG_ON(cond) do {} while (0)
20 #define six_acquire(l, t, r) lock_acquire(l, 0, t, r, 1, NULL, _RET_IP_)
21 #define six_release(l) lock_release(l, _RET_IP_)
23 static void do_six_unlock_type(struct six_lock *lock, enum six_lock_type type);
25 struct six_lock_vals {
26 /* Value we add to the lock in order to take the lock: */
29 /* If the lock has this value (used as a mask), taking the lock fails: */
32 /* Value we add to the lock in order to release the lock: */
35 /* Mask that indicates lock is held for this type: */
38 /* Waitlist we wakeup when releasing the lock: */
39 enum six_lock_type unlock_wakeup;
42 #define __SIX_LOCK_HELD_read __SIX_VAL(read_lock, ~0)
43 #define __SIX_LOCK_HELD_intent __SIX_VAL(intent_lock, ~0)
44 #define __SIX_LOCK_HELD_write __SIX_VAL(seq, 1)
48 .lock_val = __SIX_VAL(read_lock, 1), \
49 .lock_fail = __SIX_LOCK_HELD_write + __SIX_VAL(write_locking, 1),\
50 .unlock_val = -__SIX_VAL(read_lock, 1), \
51 .held_mask = __SIX_LOCK_HELD_read, \
52 .unlock_wakeup = SIX_LOCK_write, \
54 [SIX_LOCK_intent] = { \
55 .lock_val = __SIX_VAL(intent_lock, 1), \
56 .lock_fail = __SIX_LOCK_HELD_intent, \
57 .unlock_val = -__SIX_VAL(intent_lock, 1), \
58 .held_mask = __SIX_LOCK_HELD_intent, \
59 .unlock_wakeup = SIX_LOCK_intent, \
61 [SIX_LOCK_write] = { \
62 .lock_val = __SIX_VAL(seq, 1), \
63 .lock_fail = __SIX_LOCK_HELD_read, \
64 .unlock_val = __SIX_VAL(seq, 1), \
65 .held_mask = __SIX_LOCK_HELD_write, \
66 .unlock_wakeup = SIX_LOCK_read, \
70 static inline void six_set_owner(struct six_lock *lock, enum six_lock_type type,
71 union six_lock_state old,
72 struct task_struct *owner)
74 if (type != SIX_LOCK_intent)
77 if (!old.intent_lock) {
81 EBUG_ON(lock->owner != current);
85 static inline unsigned pcpu_read_count(struct six_lock *lock)
87 unsigned read_count = 0;
90 for_each_possible_cpu(cpu)
91 read_count += *per_cpu_ptr(lock->readers, cpu);
95 /* This is probably up there with the more evil things I've done */
96 #define waitlist_bitnr(id) ilog2((((union six_lock_state) { .waiters = 1 << (id) }).l))
98 static int __do_six_trylock_type(struct six_lock *lock,
99 enum six_lock_type type,
100 struct task_struct *task,
103 const struct six_lock_vals l[] = LOCK_VALS;
104 union six_lock_state old, new;
108 EBUG_ON(type == SIX_LOCK_write && lock->owner != task);
109 EBUG_ON(type == SIX_LOCK_write && (lock->state.seq & 1));
110 EBUG_ON(type == SIX_LOCK_write && (try != !(lock->state.write_locking)));
113 * Percpu reader mode:
115 * The basic idea behind this algorithm is that you can implement a lock
116 * between two threads without any atomics, just memory barriers:
118 * For two threads you'll need two variables, one variable for "thread a
119 * has the lock" and another for "thread b has the lock".
121 * To take the lock, a thread sets its variable indicating that it holds
122 * the lock, then issues a full memory barrier, then reads from the
123 * other thread's variable to check if the other thread thinks it has
124 * the lock. If we raced, we backoff and retry/sleep.
127 if (type == SIX_LOCK_read && lock->readers) {
129 this_cpu_inc(*lock->readers); /* signal that we own lock */
133 old.v = READ_ONCE(lock->state.v);
134 ret = !(old.v & l[type].lock_fail);
136 this_cpu_sub(*lock->readers, !ret);
140 * If we failed because a writer was trying to take the
141 * lock, issue a wakeup because we might have caused a
142 * spurious trylock failure:
144 if (old.write_locking)
145 ret = -1 - SIX_LOCK_write;
146 } else if (type == SIX_LOCK_write && lock->readers) {
148 atomic64_add(__SIX_VAL(write_locking, 1),
149 &lock->state.counter);
150 smp_mb__after_atomic();
151 } else if (!(lock->state.waiters & (1 << SIX_LOCK_write))) {
152 atomic64_add(__SIX_VAL(waiters, 1 << SIX_LOCK_write),
153 &lock->state.counter);
155 * pairs with barrier after unlock and before checking
156 * for readers in unlock path
158 smp_mb__after_atomic();
161 ret = !pcpu_read_count(lock);
164 * On success, we increment lock->seq; also we clear
165 * write_locking unless we failed from the lock path:
169 v += __SIX_VAL(seq, 1);
171 v -= __SIX_VAL(write_locking, 1);
174 old.v = atomic64_add_return(v, &lock->state.counter);
175 if (old.waiters & (1 << SIX_LOCK_read))
176 ret = -1 - SIX_LOCK_read;
178 atomic64_add(v, &lock->state.counter);
181 v = READ_ONCE(lock->state.v);
185 if (!(old.v & l[type].lock_fail)) {
186 new.v += l[type].lock_val;
188 if (type == SIX_LOCK_write)
189 new.write_locking = 0;
190 } else if (!try && !(new.waiters & (1 << type)))
191 new.waiters |= 1 << type;
193 break; /* waiting bit already set */
194 } while ((v = atomic64_cmpxchg_acquire(&lock->state.counter,
195 old.v, new.v)) != old.v);
197 ret = !(old.v & l[type].lock_fail);
199 EBUG_ON(ret && !(lock->state.v & l[type].held_mask));
203 six_set_owner(lock, type, old, task);
205 EBUG_ON(type == SIX_LOCK_write && (try || ret > 0) && (lock->state.write_locking));
210 static inline void __six_lock_wakeup(struct six_lock *lock, enum six_lock_type lock_type)
212 struct six_lock_waiter *w, *next;
213 struct task_struct *task;
219 raw_spin_lock(&lock->wait_lock);
221 list_for_each_entry_safe(w, next, &lock->wait_list, list) {
222 if (w->lock_want != lock_type)
225 if (saw_one && lock_type != SIX_LOCK_read)
229 ret = __do_six_trylock_type(lock, lock_type, w->task, false);
233 __list_del(w->list.prev, w->list.next);
236 * Do no writes to @w besides setting lock_acquired - otherwise
237 * we would need a memory barrier:
240 w->lock_acquired = true;
241 wake_up_process(task);
244 clear_bit(waitlist_bitnr(lock_type), (unsigned long *) &lock->state.v);
246 raw_spin_unlock(&lock->wait_lock);
249 lock_type = -ret - 1;
254 static inline void six_lock_wakeup(struct six_lock *lock,
255 union six_lock_state state,
256 enum six_lock_type lock_type)
258 if (lock_type == SIX_LOCK_write && state.read_lock)
261 if (!(state.waiters & (1 << lock_type)))
264 __six_lock_wakeup(lock, lock_type);
267 static bool do_six_trylock_type(struct six_lock *lock,
268 enum six_lock_type type,
273 ret = __do_six_trylock_type(lock, type, current, try);
275 __six_lock_wakeup(lock, -ret - 1);
280 __always_inline __flatten
281 static bool __six_trylock_type(struct six_lock *lock, enum six_lock_type type)
283 if (!do_six_trylock_type(lock, type, true))
286 if (type != SIX_LOCK_write)
287 six_acquire(&lock->dep_map, 1, type == SIX_LOCK_read);
291 __always_inline __flatten
292 static bool __six_relock_type(struct six_lock *lock, enum six_lock_type type,
295 const struct six_lock_vals l[] = LOCK_VALS;
296 union six_lock_state old;
299 EBUG_ON(type == SIX_LOCK_write);
301 if (type == SIX_LOCK_read &&
306 this_cpu_inc(*lock->readers);
310 old.v = READ_ONCE(lock->state.v);
311 ret = !(old.v & l[type].lock_fail) && old.seq == seq;
313 this_cpu_sub(*lock->readers, !ret);
317 * Similar to the lock path, we may have caused a spurious write
318 * lock fail and need to issue a wakeup:
320 if (old.write_locking)
321 six_lock_wakeup(lock, old, SIX_LOCK_write);
324 six_acquire(&lock->dep_map, 1, type == SIX_LOCK_read);
329 v = READ_ONCE(lock->state.v);
333 if (old.seq != seq || old.v & l[type].lock_fail)
335 } while ((v = atomic64_cmpxchg_acquire(&lock->state.counter,
337 old.v + l[type].lock_val)) != old.v);
339 six_set_owner(lock, type, old, current);
340 if (type != SIX_LOCK_write)
341 six_acquire(&lock->dep_map, 1, type == SIX_LOCK_read);
346 * We don't see stable performance with SIX_LOCK_SPIN_ON_OWNER enabled, so it's
349 #ifdef SIX_LOCK_SPIN_ON_OWNER
351 static inline bool six_optimistic_spin(struct six_lock *lock,
352 struct six_lock_waiter *wait)
354 struct task_struct *owner, *task = current;
356 switch (wait->lock_want) {
359 case SIX_LOCK_intent:
360 if (lock->wait_list.next != &wait->list)
368 owner = READ_ONCE(lock->owner);
370 while (owner && lock->owner == owner) {
372 * Ensure we emit the owner->on_cpu, dereference _after_
373 * checking lock->owner still matches owner. If that fails,
374 * owner might point to freed memory. If it still matches,
375 * the rcu_read_lock() ensures the memory stays valid.
380 * If we're an RT task that will live-lock because we won't let
381 * the owner complete.
383 if (wait->lock_acquired ||
393 return wait->lock_acquired;
396 #else /* CONFIG_LOCK_SPIN_ON_OWNER */
398 static inline bool six_optimistic_spin(struct six_lock *lock,
399 struct six_lock_waiter *wait)
407 static int __six_lock_type_slowpath(struct six_lock *lock, enum six_lock_type type,
408 struct six_lock_waiter *wait,
409 six_lock_should_sleep_fn should_sleep_fn, void *p)
411 union six_lock_state old;
414 if (type == SIX_LOCK_write) {
415 EBUG_ON(lock->state.write_locking);
416 atomic64_add(__SIX_VAL(write_locking, 1), &lock->state.counter);
417 smp_mb__after_atomic();
420 lock_contended(&lock->dep_map, _RET_IP_);
422 wait->task = current;
423 wait->lock_want = type;
424 wait->lock_acquired = false;
426 raw_spin_lock(&lock->wait_lock);
427 if (!(lock->state.waiters & (1 << type)))
428 set_bit(waitlist_bitnr(type), (unsigned long *) &lock->state.v);
430 * Retry taking the lock after taking waitlist lock, have raced with an
433 ret = __do_six_trylock_type(lock, type, current, false);
435 wait->start_time = local_clock();
437 if (!list_empty(&lock->wait_list)) {
438 struct six_lock_waiter *last =
439 list_last_entry(&lock->wait_list,
440 struct six_lock_waiter, list);
442 if (time_before_eq64(wait->start_time, last->start_time))
443 wait->start_time = last->start_time + 1;
446 list_add_tail(&wait->list, &lock->wait_list);
448 raw_spin_unlock(&lock->wait_lock);
450 if (unlikely(ret > 0)) {
455 if (unlikely(ret < 0)) {
456 __six_lock_wakeup(lock, -ret - 1);
460 if (six_optimistic_spin(lock, wait))
464 set_current_state(TASK_UNINTERRUPTIBLE);
466 if (wait->lock_acquired)
469 ret = should_sleep_fn ? should_sleep_fn(lock, p) : 0;
471 raw_spin_lock(&lock->wait_lock);
472 if (!wait->lock_acquired)
473 list_del(&wait->list);
474 raw_spin_unlock(&lock->wait_lock);
476 if (wait->lock_acquired)
477 do_six_unlock_type(lock, type);
484 __set_current_state(TASK_RUNNING);
486 if (ret && type == SIX_LOCK_write && lock->state.write_locking) {
487 old.v = atomic64_sub_return(__SIX_VAL(write_locking, 1),
488 &lock->state.counter);
489 six_lock_wakeup(lock, old, SIX_LOCK_read);
495 __always_inline __flatten
496 static int __six_lock_type_waiter(struct six_lock *lock, enum six_lock_type type,
497 struct six_lock_waiter *wait,
498 six_lock_should_sleep_fn should_sleep_fn, void *p)
502 wait->start_time = 0;
504 if (type != SIX_LOCK_write)
505 six_acquire(&lock->dep_map, 0, type == SIX_LOCK_read);
507 ret = do_six_trylock_type(lock, type, true) ? 0
508 : __six_lock_type_slowpath(lock, type, wait, should_sleep_fn, p);
510 if (ret && type != SIX_LOCK_write)
511 six_release(&lock->dep_map);
513 lock_acquired(&lock->dep_map, _RET_IP_);
519 static int __six_lock_type(struct six_lock *lock, enum six_lock_type type,
520 six_lock_should_sleep_fn should_sleep_fn, void *p)
522 struct six_lock_waiter wait;
524 return __six_lock_type_waiter(lock, type, &wait, should_sleep_fn, p);
527 __always_inline __flatten
528 static void do_six_unlock_type(struct six_lock *lock, enum six_lock_type type)
530 const struct six_lock_vals l[] = LOCK_VALS;
531 union six_lock_state state;
533 if (type == SIX_LOCK_intent)
536 if (type == SIX_LOCK_read &&
538 smp_mb(); /* unlock barrier */
539 this_cpu_dec(*lock->readers);
540 smp_mb(); /* between unlocking and checking for waiters */
541 state.v = READ_ONCE(lock->state.v);
543 EBUG_ON(!(lock->state.v & l[type].held_mask));
544 state.v = atomic64_add_return_release(l[type].unlock_val,
545 &lock->state.counter);
548 six_lock_wakeup(lock, state, l[type].unlock_wakeup);
551 __always_inline __flatten
552 static void __six_unlock_type(struct six_lock *lock, enum six_lock_type type)
554 EBUG_ON(type == SIX_LOCK_write &&
555 !(lock->state.v & __SIX_LOCK_HELD_intent));
556 EBUG_ON((type == SIX_LOCK_write ||
557 type == SIX_LOCK_intent) &&
558 lock->owner != current);
560 if (type != SIX_LOCK_write)
561 six_release(&lock->dep_map);
563 if (type == SIX_LOCK_intent &&
564 lock->intent_lock_recurse) {
565 --lock->intent_lock_recurse;
569 do_six_unlock_type(lock, type);
572 #define __SIX_LOCK(type) \
573 bool six_trylock_##type(struct six_lock *lock) \
575 return __six_trylock_type(lock, SIX_LOCK_##type); \
577 EXPORT_SYMBOL_GPL(six_trylock_##type); \
579 bool six_relock_##type(struct six_lock *lock, u32 seq) \
581 return __six_relock_type(lock, SIX_LOCK_##type, seq); \
583 EXPORT_SYMBOL_GPL(six_relock_##type); \
585 int six_lock_##type(struct six_lock *lock, \
586 six_lock_should_sleep_fn should_sleep_fn, void *p) \
588 return __six_lock_type(lock, SIX_LOCK_##type, should_sleep_fn, p);\
590 EXPORT_SYMBOL_GPL(six_lock_##type); \
592 int six_lock_waiter_##type(struct six_lock *lock, \
593 struct six_lock_waiter *wait, \
594 six_lock_should_sleep_fn should_sleep_fn, void *p)\
596 return __six_lock_type_waiter(lock, SIX_LOCK_##type, wait, should_sleep_fn, p);\
598 EXPORT_SYMBOL_GPL(six_lock_waiter_##type); \
600 void six_unlock_##type(struct six_lock *lock) \
602 __six_unlock_type(lock, SIX_LOCK_##type); \
604 EXPORT_SYMBOL_GPL(six_unlock_##type);
612 /* Convert from intent to read: */
613 void six_lock_downgrade(struct six_lock *lock)
615 six_lock_increment(lock, SIX_LOCK_read);
616 six_unlock_intent(lock);
618 EXPORT_SYMBOL_GPL(six_lock_downgrade);
620 bool six_lock_tryupgrade(struct six_lock *lock)
622 union six_lock_state old, new;
623 u64 v = READ_ONCE(lock->state.v);
631 if (!lock->readers) {
632 EBUG_ON(!new.read_lock);
637 } while ((v = atomic64_cmpxchg_acquire(&lock->state.counter,
638 old.v, new.v)) != old.v);
641 this_cpu_dec(*lock->readers);
643 six_set_owner(lock, SIX_LOCK_intent, old, current);
647 EXPORT_SYMBOL_GPL(six_lock_tryupgrade);
649 bool six_trylock_convert(struct six_lock *lock,
650 enum six_lock_type from,
651 enum six_lock_type to)
653 EBUG_ON(to == SIX_LOCK_write || from == SIX_LOCK_write);
658 if (to == SIX_LOCK_read) {
659 six_lock_downgrade(lock);
662 return six_lock_tryupgrade(lock);
665 EXPORT_SYMBOL_GPL(six_trylock_convert);
668 * Increment read/intent lock count, assuming we already have it read or intent
671 void six_lock_increment(struct six_lock *lock, enum six_lock_type type)
673 const struct six_lock_vals l[] = LOCK_VALS;
675 six_acquire(&lock->dep_map, 0, type == SIX_LOCK_read);
677 /* XXX: assert already locked, and that we don't overflow: */
682 this_cpu_inc(*lock->readers);
684 EBUG_ON(!lock->state.read_lock &&
685 !lock->state.intent_lock);
686 atomic64_add(l[type].lock_val, &lock->state.counter);
689 case SIX_LOCK_intent:
690 EBUG_ON(!lock->state.intent_lock);
691 lock->intent_lock_recurse++;
698 EXPORT_SYMBOL_GPL(six_lock_increment);
700 void six_lock_wakeup_all(struct six_lock *lock)
702 union six_lock_state state = lock->state;
703 struct six_lock_waiter *w;
705 six_lock_wakeup(lock, state, SIX_LOCK_read);
706 six_lock_wakeup(lock, state, SIX_LOCK_intent);
707 six_lock_wakeup(lock, state, SIX_LOCK_write);
709 raw_spin_lock(&lock->wait_lock);
710 list_for_each_entry(w, &lock->wait_list, list)
711 wake_up_process(w->task);
712 raw_spin_unlock(&lock->wait_lock);
714 EXPORT_SYMBOL_GPL(six_lock_wakeup_all);
716 void six_lock_pcpu_free(struct six_lock *lock)
718 BUG_ON(lock->readers && pcpu_read_count(lock));
719 BUG_ON(lock->state.read_lock);
721 free_percpu(lock->readers);
722 lock->readers = NULL;
724 EXPORT_SYMBOL_GPL(six_lock_pcpu_free);
726 void six_lock_pcpu_alloc(struct six_lock *lock)
730 lock->readers = alloc_percpu(unsigned);
733 EXPORT_SYMBOL_GPL(six_lock_pcpu_alloc);
736 * Returns lock held counts, for both read and intent
738 struct six_lock_count six_lock_counts(struct six_lock *lock)
740 struct six_lock_count ret;
742 ret.n[SIX_LOCK_read] = 0;
743 ret.n[SIX_LOCK_intent] = lock->state.intent_lock + lock->intent_lock_recurse;
744 ret.n[SIX_LOCK_write] = lock->state.seq & 1;
747 ret.n[SIX_LOCK_read] += lock->state.read_lock;
751 for_each_possible_cpu(cpu)
752 ret.n[SIX_LOCK_read] += *per_cpu_ptr(lock->readers, cpu);
757 EXPORT_SYMBOL_GPL(six_lock_counts);