// SPDX-License-Identifier: GPL-2.0

#include <linux/export.h>
#include <linux/log2.h>
#include <linux/percpu.h>
#include <linux/preempt.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/sched/rt.h>
#include <linux/six.h>
#include <linux/slab.h>

#ifdef DEBUG
#define EBUG_ON(cond)		BUG_ON(cond)
#else
#define EBUG_ON(cond)		do {} while (0)
#endif

#define six_acquire(l, t)	lock_acquire(l, 0, t, 0, 0, NULL, _RET_IP_)
#define six_release(l)		lock_release(l, _RET_IP_)

static void do_six_unlock_type(struct six_lock *lock, enum six_lock_type type);

struct six_lock_vals {
	/* Value we add to the lock in order to take the lock: */
	u64			lock_val;

	/* If the lock has this value (used as a mask), taking the lock fails: */
	u64			lock_fail;

	/* Value we add to the lock in order to release the lock: */
	u64			unlock_val;

	/* Mask that indicates lock is held for this type: */
	u64			held_mask;

	/* Waitlist we wakeup when releasing the lock: */
	enum six_lock_type	unlock_wakeup;
};

#define __SIX_LOCK_HELD_read	__SIX_VAL(read_lock, ~0)
#define __SIX_LOCK_HELD_intent	__SIX_VAL(intent_lock, ~0)
#define __SIX_LOCK_HELD_write	__SIX_VAL(seq, 1)
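
/*
 * Note: __SIX_VAL(field, v), from six.h, builds a u64 lock state value with
 * @field of union six_lock_state set to @v.  The masks above thus cover the
 * whole read_lock/intent_lock counters; the low bit of the sequence number is
 * odd while the lock is held for write.
 */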

#define LOCK_VALS {							\
	[SIX_LOCK_read] = {						\
		.lock_val	= __SIX_VAL(read_lock, 1),		\
		.lock_fail	= __SIX_LOCK_HELD_write + __SIX_VAL(write_locking, 1),\
		.unlock_val	= -__SIX_VAL(read_lock, 1),		\
		.held_mask	= __SIX_LOCK_HELD_read,			\
		.unlock_wakeup	= SIX_LOCK_write,			\
	},								\
	[SIX_LOCK_intent] = {						\
		.lock_val	= __SIX_VAL(intent_lock, 1),		\
		.lock_fail	= __SIX_LOCK_HELD_intent,		\
		.unlock_val	= -__SIX_VAL(intent_lock, 1),		\
		.held_mask	= __SIX_LOCK_HELD_intent,		\
		.unlock_wakeup	= SIX_LOCK_intent,			\
	},								\
	[SIX_LOCK_write] = {						\
		.lock_val	= __SIX_VAL(seq, 1),			\
		.lock_fail	= __SIX_LOCK_HELD_read,			\
		.unlock_val	= __SIX_VAL(seq, 1),			\
		.held_mask	= __SIX_LOCK_HELD_write,		\
		.unlock_wakeup	= SIX_LOCK_read,			\
	},								\
}
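
/*
 * I.e.: taking a read lock fails while the lock is held for write or a write
 * lock attempt is in progress; taking an intent lock fails only while another
 * intent lock is held; taking a write lock fails while readers hold the lock.
 * Releasing a lock of a given type wakes up the waitlist in .unlock_wakeup.
 */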

static inline void six_set_owner(struct six_lock *lock, enum six_lock_type type,
				 union six_lock_state old,
				 struct task_struct *owner)
{
	if (type != SIX_LOCK_intent)
		return;

	if (!old.intent_lock) {
		EBUG_ON(lock->owner);
		lock->owner = owner;
	} else {
		EBUG_ON(lock->owner != current);
	}
}

static inline unsigned pcpu_read_count(struct six_lock *lock)
{
	unsigned read_count = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		read_count += *per_cpu_ptr(lock->readers, cpu);
	return read_count;
}

/* This is probably up there with the more evil things I've done */
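/*
 * (It evaluates to the bit number, within the 64 bit lock state, of the
 * waiters bit for a given lock type - see the clear_bit() in
 * __six_lock_wakeup().)
 */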
#define waitlist_bitnr(id) ilog2((((union six_lock_state) { .waiters = 1 << (id) }).l))
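
/*
 * __do_six_trylock_type() return convention, as used by the callers below:
 * ret > 0 means the lock was taken on behalf of @task, ret == 0 means the
 * trylock failed, and ret == -1 - lock_type means we may have caused a
 * spurious trylock failure and the @lock_type waitlist should be woken up.
 */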

static int __do_six_trylock_type(struct six_lock *lock,
				 enum six_lock_type type,
				 struct task_struct *task,
				 bool try)
{
	const struct six_lock_vals l[] = LOCK_VALS;
	union six_lock_state old, new;
	int ret;
	u64 v;

	EBUG_ON(type == SIX_LOCK_write && lock->owner != task);
	EBUG_ON(type == SIX_LOCK_write && (lock->state.seq & 1));
	EBUG_ON(type == SIX_LOCK_write && (try != !(lock->state.write_locking)));

	/*
	 * Percpu reader mode:
	 *
	 * The basic idea behind this algorithm is that you can implement a lock
	 * between two threads without any atomics, just memory barriers:
	 *
	 * For two threads you'll need two variables, one variable for "thread a
	 * has the lock" and another for "thread b has the lock".
	 *
	 * To take the lock, a thread sets its variable indicating that it holds
	 * the lock, then issues a full memory barrier, then reads from the
	 * other thread's variable to check if the other thread thinks it has
	 * the lock. If we raced, we backoff and retry/sleep.
	 */
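	/*
	 * (Illustrative sketch of that two-variable idea - not actual six lock
	 * code:
	 *
	 *	me_locked = 1;
	 *	smp_mb();
	 *	if (other_locked)
	 *		me_locked = 0;	// raced: back off and retry/sleep
	 *
	 * In percpu reader mode below, the per-cpu read counts play the role
	 * of "thread a has the lock" and the write_locking bit plays the role
	 * of "thread b has the lock".)
	 */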
retry:
	if (type == SIX_LOCK_read && lock->readers) {
		preempt_disable();
		this_cpu_inc(*lock->readers); /* signal that we own lock */

		smp_mb();

		old.v = READ_ONCE(lock->state.v);
		ret = !(old.v & l[type].lock_fail);

		this_cpu_sub(*lock->readers, !ret);
		preempt_enable();

		/*
		 * If we failed from the lock path and the waiting bit wasn't
		 * set, set it:
		 */
		if (!try && !ret) {
			v = old.v;
			do {
				new.v = old.v = v;

				if (!(old.v & l[type].lock_fail))
					goto retry;

				if (new.waiters & (1 << type))
					break;

				new.waiters |= 1 << type;
			} while ((v = atomic64_cmpxchg(&lock->state.counter,
						       old.v, new.v)) != old.v);
		}

		/*
		 * If we failed because a writer was trying to take the
		 * lock, issue a wakeup because we might have caused a
		 * spurious trylock failure:
		 */
		if (old.write_locking)
			ret = -1 - SIX_LOCK_write;
	} else if (type == SIX_LOCK_write && lock->readers) {
		atomic64_add(__SIX_VAL(write_locking, 1),
			     &lock->state.counter);
		smp_mb__after_atomic();

		ret = !pcpu_read_count(lock);

		/*
		 * On success, we increment lock->seq; also we clear
		 * write_locking unless we failed from the lock path:
		 */
		v = 0;
		if (ret)
			v += __SIX_VAL(seq, 1);
		if (ret || try)
			v -= __SIX_VAL(write_locking, 1);

		if (!ret && !try && !(lock->state.waiters & (1 << SIX_LOCK_write)))
			v += __SIX_VAL(waiters, 1 << SIX_LOCK_write);

		if (try && !ret) {
			old.v = atomic64_add_return(v, &lock->state.counter);
			if (old.waiters & (1 << SIX_LOCK_read))
				ret = -1 - SIX_LOCK_read;
		} else {
			atomic64_add(v, &lock->state.counter);
		}
	} else {
		v = READ_ONCE(lock->state.v);
		do {
			new.v = old.v = v;

			if (!(old.v & l[type].lock_fail)) {
				new.v += l[type].lock_val;

				if (type == SIX_LOCK_write)
					new.write_locking = 0;
			} else if (!try && !(new.waiters & (1 << type)))
				new.waiters |= 1 << type;
			else
				break; /* waiting bit already set */
		} while ((v = atomic64_cmpxchg_acquire(&lock->state.counter,
						       old.v, new.v)) != old.v);

		ret = !(old.v & l[type].lock_fail);

		EBUG_ON(ret && !(lock->state.v & l[type].held_mask));
	}

	if (ret > 0)
		six_set_owner(lock, type, old, task);

	EBUG_ON(type == SIX_LOCK_write && (try || ret > 0) && (lock->state.write_locking));

	return ret;
}
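
/*
 * Wake up waiter(s) of the given type.  Because a wakeup takes the lock on
 * behalf of the waiter (via the __do_six_trylock_type() call below), we wake
 * every waiting reader but at most one intent or write waiter per pass.
 */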
static inline void __six_lock_wakeup(struct six_lock *lock, enum six_lock_type lock_type)
{
	struct six_lock_waiter *w, *next;
	struct task_struct *task;
	bool saw_one;
	int ret;
again:
	ret = 0;
	saw_one = false;
	raw_spin_lock(&lock->wait_lock);

	list_for_each_entry_safe(w, next, &lock->wait_list, list) {
		if (w->lock_want != lock_type)
			continue;

		if (saw_one && lock_type != SIX_LOCK_read)
			goto unlock;
		saw_one = true;

		ret = __do_six_trylock_type(lock, lock_type, w->task, false);
		if (ret <= 0)
			goto unlock;

		__list_del(w->list.prev, w->list.next);
		task = w->task;
		/*
		 * Do no writes to @w besides setting lock_acquired - otherwise
		 * we would need a memory barrier:
		 */
		w->lock_acquired = true;
		wake_up_process(task);
	}

	clear_bit(waitlist_bitnr(lock_type), (unsigned long *) &lock->state.v);
unlock:
	raw_spin_unlock(&lock->wait_lock);

	if (ret < 0) {
		lock_type = -ret - 1;
		goto again;
	}
}

static inline void six_lock_wakeup(struct six_lock *lock,
				   union six_lock_state state,
				   enum six_lock_type lock_type)
{
	if (lock_type == SIX_LOCK_write && state.read_lock)
		return;

	if (!(state.waiters & (1 << lock_type)))
		return;

	__six_lock_wakeup(lock, lock_type);
}

static bool do_six_trylock_type(struct six_lock *lock,
				enum six_lock_type type,
				bool try)
{
	int ret;

	ret = __do_six_trylock_type(lock, type, current, try);
	if (ret < 0)
		__six_lock_wakeup(lock, -ret - 1);

	return ret > 0;
}

__always_inline __flatten
static bool __six_trylock_type(struct six_lock *lock, enum six_lock_type type)
{
	if (!do_six_trylock_type(lock, type, true))
		return false;

	if (type != SIX_LOCK_write)
		six_acquire(&lock->dep_map, 1);
	return true;
}
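
/*
 * six_relock_*(): retake a lock we previously held iff nobody took a write
 * lock in the meantime, i.e. iff the sequence number still matches @seq.
 */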
__always_inline __flatten
static bool __six_relock_type(struct six_lock *lock, enum six_lock_type type,
			      unsigned seq)
{
	const struct six_lock_vals l[] = LOCK_VALS;
	union six_lock_state old;
	u64 v;

	EBUG_ON(type == SIX_LOCK_write);

	if (type == SIX_LOCK_read &&
	    lock->readers) {
		bool ret;

		preempt_disable();
		this_cpu_inc(*lock->readers);

		smp_mb();

		old.v = READ_ONCE(lock->state.v);
		ret = !(old.v & l[type].lock_fail) && old.seq == seq;

		this_cpu_sub(*lock->readers, !ret);
		preempt_enable();

		/*
		 * Similar to the lock path, we may have caused a spurious write
		 * lock fail and need to issue a wakeup:
		 */
		if (old.write_locking)
			six_lock_wakeup(lock, old, SIX_LOCK_write);

		if (ret)
			six_acquire(&lock->dep_map, 1);

		return ret;
	}

	v = READ_ONCE(lock->state.v);
	do {
		old.v = v;

		if (old.seq != seq || old.v & l[type].lock_fail)
			return false;
	} while ((v = atomic64_cmpxchg_acquire(&lock->state.counter,
					       old.v,
					       old.v + l[type].lock_val)) != old.v);

	six_set_owner(lock, type, old, current);
	if (type != SIX_LOCK_write)
		six_acquire(&lock->dep_map, 1);
	return true;
}

#ifdef CONFIG_LOCK_SPIN_ON_OWNER

static inline bool six_optimistic_spin(struct six_lock *lock,
				       struct six_lock_waiter *wait)
{
	struct task_struct *owner, *task = current;

	switch (wait->lock_want) {
	case SIX_LOCK_read:
		break;
	case SIX_LOCK_intent:
		if (lock->wait_list.next != &wait->list)
			return false;
		break;
	case SIX_LOCK_write:
		return false;
	}

	rcu_read_lock();
	owner = READ_ONCE(lock->owner);

	while (owner && lock->owner == owner) {
		/*
		 * Ensure we emit the owner->on_cpu dereference _after_
		 * checking lock->owner still matches owner. If that fails,
		 * owner might point to freed memory. If it still matches,
		 * the rcu_read_lock() ensures the memory stays valid.
		 */
		barrier();

		/*
		 * Stop spinning if we've been handed the lock, if the owner
		 * isn't running, if we're an RT task (we'd live-lock because
		 * we'd never let the owner complete), or if we need to
		 * reschedule:
		 */
		if (wait->lock_acquired ||
		    !owner->on_cpu ||
		    rt_task(task) ||
		    need_resched())
			break;

		cpu_relax();
	}
	rcu_read_unlock();

	return wait->lock_acquired;
}

#else /* CONFIG_LOCK_SPIN_ON_OWNER */

static inline bool six_optimistic_spin(struct six_lock *lock,
				       struct six_lock_waiter *wait)
{
	return false;
}

#endif
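
/*
 * Slowpath: the waiter adds itself to lock->wait_list and sleeps until an
 * unlocker takes the lock on its behalf and sets wait->lock_acquired; if
 * should_sleep_fn aborts the wait, any lock we were handed in the meantime
 * has to be dropped again - see do_six_unlock_type() below.
 */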
static int __six_lock_type_slowpath(struct six_lock *lock, enum six_lock_type type,
				    struct six_lock_waiter *wait,
				    six_lock_should_sleep_fn should_sleep_fn, void *p)
{
	union six_lock_state old;
	int ret = 0;

	if (type == SIX_LOCK_write) {
		EBUG_ON(lock->state.write_locking);
		atomic64_add(__SIX_VAL(write_locking, 1), &lock->state.counter);
		smp_mb__after_atomic();
	}

	lock_contended(&lock->dep_map, _RET_IP_);

	wait->task		= current;
	wait->lock_want		= type;
	wait->lock_acquired	= false;

	raw_spin_lock(&lock->wait_lock);
	/*
	 * Retry taking the lock after taking waitlist lock, have raced with an
	 * unlock:
	 */
	ret = __do_six_trylock_type(lock, type, current, false);
	if (ret <= 0)
		list_add_tail(&wait->list, &lock->wait_list);
	raw_spin_unlock(&lock->wait_lock);

	if (unlikely(ret > 0)) {
		ret = 0;
		goto out;
	}

	if (unlikely(ret < 0)) {
		__six_lock_wakeup(lock, -ret - 1);
		ret = 0;
	}

	if (six_optimistic_spin(lock, wait))
		goto out;

	while (1) {
		set_current_state(TASK_UNINTERRUPTIBLE);

		if (wait->lock_acquired)
			break;

		ret = should_sleep_fn ? should_sleep_fn(lock, p) : 0;
		if (unlikely(ret)) {
			raw_spin_lock(&lock->wait_lock);
			if (!wait->lock_acquired)
				list_del(&wait->list);
			raw_spin_unlock(&lock->wait_lock);

			if (wait->lock_acquired)
				do_six_unlock_type(lock, type);
			break;
		}

		schedule();
	}

	__set_current_state(TASK_RUNNING);
out:
	if (ret && type == SIX_LOCK_write) {
		old.v = atomic64_sub_return(__SIX_VAL(write_locking, 1),
					    &lock->state.counter);
		six_lock_wakeup(lock, old, SIX_LOCK_read);
	}

	return ret;
}

__always_inline __flatten
static int __six_lock_type_waiter(struct six_lock *lock, enum six_lock_type type,
				  struct six_lock_waiter *wait,
				  six_lock_should_sleep_fn should_sleep_fn, void *p)
{
	int ret;

	if (type != SIX_LOCK_write)
		six_acquire(&lock->dep_map, 0);

	ret = do_six_trylock_type(lock, type, true) ? 0
		: __six_lock_type_slowpath(lock, type, wait, should_sleep_fn, p);

	if (ret && type != SIX_LOCK_write)
		six_release(&lock->dep_map);
	if (!ret)
		lock_acquired(&lock->dep_map, _RET_IP_);

	return ret;
}
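
/*
 * Note: the six_lock_waiter in __six_lock_type() lives on the caller's stack;
 * that's safe because we don't return until the waiter has either been handed
 * the lock or been removed from the wait list.
 */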
static int __six_lock_type(struct six_lock *lock, enum six_lock_type type,
			   six_lock_should_sleep_fn should_sleep_fn, void *p)
{
	struct six_lock_waiter wait;

	return __six_lock_type_waiter(lock, type, &wait, should_sleep_fn, p);
}

__always_inline __flatten
static void do_six_unlock_type(struct six_lock *lock, enum six_lock_type type)
{
	const struct six_lock_vals l[] = LOCK_VALS;
	union six_lock_state state;

	if (type == SIX_LOCK_intent)
		lock->owner = NULL;

	if (type == SIX_LOCK_read &&
	    lock->readers) {
		smp_mb(); /* unlock barrier */
		this_cpu_dec(*lock->readers);
		smp_mb(); /* between unlocking and checking for waiters */
		state.v = READ_ONCE(lock->state.v);
	} else {
		EBUG_ON(!(lock->state.v & l[type].held_mask));
		state.v = atomic64_add_return_release(l[type].unlock_val,
						      &lock->state.counter);
	}

	six_lock_wakeup(lock, state, l[type].unlock_wakeup);
}

__always_inline __flatten
static void __six_unlock_type(struct six_lock *lock, enum six_lock_type type)
{
	EBUG_ON(type == SIX_LOCK_write &&
		!(lock->state.v & __SIX_LOCK_HELD_intent));
	EBUG_ON((type == SIX_LOCK_write ||
		 type == SIX_LOCK_intent) &&
		lock->owner != current);

	if (type != SIX_LOCK_write)
		six_release(&lock->dep_map);

	if (type == SIX_LOCK_intent &&
	    lock->intent_lock_recurse) {
		--lock->intent_lock_recurse;
		return;
	}

	do_six_unlock_type(lock, type);
}

#define __SIX_LOCK(type)						\
bool six_trylock_##type(struct six_lock *lock)				\
{									\
	return __six_trylock_type(lock, SIX_LOCK_##type);		\
}									\
EXPORT_SYMBOL_GPL(six_trylock_##type);					\
									\
bool six_relock_##type(struct six_lock *lock, u32 seq)			\
{									\
	return __six_relock_type(lock, SIX_LOCK_##type, seq);		\
}									\
EXPORT_SYMBOL_GPL(six_relock_##type);					\
									\
int six_lock_##type(struct six_lock *lock,				\
		    six_lock_should_sleep_fn should_sleep_fn, void *p)	\
{									\
	return __six_lock_type(lock, SIX_LOCK_##type, should_sleep_fn, p);\
}									\
EXPORT_SYMBOL_GPL(six_lock_##type);					\
									\
int six_lock_waiter_##type(struct six_lock *lock,			\
			   struct six_lock_waiter *wait,		\
			   six_lock_should_sleep_fn should_sleep_fn, void *p)\
{									\
	return __six_lock_type_waiter(lock, SIX_LOCK_##type, wait, should_sleep_fn, p);\
}									\
EXPORT_SYMBOL_GPL(six_lock_waiter_##type);				\
									\
void six_unlock_##type(struct six_lock *lock)				\
{									\
	__six_unlock_type(lock, SIX_LOCK_##type);			\
}									\
EXPORT_SYMBOL_GPL(six_unlock_##type);

__SIX_LOCK(read)
__SIX_LOCK(intent)
__SIX_LOCK(write)

#undef __SIX_LOCK
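
/*
 * Example (illustrative only) of how the generated entry points are meant to
 * be used, assuming the lock was initialized with six_lock_init() from six.h:
 *
 *	six_lock_read(&lock, NULL, NULL);
 *	... shared access ...
 *	six_unlock_read(&lock);
 *
 *	six_lock_intent(&lock, NULL, NULL);
 *	six_lock_write(&lock, NULL, NULL);	// write requires intent held
 *	... exclusive access ...
 *	six_unlock_write(&lock);
 *	six_unlock_intent(&lock);
 */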

/* Convert from intent to read: */
void six_lock_downgrade(struct six_lock *lock)
{
	six_lock_increment(lock, SIX_LOCK_read);
	six_unlock_intent(lock);
}
EXPORT_SYMBOL_GPL(six_lock_downgrade);

bool six_lock_tryupgrade(struct six_lock *lock)
{
	union six_lock_state old, new;
	u64 v = READ_ONCE(lock->state.v);

	do {
		new.v = old.v = v;

		if (new.intent_lock)
			return false;

		if (!lock->readers) {
			EBUG_ON(!new.read_lock);
			new.read_lock--;
		}

		new.intent_lock = 1;
	} while ((v = atomic64_cmpxchg_acquire(&lock->state.counter,
					       old.v, new.v)) != old.v);

	if (lock->readers)
		this_cpu_dec(*lock->readers);

	six_set_owner(lock, SIX_LOCK_intent, old, current);

	return true;
}
EXPORT_SYMBOL_GPL(six_lock_tryupgrade);

bool six_trylock_convert(struct six_lock *lock,
			 enum six_lock_type from,
			 enum six_lock_type to)
{
	EBUG_ON(to == SIX_LOCK_write || from == SIX_LOCK_write);

	if (to == from)
		return true;

	if (to == SIX_LOCK_read) {
		six_lock_downgrade(lock);
		return true;
	} else {
		return six_lock_tryupgrade(lock);
	}
}
EXPORT_SYMBOL_GPL(six_trylock_convert);

/*
 * Increment read/intent lock count, assuming we already have it read or intent
 * locked:
 */
void six_lock_increment(struct six_lock *lock, enum six_lock_type type)
{
	const struct six_lock_vals l[] = LOCK_VALS;

	six_acquire(&lock->dep_map, 0);

	/* XXX: assert already locked, and that we don't overflow: */

	switch (type) {
	case SIX_LOCK_read:
		if (lock->readers) {
			this_cpu_inc(*lock->readers);
		} else {
			EBUG_ON(!lock->state.read_lock &&
				!lock->state.intent_lock);
			atomic64_add(l[type].lock_val, &lock->state.counter);
		}
		break;
	case SIX_LOCK_intent:
		EBUG_ON(!lock->state.intent_lock);
		lock->intent_lock_recurse++;
		break;
	case SIX_LOCK_write:
		BUG();
		break;
	}
}
EXPORT_SYMBOL_GPL(six_lock_increment);

void six_lock_wakeup_all(struct six_lock *lock)
{
	struct six_lock_waiter *w;

	raw_spin_lock(&lock->wait_lock);
	list_for_each_entry(w, &lock->wait_list, list)
		wake_up_process(w->task);
	raw_spin_unlock(&lock->wait_lock);
}
EXPORT_SYMBOL_GPL(six_lock_wakeup_all);

void six_lock_pcpu_free(struct six_lock *lock)
{
	BUG_ON(lock->readers && pcpu_read_count(lock));
	BUG_ON(lock->state.read_lock);

	free_percpu(lock->readers);
	lock->readers = NULL;
}
EXPORT_SYMBOL_GPL(six_lock_pcpu_free);
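
/*
 * Percpu reader mode trades a cheaper read path (a per-cpu increment instead
 * of an update to the shared atomic) for a more expensive write path, which
 * has to sum the per-cpu counters - see pcpu_read_count().
 */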
void six_lock_pcpu_alloc(struct six_lock *lock)
{
#ifdef __KERNEL__
	if (!lock->readers)
		lock->readers = alloc_percpu(unsigned);
#endif
}
EXPORT_SYMBOL_GPL(six_lock_pcpu_alloc);

/*
 * Returns lock held counts, for read, intent and write:
 */
struct six_lock_count six_lock_counts(struct six_lock *lock)
{
	struct six_lock_count ret;

	ret.n[SIX_LOCK_read]	= 0;
	ret.n[SIX_LOCK_intent]	= lock->state.intent_lock + lock->intent_lock_recurse;
	ret.n[SIX_LOCK_write]	= lock->state.seq & 1;

	if (!lock->readers) {
		ret.n[SIX_LOCK_read] += lock->state.read_lock;
	} else {
		int cpu;

		for_each_possible_cpu(cpu)
			ret.n[SIX_LOCK_read] += *per_cpu_ptr(lock->readers, cpu);
	}

	return ret;
}
EXPORT_SYMBOL_GPL(six_lock_counts);