// SPDX-License-Identifier: GPL-2.0

#include <linux/export.h>
#include <linux/log2.h>
#include <linux/percpu.h>
#include <linux/preempt.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/sched/rt.h>
#include <linux/six.h>
#include <linux/slab.h>
#ifdef DEBUG
#define EBUG_ON(cond)		BUG_ON(cond)
#else
#define EBUG_ON(cond)		do {} while (0)
#endif
#define six_acquire(l, t)	lock_acquire(l, 0, t, 0, 0, NULL, _RET_IP_)
#define six_release(l)		lock_release(l, _RET_IP_)
struct six_lock_vals {
	/* Value we add to the lock in order to take the lock: */
	u64			lock_val;

	/* If the lock has this value (used as a mask), taking the lock fails: */
	u64			lock_fail;

	/* Value we add to the lock in order to release the lock: */
	u64			unlock_val;

	/* Mask that indicates lock is held for this type: */
	u64			held_mask;

	/* Waitlist we wake up when releasing the lock: */
	enum six_lock_type	unlock_wakeup;
};
#define __SIX_LOCK_HELD_read	__SIX_VAL(read_lock, ~0)
#define __SIX_LOCK_HELD_intent	__SIX_VAL(intent_lock, ~0)
#define __SIX_LOCK_HELD_write	__SIX_VAL(seq, 1)
#define LOCK_VALS {							\
	[SIX_LOCK_read] = {						\
		.lock_val	= __SIX_VAL(read_lock, 1),		\
		.lock_fail	= __SIX_LOCK_HELD_write + __SIX_VAL(write_locking, 1),\
		.unlock_val	= -__SIX_VAL(read_lock, 1),		\
		.held_mask	= __SIX_LOCK_HELD_read,			\
		.unlock_wakeup	= SIX_LOCK_write,			\
	},								\
	[SIX_LOCK_intent] = {						\
		.lock_val	= __SIX_VAL(intent_lock, 1),		\
		.lock_fail	= __SIX_LOCK_HELD_intent,		\
		.unlock_val	= -__SIX_VAL(intent_lock, 1),		\
		.held_mask	= __SIX_LOCK_HELD_intent,		\
		.unlock_wakeup	= SIX_LOCK_intent,			\
	},								\
	[SIX_LOCK_write] = {						\
		.lock_val	= __SIX_VAL(seq, 1),			\
		.lock_fail	= __SIX_LOCK_HELD_read,			\
		.unlock_val	= __SIX_VAL(seq, 1),			\
		.held_mask	= __SIX_LOCK_HELD_write,		\
		.unlock_wakeup	= SIX_LOCK_read,			\
	},								\
}
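/*
 * Summary of the table above (derived from the initializers, not new
 * semantics): taking a read lock fails while the lock is write locked or a
 * writer has announced itself via write_locking; an intent lock conflicts
 * only with another intent lock; a write lock fails while any readers are
 * counted in the state word (percpu readers are checked separately in
 * do_six_trylock_type()).
 */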
static inline void six_set_owner(struct six_lock *lock, enum six_lock_type type,
				 union six_lock_state old)
{
	if (type != SIX_LOCK_intent)
		return;

	if (!old.intent_lock) {
		EBUG_ON(lock->owner);
		lock->owner = current;
	} else {
		EBUG_ON(lock->owner != current);
	}
}
static inline unsigned pcpu_read_count(struct six_lock *lock)
{
	unsigned read_count = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		read_count += *per_cpu_ptr(lock->readers, cpu);
	return read_count;
}
struct six_lock_waiter {
	struct list_head	list;
	struct task_struct	*task;
};
/* This is probably up there with the more evil things I've done */
#define waitlist_bitnr(id) ilog2((((union six_lock_state) { .waiters = 1 << (id) }).l))
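/*
 * What the macro above evaluates to, spelled out (illustrative only): it
 * builds a six_lock_state with just the waiters bit for the given lock type
 * set, then takes ilog2() of the raw 64-bit value, yielding the bit number
 * of that waiters bit within state.v.  six_lock_wakeup() below feeds that
 * bit number to clear_bit()/set_bit() on the state word directly.
 */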
static inline void six_lock_wakeup(struct six_lock *lock,
				   union six_lock_state state,
				   unsigned waitlist_id)
{
	if (waitlist_id == SIX_LOCK_write) {
		if (state.write_locking && !state.read_lock) {
			struct task_struct *p = READ_ONCE(lock->owner);
			if (p)
				wake_up_process(p);
		}
	} else {
		struct list_head *wait_list = &lock->wait_list[waitlist_id];
		struct six_lock_waiter *w, *next;

		if (!(state.waiters & (1 << waitlist_id)))
			return;

		clear_bit(waitlist_bitnr(waitlist_id),
			  (unsigned long *) &lock->state.v);

		raw_spin_lock(&lock->wait_lock);

		list_for_each_entry_safe(w, next, wait_list, list) {
			list_del_init(&w->list);

			if (wake_up_process(w->task) &&
			    waitlist_id != SIX_LOCK_read) {
				if (!list_empty(wait_list))
					set_bit(waitlist_bitnr(waitlist_id),
						(unsigned long *) &lock->state.v);
				break;
			}
		}

		raw_spin_unlock(&lock->wait_lock);
	}
}
static __always_inline bool do_six_trylock_type(struct six_lock *lock,
						enum six_lock_type type,
						bool try)
{
	const struct six_lock_vals l[] = LOCK_VALS;
	union six_lock_state old, new;
	bool ret;
	u64 v;

	EBUG_ON(type == SIX_LOCK_write && lock->owner != current);
	EBUG_ON(type == SIX_LOCK_write && (lock->state.seq & 1));

	EBUG_ON(type == SIX_LOCK_write && (try != !(lock->state.write_locking)));
	/*
	 * Percpu reader mode:
	 *
	 * The basic idea behind this algorithm is that you can implement a lock
	 * between two threads without any atomics, just memory barriers:
	 *
	 * For two threads you'll need two variables, one variable for "thread a
	 * has the lock" and another for "thread b has the lock".
	 *
	 * To take the lock, a thread sets its variable indicating that it holds
	 * the lock, then issues a full memory barrier, then reads from the
	 * other thread's variable to check if the other thread thinks it has
	 * the lock. If we raced, we back off and retry/sleep.
	 */
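	/*
	 * A minimal sketch of that two-variable scheme (illustrative only;
	 * "a_wants" and "b_wants" are hypothetical variables, not part of
	 * this lock's state):
	 *
	 *	thread A:			thread B:
	 *	WRITE_ONCE(a_wants, 1);		WRITE_ONCE(b_wants, 1);
	 *	smp_mb();			smp_mb();
	 *	if (READ_ONCE(b_wants))		if (READ_ONCE(a_wants))
	 *		back off;			back off;
	 *	...critical section...		...critical section...
	 *	WRITE_ONCE(a_wants, 0);		WRITE_ONCE(b_wants, 0);
	 *
	 * Below, a reader plays thread A: its per-cpu counter is its "I hold
	 * the lock" variable, and write_locking in the state word is the
	 * writer's. The writer, after setting write_locking and issuing its
	 * barrier, scans every cpu's counter via pcpu_read_count().
	 */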
	if (type == SIX_LOCK_read && lock->readers) {
retry:
		preempt_disable();
		this_cpu_inc(*lock->readers); /* signal that we own lock */

		smp_mb();

		old.v = READ_ONCE(lock->state.v);
		ret = !(old.v & l[type].lock_fail);

		this_cpu_sub(*lock->readers, !ret);
		preempt_enable();

		/*
		 * If we failed because a writer was trying to take the
		 * lock, issue a wakeup because we might have caused a
		 * spurious trylock failure:
		 */
		if (old.write_locking) {
			struct task_struct *p = READ_ONCE(lock->owner);

			if (p)
				wake_up_process(p);
		}

		/*
		 * If we failed from the lock path and the waiting bit wasn't
		 * set, set it:
		 */
		if (!try && !ret) {
			v = old.v;

			do {
				new.v = old.v = v;

				if (!(old.v & l[type].lock_fail))
					goto retry;

				if (new.waiters & (1 << type))
					break;

				new.waiters |= 1 << type;
			} while ((v = atomic64_cmpxchg(&lock->state.counter,
						       old.v, new.v)) != old.v);
		}
	} else if (type == SIX_LOCK_write && lock->readers) {
		if (try) {
			atomic64_add(__SIX_VAL(write_locking, 1),
				     &lock->state.counter);
			smp_mb__after_atomic();
		}

		ret = !pcpu_read_count(lock);

		/*
		 * On success, we increment lock->seq; also we clear
		 * write_locking unless we failed from the lock path:
		 */
		v = 0;
		if (ret)
			v += __SIX_VAL(seq, 1);
		if (ret || try)
			v -= __SIX_VAL(write_locking, 1);

		if (try && !ret) {
			old.v = atomic64_add_return(v, &lock->state.counter);
			six_lock_wakeup(lock, old, SIX_LOCK_read);
		} else {
			atomic64_add(v, &lock->state.counter);
		}
	} else {
		v = READ_ONCE(lock->state.v);
		do {
			new.v = old.v = v;

			if (!(old.v & l[type].lock_fail)) {
				new.v += l[type].lock_val;

				if (type == SIX_LOCK_write)
					new.write_locking = 0;
			} else if (!try && type != SIX_LOCK_write &&
				   !(new.waiters & (1 << type)))
				new.waiters |= 1 << type;
			else
				break; /* waiting bit already set */
		} while ((v = atomic64_cmpxchg_acquire(&lock->state.counter,
					old.v, new.v)) != old.v);

		ret = !(old.v & l[type].lock_fail);

		EBUG_ON(ret && !(lock->state.v & l[type].held_mask));
	}

	if (ret)
		six_set_owner(lock, type, old);

	EBUG_ON(type == SIX_LOCK_write && (try || ret) && (lock->state.write_locking));

	return ret;
}
__always_inline __flatten
static bool __six_trylock_type(struct six_lock *lock, enum six_lock_type type)
{
	if (!do_six_trylock_type(lock, type, true))
		return false;

	if (type != SIX_LOCK_write)
		six_acquire(&lock->dep_map, 1);
	return true;
}
__always_inline __flatten
static bool __six_relock_type(struct six_lock *lock, enum six_lock_type type,
			      u32 seq)
{
	const struct six_lock_vals l[] = LOCK_VALS;
	union six_lock_state old;
	u64 v;

	EBUG_ON(type == SIX_LOCK_write);

	if (type == SIX_LOCK_read &&
	    lock->readers) {
		bool ret;

		preempt_disable();
		this_cpu_inc(*lock->readers);

		smp_mb();

		old.v = READ_ONCE(lock->state.v);
		ret = !(old.v & l[type].lock_fail) && old.seq == seq;

		this_cpu_sub(*lock->readers, !ret);
		preempt_enable();

		/*
		 * Similar to the lock path, we may have caused a spurious write
		 * lock fail and need to issue a wakeup:
		 */
		if (old.write_locking) {
			struct task_struct *p = READ_ONCE(lock->owner);

			if (p)
				wake_up_process(p);
		}

		if (ret)
			six_acquire(&lock->dep_map, 1);

		return ret;
	}

	v = READ_ONCE(lock->state.v);
	do {
		old.v = v;

		if (old.seq != seq || old.v & l[type].lock_fail)
			return false;
	} while ((v = atomic64_cmpxchg_acquire(&lock->state.counter,
				old.v,
				old.v + l[type].lock_val)) != old.v);

	six_set_owner(lock, type, old);
	if (type != SIX_LOCK_write)
		six_acquire(&lock->dep_map, 1);
	return true;
}
#ifdef CONFIG_LOCK_SPIN_ON_OWNER

static inline int six_can_spin_on_owner(struct six_lock *lock)
{
	struct task_struct *owner;
	int retval = 1;

	if (need_resched())
		return 0;

	rcu_read_lock();
	owner = READ_ONCE(lock->owner);
	if (owner)
		retval = owner->on_cpu;
	rcu_read_unlock();
	/*
	 * If lock->owner is not set, the owner may have just acquired the
	 * lock and not set the owner field yet, or the lock may have been
	 * released.
	 */
	return retval;
}
static inline bool six_spin_on_owner(struct six_lock *lock,
				     struct task_struct *owner)
{
	bool ret = true;

	rcu_read_lock();
	while (lock->owner == owner) {
		/*
		 * Ensure we emit the owner->on_cpu dereference _after_
		 * checking lock->owner still matches owner. If that fails,
		 * owner might point to freed memory. If it still matches,
		 * the rcu_read_lock() ensures the memory stays valid.
		 */
		barrier();

		if (!owner->on_cpu || need_resched()) {
			ret = false;
			break;
		}

		cpu_relax();
	}
	rcu_read_unlock();

	return ret;
}
static inline bool six_optimistic_spin(struct six_lock *lock, enum six_lock_type type)
{
	struct task_struct *task = current;

	if (type == SIX_LOCK_write)
		return false;

	preempt_disable();
	if (!six_can_spin_on_owner(lock))
		goto fail;

	if (!osq_lock(&lock->osq))
		goto fail;

	while (1) {
		struct task_struct *owner;

		/*
		 * If there's an owner, wait for it to either
		 * release the lock or go to sleep.
		 */
		owner = READ_ONCE(lock->owner);
		if (owner && !six_spin_on_owner(lock, owner))
			break;

		if (do_six_trylock_type(lock, type, false)) {
			osq_unlock(&lock->osq);
			preempt_enable();
			return true;
		}

		/*
		 * When there's no owner, we might have been preempted between
		 * the owner acquiring the lock and setting the owner field.
		 * If we're an RT task, that will live-lock because we won't
		 * let the owner complete.
		 */
		if (!owner && (need_resched() || rt_task(task)))
			break;

		/*
		 * The cpu_relax() call is a compiler barrier which forces
		 * everything in this loop to be re-loaded. We don't need
		 * memory barriers as we'll eventually observe the right
		 * values at the cost of a few extra spins.
		 */
		cpu_relax();
	}

	osq_unlock(&lock->osq);
fail:
	preempt_enable();

	/*
	 * If we fell out of the spin path because of need_resched(),
	 * reschedule now, before we try-lock again. This avoids getting
	 * scheduled out right after we obtain the lock.
	 */
	if (need_resched())
		schedule();

	return false;
}
#else /* CONFIG_LOCK_SPIN_ON_OWNER */

static inline bool six_optimistic_spin(struct six_lock *lock, enum six_lock_type type)
{
	return false;
}

#endif
static int __six_lock_type_slowpath(struct six_lock *lock, enum six_lock_type type,
				    six_lock_should_sleep_fn should_sleep_fn, void *p)
{
	union six_lock_state old;
	struct six_lock_waiter wait;
	int ret = 0;

	if (type == SIX_LOCK_write) {
		EBUG_ON(lock->state.write_locking);
		atomic64_add(__SIX_VAL(write_locking, 1), &lock->state.counter);
		smp_mb__after_atomic();
	}

	ret = should_sleep_fn ? should_sleep_fn(lock, p) : 0;
	if (ret)
		goto out_before_sleep;

	if (six_optimistic_spin(lock, type))
		goto out_before_sleep;

	lock_contended(&lock->dep_map, _RET_IP_);

	INIT_LIST_HEAD(&wait.list);
	wait.task = current;

	while (1) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (type == SIX_LOCK_write)
			EBUG_ON(lock->owner != current);
		else if (list_empty_careful(&wait.list)) {
			raw_spin_lock(&lock->wait_lock);
			list_add_tail(&wait.list, &lock->wait_list[type]);
			raw_spin_unlock(&lock->wait_lock);
		}

		if (do_six_trylock_type(lock, type, false))
			break;

		ret = should_sleep_fn ? should_sleep_fn(lock, p) : 0;
		if (ret)
			break;

		schedule();
	}

	__set_current_state(TASK_RUNNING);

	if (!list_empty_careful(&wait.list)) {
		raw_spin_lock(&lock->wait_lock);
		list_del_init(&wait.list);
		raw_spin_unlock(&lock->wait_lock);
	}
out_before_sleep:
	if (ret && type == SIX_LOCK_write) {
		old.v = atomic64_sub_return(__SIX_VAL(write_locking, 1),
					    &lock->state.counter);
		six_lock_wakeup(lock, old, SIX_LOCK_read);
	}

	return ret;
}
static int __six_lock_type(struct six_lock *lock, enum six_lock_type type,
			   six_lock_should_sleep_fn should_sleep_fn, void *p)
{
	int ret;

	if (type != SIX_LOCK_write)
		six_acquire(&lock->dep_map, 0);

	ret = do_six_trylock_type(lock, type, true) ? 0
		: __six_lock_type_slowpath(lock, type, should_sleep_fn, p);

	if (ret && type != SIX_LOCK_write)
		six_release(&lock->dep_map);
	if (!ret)
		lock_acquired(&lock->dep_map, _RET_IP_);

	return ret;
}
__always_inline __flatten
static void __six_unlock_type(struct six_lock *lock, enum six_lock_type type)
{
	const struct six_lock_vals l[] = LOCK_VALS;
	union six_lock_state state;

	EBUG_ON(type == SIX_LOCK_write &&
		!(lock->state.v & __SIX_LOCK_HELD_intent));

	if (type != SIX_LOCK_write)
		six_release(&lock->dep_map);

	if (type == SIX_LOCK_intent) {
		EBUG_ON(lock->owner != current);

		if (lock->intent_lock_recurse) {
			--lock->intent_lock_recurse;
			return;
		}

		lock->owner = NULL;
	}

	if (type == SIX_LOCK_read &&
	    lock->readers) {
		smp_mb(); /* unlock barrier */
		this_cpu_dec(*lock->readers);
		smp_mb(); /* between unlocking and checking for waiters */
		state.v = READ_ONCE(lock->state.v);
	} else {
		EBUG_ON(!(lock->state.v & l[type].held_mask));
		state.v = atomic64_add_return_release(l[type].unlock_val,
						      &lock->state.counter);
	}

	six_lock_wakeup(lock, state, l[type].unlock_wakeup);
}
#define __SIX_LOCK(type)						\
bool six_trylock_##type(struct six_lock *lock)				\
{									\
	return __six_trylock_type(lock, SIX_LOCK_##type);		\
}									\
EXPORT_SYMBOL_GPL(six_trylock_##type);					\
									\
bool six_relock_##type(struct six_lock *lock, u32 seq)			\
{									\
	return __six_relock_type(lock, SIX_LOCK_##type, seq);		\
}									\
EXPORT_SYMBOL_GPL(six_relock_##type);					\
									\
int six_lock_##type(struct six_lock *lock,				\
		    six_lock_should_sleep_fn should_sleep_fn, void *p)	\
{									\
	return __six_lock_type(lock, SIX_LOCK_##type, should_sleep_fn, p);\
}									\
EXPORT_SYMBOL_GPL(six_lock_##type);					\
									\
void six_unlock_##type(struct six_lock *lock)				\
{									\
	__six_unlock_type(lock, SIX_LOCK_##type);			\
}									\
EXPORT_SYMBOL_GPL(six_unlock_##type);

__SIX_LOCK(read)
__SIX_LOCK(intent)
__SIX_LOCK(write)

#undef __SIX_LOCK
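/*
 * The macro above stamps out the public entry points: six_trylock_read(),
 * six_relock_intent(), six_lock_write(), six_unlock_read(), and so on.  A
 * minimal usage sketch (illustrative only; "foo" is a hypothetical lock, and
 * six_lock_init() and the state.seq layout are assumed from <linux/six.h>):
 *
 *	struct six_lock foo;
 *
 *	six_lock_init(&foo);
 *
 *	six_lock_read(&foo, NULL, NULL);	// take shared
 *	u32 seq = foo.state.seq;		// remember sequence number
 *	six_unlock_read(&foo);
 *
 *	if (six_relock_read(&foo, seq)) {	// retake, iff seq unchanged
 *		// no write lock was taken in between
 *		six_unlock_read(&foo);
 *	}
 */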
/* Convert from intent to read: */
void six_lock_downgrade(struct six_lock *lock)
{
	six_lock_increment(lock, SIX_LOCK_read);
	six_unlock_intent(lock);
}
EXPORT_SYMBOL_GPL(six_lock_downgrade);
bool six_lock_tryupgrade(struct six_lock *lock)
{
	union six_lock_state old, new;
	u64 v = READ_ONCE(lock->state.v);

	do {
		new.v = old.v = v;

		if (new.intent_lock)
			return false;

		if (!lock->readers) {
			EBUG_ON(!new.read_lock);
			new.read_lock--;
		}

		new.intent_lock = 1;
	} while ((v = atomic64_cmpxchg_acquire(&lock->state.counter,
				old.v, new.v)) != old.v);

	if (lock->readers)
		this_cpu_dec(*lock->readers);

	six_set_owner(lock, SIX_LOCK_intent, old);

	return true;
}
EXPORT_SYMBOL_GPL(six_lock_tryupgrade);
bool six_trylock_convert(struct six_lock *lock,
			 enum six_lock_type from,
			 enum six_lock_type to)
{
	EBUG_ON(to == SIX_LOCK_write || from == SIX_LOCK_write);

	if (to == from)
		return true;

	if (to == SIX_LOCK_read) {
		six_lock_downgrade(lock);
		return true;
	} else {
		return six_lock_tryupgrade(lock);
	}
}
EXPORT_SYMBOL_GPL(six_trylock_convert);
/*
 * Increment read/intent lock count, assuming we already have it read or intent
 * locked:
 */
void six_lock_increment(struct six_lock *lock, enum six_lock_type type)
{
	const struct six_lock_vals l[] = LOCK_VALS;

	six_acquire(&lock->dep_map, 0);

	/* XXX: assert already locked, and that we don't overflow: */

	switch (type) {
	case SIX_LOCK_read:
		if (lock->readers) {
			this_cpu_inc(*lock->readers);
		} else {
			EBUG_ON(!lock->state.read_lock &&
				!lock->state.intent_lock);
			atomic64_add(l[type].lock_val, &lock->state.counter);
		}
		break;
	case SIX_LOCK_intent:
		EBUG_ON(!lock->state.intent_lock);
		lock->intent_lock_recurse++;
		break;
	case SIX_LOCK_write:
		BUG();
		break;
	}
}
EXPORT_SYMBOL_GPL(six_lock_increment);
void six_lock_wakeup_all(struct six_lock *lock)
{
	struct six_lock_waiter *w;

	raw_spin_lock(&lock->wait_lock);

	list_for_each_entry(w, &lock->wait_list[0], list)
		wake_up_process(w->task);
	list_for_each_entry(w, &lock->wait_list[1], list)
		wake_up_process(w->task);

	raw_spin_unlock(&lock->wait_lock);
}
EXPORT_SYMBOL_GPL(six_lock_wakeup_all);
struct free_pcpu_rcu {
	struct rcu_head		rcu;
	void __percpu		*p;
};

static void free_pcpu_rcu_fn(struct rcu_head *_rcu)
{
	struct free_pcpu_rcu *rcu =
		container_of(_rcu, struct free_pcpu_rcu, rcu);

	free_percpu(rcu->p);
	kfree(rcu);
}

void six_lock_pcpu_free_rcu(struct six_lock *lock)
{
	struct free_pcpu_rcu *rcu = kzalloc(sizeof(*rcu), GFP_KERNEL);

	if (!rcu)
		return;

	rcu->p = lock->readers;
	lock->readers = NULL;

	call_rcu(&rcu->rcu, free_pcpu_rcu_fn);
}
EXPORT_SYMBOL_GPL(six_lock_pcpu_free_rcu);
void six_lock_pcpu_free(struct six_lock *lock)
{
	BUG_ON(lock->readers && pcpu_read_count(lock));
	BUG_ON(lock->state.read_lock);

	free_percpu(lock->readers);
	lock->readers = NULL;
}
EXPORT_SYMBOL_GPL(six_lock_pcpu_free);
void six_lock_pcpu_alloc(struct six_lock *lock)
{
	if (!lock->readers)
		lock->readers = alloc_percpu(unsigned);
}
EXPORT_SYMBOL_GPL(six_lock_pcpu_alloc);