]> git.sesse.net Git - bcachefs-tools-debian/blob - linux/six.c
5b81c3fc18be1df6302325ddd288550496b82d34
[bcachefs-tools-debian] / linux / six.c
1 // SPDX-License-Identifier: GPL-2.0
2
3 #include <linux/export.h>
4 #include <linux/log2.h>
5 #include <linux/percpu.h>
6 #include <linux/preempt.h>
7 #include <linux/rcupdate.h>
8 #include <linux/sched.h>
9 #include <linux/sched/clock.h>
10 #include <linux/sched/rt.h>
11 #include <linux/six.h>
12 #include <linux/slab.h>
13
14 #include <trace/events/lock.h>
15
/*
 * EBUG_ON() - assertion that is only compiled in for DEBUG builds.
 *
 * Without DEBUG the condition is not evaluated at all, so it must never carry
 * side effects that the surrounding code relies on.
 */
#if defined(DEBUG)
#define EBUG_ON(cond)			BUG_ON(cond)
#else
#define EBUG_ON(cond)			do {} while (0)
#endif

/*
 * Thin wrappers around the lockdep hooks. six_acquire() forwards @t and @r as
 * the third and fourth lock_acquire() arguments; call sites in this file pass
 * 1 for @t on trylock paths and (type == SIX_LOCK_read) for @r.
 */
#define six_acquire(l, t, r, ip)	lock_acquire(l, 0, t, r, 1, NULL, ip)
#define six_release(l, ip)		lock_release(l, ip)
24
25 static void do_six_unlock_type(struct six_lock *lock, enum six_lock_type type);
26
/*
 * Layout of the 32 bit lock->state word:
 *
 *   bits  0-25	count of read locks held (used when lock->readers is NULL)
 *   bit     26	lock is held for intent
 *   bit     27	lock is held for write
 *   bit 28 + t	waiters of enum six_lock_type value t are on the waitlist
 *   bit     31	set by six_spin_on_owner() when a spinner ran past its time
 *		budget; cleared again in do_six_unlock_type() when a
 *		non-read lock is dropped
 *
 * Every expansion is fully parenthesized so the macros can be combined with
 * any operator at the use site without precedence surprises.
 */
#define SIX_LOCK_HELD_read_OFFSET	0
#define SIX_LOCK_HELD_read		(~(~0U << 26))
#define SIX_LOCK_HELD_intent		(1U << 26)
#define SIX_LOCK_HELD_write		(1U << 27)
#define SIX_LOCK_WAITING_read		(1U << (28 + SIX_LOCK_read))
#define SIX_LOCK_WAITING_intent		(1U << (28 + SIX_LOCK_intent))
#define SIX_LOCK_WAITING_write		(1U << (28 + SIX_LOCK_write))
#define SIX_LOCK_NOSPIN			(1U << 31)
35
36 struct six_lock_vals {
37         /* Value we add to the lock in order to take the lock: */
38         u32                     lock_val;
39
40         /* If the lock has this value (used as a mask), taking the lock fails: */
41         u32                     lock_fail;
42
43         /* Mask that indicates lock is held for this type: */
44         u32                     held_mask;
45
46         /* Waitlist we wakeup when releasing the lock: */
47         enum six_lock_type      unlock_wakeup;
48 };
49
50 #define LOCK_VALS {                                                     \
51         [SIX_LOCK_read] = {                                             \
52                 .lock_val       = 1U << SIX_LOCK_HELD_read_OFFSET,      \
53                 .lock_fail      = SIX_LOCK_HELD_write,                  \
54                 .held_mask      = SIX_LOCK_HELD_read,                   \
55                 .unlock_wakeup  = SIX_LOCK_write,                       \
56         },                                                              \
57         [SIX_LOCK_intent] = {                                           \
58                 .lock_val       = SIX_LOCK_HELD_intent,                 \
59                 .lock_fail      = SIX_LOCK_HELD_intent,                 \
60                 .held_mask      = SIX_LOCK_HELD_intent,                 \
61                 .unlock_wakeup  = SIX_LOCK_intent,                      \
62         },                                                              \
63         [SIX_LOCK_write] = {                                            \
64                 .lock_val       = SIX_LOCK_HELD_write,                  \
65                 .lock_fail      = SIX_LOCK_HELD_read,                   \
66                 .held_mask      = SIX_LOCK_HELD_write,                  \
67                 .unlock_wakeup  = SIX_LOCK_read,                        \
68         },                                                              \
69 }
70
71 static inline void six_set_bitmask(struct six_lock *lock, u32 mask)
72 {
73         if ((atomic_read(&lock->state) & mask) != mask)
74                 atomic_or(mask, &lock->state);
75 }
76
77 static inline void six_clear_bitmask(struct six_lock *lock, u32 mask)
78 {
79         if (atomic_read(&lock->state) & mask)
80                 atomic_and(~mask, &lock->state);
81 }
82
83 static inline void six_set_owner(struct six_lock *lock, enum six_lock_type type,
84                                  u32 old, struct task_struct *owner)
85 {
86         if (type != SIX_LOCK_intent)
87                 return;
88
89         if (!(old & SIX_LOCK_HELD_intent)) {
90                 EBUG_ON(lock->owner);
91                 lock->owner = owner;
92         } else {
93                 EBUG_ON(lock->owner != current);
94         }
95 }
96
97 static inline unsigned pcpu_read_count(struct six_lock *lock)
98 {
99         unsigned read_count = 0;
100         int cpu;
101
102         for_each_possible_cpu(cpu)
103                 read_count += *per_cpu_ptr(lock->readers, cpu);
104         return read_count;
105 }
106
107 /*
108  * __do_six_trylock() - main trylock routine
109  *
110  * Returns 1 on success, 0 on failure
111  *
112  * In percpu reader mode, a failed trylock may cause a spurious trylock failure
113  * for anoter thread taking the competing lock type, and we may havve to do a
114  * wakeup: when a wakeup is required, we return -1 - wakeup_type.
115  */
116 static int __do_six_trylock(struct six_lock *lock, enum six_lock_type type,
117                             struct task_struct *task, bool try)
118 {
119         const struct six_lock_vals l[] = LOCK_VALS;
120         int ret;
121         u32 old;
122
123         EBUG_ON(type == SIX_LOCK_write && lock->owner != task);
124         EBUG_ON(type == SIX_LOCK_write &&
125                 (try != !(atomic_read(&lock->state) & SIX_LOCK_HELD_write)));
126
127         /*
128          * Percpu reader mode:
129          *
130          * The basic idea behind this algorithm is that you can implement a lock
131          * between two threads without any atomics, just memory barriers:
132          *
133          * For two threads you'll need two variables, one variable for "thread a
134          * has the lock" and another for "thread b has the lock".
135          *
136          * To take the lock, a thread sets its variable indicating that it holds
137          * the lock, then issues a full memory barrier, then reads from the
138          * other thread's variable to check if the other thread thinks it has
139          * the lock. If we raced, we backoff and retry/sleep.
140          *
141          * Failure to take the lock may cause a spurious trylock failure in
142          * another thread, because we temporarily set the lock to indicate that
143          * we held it. This would be a problem for a thread in six_lock(), when
144          * they are calling trylock after adding themself to the waitlist and
145          * prior to sleeping.
146          *
147          * Therefore, if we fail to get the lock, and there were waiters of the
148          * type we conflict with, we will have to issue a wakeup.
149          *
150          * Since we may be called under wait_lock (and by the wakeup code
151          * itself), we return that the wakeup has to be done instead of doing it
152          * here.
153          */
154         if (type == SIX_LOCK_read && lock->readers) {
155                 preempt_disable();
156                 this_cpu_inc(*lock->readers); /* signal that we own lock */
157
158                 smp_mb();
159
160                 old = atomic_read(&lock->state);
161                 ret = !(old & l[type].lock_fail);
162
163                 this_cpu_sub(*lock->readers, !ret);
164                 preempt_enable();
165
166                 if (!ret && (old & SIX_LOCK_WAITING_write))
167                         ret = -1 - SIX_LOCK_write;
168         } else if (type == SIX_LOCK_write && lock->readers) {
169                 if (try) {
170                         atomic_add(SIX_LOCK_HELD_write, &lock->state);
171                         smp_mb__after_atomic();
172                 }
173
174                 ret = !pcpu_read_count(lock);
175
176                 if (try && !ret) {
177                         old = atomic_sub_return(SIX_LOCK_HELD_write, &lock->state);
178                         if (old & SIX_LOCK_WAITING_read)
179                                 ret = -1 - SIX_LOCK_read;
180                 }
181         } else {
182                 old = atomic_read(&lock->state);
183                 do {
184                         ret = !(old & l[type].lock_fail);
185                         if (!ret || (type == SIX_LOCK_write && !try)) {
186                                 smp_mb();
187                                 break;
188                         }
189                 } while (!atomic_try_cmpxchg_acquire(&lock->state, &old, old + l[type].lock_val));
190
191                 EBUG_ON(ret && !(atomic_read(&lock->state) & l[type].held_mask));
192         }
193
194         if (ret > 0)
195                 six_set_owner(lock, type, old, task);
196
197         EBUG_ON(type == SIX_LOCK_write && try && ret <= 0 &&
198                 (atomic_read(&lock->state) & SIX_LOCK_HELD_write));
199
200         return ret;
201 }
202
203 static void __six_lock_wakeup(struct six_lock *lock, enum six_lock_type lock_type)
204 {
205         struct six_lock_waiter *w, *next;
206         struct task_struct *task;
207         bool saw_one;
208         int ret;
209 again:
210         ret = 0;
211         saw_one = false;
212         raw_spin_lock(&lock->wait_lock);
213
214         list_for_each_entry_safe(w, next, &lock->wait_list, list) {
215                 if (w->lock_want != lock_type)
216                         continue;
217
218                 if (saw_one && lock_type != SIX_LOCK_read)
219                         goto unlock;
220                 saw_one = true;
221
222                 ret = __do_six_trylock(lock, lock_type, w->task, false);
223                 if (ret <= 0)
224                         goto unlock;
225
226                 __list_del(w->list.prev, w->list.next);
227                 task = w->task;
228                 /*
229                  * Do no writes to @w besides setting lock_acquired - otherwise
230                  * we would need a memory barrier:
231                  */
232                 barrier();
233                 w->lock_acquired = true;
234                 wake_up_process(task);
235         }
236
237         six_clear_bitmask(lock, SIX_LOCK_WAITING_read << lock_type);
238 unlock:
239         raw_spin_unlock(&lock->wait_lock);
240
241         if (ret < 0) {
242                 lock_type = -ret - 1;
243                 goto again;
244         }
245 }
246
247 __always_inline
248 static void six_lock_wakeup(struct six_lock *lock, u32 state,
249                             enum six_lock_type lock_type)
250 {
251         if (lock_type == SIX_LOCK_write && (state & SIX_LOCK_HELD_read))
252                 return;
253
254         if (!(state & (SIX_LOCK_WAITING_read << lock_type)))
255                 return;
256
257         __six_lock_wakeup(lock, lock_type);
258 }
259
260 __always_inline
261 static bool do_six_trylock(struct six_lock *lock, enum six_lock_type type, bool try)
262 {
263         int ret;
264
265         ret = __do_six_trylock(lock, type, current, try);
266         if (ret < 0)
267                 __six_lock_wakeup(lock, -ret - 1);
268
269         return ret > 0;
270 }
271
272 /**
273  * six_trylock_ip - attempt to take a six lock without blocking
274  * @lock:       lock to take
275  * @type:       SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
276  * @ip:         ip parameter for lockdep/lockstat, i.e. _THIS_IP_
277  *
278  * Return: true on success, false on failure.
279  */
280 bool six_trylock_ip(struct six_lock *lock, enum six_lock_type type, unsigned long ip)
281 {
282         if (!do_six_trylock(lock, type, true))
283                 return false;
284
285         if (type != SIX_LOCK_write)
286                 six_acquire(&lock->dep_map, 1, type == SIX_LOCK_read, ip);
287         return true;
288 }
289 EXPORT_SYMBOL_GPL(six_trylock_ip);
290
291 /**
292  * six_relock_ip - attempt to re-take a lock that was held previously
293  * @lock:       lock to take
294  * @type:       SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
295  * @seq:        lock sequence number obtained from six_lock_seq() while lock was
296  *              held previously
297  * @ip:         ip parameter for lockdep/lockstat, i.e. _THIS_IP_
298  *
299  * Return: true on success, false on failure.
300  */
301 bool six_relock_ip(struct six_lock *lock, enum six_lock_type type,
302                    unsigned seq, unsigned long ip)
303 {
304         if (lock->seq != seq || !six_trylock_ip(lock, type, ip))
305                 return false;
306
307         if (lock->seq != seq) {
308                 six_unlock_ip(lock, type, ip);
309                 return false;
310         }
311
312         return true;
313 }
314 EXPORT_SYMBOL_GPL(six_relock_ip);
315
316 #ifdef CONFIG_LOCK_SPIN_ON_OWNER
317
318 static inline bool six_can_spin_on_owner(struct six_lock *lock)
319 {
320         struct task_struct *owner;
321         bool ret;
322
323         if (need_resched())
324                 return false;
325
326         rcu_read_lock();
327         owner = READ_ONCE(lock->owner);
328         ret = !owner || owner_on_cpu(owner);
329         rcu_read_unlock();
330
331         return ret;
332 }
333
334 static inline bool six_spin_on_owner(struct six_lock *lock,
335                                      struct task_struct *owner,
336                                      u64 end_time)
337 {
338         bool ret = true;
339         unsigned loop = 0;
340
341         rcu_read_lock();
342         while (lock->owner == owner) {
343                 /*
344                  * Ensure we emit the owner->on_cpu, dereference _after_
345                  * checking lock->owner still matches owner. If that fails,
346                  * owner might point to freed memory. If it still matches,
347                  * the rcu_read_lock() ensures the memory stays valid.
348                  */
349                 barrier();
350
351                 if (!owner_on_cpu(owner) || need_resched()) {
352                         ret = false;
353                         break;
354                 }
355
356                 if (!(++loop & 0xf) && (time_after64(sched_clock(), end_time))) {
357                         six_set_bitmask(lock, SIX_LOCK_NOSPIN);
358                         ret = false;
359                         break;
360                 }
361
362                 cpu_relax();
363         }
364         rcu_read_unlock();
365
366         return ret;
367 }
368
369 static inline bool six_optimistic_spin(struct six_lock *lock, enum six_lock_type type)
370 {
371         struct task_struct *task = current;
372         u64 end_time;
373
374         if (type == SIX_LOCK_write)
375                 return false;
376
377         preempt_disable();
378         if (!six_can_spin_on_owner(lock))
379                 goto fail;
380
381         if (!osq_lock(&lock->osq))
382                 goto fail;
383
384         end_time = sched_clock() + 10 * NSEC_PER_USEC;
385
386         while (1) {
387                 struct task_struct *owner;
388
389                 /*
390                  * If there's an owner, wait for it to either
391                  * release the lock or go to sleep.
392                  */
393                 owner = READ_ONCE(lock->owner);
394                 if (owner && !six_spin_on_owner(lock, owner, end_time))
395                         break;
396
397                 if (do_six_trylock(lock, type, false)) {
398                         osq_unlock(&lock->osq);
399                         preempt_enable();
400                         return true;
401                 }
402
403                 /*
404                  * When there's no owner, we might have preempted between the
405                  * owner acquiring the lock and setting the owner field. If
406                  * we're an RT task that will live-lock because we won't let
407                  * the owner complete.
408                  */
409                 if (!owner && (need_resched() || rt_task(task)))
410                         break;
411
412                 /*
413                  * The cpu_relax() call is a compiler barrier which forces
414                  * everything in this loop to be re-loaded. We don't need
415                  * memory barriers as we'll eventually observe the right
416                  * values at the cost of a few extra spins.
417                  */
418                 cpu_relax();
419         }
420
421         osq_unlock(&lock->osq);
422 fail:
423         preempt_enable();
424
425         /*
426          * If we fell out of the spin path because of need_resched(),
427          * reschedule now, before we try-lock again. This avoids getting
428          * scheduled out right after we obtained the lock.
429          */
430         if (need_resched())
431                 schedule();
432
433         return false;
434 }
435
436 #else /* CONFIG_LOCK_SPIN_ON_OWNER */
437
438 static inline bool six_optimistic_spin(struct six_lock *lock, enum six_lock_type type)
439 {
440         return false;
441 }
442
443 #endif
444
445 noinline
446 static int six_lock_slowpath(struct six_lock *lock, enum six_lock_type type,
447                              struct six_lock_waiter *wait,
448                              six_lock_should_sleep_fn should_sleep_fn, void *p,
449                              unsigned long ip)
450 {
451         int ret = 0;
452
453         if (type == SIX_LOCK_write) {
454                 EBUG_ON(atomic_read(&lock->state) & SIX_LOCK_HELD_write);
455                 atomic_add(SIX_LOCK_HELD_write, &lock->state);
456                 smp_mb__after_atomic();
457         }
458
459         trace_contention_begin(lock, 0);
460         lock_contended(&lock->dep_map, ip);
461
462         if (six_optimistic_spin(lock, type))
463                 goto out;
464
465         wait->task              = current;
466         wait->lock_want         = type;
467         wait->lock_acquired     = false;
468
469         raw_spin_lock(&lock->wait_lock);
470         six_set_bitmask(lock, SIX_LOCK_WAITING_read << type);
471         /*
472          * Retry taking the lock after taking waitlist lock, in case we raced
473          * with an unlock:
474          */
475         ret = __do_six_trylock(lock, type, current, false);
476         if (ret <= 0) {
477                 wait->start_time = local_clock();
478
479                 if (!list_empty(&lock->wait_list)) {
480                         struct six_lock_waiter *last =
481                                 list_last_entry(&lock->wait_list,
482                                         struct six_lock_waiter, list);
483
484                         if (time_before_eq64(wait->start_time, last->start_time))
485                                 wait->start_time = last->start_time + 1;
486                 }
487
488                 list_add_tail(&wait->list, &lock->wait_list);
489         }
490         raw_spin_unlock(&lock->wait_lock);
491
492         if (unlikely(ret > 0)) {
493                 ret = 0;
494                 goto out;
495         }
496
497         if (unlikely(ret < 0)) {
498                 __six_lock_wakeup(lock, -ret - 1);
499                 ret = 0;
500         }
501
502         while (1) {
503                 set_current_state(TASK_UNINTERRUPTIBLE);
504
505                 if (wait->lock_acquired)
506                         break;
507
508                 ret = should_sleep_fn ? should_sleep_fn(lock, p) : 0;
509                 if (unlikely(ret)) {
510                         raw_spin_lock(&lock->wait_lock);
511                         if (!wait->lock_acquired)
512                                 list_del(&wait->list);
513                         raw_spin_unlock(&lock->wait_lock);
514
515                         if (unlikely(wait->lock_acquired))
516                                 do_six_unlock_type(lock, type);
517                         break;
518                 }
519
520                 schedule();
521         }
522
523         __set_current_state(TASK_RUNNING);
524 out:
525         if (ret && type == SIX_LOCK_write) {
526                 six_clear_bitmask(lock, SIX_LOCK_HELD_write);
527                 six_lock_wakeup(lock, atomic_read(&lock->state), SIX_LOCK_read);
528         }
529         trace_contention_end(lock, 0);
530
531         return ret;
532 }
533
534 /**
535  * six_lock_ip_waiter - take a lock, with full waitlist interface
536  * @lock:       lock to take
537  * @type:       SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
538  * @wait:       pointer to wait object, which will be added to lock's waitlist
539  * @should_sleep_fn: callback run after adding to waitlist, immediately prior
540  *              to scheduling
541  * @p:          passed through to @should_sleep_fn
542  * @ip:         ip parameter for lockdep/lockstat, i.e. _THIS_IP_
543  *
544  * This is the most general six_lock() variant, with parameters to support full
545  * cycle detection for deadlock avoidance.
546  *
547  * The code calling this function must implement tracking of held locks, and the
548  * @wait object should be embedded into the struct that tracks held locks -
549  * which must also be accessible in a thread-safe way.
550  *
551  * @should_sleep_fn should invoke the cycle detector; it should walk each
552  * lock's waiters, and for each waiter recursively walk their held locks.
553  *
554  * When this function must block, @wait will be added to @lock's waitlist before
555  * calling trylock, and before calling @should_sleep_fn, and @wait will not be
556  * removed from the lock waitlist until the lock has been successfully acquired,
557  * or we abort.
558  *
559  * @wait.start_time will be monotonically increasing for any given waitlist, and
560  * thus may be used as a loop cursor.
561  *
562  * Return: 0 on success, or the return code from @should_sleep_fn on failure.
563  */
564 int six_lock_ip_waiter(struct six_lock *lock, enum six_lock_type type,
565                        struct six_lock_waiter *wait,
566                        six_lock_should_sleep_fn should_sleep_fn, void *p,
567                        unsigned long ip)
568 {
569         int ret;
570
571         wait->start_time = 0;
572
573         if (type != SIX_LOCK_write)
574                 six_acquire(&lock->dep_map, 0, type == SIX_LOCK_read, ip);
575
576         ret = do_six_trylock(lock, type, true) ? 0
577                 : six_lock_slowpath(lock, type, wait, should_sleep_fn, p, ip);
578
579         if (ret && type != SIX_LOCK_write)
580                 six_release(&lock->dep_map, ip);
581         if (!ret)
582                 lock_acquired(&lock->dep_map, ip);
583
584         return ret;
585 }
586 EXPORT_SYMBOL_GPL(six_lock_ip_waiter);
587
588 __always_inline
589 static void do_six_unlock_type(struct six_lock *lock, enum six_lock_type type)
590 {
591         const struct six_lock_vals l[] = LOCK_VALS;
592         u32 state;
593
594         if (type == SIX_LOCK_intent)
595                 lock->owner = NULL;
596
597         if (type == SIX_LOCK_read &&
598             lock->readers) {
599                 smp_mb(); /* unlock barrier */
600                 this_cpu_dec(*lock->readers);
601                 smp_mb(); /* between unlocking and checking for waiters */
602                 state = atomic_read(&lock->state);
603         } else {
604                 u32 v = l[type].lock_val;
605
606                 if (type != SIX_LOCK_read)
607                         v += atomic_read(&lock->state) & SIX_LOCK_NOSPIN;
608
609                 EBUG_ON(!(atomic_read(&lock->state) & l[type].held_mask));
610                 state = atomic_sub_return_release(v, &lock->state);
611         }
612
613         six_lock_wakeup(lock, state, l[type].unlock_wakeup);
614 }
615
616 /**
617  * six_unlock_ip - drop a six lock
618  * @lock:       lock to unlock
619  * @type:       SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
620  * @ip:         ip parameter for lockdep/lockstat, i.e. _THIS_IP_
621  *
622  * When a lock is held multiple times (because six_lock_incement()) was used),
623  * this decrements the 'lock held' counter by one.
624  *
625  * For example:
626  * six_lock_read(&foo->lock);                           read count 1
627  * six_lock_increment(&foo->lock, SIX_LOCK_read);       read count 2
628  * six_lock_unlock(&foo->lock, SIX_LOCK_read);          read count 1
629  * six_lock_unlock(&foo->lock, SIX_LOCK_read);          read count 0
630  */
631 void six_unlock_ip(struct six_lock *lock, enum six_lock_type type, unsigned long ip)
632 {
633         EBUG_ON(type == SIX_LOCK_write &&
634                 !(atomic_read(&lock->state) & SIX_LOCK_HELD_intent));
635         EBUG_ON((type == SIX_LOCK_write ||
636                  type == SIX_LOCK_intent) &&
637                 lock->owner != current);
638
639         if (type != SIX_LOCK_write)
640                 six_release(&lock->dep_map, ip);
641
642         if (type == SIX_LOCK_intent &&
643             lock->intent_lock_recurse) {
644                 --lock->intent_lock_recurse;
645                 return;
646         }
647
648         lock->seq += type == SIX_LOCK_write;
649
650         do_six_unlock_type(lock, type);
651 }
652 EXPORT_SYMBOL_GPL(six_unlock_ip);
653
654 /**
655  * six_lock_downgrade - convert an intent lock to a read lock
656  * @lock:       lock to dowgrade
657  *
658  * @lock will have read count incremented and intent count decremented
659  */
660 void six_lock_downgrade(struct six_lock *lock)
661 {
662         six_lock_increment(lock, SIX_LOCK_read);
663         six_unlock_intent(lock);
664 }
665 EXPORT_SYMBOL_GPL(six_lock_downgrade);
666
667 /**
668  * six_lock_tryupgrade - attempt to convert read lock to an intent lock
669  * @lock:       lock to upgrade
670  *
671  * On success, @lock will have intent count incremented and read count
672  * decremented
673  *
674  * Return: true on success, false on failure
675  */
676 bool six_lock_tryupgrade(struct six_lock *lock)
677 {
678         const struct six_lock_vals l[] = LOCK_VALS;
679         u32 old = atomic_read(&lock->state), new;
680
681         do {
682                 new = old;
683
684                 if (new & SIX_LOCK_HELD_intent)
685                         return false;
686
687                 if (!lock->readers) {
688                         EBUG_ON(!(new & SIX_LOCK_HELD_read));
689                         new -= l[SIX_LOCK_read].lock_val;
690                 }
691
692                 new |= SIX_LOCK_HELD_intent;
693         } while (!atomic_try_cmpxchg_acquire(&lock->state, &old, new));
694
695         if (lock->readers)
696                 this_cpu_dec(*lock->readers);
697
698         six_set_owner(lock, SIX_LOCK_intent, old, current);
699
700         return true;
701 }
702 EXPORT_SYMBOL_GPL(six_lock_tryupgrade);
703
704 /**
705  * six_trylock_convert - attempt to convert a held lock from one type to another
706  * @lock:       lock to upgrade
707  * @from:       SIX_LOCK_read or SIX_LOCK_intent
708  * @to:         SIX_LOCK_read or SIX_LOCK_intent
709  *
710  * On success, @lock will have intent count incremented and read count
711  * decremented
712  *
713  * Return: true on success, false on failure
714  */
715 bool six_trylock_convert(struct six_lock *lock,
716                          enum six_lock_type from,
717                          enum six_lock_type to)
718 {
719         EBUG_ON(to == SIX_LOCK_write || from == SIX_LOCK_write);
720
721         if (to == from)
722                 return true;
723
724         if (to == SIX_LOCK_read) {
725                 six_lock_downgrade(lock);
726                 return true;
727         } else {
728                 return six_lock_tryupgrade(lock);
729         }
730 }
731 EXPORT_SYMBOL_GPL(six_trylock_convert);
732
733 /**
734  * six_lock_increment - increase held lock count on a lock that is already held
735  * @lock:       lock to increment
736  * @type:       SIX_LOCK_read or SIX_LOCK_intent
737  *
738  * @lock must already be held, with a lock type that is greater than or equal to
739  * @type
740  *
741  * A corresponding six_unlock_type() call will be required for @lock to be fully
742  * unlocked.
743  */
744 void six_lock_increment(struct six_lock *lock, enum six_lock_type type)
745 {
746         const struct six_lock_vals l[] = LOCK_VALS;
747
748         six_acquire(&lock->dep_map, 0, type == SIX_LOCK_read, _RET_IP_);
749
750         /* XXX: assert already locked, and that we don't overflow: */
751
752         switch (type) {
753         case SIX_LOCK_read:
754                 if (lock->readers) {
755                         this_cpu_inc(*lock->readers);
756                 } else {
757                         EBUG_ON(!(atomic_read(&lock->state) &
758                                   (SIX_LOCK_HELD_read|
759                                    SIX_LOCK_HELD_intent)));
760                         atomic_add(l[type].lock_val, &lock->state);
761                 }
762                 break;
763         case SIX_LOCK_intent:
764                 EBUG_ON(!(atomic_read(&lock->state) & SIX_LOCK_HELD_intent));
765                 lock->intent_lock_recurse++;
766                 break;
767         case SIX_LOCK_write:
768                 BUG();
769                 break;
770         }
771 }
772 EXPORT_SYMBOL_GPL(six_lock_increment);
773
774 /**
775  * six_lock_wakeup_all - wake up all waiters on @lock
776  * @lock:       lock to wake up waiters for
777  *
778  * Wakeing up waiters will cause them to re-run should_sleep_fn, which may then
779  * abort the lock operation.
780  *
781  * This function is never needed in a bug-free program; it's only useful in
782  * debug code, e.g. to determine if a cycle detector is at fault.
783  */
784 void six_lock_wakeup_all(struct six_lock *lock)
785 {
786         u32 state = atomic_read(&lock->state);
787         struct six_lock_waiter *w;
788
789         six_lock_wakeup(lock, state, SIX_LOCK_read);
790         six_lock_wakeup(lock, state, SIX_LOCK_intent);
791         six_lock_wakeup(lock, state, SIX_LOCK_write);
792
793         raw_spin_lock(&lock->wait_lock);
794         list_for_each_entry(w, &lock->wait_list, list)
795                 wake_up_process(w->task);
796         raw_spin_unlock(&lock->wait_lock);
797 }
798 EXPORT_SYMBOL_GPL(six_lock_wakeup_all);
799
800 /**
801  * six_lock_counts - return held lock counts, for each lock type
802  * @lock:       lock to return counters for
803  *
804  * Return: the number of times a lock is held for read, intent and write.
805  */
806 struct six_lock_count six_lock_counts(struct six_lock *lock)
807 {
808         struct six_lock_count ret;
809
810         ret.n[SIX_LOCK_read]    = !lock->readers
811                 ? atomic_read(&lock->state) & SIX_LOCK_HELD_read
812                 : pcpu_read_count(lock);
813         ret.n[SIX_LOCK_intent]  = !!(atomic_read(&lock->state) & SIX_LOCK_HELD_intent) +
814                 lock->intent_lock_recurse;
815         ret.n[SIX_LOCK_write]   = !!(atomic_read(&lock->state) & SIX_LOCK_HELD_write);
816
817         return ret;
818 }
819 EXPORT_SYMBOL_GPL(six_lock_counts);
820
821 /**
822  * six_lock_readers_add - directly manipulate reader count of a lock
823  * @lock:       lock to add/subtract readers for
824  * @nr:         reader count to add/subtract
825  *
826  * When an upper layer is implementing lock reentrency, we may have both read
827  * and intent locks on the same lock.
828  *
829  * When we need to take a write lock, the read locks will cause self-deadlock,
830  * because six locks themselves do not track which read locks are held by the
831  * current thread and which are held by a different thread - it does no
832  * per-thread tracking of held locks.
833  *
834  * The upper layer that is tracking held locks may however, if trylock() has
835  * failed, count up its own read locks, subtract them, take the write lock, and
836  * then re-add them.
837  *
838  * As in any other situation when taking a write lock, @lock must be held for
839  * intent one (or more) times, so @lock will never be left unlocked.
840  */
841 void six_lock_readers_add(struct six_lock *lock, int nr)
842 {
843         if (lock->readers) {
844                 this_cpu_add(*lock->readers, nr);
845         } else {
846                 EBUG_ON((int) (atomic_read(&lock->state) & SIX_LOCK_HELD_read) + nr < 0);
847                 /* reader count starts at bit 0 */
848                 atomic_add(nr, &lock->state);
849         }
850 }
851 EXPORT_SYMBOL_GPL(six_lock_readers_add);
852
853 /**
854  * six_lock_exit - release resources held by a lock prior to freeing
855  * @lock:       lock to exit
856  *
857  * When a lock was initialized in percpu mode (SIX_OLCK_INIT_PCPU), this is
858  * required to free the percpu read counts.
859  */
860 void six_lock_exit(struct six_lock *lock)
861 {
862         WARN_ON(lock->readers && pcpu_read_count(lock));
863         WARN_ON(atomic_read(&lock->state) & SIX_LOCK_HELD_read);
864
865         free_percpu(lock->readers);
866         lock->readers = NULL;
867 }
868 EXPORT_SYMBOL_GPL(six_lock_exit);
869
870 void __six_lock_init(struct six_lock *lock, const char *name,
871                      struct lock_class_key *key, enum six_lock_init_flags flags)
872 {
873         atomic_set(&lock->state, 0);
874         raw_spin_lock_init(&lock->wait_lock);
875         INIT_LIST_HEAD(&lock->wait_list);
876 #ifdef CONFIG_DEBUG_LOCK_ALLOC
877         debug_check_no_locks_freed((void *) lock, sizeof(*lock));
878         lockdep_init_map(&lock->dep_map, name, key, 0);
879 #endif
880
881         /*
882          * Don't assume that we have real percpu variables available in
883          * userspace:
884          */
885 #ifdef __KERNEL__
886         if (flags & SIX_LOCK_INIT_PCPU) {
887                 /*
888                  * We don't return an error here on memory allocation failure
889                  * since percpu is an optimization, and locks will work with the
890                  * same semantics in non-percpu mode: callers can check for
891                  * failure if they wish by checking lock->readers, but generally
892                  * will not want to treat it as an error.
893                  */
894                 lock->readers = alloc_percpu(unsigned);
895         }
896 #endif
897 }
898 EXPORT_SYMBOL_GPL(__six_lock_init);