]> git.sesse.net Git - bcachefs-tools-debian/blob - libbcachefs/six.c
Update bcachefs sources to d464ec667b2b bcachefs: Add missing printk newlines
[bcachefs-tools-debian] / libbcachefs / six.c
1 // SPDX-License-Identifier: GPL-2.0
2
3 #include <linux/export.h>
4 #include <linux/log2.h>
5 #include <linux/percpu.h>
6 #include <linux/preempt.h>
7 #include <linux/rcupdate.h>
8 #include <linux/sched.h>
9 #include <linux/sched/clock.h>
10 #include <linux/sched/rt.h>
11 #include <linux/sched/task.h>
12 #include <linux/slab.h>
13
14 #include <trace/events/lock.h>
15
16 #include "six.h"
17
/*
 * EBUG_ON() - debug-only assertion.
 *
 * With DEBUG defined this is BUG_ON(cond). Otherwise it expands to an empty
 * statement and @cond is not evaluated at all, so @cond must not have side
 * effects that the surrounding code depends on.
 */
#ifndef DEBUG
#define EBUG_ON(cond)			do {} while (0)
#else
#define EBUG_ON(cond)			BUG_ON(cond)
#endif
23
/*
 * Thin wrappers around the lockdep hooks. six_acquire() forwards @map, @try,
 * @rd and @ip to lock_acquire() and fills the remaining arguments with the
 * constants 0, 1 and NULL. Callers in this file pass 1 for @try from the
 * trylock path and 0 otherwise, and pass "type == SIX_LOCK_read" for @rd.
 */
#define six_acquire(map, try, rd, ip)	lock_acquire(map, 0, try, rd, 1, NULL, ip)
#define six_release(map, ip)		lock_release(map, ip)

/* Defined below; six_lock_slowpath() uses it to drop a lock it must give back. */
static void do_six_unlock_type(struct six_lock *lock, enum six_lock_type type);
28
/*
 * Layout of the 32 bit lock->state word:
 *
 *   bits 0-25	read lock count (SIX_LOCK_HELD_read is the mask covering it)
 *   bit  26	lock is held for intent
 *   bit  27	lock is held for write
 *   bits 28+	"there are waiters" flags, one per lock type, at bit
 *		28 + <enum six_lock_type value>; code in this file derives the
 *		flag for any type as SIX_LOCK_WAITING_read << type
 *   bit  31	optimistic spinning has been disabled for this lock
 *
 * Every expansion is fully parenthesized so the macros can be dropped into
 * any expression without precedence surprises.
 */
#define SIX_LOCK_HELD_read_OFFSET	0
#define SIX_LOCK_HELD_read		(~(~0U << 26))
#define SIX_LOCK_HELD_intent		(1U << 26)
#define SIX_LOCK_HELD_write		(1U << 27)
#define SIX_LOCK_WAITING_read		(1U << (28 + SIX_LOCK_read))
#define SIX_LOCK_WAITING_write		(1U << (28 + SIX_LOCK_write))
#define SIX_LOCK_NOSPIN			(1U << 31)
36
37 struct six_lock_vals {
38         /* Value we add to the lock in order to take the lock: */
39         u32                     lock_val;
40
41         /* If the lock has this value (used as a mask), taking the lock fails: */
42         u32                     lock_fail;
43
44         /* Mask that indicates lock is held for this type: */
45         u32                     held_mask;
46
47         /* Waitlist we wakeup when releasing the lock: */
48         enum six_lock_type      unlock_wakeup;
49 };
50
51 static const struct six_lock_vals l[] = {
52         [SIX_LOCK_read] = {
53                 .lock_val       = 1U << SIX_LOCK_HELD_read_OFFSET,
54                 .lock_fail      = SIX_LOCK_HELD_write,
55                 .held_mask      = SIX_LOCK_HELD_read,
56                 .unlock_wakeup  = SIX_LOCK_write,
57         },
58         [SIX_LOCK_intent] = {
59                 .lock_val       = SIX_LOCK_HELD_intent,
60                 .lock_fail      = SIX_LOCK_HELD_intent,
61                 .held_mask      = SIX_LOCK_HELD_intent,
62                 .unlock_wakeup  = SIX_LOCK_intent,
63         },
64         [SIX_LOCK_write] = {
65                 .lock_val       = SIX_LOCK_HELD_write,
66                 .lock_fail      = SIX_LOCK_HELD_read,
67                 .held_mask      = SIX_LOCK_HELD_write,
68                 .unlock_wakeup  = SIX_LOCK_read,
69         },
70 };
71
72 static inline void six_set_bitmask(struct six_lock *lock, u32 mask)
73 {
74         if ((atomic_read(&lock->state) & mask) != mask)
75                 atomic_or(mask, &lock->state);
76 }
77
78 static inline void six_clear_bitmask(struct six_lock *lock, u32 mask)
79 {
80         if (atomic_read(&lock->state) & mask)
81                 atomic_and(~mask, &lock->state);
82 }
83
84 static inline void six_set_owner(struct six_lock *lock, enum six_lock_type type,
85                                  u32 old, struct task_struct *owner)
86 {
87         if (type != SIX_LOCK_intent)
88                 return;
89
90         if (!(old & SIX_LOCK_HELD_intent)) {
91                 EBUG_ON(lock->owner);
92                 lock->owner = owner;
93         } else {
94                 EBUG_ON(lock->owner != current);
95         }
96 }
97
98 static inline unsigned pcpu_read_count(struct six_lock *lock)
99 {
100         unsigned read_count = 0;
101         int cpu;
102
103         for_each_possible_cpu(cpu)
104                 read_count += *per_cpu_ptr(lock->readers, cpu);
105         return read_count;
106 }
107
108 /*
109  * __do_six_trylock() - main trylock routine
110  *
111  * Returns 1 on success, 0 on failure
112  *
113  * In percpu reader mode, a failed trylock may cause a spurious trylock failure
114  * for anoter thread taking the competing lock type, and we may havve to do a
115  * wakeup: when a wakeup is required, we return -1 - wakeup_type.
116  */
117 static int __do_six_trylock(struct six_lock *lock, enum six_lock_type type,
118                             struct task_struct *task, bool try)
119 {
120         int ret;
121         u32 old;
122
123         EBUG_ON(type == SIX_LOCK_write && lock->owner != task);
124         EBUG_ON(type == SIX_LOCK_write &&
125                 (try != !(atomic_read(&lock->state) & SIX_LOCK_HELD_write)));
126
127         /*
128          * Percpu reader mode:
129          *
130          * The basic idea behind this algorithm is that you can implement a lock
131          * between two threads without any atomics, just memory barriers:
132          *
133          * For two threads you'll need two variables, one variable for "thread a
134          * has the lock" and another for "thread b has the lock".
135          *
136          * To take the lock, a thread sets its variable indicating that it holds
137          * the lock, then issues a full memory barrier, then reads from the
138          * other thread's variable to check if the other thread thinks it has
139          * the lock. If we raced, we backoff and retry/sleep.
140          *
141          * Failure to take the lock may cause a spurious trylock failure in
142          * another thread, because we temporarily set the lock to indicate that
143          * we held it. This would be a problem for a thread in six_lock(), when
144          * they are calling trylock after adding themself to the waitlist and
145          * prior to sleeping.
146          *
147          * Therefore, if we fail to get the lock, and there were waiters of the
148          * type we conflict with, we will have to issue a wakeup.
149          *
150          * Since we may be called under wait_lock (and by the wakeup code
151          * itself), we return that the wakeup has to be done instead of doing it
152          * here.
153          */
154         if (type == SIX_LOCK_read && lock->readers) {
155                 preempt_disable();
156                 this_cpu_inc(*lock->readers); /* signal that we own lock */
157
158                 smp_mb();
159
160                 old = atomic_read(&lock->state);
161                 ret = !(old & l[type].lock_fail);
162
163                 this_cpu_sub(*lock->readers, !ret);
164                 preempt_enable();
165
166                 if (!ret && (old & SIX_LOCK_WAITING_write))
167                         ret = -1 - SIX_LOCK_write;
168         } else if (type == SIX_LOCK_write && lock->readers) {
169                 if (try) {
170                         atomic_add(SIX_LOCK_HELD_write, &lock->state);
171                         smp_mb__after_atomic();
172                 }
173
174                 ret = !pcpu_read_count(lock);
175
176                 if (try && !ret) {
177                         old = atomic_sub_return(SIX_LOCK_HELD_write, &lock->state);
178                         if (old & SIX_LOCK_WAITING_read)
179                                 ret = -1 - SIX_LOCK_read;
180                 }
181         } else {
182                 old = atomic_read(&lock->state);
183                 do {
184                         ret = !(old & l[type].lock_fail);
185                         if (!ret || (type == SIX_LOCK_write && !try)) {
186                                 smp_mb();
187                                 break;
188                         }
189                 } while (!atomic_try_cmpxchg_acquire(&lock->state, &old, old + l[type].lock_val));
190
191                 EBUG_ON(ret && !(atomic_read(&lock->state) & l[type].held_mask));
192         }
193
194         if (ret > 0)
195                 six_set_owner(lock, type, old, task);
196
197         EBUG_ON(type == SIX_LOCK_write && try && ret <= 0 &&
198                 (atomic_read(&lock->state) & SIX_LOCK_HELD_write));
199
200         return ret;
201 }
202
203 static void __six_lock_wakeup(struct six_lock *lock, enum six_lock_type lock_type)
204 {
205         struct six_lock_waiter *w, *next;
206         struct task_struct *task;
207         bool saw_one;
208         int ret;
209 again:
210         ret = 0;
211         saw_one = false;
212         raw_spin_lock(&lock->wait_lock);
213
214         list_for_each_entry_safe(w, next, &lock->wait_list, list) {
215                 if (w->lock_want != lock_type)
216                         continue;
217
218                 if (saw_one && lock_type != SIX_LOCK_read)
219                         goto unlock;
220                 saw_one = true;
221
222                 ret = __do_six_trylock(lock, lock_type, w->task, false);
223                 if (ret <= 0)
224                         goto unlock;
225
226                 /*
227                  * Similar to percpu_rwsem_wake_function(), we need to guard
228                  * against the wakee noticing w->lock_acquired, returning, and
229                  * then exiting before we do the wakeup:
230                  */
231                 task = get_task_struct(w->task);
232                 __list_del(w->list.prev, w->list.next);
233                 /*
234                  * The release barrier here ensures the ordering of the
235                  * __list_del before setting w->lock_acquired; @w is on the
236                  * stack of the thread doing the waiting and will be reused
237                  * after it sees w->lock_acquired with no other locking:
238                  * pairs with smp_load_acquire() in six_lock_slowpath()
239                  */
240                 smp_store_release(&w->lock_acquired, true);
241                 wake_up_process(task);
242                 put_task_struct(task);
243         }
244
245         six_clear_bitmask(lock, SIX_LOCK_WAITING_read << lock_type);
246 unlock:
247         raw_spin_unlock(&lock->wait_lock);
248
249         if (ret < 0) {
250                 lock_type = -ret - 1;
251                 goto again;
252         }
253 }
254
255 __always_inline
256 static void six_lock_wakeup(struct six_lock *lock, u32 state,
257                             enum six_lock_type lock_type)
258 {
259         if (lock_type == SIX_LOCK_write && (state & SIX_LOCK_HELD_read))
260                 return;
261
262         if (!(state & (SIX_LOCK_WAITING_read << lock_type)))
263                 return;
264
265         __six_lock_wakeup(lock, lock_type);
266 }
267
268 __always_inline
269 static bool do_six_trylock(struct six_lock *lock, enum six_lock_type type, bool try)
270 {
271         int ret;
272
273         ret = __do_six_trylock(lock, type, current, try);
274         if (ret < 0)
275                 __six_lock_wakeup(lock, -ret - 1);
276
277         return ret > 0;
278 }
279
280 /**
281  * six_trylock_ip - attempt to take a six lock without blocking
282  * @lock:       lock to take
283  * @type:       SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
284  * @ip:         ip parameter for lockdep/lockstat, i.e. _THIS_IP_
285  *
286  * Return: true on success, false on failure.
287  */
288 bool six_trylock_ip(struct six_lock *lock, enum six_lock_type type, unsigned long ip)
289 {
290         if (!do_six_trylock(lock, type, true))
291                 return false;
292
293         if (type != SIX_LOCK_write)
294                 six_acquire(&lock->dep_map, 1, type == SIX_LOCK_read, ip);
295         return true;
296 }
297 EXPORT_SYMBOL_GPL(six_trylock_ip);
298
299 /**
300  * six_relock_ip - attempt to re-take a lock that was held previously
301  * @lock:       lock to take
302  * @type:       SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
303  * @seq:        lock sequence number obtained from six_lock_seq() while lock was
304  *              held previously
305  * @ip:         ip parameter for lockdep/lockstat, i.e. _THIS_IP_
306  *
307  * Return: true on success, false on failure.
308  */
309 bool six_relock_ip(struct six_lock *lock, enum six_lock_type type,
310                    unsigned seq, unsigned long ip)
311 {
312         if (six_lock_seq(lock) != seq || !six_trylock_ip(lock, type, ip))
313                 return false;
314
315         if (six_lock_seq(lock) != seq) {
316                 six_unlock_ip(lock, type, ip);
317                 return false;
318         }
319
320         return true;
321 }
322 EXPORT_SYMBOL_GPL(six_relock_ip);
323
324 #ifdef CONFIG_LOCK_SPIN_ON_OWNER
325
326 static inline bool six_owner_running(struct six_lock *lock)
327 {
328         /*
329          * When there's no owner, we might have preempted between the owner
330          * acquiring the lock and setting the owner field. If we're an RT task
331          * that will live-lock because we won't let the owner complete.
332          */
333         rcu_read_lock();
334         struct task_struct *owner = READ_ONCE(lock->owner);
335         bool ret = owner ? owner_on_cpu(owner) : !rt_task(current);
336         rcu_read_unlock();
337
338         return ret;
339 }
340
341 static inline bool six_optimistic_spin(struct six_lock *lock,
342                                        struct six_lock_waiter *wait,
343                                        enum six_lock_type type)
344 {
345         unsigned loop = 0;
346         u64 end_time;
347
348         if (type == SIX_LOCK_write)
349                 return false;
350
351         if (lock->wait_list.next != &wait->list)
352                 return false;
353
354         if (atomic_read(&lock->state) & SIX_LOCK_NOSPIN)
355                 return false;
356
357         preempt_disable();
358         end_time = sched_clock() + 10 * NSEC_PER_USEC;
359
360         while (!need_resched() && six_owner_running(lock)) {
361                 /*
362                  * Ensures that writes to the waitlist entry happen after we see
363                  * wait->lock_acquired: pairs with the smp_store_release in
364                  * __six_lock_wakeup
365                  */
366                 if (smp_load_acquire(&wait->lock_acquired)) {
367                         preempt_enable();
368                         return true;
369                 }
370
371                 if (!(++loop & 0xf) && (time_after64(sched_clock(), end_time))) {
372                         six_set_bitmask(lock, SIX_LOCK_NOSPIN);
373                         break;
374                 }
375
376                 /*
377                  * The cpu_relax() call is a compiler barrier which forces
378                  * everything in this loop to be re-loaded. We don't need
379                  * memory barriers as we'll eventually observe the right
380                  * values at the cost of a few extra spins.
381                  */
382                 cpu_relax();
383         }
384
385         preempt_enable();
386         return false;
387 }
388
389 #else /* CONFIG_LOCK_SPIN_ON_OWNER */
390
391 static inline bool six_optimistic_spin(struct six_lock *lock,
392                                        struct six_lock_waiter *wait,
393                                        enum six_lock_type type)
394 {
395         return false;
396 }
397
398 #endif
399
400 noinline
401 static int six_lock_slowpath(struct six_lock *lock, enum six_lock_type type,
402                              struct six_lock_waiter *wait,
403                              six_lock_should_sleep_fn should_sleep_fn, void *p,
404                              unsigned long ip)
405 {
406         int ret = 0;
407
408         if (type == SIX_LOCK_write) {
409                 EBUG_ON(atomic_read(&lock->state) & SIX_LOCK_HELD_write);
410                 atomic_add(SIX_LOCK_HELD_write, &lock->state);
411                 smp_mb__after_atomic();
412         }
413
414         trace_contention_begin(lock, 0);
415         lock_contended(&lock->dep_map, ip);
416
417         wait->task              = current;
418         wait->lock_want         = type;
419         wait->lock_acquired     = false;
420
421         raw_spin_lock(&lock->wait_lock);
422         six_set_bitmask(lock, SIX_LOCK_WAITING_read << type);
423         /*
424          * Retry taking the lock after taking waitlist lock, in case we raced
425          * with an unlock:
426          */
427         ret = __do_six_trylock(lock, type, current, false);
428         if (ret <= 0) {
429                 wait->start_time = local_clock();
430
431                 if (!list_empty(&lock->wait_list)) {
432                         struct six_lock_waiter *last =
433                                 list_last_entry(&lock->wait_list,
434                                         struct six_lock_waiter, list);
435
436                         if (time_before_eq64(wait->start_time, last->start_time))
437                                 wait->start_time = last->start_time + 1;
438                 }
439
440                 list_add_tail(&wait->list, &lock->wait_list);
441         }
442         raw_spin_unlock(&lock->wait_lock);
443
444         if (unlikely(ret > 0)) {
445                 ret = 0;
446                 goto out;
447         }
448
449         if (unlikely(ret < 0)) {
450                 __six_lock_wakeup(lock, -ret - 1);
451                 ret = 0;
452         }
453
454         if (six_optimistic_spin(lock, wait, type))
455                 goto out;
456
457         while (1) {
458                 set_current_state(TASK_UNINTERRUPTIBLE);
459
460                 /*
461                  * Ensures that writes to the waitlist entry happen after we see
462                  * wait->lock_acquired: pairs with the smp_store_release in
463                  * __six_lock_wakeup
464                  */
465                 if (smp_load_acquire(&wait->lock_acquired))
466                         break;
467
468                 ret = should_sleep_fn ? should_sleep_fn(lock, p) : 0;
469                 if (unlikely(ret)) {
470                         bool acquired;
471
472                         /*
473                          * If should_sleep_fn() returns an error, we are
474                          * required to return that error even if we already
475                          * acquired the lock - should_sleep_fn() might have
476                          * modified external state (e.g. when the deadlock cycle
477                          * detector in bcachefs issued a transaction restart)
478                          */
479                         raw_spin_lock(&lock->wait_lock);
480                         acquired = wait->lock_acquired;
481                         if (!acquired)
482                                 list_del(&wait->list);
483                         raw_spin_unlock(&lock->wait_lock);
484
485                         if (unlikely(acquired))
486                                 do_six_unlock_type(lock, type);
487                         break;
488                 }
489
490                 schedule();
491         }
492
493         __set_current_state(TASK_RUNNING);
494 out:
495         if (ret && type == SIX_LOCK_write) {
496                 six_clear_bitmask(lock, SIX_LOCK_HELD_write);
497                 six_lock_wakeup(lock, atomic_read(&lock->state), SIX_LOCK_read);
498         }
499         trace_contention_end(lock, 0);
500
501         return ret;
502 }
503
504 /**
505  * six_lock_ip_waiter - take a lock, with full waitlist interface
506  * @lock:       lock to take
507  * @type:       SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
508  * @wait:       pointer to wait object, which will be added to lock's waitlist
509  * @should_sleep_fn: callback run after adding to waitlist, immediately prior
510  *              to scheduling
511  * @p:          passed through to @should_sleep_fn
512  * @ip:         ip parameter for lockdep/lockstat, i.e. _THIS_IP_
513  *
514  * This is the most general six_lock() variant, with parameters to support full
515  * cycle detection for deadlock avoidance.
516  *
517  * The code calling this function must implement tracking of held locks, and the
518  * @wait object should be embedded into the struct that tracks held locks -
519  * which must also be accessible in a thread-safe way.
520  *
521  * @should_sleep_fn should invoke the cycle detector; it should walk each
522  * lock's waiters, and for each waiter recursively walk their held locks.
523  *
524  * When this function must block, @wait will be added to @lock's waitlist before
525  * calling trylock, and before calling @should_sleep_fn, and @wait will not be
526  * removed from the lock waitlist until the lock has been successfully acquired,
527  * or we abort.
528  *
529  * @wait.start_time will be monotonically increasing for any given waitlist, and
530  * thus may be used as a loop cursor.
531  *
532  * Return: 0 on success, or the return code from @should_sleep_fn on failure.
533  */
534 int six_lock_ip_waiter(struct six_lock *lock, enum six_lock_type type,
535                        struct six_lock_waiter *wait,
536                        six_lock_should_sleep_fn should_sleep_fn, void *p,
537                        unsigned long ip)
538 {
539         int ret;
540
541         wait->start_time = 0;
542
543         if (type != SIX_LOCK_write)
544                 six_acquire(&lock->dep_map, 0, type == SIX_LOCK_read, ip);
545
546         ret = do_six_trylock(lock, type, true) ? 0
547                 : six_lock_slowpath(lock, type, wait, should_sleep_fn, p, ip);
548
549         if (ret && type != SIX_LOCK_write)
550                 six_release(&lock->dep_map, ip);
551         if (!ret)
552                 lock_acquired(&lock->dep_map, ip);
553
554         return ret;
555 }
556 EXPORT_SYMBOL_GPL(six_lock_ip_waiter);
557
558 __always_inline
559 static void do_six_unlock_type(struct six_lock *lock, enum six_lock_type type)
560 {
561         u32 state;
562
563         if (type == SIX_LOCK_intent)
564                 lock->owner = NULL;
565
566         if (type == SIX_LOCK_read &&
567             lock->readers) {
568                 smp_mb(); /* unlock barrier */
569                 this_cpu_dec(*lock->readers);
570                 smp_mb(); /* between unlocking and checking for waiters */
571                 state = atomic_read(&lock->state);
572         } else {
573                 u32 v = l[type].lock_val;
574
575                 if (type != SIX_LOCK_read)
576                         v += atomic_read(&lock->state) & SIX_LOCK_NOSPIN;
577
578                 EBUG_ON(!(atomic_read(&lock->state) & l[type].held_mask));
579                 state = atomic_sub_return_release(v, &lock->state);
580         }
581
582         six_lock_wakeup(lock, state, l[type].unlock_wakeup);
583 }
584
585 /**
586  * six_unlock_ip - drop a six lock
587  * @lock:       lock to unlock
588  * @type:       SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
589  * @ip:         ip parameter for lockdep/lockstat, i.e. _THIS_IP_
590  *
591  * When a lock is held multiple times (because six_lock_incement()) was used),
592  * this decrements the 'lock held' counter by one.
593  *
594  * For example:
595  * six_lock_read(&foo->lock);                           read count 1
596  * six_lock_increment(&foo->lock, SIX_LOCK_read);       read count 2
597  * six_lock_unlock(&foo->lock, SIX_LOCK_read);          read count 1
598  * six_lock_unlock(&foo->lock, SIX_LOCK_read);          read count 0
599  */
600 void six_unlock_ip(struct six_lock *lock, enum six_lock_type type, unsigned long ip)
601 {
602         EBUG_ON(type == SIX_LOCK_write &&
603                 !(atomic_read(&lock->state) & SIX_LOCK_HELD_intent));
604         EBUG_ON((type == SIX_LOCK_write ||
605                  type == SIX_LOCK_intent) &&
606                 lock->owner != current);
607
608         if (type != SIX_LOCK_write)
609                 six_release(&lock->dep_map, ip);
610         else
611                 lock->seq++;
612
613         if (type == SIX_LOCK_intent &&
614             lock->intent_lock_recurse) {
615                 --lock->intent_lock_recurse;
616                 return;
617         }
618
619         do_six_unlock_type(lock, type);
620 }
621 EXPORT_SYMBOL_GPL(six_unlock_ip);
622
623 /**
624  * six_lock_downgrade - convert an intent lock to a read lock
625  * @lock:       lock to dowgrade
626  *
627  * @lock will have read count incremented and intent count decremented
628  */
629 void six_lock_downgrade(struct six_lock *lock)
630 {
631         six_lock_increment(lock, SIX_LOCK_read);
632         six_unlock_intent(lock);
633 }
634 EXPORT_SYMBOL_GPL(six_lock_downgrade);
635
636 /**
637  * six_lock_tryupgrade - attempt to convert read lock to an intent lock
638  * @lock:       lock to upgrade
639  *
640  * On success, @lock will have intent count incremented and read count
641  * decremented
642  *
643  * Return: true on success, false on failure
644  */
645 bool six_lock_tryupgrade(struct six_lock *lock)
646 {
647         u32 old = atomic_read(&lock->state), new;
648
649         do {
650                 new = old;
651
652                 if (new & SIX_LOCK_HELD_intent)
653                         return false;
654
655                 if (!lock->readers) {
656                         EBUG_ON(!(new & SIX_LOCK_HELD_read));
657                         new -= l[SIX_LOCK_read].lock_val;
658                 }
659
660                 new |= SIX_LOCK_HELD_intent;
661         } while (!atomic_try_cmpxchg_acquire(&lock->state, &old, new));
662
663         if (lock->readers)
664                 this_cpu_dec(*lock->readers);
665
666         six_set_owner(lock, SIX_LOCK_intent, old, current);
667
668         return true;
669 }
670 EXPORT_SYMBOL_GPL(six_lock_tryupgrade);
671
672 /**
673  * six_trylock_convert - attempt to convert a held lock from one type to another
674  * @lock:       lock to upgrade
675  * @from:       SIX_LOCK_read or SIX_LOCK_intent
676  * @to:         SIX_LOCK_read or SIX_LOCK_intent
677  *
678  * On success, @lock will have intent count incremented and read count
679  * decremented
680  *
681  * Return: true on success, false on failure
682  */
683 bool six_trylock_convert(struct six_lock *lock,
684                          enum six_lock_type from,
685                          enum six_lock_type to)
686 {
687         EBUG_ON(to == SIX_LOCK_write || from == SIX_LOCK_write);
688
689         if (to == from)
690                 return true;
691
692         if (to == SIX_LOCK_read) {
693                 six_lock_downgrade(lock);
694                 return true;
695         } else {
696                 return six_lock_tryupgrade(lock);
697         }
698 }
699 EXPORT_SYMBOL_GPL(six_trylock_convert);
700
701 /**
702  * six_lock_increment - increase held lock count on a lock that is already held
703  * @lock:       lock to increment
704  * @type:       SIX_LOCK_read or SIX_LOCK_intent
705  *
706  * @lock must already be held, with a lock type that is greater than or equal to
707  * @type
708  *
709  * A corresponding six_unlock_type() call will be required for @lock to be fully
710  * unlocked.
711  */
712 void six_lock_increment(struct six_lock *lock, enum six_lock_type type)
713 {
714         six_acquire(&lock->dep_map, 0, type == SIX_LOCK_read, _RET_IP_);
715
716         /* XXX: assert already locked, and that we don't overflow: */
717
718         switch (type) {
719         case SIX_LOCK_read:
720                 if (lock->readers) {
721                         this_cpu_inc(*lock->readers);
722                 } else {
723                         EBUG_ON(!(atomic_read(&lock->state) &
724                                   (SIX_LOCK_HELD_read|
725                                    SIX_LOCK_HELD_intent)));
726                         atomic_add(l[type].lock_val, &lock->state);
727                 }
728                 break;
729         case SIX_LOCK_intent:
730                 EBUG_ON(!(atomic_read(&lock->state) & SIX_LOCK_HELD_intent));
731                 lock->intent_lock_recurse++;
732                 break;
733         case SIX_LOCK_write:
734                 BUG();
735                 break;
736         }
737 }
738 EXPORT_SYMBOL_GPL(six_lock_increment);
739
740 /**
741  * six_lock_wakeup_all - wake up all waiters on @lock
742  * @lock:       lock to wake up waiters for
743  *
744  * Wakeing up waiters will cause them to re-run should_sleep_fn, which may then
745  * abort the lock operation.
746  *
747  * This function is never needed in a bug-free program; it's only useful in
748  * debug code, e.g. to determine if a cycle detector is at fault.
749  */
750 void six_lock_wakeup_all(struct six_lock *lock)
751 {
752         u32 state = atomic_read(&lock->state);
753         struct six_lock_waiter *w;
754
755         six_lock_wakeup(lock, state, SIX_LOCK_read);
756         six_lock_wakeup(lock, state, SIX_LOCK_intent);
757         six_lock_wakeup(lock, state, SIX_LOCK_write);
758
759         raw_spin_lock(&lock->wait_lock);
760         list_for_each_entry(w, &lock->wait_list, list)
761                 wake_up_process(w->task);
762         raw_spin_unlock(&lock->wait_lock);
763 }
764 EXPORT_SYMBOL_GPL(six_lock_wakeup_all);
765
766 /**
767  * six_lock_counts - return held lock counts, for each lock type
768  * @lock:       lock to return counters for
769  *
770  * Return: the number of times a lock is held for read, intent and write.
771  */
772 struct six_lock_count six_lock_counts(struct six_lock *lock)
773 {
774         struct six_lock_count ret;
775
776         ret.n[SIX_LOCK_read]    = !lock->readers
777                 ? atomic_read(&lock->state) & SIX_LOCK_HELD_read
778                 : pcpu_read_count(lock);
779         ret.n[SIX_LOCK_intent]  = !!(atomic_read(&lock->state) & SIX_LOCK_HELD_intent) +
780                 lock->intent_lock_recurse;
781         ret.n[SIX_LOCK_write]   = !!(atomic_read(&lock->state) & SIX_LOCK_HELD_write);
782
783         return ret;
784 }
785 EXPORT_SYMBOL_GPL(six_lock_counts);
786
787 /**
788  * six_lock_readers_add - directly manipulate reader count of a lock
789  * @lock:       lock to add/subtract readers for
790  * @nr:         reader count to add/subtract
791  *
792  * When an upper layer is implementing lock reentrency, we may have both read
793  * and intent locks on the same lock.
794  *
795  * When we need to take a write lock, the read locks will cause self-deadlock,
796  * because six locks themselves do not track which read locks are held by the
797  * current thread and which are held by a different thread - it does no
798  * per-thread tracking of held locks.
799  *
800  * The upper layer that is tracking held locks may however, if trylock() has
801  * failed, count up its own read locks, subtract them, take the write lock, and
802  * then re-add them.
803  *
804  * As in any other situation when taking a write lock, @lock must be held for
805  * intent one (or more) times, so @lock will never be left unlocked.
806  */
807 void six_lock_readers_add(struct six_lock *lock, int nr)
808 {
809         if (lock->readers) {
810                 this_cpu_add(*lock->readers, nr);
811         } else {
812                 EBUG_ON((int) (atomic_read(&lock->state) & SIX_LOCK_HELD_read) + nr < 0);
813                 /* reader count starts at bit 0 */
814                 atomic_add(nr, &lock->state);
815         }
816 }
817 EXPORT_SYMBOL_GPL(six_lock_readers_add);
818
819 /**
820  * six_lock_exit - release resources held by a lock prior to freeing
821  * @lock:       lock to exit
822  *
823  * When a lock was initialized in percpu mode (SIX_OLCK_INIT_PCPU), this is
824  * required to free the percpu read counts.
825  */
826 void six_lock_exit(struct six_lock *lock)
827 {
828         WARN_ON(lock->readers && pcpu_read_count(lock));
829         WARN_ON(atomic_read(&lock->state) & SIX_LOCK_HELD_read);
830
831         free_percpu(lock->readers);
832         lock->readers = NULL;
833 }
834 EXPORT_SYMBOL_GPL(six_lock_exit);
835
836 void __six_lock_init(struct six_lock *lock, const char *name,
837                      struct lock_class_key *key, enum six_lock_init_flags flags)
838 {
839         atomic_set(&lock->state, 0);
840         raw_spin_lock_init(&lock->wait_lock);
841         INIT_LIST_HEAD(&lock->wait_list);
842 #ifdef CONFIG_DEBUG_LOCK_ALLOC
843         debug_check_no_locks_freed((void *) lock, sizeof(*lock));
844         lockdep_init_map(&lock->dep_map, name, key, 0);
845 #endif
846
847         /*
848          * Don't assume that we have real percpu variables available in
849          * userspace:
850          */
851 #ifdef __KERNEL__
852         if (flags & SIX_LOCK_INIT_PCPU) {
853                 /*
854                  * We don't return an error here on memory allocation failure
855                  * since percpu is an optimization, and locks will work with the
856                  * same semantics in non-percpu mode: callers can check for
857                  * failure if they wish by checking lock->readers, but generally
858                  * will not want to treat it as an error.
859                  */
860                 lock->readers = alloc_percpu(unsigned);
861         }
862 #endif
863 }
864 EXPORT_SYMBOL_GPL(__six_lock_init);