1 // SPDX-License-Identifier: GPL-2.0
2
3 #include <linux/export.h>
4 #include <linux/log2.h>
5 #include <linux/percpu.h>
6 #include <linux/preempt.h>
7 #include <linux/rcupdate.h>
8 #include <linux/sched.h>
9 #include <linux/sched/clock.h>
10 #include <linux/sched/rt.h>
11 #include <linux/sched/task.h>
12 #include <linux/slab.h>
13
14 #include <trace/events/lock.h>
15
16 #include "six.h"
17
18 #ifdef DEBUG
19 #define EBUG_ON(cond)                   BUG_ON(cond)
20 #else
21 #define EBUG_ON(cond)                   do {} while (0)
22 #endif
23
24 #define six_acquire(l, t, r, ip)        lock_acquire(l, 0, t, r, 1, NULL, ip)
25 #define six_release(l, ip)              lock_release(l, ip)
26
27 static void do_six_unlock_type(struct six_lock *lock, enum six_lock_type type);
28
29 #define SIX_LOCK_HELD_read_OFFSET       0
30 #define SIX_LOCK_HELD_read              ~(~0U << 26)
31 #define SIX_LOCK_HELD_intent            (1U << 26)
32 #define SIX_LOCK_HELD_write             (1U << 27)
33 #define SIX_LOCK_WAITING_read           (1U << (28 + SIX_LOCK_read))
34 #define SIX_LOCK_WAITING_intent         (1U << (28 + SIX_LOCK_intent))
35 #define SIX_LOCK_WAITING_write          (1U << (28 + SIX_LOCK_write))
36 #define SIX_LOCK_NOSPIN                 (1U << 31)
37
38 struct six_lock_vals {
39         /* Value we add to the lock in order to take the lock: */
40         u32                     lock_val;
41
42         /* If the lock has this value (used as a mask), taking the lock fails: */
43         u32                     lock_fail;
44
45         /* Mask that indicates lock is held for this type: */
46         u32                     held_mask;
47
48         /* Waitlist we wake up when releasing the lock: */
49         enum six_lock_type      unlock_wakeup;
50 };
51
52 static const struct six_lock_vals l[] = {
53         [SIX_LOCK_read] = {
54                 .lock_val       = 1U << SIX_LOCK_HELD_read_OFFSET,
55                 .lock_fail      = SIX_LOCK_HELD_write,
56                 .held_mask      = SIX_LOCK_HELD_read,
57                 .unlock_wakeup  = SIX_LOCK_write,
58         },
59         [SIX_LOCK_intent] = {
60                 .lock_val       = SIX_LOCK_HELD_intent,
61                 .lock_fail      = SIX_LOCK_HELD_intent,
62                 .held_mask      = SIX_LOCK_HELD_intent,
63                 .unlock_wakeup  = SIX_LOCK_intent,
64         },
65         [SIX_LOCK_write] = {
66                 .lock_val       = SIX_LOCK_HELD_write,
67                 .lock_fail      = SIX_LOCK_HELD_read,
68                 .held_mask      = SIX_LOCK_HELD_write,
69                 .unlock_wakeup  = SIX_LOCK_read,
70         },
71 };
72
73 static inline void six_set_bitmask(struct six_lock *lock, u32 mask)
74 {
75         if ((atomic_read(&lock->state) & mask) != mask)
76                 atomic_or(mask, &lock->state);
77 }
78
79 static inline void six_clear_bitmask(struct six_lock *lock, u32 mask)
80 {
81         if (atomic_read(&lock->state) & mask)
82                 atomic_and(~mask, &lock->state);
83 }
84
85 static inline void six_set_owner(struct six_lock *lock, enum six_lock_type type,
86                                  u32 old, struct task_struct *owner)
87 {
88         if (type != SIX_LOCK_intent)
89                 return;
90
91         if (!(old & SIX_LOCK_HELD_intent)) {
92                 EBUG_ON(lock->owner);
93                 lock->owner = owner;
94         } else {
95                 EBUG_ON(lock->owner != current);
96         }
97 }
98
99 static inline unsigned pcpu_read_count(struct six_lock *lock)
100 {
101         unsigned read_count = 0;
102         int cpu;
103
104         for_each_possible_cpu(cpu)
105                 read_count += *per_cpu_ptr(lock->readers, cpu);
106         return read_count;
107 }
108
109 /*
110  * __do_six_trylock() - main trylock routine
111  *
112  * Returns 1 on success, 0 on failure
113  *
114  * In percpu reader mode, a failed trylock may cause a spurious trylock failure
115  * for another thread taking the competing lock type, and we may have to do a
116  * wakeup: when a wakeup is required, we return -1 - wakeup_type.
117  */
118 static int __do_six_trylock(struct six_lock *lock, enum six_lock_type type,
119                             struct task_struct *task, bool try)
120 {
121         int ret;
122         u32 old;
123
124         EBUG_ON(type == SIX_LOCK_write && lock->owner != task);
125         EBUG_ON(type == SIX_LOCK_write &&
126                 (try != !(atomic_read(&lock->state) & SIX_LOCK_HELD_write)));
127
128         /*
129          * Percpu reader mode:
130          *
131          * The basic idea behind this algorithm is that you can implement a lock
132          * between two threads without any atomics, just memory barriers:
133          *
134          * For two threads you'll need two variables, one variable for "thread a
135          * has the lock" and another for "thread b has the lock".
136          *
137          * To take the lock, a thread sets its variable indicating that it holds
138          * the lock, then issues a full memory barrier, then reads from the
139          * other thread's variable to check if the other thread thinks it has
140          * the lock. If we raced, we back off and retry/sleep.
141          *
142          * Failure to take the lock may cause a spurious trylock failure in
143          * another thread, because we temporarily set the lock to indicate that
144          * we held it. This would be a problem for a thread in six_lock(), when
145          * it calls trylock after adding itself to the waitlist and prior to
146          * sleeping.
147          *
148          * Therefore, if we fail to get the lock, and there were waiters of the
149          * type we conflict with, we will have to issue a wakeup.
150          *
151          * Since we may be called under wait_lock (and by the wakeup code
152          * itself), we return that the wakeup has to be done instead of doing it
153          * here.
154          */
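	/*
	 * Illustrative sketch of the two-variable scheme described above
	 * (editor's example, not part of the six lock implementation; all
	 * names here are invented):
	 *
	 *	bool a_held, b_held;
	 *
	 *	bool a_trylock(void)			// runs in thread a
	 *	{
	 *		WRITE_ONCE(a_held, true);	// claim the lock
	 *		smp_mb();			// full barrier
	 *		if (READ_ONCE(b_held)) {	// did b also claim it?
	 *			WRITE_ONCE(a_held, false);	// back off
	 *			return false;
	 *		}
	 *		return true;
	 *	}
	 *
	 * With full barriers on both sides, at most one thread can miss the
	 * other's store; the percpu read path below is the same idea, with one
	 * counter per cpu for readers and SIX_LOCK_HELD_write for the writer.
	 */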
155         if (type == SIX_LOCK_read && lock->readers) {
156                 preempt_disable();
157                 this_cpu_inc(*lock->readers); /* signal that we own lock */
158
159                 smp_mb();
160
161                 old = atomic_read(&lock->state);
162                 ret = !(old & l[type].lock_fail);
163
164                 this_cpu_sub(*lock->readers, !ret);
165                 preempt_enable();
166
167                 if (!ret && (old & SIX_LOCK_WAITING_write))
168                         ret = -1 - SIX_LOCK_write;
169         } else if (type == SIX_LOCK_write && lock->readers) {
170                 if (try) {
171                         atomic_add(SIX_LOCK_HELD_write, &lock->state);
172                         smp_mb__after_atomic();
173                 }
174
175                 ret = !pcpu_read_count(lock);
176
177                 if (try && !ret) {
178                         old = atomic_sub_return(SIX_LOCK_HELD_write, &lock->state);
179                         if (old & SIX_LOCK_WAITING_read)
180                                 ret = -1 - SIX_LOCK_read;
181                 }
182         } else {
183                 old = atomic_read(&lock->state);
184                 do {
185                         ret = !(old & l[type].lock_fail);
186                         if (!ret || (type == SIX_LOCK_write && !try)) {
187                                 smp_mb();
188                                 break;
189                         }
190                 } while (!atomic_try_cmpxchg_acquire(&lock->state, &old, old + l[type].lock_val));
191
192                 EBUG_ON(ret && !(atomic_read(&lock->state) & l[type].held_mask));
193         }
194
195         if (ret > 0)
196                 six_set_owner(lock, type, old, task);
197
198         EBUG_ON(type == SIX_LOCK_write && try && ret <= 0 &&
199                 (atomic_read(&lock->state) & SIX_LOCK_HELD_write));
200
201         return ret;
202 }
203
204 static void __six_lock_wakeup(struct six_lock *lock, enum six_lock_type lock_type)
205 {
206         struct six_lock_waiter *w, *next;
207         struct task_struct *task;
208         bool saw_one;
209         int ret;
210 again:
211         ret = 0;
212         saw_one = false;
213         raw_spin_lock(&lock->wait_lock);
214
215         list_for_each_entry_safe(w, next, &lock->wait_list, list) {
216                 if (w->lock_want != lock_type)
217                         continue;
218
219                 if (saw_one && lock_type != SIX_LOCK_read)
220                         goto unlock;
221                 saw_one = true;
222
223                 ret = __do_six_trylock(lock, lock_type, w->task, false);
224                 if (ret <= 0)
225                         goto unlock;
226
227                 /*
228                  * Similar to percpu_rwsem_wake_function(), we need to guard
229                  * against the wakee noticing w->lock_acquired, returning, and
230                  * then exiting before we do the wakeup:
231                  */
232                 task = get_task_struct(w->task);
233                 __list_del(w->list.prev, w->list.next);
234                 /*
235                  * The release barrier here ensures the ordering of the
236                  * __list_del before setting w->lock_acquired; @w is on the
237                  * stack of the thread doing the waiting and will be reused
238                  * after it sees w->lock_acquired with no other locking:
239                  * pairs with smp_load_acquire() in six_lock_slowpath()
240                  */
241                 smp_store_release(&w->lock_acquired, true);
242                 wake_up_process(task);
243                 put_task_struct(task);
244         }
245
246         six_clear_bitmask(lock, SIX_LOCK_WAITING_read << lock_type);
247 unlock:
248         raw_spin_unlock(&lock->wait_lock);
249
250         if (ret < 0) {
251                 lock_type = -ret - 1;
252                 goto again;
253         }
254 }
255
256 __always_inline
257 static void six_lock_wakeup(struct six_lock *lock, u32 state,
258                             enum six_lock_type lock_type)
259 {
260         if (lock_type == SIX_LOCK_write && (state & SIX_LOCK_HELD_read))
261                 return;
262
263         if (!(state & (SIX_LOCK_WAITING_read << lock_type)))
264                 return;
265
266         __six_lock_wakeup(lock, lock_type);
267 }
268
269 __always_inline
270 static bool do_six_trylock(struct six_lock *lock, enum six_lock_type type, bool try)
271 {
272         int ret;
273
274         ret = __do_six_trylock(lock, type, current, try);
275         if (ret < 0)
276                 __six_lock_wakeup(lock, -ret - 1);
277
278         return ret > 0;
279 }
280
281 /**
282  * six_trylock_ip - attempt to take a six lock without blocking
283  * @lock:       lock to take
284  * @type:       SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
285  * @ip:         ip parameter for lockdep/lockstat, i.e. _THIS_IP_
286  *
287  * Return: true on success, false on failure.
288  */
289 bool six_trylock_ip(struct six_lock *lock, enum six_lock_type type, unsigned long ip)
290 {
291         if (!do_six_trylock(lock, type, true))
292                 return false;
293
294         if (type != SIX_LOCK_write)
295                 six_acquire(&lock->dep_map, 1, type == SIX_LOCK_read, ip);
296         return true;
297 }
298 EXPORT_SYMBOL_GPL(six_trylock_ip);
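/*
 * Example (editor's sketch; "foo" is a hypothetical caller, not part of this
 * file):
 *
 *	if (six_trylock_ip(&foo->lock, SIX_LOCK_intent, _THIS_IP_)) {
 *		// got the intent lock without blocking; readers can still
 *		// take the lock, but other intent/write attempts will fail
 *		six_unlock_ip(&foo->lock, SIX_LOCK_intent, _THIS_IP_);
 *	}
 */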
299
300 /**
301  * six_relock_ip - attempt to re-take a lock that was held previously
302  * @lock:       lock to take
303  * @type:       SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
304  * @seq:        lock sequence number obtained from six_lock_seq() while lock was
305  *              held previously
306  * @ip:         ip parameter for lockdep/lockstat, i.e. _THIS_IP_
307  *
308  * Return: true on success, false on failure.
309  */
310 bool six_relock_ip(struct six_lock *lock, enum six_lock_type type,
311                    unsigned seq, unsigned long ip)
312 {
313         if (six_lock_seq(lock) != seq || !six_trylock_ip(lock, type, ip))
314                 return false;
315
316         if (six_lock_seq(lock) != seq) {
317                 six_unlock_ip(lock, type, ip);
318                 return false;
319         }
320
321         return true;
322 }
323 EXPORT_SYMBOL_GPL(six_relock_ip);
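/*
 * Example of the unlock/relock pattern this supports (editor's sketch; "foo"
 * and the surrounding logic are hypothetical):
 *
 *	six_lock_read(&foo->lock);
 *	u32 seq = six_lock_seq(&foo->lock);
 *	six_unlock_ip(&foo->lock, SIX_LOCK_read, _THIS_IP_);
 *
 *	// ... work that is only valid while nobody write locks foo->lock ...
 *
 *	if (!six_relock_ip(&foo->lock, SIX_LOCK_read, seq, _THIS_IP_)) {
 *		// sequence number changed: revalidate and start over
 *	}
 */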
324
325 #ifdef CONFIG_LOCK_SPIN_ON_OWNER
326
327 static inline bool six_can_spin_on_owner(struct six_lock *lock)
328 {
329         struct task_struct *owner;
330         bool ret;
331
332         if (need_resched())
333                 return false;
334
335         rcu_read_lock();
336         owner = READ_ONCE(lock->owner);
337         ret = !owner || owner_on_cpu(owner);
338         rcu_read_unlock();
339
340         return ret;
341 }
342
343 static inline bool six_spin_on_owner(struct six_lock *lock,
344                                      struct task_struct *owner,
345                                      u64 end_time)
346 {
347         bool ret = true;
348         unsigned loop = 0;
349
350         rcu_read_lock();
351         while (lock->owner == owner) {
352                 /*
353                  * Ensure we emit the owner->on_cpu dereference _after_
354                  * checking that lock->owner still matches owner. If that fails,
355                  * owner might point to freed memory. If it still matches,
356                  * the rcu_read_lock() ensures the memory stays valid.
357                  */
358                 barrier();
359
360                 if (!owner_on_cpu(owner) || need_resched()) {
361                         ret = false;
362                         break;
363                 }
364
365                 if (!(++loop & 0xf) && (time_after64(sched_clock(), end_time))) {
366                         six_set_bitmask(lock, SIX_LOCK_NOSPIN);
367                         ret = false;
368                         break;
369                 }
370
371                 cpu_relax();
372         }
373         rcu_read_unlock();
374
375         return ret;
376 }
377
378 static inline bool six_optimistic_spin(struct six_lock *lock, enum six_lock_type type)
379 {
380         struct task_struct *task = current;
381         u64 end_time;
382
383         if (type == SIX_LOCK_write)
384                 return false;
385
386         preempt_disable();
387         if (!six_can_spin_on_owner(lock))
388                 goto fail;
389
390         if (!osq_lock(&lock->osq))
391                 goto fail;
392
393         end_time = sched_clock() + 10 * NSEC_PER_USEC;
394
395         while (1) {
396                 struct task_struct *owner;
397
398                 /*
399                  * If there's an owner, wait for it to either
400                  * release the lock or go to sleep.
401                  */
402                 owner = READ_ONCE(lock->owner);
403                 if (owner && !six_spin_on_owner(lock, owner, end_time))
404                         break;
405
406                 if (do_six_trylock(lock, type, false)) {
407                         osq_unlock(&lock->osq);
408                         preempt_enable();
409                         return true;
410                 }
411
412                 /*
413                  * When there's no owner, we might have been preempted between
414                  * the owner acquiring the lock and setting the owner field. If
415                  * we're an RT task, that will live-lock because we won't let
416                  * the owner complete.
417                  */
418                 if (!owner && (need_resched() || rt_task(task)))
419                         break;
420
421                 /*
422                  * The cpu_relax() call is a compiler barrier which forces
423                  * everything in this loop to be re-loaded. We don't need
424                  * memory barriers as we'll eventually observe the right
425                  * values at the cost of a few extra spins.
426                  */
427                 cpu_relax();
428         }
429
430         osq_unlock(&lock->osq);
431 fail:
432         preempt_enable();
433
434         /*
435          * If we fell out of the spin path because of need_resched(),
436          * reschedule now, before we try-lock again. This avoids getting
437          * scheduled out right after we obtained the lock.
438          */
439         if (need_resched())
440                 schedule();
441
442         return false;
443 }
444
445 #else /* CONFIG_LOCK_SPIN_ON_OWNER */
446
447 static inline bool six_optimistic_spin(struct six_lock *lock, enum six_lock_type type)
448 {
449         return false;
450 }
451
452 #endif
453
454 noinline
455 static int six_lock_slowpath(struct six_lock *lock, enum six_lock_type type,
456                              struct six_lock_waiter *wait,
457                              six_lock_should_sleep_fn should_sleep_fn, void *p,
458                              unsigned long ip)
459 {
460         int ret = 0;
461
462         if (type == SIX_LOCK_write) {
463                 EBUG_ON(atomic_read(&lock->state) & SIX_LOCK_HELD_write);
464                 atomic_add(SIX_LOCK_HELD_write, &lock->state);
465                 smp_mb__after_atomic();
466         }
467
468         trace_contention_begin(lock, 0);
469         lock_contended(&lock->dep_map, ip);
470
471         if (six_optimistic_spin(lock, type))
472                 goto out;
473
474         wait->task              = current;
475         wait->lock_want         = type;
476         wait->lock_acquired     = false;
477
478         raw_spin_lock(&lock->wait_lock);
479         six_set_bitmask(lock, SIX_LOCK_WAITING_read << type);
480         /*
481          * Retry taking the lock after taking waitlist lock, in case we raced
482          * with an unlock:
483          */
484         ret = __do_six_trylock(lock, type, current, false);
485         if (ret <= 0) {
486                 wait->start_time = local_clock();
487
488                 if (!list_empty(&lock->wait_list)) {
489                         struct six_lock_waiter *last =
490                                 list_last_entry(&lock->wait_list,
491                                         struct six_lock_waiter, list);
492
493                         if (time_before_eq64(wait->start_time, last->start_time))
494                                 wait->start_time = last->start_time + 1;
495                 }
496
497                 list_add_tail(&wait->list, &lock->wait_list);
498         }
499         raw_spin_unlock(&lock->wait_lock);
500
501         if (unlikely(ret > 0)) {
502                 ret = 0;
503                 goto out;
504         }
505
506         if (unlikely(ret < 0)) {
507                 __six_lock_wakeup(lock, -ret - 1);
508                 ret = 0;
509         }
510
511         while (1) {
512                 set_current_state(TASK_UNINTERRUPTIBLE);
513
514                 /*
515                  * Ensures that writes to the waitlist entry happen after we see
516                  * wait->lock_acquired: pairs with the smp_store_release in
517                  * __six_lock_wakeup
518                  */
519                 if (smp_load_acquire(&wait->lock_acquired))
520                         break;
521
522                 ret = should_sleep_fn ? should_sleep_fn(lock, p) : 0;
523                 if (unlikely(ret)) {
524                         bool acquired;
525
526                         /*
527                          * If should_sleep_fn() returns an error, we are
528                          * required to return that error even if we already
529                          * acquired the lock - should_sleep_fn() might have
530                          * modified external state (e.g. when the deadlock cycle
531                          * detector in bcachefs issued a transaction restart)
532                          */
533                         raw_spin_lock(&lock->wait_lock);
534                         acquired = wait->lock_acquired;
535                         if (!acquired)
536                                 list_del(&wait->list);
537                         raw_spin_unlock(&lock->wait_lock);
538
539                         if (unlikely(acquired))
540                                 do_six_unlock_type(lock, type);
541                         break;
542                 }
543
544                 schedule();
545         }
546
547         __set_current_state(TASK_RUNNING);
548 out:
549         if (ret && type == SIX_LOCK_write) {
550                 six_clear_bitmask(lock, SIX_LOCK_HELD_write);
551                 six_lock_wakeup(lock, atomic_read(&lock->state), SIX_LOCK_read);
552         }
553         trace_contention_end(lock, 0);
554
555         return ret;
556 }
557
558 /**
559  * six_lock_ip_waiter - take a lock, with full waitlist interface
560  * @lock:       lock to take
561  * @type:       SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
562  * @wait:       pointer to wait object, which will be added to lock's waitlist
563  * @should_sleep_fn: callback run after adding to waitlist, immediately prior
564  *              to scheduling
565  * @p:          passed through to @should_sleep_fn
566  * @ip:         ip parameter for lockdep/lockstat, i.e. _THIS_IP_
567  *
568  * This is the most general six_lock() variant, with parameters to support full
569  * cycle detection for deadlock avoidance.
570  *
571  * The code calling this function must implement tracking of held locks, and the
572  * @wait object should be embedded into the struct that tracks held locks -
573  * which must also be accessible in a thread-safe way.
574  *
575  * @should_sleep_fn should invoke the cycle detector; it should walk each
576  * lock's waiters, and for each waiter recursively walk their held locks.
577  *
578  * When this function must block, @wait will be added to @lock's waitlist before
579  * calling trylock, and before calling @should_sleep_fn, and @wait will not be
580  * removed from the lock waitlist until the lock has been successfully acquired,
581  * or we abort.
582  *
583  * @wait.start_time will be monotonically increasing for any given waitlist, and
584  * thus may be used as a loop cursor.
585  *
586  * Return: 0 on success, or the return code from @should_sleep_fn on failure.
587  */
588 int six_lock_ip_waiter(struct six_lock *lock, enum six_lock_type type,
589                        struct six_lock_waiter *wait,
590                        six_lock_should_sleep_fn should_sleep_fn, void *p,
591                        unsigned long ip)
592 {
593         int ret;
594
595         wait->start_time = 0;
596
597         if (type != SIX_LOCK_write)
598                 six_acquire(&lock->dep_map, 0, type == SIX_LOCK_read, ip);
599
600         ret = do_six_trylock(lock, type, true) ? 0
601                 : six_lock_slowpath(lock, type, wait, should_sleep_fn, p, ip);
602
603         if (ret && type != SIX_LOCK_write)
604                 six_release(&lock->dep_map, ip);
605         if (!ret)
606                 lock_acquired(&lock->dep_map, ip);
607
608         return ret;
609 }
610 EXPORT_SYMBOL_GPL(six_lock_ip_waiter);
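/*
 * Sketch of how a caller might use the full waiter interface (editor's
 * example; my_should_sleep() and "foo" are hypothetical, and the cycle
 * detector is only stubbed out):
 *
 *	static int my_should_sleep(struct six_lock *lock, void *p)
 *	{
 *		// walk this lock's waiters and their held locks here;
 *		// return a nonzero error (e.g. -EDEADLK) to abort instead
 *		// of sleeping
 *		return 0;
 *	}
 *
 *	struct six_lock_waiter wait;
 *	int ret = six_lock_ip_waiter(&foo->lock, SIX_LOCK_intent, &wait,
 *				     my_should_sleep, NULL, _THIS_IP_);
 *	if (ret)
 *		// aborted by my_should_sleep(); the lock is not held
 */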
611
612 __always_inline
613 static void do_six_unlock_type(struct six_lock *lock, enum six_lock_type type)
614 {
615         u32 state;
616
617         if (type == SIX_LOCK_intent)
618                 lock->owner = NULL;
619
620         if (type == SIX_LOCK_read &&
621             lock->readers) {
622                 smp_mb(); /* unlock barrier */
623                 this_cpu_dec(*lock->readers);
624                 smp_mb(); /* between unlocking and checking for waiters */
625                 state = atomic_read(&lock->state);
626         } else {
627                 u32 v = l[type].lock_val;
628
629                 if (type != SIX_LOCK_read)
630                         v += atomic_read(&lock->state) & SIX_LOCK_NOSPIN;
631
632                 EBUG_ON(!(atomic_read(&lock->state) & l[type].held_mask));
633                 state = atomic_sub_return_release(v, &lock->state);
634         }
635
636         six_lock_wakeup(lock, state, l[type].unlock_wakeup);
637 }
638
639 /**
640  * six_unlock_ip - drop a six lock
641  * @lock:       lock to unlock
642  * @type:       SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
643  * @ip:         ip parameter for lockdep/lockstat, i.e. _THIS_IP_
644  *
645  * When a lock is held multiple times (because six_lock_increment() was used),
646  * this decrements the 'lock held' counter by one.
647  *
648  * For example:
649  * six_lock_read(&foo->lock);                           read count 1
650  * six_lock_increment(&foo->lock, SIX_LOCK_read);       read count 2
651  * six_lock_unlock(&foo->lock, SIX_LOCK_read);          read count 1
652  * six_lock_unlock(&foo->lock, SIX_LOCK_read);          read count 0
653  */
654 void six_unlock_ip(struct six_lock *lock, enum six_lock_type type, unsigned long ip)
655 {
656         EBUG_ON(type == SIX_LOCK_write &&
657                 !(atomic_read(&lock->state) & SIX_LOCK_HELD_intent));
658         EBUG_ON((type == SIX_LOCK_write ||
659                  type == SIX_LOCK_intent) &&
660                 lock->owner != current);
661
662         if (type != SIX_LOCK_write)
663                 six_release(&lock->dep_map, ip);
664         else
665                 lock->seq++;
666
667         if (type == SIX_LOCK_intent &&
668             lock->intent_lock_recurse) {
669                 --lock->intent_lock_recurse;
670                 return;
671         }
672
673         do_six_unlock_type(lock, type);
674 }
675 EXPORT_SYMBOL_GPL(six_unlock_ip);
676
677 /**
678  * six_lock_downgrade - convert an intent lock to a read lock
679  * @lock:       lock to downgrade
680  *
681  * @lock will have read count incremented and intent count decremented
682  */
683 void six_lock_downgrade(struct six_lock *lock)
684 {
685         six_lock_increment(lock, SIX_LOCK_read);
686         six_unlock_intent(lock);
687 }
688 EXPORT_SYMBOL_GPL(six_lock_downgrade);
689
690 /**
691  * six_lock_tryupgrade - attempt to convert read lock to an intent lock
692  * @lock:       lock to upgrade
693  *
694  * On success, @lock will have intent count incremented and read count
695  * decremented
696  *
697  * Return: true on success, false on failure
698  */
699 bool six_lock_tryupgrade(struct six_lock *lock)
700 {
701         u32 old = atomic_read(&lock->state), new;
702
703         do {
704                 new = old;
705
706                 if (new & SIX_LOCK_HELD_intent)
707                         return false;
708
709                 if (!lock->readers) {
710                         EBUG_ON(!(new & SIX_LOCK_HELD_read));
711                         new -= l[SIX_LOCK_read].lock_val;
712                 }
713
714                 new |= SIX_LOCK_HELD_intent;
715         } while (!atomic_try_cmpxchg_acquire(&lock->state, &old, new));
716
717         if (lock->readers)
718                 this_cpu_dec(*lock->readers);
719
720         six_set_owner(lock, SIX_LOCK_intent, old, current);
721
722         return true;
723 }
724 EXPORT_SYMBOL_GPL(six_lock_tryupgrade);
725
726 /**
727  * six_trylock_convert - attempt to convert a held lock from one type to another
728  * @lock:       lock to convert
729  * @from:       SIX_LOCK_read or SIX_LOCK_intent
730  * @to:         SIX_LOCK_read or SIX_LOCK_intent
731  *
732  * On success, @lock will be held with type @to and no longer held with
733  * type @from
734  *
735  * Return: true on success, false on failure
736  */
737 bool six_trylock_convert(struct six_lock *lock,
738                          enum six_lock_type from,
739                          enum six_lock_type to)
740 {
741         EBUG_ON(to == SIX_LOCK_write || from == SIX_LOCK_write);
742
743         if (to == from)
744                 return true;
745
746         if (to == SIX_LOCK_read) {
747                 six_lock_downgrade(lock);
748                 return true;
749         } else {
750                 return six_lock_tryupgrade(lock);
751         }
752 }
753 EXPORT_SYMBOL_GPL(six_trylock_convert);
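/*
 * Example (editor's sketch; hypothetical caller): upgrade a read lock to
 * intent before modifying, falling back to relocking from scratch when
 * another thread already holds the intent lock:
 *
 *	if (!six_trylock_convert(&foo->lock, SIX_LOCK_read, SIX_LOCK_intent)) {
 *		six_unlock_ip(&foo->lock, SIX_LOCK_read, _THIS_IP_);
 *		// ... take the intent lock from scratch and revalidate ...
 *	}
 */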
754
755 /**
756  * six_lock_increment - increase held lock count on a lock that is already held
757  * @lock:       lock to increment
758  * @type:       SIX_LOCK_read or SIX_LOCK_intent
759  *
760  * @lock must already be held, with a lock type that is greater than or equal to
761  * @type
762  *
763  * A corresponding six_unlock_type() call will be required for @lock to be fully
764  * unlocked.
765  */
766 void six_lock_increment(struct six_lock *lock, enum six_lock_type type)
767 {
768         six_acquire(&lock->dep_map, 0, type == SIX_LOCK_read, _RET_IP_);
769
770         /* XXX: assert already locked, and that we don't overflow: */
771
772         switch (type) {
773         case SIX_LOCK_read:
774                 if (lock->readers) {
775                         this_cpu_inc(*lock->readers);
776                 } else {
777                         EBUG_ON(!(atomic_read(&lock->state) &
778                                   (SIX_LOCK_HELD_read|
779                                    SIX_LOCK_HELD_intent)));
780                         atomic_add(l[type].lock_val, &lock->state);
781                 }
782                 break;
783         case SIX_LOCK_intent:
784                 EBUG_ON(!(atomic_read(&lock->state) & SIX_LOCK_HELD_intent));
785                 lock->intent_lock_recurse++;
786                 break;
787         case SIX_LOCK_write:
788                 BUG();
789                 break;
790         }
791 }
792 EXPORT_SYMBOL_GPL(six_lock_increment);
793
794 /**
795  * six_lock_wakeup_all - wake up all waiters on @lock
796  * @lock:       lock to wake up waiters for
797  *
798  * Waking up waiters will cause them to re-run should_sleep_fn, which may then
799  * abort the lock operation.
800  *
801  * This function is never needed in a bug-free program; it's only useful in
802  * debug code, e.g. to determine if a cycle detector is at fault.
803  */
804 void six_lock_wakeup_all(struct six_lock *lock)
805 {
806         u32 state = atomic_read(&lock->state);
807         struct six_lock_waiter *w;
808
809         six_lock_wakeup(lock, state, SIX_LOCK_read);
810         six_lock_wakeup(lock, state, SIX_LOCK_intent);
811         six_lock_wakeup(lock, state, SIX_LOCK_write);
812
813         raw_spin_lock(&lock->wait_lock);
814         list_for_each_entry(w, &lock->wait_list, list)
815                 wake_up_process(w->task);
816         raw_spin_unlock(&lock->wait_lock);
817 }
818 EXPORT_SYMBOL_GPL(six_lock_wakeup_all);
819
820 /**
821  * six_lock_counts - return held lock counts, for each lock type
822  * @lock:       lock to return counters for
823  *
824  * Return: the number of times a lock is held for read, intent and write.
825  */
826 struct six_lock_count six_lock_counts(struct six_lock *lock)
827 {
828         struct six_lock_count ret;
829
830         ret.n[SIX_LOCK_read]    = !lock->readers
831                 ? atomic_read(&lock->state) & SIX_LOCK_HELD_read
832                 : pcpu_read_count(lock);
833         ret.n[SIX_LOCK_intent]  = !!(atomic_read(&lock->state) & SIX_LOCK_HELD_intent) +
834                 lock->intent_lock_recurse;
835         ret.n[SIX_LOCK_write]   = !!(atomic_read(&lock->state) & SIX_LOCK_HELD_write);
836
837         return ret;
838 }
839 EXPORT_SYMBOL_GPL(six_lock_counts);
840
841 /**
842  * six_lock_readers_add - directly manipulate reader count of a lock
843  * @lock:       lock to add/subtract readers for
844  * @nr:         reader count to add/subtract
845  *
846  * When an upper layer is implementing lock reentrancy, we may have both read
847  * and intent locks on the same lock.
848  *
849  * When we need to take a write lock, the read locks will cause self-deadlock,
850  * because six locks themselves do not track which read locks are held by the
851  * current thread and which are held by a different thread; six locks do no
852  * per-thread tracking of held locks.
853  *
854  * The upper layer that is tracking held locks may, however, if trylock() has
855  * failed, count up its own read locks, subtract them, take the write lock, and
856  * then re-add them.
857  *
858  * As in any other situation when taking a write lock, @lock must be held for
859  * intent one (or more) times, so @lock will never be left unlocked.
860  */
861 void six_lock_readers_add(struct six_lock *lock, int nr)
862 {
863         if (lock->readers) {
864                 this_cpu_add(*lock->readers, nr);
865         } else {
866                 EBUG_ON((int) (atomic_read(&lock->state) & SIX_LOCK_HELD_read) + nr < 0);
867                 /* reader count starts at bit 0 */
868                 atomic_add(nr, &lock->state);
869         }
870 }
871 EXPORT_SYMBOL_GPL(six_lock_readers_add);
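/*
 * Sketch of the read count juggling described above (editor's example;
 * "nr_my_read_locks" is whatever count the caller's own lock tracking
 * provides, and @lock must already be held for intent):
 *
 *	six_lock_readers_add(&foo->lock, -nr_my_read_locks);
 *	// ... take SIX_LOCK_write, e.g. via six_lock_ip_waiter() ...
 *	six_lock_readers_add(&foo->lock, nr_my_read_locks);
 */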
872
873 /**
874  * six_lock_exit - release resources held by a lock prior to freeing
875  * @lock:       lock to exit
876  *
877  * When a lock was initialized in percpu mode (SIX_LOCK_INIT_PCPU), this is
878  * required to free the percpu read counts.
879  */
880 void six_lock_exit(struct six_lock *lock)
881 {
882         WARN_ON(lock->readers && pcpu_read_count(lock));
883         WARN_ON(atomic_read(&lock->state) & SIX_LOCK_HELD_read);
884
885         free_percpu(lock->readers);
886         lock->readers = NULL;
887 }
888 EXPORT_SYMBOL_GPL(six_lock_exit);
889
890 void __six_lock_init(struct six_lock *lock, const char *name,
891                      struct lock_class_key *key, enum six_lock_init_flags flags)
892 {
893         atomic_set(&lock->state, 0);
894         raw_spin_lock_init(&lock->wait_lock);
895         INIT_LIST_HEAD(&lock->wait_list);
896 #ifdef CONFIG_DEBUG_LOCK_ALLOC
897         debug_check_no_locks_freed((void *) lock, sizeof(*lock));
898         lockdep_init_map(&lock->dep_map, name, key, 0);
899 #endif
900
901         /*
902          * Don't assume that we have real percpu variables available in
903          * userspace:
904          */
905 #ifdef __KERNEL__
906         if (flags & SIX_LOCK_INIT_PCPU) {
907                 /*
908                  * We don't return an error here on memory allocation failure
909                  * since percpu is an optimization, and locks will work with the
910                  * same semantics in non-percpu mode: callers can check for
911                  * failure if they wish by checking lock->readers, but generally
912                  * will not want to treat it as an error.
913                  */
914                 lock->readers = alloc_percpu(unsigned);
915         }
916 #endif
917 }
918 EXPORT_SYMBOL_GPL(__six_lock_init);
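/*
 * Example lifecycle for a lock in percpu reader mode (editor's sketch; "foo"
 * is hypothetical):
 *
 *	static struct lock_class_key foo_lock_key;
 *
 *	__six_lock_init(&foo->lock, "foo->lock", &foo_lock_key,
 *			SIX_LOCK_INIT_PCPU);
 *	// lock->readers may still be NULL if the percpu allocation failed;
 *	// the lock then simply runs in non-percpu mode
 *
 *	// ... use the lock ...
 *
 *	six_lock_exit(&foo->lock);	// frees the percpu read counts
 */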