]> git.sesse.net Git - bcachefs-tools-debian/blob - linux/six.c
Update bcachefs sources to 4837f82ee1 bcachefs: Use cached iterators for alloc btree
[bcachefs-tools-debian] / linux / six.c
1 // SPDX-License-Identifier: GPL-2.0
2
3 #include <linux/export.h>
4 #include <linux/log2.h>
5 #include <linux/preempt.h>
6 #include <linux/rcupdate.h>
7 #include <linux/sched.h>
8 #include <linux/sched/rt.h>
9 #include <linux/six.h>
10
/*
 * EBUG_ON() - assertion that is only active when DEBUG is defined.
 *
 * Without DEBUG it expands to an empty statement, so @cond is not evaluated.
 */
#ifndef DEBUG
#define EBUG_ON(cond)		do {} while (0)
#else
#define EBUG_ON(cond)		BUG_ON(cond)
#endif
16
/*
 * Lockdep annotation helpers.
 *
 * six_acquire(): @trylock is forwarded as lock_acquire()'s third argument;
 * callers in this file pass 1 from the trylock/relock paths and 0 otherwise.
 * six_release(): forwards @map to lock_release().
 */
#define six_acquire(map, trylock)	lock_acquire(map, 0, trylock, 0, 0, NULL, _RET_IP_)
#define six_release(map)		lock_release(map, _RET_IP_)
19
20 struct six_lock_vals {
21         /* Value we add to the lock in order to take the lock: */
22         u64                     lock_val;
23
24         /* If the lock has this value (used as a mask), taking the lock fails: */
25         u64                     lock_fail;
26
27         /* Value we add to the lock in order to release the lock: */
28         u64                     unlock_val;
29
30         /* Mask that indicates lock is held for this type: */
31         u64                     held_mask;
32
33         /* Waitlist we wakeup when releasing the lock: */
34         enum six_lock_type      unlock_wakeup;
35 };
36
/*
 * Masks over the lock state word used to test whether the lock is currently
 * held for a given type (__SIX_VAL() is defined outside this file).
 *
 * The write mask is the value that both locking and unlocking for write add
 * to the state (see LOCK_VALS), i.e. the low bit of the seq field.
 */
#define __SIX_LOCK_HELD_read		__SIX_VAL(read_lock, ~0)
#define __SIX_LOCK_HELD_intent		__SIX_VAL(intent_lock, ~0)
#define __SIX_LOCK_HELD_write		__SIX_VAL(seq, 1)
40
/*
 * Initializer for a struct six_lock_vals table indexed by enum six_lock_type.
 *
 *  - read:   blocked while the lock is held for write; unlocking wakes the
 *            write waitlist.
 *  - intent: blocked while the lock is held for intent; unlocking wakes the
 *            intent waitlist.
 *  - write:  blocked while the lock is held for read; both taking and
 *            releasing add the same value to the seq field; unlocking wakes
 *            the read waitlist.
 */
#define LOCK_VALS {							\
	[SIX_LOCK_read] = {						\
		.held_mask	= __SIX_LOCK_HELD_read,			\
		.lock_fail	= __SIX_LOCK_HELD_write,		\
		.lock_val	= __SIX_VAL(read_lock, 1),		\
		.unlock_val	= -__SIX_VAL(read_lock, 1),		\
		.unlock_wakeup	= SIX_LOCK_write,			\
	},								\
	[SIX_LOCK_intent] = {						\
		.held_mask	= __SIX_LOCK_HELD_intent,		\
		.lock_fail	= __SIX_LOCK_HELD_intent,		\
		.lock_val	= __SIX_VAL(intent_lock, 1),		\
		.unlock_val	= -__SIX_VAL(intent_lock, 1),		\
		.unlock_wakeup	= SIX_LOCK_intent,			\
	},								\
	[SIX_LOCK_write] = {						\
		.held_mask	= __SIX_LOCK_HELD_write,		\
		.lock_fail	= __SIX_LOCK_HELD_read,			\
		.lock_val	= __SIX_VAL(seq, 1),			\
		.unlock_val	= __SIX_VAL(seq, 1),			\
		.unlock_wakeup	= SIX_LOCK_read,			\
	},								\
}
64
65 static inline void six_set_owner(struct six_lock *lock, enum six_lock_type type,
66                                  union six_lock_state old)
67 {
68         if (type != SIX_LOCK_intent)
69                 return;
70
71         if (!old.intent_lock) {
72                 EBUG_ON(lock->owner);
73                 lock->owner = current;
74         } else {
75                 EBUG_ON(lock->owner != current);
76         }
77 }
78
79 static __always_inline bool do_six_trylock_type(struct six_lock *lock,
80                                                 enum six_lock_type type)
81 {
82         const struct six_lock_vals l[] = LOCK_VALS;
83         union six_lock_state old;
84         u64 v = READ_ONCE(lock->state.v);
85
86         EBUG_ON(type == SIX_LOCK_write && lock->owner != current);
87
88         do {
89                 old.v = v;
90
91                 EBUG_ON(type == SIX_LOCK_write &&
92                         ((old.v & __SIX_LOCK_HELD_write) ||
93                          !(old.v & __SIX_LOCK_HELD_intent)));
94
95                 if (old.v & l[type].lock_fail)
96                         return false;
97         } while ((v = atomic64_cmpxchg_acquire(&lock->state.counter,
98                                 old.v,
99                                 old.v + l[type].lock_val)) != old.v);
100
101         six_set_owner(lock, type, old);
102         return true;
103 }
104
105 __always_inline __flatten
106 static bool __six_trylock_type(struct six_lock *lock, enum six_lock_type type)
107 {
108         if (!do_six_trylock_type(lock, type))
109                 return false;
110
111         if (type != SIX_LOCK_write)
112                 six_acquire(&lock->dep_map, 1);
113         return true;
114 }
115
116 __always_inline __flatten
117 static bool __six_relock_type(struct six_lock *lock, enum six_lock_type type,
118                               unsigned seq)
119 {
120         const struct six_lock_vals l[] = LOCK_VALS;
121         union six_lock_state old;
122         u64 v = READ_ONCE(lock->state.v);
123
124         do {
125                 old.v = v;
126
127                 if (old.seq != seq || old.v & l[type].lock_fail)
128                         return false;
129         } while ((v = atomic64_cmpxchg_acquire(&lock->state.counter,
130                                 old.v,
131                                 old.v + l[type].lock_val)) != old.v);
132
133         six_set_owner(lock, type, old);
134         if (type != SIX_LOCK_write)
135                 six_acquire(&lock->dep_map, 1);
136         return true;
137 }
138
139 struct six_lock_waiter {
140         struct list_head        list;
141         struct task_struct      *task;
142 };
143
/*
 * This is probably up there with the more evil things I've done.
 *
 * Computes the bit number, within the `l` member of union six_lock_state, of
 * the waiters bit for waitlist @id: build a state with only that waiters bit
 * set, then take ilog2() of it as seen through `l`. The result is used with
 * set_bit()/clear_bit() on the lock state.
 */
#define waitlist_bitnr(id)						\
	ilog2((((union six_lock_state) { .waiters = 1 << (id) }).l))
146
147 #ifdef CONFIG_LOCK_SPIN_ON_OWNER
148
149 static inline int six_can_spin_on_owner(struct six_lock *lock)
150 {
151         struct task_struct *owner;
152         int retval = 1;
153
154         if (need_resched())
155                 return 0;
156
157         rcu_read_lock();
158         owner = READ_ONCE(lock->owner);
159         if (owner)
160                 retval = owner->on_cpu;
161         rcu_read_unlock();
162         /*
163          * if lock->owner is not set, the mutex owner may have just acquired
164          * it and not set the owner yet or the mutex has been released.
165          */
166         return retval;
167 }
168
169 static inline bool six_spin_on_owner(struct six_lock *lock,
170                                      struct task_struct *owner)
171 {
172         bool ret = true;
173
174         rcu_read_lock();
175         while (lock->owner == owner) {
176                 /*
177                  * Ensure we emit the owner->on_cpu, dereference _after_
178                  * checking lock->owner still matches owner. If that fails,
179                  * owner might point to freed memory. If it still matches,
180                  * the rcu_read_lock() ensures the memory stays valid.
181                  */
182                 barrier();
183
184                 if (!owner->on_cpu || need_resched()) {
185                         ret = false;
186                         break;
187                 }
188
189                 cpu_relax();
190         }
191         rcu_read_unlock();
192
193         return ret;
194 }
195
196 static inline bool six_optimistic_spin(struct six_lock *lock, enum six_lock_type type)
197 {
198         struct task_struct *task = current;
199
200         if (type == SIX_LOCK_write)
201                 return false;
202
203         preempt_disable();
204         if (!six_can_spin_on_owner(lock))
205                 goto fail;
206
207         if (!osq_lock(&lock->osq))
208                 goto fail;
209
210         while (1) {
211                 struct task_struct *owner;
212
213                 /*
214                  * If there's an owner, wait for it to either
215                  * release the lock or go to sleep.
216                  */
217                 owner = READ_ONCE(lock->owner);
218                 if (owner && !six_spin_on_owner(lock, owner))
219                         break;
220
221                 if (do_six_trylock_type(lock, type)) {
222                         osq_unlock(&lock->osq);
223                         preempt_enable();
224                         return true;
225                 }
226
227                 /*
228                  * When there's no owner, we might have preempted between the
229                  * owner acquiring the lock and setting the owner field. If
230                  * we're an RT task that will live-lock because we won't let
231                  * the owner complete.
232                  */
233                 if (!owner && (need_resched() || rt_task(task)))
234                         break;
235
236                 /*
237                  * The cpu_relax() call is a compiler barrier which forces
238                  * everything in this loop to be re-loaded. We don't need
239                  * memory barriers as we'll eventually observe the right
240                  * values at the cost of a few extra spins.
241                  */
242                 cpu_relax();
243         }
244
245         osq_unlock(&lock->osq);
246 fail:
247         preempt_enable();
248
249         /*
250          * If we fell out of the spin path because of need_resched(),
251          * reschedule now, before we try-lock again. This avoids getting
252          * scheduled out right after we obtained the lock.
253          */
254         if (need_resched())
255                 schedule();
256
257         return false;
258 }
259
260 #else /* CONFIG_LOCK_SPIN_ON_OWNER */
261
262 static inline bool six_optimistic_spin(struct six_lock *lock, enum six_lock_type type)
263 {
264         return false;
265 }
266
267 #endif
268
269 noinline
270 static int __six_lock_type_slowpath(struct six_lock *lock, enum six_lock_type type,
271                                     six_lock_should_sleep_fn should_sleep_fn, void *p)
272 {
273         const struct six_lock_vals l[] = LOCK_VALS;
274         union six_lock_state old, new;
275         struct six_lock_waiter wait;
276         int ret = 0;
277         u64 v;
278
279         ret = should_sleep_fn ? should_sleep_fn(lock, p) : 0;
280         if (ret)
281                 return ret;
282
283         if (six_optimistic_spin(lock, type))
284                 return 0;
285
286         lock_contended(&lock->dep_map, _RET_IP_);
287
288         INIT_LIST_HEAD(&wait.list);
289         wait.task = current;
290
291         while (1) {
292                 set_current_state(TASK_UNINTERRUPTIBLE);
293                 if (type == SIX_LOCK_write)
294                         EBUG_ON(lock->owner != current);
295                 else if (list_empty_careful(&wait.list)) {
296                         raw_spin_lock(&lock->wait_lock);
297                         list_add_tail(&wait.list, &lock->wait_list[type]);
298                         raw_spin_unlock(&lock->wait_lock);
299                 }
300
301                 ret = should_sleep_fn ? should_sleep_fn(lock, p) : 0;
302                 if (ret)
303                         break;
304
305                 v = READ_ONCE(lock->state.v);
306                 do {
307                         new.v = old.v = v;
308
309                         if (!(old.v & l[type].lock_fail))
310                                 new.v += l[type].lock_val;
311                         else if (!(new.waiters & (1 << type)))
312                                 new.waiters |= 1 << type;
313                         else
314                                 break; /* waiting bit already set */
315                 } while ((v = atomic64_cmpxchg_acquire(&lock->state.counter,
316                                         old.v, new.v)) != old.v);
317
318                 if (!(old.v & l[type].lock_fail))
319                         break;
320
321                 schedule();
322         }
323
324         if (!ret)
325                 six_set_owner(lock, type, old);
326
327         __set_current_state(TASK_RUNNING);
328
329         if (!list_empty_careful(&wait.list)) {
330                 raw_spin_lock(&lock->wait_lock);
331                 list_del_init(&wait.list);
332                 raw_spin_unlock(&lock->wait_lock);
333         }
334
335         return ret;
336 }
337
338 __always_inline
339 static int __six_lock_type(struct six_lock *lock, enum six_lock_type type,
340                            six_lock_should_sleep_fn should_sleep_fn, void *p)
341 {
342         int ret;
343
344         if (type != SIX_LOCK_write)
345                 six_acquire(&lock->dep_map, 0);
346
347         ret = do_six_trylock_type(lock, type) ? 0
348                 : __six_lock_type_slowpath(lock, type, should_sleep_fn, p);
349
350         if (ret && type != SIX_LOCK_write)
351                 six_release(&lock->dep_map);
352         if (!ret)
353                 lock_acquired(&lock->dep_map, _RET_IP_);
354
355         return ret;
356 }
357
358 static inline void six_lock_wakeup(struct six_lock *lock,
359                                    union six_lock_state state,
360                                    unsigned waitlist_id)
361 {
362         struct list_head *wait_list = &lock->wait_list[waitlist_id];
363         struct six_lock_waiter *w, *next;
364
365         if (waitlist_id == SIX_LOCK_write && state.read_lock)
366                 return;
367
368         if (!(state.waiters & (1 << waitlist_id)))
369                 return;
370
371         clear_bit(waitlist_bitnr(waitlist_id),
372                   (unsigned long *) &lock->state.v);
373
374         if (waitlist_id == SIX_LOCK_write) {
375                 struct task_struct *p = READ_ONCE(lock->owner);
376
377                 if (p)
378                         wake_up_process(p);
379                 return;
380         }
381
382         raw_spin_lock(&lock->wait_lock);
383
384         list_for_each_entry_safe(w, next, wait_list, list) {
385                 list_del_init(&w->list);
386
387                 if (wake_up_process(w->task) &&
388                     waitlist_id != SIX_LOCK_read) {
389                         if (!list_empty(wait_list))
390                                 set_bit(waitlist_bitnr(waitlist_id),
391                                         (unsigned long *) &lock->state.v);
392                         break;
393                 }
394         }
395
396         raw_spin_unlock(&lock->wait_lock);
397 }
398
399 __always_inline __flatten
400 static void __six_unlock_type(struct six_lock *lock, enum six_lock_type type)
401 {
402         const struct six_lock_vals l[] = LOCK_VALS;
403         union six_lock_state state;
404
405         EBUG_ON(!(lock->state.v & l[type].held_mask));
406         EBUG_ON(type == SIX_LOCK_write &&
407                 !(lock->state.v & __SIX_LOCK_HELD_intent));
408
409         if (type != SIX_LOCK_write)
410                 six_release(&lock->dep_map);
411
412         if (type == SIX_LOCK_intent) {
413                 EBUG_ON(lock->owner != current);
414
415                 if (lock->intent_lock_recurse) {
416                         --lock->intent_lock_recurse;
417                         return;
418                 }
419
420                 lock->owner = NULL;
421         }
422
423         state.v = atomic64_add_return_release(l[type].unlock_val,
424                                               &lock->state.counter);
425         six_lock_wakeup(lock, state, l[type].unlock_wakeup);
426 }
427
428 #define __SIX_LOCK(type)                                                \
429 bool six_trylock_##type(struct six_lock *lock)                          \
430 {                                                                       \
431         return __six_trylock_type(lock, SIX_LOCK_##type);               \
432 }                                                                       \
433 EXPORT_SYMBOL_GPL(six_trylock_##type);                                  \
434                                                                         \
435 bool six_relock_##type(struct six_lock *lock, u32 seq)                  \
436 {                                                                       \
437         return __six_relock_type(lock, SIX_LOCK_##type, seq);           \
438 }                                                                       \
439 EXPORT_SYMBOL_GPL(six_relock_##type);                                   \
440                                                                         \
441 int six_lock_##type(struct six_lock *lock,                              \
442                     six_lock_should_sleep_fn should_sleep_fn, void *p)  \
443 {                                                                       \
444         return __six_lock_type(lock, SIX_LOCK_##type, should_sleep_fn, p);\
445 }                                                                       \
446 EXPORT_SYMBOL_GPL(six_lock_##type);                                     \
447                                                                         \
448 void six_unlock_##type(struct six_lock *lock)                           \
449 {                                                                       \
450         __six_unlock_type(lock, SIX_LOCK_##type);                       \
451 }                                                                       \
452 EXPORT_SYMBOL_GPL(six_unlock_##type);
453
454 __SIX_LOCK(read)
455 __SIX_LOCK(intent)
456 __SIX_LOCK(write)
457
458 #undef __SIX_LOCK
459
460 /* Convert from intent to read: */
461 void six_lock_downgrade(struct six_lock *lock)
462 {
463         six_lock_increment(lock, SIX_LOCK_read);
464         six_unlock_intent(lock);
465 }
466 EXPORT_SYMBOL_GPL(six_lock_downgrade);
467
468 bool six_lock_tryupgrade(struct six_lock *lock)
469 {
470         const struct six_lock_vals l[] = LOCK_VALS;
471         union six_lock_state old, new;
472         u64 v = READ_ONCE(lock->state.v);
473
474         do {
475                 new.v = old.v = v;
476
477                 EBUG_ON(!(old.v & l[SIX_LOCK_read].held_mask));
478
479                 new.v += l[SIX_LOCK_read].unlock_val;
480
481                 if (new.v & l[SIX_LOCK_intent].lock_fail)
482                         return false;
483
484                 new.v += l[SIX_LOCK_intent].lock_val;
485         } while ((v = atomic64_cmpxchg_acquire(&lock->state.counter,
486                                 old.v, new.v)) != old.v);
487
488         six_set_owner(lock, SIX_LOCK_intent, old);
489         six_lock_wakeup(lock, new, l[SIX_LOCK_read].unlock_wakeup);
490
491         return true;
492 }
493 EXPORT_SYMBOL_GPL(six_lock_tryupgrade);
494
495 bool six_trylock_convert(struct six_lock *lock,
496                          enum six_lock_type from,
497                          enum six_lock_type to)
498 {
499         EBUG_ON(to == SIX_LOCK_write || from == SIX_LOCK_write);
500
501         if (to == from)
502                 return true;
503
504         if (to == SIX_LOCK_read) {
505                 six_lock_downgrade(lock);
506                 return true;
507         } else {
508                 return six_lock_tryupgrade(lock);
509         }
510 }
511 EXPORT_SYMBOL_GPL(six_trylock_convert);
512
513 /*
514  * Increment read/intent lock count, assuming we already have it read or intent
515  * locked:
516  */
517 void six_lock_increment(struct six_lock *lock, enum six_lock_type type)
518 {
519         const struct six_lock_vals l[] = LOCK_VALS;
520
521         EBUG_ON(type == SIX_LOCK_write);
522         six_acquire(&lock->dep_map, 0);
523
524         /* XXX: assert already locked, and that we don't overflow: */
525
526         switch (type) {
527         case SIX_LOCK_read:
528                 atomic64_add(l[type].lock_val, &lock->state.counter);
529                 break;
530         case SIX_LOCK_intent:
531                 lock->intent_lock_recurse++;
532                 break;
533         case SIX_LOCK_write:
534                 BUG();
535                 break;
536         }
537 }
538 EXPORT_SYMBOL_GPL(six_lock_increment);
539
540 void six_lock_wakeup_all(struct six_lock *lock)
541 {
542         struct six_lock_waiter *w;
543
544         raw_spin_lock(&lock->wait_lock);
545
546         list_for_each_entry(w, &lock->wait_list[0], list)
547                 wake_up_process(w->task);
548         list_for_each_entry(w, &lock->wait_list[1], list)
549                 wake_up_process(w->task);
550
551         raw_spin_unlock(&lock->wait_lock);
552 }
553 EXPORT_SYMBOL_GPL(six_lock_wakeup_all);