#include <linux/log2.h>
#include <linux/preempt.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/sched/rt.h>

#include "six.h"

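/*
 * Shared/intent/exclusive ("six") locks: SIX_LOCK_read is shared,
 * SIX_LOCK_write is exclusive, and SIX_LOCK_intent sits in between: it
 * conflicts with other intent holders but not with readers, and it must
 * already be held when taking the write lock. A sequence number is
 * bumped on every write lock and unlock, so a lock that was dropped can
 * be revalidated later with six_relock_*().
 *
 * A minimal usage sketch (assuming the init and per-type entry points
 * declared in six.h; see the SIX_LOCK_SEPARATE_LOCKFNS section below):
 *
 *	struct six_lock lock;
 *
 *	six_lock_init(&lock);
 *	six_lock_intent(&lock);		(readers may still take the lock)
 *	six_lock_write(&lock);		(now readers are excluded too)
 *	six_unlock_write(&lock);
 *	six_unlock_intent(&lock);
 */
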
#define six_acquire(l, t)	lock_acquire(l, 0, t, 0, 0, NULL, _RET_IP_)
#define six_release(l)		lock_release(l, 0, _RET_IP_)

struct six_lock_vals {
	/* Value we add to the lock in order to take the lock: */
	u64			lock_val;

	/* If the lock has this value (used as a mask), taking the lock fails: */
	u64			lock_fail;

	/* Value we add to the lock in order to release the lock: */
	u64			unlock_val;

	/* Mask that indicates lock is held for this type: */
	u64			held_mask;

	/* Waitlist we wakeup when releasing the lock: */
	enum six_lock_type	unlock_wakeup;
};

#define __SIX_LOCK_HELD_read	__SIX_VAL(read_lock, ~0)
#define __SIX_LOCK_HELD_intent	__SIX_VAL(intent_lock, ~0)
#define __SIX_LOCK_HELD_write	__SIX_VAL(seq, 1)

#define LOCK_VALS {							\
	[SIX_LOCK_read] = {						\
		.lock_val	= __SIX_VAL(read_lock, 1),		\
		.lock_fail	= __SIX_LOCK_HELD_write,		\
		.unlock_val	= -__SIX_VAL(read_lock, 1),		\
		.held_mask	= __SIX_LOCK_HELD_read,			\
		.unlock_wakeup	= SIX_LOCK_write,			\
	},								\
	[SIX_LOCK_intent] = {						\
		.lock_val	= __SIX_VAL(intent_lock, 1),		\
		.lock_fail	= __SIX_LOCK_HELD_intent,		\
		.unlock_val	= -__SIX_VAL(intent_lock, 1),		\
		.held_mask	= __SIX_LOCK_HELD_intent,		\
		.unlock_wakeup	= SIX_LOCK_intent,			\
	},								\
	[SIX_LOCK_write] = {						\
		.lock_val	= __SIX_VAL(seq, 1),			\
		.lock_fail	= __SIX_LOCK_HELD_read,			\
		.unlock_val	= __SIX_VAL(seq, 1),			\
		.held_mask	= __SIX_LOCK_HELD_write,		\
		.unlock_wakeup	= SIX_LOCK_read,			\
	},								\
}

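/*
 * Note how the write lock is accounted for: locking and unlocking both
 * add __SIX_VAL(seq, 1), so the low bit of the sequence number doubles
 * as the "write locked" flag; seq is odd exactly while the write lock
 * is held, which is why __SIX_LOCK_HELD_write is __SIX_VAL(seq, 1).
 *
 * Only the intent lock tracks an owner: it's held by at most one task
 * at a time (though recursively, hence the intent_lock count), and the
 * owner pointer is what the optimistic spinning code below watches.
 */
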
static inline void six_set_owner(struct six_lock *lock, enum six_lock_type type,
				 union six_lock_state old)
{
	if (type != SIX_LOCK_intent)
		return;

	if (!old.intent_lock) {
		EBUG_ON(lock->owner);
		lock->owner = current;
	} else {
		EBUG_ON(lock->owner != current);
	}
}

static inline void six_clear_owner(struct six_lock *lock, enum six_lock_type type)
{
	if (type != SIX_LOCK_intent)
		return;

	EBUG_ON(lock->owner != current);

	if (lock->state.intent_lock == 1)
		lock->owner = NULL;
}

static __always_inline bool do_six_trylock_type(struct six_lock *lock,
						enum six_lock_type type)
{
	const struct six_lock_vals l[] = LOCK_VALS;
	union six_lock_state old;
	u64 v = READ_ONCE(lock->state.v);

	EBUG_ON(type == SIX_LOCK_write && lock->owner != current);

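	/*
	 * A single 64-bit cmpxchg both checks for conflicting holders
	 * (lock_fail) and takes the lock (lock_val); the acquire
	 * ordering pairs with the release in __six_unlock_type():
	 */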
	do {
		old.v = v;

		EBUG_ON(type == SIX_LOCK_write &&
			((old.v & __SIX_LOCK_HELD_write) ||
			 !(old.v & __SIX_LOCK_HELD_intent)));

		if (old.v & l[type].lock_fail)
			return false;
	} while ((v = atomic64_cmpxchg_acquire(&lock->state.counter,
				old.v,
				old.v + l[type].lock_val)) != old.v);

	six_set_owner(lock, type, old);
	return true;
}

__always_inline __flatten
static bool __six_trylock_type(struct six_lock *lock, enum six_lock_type type)
{
	if (!do_six_trylock_type(lock, type))
		return false;

	six_acquire(&lock->dep_map, 1);
	return true;
}

__always_inline __flatten
static bool __six_relock_type(struct six_lock *lock, enum six_lock_type type,
			      unsigned seq)
{
	const struct six_lock_vals l[] = LOCK_VALS;
	union six_lock_state old;
	u64 v = READ_ONCE(lock->state.v);

	do {
		old.v = v;

		if (old.seq != seq || old.v & l[type].lock_fail)
			return false;
	} while ((v = atomic64_cmpxchg_acquire(&lock->state.counter,
				old.v,
				old.v + l[type].lock_val)) != old.v);

	six_set_owner(lock, type, old);
	six_acquire(&lock->dep_map, 1);
	return true;
}

struct six_lock_waiter {
	struct list_head	list;
	struct task_struct	*task;
};

/* This is probably up there with the more evil things I've done */
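/*
 * It builds a six_lock_state with only the waiter bit for the given
 * lock type set, then takes ilog2() (from linux/log2.h) of the raw
 * 64-bit word to recover that bit's absolute position, suitable for
 * set_bit()/clear_bit() on &lock->state.v:
 */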
#define waitlist_bitnr(id) ilog2((((union six_lock_state) { .waiters = 1 << (id) }).l))

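/*
 * Optimistic spinning, modeled on the kernel mutex implementation: we
 * spin (queued on the MCS-style osq lock) only while the current lock
 * owner is itself running on a CPU, since only then is a quick release
 * likely.
 */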
static inline int six_can_spin_on_owner(struct six_lock *lock)
{
	struct task_struct *owner;
	int retval = 1;

	if (need_resched())
		return 0;

	rcu_read_lock();
	owner = READ_ONCE(lock->owner);
	if (owner)
		retval = owner->on_cpu;
	rcu_read_unlock();
	/*
	 * If lock->owner is not set, the owner may have just acquired
	 * the lock and not set the owner yet, or the lock may have just
	 * been released.
	 */
	return retval;
}

static inline bool six_spin_on_owner(struct six_lock *lock,
				     struct task_struct *owner)
{
	bool ret = true;

	rcu_read_lock();
	while (lock->owner == owner) {
		/*
		 * Ensure we emit the owner->on_cpu dereference _after_
		 * checking that lock->owner still matches owner. If that
		 * fails, owner might point to freed memory. If it still
		 * matches, the rcu_read_lock() ensures the memory stays
		 * valid.
		 */
		barrier();

		if (!owner->on_cpu || need_resched()) {
			ret = false;
			break;
		}

		cpu_relax();
	}
	rcu_read_unlock();

	return ret;
}

static inline bool six_optimistic_spin(struct six_lock *lock, enum six_lock_type type)
{
	struct task_struct *task = current;

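	/*
	 * A write lock is only ever taken while already holding the
	 * intent lock, so lock->owner is already current: there is no
	 * other owner to spin on.
	 */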
	if (type == SIX_LOCK_write)
		return false;

	preempt_disable();
	if (!six_can_spin_on_owner(lock))
		goto fail;

	if (!osq_lock(&lock->osq))
		goto fail;

	while (1) {
		struct task_struct *owner;

		/*
		 * If there's an owner, wait for it to either
		 * release the lock or go to sleep.
		 */
		owner = READ_ONCE(lock->owner);
		if (owner && !six_spin_on_owner(lock, owner))
			break;

		if (do_six_trylock_type(lock, type)) {
			osq_unlock(&lock->osq);
			preempt_enable();
			return true;
		}

		/*
		 * When there's no owner, we might have preempted between the
		 * owner acquiring the lock and setting the owner field. If
		 * we're an RT task, spinning can live-lock, since we won't
		 * let the owner complete.
		 */
		if (!owner && (need_resched() || rt_task(task)))
			break;

		/*
		 * The cpu_relax() call is a compiler barrier which forces
		 * everything in this loop to be re-loaded. We don't need
		 * memory barriers as we'll eventually observe the right
		 * values at the cost of a few extra spins.
		 */
		cpu_relax();
	}

	osq_unlock(&lock->osq);
fail:
	preempt_enable();

	/*
	 * If we fell out of the spin path because of need_resched(),
	 * reschedule now, before we try-lock again. This avoids getting
	 * scheduled out right after we obtained the lock.
	 */
	if (need_resched())
		schedule();

	return false;
}

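/*
 * The sleeping slowpath: mark ourselves TASK_UNINTERRUPTIBLE, put
 * ourselves on this lock type's waitlist and set the matching waiter
 * bit in the lock word, then retry the lock once more before calling
 * schedule(), so a wakeup between the failed retry and going to sleep
 * isn't lost. Write-lock waiters skip the list; the unlock path wakes
 * them directly through lock->owner.
 */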
noinline
static void __six_lock_type_slowpath(struct six_lock *lock, enum six_lock_type type)
{
	const struct six_lock_vals l[] = LOCK_VALS;
	union six_lock_state old, new;
	struct six_lock_waiter wait;
	u64 v;

	if (six_optimistic_spin(lock, type))
		return;

	lock_contended(&lock->dep_map, _RET_IP_);

	INIT_LIST_HEAD(&wait.list);
	wait.task = current;

	while (1) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (type == SIX_LOCK_write)
			EBUG_ON(lock->owner != current);
		else if (list_empty_careful(&wait.list)) {
			raw_spin_lock(&lock->wait_lock);
			list_add_tail(&wait.list, &lock->wait_list[type]);
			raw_spin_unlock(&lock->wait_lock);
		}

		v = READ_ONCE(lock->state.v);
		do {
			new.v = old.v = v;

			if (!(old.v & l[type].lock_fail))
				new.v += l[type].lock_val;
			else if (!(new.waiters & (1 << type)))
				new.waiters |= 1 << type;
			else
				break; /* waiting bit already set */
		} while ((v = atomic64_cmpxchg_acquire(&lock->state.counter,
					old.v, new.v)) != old.v);

		if (!(old.v & l[type].lock_fail))
			break;

		schedule();
	}

	six_set_owner(lock, type, old);

	__set_current_state(TASK_RUNNING);

	if (!list_empty_careful(&wait.list)) {
		raw_spin_lock(&lock->wait_lock);
		list_del_init(&wait.list);
		raw_spin_unlock(&lock->wait_lock);
	}
}

__always_inline
static void __six_lock_type(struct six_lock *lock, enum six_lock_type type)
{
	six_acquire(&lock->dep_map, 0);

	if (!do_six_trylock_type(lock, type))
		__six_lock_type_slowpath(lock, type);

	lock_acquired(&lock->dep_map, _RET_IP_);
}

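/*
 * Wake waiters on unlock: a write-lock waiter (which already holds the
 * intent lock) is woken directly through lock->owner, and only once
 * the read count has dropped to zero; readers are all woken at once;
 * intent waiters are woken one at a time, with the waiter bit put back
 * if any remain.
 */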
static inline void six_lock_wakeup(struct six_lock *lock,
				   union six_lock_state state,
				   unsigned waitlist_id)
{
	struct list_head *wait_list = &lock->wait_list[waitlist_id];
	struct six_lock_waiter *w, *next;

	if (waitlist_id == SIX_LOCK_write && state.read_lock)
		return;

	if (!(state.waiters & (1 << waitlist_id)))
		return;

	clear_bit(waitlist_bitnr(waitlist_id),
		  (unsigned long *) &lock->state.v);

	if (waitlist_id == SIX_LOCK_write) {
		struct task_struct *p = READ_ONCE(lock->owner);

		if (p)
			wake_up_process(p);
		return;
	}

	raw_spin_lock(&lock->wait_lock);

	list_for_each_entry_safe(w, next, wait_list, list) {
		list_del_init(&w->list);

		if (wake_up_process(w->task) &&
		    waitlist_id != SIX_LOCK_read) {
			if (!list_empty(wait_list))
				set_bit(waitlist_bitnr(waitlist_id),
					(unsigned long *) &lock->state.v);
			break;
		}
	}

	raw_spin_unlock(&lock->wait_lock);
}

__always_inline __flatten
static void __six_unlock_type(struct six_lock *lock, enum six_lock_type type)
{
	const struct six_lock_vals l[] = LOCK_VALS;
	union six_lock_state state;

	EBUG_ON(!(lock->state.v & l[type].held_mask));
	EBUG_ON(type == SIX_LOCK_write &&
		!(lock->state.v & __SIX_LOCK_HELD_intent));

	six_clear_owner(lock, type);

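	/*
	 * The release barrier pairs with the acquire on the cmpxchg in
	 * the lock paths; the state we get back tells us which waiters
	 * need waking:
	 */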
	state.v = atomic64_add_return_release(l[type].unlock_val,
					      &lock->state.counter);
	six_release(&lock->dep_map);
	six_lock_wakeup(lock, state, l[type].unlock_wakeup);
}

#ifdef SIX_LOCK_SEPARATE_LOCKFNS

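/*
 * Stamp out a separate set of entry points per lock type
 * (six_trylock_read() and so on), letting the compiler constant-fold
 * the lock type; without SIX_LOCK_SEPARATE_LOCKFNS, single functions
 * taking the type as a runtime argument are exposed instead.
 */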
#define __SIX_LOCK(type)						\
bool six_trylock_##type(struct six_lock *lock)				\
{									\
	return __six_trylock_type(lock, SIX_LOCK_##type);		\
}									\
									\
bool six_relock_##type(struct six_lock *lock, u32 seq)			\
{									\
	return __six_relock_type(lock, SIX_LOCK_##type, seq);		\
}									\
									\
void six_lock_##type(struct six_lock *lock)				\
{									\
	__six_lock_type(lock, SIX_LOCK_##type);				\
}									\
									\
void six_unlock_##type(struct six_lock *lock)				\
{									\
	__six_unlock_type(lock, SIX_LOCK_##type);			\
}

__SIX_LOCK(read)
__SIX_LOCK(intent)
__SIX_LOCK(write)

#undef __SIX_LOCK

#else

bool six_trylock_type(struct six_lock *lock, enum six_lock_type type)
{
	return __six_trylock_type(lock, type);
}

bool six_relock_type(struct six_lock *lock, enum six_lock_type type,
		     unsigned seq)
{
	return __six_relock_type(lock, type, seq);
}

void six_lock_type(struct six_lock *lock, enum six_lock_type type)
{
	__six_lock_type(lock, type);
}

void six_unlock_type(struct six_lock *lock, enum six_lock_type type)
{
	__six_unlock_type(lock, type);
}

#endif

/*
 * Convert from intent to read, taking the new read lock before dropping
 * the intent lock so there's no window where we hold neither:
 */
void six_lock_downgrade(struct six_lock *lock)
{
	six_lock_increment(lock, SIX_LOCK_read);
	six_unlock_intent(lock);
}

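/*
 * Try to convert a read lock to an intent lock: drop our read reference
 * and take an intent reference in a single cmpxchg, failing (and
 * leaving the lock unchanged) if someone else already holds the intent
 * lock. On success, dropping the read count may allow a write-lock
 * waiter to proceed, hence the wakeup.
 */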
bool six_lock_tryupgrade(struct six_lock *lock)
{
	const struct six_lock_vals l[] = LOCK_VALS;
	union six_lock_state old, new;
	u64 v = READ_ONCE(lock->state.v);

	do {
		new.v = old.v = v;

		EBUG_ON(!(old.v & l[SIX_LOCK_read].held_mask));

		new.v += l[SIX_LOCK_read].unlock_val;

		if (new.v & l[SIX_LOCK_intent].lock_fail)
			return false;

		new.v += l[SIX_LOCK_intent].lock_val;
	} while ((v = atomic64_cmpxchg_acquire(&lock->state.counter,
				old.v, new.v)) != old.v);

	six_set_owner(lock, SIX_LOCK_intent, old);
	six_lock_wakeup(lock, new, l[SIX_LOCK_read].unlock_wakeup);

	return true;
}

bool six_trylock_convert(struct six_lock *lock,
			 enum six_lock_type from,
			 enum six_lock_type to)
{
	EBUG_ON(to == SIX_LOCK_write || from == SIX_LOCK_write);

	if (to == from)
		return true;

	if (to == SIX_LOCK_read) {
		six_lock_downgrade(lock);
		return true;
	} else {
		return six_lock_tryupgrade(lock);
	}
}

/*
 * Increment read/intent lock count, assuming we already have it read or
 * intent locked:
 */
void six_lock_increment(struct six_lock *lock, enum six_lock_type type)
{
	const struct six_lock_vals l[] = LOCK_VALS;

	EBUG_ON(type == SIX_LOCK_write);
	six_acquire(&lock->dep_map, 0);

	/* XXX: assert already locked, and that we don't overflow: */

	atomic64_add(l[type].lock_val, &lock->state.counter);
}