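/*
 * Shared/intent/exclusive (SIX) locks.
 *
 * A six lock can be held in one of three states:
 *
 *  read   - shared: any number of tasks may hold a read lock
 *  intent - shared with readers, but exclusive against other intent holders
 *  write  - exclusive: may only be taken while holding an intent lock, and
 *           only when there are no readers
 *
 * The lock state also contains a sequence number, bumped when a write lock
 * is taken and again when it is released; six_relock_type() uses it to
 * retake a previously held lock only if the sequence number is unchanged.
 */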
#include <linux/log2.h>
#include <linux/preempt.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/sched/rt.h>

#include "six.h"

#define six_acquire(l, t)	lock_acquire(l, 0, t, 0, 0, NULL, _RET_IP_)
#define six_release(l)		lock_release(l, 0, _RET_IP_)

#define __SIX_LOCK_HELD_read		__SIX_VAL(read_lock, ~0)
#define __SIX_LOCK_HELD_intent		__SIX_VAL(intent_lock, ~0)
#define __SIX_LOCK_HELD_write		__SIX_VAL(seq, 1)

struct six_lock_vals {
	/* Value we add to the lock in order to take the lock: */
	u64			lock_val;

	/* If the lock has this value (used as a mask), taking the lock fails: */
	u64			lock_fail;

	/* Value we add to the lock in order to release the lock: */
	u64			unlock_val;

	/* Mask that indicates lock is held for this type: */
	u64			held_mask;

	/* Waitlist we wakeup when releasing the lock: */
	enum six_lock_type	unlock_wakeup;
};

#define LOCK_VALS {							\
	[SIX_LOCK_read] = {						\
		.lock_val	= __SIX_VAL(read_lock, 1),		\
		.lock_fail	= __SIX_LOCK_HELD_write,		\
		.unlock_val	= -__SIX_VAL(read_lock, 1),		\
		.held_mask	= __SIX_LOCK_HELD_read,			\
		.unlock_wakeup	= SIX_LOCK_write,			\
	},								\
	[SIX_LOCK_intent] = {						\
		.lock_val	= __SIX_VAL(intent_lock, 1),		\
		.lock_fail	= __SIX_LOCK_HELD_intent,		\
		.unlock_val	= -__SIX_VAL(intent_lock, 1),		\
		.held_mask	= __SIX_LOCK_HELD_intent,		\
		.unlock_wakeup	= SIX_LOCK_intent,			\
	},								\
	[SIX_LOCK_write] = {						\
		.lock_val	= __SIX_VAL(seq, 1),			\
		.lock_fail	= __SIX_LOCK_HELD_read,			\
		.unlock_val	= __SIX_VAL(seq, 1),			\
		.held_mask	= __SIX_LOCK_HELD_write,		\
		.unlock_wakeup	= SIX_LOCK_read,			\
	},								\
}

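/*
 * The owner is tracked only for intent locks: it is what the optimistic
 * spinning code below spins on while the current holder is still running.
 */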
static void six_set_owner(struct six_lock *lock, enum six_lock_type type)
{
	if (type == SIX_LOCK_intent)
		lock->owner = current;
}

static void six_clear_owner(struct six_lock *lock, enum six_lock_type type)
{
	if (type == SIX_LOCK_intent)
		lock->owner = NULL;
}

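/*
 * Core trylock: attempt to add lock_val to the lock state with a cmpxchg
 * loop, failing if any of the bits in lock_fail are set.
 */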
static inline bool __six_trylock_type(struct six_lock *lock,
				      enum six_lock_type type)
{
	const struct six_lock_vals l[] = LOCK_VALS;
	union six_lock_state old;
	u64 v = READ_ONCE(lock->state.v);

	do {
		old.v = v;

		EBUG_ON(type == SIX_LOCK_write &&
			((old.v & __SIX_LOCK_HELD_write) ||
			 !(old.v & __SIX_LOCK_HELD_intent)));

		if (old.v & l[type].lock_fail)
			return false;
	} while ((v = atomic64_cmpxchg_acquire(&lock->state.counter,
				old.v,
				old.v + l[type].lock_val)) != old.v);
	return true;
}

bool six_trylock_type(struct six_lock *lock, enum six_lock_type type)
{
	bool ret = __six_trylock_type(lock, type);

	if (ret) {
		six_acquire(&lock->dep_map, 1);
		six_set_owner(lock, type);
	}

	return ret;
}

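/*
 * Retake a lock that was previously held, but only if the sequence number
 * still matches - i.e. no write lock was taken since it was dropped.
 */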
bool six_relock_type(struct six_lock *lock, enum six_lock_type type,
		     unsigned seq)
{
	const struct six_lock_vals l[] = LOCK_VALS;
	union six_lock_state old;
	u64 v = READ_ONCE(lock->state.v);

	do {
		old.v = v;

		if (old.seq != seq || old.v & l[type].lock_fail)
			return false;
	} while ((v = atomic64_cmpxchg_acquire(&lock->state.counter,
				old.v,
				old.v + l[type].lock_val)) != old.v);

	six_acquire(&lock->dep_map, 1);
	six_set_owner(lock, type);
	return true;
}

struct six_lock_waiter {
	struct list_head	list;
	struct task_struct	*task;
};

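/*
 * Compute the bit number, within the lock state word, of a given lock
 * type's waiters bit - so it can be used with set_bit()/clear_bit()
 * directly on lock->state.v.
 */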
/* This is probably up there with the more evil things I've done */
#define waitlist_bitnr(id) ilog2((((union six_lock_state) { .waiters = 1 << (id) }).l))

static inline int six_can_spin_on_owner(struct six_lock *lock)
{
	struct task_struct *owner;
	int retval = 1;

	if (need_resched())
		return 0;

	rcu_read_lock();
	owner = READ_ONCE(lock->owner);
	if (owner)
		retval = owner->on_cpu;
	rcu_read_unlock();
	/*
	 * If lock->owner is not set, the owner may have just acquired the
	 * lock and not set the owner yet, or the lock may have been released.
	 */
	return retval;
}

static bool six_spin_on_owner(struct six_lock *lock, struct task_struct *owner)
{
	bool ret = true;

	rcu_read_lock();
	while (lock->owner == owner) {
		/*
		 * Ensure we emit the owner->on_cpu dereference _after_
		 * checking that lock->owner still matches owner. If that
		 * fails, owner might point to freed memory. If it still
		 * matches, the rcu_read_lock() ensures the memory stays
		 * valid.
		 */
		barrier();

		if (!owner->on_cpu || need_resched()) {
			ret = false;
			break;
		}

		cpu_relax_lowlatency();
	}
	rcu_read_unlock();

	return ret;
}

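/*
 * Optimistic spinning, in the style of the mutex code: instead of going to
 * sleep right away, spin (serialized by the OSQ lock) while the lock owner
 * is still running on a CPU, in the hope that the lock is released soon.
 * Only read and intent lock attempts spin; write lock attempts go straight
 * to the slowpath.
 */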
static bool six_optimistic_spin(struct six_lock *lock, enum six_lock_type type)
{
	struct task_struct *task = current;

	if (type == SIX_LOCK_write)
		return false;

	preempt_disable();
	if (!six_can_spin_on_owner(lock))
		goto fail;

	if (!osq_lock(&lock->osq))
		goto fail;

	while (1) {
		struct task_struct *owner;

		/*
		 * If there's an owner, wait for it to either
		 * release the lock or go to sleep.
		 */
		owner = READ_ONCE(lock->owner);
		if (owner && !six_spin_on_owner(lock, owner))
			break;

		if (__six_trylock_type(lock, type)) {
			osq_unlock(&lock->osq);
			preempt_enable();
			return true;
		}

		/*
		 * When there's no owner, we might have preempted between the
		 * owner acquiring the lock and setting the owner field. If
		 * we're an RT task, that will live-lock because we won't let
		 * the owner complete.
		 */
		if (!owner && (need_resched() || rt_task(task)))
			break;

		/*
		 * The cpu_relax_lowlatency() call is a compiler barrier which
		 * forces everything in this loop to be re-loaded. We don't
		 * need memory barriers as we'll eventually observe the right
		 * values at the cost of a few extra spins.
		 */
		cpu_relax_lowlatency();
	}

	osq_unlock(&lock->osq);
fail:
	preempt_enable();

	/*
	 * If we fell out of the spin path because of need_resched(),
	 * reschedule now, before we try-lock again. This avoids getting
	 * scheduled out right after we obtained the lock.
	 */
	if (need_resched())
		schedule();

	return false;
}

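/*
 * Lock slowpath: try the fast path, then optimistic spinning, and finally
 * put ourselves on the waitlist for this lock type and sleep until the
 * lock can be taken.
 */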
void six_lock_type(struct six_lock *lock, enum six_lock_type type)
{
	const struct six_lock_vals l[] = LOCK_VALS;
	union six_lock_state old, new;
	struct six_lock_waiter wait;
	u64 v;

	six_acquire(&lock->dep_map, 0);

	if (__six_trylock_type(lock, type))
		goto done;

	if (six_optimistic_spin(lock, type))
		goto done;

	lock_contended(&lock->dep_map, _RET_IP_);

	INIT_LIST_HEAD(&wait.list);
	wait.task = current;

	while (1) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (list_empty_careful(&wait.list)) {
			raw_spin_lock(&lock->wait_lock);
			list_add_tail(&wait.list, &lock->wait_list[type]);
			raw_spin_unlock(&lock->wait_lock);
		}

		v = READ_ONCE(lock->state.v);
		do {
			new.v = old.v = v;

			if (!(old.v & l[type].lock_fail))
				new.v += l[type].lock_val;
			else if (!(new.waiters & (1 << type)))
				new.waiters |= 1 << type;
			else
				break; /* waiting bit already set */
		} while ((v = atomic64_cmpxchg_acquire(&lock->state.counter,
					old.v, new.v)) != old.v);

		if (!(old.v & l[type].lock_fail))
			break;

		schedule();
	}

	__set_current_state(TASK_RUNNING);

	if (!list_empty_careful(&wait.list)) {
		raw_spin_lock(&lock->wait_lock);
		list_del_init(&wait.list);
		raw_spin_unlock(&lock->wait_lock);
	}
done:
	lock_acquired(&lock->dep_map, _RET_IP_);
	six_set_owner(lock, type);
}

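/*
 * Wake up waiters of the given type after an unlock, clearing the waiters
 * bit first; for intent and write waiters only the first woken task gets
 * the lock, and the bit is set again if more waiters remain.
 */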
static inline void six_lock_wakeup(struct six_lock *lock,
				   union six_lock_state state,
				   unsigned waitlist_id)
{
	struct list_head *wait_list = &lock->wait_list[waitlist_id];
	struct six_lock_waiter *w, *next;

	if (waitlist_id == SIX_LOCK_write && state.read_lock)
		return;

	if (!(state.waiters & (1 << waitlist_id)))
		return;

	clear_bit(waitlist_bitnr(waitlist_id),
		  (unsigned long *) &lock->state.v);

	raw_spin_lock(&lock->wait_lock);

	list_for_each_entry_safe(w, next, wait_list, list) {
		list_del_init(&w->list);

		if (wake_up_process(w->task) &&
		    waitlist_id != SIX_LOCK_read) {
			if (!list_empty(wait_list))
				set_bit(waitlist_bitnr(waitlist_id),
					(unsigned long *) &lock->state.v);
			break;
		}
	}

	raw_spin_unlock(&lock->wait_lock);
}

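/*
 * Drop a lock of the given type and wake the waitlist associated with it
 * (writers after a read unlock, other intent lockers after an intent
 * unlock, readers after a write unlock).
 */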
void six_unlock_type(struct six_lock *lock, enum six_lock_type type)
{
	const struct six_lock_vals l[] = LOCK_VALS;
	union six_lock_state state;

	six_clear_owner(lock, type);

	EBUG_ON(!(lock->state.v & l[type].held_mask));
	EBUG_ON(type == SIX_LOCK_write &&
		!(lock->state.v & __SIX_LOCK_HELD_intent));

	state.v = atomic64_add_return_release(l[type].unlock_val,
					      &lock->state.counter);
	six_release(&lock->dep_map);
	six_lock_wakeup(lock, state, l[type].unlock_wakeup);
}

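/*
 * Atomically convert a held lock from one type to another: on failure the
 * lock is still held in its original state, since the cmpxchg is only
 * attempted once the new state is known to be valid.
 */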
bool six_trylock_convert(struct six_lock *lock,
			 enum six_lock_type from,
			 enum six_lock_type to)
{
	const struct six_lock_vals l[] = LOCK_VALS;
	union six_lock_state old, new;
	u64 v = READ_ONCE(lock->state.v);

	do {
		new.v = old.v = v;
		new.v += l[from].unlock_val;

		if (new.v & l[to].lock_fail)
			return false;
	} while ((v = atomic64_cmpxchg_acquire(&lock->state.counter,
				old.v,
				new.v + l[to].lock_val)) != old.v);

	six_clear_owner(lock, from);
	six_set_owner(lock, to);

	six_lock_wakeup(lock, new, l[from].unlock_wakeup);

	return true;
}

/*
 * Increment read/intent lock count, assuming we already have it read or intent
 * locked:
 */
void six_lock_increment(struct six_lock *lock, enum six_lock_type type)
{
	const struct six_lock_vals l[] = LOCK_VALS;

	EBUG_ON(type == SIX_LOCK_write);
	six_acquire(&lock->dep_map, 0);

	/* XXX: assert already locked, and that we don't overflow: */

	atomic64_add(l[type].lock_val, &lock->state.counter);
}

/* Convert from intent to read: */
void six_lock_downgrade(struct six_lock *lock)
{
	six_lock_increment(lock, SIX_LOCK_read);
	six_unlock_intent(lock);
}
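/*
 * Typical usage, sketched with only the functions defined above (callers
 * normally go through the per-type wrappers declared in six.h, such as the
 * six_unlock_intent() used by six_lock_downgrade()):
 *
 *	six_lock_type(lock, SIX_LOCK_intent);	// exclude other intent holders
 *	six_lock_type(lock, SIX_LOCK_write);	// exclude readers to modify
 *	six_unlock_type(lock, SIX_LOCK_write);
 *	six_unlock_type(lock, SIX_LOCK_intent);
 */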