/*
 * Generic waiting primitives.
 *
 * (C) 2004 Nadia Yvette Chambers, Oracle
 */
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/wait.h>
#include <linux/hash.h>
#include <linux/kthread.h>

void __init_waitqueue_head(wait_queue_head_t *q, const char *name, struct lock_class_key *key)
{
	spin_lock_init(&q->lock);
	lockdep_set_class_and_name(&q->lock, key, name);
	INIT_LIST_HEAD(&q->task_list);
}
EXPORT_SYMBOL(__init_waitqueue_head);

void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
{
	unsigned long flags;

	wait->flags &= ~WQ_FLAG_EXCLUSIVE;
	spin_lock_irqsave(&q->lock, flags);
	__add_wait_queue(q, wait);
	spin_unlock_irqrestore(&q->lock, flags);
}
EXPORT_SYMBOL(add_wait_queue);

void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait)
{
	unsigned long flags;

	wait->flags |= WQ_FLAG_EXCLUSIVE;
	spin_lock_irqsave(&q->lock, flags);
	__add_wait_queue_tail(q, wait);
	spin_unlock_irqrestore(&q->lock, flags);
}
EXPORT_SYMBOL(add_wait_queue_exclusive);

void remove_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
{
	unsigned long flags;

	spin_lock_irqsave(&q->lock, flags);
	__remove_wait_queue(q, wait);
	spin_unlock_irqrestore(&q->lock, flags);
}
EXPORT_SYMBOL(remove_wait_queue);

/*
 * The core wakeup function. Non-exclusive wakeups (nr_exclusive == 0) just
 * wake everything up. If it's an exclusive wakeup (nr_exclusive == small +ve
 * number) then we wake all the non-exclusive tasks and one exclusive task.
 *
 * There are circumstances in which we can try to wake a task which has already
 * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns
 * zero in this (rare) case, and we handle it by continuing to scan the queue.
 */
static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
			int nr_exclusive, int wake_flags, void *key)
{
	wait_queue_t *curr, *next;

	list_for_each_entry_safe(curr, next, &q->task_list, task_list) {
		unsigned flags = curr->flags;

		if (curr->func(curr, mode, wake_flags, key) &&
				(flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
			break;
	}
}

/**
 * __wake_up - wake up threads blocked on a waitqueue.
 * @q: the waitqueue
 * @mode: which threads
 * @nr_exclusive: how many wake-one or wake-many threads to wake up
 * @key: is directly passed to the wakeup function
 *
 * It may be assumed that this function implies a write memory barrier before
 * changing the task state if and only if any tasks are woken up.
 */
void __wake_up(wait_queue_head_t *q, unsigned int mode,
			int nr_exclusive, void *key)
{
	unsigned long flags;

	spin_lock_irqsave(&q->lock, flags);
	__wake_up_common(q, mode, nr_exclusive, 0, key);
	spin_unlock_irqrestore(&q->lock, flags);
}
EXPORT_SYMBOL(__wake_up);
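
/*
 * Illustrative sketch (not part of the original file): a typical waker sets
 * its condition first and only then wakes the queue; this pairs with the
 * sleeper loop sketched after prepare_to_wait() below.  The waitqueue and
 * flag are hypothetical, and the usual wake_up() header macro expands to a
 * call like the one shown.
 *
 *	static DECLARE_WAIT_QUEUE_HEAD(example_wq);	// hypothetical
 *	static bool example_done;			// hypothetical
 *
 *	static void example_complete(void)
 *	{
 *		example_done = true;
 *		__wake_up(&example_wq, TASK_NORMAL, 1, NULL);	// i.e. wake_up()
 *	}
 */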

/*
 * Same as __wake_up but called with the spinlock in wait_queue_head_t held.
 */
void __wake_up_locked(wait_queue_head_t *q, unsigned int mode, int nr)
{
	__wake_up_common(q, mode, nr, 0, NULL);
}
EXPORT_SYMBOL_GPL(__wake_up_locked);

void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key)
{
	__wake_up_common(q, mode, 1, 0, key);
}
EXPORT_SYMBOL_GPL(__wake_up_locked_key);

/**
 * __wake_up_sync_key - wake up threads blocked on a waitqueue.
 * @q: the waitqueue
 * @mode: which threads
 * @nr_exclusive: how many wake-one or wake-many threads to wake up
 * @key: opaque value to be passed to wakeup targets
 *
 * The sync wakeup differs in that the waker knows that it will schedule
 * away soon, so while the target thread will be woken up, it will not
 * be migrated to another CPU - ie. the two threads are 'synchronized'
 * with each other. This can prevent needless bouncing between CPUs.
 *
 * On UP it can prevent extra preemption.
 *
 * It may be assumed that this function implies a write memory barrier before
 * changing the task state if and only if any tasks are woken up.
 */
void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode,
			int nr_exclusive, void *key)
{
	unsigned long flags;
	int wake_flags = 1; /* XXX WF_SYNC */

	if (unlikely(!q))
		return;

	if (unlikely(nr_exclusive != 1))
		wake_flags = 0;

	spin_lock_irqsave(&q->lock, flags);
	__wake_up_common(q, mode, nr_exclusive, wake_flags, key);
	spin_unlock_irqrestore(&q->lock, flags);
}
EXPORT_SYMBOL_GPL(__wake_up_sync_key);

/*
 * __wake_up_sync - see __wake_up_sync_key()
 */
void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
{
	__wake_up_sync_key(q, mode, nr_exclusive, NULL);
}
EXPORT_SYMBOL_GPL(__wake_up_sync);	/* For internal use only */

/*
 * Note: we use "set_current_state()" _after_ the wait-queue add,
 * because we need a memory barrier there on SMP, so that any
 * wake-function that tests for the wait-queue being active
 * will be guaranteed to see waitqueue addition _or_ subsequent
 * tests in this thread will see the wakeup having taken place.
 *
 * The spin_unlock() itself is semi-permeable and only protects
 * one way (it only protects stuff inside the critical region and
 * stops them from bleeding out - it would still allow subsequent
 * loads to move into the critical region).
 */
void
prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state)
{
	unsigned long flags;

	wait->flags &= ~WQ_FLAG_EXCLUSIVE;
	spin_lock_irqsave(&q->lock, flags);
	if (list_empty(&wait->task_list))
		__add_wait_queue(q, wait);
	set_current_state(state);
	spin_unlock_irqrestore(&q->lock, flags);
}
EXPORT_SYMBOL(prepare_to_wait);
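
/*
 * Illustrative sketch (not part of the original file): the canonical
 * open-coded wait loop built on prepare_to_wait()/finish_wait().  The
 * waitqueue, condition and DEFINE_WAIT() descriptor are hypothetical;
 * DEFINE_WAIT() is the usual header helper that initializes a wait_queue_t
 * with autoremove_wake_function().
 *
 *	static void example_wait_for_done(wait_queue_head_t *wq, bool *done)
 *	{
 *		DEFINE_WAIT(wait);
 *
 *		for (;;) {
 *			prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);
 *			if (*done)		// check *after* queueing
 *				break;
 *			schedule();		// sleep until woken
 *		}
 *		finish_wait(wq, &wait);
 *	}
 */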

void
prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state)
{
	unsigned long flags;

	wait->flags |= WQ_FLAG_EXCLUSIVE;
	spin_lock_irqsave(&q->lock, flags);
	if (list_empty(&wait->task_list))
		__add_wait_queue_tail(q, wait);
	set_current_state(state);
	spin_unlock_irqrestore(&q->lock, flags);
}
EXPORT_SYMBOL(prepare_to_wait_exclusive);

long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_t *wait, int state)
{
	unsigned long flags;

	wait->private = current;
	wait->func = autoremove_wake_function;

	spin_lock_irqsave(&q->lock, flags);
	if (list_empty(&wait->task_list)) {
		if (wait->flags & WQ_FLAG_EXCLUSIVE)
			__add_wait_queue_tail(q, wait);
		else
			__add_wait_queue(q, wait);
	}
	set_current_state(state);
	spin_unlock_irqrestore(&q->lock, flags);

	return 0;
}
EXPORT_SYMBOL(prepare_to_wait_event);

/**
 * finish_wait - clean up after waiting in a queue
 * @q: waitqueue waited on
 * @wait: wait descriptor
 *
 * Sets current thread back to running state and removes
 * the wait descriptor from the given waitqueue if still
 * queued.
 */
void finish_wait(wait_queue_head_t *q, wait_queue_t *wait)
{
	unsigned long flags;

	__set_current_state(TASK_RUNNING);
	/*
	 * We can check for list emptiness outside the lock
	 * IFF:
	 *  - we use the "careful" check that verifies both
	 *    the next and prev pointers, so that there cannot
	 *    be any half-pending updates in progress on other
	 *    CPUs that we haven't seen yet (and that might
	 *    still change the stack area).
	 * and
	 *  - all other users take the lock (ie we can only
	 *    have _one_ other CPU that looks at or modifies
	 *    the list).
	 */
	if (!list_empty_careful(&wait->task_list)) {
		spin_lock_irqsave(&q->lock, flags);
		list_del_init(&wait->task_list);
		spin_unlock_irqrestore(&q->lock, flags);
	}
}
EXPORT_SYMBOL(finish_wait);

/**
 * abort_exclusive_wait - abort exclusive waiting in a queue
 * @q: waitqueue waited on
 * @wait: wait descriptor
 * @mode: runstate of the waiter to be woken
 * @key: key to identify a wait bit queue or %NULL
 *
 * Sets current thread back to running state and removes
 * the wait descriptor from the given waitqueue if still
 * queued.
 *
 * Wakes up the next waiter if the caller is concurrently
 * woken up through the queue.
 *
 * This prevents waiter starvation where an exclusive waiter
 * aborts and is woken up concurrently and no one wakes up
 * the next waiter.
 */
void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait,
			unsigned int mode, void *key)
{
	unsigned long flags;

	__set_current_state(TASK_RUNNING);
	spin_lock_irqsave(&q->lock, flags);
	if (!list_empty(&wait->task_list))
		list_del_init(&wait->task_list);
	else if (waitqueue_active(q))
		__wake_up_locked_key(q, mode, key);
	spin_unlock_irqrestore(&q->lock, flags);
}
EXPORT_SYMBOL(abort_exclusive_wait);

int default_wake_function(wait_queue_t *curr, unsigned mode, int wake_flags,
			  void *key)
{
	return wake_up_process(curr->private);
}

int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
{
	int ret = default_wake_function(wait, mode, sync, key);

	if (ret)
		list_del_init(&wait->task_list);
	return ret;
}
EXPORT_SYMBOL(autoremove_wake_function);

static inline bool is_kthread_should_stop(void)
{
	return (current->flags & PF_KTHREAD) && kthread_should_stop();
}

/*
 * DEFINE_WAIT_FUNC(wait, woken_wake_func);
 *
 * add_wait_queue(&wq, &wait);
 * for (;;) {
 *     if (condition)
 *         break;
 *
 *     p->state = mode;                         condition = true;
 *     smp_mb(); // A                           smp_wmb(); // C
 *     if (!(wait->flags & WQ_FLAG_WOKEN))      wait->flags |= WQ_FLAG_WOKEN;
 *         schedule()                           try_to_wake_up();
 *     p->state = TASK_RUNNING;             ~~~~~~~~~~~~~~~~~~
 *     wait->flags &= ~WQ_FLAG_WOKEN;           condition = true;
 *     smp_mb() // B                            smp_wmb(); // C
 *                                              wait->flags |= WQ_FLAG_WOKEN;
 * }
 * remove_wait_queue(&wq, &wait);
 *
 */
long wait_woken(wait_queue_t *wait, unsigned mode, long timeout)
{
	set_current_state(mode); /* A */
	/*
	 * The above implies an smp_mb(), which matches with the smp_wmb() from
	 * woken_wake_function() such that if we observe WQ_FLAG_WOKEN we must
	 * also observe all state before the wakeup.
	 */
	if (!(wait->flags & WQ_FLAG_WOKEN) && !is_kthread_should_stop())
		timeout = schedule_timeout(timeout);
	__set_current_state(TASK_RUNNING);

	/*
	 * The below implies an smp_mb(), it too pairs with the smp_wmb() from
	 * woken_wake_function() such that we must either observe the wait
	 * condition being true _OR_ WQ_FLAG_WOKEN such that we will not miss
	 * an event.
	 */
	smp_store_mb(wait->flags, wait->flags & ~WQ_FLAG_WOKEN); /* B */

	return timeout;
}
EXPORT_SYMBOL(wait_woken);

int woken_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
{
	/*
	 * Although this function is called under waitqueue lock, LOCK
	 * doesn't imply a write barrier and callers expect write
	 * barrier semantics on wakeup functions.  The following
	 * smp_wmb() is equivalent to smp_wmb() in try_to_wake_up()
	 * and is paired with smp_store_mb() in wait_woken().
	 */
	smp_wmb(); /* C */
	wait->flags |= WQ_FLAG_WOKEN;

	return default_wake_function(wait, mode, sync, key);
}
EXPORT_SYMBOL(woken_wake_function);
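
/*
 * Illustrative sketch (not part of the original file): a concrete use of the
 * wait_woken()/woken_wake_function() pattern diagrammed above, here waiting
 * for a hypothetical *ready flag with a timeout.  DEFINE_WAIT_FUNC() is the
 * header helper named in the comment above wait_woken().
 *
 *	static long example_wait_ready(wait_queue_head_t *wq, bool *ready,
 *				       long timeout)
 *	{
 *		DEFINE_WAIT_FUNC(wait, woken_wake_function);
 *
 *		add_wait_queue(wq, &wait);
 *		while (!*ready && timeout)
 *			timeout = wait_woken(&wait, TASK_INTERRUPTIBLE, timeout);
 *		remove_wait_queue(wq, &wait);
 *		return timeout;			// 0 means we timed out
 *	}
 */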

int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *arg)
{
	struct wait_bit_key *key = arg;
	struct wait_bit_queue *wait_bit
		= container_of(wait, struct wait_bit_queue, wait);

	if (wait_bit->key.flags != key->flags ||
			wait_bit->key.bit_nr != key->bit_nr ||
			test_bit(key->bit_nr, key->flags))
		return 0;
	else
		return autoremove_wake_function(wait, mode, sync, key);
}
EXPORT_SYMBOL(wake_bit_function);

/*
 * To allow interruptible waiting and asynchronous (i.e. nonblocking)
 * waiting, the @action callbacks passed to __wait_on_bit() and
 * __wait_on_bit_lock() may return nonzero codes; a nonzero return
 * halts the wait and is propagated back to the caller.
 */
int __sched
__wait_on_bit(wait_queue_head_t *wq, struct wait_bit_queue *q,
	      wait_bit_action_f *action, unsigned mode)
{
	int ret = 0;

	do {
		prepare_to_wait(wq, &q->wait, mode);
		if (test_bit(q->key.bit_nr, q->key.flags))
			ret = (*action)(&q->key, mode);
	} while (test_bit(q->key.bit_nr, q->key.flags) && !ret);
	finish_wait(wq, &q->wait);
	return ret;
}
EXPORT_SYMBOL(__wait_on_bit);

int __sched out_of_line_wait_on_bit(void *word, int bit,
				    wait_bit_action_f *action, unsigned mode)
{
	wait_queue_head_t *wq = bit_waitqueue(word, bit);
	DEFINE_WAIT_BIT(wait, word, bit);

	return __wait_on_bit(wq, &wait, action, mode);
}
EXPORT_SYMBOL(out_of_line_wait_on_bit);

int __sched out_of_line_wait_on_bit_timeout(
	void *word, int bit, wait_bit_action_f *action,
	unsigned mode, unsigned long timeout)
{
	wait_queue_head_t *wq = bit_waitqueue(word, bit);
	DEFINE_WAIT_BIT(wait, word, bit);

	wait.key.timeout = jiffies + timeout;
	return __wait_on_bit(wq, &wait, action, mode);
}
EXPORT_SYMBOL_GPL(out_of_line_wait_on_bit_timeout);

int __sched
__wait_on_bit_lock(wait_queue_head_t *wq, struct wait_bit_queue *q,
			wait_bit_action_f *action, unsigned mode)
{
	do {
		int ret;

		prepare_to_wait_exclusive(wq, &q->wait, mode);
		if (!test_bit(q->key.bit_nr, q->key.flags))
			continue;
		ret = action(&q->key, mode);
		if (!ret)
			continue;
		abort_exclusive_wait(wq, &q->wait, mode, &q->key);
		return ret;
	} while (test_and_set_bit(q->key.bit_nr, q->key.flags));
	finish_wait(wq, &q->wait);
	return 0;
}
EXPORT_SYMBOL(__wait_on_bit_lock);

int __sched out_of_line_wait_on_bit_lock(void *word, int bit,
					 wait_bit_action_f *action, unsigned mode)
{
	wait_queue_head_t *wq = bit_waitqueue(word, bit);
	DEFINE_WAIT_BIT(wait, word, bit);

	return __wait_on_bit_lock(wq, &wait, action, mode);
}
EXPORT_SYMBOL(out_of_line_wait_on_bit_lock);
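
/*
 * Illustrative sketch (not part of the original file): using the exclusive
 * bit-wait primitives as a simple bit lock on a hypothetical flags word.
 * Kernel code normally spells this with the wait_on_bit_lock() header
 * wrapper; the out-of-line helper is called directly here only to show how
 * the pieces in this file fit together.  Unlocking is the clear_bit() +
 * wake_up_bit() sequence sketched after wake_up_bit() below.
 *
 *	#define EXAMPLE_LOCK_BIT	0	// hypothetical
 *
 *	static void example_lock(unsigned long *flags)
 *	{
 *		// loops inside __wait_on_bit_lock() until this caller wins
 *		// the test_and_set_bit() race
 *		out_of_line_wait_on_bit_lock(flags, EXAMPLE_LOCK_BIT,
 *					     bit_wait, TASK_UNINTERRUPTIBLE);
 *	}
 */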

void __wake_up_bit(wait_queue_head_t *wq, void *word, int bit)
{
	struct wait_bit_key key = __WAIT_BIT_KEY_INITIALIZER(word, bit);
	if (waitqueue_active(wq))
		__wake_up(wq, TASK_NORMAL, 1, &key);
}
EXPORT_SYMBOL(__wake_up_bit);

/**
 * wake_up_bit - wake up a waiter on a bit
 * @word: the word being waited on, a kernel virtual address
 * @bit: the bit of the word being waited on
 *
 * There is a standard hashed waitqueue table for generic use. This
 * is the part of the hashtable's accessor API that wakes up waiters
 * on a bit. For instance, if one were to have waiters on a bitflag,
 * one would call wake_up_bit() after clearing the bit.
 *
 * In order for this to function properly, as it uses waitqueue_active()
 * internally, some kind of memory barrier must be done prior to calling
 * this. Typically, this will be smp_mb__after_atomic(), but in some
 * cases where bitflags are manipulated non-atomically under a lock, one
 * may need to use a less regular barrier, such as fs/inode.c's smp_mb(),
 * because spin_unlock() does not guarantee a memory barrier.
 */
void wake_up_bit(void *word, int bit)
{
	__wake_up_bit(bit_waitqueue(word, bit), word, bit);
}
EXPORT_SYMBOL(wake_up_bit);
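
/*
 * Illustrative sketch (not part of the original file): the plain "wait for a
 * bit to clear" pairing described above.  The waiter sleeps via
 * out_of_line_wait_on_bit() with the bit_wait action defined later in this
 * file; the waker clears the bit, orders that store against the
 * waitqueue_active() check with smp_mb__after_atomic(), then calls
 * wake_up_bit().  The flags word and bit number are hypothetical.
 *
 *	#define EXAMPLE_BUSY_BIT	1	// hypothetical
 *
 *	static void example_wait_not_busy(unsigned long *flags)
 *	{
 *		out_of_line_wait_on_bit(flags, EXAMPLE_BUSY_BIT,
 *					bit_wait, TASK_UNINTERRUPTIBLE);
 *	}
 *
 *	static void example_clear_busy(unsigned long *flags)
 *	{
 *		clear_bit(EXAMPLE_BUSY_BIT, flags);
 *		smp_mb__after_atomic();
 *		wake_up_bit(flags, EXAMPLE_BUSY_BIT);
 *	}
 */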

static DECLARE_WAIT_QUEUE_HEAD(__bit_waitqueue);

/*
 * In this userspace implementation all bit waiters share one global
 * waitqueue instead of the kernel's hashed table; wake_bit_function()
 * filters wakeups by word and bit number.
 */
wait_queue_head_t *bit_waitqueue(void *word, int bit)
{
	return &__bit_waitqueue;
}
EXPORT_SYMBOL(bit_waitqueue);

/*
 * Manipulate the atomic_t address to produce a better bit waitqueue table hash
 * index (we're keying off bit -1, but that would produce a horrible hash
 * value).
 */
static inline wait_queue_head_t *atomic_t_waitqueue(atomic_t *p)
{
	if (BITS_PER_LONG == 64) {
		unsigned long q = (unsigned long)p;
		return bit_waitqueue((void *)(q & ~1), q & 1);
	}
	return bit_waitqueue(p, 0);
}

static int wake_atomic_t_function(wait_queue_t *wait, unsigned mode, int sync,
				  void *arg)
{
	struct wait_bit_key *key = arg;
	struct wait_bit_queue *wait_bit
		= container_of(wait, struct wait_bit_queue, wait);
	atomic_t *val = key->flags;

	if (wait_bit->key.flags != key->flags ||
	    wait_bit->key.bit_nr != key->bit_nr ||
	    atomic_read(val) != 0)
		return 0;
	return autoremove_wake_function(wait, mode, sync, key);
}

/*
 * To allow interruptible waiting and asynchronous (i.e. nonblocking) waiting,
 * the @action callback passed to __wait_on_atomic_t() may return a nonzero
 * code; a nonzero return halts the wait and is propagated to the caller.
 */
static __sched
int __wait_on_atomic_t(wait_queue_head_t *wq, struct wait_bit_queue *q,
		       int (*action)(atomic_t *), unsigned mode)
{
	atomic_t *val;
	int ret = 0;

	do {
		prepare_to_wait(wq, &q->wait, mode);
		val = q->key.flags;
		if (atomic_read(val) == 0)
			break;
		ret = (*action)(val);
	} while (!ret && atomic_read(val) != 0);
	finish_wait(wq, &q->wait);
	return ret;
}

#define DEFINE_WAIT_ATOMIC_T(name, p)					\
	struct wait_bit_queue name = {					\
		.key = __WAIT_ATOMIC_T_KEY_INITIALIZER(p),		\
		.wait	= {						\
			.private	= current,			\
			.func		= wake_atomic_t_function,	\
			.task_list	=				\
				LIST_HEAD_INIT((name).wait.task_list),	\
		},							\
	}

__sched int out_of_line_wait_on_atomic_t(atomic_t *p, int (*action)(atomic_t *),
					 unsigned mode)
{
	wait_queue_head_t *wq = atomic_t_waitqueue(p);
	DEFINE_WAIT_ATOMIC_T(wait, p);

	return __wait_on_atomic_t(wq, &wait, action, mode);
}
EXPORT_SYMBOL(out_of_line_wait_on_atomic_t);

/**
 * wake_up_atomic_t - Wake up a waiter on an atomic_t
 * @p: The atomic_t being waited on, a kernel virtual address
 *
 * Wake up anyone waiting for the atomic_t to go to zero.
 *
 * Abuse the bit-waker function and its waitqueue hash table set (the atomic_t
 * check is done by the waiter's wake function, not by the waker itself).
 */
void wake_up_atomic_t(atomic_t *p)
{
	__wake_up_bit(atomic_t_waitqueue(p), p, WAIT_ATOMIC_T_BIT_NR);
}
EXPORT_SYMBOL(wake_up_atomic_t);
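
/*
 * Illustrative sketch (not part of the original file): waiting for a
 * hypothetical reference count to drop to zero.  The waiter passes an
 * @action that simply sleeps; the last reference holder wakes it up with
 * wake_up_atomic_t() after the decrement.  Kernel code usually goes through
 * the wait_on_atomic_t() header wrapper rather than calling the out-of-line
 * helper directly.
 *
 *	static int example_atomic_t_wait(atomic_t *p)
 *	{
 *		schedule();
 *		return 0;
 *	}
 *
 *	static void example_wait_for_users(atomic_t *users)
 *	{
 *		out_of_line_wait_on_atomic_t(users, example_atomic_t_wait,
 *					     TASK_UNINTERRUPTIBLE);
 *	}
 *
 *	static void example_put_user(atomic_t *users)
 *	{
 *		if (atomic_dec_and_test(users))
 *			wake_up_atomic_t(users);
 *	}
 */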

/*
 * Default @action implementations for the bit-wait helpers above: sleep
 * (optionally on the io wait path, or only until the key's timeout) and
 * return 0, or -EAGAIN once the timeout has already passed.
 */
__sched int bit_wait(struct wait_bit_key *word, int mode)
{
	schedule();
	return 0;
}
EXPORT_SYMBOL(bit_wait);

__sched int bit_wait_io(struct wait_bit_key *word, int mode)
{
	io_schedule();
	return 0;
}
EXPORT_SYMBOL(bit_wait_io);

__sched int bit_wait_timeout(struct wait_bit_key *word, int mode)
{
	unsigned long now = jiffies;
	if (time_after_eq(now, word->timeout))
		return -EAGAIN;
	schedule_timeout(word->timeout - now);
	return 0;
}
EXPORT_SYMBOL_GPL(bit_wait_timeout);

__sched int bit_wait_io_timeout(struct wait_bit_key *word, int mode)
{
	unsigned long now = jiffies;
	if (time_after_eq(now, word->timeout))
		return -EAGAIN;
	io_schedule_timeout(word->timeout - now);
	return 0;
}
EXPORT_SYMBOL_GPL(bit_wait_io_timeout);