#include <linux/log2.h>
#include <linux/preempt.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/sched/rt.h>

#include "six.h"

#define six_acquire(l, t)	lock_acquire(l, 0, t, 0, 0, NULL, _RET_IP_)
#define six_release(l)		lock_release(l, 0, _RET_IP_)

#define __SIX_LOCK_HELD_read	__SIX_VAL(read_lock, ~0)
#define __SIX_LOCK_HELD_intent	__SIX_VAL(intent_lock, ~0)
#define __SIX_LOCK_HELD_write	__SIX_VAL(seq, 1)
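/*
 * The entire lock state lives in one 64-bit word; __SIX_VAL() (assumed here
 * to be the six.h helper that shifts a value into one of the state's
 * bitfields) builds the per-type held masks above. Note that "write locked"
 * is encoded in the low bit of the sequence number: seq is odd while the
 * write lock is held, and increments on every write lock and write unlock.
 */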
struct six_lock_vals {
	/* Value we add to the lock in order to take the lock: */
	u64			lock_val;

	/* If the lock has this value (used as a mask), taking the lock fails: */
	u64			lock_fail;

	/* Value we add to the lock in order to release the lock: */
	u64			unlock_val;

	/* Mask that indicates lock is held for this type: */
	u64			held_mask;

	/* Waitlist we wake up when releasing the lock: */
	enum six_lock_type	unlock_wakeup;
};
#define LOCK_VALS {							\
	[SIX_LOCK_read] = {						\
		.lock_val	= __SIX_VAL(read_lock, 1),		\
		.lock_fail	= __SIX_LOCK_HELD_write,		\
		.unlock_val	= -__SIX_VAL(read_lock, 1),		\
		.held_mask	= __SIX_LOCK_HELD_read,			\
		.unlock_wakeup	= SIX_LOCK_write,			\
	},								\
	[SIX_LOCK_intent] = {						\
		.lock_val	= __SIX_VAL(intent_lock, 1),		\
		.lock_fail	= __SIX_LOCK_HELD_intent,		\
		.unlock_val	= -__SIX_VAL(intent_lock, 1),		\
		.held_mask	= __SIX_LOCK_HELD_intent,		\
		.unlock_wakeup	= SIX_LOCK_intent,			\
	},								\
	[SIX_LOCK_write] = {						\
		.lock_val	= __SIX_VAL(seq, 1),			\
		.lock_fail	= __SIX_LOCK_HELD_read,			\
		.unlock_val	= __SIX_VAL(seq, 1),			\
		.held_mask	= __SIX_LOCK_HELD_write,		\
		.unlock_wakeup	= SIX_LOCK_read,			\
	},								\
}
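/*
 * Reading the table: read and intent locks are counted (lock adds one,
 * unlock subtracts one), while the write lock is taken and released by
 * incrementing seq, so an odd seq means write locked. Read locks only
 * conflict with the write lock; intent locks conflict only with other
 * intent locks; the write lock conflicts with readers. The write lock's
 * additional requirement - that the caller already holds an intent lock -
 * isn't encoded in lock_fail; it's asserted in __six_trylock_type().
 */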
static void six_set_owner(struct six_lock *lock, enum six_lock_type type)
{
	if (type == SIX_LOCK_intent)
		lock->owner = current;
}

static void six_clear_owner(struct six_lock *lock, enum six_lock_type type)
{
	if (type == SIX_LOCK_intent)
		lock->owner = NULL;
}
static inline bool __six_trylock_type(struct six_lock *lock,
				      enum six_lock_type type)
{
	const struct six_lock_vals l[] = LOCK_VALS;
	union six_lock_state old;
	u64 v = READ_ONCE(lock->state.v);

	do {
		old.v = v;

		EBUG_ON(type == SIX_LOCK_write &&
			((old.v & __SIX_LOCK_HELD_write) ||
			 !(old.v & __SIX_LOCK_HELD_intent)));

		if (old.v & l[type].lock_fail)
			return false;
	} while ((v = atomic64_cmpxchg_acquire(&lock->state.counter,
				old.v,
				old.v + l[type].lock_val)) != old.v);

	return true;
}
bool six_trylock_type(struct six_lock *lock, enum six_lock_type type)
{
	bool ret = __six_trylock_type(lock, type);

	if (ret) {
		six_acquire(&lock->dep_map, 1);
		six_set_owner(lock, type);
	}

	return ret;
}
bool six_relock_type(struct six_lock *lock, enum six_lock_type type,
		     unsigned seq)
{
	const struct six_lock_vals l[] = LOCK_VALS;
	union six_lock_state old;
	u64 v = READ_ONCE(lock->state.v);

	do {
		old.v = v;

		if (old.seq != seq || old.v & l[type].lock_fail)
			return false;
	} while ((v = atomic64_cmpxchg_acquire(&lock->state.counter,
				old.v,
				old.v + l[type].lock_val)) != old.v);

	six_acquire(&lock->dep_map, 1);
	six_set_owner(lock, type);

	return true;
}
struct six_lock_waiter {
	struct list_head	list;
	struct task_struct	*task;
};
/* This is probably up there with the more evil things I've done */
#define waitlist_bitnr(id) ilog2((((union six_lock_state) { .waiters = 1 << (id) }).l))
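/*
 * What the evil is: waitlist_bitnr() computes the absolute bit position,
 * within the full 64-bit state word, of the waiters bit for a given lock
 * type, by building a state with only that bit set and taking ilog2() of
 * the whole word. six_lock_wakeup() can then use set_bit()/clear_bit()
 * directly on &lock->state.v.
 */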
static inline int six_can_spin_on_owner(struct six_lock *lock)
{
	struct task_struct *owner;
	int retval = 1;

	if (need_resched())
		return 0;

	rcu_read_lock();
	owner = READ_ONCE(lock->owner);
	if (owner)
		retval = owner->on_cpu;
	rcu_read_unlock();
	/*
	 * If lock->owner is not set, the owner may have just acquired the
	 * lock and not set the owner yet, or the lock may have been released.
	 */
	return retval;
}
static bool six_spin_on_owner(struct six_lock *lock, struct task_struct *owner)
{
	bool ret = true;

	rcu_read_lock();
	while (lock->owner == owner) {
		/*
		 * Ensure we emit the owner->on_cpu dereference _after_
		 * checking lock->owner still matches owner. If that fails,
		 * owner might point to freed memory. If it still matches,
		 * the rcu_read_lock() ensures the memory stays valid.
		 */
		barrier();

		if (!owner->on_cpu || need_resched()) {
			ret = false;
			break;
		}

		cpu_relax();
	}
	rcu_read_unlock();

	return ret;
}
static bool six_optimistic_spin(struct six_lock *lock, enum six_lock_type type)
{
	struct task_struct *task = current;

	if (type == SIX_LOCK_write)
		return false;

	preempt_disable();
	if (!six_can_spin_on_owner(lock))
		goto fail;

	if (!osq_lock(&lock->osq))
		goto fail;

	while (1) {
		struct task_struct *owner;

		/*
		 * If there's an owner, wait for it to either
		 * release the lock or go to sleep.
		 */
		owner = READ_ONCE(lock->owner);
		if (owner && !six_spin_on_owner(lock, owner))
			break;

		if (__six_trylock_type(lock, type)) {
			osq_unlock(&lock->osq);
			preempt_enable();
			return true;
		}

		/*
		 * When there's no owner, we might have preempted between the
		 * owner acquiring the lock and setting the owner field. If
		 * we're an RT task we can live-lock here, since we won't let
		 * the owner complete.
		 */
		if (!owner && (need_resched() || rt_task(task)))
			break;

		/*
		 * The cpu_relax() call is a compiler barrier which forces
		 * everything in this loop to be re-loaded. We don't need
		 * memory barriers as we'll eventually observe the right
		 * values at the cost of a few extra spins.
		 */
		cpu_relax();
	}

	osq_unlock(&lock->osq);
fail:
	preempt_enable();

	/*
	 * If we fell out of the spin path because of need_resched(),
	 * reschedule now, before we try-lock again. This avoids getting
	 * scheduled out right after we obtained the lock.
	 */
	if (need_resched())
		schedule();

	return false;
}
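/*
 * Lock acquisition: first the lockless fast path, then optimistic spinning,
 * then the slow path. A slow-path waiter adds itself to the waitlist and
 * sets its type's waiting bit in the same cmpxchg loop that attempts the
 * acquisition, so a concurrent unlock observes either the acquisition or
 * the waiting bit and calls six_lock_wakeup().
 */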
void six_lock_type(struct six_lock *lock, enum six_lock_type type)
{
	const struct six_lock_vals l[] = LOCK_VALS;
	union six_lock_state old, new;
	struct six_lock_waiter wait;
	u64 v;

	six_acquire(&lock->dep_map, 0);

	if (__six_trylock_type(lock, type))
		goto done;

	if (six_optimistic_spin(lock, type))
		goto done;

	lock_contended(&lock->dep_map, _RET_IP_);

	INIT_LIST_HEAD(&wait.list);
	wait.task = current;

	while (1) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (list_empty_careful(&wait.list)) {
			raw_spin_lock(&lock->wait_lock);
			list_add_tail(&wait.list, &lock->wait_list[type]);
			raw_spin_unlock(&lock->wait_lock);
		}

		v = READ_ONCE(lock->state.v);
		do {
			new.v = old.v = v;

			if (!(old.v & l[type].lock_fail))
				new.v += l[type].lock_val;
			else if (!(new.waiters & (1 << type)))
				new.waiters |= 1 << type;
			else
				break; /* waiting bit already set */
		} while ((v = atomic64_cmpxchg_acquire(&lock->state.counter,
					old.v, new.v)) != old.v);

		if (!(old.v & l[type].lock_fail))
			break;

		schedule();
	}

	__set_current_state(TASK_RUNNING);

	if (!list_empty_careful(&wait.list)) {
		raw_spin_lock(&lock->wait_lock);
		list_del_init(&wait.list);
		raw_spin_unlock(&lock->wait_lock);
	}
done:
	lock_acquired(&lock->dep_map, _RET_IP_);
	six_set_owner(lock, type);
}
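/*
 * Unlock wakeups: read waiters are all woken together, while intent/write
 * waiters are woken one at a time; if more waiters of that type remain on
 * the list after a successful wakeup, the waiting bit is put back so the
 * next unlock wakes the next waiter.
 */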
static inline void six_lock_wakeup(struct six_lock *lock,
				   union six_lock_state state,
				   unsigned waitlist_id)
{
	struct list_head *wait_list = &lock->wait_list[waitlist_id];
	struct six_lock_waiter *w, *next;

	if (waitlist_id == SIX_LOCK_write && state.read_lock)
		return;

	if (!(state.waiters & (1 << waitlist_id)))
		return;

	clear_bit(waitlist_bitnr(waitlist_id),
		  (unsigned long *) &lock->state.v);

	raw_spin_lock(&lock->wait_lock);

	list_for_each_entry_safe(w, next, wait_list, list) {
		list_del_init(&w->list);

		if (wake_up_process(w->task) &&
		    waitlist_id != SIX_LOCK_read) {
			if (!list_empty(wait_list))
				set_bit(waitlist_bitnr(waitlist_id),
					(unsigned long *) &lock->state.v);
			break;
		}
	}

	raw_spin_unlock(&lock->wait_lock);
}
void six_unlock_type(struct six_lock *lock, enum six_lock_type type)
{
	const struct six_lock_vals l[] = LOCK_VALS;
	union six_lock_state state;

	six_clear_owner(lock, type);

	EBUG_ON(!(lock->state.v & l[type].held_mask));
	EBUG_ON(type == SIX_LOCK_write &&
		!(lock->state.v & __SIX_LOCK_HELD_intent));

	/* The release here pairs with the acquire in __six_trylock_type(): */
	state.v = atomic64_add_return_release(l[type].unlock_val,
					      &lock->state.counter);
	six_release(&lock->dep_map);
	six_lock_wakeup(lock, state, l[type].unlock_wakeup);
}
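/*
 * six_trylock_convert() switches a held lock from one type to another with a
 * single atomic update: the from-type's unlock_val and the to-type's lock_val
 * are applied by one cmpxchg, so the lock is never observably released in
 * between. Waiters on the from-type's wakeup list are then woken just as on
 * unlock.
 */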
bool six_trylock_convert(struct six_lock *lock,
			 enum six_lock_type from,
			 enum six_lock_type to)
{
	const struct six_lock_vals l[] = LOCK_VALS;
	union six_lock_state old, new;
	u64 v = READ_ONCE(lock->state.v);

	do {
		new.v = old.v = v;

		new.v += l[from].unlock_val;

		if (new.v & l[to].lock_fail)
			return false;
	} while ((v = atomic64_cmpxchg_acquire(&lock->state.counter,
				old.v,
				new.v + l[to].lock_val)) != old.v);

	six_clear_owner(lock, from);
	six_set_owner(lock, to);

	six_lock_wakeup(lock, new, l[from].unlock_wakeup);

	return true;
}
/*
 * Increment read/intent lock count, assuming we already have it read or
 * intent locked:
 */
void six_lock_increment(struct six_lock *lock, enum six_lock_type type)
{
	const struct six_lock_vals l[] = LOCK_VALS;

	EBUG_ON(type == SIX_LOCK_write);
	six_acquire(&lock->dep_map, 0);

	/* XXX: assert already locked, and that we don't overflow: */

	atomic64_add(l[type].lock_val, &lock->state.counter);
}
/* Convert from intent to read: */
void six_lock_downgrade(struct six_lock *lock)
{
	six_lock_increment(lock, SIX_LOCK_read);
	six_unlock_intent(lock);
}
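/*
 * Example usage (a sketch, not part of this file; six_lock_init() and the
 * six_unlock_intent()-style per-type wrappers are assumed to be provided by
 * six.h):
 *
 *	struct six_lock lock;
 *	unsigned seq;
 *
 *	six_lock_init(&lock);
 *
 *	six_lock_type(&lock, SIX_LOCK_read);	// shared; many readers at once
 *	seq = lock.state.seq;			// snapshot the sequence number
 *	six_unlock_type(&lock, SIX_LOCK_read);
 *
 *	// Later: revalidate instead of blocking. This fails if a write lock
 *	// was taken in the meantime (seq changed):
 *	if (!six_relock_type(&lock, SIX_LOCK_read, seq))
 *		six_lock_type(&lock, SIX_LOCK_read);
 *
 *	// Taking the write lock requires already holding an intent lock:
 *	six_lock_type(&lock, SIX_LOCK_intent);
 *	six_lock_type(&lock, SIX_LOCK_write);
 *	six_unlock_type(&lock, SIX_LOCK_write);
 *	six_unlock_type(&lock, SIX_LOCK_intent);
 */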