1 // SPDX-License-Identifier: GPL-2.0
2
3 #include <linux/export.h>
4 #include <linux/log2.h>
5 #include <linux/percpu.h>
6 #include <linux/preempt.h>
7 #include <linux/rcupdate.h>
8 #include <linux/sched.h>
9 #include <linux/sched/clock.h>
10 #include <linux/sched/rt.h>
11 #include <linux/sched/task.h>
12 #include <linux/slab.h>
13
14 #include <trace/events/lock.h>
15
16 #include "six.h"
17
18 #ifdef DEBUG
19 #define EBUG_ON(cond)                   BUG_ON(cond)
20 #else
21 #define EBUG_ON(cond)                   do {} while (0)
22 #endif
23
24 #define six_acquire(l, t, r, ip)        lock_acquire(l, 0, t, r, 1, NULL, ip)
25 #define six_release(l, ip)              lock_release(l, ip)
26
27 static void do_six_unlock_type(struct six_lock *lock, enum six_lock_type type);
28
29 #define SIX_LOCK_HELD_read_OFFSET       0
30 #define SIX_LOCK_HELD_read              ~(~0U << 26)
31 #define SIX_LOCK_HELD_intent            (1U << 26)
32 #define SIX_LOCK_HELD_write             (1U << 27)
33 #define SIX_LOCK_WAITING_read           (1U << (28 + SIX_LOCK_read))
34 #define SIX_LOCK_WAITING_intent         (1U << (28 + SIX_LOCK_intent))
35 #define SIX_LOCK_WAITING_write          (1U << (28 + SIX_LOCK_write))
36 #define SIX_LOCK_NOSPIN                 (1U << 31)
37
38 struct six_lock_vals {
39         /* Value we add to the lock in order to take the lock: */
40         u32                     lock_val;
41
42         /* If the lock has this value (used as a mask), taking the lock fails: */
43         u32                     lock_fail;
44
45         /* Mask that indicates lock is held for this type: */
46         u32                     held_mask;
47
48         /* Waitlist we wake up when releasing the lock: */
49         enum six_lock_type      unlock_wakeup;
50 };
51
52 static const struct six_lock_vals l[] = {
53         [SIX_LOCK_read] = {
54                 .lock_val       = 1U << SIX_LOCK_HELD_read_OFFSET,
55                 .lock_fail      = SIX_LOCK_HELD_write,
56                 .held_mask      = SIX_LOCK_HELD_read,
57                 .unlock_wakeup  = SIX_LOCK_write,
58         },
59         [SIX_LOCK_intent] = {
60                 .lock_val       = SIX_LOCK_HELD_intent,
61                 .lock_fail      = SIX_LOCK_HELD_intent,
62                 .held_mask      = SIX_LOCK_HELD_intent,
63                 .unlock_wakeup  = SIX_LOCK_intent,
64         },
65         [SIX_LOCK_write] = {
66                 .lock_val       = SIX_LOCK_HELD_write,
67                 .lock_fail      = SIX_LOCK_HELD_read,
68                 .held_mask      = SIX_LOCK_HELD_write,
69                 .unlock_wakeup  = SIX_LOCK_read,
70         },
71 };
72
73 static inline void six_set_bitmask(struct six_lock *lock, u32 mask)
74 {
75         if ((atomic_read(&lock->state) & mask) != mask)
76                 atomic_or(mask, &lock->state);
77 }
78
79 static inline void six_clear_bitmask(struct six_lock *lock, u32 mask)
80 {
81         if (atomic_read(&lock->state) & mask)
82                 atomic_and(~mask, &lock->state);
83 }
84
85 static inline void six_set_owner(struct six_lock *lock, enum six_lock_type type,
86                                  u32 old, struct task_struct *owner)
87 {
88         if (type != SIX_LOCK_intent)
89                 return;
90
91         if (!(old & SIX_LOCK_HELD_intent)) {
92                 EBUG_ON(lock->owner);
93                 lock->owner = owner;
94         } else {
95                 EBUG_ON(lock->owner != current);
96         }
97 }
98
99 static inline unsigned pcpu_read_count(struct six_lock *lock)
100 {
101         unsigned read_count = 0;
102         int cpu;
103
104         for_each_possible_cpu(cpu)
105                 read_count += *per_cpu_ptr(lock->readers, cpu);
106         return read_count;
107 }
108
109 /*
110  * __do_six_trylock() - main trylock routine
111  *
112  * Returns 1 on success, 0 on failure
113  *
114  * In percpu reader mode, a failed trylock may cause a spurious trylock failure
115  * for another thread taking the competing lock type, and we may have to do a
116  * wakeup: when a wakeup is required, we return -1 - wakeup_type.
117  */
118 static int __do_six_trylock(struct six_lock *lock, enum six_lock_type type,
119                             struct task_struct *task, bool try)
120 {
121         int ret;
122         u32 old;
123
124         EBUG_ON(type == SIX_LOCK_write && lock->owner != task);
125         EBUG_ON(type == SIX_LOCK_write &&
126                 (try != !(atomic_read(&lock->state) & SIX_LOCK_HELD_write)));
127
128         /*
129          * Percpu reader mode:
130          *
131          * The basic idea behind this algorithm is that you can implement a lock
132          * between two threads without any atomics, just memory barriers:
133          *
134          * For two threads you'll need two variables, one variable for "thread a
135          * has the lock" and another for "thread b has the lock".
136          *
137          * To take the lock, a thread sets its variable indicating that it holds
138          * the lock, then issues a full memory barrier, then reads from the
139          * other thread's variable to check if the other thread thinks it has
140          * the lock. If we raced, we back off and retry/sleep.
141          *
142          * Failure to take the lock may cause a spurious trylock failure in
143          * another thread, because we temporarily set the lock to indicate that
144          * we held it. This would be a problem for a thread in six_lock(), when
145          * it calls trylock after adding itself to the waitlist and prior to
146          * sleeping.
147          *
148          * Therefore, if we fail to get the lock, and there were waiters of the
149          * type we conflict with, we will have to issue a wakeup.
150          *
151          * Since we may be called under wait_lock (and by the wakeup code
152          * itself), we return that the wakeup has to be done instead of doing it
153          * here.
154          */
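	/*
	 * Illustrative sketch of the two-variable scheme described above
	 * (editor's example, not part of the six lock implementation; all
	 * names here are invented):
	 *
	 *	bool a_held, b_held;
	 *
	 *	bool a_trylock(void)			// runs in thread a
	 *	{
	 *		WRITE_ONCE(a_held, true);	// claim the lock
	 *		smp_mb();			// full barrier
	 *		if (READ_ONCE(b_held)) {	// did b also claim it?
	 *			WRITE_ONCE(a_held, false);	// back off
	 *			return false;
	 *		}
	 *		return true;
	 *	}
	 *
	 * With full barriers on both sides, at most one thread can miss the
	 * other's store; the percpu read path below is the same idea, with one
	 * counter per cpu for readers and SIX_LOCK_HELD_write for the writer.
	 */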
155         if (type == SIX_LOCK_read && lock->readers) {
156                 preempt_disable();
157                 this_cpu_inc(*lock->readers); /* signal that we own lock */
158
159                 smp_mb();
160
161                 old = atomic_read(&lock->state);
162                 ret = !(old & l[type].lock_fail);
163
164                 this_cpu_sub(*lock->readers, !ret);
165                 preempt_enable();
166
167                 if (!ret && (old & SIX_LOCK_WAITING_write))
168                         ret = -1 - SIX_LOCK_write;
169         } else if (type == SIX_LOCK_write && lock->readers) {
170                 if (try) {
171                         atomic_add(SIX_LOCK_HELD_write, &lock->state);
172                         smp_mb__after_atomic();
173                 }
174
175                 ret = !pcpu_read_count(lock);
176
177                 if (try && !ret) {
178                         old = atomic_sub_return(SIX_LOCK_HELD_write, &lock->state);
179                         if (old & SIX_LOCK_WAITING_read)
180                                 ret = -1 - SIX_LOCK_read;
181                 }
182         } else {
183                 old = atomic_read(&lock->state);
184                 do {
185                         ret = !(old & l[type].lock_fail);
186                         if (!ret || (type == SIX_LOCK_write && !try)) {
187                                 smp_mb();
188                                 break;
189                         }
190                 } while (!atomic_try_cmpxchg_acquire(&lock->state, &old, old + l[type].lock_val));
191
192                 EBUG_ON(ret && !(atomic_read(&lock->state) & l[type].held_mask));
193         }
194
195         if (ret > 0)
196                 six_set_owner(lock, type, old, task);
197
198         EBUG_ON(type == SIX_LOCK_write && try && ret <= 0 &&
199                 (atomic_read(&lock->state) & SIX_LOCK_HELD_write));
200
201         return ret;
202 }
203
204 static void __six_lock_wakeup(struct six_lock *lock, enum six_lock_type lock_type)
205 {
206         struct six_lock_waiter *w, *next;
207         struct task_struct *task;
208         bool saw_one;
209         int ret;
210 again:
211         ret = 0;
212         saw_one = false;
213         raw_spin_lock(&lock->wait_lock);
214
215         list_for_each_entry_safe(w, next, &lock->wait_list, list) {
216                 if (w->lock_want != lock_type)
217                         continue;
218
219                 if (saw_one && lock_type != SIX_LOCK_read)
220                         goto unlock;
221                 saw_one = true;
222
223                 ret = __do_six_trylock(lock, lock_type, w->task, false);
224                 if (ret <= 0)
225                         goto unlock;
226
227                 /*
228                  * Similar to percpu_rwsem_wake_function(), we need to guard
229                  * against the wakee noticing w->lock_acquired, returning, and
230                  * then exiting before we do the wakeup:
231                  */
232                 task = get_task_struct(w->task);
233                 __list_del(w->list.prev, w->list.next);
234                 /*
235                  * The release barrier here ensures the ordering of the
236                  * __list_del before setting w->lock_acquired; @w is on the
237                  * stack of the thread doing the waiting and will be reused
238                  * after it sees w->lock_acquired with no other locking:
239                  * pairs with smp_load_acquire() in six_lock_slowpath()
240                  */
241                 smp_store_release(&w->lock_acquired, true);
242                 wake_up_process(task);
243                 put_task_struct(task);
244         }
245
246         six_clear_bitmask(lock, SIX_LOCK_WAITING_read << lock_type);
247 unlock:
248         raw_spin_unlock(&lock->wait_lock);
249
250         if (ret < 0) {
251                 lock_type = -ret - 1;
252                 goto again;
253         }
254 }
255
256 __always_inline
257 static void six_lock_wakeup(struct six_lock *lock, u32 state,
258                             enum six_lock_type lock_type)
259 {
260         if (lock_type == SIX_LOCK_write && (state & SIX_LOCK_HELD_read))
261                 return;
262
263         if (!(state & (SIX_LOCK_WAITING_read << lock_type)))
264                 return;
265
266         __six_lock_wakeup(lock, lock_type);
267 }
268
269 __always_inline
270 static bool do_six_trylock(struct six_lock *lock, enum six_lock_type type, bool try)
271 {
272         int ret;
273
274         ret = __do_six_trylock(lock, type, current, try);
275         if (ret < 0)
276                 __six_lock_wakeup(lock, -ret - 1);
277
278         return ret > 0;
279 }
280
281 /**
282  * six_trylock_ip - attempt to take a six lock without blocking
283  * @lock:       lock to take
284  * @type:       SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
285  * @ip:         ip parameter for lockdep/lockstat, i.e. _THIS_IP_
286  *
287  * Return: true on success, false on failure.
288  */
289 bool six_trylock_ip(struct six_lock *lock, enum six_lock_type type, unsigned long ip)
290 {
291         if (!do_six_trylock(lock, type, true))
292                 return false;
293
294         if (type != SIX_LOCK_write)
295                 six_acquire(&lock->dep_map, 1, type == SIX_LOCK_read, ip);
296         return true;
297 }
298 EXPORT_SYMBOL_GPL(six_trylock_ip);
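/*
 * Example (editor's sketch; "foo" is a hypothetical caller, not part of this
 * file):
 *
 *	if (six_trylock_ip(&foo->lock, SIX_LOCK_intent, _THIS_IP_)) {
 *		// got the intent lock without blocking; readers can still
 *		// take the lock, but other intent/write attempts will fail
 *		six_unlock_ip(&foo->lock, SIX_LOCK_intent, _THIS_IP_);
 *	}
 */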
299
300 /**
301  * six_relock_ip - attempt to re-take a lock that was held previously
302  * @lock:       lock to take
303  * @type:       SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
304  * @seq:        lock sequence number obtained from six_lock_seq() while lock was
305  *              held previously
306  * @ip:         ip parameter for lockdep/lockstat, i.e. _THIS_IP_
307  *
308  * Return: true on success, false on failure.
309  */
310 bool six_relock_ip(struct six_lock *lock, enum six_lock_type type,
311                    unsigned seq, unsigned long ip)
312 {
313         if (six_lock_seq(lock) != seq || !six_trylock_ip(lock, type, ip))
314                 return false;
315
316         if (six_lock_seq(lock) != seq) {
317                 six_unlock_ip(lock, type, ip);
318                 return false;
319         }
320
321         return true;
322 }
323 EXPORT_SYMBOL_GPL(six_relock_ip);
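/*
 * Example of the unlock/relock pattern this supports (editor's sketch; "foo"
 * and the surrounding logic are hypothetical):
 *
 *	six_lock_read(&foo->lock);
 *	u32 seq = six_lock_seq(&foo->lock);
 *	six_unlock_ip(&foo->lock, SIX_LOCK_read, _THIS_IP_);
 *
 *	// ... work that is only valid while nobody write locks foo->lock ...
 *
 *	if (!six_relock_ip(&foo->lock, SIX_LOCK_read, seq, _THIS_IP_)) {
 *		// sequence number changed: revalidate and start over
 *	}
 */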
324
325 #ifdef CONFIG_LOCK_SPIN_ON_OWNER
326
327 static inline bool six_can_spin_on_owner(struct six_lock *lock)
328 {
329         struct task_struct *owner;
330         bool ret;
331
332         if (need_resched())
333                 return false;
334
335         rcu_read_lock();
336         owner = READ_ONCE(lock->owner);
337         ret = !owner || owner_on_cpu(owner);
338         rcu_read_unlock();
339
340         return ret;
341 }
342
343 static inline bool six_spin_on_owner(struct six_lock *lock,
344                                      struct task_struct *owner,
345                                      u64 end_time)
346 {
347         bool ret = true;
348         unsigned loop = 0;
349
350         rcu_read_lock();
351         while (lock->owner == owner) {
352                 /*
353                  * Ensure we emit the owner->on_cpu dereference _after_
354                  * checking that lock->owner still matches owner. If that fails,
355                  * owner might point to freed memory. If it still matches,
356                  * the rcu_read_lock() ensures the memory stays valid.
357                  */
358                 barrier();
359
360                 if (!owner_on_cpu(owner) || need_resched()) {
361                         ret = false;
362                         break;
363                 }
364
365                 if (!(++loop & 0xf) && (time_after64(sched_clock(), end_time))) {
366                         six_set_bitmask(lock, SIX_LOCK_NOSPIN);
367                         ret = false;
368                         break;
369                 }
370
371                 cpu_relax();
372         }
373         rcu_read_unlock();
374
375         return ret;
376 }
377
378 static inline bool six_optimistic_spin(struct six_lock *lock, enum six_lock_type type)
379 {
380         struct task_struct *task = current;
381         u64 end_time;
382
383         if (type == SIX_LOCK_write)
384                 return false;
385
386         preempt_disable();
387         if (!six_can_spin_on_owner(lock))
388                 goto fail;
389
390         if (!osq_lock(&lock->osq))
391                 goto fail;
392
393         end_time = sched_clock() + 10 * NSEC_PER_USEC;
394
395         while (1) {
396                 struct task_struct *owner;
397
398                 /*
399                  * If there's an owner, wait for it to either
400                  * release the lock or go to sleep.
401                  */
402                 owner = READ_ONCE(lock->owner);
403                 if (owner && !six_spin_on_owner(lock, owner, end_time))
404                         break;
405
406                 if (do_six_trylock(lock, type, false)) {
407                         osq_unlock(&lock->osq);
408                         preempt_enable();
409                         return true;
410                 }
411
412                 /*
413                  * When there's no owner, we might have been preempted between
414                  * the owner acquiring the lock and setting the owner field. If
415                  * we're an RT task, that will live-lock because we won't let
416                  * the owner complete.
417                  */
418                 if (!owner && (need_resched() || rt_task(task)))
419                         break;
420
421                 /*
422                  * The cpu_relax() call is a compiler barrier which forces
423                  * everything in this loop to be re-loaded. We don't need
424                  * memory barriers as we'll eventually observe the right
425                  * values at the cost of a few extra spins.
426                  */
427                 cpu_relax();
428         }
429
430         osq_unlock(&lock->osq);
431 fail:
432         preempt_enable();
433
434         /*
435          * If we fell out of the spin path because of need_resched(),
436          * reschedule now, before we try-lock again. This avoids getting
437          * scheduled out right after we obtained the lock.
438          */
439         if (need_resched())
440                 schedule();
441
442         return false;
443 }
444
445 #else /* CONFIG_LOCK_SPIN_ON_OWNER */
446
447 static inline bool six_optimistic_spin(struct six_lock *lock, enum six_lock_type type)
448 {
449         return false;
450 }
451
452 #endif
453
454 noinline
455 static int six_lock_slowpath(struct six_lock *lock, enum six_lock_type type,
456                              struct six_lock_waiter *wait,
457                              six_lock_should_sleep_fn should_sleep_fn, void *p,
458                              unsigned long ip)
459 {
460         int ret = 0;
461
462         if (type == SIX_LOCK_write) {
463                 EBUG_ON(atomic_read(&lock->state) & SIX_LOCK_HELD_write);
464                 atomic_add(SIX_LOCK_HELD_write, &lock->state);
465                 smp_mb__after_atomic();
466         }
467
468         trace_contention_begin(lock, 0);
469         lock_contended(&lock->dep_map, ip);
470
471         if (six_optimistic_spin(lock, type))
472                 goto out;
473
474         wait->task              = current;
475         wait->lock_want         = type;
476         wait->lock_acquired     = false;
477
478         raw_spin_lock(&lock->wait_lock);
479         six_set_bitmask(lock, SIX_LOCK_WAITING_read << type);
480         /*
481          * Retry taking the lock after taking waitlist lock, in case we raced
482          * with an unlock:
483          */
484         ret = __do_six_trylock(lock, type, current, false);
485         if (ret <= 0) {
486                 wait->start_time = local_clock();
487
488                 if (!list_empty(&lock->wait_list)) {
489                         struct six_lock_waiter *last =
490                                 list_last_entry(&lock->wait_list,
491                                         struct six_lock_waiter, list);
492
493                         if (time_before_eq64(wait->start_time, last->start_time))
494                                 wait->start_time = last->start_time + 1;
495                 }
496
497                 list_add_tail(&wait->list, &lock->wait_list);
498         }
499         raw_spin_unlock(&lock->wait_lock);
500
501         if (unlikely(ret > 0)) {
502                 ret = 0;
503                 goto out;
504         }
505
506         if (unlikely(ret < 0)) {
507                 __six_lock_wakeup(lock, -ret - 1);
508                 ret = 0;
509         }
510
511         while (1) {
512                 set_current_state(TASK_UNINTERRUPTIBLE);
513
514                 /*
515                  * Ensures that writes to the waitlist entry happen after we see
516                  * wait->lock_acquired: pairs with the smp_store_release in
517                  * __six_lock_wakeup
518                  */
519                 if (smp_load_acquire(&wait->lock_acquired))
520                         break;
521
522                 ret = should_sleep_fn ? should_sleep_fn(lock, p) : 0;
523                 if (unlikely(ret)) {
524                         bool acquired;
525
526                         /*
527                          * If should_sleep_fn() returns an error, we are
528                          * required to return that error even if we already
529                          * acquired the lock - should_sleep_fn() might have
530                          * modified external state (e.g. when the deadlock cycle
531                          * detector in bcachefs issued a transaction restart)
532                          */
533                         raw_spin_lock(&lock->wait_lock);
534                         acquired = wait->lock_acquired;
535                         if (!acquired)
536                                 list_del(&wait->list);
537                         raw_spin_unlock(&lock->wait_lock);
538
539                         if (unlikely(acquired))
540                                 do_six_unlock_type(lock, type);
541                         break;
542                 }
543
544                 schedule();
545         }
546
547         __set_current_state(TASK_RUNNING);
548 out:
549         if (ret && type == SIX_LOCK_write) {
550                 six_clear_bitmask(lock, SIX_LOCK_HELD_write);
551                 six_lock_wakeup(lock, atomic_read(&lock->state), SIX_LOCK_read);
552         }
553         trace_contention_end(lock, 0);
554
555         return ret;
556 }
557
558 /**
559  * six_lock_ip_waiter - take a lock, with full waitlist interface
560  * @lock:       lock to take
561  * @type:       SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
562  * @wait:       pointer to wait object, which will be added to lock's waitlist
563  * @should_sleep_fn: callback run after adding to waitlist, immediately prior
564  *              to scheduling
565  * @p:          passed through to @should_sleep_fn
566  * @ip:         ip parameter for lockdep/lockstat, i.e. _THIS_IP_
567  *
568  * This is the most general six_lock() variant, with parameters to support full
569  * cycle detection for deadlock avoidance.
570  *
571  * The code calling this function must implement tracking of held locks, and the
572  * @wait object should be embedded into the struct that tracks held locks -
573  * which must also be accessible in a thread-safe way.
574  *
575  * @should_sleep_fn should invoke the cycle detector; it should walk each
576  * lock's waiters, and for each waiter recursively walk their held locks.
577  *
578  * When this function must block, @wait will be added to @lock's waitlist before
579  * calling trylock, and before calling @should_sleep_fn, and @wait will not be
580  * removed from the lock waitlist until the lock has been successfully acquired,
581  * or we abort.
582  *
583  * @wait.start_time will be monotonically increasing for any given waitlist, and
584  * thus may be used as a loop cursor.
585  *
586  * Return: 0 on success, or the return code from @should_sleep_fn on failure.
587  */
588 int six_lock_ip_waiter(struct six_lock *lock, enum six_lock_type type,
589                        struct six_lock_waiter *wait,
590                        six_lock_should_sleep_fn should_sleep_fn, void *p,
591                        unsigned long ip)
592 {
593         int ret;
594
595         wait->start_time = 0;
596
597         if (type != SIX_LOCK_write)
598                 six_acquire(&lock->dep_map, 0, type == SIX_LOCK_read, ip);
599
600         ret = do_six_trylock(lock, type, true) ? 0
601                 : six_lock_slowpath(lock, type, wait, should_sleep_fn, p, ip);
602
603         if (ret && type != SIX_LOCK_write)
604                 six_release(&lock->dep_map, ip);
605         if (!ret)
606                 lock_acquired(&lock->dep_map, ip);
607
608         return ret;
609 }
610 EXPORT_SYMBOL_GPL(six_lock_ip_waiter);
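/*
 * Sketch of how a caller might use the full waiter interface (editor's
 * example; my_should_sleep() and "foo" are hypothetical, and the cycle
 * detector is only stubbed out):
 *
 *	static int my_should_sleep(struct six_lock *lock, void *p)
 *	{
 *		// walk this lock's waiters and their held locks here;
 *		// return a nonzero error (e.g. -EDEADLK) to abort instead
 *		// of sleeping
 *		return 0;
 *	}
 *
 *	struct six_lock_waiter wait;
 *	int ret = six_lock_ip_waiter(&foo->lock, SIX_LOCK_intent, &wait,
 *				     my_should_sleep, NULL, _THIS_IP_);
 *	if (ret)
 *		// aborted by my_should_sleep(); the lock is not held
 */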
611
612 __always_inline
613 static void do_six_unlock_type(struct six_lock *lock, enum six_lock_type type)
614 {
615         u32 state;
616
617         if (type == SIX_LOCK_intent)
618                 lock->owner = NULL;
619
620         if (type == SIX_LOCK_read &&
621             lock->readers) {
622                 smp_mb(); /* unlock barrier */
623                 this_cpu_dec(*lock->readers);
624                 smp_mb(); /* between unlocking and checking for waiters */
625                 state = atomic_read(&lock->state);
626         } else {
627                 u32 v = l[type].lock_val;
628
629                 if (type != SIX_LOCK_read)
630                         v += atomic_read(&lock->state) & SIX_LOCK_NOSPIN;
631
632                 EBUG_ON(!(atomic_read(&lock->state) & l[type].held_mask));
633                 state = atomic_sub_return_release(v, &lock->state);
634         }
635
636         six_lock_wakeup(lock, state, l[type].unlock_wakeup);
637 }
638
639 /**
640  * six_unlock_ip - drop a six lock
641  * @lock:       lock to unlock
642  * @type:       SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
643  * @ip:         ip parameter for lockdep/lockstat, i.e. _THIS_IP_
644  *
645  * When a lock is held multiple times (because six_lock_increment() was used),
646  * this decrements the 'lock held' counter by one.
647  *
648  * For example:
649  * six_lock_read(&foo->lock);                           read count 1
650  * six_lock_increment(&foo->lock, SIX_LOCK_read);       read count 2
651  * six_lock_unlock(&foo->lock, SIX_LOCK_read);          read count 1
652  * six_lock_unlock(&foo->lock, SIX_LOCK_read);          read count 0
653  */
654 void six_unlock_ip(struct six_lock *lock, enum six_lock_type type, unsigned long ip)
655 {
656         EBUG_ON(type == SIX_LOCK_write &&
657                 !(atomic_read(&lock->state) & SIX_LOCK_HELD_intent));
658         EBUG_ON((type == SIX_LOCK_write ||
659                  type == SIX_LOCK_intent) &&
660                 lock->owner != current);
661
662         if (type != SIX_LOCK_write)
663                 six_release(&lock->dep_map, ip);
664         else
665                 lock->seq++;
666
667         if (type == SIX_LOCK_intent &&
668             lock->intent_lock_recurse) {
669                 --lock->intent_lock_recurse;
670                 return;
671         }
672
673         do_six_unlock_type(lock, type);
674 }
675 EXPORT_SYMBOL_GPL(six_unlock_ip);
676
677 /**
678  * six_lock_downgrade - convert an intent lock to a read lock
679  * @lock:       lock to downgrade
680  *
681  * @lock will have read count incremented and intent count decremented
682  */
683 void six_lock_downgrade(struct six_lock *lock)
684 {
685         six_lock_increment(lock, SIX_LOCK_read);
686         six_unlock_intent(lock);
687 }
688 EXPORT_SYMBOL_GPL(six_lock_downgrade);
689
690 /**
691  * six_lock_tryupgrade - attempt to convert read lock to an intent lock
692  * @lock:       lock to upgrade
693  *
694  * On success, @lock will have intent count incremented and read count
695  * decremented
696  *
697  * Return: true on success, false on failure
698  */
699 bool six_lock_tryupgrade(struct six_lock *lock)
700 {
701         u32 old = atomic_read(&lock->state), new;
702
703         do {
704                 new = old;
705
706                 if (new & SIX_LOCK_HELD_intent)
707                         return false;
708
709                 if (!lock->readers) {
710                         EBUG_ON(!(new & SIX_LOCK_HELD_read));
711                         new -= l[SIX_LOCK_read].lock_val;
712                 }
713
714                 new |= SIX_LOCK_HELD_intent;
715         } while (!atomic_try_cmpxchg_acquire(&lock->state, &old, new));
716
717         if (lock->readers)
718                 this_cpu_dec(*lock->readers);
719
720         six_set_owner(lock, SIX_LOCK_intent, old, current);
721
722         return true;
723 }
724 EXPORT_SYMBOL_GPL(six_lock_tryupgrade);
725
726 /**
727  * six_trylock_convert - attempt to convert a held lock from one type to another
728  * @lock:       lock to convert
729  * @from:       SIX_LOCK_read or SIX_LOCK_intent
730  * @to:         SIX_LOCK_read or SIX_LOCK_intent
731  *
732  * On success, @lock will be held with type @to and no longer held with
733  * type @from
734  *
735  * Return: true on success, false on failure
736  */
737 bool six_trylock_convert(struct six_lock *lock,
738                          enum six_lock_type from,
739                          enum six_lock_type to)
740 {
741         EBUG_ON(to == SIX_LOCK_write || from == SIX_LOCK_write);
742
743         if (to == from)
744                 return true;
745
746         if (to == SIX_LOCK_read) {
747                 six_lock_downgrade(lock);
748                 return true;
749         } else {
750                 return six_lock_tryupgrade(lock);
751         }
752 }
753 EXPORT_SYMBOL_GPL(six_trylock_convert);
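/*
 * Example (editor's sketch; hypothetical caller): upgrade a read lock to
 * intent before modifying, falling back to relocking from scratch when
 * another thread already holds the intent lock:
 *
 *	if (!six_trylock_convert(&foo->lock, SIX_LOCK_read, SIX_LOCK_intent)) {
 *		six_unlock_ip(&foo->lock, SIX_LOCK_read, _THIS_IP_);
 *		// ... take the intent lock from scratch and revalidate ...
 *	}
 */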
754
755 /**
756  * six_lock_increment - increase held lock count on a lock that is already held
757  * @lock:       lock to increment
758  * @type:       SIX_LOCK_read or SIX_LOCK_intent
759  *
760  * @lock must already be held, with a lock type that is greater than or equal to
761  * @type
762  *
763  * A corresponding six_unlock_type() call will be required for @lock to be fully
764  * unlocked.
765  */
766 void six_lock_increment(struct six_lock *lock, enum six_lock_type type)
767 {
768         six_acquire(&lock->dep_map, 0, type == SIX_LOCK_read, _RET_IP_);
769
770         /* XXX: assert already locked, and that we don't overflow: */
771
772         switch (type) {
773         case SIX_LOCK_read:
774                 if (lock->readers) {
775                         this_cpu_inc(*lock->readers);
776                 } else {
777                         EBUG_ON(!(atomic_read(&lock->state) &
778                                   (SIX_LOCK_HELD_read|
779                                    SIX_LOCK_HELD_intent)));
780                         atomic_add(l[type].lock_val, &lock->state);
781                 }
782                 break;
783         case SIX_LOCK_intent:
784                 EBUG_ON(!(atomic_read(&lock->state) & SIX_LOCK_HELD_intent));
785                 lock->intent_lock_recurse++;
786                 break;
787         case SIX_LOCK_write:
788                 BUG();
789                 break;
790         }
791 }
792 EXPORT_SYMBOL_GPL(six_lock_increment);
793
794 /**
795  * six_lock_wakeup_all - wake up all waiters on @lock
796  * @lock:       lock to wake up waiters for
797  *
798  * Waking up waiters will cause them to re-run should_sleep_fn, which may then
799  * abort the lock operation.
800  *
801  * This function is never needed in a bug-free program; it's only useful in
802  * debug code, e.g. to determine if a cycle detector is at fault.
803  */
804 void six_lock_wakeup_all(struct six_lock *lock)
805 {
806         u32 state = atomic_read(&lock->state);
807         struct six_lock_waiter *w;
808
809         six_lock_wakeup(lock, state, SIX_LOCK_read);
810         six_lock_wakeup(lock, state, SIX_LOCK_intent);
811         six_lock_wakeup(lock, state, SIX_LOCK_write);
812
813         raw_spin_lock(&lock->wait_lock);
814         list_for_each_entry(w, &lock->wait_list, list)
815                 wake_up_process(w->task);
816         raw_spin_unlock(&lock->wait_lock);
817 }
818 EXPORT_SYMBOL_GPL(six_lock_wakeup_all);
819
820 /**
821  * six_lock_counts - return held lock counts, for each lock type
822  * @lock:       lock to return counters for
823  *
824  * Return: the number of times a lock is held for read, intent and write.
825  */
826 struct six_lock_count six_lock_counts(struct six_lock *lock)
827 {
828         struct six_lock_count ret;
829
830         ret.n[SIX_LOCK_read]    = !lock->readers
831                 ? atomic_read(&lock->state) & SIX_LOCK_HELD_read
832                 : pcpu_read_count(lock);
833         ret.n[SIX_LOCK_intent]  = !!(atomic_read(&lock->state) & SIX_LOCK_HELD_intent) +
834                 lock->intent_lock_recurse;
835         ret.n[SIX_LOCK_write]   = !!(atomic_read(&lock->state) & SIX_LOCK_HELD_write);
836
837         return ret;
838 }
839 EXPORT_SYMBOL_GPL(six_lock_counts);
840
841 /**
842  * six_lock_readers_add - directly manipulate reader count of a lock
843  * @lock:       lock to add/subtract readers for
844  * @nr:         reader count to add/subtract
845  *
846  * When an upper layer is implementing lock reentrancy, we may have both read
847  * and intent locks on the same lock.
848  *
849  * When we need to take a write lock, the read locks will cause self-deadlock,
850  * because six locks themselves do not track which read locks are held by the
851  * current thread and which are held by a different thread; six locks do no
852  * per-thread tracking of held locks.
853  *
854  * The upper layer that is tracking held locks may, however, if trylock() has
855  * failed, count up its own read locks, subtract them, take the write lock, and
856  * then re-add them.
857  *
858  * As in any other situation when taking a write lock, @lock must be held for
859  * intent one (or more) times, so @lock will never be left unlocked.
860  */
861 void six_lock_readers_add(struct six_lock *lock, int nr)
862 {
863         if (lock->readers) {
864                 this_cpu_add(*lock->readers, nr);
865         } else {
866                 EBUG_ON((int) (atomic_read(&lock->state) & SIX_LOCK_HELD_read) + nr < 0);
867                 /* reader count starts at bit 0 */
868                 atomic_add(nr, &lock->state);
869         }
870 }
871 EXPORT_SYMBOL_GPL(six_lock_readers_add);
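/*
 * Sketch of the read count juggling described above (editor's example;
 * "nr_my_read_locks" is whatever count the caller's own lock tracking
 * provides, and @lock must already be held for intent):
 *
 *	six_lock_readers_add(&foo->lock, -nr_my_read_locks);
 *	// ... take SIX_LOCK_write, e.g. via six_lock_ip_waiter() ...
 *	six_lock_readers_add(&foo->lock, nr_my_read_locks);
 */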
872
873 /**
874  * six_lock_exit - release resources held by a lock prior to freeing
875  * @lock:       lock to exit
876  *
877  * When a lock was initialized in percpu mode (SIX_LOCK_INIT_PCPU), this is
878  * required to free the percpu read counts.
879  */
880 void six_lock_exit(struct six_lock *lock)
881 {
882         WARN_ON(lock->readers && pcpu_read_count(lock));
883         WARN_ON(atomic_read(&lock->state) & SIX_LOCK_HELD_read);
884
885         free_percpu(lock->readers);
886         lock->readers = NULL;
887 }
888 EXPORT_SYMBOL_GPL(six_lock_exit);
889
890 void __six_lock_init(struct six_lock *lock, const char *name,
891                      struct lock_class_key *key, enum six_lock_init_flags flags)
892 {
893         atomic_set(&lock->state, 0);
894         raw_spin_lock_init(&lock->wait_lock);
895         INIT_LIST_HEAD(&lock->wait_list);
896 #ifdef CONFIG_DEBUG_LOCK_ALLOC
897         debug_check_no_locks_freed((void *) lock, sizeof(*lock));
898         lockdep_init_map(&lock->dep_map, name, key, 0);
899 #endif
900
901         /*
902          * Don't assume that we have real percpu variables available in
903          * userspace:
904          */
905 #ifdef __KERNEL__
906         if (flags & SIX_LOCK_INIT_PCPU) {
907                 /*
908                  * We don't return an error here on memory allocation failure
909                  * since percpu is an optimization, and locks will work with the
910                  * same semantics in non-percpu mode: callers can check for
911                  * failure if they wish by checking lock->readers, but generally
912                  * will not want to treat it as an error.
913                  */
914                 lock->readers = alloc_percpu(unsigned);
915         }
916 #endif
917 }
918 EXPORT_SYMBOL_GPL(__six_lock_init);
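/*
 * Example lifecycle for a lock in percpu reader mode (editor's sketch; "foo"
 * is hypothetical):
 *
 *	static struct lock_class_key foo_lock_key;
 *
 *	__six_lock_init(&foo->lock, "foo->lock", &foo_lock_key,
 *			SIX_LOCK_INIT_PCPU);
 *	// lock->readers may still be NULL if the percpu allocation failed;
 *	// the lock then simply runs in non-percpu mode
 *
 *	// ... use the lock ...
 *
 *	six_lock_exit(&foo->lock);	// frees the percpu read counts
 */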