git.sesse.net Git - bcachefs-tools-debian/blob - include/linux/seqlock.h

   1 #ifndef __LINUX_SEQLOCK_H
   2 #define __LINUX_SEQLOCK_H
   3 /*
   4  * Reader/writer consistent mechanism without starving writers. This type of
   5  * lock for data where the reader wants a consistent set of information
   6  * and is willing to retry if the information changes. There are two types
   7  * of readers:
   8  * 1. Sequence readers which never block a writer but they may have to retry
   9  *    if a writer is in progress by detecting change in sequence number.
  10  *    Writers do not wait for a sequence reader.
  11  * 2. Locking readers which will wait if a writer or another locking reader
  12  *    is in progress. A locking reader in progress will also block a writer
  13  *    from going forward. Unlike the regular rwlock, the read lock here is
  14  *    exclusive so that only one locking reader can get it.
  15  *
  16  * This is not as cache friendly as brlock. Also, this may not work well
  17  * for data that contains pointers, because any writer could
  18  * invalidate a pointer that a reader was following.
  19  *
  20  * Expected non-blocking reader usage:
  21  *      do {
  22  *          seq = read_seqbegin(&foo);
  23  *      ...
  24  *      } while (read_seqretry(&foo, seq));
  25  *
  26  *
  27  * On non-SMP the spin locks disappear but the writer still needs
  28  * to increment the sequence variables because an interrupt routine could
  29  * change the state of the data.
  30  *
  31  * Based on x86_64 vsyscall gettimeofday
  32  * by Keith Owens and Andrea Arcangeli
  33  */
  34
  35 #include <linux/spinlock.h>
  36 #include <linux/lockdep.h>
  37 #include <linux/compiler.h>
  38
  39 /*
  40  * Version using sequence counter only.
  41  * This can be used when code has its own mutex protecting the
  42  * updating starting before the write_seqcountbeqin() and ending
  43  * after the write_seqcount_end().
  44  */
  45 typedef struct seqcount {
  46         unsigned sequence;
  47 } seqcount_t;
  48
  49 static inline void __seqcount_init(seqcount_t *s, const char *name,
  50                                           struct lock_class_key *key)
  51 {
  52         s->sequence = 0;
  53 }
  54
  55 # define SEQCOUNT_DEP_MAP_INIT(lockname)
  56 # define seqcount_init(s) __seqcount_init(s, NULL, NULL)
  57 # define seqcount_lockdep_reader_access(x)
  58
  59 #define SEQCNT_ZERO(lockname) { .sequence = 0, SEQCOUNT_DEP_MAP_INIT(lockname)}
  60
  61
  62 /**
  63  * __read_seqcount_begin - begin a seq-read critical section (without barrier)
  64  * @s: pointer to seqcount_t
  65  * Returns: count to be passed to read_seqcount_retry
  66  *
  67  * __read_seqcount_begin is like read_seqcount_begin, but has no smp_rmb()
  68  * barrier. Callers should ensure that smp_rmb() or equivalent ordering is
  69  * provided before actually loading any of the variables that are to be
  70  * protected in this critical section.
  71  *
  72  * Use carefully, only in critical code, and comment how the barrier is
  73  * provided.
  74  */
  75 static inline unsigned __read_seqcount_begin(const seqcount_t *s)
  76 {
  77         unsigned ret;
  78
  79 repeat:
  80         ret = READ_ONCE(s->sequence);
  81         if (unlikely(ret & 1)) {
  82                 cpu_relax();
  83                 goto repeat;
  84         }
  85         return ret;
  86 }
  87
  88 /**
  89  * raw_read_seqcount - Read the raw seqcount
  90  * @s: pointer to seqcount_t
  91  * Returns: count to be passed to read_seqcount_retry
  92  *
  93  * raw_read_seqcount opens a read critical section of the given
  94  * seqcount without any lockdep checking and without checking or
  95  * masking the LSB. Calling code is responsible for handling that.
  96  */
  97 static inline unsigned raw_read_seqcount(const seqcount_t *s)
  98 {
  99         unsigned ret = READ_ONCE(s->sequence);
 100         smp_rmb();
 101         return ret;
 102 }
 103
 104 /**
 105  * raw_read_seqcount_begin - start seq-read critical section w/o lockdep
 106  * @s: pointer to seqcount_t
 107  * Returns: count to be passed to read_seqcount_retry
 108  *
 109  * raw_read_seqcount_begin opens a read critical section of the given
 110  * seqcount, but without any lockdep checking. Validity of the critical
 111  * section is tested by checking read_seqcount_retry function.
 112  */
 113 static inline unsigned raw_read_seqcount_begin(const seqcount_t *s)
 114 {
 115         unsigned ret = __read_seqcount_begin(s);
 116         smp_rmb();
 117         return ret;
 118 }
 119
 120 /**
 121  * read_seqcount_begin - begin a seq-read critical section
 122  * @s: pointer to seqcount_t
 123  * Returns: count to be passed to read_seqcount_retry
 124  *
 125  * read_seqcount_begin opens a read critical section of the given seqcount.
 126  * Validity of the critical section is tested by checking read_seqcount_retry
 127  * function.
 128  */
 129 static inline unsigned read_seqcount_begin(const seqcount_t *s)
 130 {
 131         seqcount_lockdep_reader_access(s);
 132         return raw_read_seqcount_begin(s);
 133 }
 134
 135 /**
 136  * raw_seqcount_begin - begin a seq-read critical section
 137  * @s: pointer to seqcount_t
 138  * Returns: count to be passed to read_seqcount_retry
 139  *
 140  * raw_seqcount_begin opens a read critical section of the given seqcount.
 141  * Validity of the critical section is tested by checking read_seqcount_retry
 142  * function.
 143  *
 144  * Unlike read_seqcount_begin(), this function will not wait for the count
 145  * to stabilize. If a writer is active when we begin, we will fail the
 146  * read_seqcount_retry() instead of stabilizing at the beginning of the
 147  * critical section.
 148  */
 149 static inline unsigned raw_seqcount_begin(const seqcount_t *s)
 150 {
 151         unsigned ret = READ_ONCE(s->sequence);
 152         smp_rmb();
 153         return ret & ~1;
 154 }
 155
 156 /**
 157  * __read_seqcount_retry - end a seq-read critical section (without barrier)
 158  * @s: pointer to seqcount_t
 159  * @start: count, from read_seqcount_begin
 160  * Returns: 1 if retry is required, else 0
 161  *
 162  * __read_seqcount_retry is like read_seqcount_retry, but has no smp_rmb()
 163  * barrier. Callers should ensure that smp_rmb() or equivalent ordering is
 164  * provided before actually loading any of the variables that are to be
 165  * protected in this critical section.
 166  *
 167  * Use carefully, only in critical code, and comment how the barrier is
 168  * provided.
 169  */
 170 static inline int __read_seqcount_retry(const seqcount_t *s, unsigned start)
 171 {
 172         return unlikely(s->sequence != start);
 173 }
 174
 175 /**
 176  * read_seqcount_retry - end a seq-read critical section
 177  * @s: pointer to seqcount_t
 178  * @start: count, from read_seqcount_begin
 179  * Returns: 1 if retry is required, else 0
 180  *
 181  * read_seqcount_retry closes a read critical section of the given seqcount.
 182  * If the critical section was invalid, it must be ignored (and typically
 183  * retried).
 184  */
 185 static inline int read_seqcount_retry(const seqcount_t *s, unsigned start)
 186 {
 187         smp_rmb();
 188         return __read_seqcount_retry(s, start);
 189 }
 190
 191
 192
 193 static inline void raw_write_seqcount_begin(seqcount_t *s)
 194 {
 195         s->sequence++;
 196         smp_wmb();
 197 }
 198
 199 static inline void raw_write_seqcount_end(seqcount_t *s)
 200 {
 201         smp_wmb();
 202         s->sequence++;
 203 }
 204
 205 /**
 206  * raw_write_seqcount_barrier - do a seq write barrier
 207  * @s: pointer to seqcount_t
 208  *
 209  * This can be used to provide an ordering guarantee instead of the
 210  * usual consistency guarantee. It is one wmb cheaper, because we can
 211  * collapse the two back-to-back wmb()s.
 212  *
 213  *      seqcount_t seq;
 214  *      bool X = true, Y = false;
 215  *
 216  *      void read(void)
 217  *      {
 218  *              bool x, y;
 219  *
 220  *              do {
 221  *                      int s = read_seqcount_begin(&seq);
 222  *
 223  *                      x = X; y = Y;
 224  *
 225  *              } while (read_seqcount_retry(&seq, s));
 226  *
 227  *              BUG_ON(!x && !y);
 228  *      }
 229  *
 230  *      void write(void)
 231  *      {
 232  *              Y = true;
 233  *
 234  *              raw_write_seqcount_barrier(seq);
 235  *
 236  *              X = false;
 237  *      }
 238  */
 239 static inline void raw_write_seqcount_barrier(seqcount_t *s)
 240 {
 241         s->sequence++;
 242         smp_wmb();
 243         s->sequence++;
 244 }
 245
 246 static inline int raw_read_seqcount_latch(seqcount_t *s)
 247 {
 248         int seq = READ_ONCE(s->sequence);
 249         /* Pairs with the first smp_wmb() in raw_write_seqcount_latch() */
 250         smp_read_barrier_depends();
 251         return seq;
 252 }
 253
 254 /**
 255  * raw_write_seqcount_latch - redirect readers to even/odd copy
 256  * @s: pointer to seqcount_t
 257  *
 258  * The latch technique is a multiversion concurrency control method that allows
 259  * queries during non-atomic modifications. If you can guarantee queries never
 260  * interrupt the modification -- e.g. the concurrency is strictly between CPUs
 261  * -- you most likely do not need this.
 262  *
 263  * Where the traditional RCU/lockless data structures rely on atomic
 264  * modifications to ensure queries observe either the old or the new state the
 265  * latch allows the same for non-atomic updates. The trade-off is doubling the
 266  * cost of storage; we have to maintain two copies of the entire data
 267  * structure.
 268  *
 269  * Very simply put: we first modify one copy and then the other. This ensures
 270  * there is always one copy in a stable state, ready to give us an answer.
 271  *
 272  * The basic form is a data structure like:
 273  *
 274  * struct latch_struct {
 275  *      seqcount_t              seq;
 276  *      struct data_struct      data[2];
 277  * };
 278  *
 279  * Where a modification, which is assumed to be externally serialized, does the
 280  * following:
 281  *
 282  * void latch_modify(struct latch_struct *latch, ...)
 283  * {
 284  *      smp_wmb();      <- Ensure that the last data[1] update is visible
 285  *      latch->seq++;
 286  *      smp_wmb();      <- Ensure that the seqcount update is visible
 287  *
 288  *      modify(latch->data[0], ...);
 289  *
 290  *      smp_wmb();      <- Ensure that the data[0] update is visible
 291  *      latch->seq++;
 292  *      smp_wmb();      <- Ensure that the seqcount update is visible
 293  *
 294  *      modify(latch->data[1], ...);
 295  * }
 296  *
 297  * The query will have a form like:
 298  *
 299  * struct entry *latch_query(struct latch_struct *latch, ...)
 300  * {
 301  *      struct entry *entry;
 302  *      unsigned seq, idx;
 303  *
 304  *      do {
 305  *              seq = raw_read_seqcount_latch(&latch->seq);
 306  *
 307  *              idx = seq & 0x01;
 308  *              entry = data_query(latch->data[idx], ...);
 309  *
 310  *              smp_rmb();
 311  *      } while (seq != latch->seq);
 312  *
 313  *      return entry;
 314  * }
 315  *
 316  * So during the modification, queries are first redirected to data[1]. Then we
 317  * modify data[0]. When that is complete, we redirect queries back to data[0]
 318  * and we can modify data[1].
 319  *
 320  * NOTE: The non-requirement for atomic modifications does _NOT_ include
 321  *       the publishing of new entries in the case where data is a dynamic
 322  *       data structure.
 323  *
 324  *       An iteration might start in data[0] and get suspended long enough
 325  *       to miss an entire modification sequence, once it resumes it might
 326  *       observe the new entry.
 327  *
 328  * NOTE: When data is a dynamic data structure; one should use regular RCU
 329  *       patterns to manage the lifetimes of the objects within.
 330  */
 331 static inline void raw_write_seqcount_latch(seqcount_t *s)
 332 {
 333        smp_wmb();      /* prior stores before incrementing "sequence" */
 334        s->sequence++;
 335        smp_wmb();      /* increment "sequence" before following stores */
 336 }
 337
 338 /*
 339  * Sequence counter only version assumes that callers are using their
 340  * own mutexing.
 341  */
 342 static inline void write_seqcount_begin_nested(seqcount_t *s, int subclass)
 343 {
 344         raw_write_seqcount_begin(s);
 345 }
 346
 347 static inline void write_seqcount_begin(seqcount_t *s)
 348 {
 349         write_seqcount_begin_nested(s, 0);
 350 }
 351
 352 static inline void write_seqcount_end(seqcount_t *s)
 353 {
 354         raw_write_seqcount_end(s);
 355 }
 356
 357 /**
 358  * write_seqcount_invalidate - invalidate in-progress read-side seq operations
 359  * @s: pointer to seqcount_t
 360  *
 361  * After write_seqcount_invalidate, no read-side seq operations will complete
 362  * successfully and see data older than this.
 363  */
 364 static inline void write_seqcount_invalidate(seqcount_t *s)
 365 {
 366         smp_wmb();
 367         s->sequence+=2;
 368 }
 369
 370 typedef struct {
 371         struct seqcount seqcount;
 372         spinlock_t lock;
 373 } seqlock_t;
 374
 375 /*
 376  * These macros triggered gcc-3.x compile-time problems.  We think these are
 377  * OK now.  Be cautious.
 378  */
 379 #define __SEQLOCK_UNLOCKED(lockname)                    \
 380         {                                               \
 381                 .seqcount = SEQCNT_ZERO(lockname),      \
 382                 .lock = __SPIN_LOCK_UNLOCKED(lockname)  \
 383         }
 384
 385 #define seqlock_init(x)                                 \
 386         do {                                            \
 387                 seqcount_init(&(x)->seqcount);          \
 388                 spin_lock_init(&(x)->lock);             \
 389         } while (0)
 390
 391 #define DEFINE_SEQLOCK(x) \
 392                 seqlock_t x = __SEQLOCK_UNLOCKED(x)
 393
 394 /*
 395  * Read side functions for starting and finalizing a read side section.
 396  */
 397 static inline unsigned read_seqbegin(const seqlock_t *sl)
 398 {
 399         return read_seqcount_begin(&sl->seqcount);
 400 }
 401
 402 static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
 403 {
 404         return read_seqcount_retry(&sl->seqcount, start);
 405 }
 406
 407 /*
 408  * Lock out other writers and update the count.
 409  * Acts like a normal spin_lock/unlock.
 410  * Don't need preempt_disable() because that is in the spin_lock already.
 411  */
 412 static inline void write_seqlock(seqlock_t *sl)
 413 {
 414         spin_lock(&sl->lock);
 415         write_seqcount_begin(&sl->seqcount);
 416 }
 417
 418 static inline void write_sequnlock(seqlock_t *sl)
 419 {
 420         write_seqcount_end(&sl->seqcount);
 421         spin_unlock(&sl->lock);
 422 }
 423
 424 static inline void write_seqlock_bh(seqlock_t *sl)
 425 {
 426         spin_lock_bh(&sl->lock);
 427         write_seqcount_begin(&sl->seqcount);
 428 }
 429
 430 static inline void write_sequnlock_bh(seqlock_t *sl)
 431 {
 432         write_seqcount_end(&sl->seqcount);
 433         spin_unlock_bh(&sl->lock);
 434 }
 435
 436 static inline void write_seqlock_irq(seqlock_t *sl)
 437 {
 438         spin_lock_irq(&sl->lock);
 439         write_seqcount_begin(&sl->seqcount);
 440 }
 441
 442 static inline void write_sequnlock_irq(seqlock_t *sl)
 443 {
 444         write_seqcount_end(&sl->seqcount);
 445         spin_unlock_irq(&sl->lock);
 446 }
 447
 448 static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl)
 449 {
 450         unsigned long flags;
 451
 452         spin_lock_irqsave(&sl->lock, flags);
 453         write_seqcount_begin(&sl->seqcount);
 454         return flags;
 455 }
 456
 457 #define write_seqlock_irqsave(lock, flags)                              \
 458         do { flags = __write_seqlock_irqsave(lock); } while (0)
 459
 460 static inline void
 461 write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags)
 462 {
 463         write_seqcount_end(&sl->seqcount);
 464         spin_unlock_irqrestore(&sl->lock, flags);
 465 }
 466
 467 /*
 468  * A locking reader exclusively locks out other writers and locking readers,
 469  * but doesn't update the sequence number. Acts like a normal spin_lock/unlock.
 470  * Don't need preempt_disable() because that is in the spin_lock already.
 471  */
 472 static inline void read_seqlock_excl(seqlock_t *sl)
 473 {
 474         spin_lock(&sl->lock);
 475 }
 476
 477 static inline void read_sequnlock_excl(seqlock_t *sl)
 478 {
 479         spin_unlock(&sl->lock);
 480 }
 481
 482 /**
 483  * read_seqbegin_or_lock - begin a sequence number check or locking block
 484  * @lock: sequence lock
 485  * @seq : sequence number to be checked
 486  *
 487  * First try it once optimistically without taking the lock. If that fails,
 488  * take the lock. The sequence number is also used as a marker for deciding
 489  * whether to be a reader (even) or writer (odd).
 490  * N.B. seq must be initialized to an even number to begin with.
 491  */
 492 static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq)
 493 {
 494         if (!(*seq & 1))        /* Even */
 495                 *seq = read_seqbegin(lock);
 496         else                    /* Odd */
 497                 read_seqlock_excl(lock);
 498 }
 499
 500 static inline int need_seqretry(seqlock_t *lock, int seq)
 501 {
 502         return !(seq & 1) && read_seqretry(lock, seq);
 503 }
 504
 505 static inline void done_seqretry(seqlock_t *lock, int seq)
 506 {
 507         if (seq & 1)
 508                 read_sequnlock_excl(lock);
 509 }
 510
 511 static inline void read_seqlock_excl_bh(seqlock_t *sl)
 512 {
 513         spin_lock_bh(&sl->lock);
 514 }
 515
 516 static inline void read_sequnlock_excl_bh(seqlock_t *sl)
 517 {
 518         spin_unlock_bh(&sl->lock);
 519 }
 520
 521 static inline void read_seqlock_excl_irq(seqlock_t *sl)
 522 {
 523         spin_lock_irq(&sl->lock);
 524 }
 525
 526 static inline void read_sequnlock_excl_irq(seqlock_t *sl)
 527 {
 528         spin_unlock_irq(&sl->lock);
 529 }
 530
 531 static inline unsigned long __read_seqlock_excl_irqsave(seqlock_t *sl)
 532 {
 533         unsigned long flags;
 534
 535         spin_lock_irqsave(&sl->lock, flags);
 536         return flags;
 537 }
 538
 539 #define read_seqlock_excl_irqsave(lock, flags)                          \
 540         do { flags = __read_seqlock_excl_irqsave(lock); } while (0)
 541
 542 static inline void
 543 read_sequnlock_excl_irqrestore(seqlock_t *sl, unsigned long flags)
 544 {
 545         spin_unlock_irqrestore(&sl->lock, flags);
 546 }
 547
 548 static inline unsigned long
 549 read_seqbegin_or_lock_irqsave(seqlock_t *lock, int *seq)
 550 {
 551         unsigned long flags = 0;
 552
 553         if (!(*seq & 1))        /* Even */
 554                 *seq = read_seqbegin(lock);
 555         else                    /* Odd */
 556                 read_seqlock_excl_irqsave(lock, flags);
 557
 558         return flags;
 559 }
 560
 561 static inline void
 562 done_seqretry_irqrestore(seqlock_t *lock, int seq, unsigned long flags)
 563 {
 564         if (seq & 1)
 565                 read_sequnlock_excl_irqrestore(lock, flags);
 566 }
 567 #endif /* __LINUX_SEQLOCK_H */