/*
 * Code for manipulating bucket marks for garbage collection.
 *
 * Copyright 2014 Datera, Inc.
 *
 * Bucket states:
 * - free bucket: mark == 0
 *   The bucket contains no data and will not be read
 *
 * - allocator bucket: owned_by_allocator == 1
 *   The bucket is on a free list, or it is an open bucket
 *
 * - cached bucket: owned_by_allocator == 0 &&
 *                  dirty_sectors == 0 &&
 *                  cached_sectors > 0
 *   The bucket contains data but may be safely discarded as there are
 *   enough replicas of the data on other cache devices, or it has been
 *   written back to the backing device
 *
 * - dirty bucket: owned_by_allocator == 0 &&
 *                 dirty_sectors > 0
 *   The bucket contains data that we must not discard (either the only copy,
 *   or one of the 'main copies' for data requiring multiple replicas)
 *
 * - metadata bucket: owned_by_allocator == 0 && is_metadata == 1
 *   This is a btree node, journal or gen/prio bucket
 *
 * Lifecycle:
 *
 * bucket invalidated => bucket on freelist => open bucket =>
 *     [dirty bucket =>] cached bucket => bucket invalidated => ...
 *
 * Note that cache promotion can skip the dirty bucket step, as data
 * is copied from a deeper tier to a shallower tier, onto a cached
 * bucket.
 * Note also that a cached bucket can spontaneously become dirty --
 * see below.
 *
 * Only a traversal of the key space can determine whether a bucket is
 * truly dirty or cached.
 *
 * Transitions:
 *
 * - free => allocator: bucket was invalidated
 * - cached => allocator: bucket was invalidated
 *
 * - allocator => dirty: open bucket was filled up
 * - allocator => cached: open bucket was filled up
 * - allocator => metadata: metadata was allocated
 *
 * - dirty => cached: dirty sectors were copied to a deeper tier
 * - dirty => free: dirty sectors were overwritten or moved (copy gc)
 * - cached => free: cached sectors were overwritten
 *
 * - metadata => free: metadata was freed
 *
 * Oddities:
 * - cached => dirty: a device was removed so formerly replicated data
 *                    is no longer sufficiently replicated
 * - free => cached: cannot happen
 * - free => dirty: cannot happen
 * - free => metadata: cannot happen
 */
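
/*
 * Illustration only (nothing below uses it): the state definitions above,
 * expressed in terms of the bucket_mark fields this file manipulates.  This
 * is just a sketch mirroring the is_meta_bucket()/is_dirty_bucket()/
 * is_cached_bucket() helpers defined further down:
 *
 *	static const char *bucket_state(struct bucket_mark m)
 *	{
 *		if (m.owned_by_allocator)	return "allocator";
 *		if (m.data_type != BUCKET_DATA)	return "metadata";
 *		if (m.dirty_sectors)		return "dirty";
 *		if (m.cached_sectors)		return "cached";
 *		return "free";
 *	}
 */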

#include "bcache.h"
#include "alloc.h"
#include "btree_gc.h"
#include "buckets.h"
#include "error.h"

#include <linux/preempt.h>
#include <trace/events/bcache.h>

#ifdef DEBUG_BUCKETS

#define lg_local_lock	lg_global_lock
#define lg_local_unlock	lg_global_unlock

static void bch_fs_stats_verify(struct bch_fs *c)
{
	struct bch_fs_usage stats =
		__bch_fs_usage_read(c);

	if ((s64) stats.sectors_dirty < 0)
		panic("sectors_dirty underflow: %lli\n", stats.sectors_dirty);

	if ((s64) stats.sectors_cached < 0)
		panic("sectors_cached underflow: %lli\n", stats.sectors_cached);

	if ((s64) stats.sectors_meta < 0)
		panic("sectors_meta underflow: %lli\n", stats.sectors_meta);

	if ((s64) stats.sectors_persistent_reserved < 0)
		panic("sectors_persistent_reserved underflow: %lli\n", stats.sectors_persistent_reserved);

	if ((s64) stats.sectors_online_reserved < 0)
		panic("sectors_online_reserved underflow: %lli\n", stats.sectors_online_reserved);
}

#else

static void bch_fs_stats_verify(struct bch_fs *c) {}

#endif

/*
 * Clear journal_seq_valid for buckets for which it's not needed, to prevent
 * wraparound:
 */
void bch_bucket_seq_cleanup(struct bch_fs *c)
{
	u16 last_seq_ondisk = c->journal.last_seq_ondisk;
	struct bch_dev *ca;
	struct bucket *g;
	struct bucket_mark m;
	unsigned i;

	for_each_member_device(ca, c, i)
		for_each_bucket(g, ca) {
			bucket_cmpxchg(g, m, ({
				if (!m.journal_seq_valid ||
				    bucket_needs_journal_commit(m, last_seq_ondisk))
					break;

				m.journal_seq_valid = 0;
			}));
		}
}

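/*
 * Sum every u64 counter in _stats into _acc; both must point to the same kind
 * of usage struct (bch_dev_usage or bch_fs_usage), which are treated here as
 * flat arrays of u64s:
 */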
#define bch_usage_add(_acc, _stats)					\
do {									\
	typeof(_acc) _a = (_acc), _s = (_stats);			\
	unsigned i;							\
									\
	for (i = 0; i < sizeof(*_a) / sizeof(u64); i++)			\
		((u64 *) (_a))[i] += ((u64 *) (_s))[i];			\
} while (0)

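/* Sum a percpu usage struct over all possible CPUs into one plain copy: */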
#define bch_usage_read_raw(_stats)					\
({									\
	typeof(*this_cpu_ptr(_stats)) _acc = { 0 };			\
	int cpu;							\
									\
	for_each_possible_cpu(cpu)					\
		bch_usage_add(&_acc, per_cpu_ptr((_stats), cpu));	\
									\
	_acc;								\
})

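/*
 * Return the cached usage totals while GC is running (the live percpu
 * counters are being rebuilt), and a fresh sum of the percpu counters once GC
 * is done; the gc_pos_lock seqcount retry guards against the GC phase
 * changing mid-read:
 */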
#define bch_usage_read_cached(_c, _cached, _uncached)			\
({									\
	typeof(_cached) _ret;						\
	unsigned _seq;							\
									\
	do {								\
		_seq = read_seqcount_begin(&(_c)->gc_pos_lock);		\
		_ret = (_c)->gc_pos.phase == GC_PHASE_DONE		\
			? bch_usage_read_raw(_uncached)			\
			: (_cached);					\
	} while (read_seqcount_retry(&(_c)->gc_pos_lock, _seq));	\
									\
	_ret;								\
})

struct bch_dev_usage __bch_dev_usage_read(struct bch_dev *ca)
{
	return bch_usage_read_raw(ca->usage_percpu);
}

struct bch_dev_usage bch_dev_usage_read(struct bch_dev *ca)
{
	return bch_usage_read_cached(ca->fs,
				ca->usage_cached,
				ca->usage_percpu);
}

struct bch_fs_usage
__bch_fs_usage_read(struct bch_fs *c)
{
	return bch_usage_read_raw(c->usage_percpu);
}

struct bch_fs_usage
bch_fs_usage_read(struct bch_fs *c)
{
	return bch_usage_read_cached(c,
				     c->usage_cached,
				     c->usage_percpu);
}

static inline int is_meta_bucket(struct bucket_mark m)
{
	return m.data_type != BUCKET_DATA;
}

static inline int is_dirty_bucket(struct bucket_mark m)
{
	return m.data_type == BUCKET_DATA && !!m.dirty_sectors;
}

static inline int is_cached_bucket(struct bucket_mark m)
{
	return m.data_type == BUCKET_DATA &&
		!m.dirty_sectors && !!m.cached_sectors;
}

static inline enum s_alloc bucket_type(struct bucket_mark m)
{
	return is_meta_bucket(m) ? S_META : S_DIRTY;
}

static bool bucket_became_unavailable(struct bch_fs *c,
				      struct bucket_mark old,
				      struct bucket_mark new)
{
	return is_available_bucket(old) &&
	       !is_available_bucket(new) &&
	       c && c->gc_pos.phase == GC_PHASE_DONE;
}

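/*
 * Fold a batch of pending usage deltas into the filesystem-wide percpu
 * counters.  Any net growth must be covered by @disk_res; online_reserved is
 * transferred unconditionally (it's not recomputed by GC), while the rest is
 * skipped when GC is going to revisit @gc_pos and count it itself:
 */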
void bch_fs_usage_apply(struct bch_fs *c,
			struct bch_fs_usage *stats,
			struct disk_reservation *disk_res,
			struct gc_pos gc_pos)
{
	s64 added =
		stats->s[S_COMPRESSED][S_META] +
		stats->s[S_COMPRESSED][S_DIRTY] +
		stats->persistent_reserved +
		stats->online_reserved;

	/*
	 * Not allowed to reduce sectors_available except by getting a
	 * reservation:
	 */
	BUG_ON(added > (s64) (disk_res ? disk_res->sectors : 0));

	if (added > 0) {
		disk_res->sectors	-= added;
		stats->online_reserved	-= added;
	}

	lg_local_lock(&c->usage_lock);
	/* online_reserved not subject to gc: */
	this_cpu_ptr(c->usage_percpu)->online_reserved +=
		stats->online_reserved;
	stats->online_reserved = 0;

	if (!gc_will_visit(c, gc_pos))
		bch_usage_add(this_cpu_ptr(c->usage_percpu), stats);

	bch_fs_stats_verify(c);
	lg_local_unlock(&c->usage_lock);

	memset(stats, 0, sizeof(*stats));
}

static void bch_fs_usage_update(struct bch_fs_usage *fs_usage,
				struct bucket_mark old, struct bucket_mark new)
{
	fs_usage->s[S_COMPRESSED][S_CACHED] +=
		(int) new.cached_sectors - (int) old.cached_sectors;
	fs_usage->s[S_COMPRESSED][bucket_type(old)] -=
		old.dirty_sectors;
	fs_usage->s[S_COMPRESSED][bucket_type(new)] +=
		new.dirty_sectors;
}

static void bch_dev_usage_update(struct bch_dev *ca,
				 struct bucket_mark old, struct bucket_mark new)
{
	struct bch_fs *c = ca->fs;
	struct bch_dev_usage *dev_usage;

	bch_fs_inconsistent_on(old.data_type && new.data_type &&
			old.data_type != new.data_type, c,
			"different types of metadata in same bucket: %u, %u",
			old.data_type, new.data_type);

	preempt_disable();
	dev_usage = this_cpu_ptr(ca->usage_percpu);

	dev_usage->sectors[S_CACHED] +=
		(int) new.cached_sectors - (int) old.cached_sectors;

	dev_usage->sectors[bucket_type(old)] -= old.dirty_sectors;
	dev_usage->sectors[bucket_type(new)] += new.dirty_sectors;

	dev_usage->buckets_alloc +=
		(int) new.owned_by_allocator - (int) old.owned_by_allocator;

	dev_usage->buckets_meta += is_meta_bucket(new) - is_meta_bucket(old);
	dev_usage->buckets_cached += is_cached_bucket(new) - is_cached_bucket(old);
	dev_usage->buckets_dirty += is_dirty_bucket(new) - is_dirty_bucket(old);
	preempt_enable();

	if (!is_available_bucket(old) && is_available_bucket(new))
		bch_wake_allocator(ca);
}

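/*
 * Like bucket_cmpxchg(), but also folds the old/new marks into the owning
 * device's usage counters:
 */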
#define bucket_data_cmpxchg(ca, g, new, expr)				\
({									\
	struct bucket_mark _old = bucket_cmpxchg(g, new, expr);	\
									\
	bch_dev_usage_update(ca, _old, new);				\
	_old;								\
})

void bch_invalidate_bucket(struct bch_dev *ca, struct bucket *g)
{
	struct bch_fs_usage stats = { 0 };
	struct bucket_mark old, new;

	old = bucket_data_cmpxchg(ca, g, new, ({
		new.owned_by_allocator	= 1;
		new.had_metadata	= 0;
		new.data_type		= 0;
		new.cached_sectors	= 0;
		new.dirty_sectors	= 0;
		new.copygc		= 0;
		new.gen++;
	}));

	/* XXX: we're not actually updating fs usage's cached sectors... */
	bch_fs_usage_update(&stats, old, new);

	if (!old.owned_by_allocator && old.cached_sectors)
		trace_bcache_invalidate(ca, g - ca->buckets,
					old.cached_sectors);
}

void bch_mark_free_bucket(struct bch_dev *ca, struct bucket *g)
{
	struct bucket_mark old, new;

	old = bucket_data_cmpxchg(ca, g, new, ({
		new.owned_by_allocator	= 0;
		new.data_type		= 0;
		new.cached_sectors	= 0;
		new.dirty_sectors	= 0;
	}));

	BUG_ON(bucket_became_unavailable(ca->fs, old, new));
}

void bch_mark_alloc_bucket(struct bch_dev *ca, struct bucket *g,
			   bool owned_by_allocator)
{
	struct bucket_mark new;

	bucket_data_cmpxchg(ca, g, new, ({
		new.owned_by_allocator = owned_by_allocator;
	}));
}

void bch_mark_metadata_bucket(struct bch_dev *ca, struct bucket *g,
			      enum bucket_data_type type,
			      bool may_make_unavailable)
{
	struct bucket_mark old, new;

	BUG_ON(!type);

	old = bucket_data_cmpxchg(ca, g, new, ({
		new.data_type = type;
		new.had_metadata = 1;
	}));

	BUG_ON(old.cached_sectors);
	BUG_ON(old.dirty_sectors);
	BUG_ON(!may_make_unavailable &&
	       bucket_became_unavailable(ca->fs, old, new));
}

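/*
 * Add src to dst, clamping the result at max instead of letting the narrow
 * bucket sector counts overflow; e.g. with dst == max - 1 and src == 2, dst
 * ends up at max and the saturation tracepoint fires:
 */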
#define saturated_add(ca, dst, src, max)			\
do {								\
	BUG_ON((int) (dst) + (src) < 0);			\
	if ((dst) == (max))					\
		;						\
	else if ((dst) + (src) <= (max))			\
		dst += (src);					\
	else {							\
		dst = (max);					\
		trace_bcache_sectors_saturated(ca);		\
	}							\
} while (0)

#if 0
/* Reverting this until the copygc + compression issue is fixed: */

static unsigned __disk_sectors(const union bch_extent_crc *crc, unsigned sectors)
{
	return crc_compression_type(crc)
		? sectors * crc_compressed_size(crc) / crc_uncompressed_size(crc)
		: sectors;
}

static unsigned __compressed_sectors(const union bch_extent_crc *crc, unsigned sectors)
{
	return crc_compression_type(crc)
		? min_t(unsigned, crc_compressed_size(crc), sectors)
		: sectors;
}
#else
static unsigned __disk_sectors(const union bch_extent_crc *crc, unsigned sectors)
{
	return sectors;
}

static unsigned __compressed_sectors(const union bch_extent_crc *crc, unsigned sectors)
{
	return sectors;
}
#endif
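
/*
 * For reference, the disabled versions above scale by the compression ratio:
 * marking 64 sectors of an extent whose crc records a compressed size of 32
 * sectors and an uncompressed size of 128 would count 64 * 32 / 128 = 16 disk
 * sectors and min(32, 64) = 32 compressed sectors; the fallbacks above simply
 * count 64 for both.
 */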

/*
 * Checking against gc's position has to be done here, inside the cmpxchg()
 * loop, to avoid racing with the start of gc clearing all the marks - GC does
 * that with the gc pos seqlock held.
 */
static void bch_mark_pointer(struct bch_fs *c,
			     struct bkey_s_c_extent e,
			     const union bch_extent_crc *crc,
			     const struct bch_extent_ptr *ptr,
			     s64 sectors, enum s_alloc type,
			     bool may_make_unavailable,
			     struct bch_fs_usage *stats,
			     bool gc_will_visit, u64 journal_seq)
{
	struct bucket_mark old, new;
	unsigned saturated;
	struct bch_dev *ca = c->devs[ptr->dev];
	struct bucket *g = ca->buckets + PTR_BUCKET_NR(ca, ptr);
	unsigned old_sectors, new_sectors;
	int disk_sectors, compressed_sectors;

	if (sectors > 0) {
		old_sectors = 0;
		new_sectors = sectors;
	} else {
		old_sectors = e.k->size;
		new_sectors = e.k->size + sectors;
	}

	disk_sectors = -__disk_sectors(crc, old_sectors)
		+ __disk_sectors(crc, new_sectors);
	compressed_sectors = -__compressed_sectors(crc, old_sectors)
		+ __compressed_sectors(crc, new_sectors);

	if (gc_will_visit) {
		if (journal_seq)
			bucket_cmpxchg(g, new, new.journal_seq = journal_seq);

		goto out;
	}

	old = bucket_data_cmpxchg(ca, g, new, ({
		saturated = 0;

		/*
		 * Check this after reading bucket mark to guard against
		 * the allocator invalidating a bucket after we've already
		 * checked the gen
		 */
		if (gen_after(new.gen, ptr->gen)) {
			EBUG_ON(type != S_CACHED &&
				test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags));
			return;
		}

		EBUG_ON(type != S_CACHED &&
			!may_make_unavailable &&
			is_available_bucket(new) &&
			test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags));

		if (type != S_CACHED &&
		    new.dirty_sectors == GC_MAX_SECTORS_USED &&
		    disk_sectors < 0)
			saturated = -disk_sectors;

		if (type == S_CACHED)
			saturated_add(ca, new.cached_sectors, disk_sectors,
				      GC_MAX_SECTORS_USED);
		else
			saturated_add(ca, new.dirty_sectors, disk_sectors,
				      GC_MAX_SECTORS_USED);

		if (!new.dirty_sectors &&
		    !new.cached_sectors) {
			new.data_type	= 0;

			if (journal_seq) {
				new.journal_seq_valid = 1;
				new.journal_seq = journal_seq;
			}
		} else {
			new.data_type = type == S_META
				? BUCKET_BTREE : BUCKET_DATA;
		}

		new.had_metadata |= is_meta_bucket(new);
	}));

	BUG_ON(!may_make_unavailable &&
	       bucket_became_unavailable(c, old, new));

	if (saturated &&
	    atomic_long_add_return(saturated,
				   &ca->saturated_count) >=
	    ca->free_inc.size << ca->bucket_bits) {
		if (c->gc_thread) {
			trace_bcache_gc_sectors_saturated(c);
			wake_up_process(c->gc_thread);
		}
	}
out:
	stats->s[S_COMPRESSED][type]	+= compressed_sectors;
	stats->s[S_UNCOMPRESSED][type]	+= sectors;
}

static void bch_mark_extent(struct bch_fs *c, struct bkey_s_c_extent e,
			    s64 sectors, bool metadata,
			    bool may_make_unavailable,
			    struct bch_fs_usage *stats,
			    bool gc_will_visit, u64 journal_seq)
{
	const struct bch_extent_ptr *ptr;
	const union bch_extent_crc *crc;
	enum s_alloc type = metadata ? S_META : S_DIRTY;

	BUG_ON(metadata && bkey_extent_is_cached(e.k));
	BUG_ON(!sectors);

	extent_for_each_ptr_crc(e, ptr, crc)
		bch_mark_pointer(c, e, crc, ptr, sectors,
				 ptr->cached ? S_CACHED : type,
				 may_make_unavailable,
				 stats, gc_will_visit, journal_seq);
}

static void __bch_mark_key(struct bch_fs *c, struct bkey_s_c k,
			   s64 sectors, bool metadata,
			   bool may_make_unavailable,
			   struct bch_fs_usage *stats,
			   bool gc_will_visit, u64 journal_seq)
{
	switch (k.k->type) {
	case BCH_EXTENT:
	case BCH_EXTENT_CACHED:
		bch_mark_extent(c, bkey_s_c_to_extent(k), sectors, metadata,
				may_make_unavailable, stats,
				gc_will_visit, journal_seq);
		break;
	case BCH_RESERVATION: {
		struct bkey_s_c_reservation r = bkey_s_c_to_reservation(k);

		stats->persistent_reserved += r.v->nr_replicas * sectors;
		break;
	}
	}
}

void __bch_gc_mark_key(struct bch_fs *c, struct bkey_s_c k,
		       s64 sectors, bool metadata,
		       struct bch_fs_usage *stats)
{
	__bch_mark_key(c, k, sectors, metadata, true, stats, false, 0);
}

void bch_gc_mark_key(struct bch_fs *c, struct bkey_s_c k,
		     s64 sectors, bool metadata)
{
	struct bch_fs_usage stats = { 0 };

	__bch_gc_mark_key(c, k, sectors, metadata, &stats);

	preempt_disable();
	bch_usage_add(this_cpu_ptr(c->usage_percpu), &stats);
	preempt_enable();
}

void bch_mark_key(struct bch_fs *c, struct bkey_s_c k,
		  s64 sectors, bool metadata, struct gc_pos gc_pos,
		  struct bch_fs_usage *stats, u64 journal_seq)
{
	/*
	 * synchronization w.r.t. GC:
	 *
	 * Normally, bucket sector counts/marks are updated on the fly, as
	 * references are added/removed from the btree, the lists of buckets the
	 * allocator owns, other metadata buckets, etc.
	 *
	 * When GC is in progress and going to mark this reference, we do _not_
	 * mark this reference here, to avoid double counting - GC will count it
	 * when it gets to it.
	 *
	 * To know whether we should mark a given reference (GC either isn't
	 * running, or has already marked references at this position) we
	 * construct a total order for everything GC walks. Then, we can simply
	 * compare the position of the reference we're marking - @gc_pos - with
	 * GC's current position. If GC is going to mark this reference, GC's
	 * current position will be less than @gc_pos; if GC's current position
	 * is greater than @gc_pos GC has either already walked this position,
	 * or isn't running.
	 *
	 * To avoid racing with GC's position changing, we have to deal with
	 *  - GC's position being set to GC_POS_MIN when GC starts:
	 *    usage_lock guards against this
	 *  - GC's position overtaking @gc_pos: we guard against this with
	 *    whatever lock protects the data structure the reference lives in
	 *    (e.g. the btree node lock, or the relevant allocator lock).
	 */
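	/*
	 * Concretely, the rule above boils down to (a sketch, not the actual
	 * implementation of the helper): gc_will_visit(c, gc_pos) is true when
	 * GC's current position still compares below @gc_pos, i.e. GC hasn't
	 * reached this reference yet and will count it itself - in which case
	 * bch_mark_pointer() only records the journal_seq in the bucket and
	 * leaves the mark for GC.
	 */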
	lg_local_lock(&c->usage_lock);
	__bch_mark_key(c, k, sectors, metadata, false, stats,
		       gc_will_visit(c, gc_pos), journal_seq);

	bch_fs_stats_verify(c);
	lg_local_unlock(&c->usage_lock);
}

static u64 __recalc_sectors_available(struct bch_fs *c)
{
	return c->capacity - bch_fs_sectors_used(c);
}

/* Used by gc when it's starting: */
void bch_recalc_sectors_available(struct bch_fs *c)
{
	int cpu;

	lg_global_lock(&c->usage_lock);

	for_each_possible_cpu(cpu)
		per_cpu_ptr(c->usage_percpu, cpu)->available_cache = 0;

	atomic64_set(&c->sectors_available,
		     __recalc_sectors_available(c));

	lg_global_unlock(&c->usage_lock);
}

void bch_disk_reservation_put(struct bch_fs *c,
			      struct disk_reservation *res)
{
	if (res->sectors) {
		lg_local_lock(&c->usage_lock);
		this_cpu_sub(c->usage_percpu->online_reserved,
			     res->sectors);

		bch_fs_stats_verify(c);
		lg_local_unlock(&c->usage_lock);

		res->sectors = 0;
	}
}

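/*
 * Reservation fast path: each CPU keeps a small cache of sectors already
 * carved out of c->sectors_available, refilled in batches of SECTORS_CACHE,
 * so that most reservations never touch the shared atomic:
 */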
#define SECTORS_CACHE	1024

int bch_disk_reservation_add(struct bch_fs *c,
			     struct disk_reservation *res,
			     unsigned sectors, int flags)
{
	struct bch_fs_usage *stats;
	u64 old, new, v;
	s64 sectors_available;
	int ret;

	sectors *= res->nr_replicas;

	lg_local_lock(&c->usage_lock);
	stats = this_cpu_ptr(c->usage_percpu);

	if (sectors <= stats->available_cache)
		goto out;

	v = atomic64_read(&c->sectors_available);
	do {
		old = v;
		if (old < sectors) {
			lg_local_unlock(&c->usage_lock);
			goto recalculate;
		}

		new = max_t(s64, 0, old - sectors - SECTORS_CACHE);
	} while ((v = atomic64_cmpxchg(&c->sectors_available,
				       old, new)) != old);

	stats->available_cache	+= old - new;
out:
	stats->available_cache	-= sectors;
	stats->online_reserved	+= sectors;
	res->sectors		+= sectors;

	bch_fs_stats_verify(c);
	lg_local_unlock(&c->usage_lock);
	return 0;

recalculate:
	/*
	 * GC recalculates sectors_available when it starts, so that hopefully
	 * we don't normally end up blocking here:
	 */
	/*
	 * Unfortunately, we can be called from extent_insert_fixup() with btree
	 * locks held:
	 */

	if (!(flags & BCH_DISK_RESERVATION_GC_LOCK_HELD)) {
		if (!(flags & BCH_DISK_RESERVATION_BTREE_LOCKS_HELD))
			down_read(&c->gc_lock);
		else if (!down_read_trylock(&c->gc_lock))
			return -EINTR;
	}
	lg_global_lock(&c->usage_lock);

	sectors_available = __recalc_sectors_available(c);

	if (sectors <= sectors_available ||
	    (flags & BCH_DISK_RESERVATION_NOFAIL)) {
		atomic64_set(&c->sectors_available,
			     max_t(s64, 0, sectors_available - sectors));
		stats->online_reserved	+= sectors;
		res->sectors		+= sectors;
		ret = 0;
	} else {
		atomic64_set(&c->sectors_available, sectors_available);
		ret = -ENOSPC;
	}

	bch_fs_stats_verify(c);
	lg_global_unlock(&c->usage_lock);
	if (!(flags & BCH_DISK_RESERVATION_GC_LOCK_HELD))
		up_read(&c->gc_lock);

	return ret;
}

int bch_disk_reservation_get(struct bch_fs *c,
			     struct disk_reservation *res,
			     unsigned sectors, int flags)
{
	res->sectors = 0;
	res->gen = c->capacity_gen;
	res->nr_replicas = (flags & BCH_DISK_RESERVATION_METADATA)
		? c->opts.metadata_replicas
		: c->opts.data_replicas;

	return bch_disk_reservation_add(c, res, sectors, flags);
}