/*
 * Code for manipulating bucket marks for garbage collection.
 *
 * Copyright 2014 Datera, Inc.
 *
 * Bucket states:
 * - free bucket: mark == 0
 *   The bucket contains no data and will not be read
 *
 * - allocator bucket: owned_by_allocator == 1
 *   The bucket is on a free list, or it is an open bucket
 *
 * - cached bucket: owned_by_allocator == 0 &&
 *                  dirty_sectors == 0 &&
 *                  cached_sectors > 0
 *   The bucket contains data but may be safely discarded as there are
 *   enough replicas of the data on other cache devices, or it has been
 *   written back to the backing device
 *
 * - dirty bucket: owned_by_allocator == 0 &&
 *                 dirty_sectors > 0
 *   The bucket contains data that we must not discard (either the only copy,
 *   or one of the 'main copies' for data requiring multiple replicas)
 *
 * - metadata bucket: owned_by_allocator == 0 && is_metadata == 1
 *   This is a btree node, journal or gen/prio bucket
 *
 * Lifecycle:
 *
 * bucket invalidated => bucket on freelist => open bucket =>
 *     [dirty bucket =>] cached bucket => bucket invalidated => ...
 *
 * Note that cache promotion can skip the dirty bucket step, as data
 * is copied from a deeper tier to a shallower tier, onto a cached
 * bucket.
 * Note also that a cached bucket can spontaneously become dirty --
 * see the cached => dirty transition below.
 *
 * Only a traversal of the key space can determine whether a bucket is
 * truly dirty or cached.
 *
 * Transitions:
 *
 * - free => allocator: bucket was invalidated
 * - cached => allocator: bucket was invalidated
 *
 * - allocator => dirty: open bucket was filled up
 * - allocator => cached: open bucket was filled up
 * - allocator => metadata: metadata was allocated
 *
 * - dirty => cached: dirty sectors were copied to a deeper tier
 * - dirty => free: dirty sectors were overwritten or moved (copy gc)
 * - cached => free: cached sectors were overwritten
 *
 * - metadata => free: metadata was freed
 *
 * - cached => dirty: a device was removed so formerly replicated data
 *                    is no longer sufficiently replicated
 * - free => cached: cannot happen
 * - free => dirty: cannot happen
 * - free => metadata: cannot happen
 */
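/*
 * Example lifecycle: a bucket holding only cached data is invalidated and
 * handed to the allocator (cached => allocator), filled by foreground writes
 * (allocator => dirty), its contents are later copied to a deeper tier
 * (dirty => cached), and eventually it is invalidated and reused again.
 */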
#include <linux/preempt.h>
#include <trace/events/bcachefs.h>

#ifdef DEBUG_BUCKETS

#define lg_local_lock		lg_global_lock
#define lg_local_unlock		lg_global_unlock
static void bch2_fs_stats_verify(struct bch_fs *c)
{
	struct bch_fs_usage stats =
		__bch2_fs_usage_read(c);
	unsigned i;

	for (i = 0; i < ARRAY_SIZE(stats.s); i++) {
		if ((s64) stats.s[i].data[S_META] < 0)
			panic("replicas %u meta underflow: %lli\n",
			      i + 1, stats.s[i].data[S_META]);

		if ((s64) stats.s[i].data[S_DIRTY] < 0)
			panic("replicas %u dirty underflow: %lli\n",
			      i + 1, stats.s[i].data[S_DIRTY]);

		if ((s64) stats.s[i].persistent_reserved < 0)
			panic("replicas %u reserved underflow: %lli\n",
			      i + 1, stats.s[i].persistent_reserved);
	}

	if ((s64) stats.online_reserved < 0)
		panic("sectors_online_reserved underflow: %lli\n",
		      stats.online_reserved);
}
static void bch2_dev_stats_verify(struct bch_dev *ca)
{
	struct bch_dev_usage stats =
		__bch2_dev_usage_read(ca);
	u64 n = ca->mi.nbuckets - ca->mi.first_bucket;
	unsigned i;

	for (i = 0; i < ARRAY_SIZE(stats.buckets); i++)
		BUG_ON(stats.buckets[i] > n);
	BUG_ON(stats.buckets_alloc > n);
	BUG_ON(stats.buckets_unavailable > n);
}
static void bch2_disk_reservations_verify(struct bch_fs *c, int flags)
{
	if (!(flags & BCH_DISK_RESERVATION_NOFAIL)) {
		u64 used = __bch2_fs_sectors_used(c, __bch2_fs_usage_read(c));
		u64 cached = 0;
		u64 avail = atomic64_read(&c->sectors_available);
		int cpu;

		for_each_possible_cpu(cpu)
			cached += per_cpu_ptr(c->usage_percpu, cpu)->available_cache;

		if (used + avail + cached > c->capacity)
			panic("used %llu avail %llu cached %llu capacity %llu\n",
			      used, avail, cached, c->capacity);
	}
}
#else

static void bch2_fs_stats_verify(struct bch_fs *c) {}
static void bch2_dev_stats_verify(struct bch_dev *ca) {}
static void bch2_disk_reservations_verify(struct bch_fs *c, int flags) {}

#endif
/*
 * Clear journal_seq_valid for buckets for which it's not needed, to prevent
 * wraparound:
 */
void bch2_bucket_seq_cleanup(struct bch_fs *c)
{
	u16 last_seq_ondisk = c->journal.last_seq_ondisk;
	struct bch_dev *ca;
	struct bucket_array *buckets;
	struct bucket *g;
	struct bucket_mark m;
	unsigned i;

	for_each_member_device(ca, c, i) {
		down_read(&ca->bucket_lock);
		buckets = bucket_array(ca);

		for_each_bucket(g, buckets) {
			bucket_cmpxchg(g, m, ({
				if (!m.journal_seq_valid ||
				    bucket_needs_journal_commit(m, last_seq_ondisk))
					break;

				m.journal_seq_valid = 0;
			}));
		}
		up_read(&ca->bucket_lock);
	}
}
#define bch2_usage_add(_acc, _stats)					\
do {									\
	typeof(_acc) _a = (_acc), _s = (_stats);			\
	unsigned i;							\
									\
	for (i = 0; i < sizeof(*_a) / sizeof(u64); i++)			\
		((u64 *) (_a))[i] += ((u64 *) (_s))[i];			\
} while (0)

#define bch2_usage_read_raw(_stats)					\
({									\
	typeof(*this_cpu_ptr(_stats)) _acc;				\
	int cpu;							\
									\
	memset(&_acc, 0, sizeof(_acc));					\
									\
	for_each_possible_cpu(cpu)					\
		bch2_usage_add(&_acc, per_cpu_ptr((_stats), cpu));	\
									\
	_acc;								\
})

#define bch2_usage_read_cached(_c, _cached, _uncached)			\
({									\
	typeof(_cached) _ret;						\
	unsigned _seq;							\
									\
	do {								\
		_seq = read_seqcount_begin(&(_c)->gc_pos_lock);		\
		_ret = (_c)->gc_pos.phase == GC_PHASE_DONE		\
			? bch2_usage_read_raw(_uncached)		\
			: (_cached);					\
	} while (read_seqcount_retry(&(_c)->gc_pos_lock, _seq));	\
									\
	_ret;								\
})
struct bch_dev_usage __bch2_dev_usage_read(struct bch_dev *ca)
{
	return bch2_usage_read_raw(ca->usage_percpu);
}

struct bch_dev_usage bch2_dev_usage_read(struct bch_fs *c, struct bch_dev *ca)
{
	return bch2_usage_read_cached(c, ca->usage_cached, ca->usage_percpu);
}

struct bch_fs_usage
__bch2_fs_usage_read(struct bch_fs *c)
{
	return bch2_usage_read_raw(c->usage_percpu);
}

struct bch_fs_usage
bch2_fs_usage_read(struct bch_fs *c)
{
	return bch2_usage_read_cached(c,
				      c->usage_cached,
				      c->usage_percpu);
}
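/*
 * Example of how the accessors above are meant to be used -- an illustrative
 * helper only, not part of this file's API (the real consumers are the
 * allocator and sysfs code):
 */
static inline u64 dev_buckets_available_example(struct bch_fs *c,
						struct bch_dev *ca)
{
	struct bch_dev_usage stats = bch2_dev_usage_read(c, ca);
	u64 total = ca->mi.nbuckets - ca->mi.first_bucket;

	/* buckets not holding dirty data, metadata, or allocator state: */
	return total - stats.buckets_unavailable;
}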
struct fs_usage_sum {
	u64			data;
	u64			reserved;
};

static inline struct fs_usage_sum __fs_usage_sum(struct bch_fs_usage stats)
{
	struct fs_usage_sum sum = { 0 };
	unsigned i;

	for (i = 0; i < ARRAY_SIZE(stats.s); i++) {
		sum.data += (stats.s[i].data[S_META] +
			     stats.s[i].data[S_DIRTY]) * (i + 1);
		sum.reserved += stats.s[i].persistent_reserved * (i + 1);
	}

	sum.reserved += stats.online_reserved;
	return sum;
}
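/*
 * Example: 100 sectors of data stored with 2 replicas is accounted as
 * stats.s[1].data[S_DIRTY] == 100 and contributes 200 sectors to sum.data;
 * a persistent reservation of 100 sectors at 2 replicas likewise contributes
 * 200 sectors to sum.reserved.
 */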
#define RESERVE_FACTOR	6

static u64 reserve_factor(u64 r)
{
	return r + (round_up(r, (1 << RESERVE_FACTOR)) >> RESERVE_FACTOR);
}

static u64 avail_factor(u64 r)
{
	return (r << RESERVE_FACTOR) / ((1 << RESERVE_FACTOR) + 1);
}
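/*
 * Worked example with RESERVE_FACTOR == 6 (i.e. ~1/64 held back):
 * reserve_factor(1000) = 1000 + (round_up(1000, 64) >> 6) = 1016, and
 * avail_factor(4160) = (4160 * 64) / 65 = 4096 -- so avail_factor()
 * approximately undoes reserve_factor(): avail_factor(reserve_factor(r)) ~= r.
 */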
u64 __bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage stats)
{
	struct fs_usage_sum sum = __fs_usage_sum(stats);

	return sum.data + reserve_factor(sum.reserved);
}

u64 bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage stats)
{
	return min(c->capacity, __bch2_fs_sectors_used(c, stats));
}

u64 bch2_fs_sectors_free(struct bch_fs *c, struct bch_fs_usage stats)
{
	return avail_factor(c->capacity - bch2_fs_sectors_used(c, stats));
}
static inline int is_unavailable_bucket(struct bucket_mark m)
{
	return !is_available_bucket(m);
}

static inline enum bch_data_type bucket_type(struct bucket_mark m)
{
	return m.cached_sectors && !m.dirty_sectors
		? BCH_DATA_CACHED
		: m.data_type;
}
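/*
 * Purely illustrative (not used anywhere): the bucket states from the comment
 * at the top of this file, expressed against struct bucket_mark. Note that
 * metadata buckets also carry dirty_sectors (see bch2_mark_metadata_bucket()
 * below); they can be told apart via m.data_type (BCH_DATA_BTREE etc.).
 */
enum bucket_state_example {
	BUCKET_FREE_EXAMPLE,
	BUCKET_ALLOCATOR_EXAMPLE,
	BUCKET_CACHED_EXAMPLE,
	BUCKET_DIRTY_EXAMPLE,
};

static inline enum bucket_state_example
bucket_state_example(struct bucket_mark m)
{
	if (m.owned_by_allocator)
		return BUCKET_ALLOCATOR_EXAMPLE;
	if (m.dirty_sectors)
		return BUCKET_DIRTY_EXAMPLE;
	if (m.cached_sectors)
		return BUCKET_CACHED_EXAMPLE;
	return BUCKET_FREE_EXAMPLE;
}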
static bool bucket_became_unavailable(struct bch_fs *c,
				      struct bucket_mark old,
				      struct bucket_mark new)
{
	return is_available_bucket(old) &&
	       !is_available_bucket(new) &&
	       c && c->gc_pos.phase == GC_PHASE_DONE;
}
void bch2_fs_usage_apply(struct bch_fs *c,
			 struct bch_fs_usage *stats,
			 struct disk_reservation *disk_res,
			 struct gc_pos gc_pos)
{
	struct fs_usage_sum sum = __fs_usage_sum(*stats);
	s64 added = sum.data + sum.reserved;

	/*
	 * Not allowed to reduce sectors_available except by getting a
	 * reservation:
	 */
	BUG_ON(added > (s64) (disk_res ? disk_res->sectors : 0));

	if (added > 0) {
		disk_res->sectors	-= added;
		stats->online_reserved	-= added;
	}

	lg_local_lock(&c->usage_lock);
	/* online_reserved not subject to gc: */
	this_cpu_ptr(c->usage_percpu)->online_reserved +=
		stats->online_reserved;
	stats->online_reserved = 0;

	if (!gc_will_visit(c, gc_pos))
		bch2_usage_add(this_cpu_ptr(c->usage_percpu), stats);

	bch2_fs_stats_verify(c);
	lg_local_unlock(&c->usage_lock);

	memset(stats, 0, sizeof(*stats));
}
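/*
 * Typical calling pattern (an illustrative sketch -- the real callers live in
 * the btree update path, and journal_seq/flags are elided here):
 *
 *	struct bch_fs_usage stats = { 0 };
 *
 *	bch2_mark_key(c, k, sectors, false, pos, &stats, 0, 0);
 *	bch2_fs_usage_apply(c, &stats, &res, pos);
 *
 * i.e. marking accumulates deltas into @stats, and bch2_fs_usage_apply()
 * folds them into the filesystem usage and charges @res for any net increase.
 */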
static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
				  struct bucket_mark old, struct bucket_mark new)
{
	struct bch_dev_usage *dev_usage;

	lockdep_assert_held(&c->usage_lock);

	bch2_fs_inconsistent_on(old.data_type && new.data_type &&
				old.data_type != new.data_type, c,
				"different types of data in same bucket: %u, %u",
				old.data_type, new.data_type);

	dev_usage = this_cpu_ptr(ca->usage_percpu);

	dev_usage->buckets[bucket_type(old)]--;
	dev_usage->buckets[bucket_type(new)]++;

	dev_usage->buckets_alloc +=
		(int) new.owned_by_allocator - (int) old.owned_by_allocator;
	dev_usage->buckets_unavailable +=
		is_unavailable_bucket(new) - is_unavailable_bucket(old);

	dev_usage->sectors[old.data_type] -= old.dirty_sectors;
	dev_usage->sectors[new.data_type] += new.dirty_sectors;
	dev_usage->sectors[BCH_DATA_CACHED] +=
		(int) new.cached_sectors - (int) old.cached_sectors;

	if (!is_available_bucket(old) && is_available_bucket(new))
		bch2_wake_allocator(ca);

	bch2_dev_stats_verify(ca);
}
#define bucket_data_cmpxchg(c, ca, g, new, expr)			\
({									\
	struct bucket_mark _old = bucket_cmpxchg(g, new, expr);	\
									\
	bch2_dev_usage_update(c, ca, _old, new);			\
	_old;								\
})
bool bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca,
			    size_t b, struct bucket_mark *old)
{
	struct bucket *g;
	struct bucket_mark new;

	lg_local_lock(&c->usage_lock);
	g = bucket(ca, b);

	*old = bucket_data_cmpxchg(c, ca, g, new, ({
		if (!is_available_bucket(new)) {
			lg_local_unlock(&c->usage_lock);
			return false;
		}

		new.owned_by_allocator	= 1;
		new.data_type		= 0;
		new.cached_sectors	= 0;
		new.dirty_sectors	= 0;
		new.gen++;
	}));
	lg_local_unlock(&c->usage_lock);

	if (!old->owned_by_allocator && old->cached_sectors)
		trace_invalidate(ca, bucket_to_sector(ca, b),
				 old->cached_sectors);
	return true;
}
void bch2_mark_alloc_bucket(struct bch_fs *c, struct bch_dev *ca,
			    size_t b, bool owned_by_allocator,
			    struct gc_pos pos, unsigned flags)
{
	struct bucket *g;
	struct bucket_mark old, new;

	lg_local_lock(&c->usage_lock);
	g = bucket(ca, b);

	if (!(flags & BCH_BUCKET_MARK_GC_LOCK_HELD) &&
	    gc_will_visit(c, pos)) {
		lg_local_unlock(&c->usage_lock);
		return;
	}

	old = bucket_data_cmpxchg(c, ca, g, new, ({
		new.owned_by_allocator = owned_by_allocator;
	}));
	lg_local_unlock(&c->usage_lock);

	BUG_ON(!owned_by_allocator && !old.owned_by_allocator &&
	       c->gc_pos.phase == GC_PHASE_DONE);
}
#define saturated_add(ca, dst, src, max)			\
do {								\
	BUG_ON((int) (dst) + (src) < 0);			\
	if ((dst) == (max))					\
		;						\
	else if ((dst) + (src) <= (max))			\
		dst += (src);					\
	else {							\
		dst = (max);					\
		trace_sectors_saturated(ca);			\
	}							\
} while (0)
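/*
 * Example: with dst == 10, src == 5, max == 12 the sum would overshoot, so
 * dst is clamped to 12 and trace_sectors_saturated() fires; with src == 2,
 * dst simply becomes 12 and no trace event is emitted.
 */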
void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
			       size_t b, enum bch_data_type type,
			       unsigned sectors, struct gc_pos pos,
			       unsigned flags)
{
	struct bucket *g;
	struct bucket_mark old, new;

	lg_local_lock(&c->usage_lock);
	g = bucket(ca, b);

	if (!(flags & BCH_BUCKET_MARK_GC_LOCK_HELD) &&
	    gc_will_visit(c, pos)) {
		lg_local_unlock(&c->usage_lock);
		return;
	}

	old = bucket_data_cmpxchg(c, ca, g, new, ({
		saturated_add(ca, new.dirty_sectors, sectors,
			      GC_MAX_SECTORS_USED);
		new.data_type = type;
	}));
	lg_local_unlock(&c->usage_lock);

	BUG_ON(!(flags & BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE) &&
	       bucket_became_unavailable(c, old, new));
}
/* Reverting this until the copygc + compression issue is fixed: */

static int __disk_sectors(struct bch_extent_crc_unpacked crc, unsigned sectors)
{
	if (!sectors)
		return 0;

	return max(1U, DIV_ROUND_UP(sectors * crc.compressed_size,
				    crc.uncompressed_size));
}
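/*
 * Example: for an extent compressed from 128 sectors down to 32, dropping 64
 * (uncompressed) sectors is accounted as DIV_ROUND_UP(64 * 32, 128) == 16
 * sectors on disk; the max(1U, ...) ensures a nonempty extent never rounds
 * down to zero disk sectors.
 */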
/*
 * Checking against gc's position has to be done here, inside the cmpxchg()
 * loop, to avoid racing with the start of gc clearing all the marks - GC does
 * that with the gc pos seqlock held.
 */
static void bch2_mark_pointer(struct bch_fs *c,
			      struct bkey_s_c_extent e,
			      const struct bch_extent_ptr *ptr,
			      struct bch_extent_crc_unpacked crc,
			      s64 sectors, enum s_alloc type,
			      struct bch_fs_usage *stats,
			      u64 journal_seq, unsigned flags)
{
	struct bucket_mark old, new;
	unsigned saturated;
	struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
	struct bucket *g = PTR_BUCKET(ca, ptr);
	enum bch_data_type data_type = type == S_META
		? BCH_DATA_BTREE : BCH_DATA_USER;
	u64 v;

	if (crc.compression_type) {
		unsigned old_sectors, new_sectors;

		if (sectors > 0) {
			old_sectors = 0;
			new_sectors = sectors;
		} else {
			old_sectors = e.k->size;
			new_sectors = e.k->size + sectors;
		}

		sectors = -__disk_sectors(crc, old_sectors)
			  +__disk_sectors(crc, new_sectors);
	}

	if (flags & BCH_BUCKET_MARK_GC_WILL_VISIT) {
		if (journal_seq)
			bucket_cmpxchg(g, new, ({
				new.journal_seq_valid	= 1;
				new.journal_seq		= journal_seq;
			}));

		return;
	}

	v = READ_ONCE(g->_mark.counter);
	do {
		new.counter = old.counter = v;
		saturated = 0;

		/*
		 * Check this after reading bucket mark to guard against
		 * the allocator invalidating a bucket after we've already
		 * checked the gen:
		 */
		if (gen_after(new.gen, ptr->gen)) {
			BUG_ON(!test_bit(BCH_FS_ALLOC_READ_DONE, &c->flags));
			EBUG_ON(!ptr->cached &&
				test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags));
			return;
		}

		if (!ptr->cached &&
		    new.dirty_sectors == GC_MAX_SECTORS_USED &&
		    sectors < 0)
			saturated = -sectors;

		if (ptr->cached)
			saturated_add(ca, new.cached_sectors, sectors,
				      GC_MAX_SECTORS_USED);
		else
			saturated_add(ca, new.dirty_sectors, sectors,
				      GC_MAX_SECTORS_USED);

		if (!new.dirty_sectors &&
		    !new.cached_sectors) {
			new.data_type = 0;

			if (journal_seq) {
				new.journal_seq_valid = 1;
				new.journal_seq = journal_seq;
			}
		} else {
			new.data_type = data_type;
		}

		if (flags & BCH_BUCKET_MARK_NOATOMIC) {
			g->_mark = new;
			break;
		}
	} while ((v = cmpxchg(&g->_mark.counter,
			      old.counter,
			      new.counter)) != old.counter);

	bch2_dev_usage_update(c, ca, old, new);

	BUG_ON(!(flags & BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE) &&
	       bucket_became_unavailable(c, old, new));

	if (saturated &&
	    atomic_long_add_return(saturated,
				   &ca->saturated_count) >=
	    bucket_to_sector(ca, ca->free_inc.size)) {
		if (c->gc_thread) {
			trace_gc_sectors_saturated(c);
			wake_up_process(c->gc_thread);
		}
	}
}
void bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
		   s64 sectors, bool metadata,
		   struct gc_pos pos,
		   struct bch_fs_usage *stats,
		   u64 journal_seq, unsigned flags)
{
	/*
	 * synchronization w.r.t. GC:
	 *
	 * Normally, bucket sector counts/marks are updated on the fly, as
	 * references are added/removed from the btree, the lists of buckets the
	 * allocator owns, other metadata buckets, etc.
	 *
	 * When GC is in progress and going to mark this reference, we do _not_
	 * mark this reference here, to avoid double counting - GC will count it
	 * when it gets to it.
	 *
	 * To know whether we should mark a given reference (GC either isn't
	 * running, or has already marked references at this position) we
	 * construct a total order for everything GC walks. Then, we can simply
	 * compare the position of the reference we're marking - @pos - with
	 * GC's current position. If GC is going to mark this reference, GC's
	 * current position will be less than @pos; if GC's current position is
	 * greater than @pos GC has either already walked this position, or
	 * isn't running.
	 *
	 * To avoid racing with GC's position changing, we have to deal with
	 *  - GC's position being set to GC_POS_MIN when GC starts:
	 *    usage_lock guards against this
	 *  - GC's position overtaking @pos: we guard against this with
	 *    whatever lock protects the data structure the reference lives in
	 *    (e.g. the btree node lock, or the relevant allocator lock).
	 */
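	/*
	 * Concretely: once GC has finished (c->gc_pos.phase == GC_PHASE_DONE),
	 * gc_will_visit() is false for every @pos and we always mark here.
	 * While GC is mid-run, a reference at a position GC hasn't reached yet
	 * gets BCH_BUCKET_MARK_GC_WILL_VISIT set below, and bch2_mark_pointer()
	 * then only records the journal sequence number instead of updating
	 * the sector counts.
	 */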
	lg_local_lock(&c->usage_lock);
	if (!(flags & BCH_BUCKET_MARK_GC_LOCK_HELD) &&
	    gc_will_visit(c, pos))
		flags |= BCH_BUCKET_MARK_GC_WILL_VISIT;

	switch (k.k->type) {
	case BCH_EXTENT:
	case BCH_EXTENT_CACHED: {
		struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
		const struct bch_extent_ptr *ptr;
		struct bch_extent_crc_unpacked crc;
		enum s_alloc type = metadata ? S_META : S_DIRTY;
		unsigned replicas = 0;

		BUG_ON(metadata && bkey_extent_is_cached(e.k));

		extent_for_each_ptr_crc(e, ptr, crc) {
			bch2_mark_pointer(c, e, ptr, crc, sectors, type,
					  stats, journal_seq, flags);
			replicas += !ptr->cached;
		}

		if (replicas) {
			BUG_ON(replicas - 1 > ARRAY_SIZE(stats->s));
			stats->s[replicas - 1].data[type] += sectors;
		}
		break;
	}
	case BCH_RESERVATION: {
		struct bkey_s_c_reservation r = bkey_s_c_to_reservation(k);

		if (r.v->nr_replicas) {
			BUG_ON(r.v->nr_replicas - 1 > ARRAY_SIZE(stats->s));
			stats->s[r.v->nr_replicas - 1].persistent_reserved += sectors;
		}
		break;
	}
	}
	lg_local_unlock(&c->usage_lock);
}
/* Disk reservations: */

static u64 __recalc_sectors_available(struct bch_fs *c)
{
	int cpu;

	for_each_possible_cpu(cpu)
		per_cpu_ptr(c->usage_percpu, cpu)->available_cache = 0;

	return bch2_fs_sectors_free(c, bch2_fs_usage_read(c));
}

/* Used by gc when it's starting: */
void bch2_recalc_sectors_available(struct bch_fs *c)
{
	lg_global_lock(&c->usage_lock);
	atomic64_set(&c->sectors_available, __recalc_sectors_available(c));
	lg_global_unlock(&c->usage_lock);
}

void __bch2_disk_reservation_put(struct bch_fs *c, struct disk_reservation *res)
{
	lg_local_lock(&c->usage_lock);
	this_cpu_sub(c->usage_percpu->online_reserved,
		     res->sectors);

	bch2_fs_stats_verify(c);
	lg_local_unlock(&c->usage_lock);

	res->sectors = 0;
}
#define SECTORS_CACHE	1024
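/*
 * Example: a request for 16 sectors with an empty per-cpu cache tries to take
 * min(16 + SECTORS_CACHE, sectors_available) == 1040 sectors from the shared
 * atomic counter and parks the unused 1024 in this cpu's available_cache, so
 * subsequent small reservations on this cpu stay off the shared counter.
 */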
int bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res,
			      unsigned sectors, int flags)
{
	struct bch_fs_usage *stats;
	u64 old, get, v;
	s64 sectors_available;
	int ret;

	lg_local_lock(&c->usage_lock);
	stats = this_cpu_ptr(c->usage_percpu);

	if (sectors <= stats->available_cache)
		goto out;

	v = atomic64_read(&c->sectors_available);
	do {
		old = v;
		get = min((u64) sectors + SECTORS_CACHE, old);

		if (get < sectors) {
			lg_local_unlock(&c->usage_lock);
			goto recalculate;
		}
	} while ((v = atomic64_cmpxchg(&c->sectors_available,
				       old, old - get)) != old);

	stats->available_cache	+= get;

out:
	stats->available_cache	-= sectors;
	stats->online_reserved	+= sectors;
	res->sectors		+= sectors;

	bch2_disk_reservations_verify(c, flags);
	bch2_fs_stats_verify(c);
	lg_local_unlock(&c->usage_lock);
	return 0;

recalculate:
	/*
	 * GC recalculates sectors_available when it starts, so that hopefully
	 * we don't normally end up blocking here:
	 */

	/*
	 * Unfortunately, we can be called from extent_insert_fixup() with btree
	 * locks held:
	 */
	if (!(flags & BCH_DISK_RESERVATION_GC_LOCK_HELD)) {
		if (!(flags & BCH_DISK_RESERVATION_BTREE_LOCKS_HELD))
			down_read(&c->gc_lock);
		else if (!down_read_trylock(&c->gc_lock))
			return -EINTR;
	}
	lg_global_lock(&c->usage_lock);

	sectors_available = __recalc_sectors_available(c);

	if (sectors <= sectors_available ||
	    (flags & BCH_DISK_RESERVATION_NOFAIL)) {
		atomic64_set(&c->sectors_available,
			     max_t(s64, 0, sectors_available - sectors));
		stats->online_reserved	+= sectors;
		res->sectors		+= sectors;
		ret = 0;

		bch2_disk_reservations_verify(c, flags);
	} else {
		atomic64_set(&c->sectors_available, sectors_available);
		ret = -ENOSPC;
	}

	bch2_fs_stats_verify(c);
	lg_global_unlock(&c->usage_lock);
	if (!(flags & BCH_DISK_RESERVATION_GC_LOCK_HELD))
		up_read(&c->gc_lock);

	return ret;
}
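/*
 * Sketch of typical usage (illustrative; the real callers sit in the write
 * and btree update paths):
 *
 *	struct disk_reservation res = { 0 };
 *
 *	if (bch2_disk_reservation_add(c, &res, sectors, 0))
 *		return -ENOSPC;
 *	...
 *	__bch2_disk_reservation_put(c, &res);
 *
 * bch2_fs_usage_apply() charges @res as keys are marked, so the final put may
 * see res.sectors already reduced to 0.
 */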
/* Startup/shutdown: */

static void buckets_free_rcu(struct rcu_head *rcu)
{
	struct bucket_array *buckets =
		container_of(rcu, struct bucket_array, rcu);

	kvpfree(buckets,
		sizeof(struct bucket_array) +
		buckets->nbuckets * sizeof(struct bucket));
}
int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
{
	struct bucket_array *buckets = NULL, *old_buckets = NULL;
	unsigned long *buckets_dirty = NULL;
	u8 *oldest_gens = NULL;
	alloc_fifo	free[RESERVE_NR];
	alloc_fifo	free_inc;
	alloc_heap	alloc_heap;
	copygc_heap	copygc_heap;
	size_t btree_reserve	= DIV_ROUND_UP(BTREE_NODE_RESERVE,
				ca->mi.bucket_size / c->opts.btree_node_size);
	/* XXX: these should be tunable */
	size_t reserve_none	= max_t(size_t, 4, ca->mi.nbuckets >> 9);
	size_t copygc_reserve	= max_t(size_t, 16, ca->mi.nbuckets >> 7);
	size_t free_inc_reserve	= copygc_reserve / 2;
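	/*
	 * Example: a device with 2^20 buckets gets reserve_none = 2048,
	 * copygc_reserve = 8192 and free_inc_reserve = 4096 buckets.
	 */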
	bool resize = ca->buckets != NULL,
	     start_copygc = ca->copygc_thread != NULL;
	int ret = -ENOMEM;
	unsigned i;

	memset(&free,		0, sizeof(free));
	memset(&free_inc,	0, sizeof(free_inc));
	memset(&alloc_heap,	0, sizeof(alloc_heap));
	memset(&copygc_heap,	0, sizeof(copygc_heap));

	if (!(buckets		= kvpmalloc(sizeof(struct bucket_array) +
					    nbuckets * sizeof(struct bucket),
					    GFP_KERNEL|__GFP_ZERO)) ||
	    !(oldest_gens	= kvpmalloc(nbuckets * sizeof(u8),
					    GFP_KERNEL|__GFP_ZERO)) ||
	    !(buckets_dirty	= kvpmalloc(BITS_TO_LONGS(nbuckets) *
					    sizeof(unsigned long),
					    GFP_KERNEL|__GFP_ZERO)) ||
	    !init_fifo(&free[RESERVE_BTREE], btree_reserve, GFP_KERNEL) ||
	    !init_fifo(&free[RESERVE_MOVINGGC],
		       copygc_reserve, GFP_KERNEL) ||
	    !init_fifo(&free[RESERVE_NONE], reserve_none, GFP_KERNEL) ||
	    !init_fifo(&free_inc, free_inc_reserve, GFP_KERNEL) ||
	    !init_heap(&alloc_heap, free_inc_reserve, GFP_KERNEL) ||
	    !init_heap(&copygc_heap, copygc_reserve, GFP_KERNEL))
		goto err;
	buckets->first_bucket	= ca->mi.first_bucket;
	buckets->nbuckets	= nbuckets;

	bch2_copygc_stop(ca);

	down_write(&c->gc_lock);
	down_write(&ca->bucket_lock);
	lg_global_lock(&c->usage_lock);

	old_buckets = bucket_array(ca);

	if (resize) {
		size_t n = min(buckets->nbuckets, old_buckets->nbuckets);

		memcpy(buckets->b,
		       old_buckets->b,
		       n * sizeof(struct bucket));
		memcpy(oldest_gens,
		       ca->oldest_gens,
		       n * sizeof(u8));
		memcpy(buckets_dirty,
		       ca->buckets_dirty,
		       BITS_TO_LONGS(n) * sizeof(unsigned long));
	}

	rcu_assign_pointer(ca->buckets, buckets);
	buckets = old_buckets;

	swap(ca->oldest_gens, oldest_gens);
	swap(ca->buckets_dirty, buckets_dirty);

	lg_global_unlock(&c->usage_lock);

	spin_lock(&c->freelist_lock);
	for (i = 0; i < RESERVE_NR; i++) {
		fifo_move(&free[i], &ca->free[i]);
		swap(ca->free[i], free[i]);
	}
	fifo_move(&free_inc, &ca->free_inc);
	swap(ca->free_inc, free_inc);
	spin_unlock(&c->freelist_lock);

	/* with gc lock held, alloc_heap can't be in use: */
	swap(ca->alloc_heap, alloc_heap);

	/* and we shut down copygc: */
	swap(ca->copygc_heap, copygc_heap);

	nbuckets = ca->mi.nbuckets;

	up_write(&ca->bucket_lock);
	up_write(&c->gc_lock);

	if (start_copygc &&
	    bch2_copygc_start(c, ca))
		bch_err(ca, "error restarting copygc thread");

	ret = 0;
err:
	free_heap(&copygc_heap);
	free_heap(&alloc_heap);
	free_fifo(&free_inc);
	for (i = 0; i < RESERVE_NR; i++)
		free_fifo(&free[i]);
	kvpfree(buckets_dirty,
		BITS_TO_LONGS(nbuckets) * sizeof(unsigned long));
	kvpfree(oldest_gens,
		nbuckets * sizeof(u8));
	if (old_buckets)
		call_rcu(&old_buckets->rcu, buckets_free_rcu);

	return ret;
}
void bch2_dev_buckets_free(struct bch_dev *ca)
{
	unsigned i;

	free_heap(&ca->copygc_heap);
	free_heap(&ca->alloc_heap);
	free_fifo(&ca->free_inc);
	for (i = 0; i < RESERVE_NR; i++)
		free_fifo(&ca->free[i]);
	kvpfree(ca->buckets_dirty,
		BITS_TO_LONGS(ca->mi.nbuckets) * sizeof(unsigned long));
	kvpfree(ca->oldest_gens, ca->mi.nbuckets * sizeof(u8));
	kvpfree(ca->buckets, sizeof(struct bucket_array) +
		ca->mi.nbuckets * sizeof(struct bucket));

	free_percpu(ca->usage_percpu);
}
int bch2_dev_buckets_alloc(struct bch_fs *c, struct bch_dev *ca)
{
	if (!(ca->usage_percpu = alloc_percpu(struct bch_dev_usage)))
		return -ENOMEM;

	return bch2_dev_buckets_resize(c, ca, ca->mi.nbuckets);
}