libbcachefs/buckets.c
1 /*
2  * Code for manipulating bucket marks for garbage collection.
3  *
4  * Copyright 2014 Datera, Inc.
5  *
6  * Bucket states:
7  * - free bucket: mark == 0
8  *   The bucket contains no data and will not be read
9  *
10  * - allocator bucket: owned_by_allocator == 1
11  *   The bucket is on a free list, or it is an open bucket
12  *
13  * - cached bucket: owned_by_allocator == 0 &&
14  *                  dirty_sectors == 0 &&
15  *                  cached_sectors > 0
16  *   The bucket contains data but may be safely discarded as there are
17  *   enough replicas of the data on other cache devices, or it has been
18  *   written back to the backing device
19  *
20  * - dirty bucket: owned_by_allocator == 0 &&
21  *                 dirty_sectors > 0
22  * The bucket contains data that we must not discard (either the only copy,
23  *   or one of the 'main copies' for data requiring multiple replicas)
24  *
25  * - metadata bucket: owned_by_allocator == 0 && is_metadata == 1
26  *   This is a btree node, journal or gen/prio bucket
27  *
28  * Lifecycle:
29  *
30  * bucket invalidated => bucket on freelist => open bucket =>
31  *     [dirty bucket =>] cached bucket => bucket invalidated => ...
32  *
33  * Note that cache promotion can skip the dirty bucket step, as data
34  * is copied from a deeper tier to a shallower tier, onto a cached
35  * bucket.
36  * Note also that a cached bucket can spontaneously become dirty --
37  * see below.
38  *
39  * Only a traversal of the key space can determine whether a bucket is
40  * truly dirty or cached.
41  *
42  * Transitions:
43  *
44  * - free => allocator: bucket was invalidated
45  * - cached => allocator: bucket was invalidated
46  *
47  * - allocator => dirty: open bucket was filled up
48  * - allocator => cached: open bucket was filled up
49  * - allocator => metadata: metadata was allocated
50  *
51  * - dirty => cached: dirty sectors were copied to a deeper tier
52  * - dirty => free: dirty sectors were overwritten or moved (copy gc)
53  * - cached => free: cached sectors were overwritten
54  *
55  * - metadata => free: metadata was freed
56  *
57  * Oddities:
58  * - cached => dirty: a device was removed so formerly replicated data
59  *                    is no longer sufficiently replicated
60  * - free => cached: cannot happen
61  * - free => dirty: cannot happen
62  * - free => metadata: cannot happen
63  */
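/*
 * Illustrative sketch (added commentary, not compiled - hence #if 0): one way
 * the states above map onto the fields of struct bucket_mark used later in
 * this file. enum bucket_state and bucket_state() are hypothetical names
 * introduced only for this example.
 */
#if 0
enum bucket_state {
        BUCKET_FREE,            /* mark == 0 */
        BUCKET_ALLOCATOR,       /* on a freelist, or an open bucket */
        BUCKET_CACHED,          /* clean data, safe to invalidate */
        BUCKET_DIRTY,           /* data we must not discard */
        BUCKET_METADATA,        /* btree node, journal or gen/prio bucket */
};

static enum bucket_state bucket_state(struct bucket_mark m)
{
        if (m.owned_by_allocator)
                return BUCKET_ALLOCATOR;
        if (m.data_type == BCH_DATA_SB ||
            m.data_type == BCH_DATA_JOURNAL ||
            m.data_type == BCH_DATA_BTREE)
                return BUCKET_METADATA;
        if (m.dirty_sectors)
                return BUCKET_DIRTY;
        if (m.cached_sectors)
                return BUCKET_CACHED;
        return BUCKET_FREE;
}
#endif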
64
65 #include "bcachefs.h"
66 #include "alloc_background.h"
67 #include "bset.h"
68 #include "btree_gc.h"
69 #include "btree_update.h"
70 #include "buckets.h"
71 #include "ec.h"
72 #include "error.h"
73 #include "movinggc.h"
74 #include "replicas.h"
75
76 #include <linux/preempt.h>
77 #include <trace/events/bcachefs.h>
78
79 /*
80  * Clear journal_seq_valid for buckets for which it's not needed, to prevent
81  * wraparound:
82  */
83 void bch2_bucket_seq_cleanup(struct bch_fs *c)
84 {
85         u64 journal_seq = atomic64_read(&c->journal.seq);
86         u16 last_seq_ondisk = c->journal.last_seq_ondisk;
87         struct bch_dev *ca;
88         struct bucket_array *buckets;
89         struct bucket *g;
90         struct bucket_mark m;
91         unsigned i;
92
93         if (journal_seq - c->last_bucket_seq_cleanup <
94             (1U << (BUCKET_JOURNAL_SEQ_BITS - 2)))
95                 return;
96
97         c->last_bucket_seq_cleanup = journal_seq;
98
99         for_each_member_device(ca, c, i) {
100                 down_read(&ca->bucket_lock);
101                 buckets = bucket_array(ca);
102
103                 for_each_bucket(g, buckets) {
104                         bucket_cmpxchg(g, m, ({
105                                 if (!m.journal_seq_valid ||
106                                     bucket_needs_journal_commit(m, last_seq_ondisk))
107                                         break;
108
109                                 m.journal_seq_valid = 0;
110                         }));
111                 }
112                 up_read(&ca->bucket_lock);
113         }
114 }
115
116 void bch2_fs_usage_initialize(struct bch_fs *c)
117 {
118         struct bch_fs_usage *usage;
119         unsigned i;
120
121         percpu_down_write(&c->mark_lock);
122         usage = (void *) bch2_acc_percpu_u64s((void *) c->usage[0],
123                                               fs_usage_u64s(c));
124
125         for (i = 0; i < BCH_REPLICAS_MAX; i++)
126                 usage->reserved += usage->persistent_reserved[i];
127
128         for (i = 0; i < c->replicas.nr; i++) {
129                 struct bch_replicas_entry *e =
130                         cpu_replicas_entry(&c->replicas, i);
131
132                 switch (e->data_type) {
133                 case BCH_DATA_BTREE:
134                 case BCH_DATA_USER:
135                         usage->data     += usage->replicas[i];
136                         break;
137                 case BCH_DATA_CACHED:
138                         usage->cached   += usage->replicas[i];
139                         break;
140                 }
141         }
142
143         percpu_up_write(&c->mark_lock);
144 }
145
146 void bch2_fs_usage_scratch_put(struct bch_fs *c, struct bch_fs_usage *fs_usage)
147 {
148         if (fs_usage == c->usage_scratch)
149                 mutex_unlock(&c->usage_scratch_lock);
150         else
151                 kfree(fs_usage);
152 }
153
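/*
 * Get a zeroed bch_fs_usage to accumulate deltas into: try a non-blocking
 * allocation first, then the preallocated c->usage_scratch buffer if its
 * mutex is uncontended, then a blocking GFP_NOFS allocation, and finally
 * wait for the scratch buffer:
 */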
154 struct bch_fs_usage *bch2_fs_usage_scratch_get(struct bch_fs *c)
155 {
156         struct bch_fs_usage *ret;
157         unsigned bytes = fs_usage_u64s(c) * sizeof(u64);
158
159         ret = kzalloc(bytes, GFP_NOWAIT);
160         if (ret)
161                 return ret;
162
163         if (mutex_trylock(&c->usage_scratch_lock))
164                 goto out_pool;
165
166         ret = kzalloc(bytes, GFP_NOFS);
167         if (ret)
168                 return ret;
169
170         mutex_lock(&c->usage_scratch_lock);
171 out_pool:
172         ret = c->usage_scratch;
173         memset(ret, 0, bytes);
174         return ret;
175 }
176
177 struct bch_dev_usage bch2_dev_usage_read(struct bch_fs *c, struct bch_dev *ca)
178 {
179         struct bch_dev_usage ret;
180
181         memset(&ret, 0, sizeof(ret));
182         acc_u64s_percpu((u64 *) &ret,
183                         (u64 __percpu *) ca->usage[0],
184                         sizeof(ret) / sizeof(u64));
185
186         return ret;
187 }
188
189 struct bch_fs_usage *bch2_fs_usage_read(struct bch_fs *c)
190 {
191         struct bch_fs_usage *ret;
192         unsigned v, u64s = fs_usage_u64s(c);
193 retry:
194         ret = kzalloc(u64s * sizeof(u64), GFP_NOFS);
195         if (unlikely(!ret))
196                 return NULL;
197
198         percpu_down_read_preempt_disable(&c->mark_lock);
199
200         v = fs_usage_u64s(c);
201         if (unlikely(u64s != v)) {
202                 u64s = v;
203                 percpu_up_read_preempt_enable(&c->mark_lock);
204                 kfree(ret);
205                 goto retry;
206         }
207
208         acc_u64s_percpu((u64 *) ret, (u64 __percpu *) c->usage[0], u64s);
209
210         return ret;
211 }
212
213 #define RESERVE_FACTOR  6
214
215 static u64 reserve_factor(u64 r)
216 {
217         return r + (round_up(r, (1 << RESERVE_FACTOR)) >> RESERVE_FACTOR);
218 }
219
220 static u64 avail_factor(u64 r)
221 {
222         return (r << RESERVE_FACTOR) / ((1 << RESERVE_FACTOR) + 1);
223 }
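/*
 * Worked example (illustrative): with RESERVE_FACTOR == 6 the reserve is
 * roughly 1/64th on top of @r, and avail_factor() approximately undoes it:
 *
 *   reserve_factor(1000) = 1000 + round_up(1000, 64) / 64 = 1000 + 16 = 1016
 *   avail_factor(1016)   = (1016 * 64) / 65               = 1000
 */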
224
225 u64 bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage *fs_usage)
226 {
227         return min(fs_usage->hidden +
228                    fs_usage->data +
229                    reserve_factor(fs_usage->reserved +
230                                   fs_usage->online_reserved),
231                    c->capacity);
232 }
233
234 static struct bch_fs_usage_short
235 __bch2_fs_usage_read_short(struct bch_fs *c)
236 {
237         struct bch_fs_usage_short ret;
238         u64 data, reserved;
239
240         ret.capacity = c->capacity -
241                 percpu_u64_get(&c->usage[0]->hidden);
242
243         data            = percpu_u64_get(&c->usage[0]->data);
244         reserved        = percpu_u64_get(&c->usage[0]->reserved) +
245                 percpu_u64_get(&c->usage[0]->online_reserved);
246
247         ret.used        = min(ret.capacity, data + reserve_factor(reserved));
248         ret.free        = ret.capacity - ret.used;
249
250         ret.nr_inodes   = percpu_u64_get(&c->usage[0]->nr_inodes);
251
252         return ret;
253 }
254
255 struct bch_fs_usage_short
256 bch2_fs_usage_read_short(struct bch_fs *c)
257 {
258         struct bch_fs_usage_short ret;
259
260         percpu_down_read_preempt_disable(&c->mark_lock);
261         ret = __bch2_fs_usage_read_short(c);
262         percpu_up_read_preempt_enable(&c->mark_lock);
263
264         return ret;
265 }
266
267 static inline int is_unavailable_bucket(struct bucket_mark m)
268 {
269         return !is_available_bucket(m);
270 }
271
272 static inline int is_fragmented_bucket(struct bucket_mark m,
273                                        struct bch_dev *ca)
274 {
275         if (!m.owned_by_allocator &&
276             m.data_type == BCH_DATA_USER &&
277             bucket_sectors_used(m))
278                 return max_t(int, 0, (int) ca->mi.bucket_size -
279                              bucket_sectors_used(m));
280         return 0;
281 }
282
283 static inline enum bch_data_type bucket_type(struct bucket_mark m)
284 {
285         return m.cached_sectors && !m.dirty_sectors
286                 ? BCH_DATA_CACHED
287                 : m.data_type;
288 }
289
290 static bool bucket_became_unavailable(struct bucket_mark old,
291                                       struct bucket_mark new)
292 {
293         return is_available_bucket(old) &&
294                !is_available_bucket(new);
295 }
296
297 int bch2_fs_usage_apply(struct bch_fs *c,
298                         struct bch_fs_usage *fs_usage,
299                         struct disk_reservation *disk_res)
300 {
301         s64 added = fs_usage->data + fs_usage->reserved;
302         s64 should_not_have_added;
303         int ret = 0;
304
305         percpu_rwsem_assert_held(&c->mark_lock);
306
307         /*
308          * Not allowed to reduce sectors_available except by getting a
309          * reservation:
310          */
311         should_not_have_added = added - (s64) (disk_res ? disk_res->sectors : 0);
312         if (WARN_ONCE(should_not_have_added > 0,
313                       "disk usage increased without a reservation")) {
314                 atomic64_sub(should_not_have_added, &c->sectors_available);
315                 added -= should_not_have_added;
316                 ret = -1;
317         }
318
319         if (added > 0) {
320                 disk_res->sectors               -= added;
321                 fs_usage->online_reserved       -= added;
322         }
323
324         preempt_disable();
325         acc_u64s((u64 *) this_cpu_ptr(c->usage[0]),
326                  (u64 *) fs_usage, fs_usage_u64s(c));
327         preempt_enable();
328
329         return ret;
330 }
331
332 static inline void account_bucket(struct bch_fs_usage *fs_usage,
333                                   struct bch_dev_usage *dev_usage,
334                                   enum bch_data_type type,
335                                   int nr, s64 size)
336 {
337         if (type == BCH_DATA_SB || type == BCH_DATA_JOURNAL)
338                 fs_usage->hidden        += size;
339
340         dev_usage->buckets[type]        += nr;
341 }
342
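/*
 * Fold a single bucket's old -> new mark transition into the per-device
 * usage counters (and fs_usage->hidden for superblock/journal buckets),
 * waking the allocator if the bucket became available:
 */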
343 static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
344                                   struct bch_fs_usage *fs_usage,
345                                   struct bucket_mark old, struct bucket_mark new,
346                                   bool gc)
347 {
348         struct bch_dev_usage *dev_usage;
349
350         percpu_rwsem_assert_held(&c->mark_lock);
351
352         bch2_fs_inconsistent_on(old.data_type && new.data_type &&
353                                 old.data_type != new.data_type, c,
354                 "different types of data in same bucket: %s, %s",
355                 bch2_data_types[old.data_type],
356                 bch2_data_types[new.data_type]);
357
358         dev_usage = this_cpu_ptr(ca->usage[gc]);
359
360         if (bucket_type(old))
361                 account_bucket(fs_usage, dev_usage, bucket_type(old),
362                                -1, -ca->mi.bucket_size);
363
364         if (bucket_type(new))
365                 account_bucket(fs_usage, dev_usage, bucket_type(new),
366                                1, ca->mi.bucket_size);
367
368         dev_usage->buckets_alloc +=
369                 (int) new.owned_by_allocator - (int) old.owned_by_allocator;
370         dev_usage->buckets_ec +=
371                 (int) new.stripe - (int) old.stripe;
372         dev_usage->buckets_unavailable +=
373                 is_unavailable_bucket(new) - is_unavailable_bucket(old);
374
375         dev_usage->sectors[old.data_type] -= old.dirty_sectors;
376         dev_usage->sectors[new.data_type] += new.dirty_sectors;
377         dev_usage->sectors[BCH_DATA_CACHED] +=
378                 (int) new.cached_sectors - (int) old.cached_sectors;
379         dev_usage->sectors_fragmented +=
380                 is_fragmented_bucket(new, ca) - is_fragmented_bucket(old, ca);
381
382         if (!is_available_bucket(old) && is_available_bucket(new))
383                 bch2_wake_allocator(ca);
384 }
385
386 void bch2_dev_usage_from_buckets(struct bch_fs *c, struct bch_dev *ca)
387 {
388         struct bucket_mark old = { .v.counter = 0 };
389         struct bch_fs_usage *fs_usage;
390         struct bucket_array *buckets;
391         struct bucket *g;
392
393         percpu_down_read_preempt_disable(&c->mark_lock);
394         fs_usage = this_cpu_ptr(c->usage[0]);
395         buckets = bucket_array(ca);
396
397         for_each_bucket(g, buckets)
398                 if (g->mark.data_type)
399                         bch2_dev_usage_update(c, ca, fs_usage, old, g->mark, false);
400         percpu_up_read_preempt_enable(&c->mark_lock);
401 }
402
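/*
 * Like bucket_cmpxchg(), but also folds the resulting old -> new transition
 * into the device/filesystem usage counters via bch2_dev_usage_update():
 */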
403 #define bucket_data_cmpxchg(c, ca, fs_usage, g, new, expr)      \
404 ({                                                              \
405         struct bucket_mark _old = bucket_cmpxchg(g, new, expr); \
406                                                                 \
407         bch2_dev_usage_update(c, ca, fs_usage, _old, new, gc);  \
408         _old;                                                   \
409 })
410
411 static inline void update_replicas(struct bch_fs *c,
412                                    struct bch_fs_usage *fs_usage,
413                                    struct bch_replicas_entry *r,
414                                    s64 sectors)
415 {
416         int idx = bch2_replicas_entry_idx(c, r);
417
418         BUG_ON(idx < 0);
419         BUG_ON(!sectors);
420
421         if (r->data_type == BCH_DATA_CACHED)
422                 fs_usage->cached        += sectors;
423         else
424                 fs_usage->data          += sectors;
425         fs_usage->replicas[idx]         += sectors;
426 }
427
428 static inline void update_cached_sectors(struct bch_fs *c,
429                                          struct bch_fs_usage *fs_usage,
430                                          unsigned dev, s64 sectors)
431 {
432         struct bch_replicas_padded r;
433
434         bch2_replicas_entry_cached(&r.e, dev);
435
436         update_replicas(c, fs_usage, &r.e, sectors);
437 }
438
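/*
 * Run a mark function against the normal and/or gc copies of the bucket and
 * usage state: callers with BCH_BUCKET_MARK_GC set only touch the gc copy;
 * all other callers update the normal copy, plus the gc copy if mark and
 * sweep gc is in progress and has already visited @pos, so the gc copy stays
 * consistent with updates that race with it:
 */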
439 #define do_mark_fn(fn, c, pos, flags, ...)                              \
440 ({                                                                      \
441         int gc, ret = 0;                                                \
442                                                                         \
443         percpu_rwsem_assert_held(&c->mark_lock);                        \
444                                                                         \
445         for (gc = 0; gc < 2 && !ret; gc++)                              \
446                 if (!gc == !(flags & BCH_BUCKET_MARK_GC) ||             \
447                     (gc && gc_visited(c, pos)))                         \
448                         ret = fn(c, __VA_ARGS__, gc);                   \
449         ret;                                                            \
450 })
451
452 static int __bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca,
453                                     size_t b, struct bucket_mark *ret,
454                                     bool gc)
455 {
456         struct bch_fs_usage *fs_usage = this_cpu_ptr(c->usage[gc]);
457         struct bucket *g = __bucket(ca, b, gc);
458         struct bucket_mark old, new;
459
460         old = bucket_data_cmpxchg(c, ca, fs_usage, g, new, ({
461                 BUG_ON(!is_available_bucket(new));
462
463                 new.owned_by_allocator  = true;
464                 new.dirty               = true;
465                 new.data_type           = 0;
466                 new.cached_sectors      = 0;
467                 new.dirty_sectors       = 0;
468                 new.gen++;
469         }));
470
471         if (old.cached_sectors)
472                 update_cached_sectors(c, fs_usage, ca->dev_idx,
473                                       -((s64) old.cached_sectors));
474
475         if (!gc)
476                 *ret = old;
477         return 0;
478 }
479
480 void bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca,
481                             size_t b, struct bucket_mark *old)
482 {
483         do_mark_fn(__bch2_invalidate_bucket, c, gc_phase(GC_PHASE_START), 0,
484                    ca, b, old);
485
486         if (!old->owned_by_allocator && old->cached_sectors)
487                 trace_invalidate(ca, bucket_to_sector(ca, b),
488                                  old->cached_sectors);
489 }
490
491 static int __bch2_mark_alloc_bucket(struct bch_fs *c, struct bch_dev *ca,
492                                     size_t b, bool owned_by_allocator,
493                                     bool gc)
494 {
495         struct bch_fs_usage *fs_usage = this_cpu_ptr(c->usage[gc]);
496         struct bucket *g = __bucket(ca, b, gc);
497         struct bucket_mark old, new;
498
499         old = bucket_data_cmpxchg(c, ca, fs_usage, g, new, ({
500                 new.owned_by_allocator  = owned_by_allocator;
501         }));
502
503         BUG_ON(!gc &&
504                !owned_by_allocator && !old.owned_by_allocator);
505
506         return 0;
507 }
508
509 void bch2_mark_alloc_bucket(struct bch_fs *c, struct bch_dev *ca,
510                             size_t b, bool owned_by_allocator,
511                             struct gc_pos pos, unsigned flags)
512 {
513         do_mark_fn(__bch2_mark_alloc_bucket, c, pos, flags,
514                    ca, b, owned_by_allocator);
515 }
516
517 static int bch2_mark_alloc(struct bch_fs *c, struct bkey_s_c k,
518                            bool inserting,
519                            struct bch_fs_usage *fs_usage,
520                            unsigned journal_seq, unsigned flags,
521                            bool gc)
522 {
523         struct bkey_alloc_unpacked u;
524         struct bch_dev *ca;
525         struct bucket *g;
526         struct bucket_mark old, m;
527
528         if (!inserting)
529                 return 0;
530
531         /*
532          * alloc btree is read in by bch2_alloc_read, not gc:
533          */
534         if (flags & BCH_BUCKET_MARK_GC)
535                 return 0;
536
537         u = bch2_alloc_unpack(bkey_s_c_to_alloc(k).v);
538         ca = bch_dev_bkey_exists(c, k.k->p.inode);
539         g = __bucket(ca, k.k->p.offset, gc);
540
541         /*
542          * this should currently only be getting called from the bucket
543          * invalidate path:
544          */
545         BUG_ON(u.dirty_sectors);
546         BUG_ON(u.cached_sectors);
547         BUG_ON(!g->mark.owned_by_allocator);
548
549         old = bucket_data_cmpxchg(c, ca, fs_usage, g, m, ({
550                 m.gen                   = u.gen;
551                 m.data_type             = u.data_type;
552                 m.dirty_sectors         = u.dirty_sectors;
553                 m.cached_sectors        = u.cached_sectors;
554         }));
555
556         g->io_time[READ]        = u.read_time;
557         g->io_time[WRITE]       = u.write_time;
558         g->oldest_gen           = u.oldest_gen;
559         g->gen_valid            = 1;
560
561         if (old.cached_sectors) {
562                 update_cached_sectors(c, fs_usage, ca->dev_idx,
563                                       -old.cached_sectors);
564                 trace_invalidate(ca, bucket_to_sector(ca, k.k->p.offset),
565                                  old.cached_sectors);
566         }
567
568         return 0;
569 }
570
571 #define checked_add(a, b)                                       \
572 ({                                                              \
573         unsigned _res = (unsigned) (a) + (b);                   \
574         bool overflow = _res > U16_MAX;                         \
575         if (overflow)                                           \
576                 _res = U16_MAX;                                 \
577         (a) = _res;                                             \
578         overflow;                                               \
579 })
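/*
 * e.g. with (a) == 65000 and (b) == 1000 the sum (66000) exceeds U16_MAX,
 * so (a) is clamped to 65535 and checked_add() evaluates to true (overflow).
 */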
580
581 static int __bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
582                                        size_t b, enum bch_data_type type,
583                                        unsigned sectors, bool gc)
584 {
585         struct bucket *g = __bucket(ca, b, gc);
586         struct bucket_mark old, new;
587         bool overflow;
588
589         BUG_ON(type != BCH_DATA_SB &&
590                type != BCH_DATA_JOURNAL);
591
592         old = bucket_cmpxchg(g, new, ({
593                 new.dirty       = true;
594                 new.data_type   = type;
595                 overflow = checked_add(new.dirty_sectors, sectors);
596         }));
597
598         bch2_fs_inconsistent_on(overflow, c,
599                 "bucket sector count overflow: %u + %u > U16_MAX",
600                 old.dirty_sectors, sectors);
601
602         if (c)
603                 bch2_dev_usage_update(c, ca, this_cpu_ptr(c->usage[gc]),
604                                       old, new, gc);
605
606         return 0;
607 }
608
609 void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
610                                size_t b, enum bch_data_type type,
611                                unsigned sectors, struct gc_pos pos,
612                                unsigned flags)
613 {
614         BUG_ON(type != BCH_DATA_SB &&
615                type != BCH_DATA_JOURNAL);
616
617         if (likely(c)) {
618                 do_mark_fn(__bch2_mark_metadata_bucket, c, pos, flags,
619                            ca, b, type, sectors);
620         } else {
621                 __bch2_mark_metadata_bucket(c, ca, b, type, sectors, 0);
622         }
623 }
624
625 static s64 ptr_disk_sectors_delta(struct extent_ptr_decoded p,
626                                   s64 delta)
627 {
628         if (delta > 0) {
629                 /*
630                  * marking a new extent, which _will have size_ @delta
631                  *
632                  * in the bch2_mark_update -> BCH_EXTENT_OVERLAP_MIDDLE
633                  * case, we haven't actually created the key we'll be inserting
634                  * yet (for the split) - so we don't want to be using
635                  * k->size/crc.live_size here:
636                  */
637                 return __ptr_disk_sectors(p, delta);
638         } else {
639                 BUG_ON(-delta > p.crc.live_size);
640
641                 return (s64) __ptr_disk_sectors(p, p.crc.live_size + delta) -
642                         (s64) ptr_disk_sectors(p);
643         }
644 }
645
646 /*
647  * Checking against gc's position has to be done here, inside the cmpxchg()
648  * loop, to avoid racing with the start of gc clearing all the marks - GC does
649  * that with the gc pos seqlock held.
650  */
651 static bool bch2_mark_pointer(struct bch_fs *c,
652                               struct extent_ptr_decoded p,
653                               s64 sectors, enum bch_data_type data_type,
654                               struct bch_fs_usage *fs_usage,
655                               unsigned journal_seq, unsigned flags,
656                               bool gc)
657 {
658         struct bucket_mark old, new;
659         struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
660         size_t b = PTR_BUCKET_NR(ca, &p.ptr);
661         struct bucket *g = __bucket(ca, b, gc);
662         bool overflow;
663         u64 v;
664
665         v = atomic64_read(&g->_mark.v);
666         do {
667                 new.v.counter = old.v.counter = v;
668
669                 new.dirty = true;
670
671                 /*
672                  * Check this after reading bucket mark to guard against
673                  * the allocator invalidating a bucket after we've already
674                  * checked the gen
675                  */
676                 if (gen_after(new.gen, p.ptr.gen)) {
677                         BUG_ON(!test_bit(BCH_FS_ALLOC_READ_DONE, &c->flags));
678                         EBUG_ON(!p.ptr.cached &&
679                                 test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags));
680                         return true;
681                 }
682
683                 if (!p.ptr.cached)
684                         overflow = checked_add(new.dirty_sectors, sectors);
685                 else
686                         overflow = checked_add(new.cached_sectors, sectors);
687
688                 if (!new.dirty_sectors &&
689                     !new.cached_sectors) {
690                         new.data_type   = 0;
691
692                         if (journal_seq) {
693                                 new.journal_seq_valid = 1;
694                                 new.journal_seq = journal_seq;
695                         }
696                 } else {
697                         new.data_type = data_type;
698                 }
699
700                 if (flags & BCH_BUCKET_MARK_NOATOMIC) {
701                         g->_mark = new;
702                         break;
703                 }
704         } while ((v = atomic64_cmpxchg(&g->_mark.v,
705                               old.v.counter,
706                               new.v.counter)) != old.v.counter);
707
708         bch2_fs_inconsistent_on(overflow, c,
709                 "bucket sector count overflow: %u + %lli > U16_MAX",
710                 !p.ptr.cached
711                 ? old.dirty_sectors
712                 : old.cached_sectors, sectors);
713
714         bch2_dev_usage_update(c, ca, fs_usage, old, new, gc);
715
716         BUG_ON(!gc && bucket_became_unavailable(old, new));
717
718         return false;
719 }
720
721 static int bch2_mark_stripe_ptr(struct bch_fs *c,
722                                 struct bch_extent_stripe_ptr p,
723                                 enum bch_data_type data_type,
724                                 struct bch_fs_usage *fs_usage,
725                                 s64 sectors, unsigned flags,
726                                 bool gc)
727 {
728         struct stripe *m;
729         unsigned old, new, nr_data;
730         int blocks_nonempty_delta;
731         s64 parity_sectors;
732
733         BUG_ON(!sectors);
734
735         m = genradix_ptr(&c->stripes[gc], p.idx);
736
737         spin_lock(&c->ec_stripes_heap_lock);
738
739         if (!m || !m->alive) {
740                 spin_unlock(&c->ec_stripes_heap_lock);
741                 bch_err_ratelimited(c, "pointer to nonexistent stripe %llu",
742                                     (u64) p.idx);
743                 return -1;
744         }
745
746         BUG_ON(m->r.e.data_type != data_type);
747
748         nr_data = m->nr_blocks - m->nr_redundant;
749
750         parity_sectors = DIV_ROUND_UP(abs(sectors) * m->nr_redundant, nr_data);
751
752         if (sectors < 0)
753                 parity_sectors = -parity_sectors;
754         sectors += parity_sectors;
755
756         old = m->block_sectors[p.block];
757         m->block_sectors[p.block] += sectors;
758         new = m->block_sectors[p.block];
759
760         blocks_nonempty_delta = (int) !!new - (int) !!old;
761         if (blocks_nonempty_delta) {
762                 m->blocks_nonempty += blocks_nonempty_delta;
763
764                 if (!gc)
765                         bch2_stripes_heap_update(c, m, p.idx);
766         }
767
768         m->dirty = true;
769
770         spin_unlock(&c->ec_stripes_heap_lock);
771
772         update_replicas(c, fs_usage, &m->r.e, sectors);
773
774         return 0;
775 }
776
777 static int bch2_mark_extent(struct bch_fs *c, struct bkey_s_c k,
778                             s64 sectors, enum bch_data_type data_type,
779                             struct bch_fs_usage *fs_usage,
780                             unsigned journal_seq, unsigned flags,
781                             bool gc)
782 {
783         struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
784         const union bch_extent_entry *entry;
785         struct extent_ptr_decoded p;
786         struct bch_replicas_padded r;
787         s64 dirty_sectors = 0;
788         unsigned i;
789         int ret;
790
791         r.e.data_type   = data_type;
792         r.e.nr_devs     = 0;
793         r.e.nr_required = 1;
794
795         BUG_ON(!sectors);
796
797         bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
798                 s64 disk_sectors = data_type == BCH_DATA_BTREE
799                         ? sectors
800                         : ptr_disk_sectors_delta(p, sectors);
801                 bool stale = bch2_mark_pointer(c, p, disk_sectors, data_type,
802                                         fs_usage, journal_seq, flags, gc);
803
804                 if (p.ptr.cached) {
805                         if (disk_sectors && !stale)
806                                 update_cached_sectors(c, fs_usage, p.ptr.dev,
807                                                       disk_sectors);
808                 } else if (!p.ec_nr) {
809                         dirty_sectors          += disk_sectors;
810                         r.e.devs[r.e.nr_devs++] = p.ptr.dev;
811                 } else {
812                         for (i = 0; i < p.ec_nr; i++) {
813                                 ret = bch2_mark_stripe_ptr(c, p.ec[i],
814                                                 data_type, fs_usage,
815                                                 disk_sectors, flags, gc);
816                                 if (ret)
817                                         return ret;
818                         }
819
820                         r.e.nr_required = 0;
821                 }
822         }
823
824         if (dirty_sectors)
825                 update_replicas(c, fs_usage, &r.e, dirty_sectors);
826
827         return 0;
828 }
829
830 static void bucket_set_stripe(struct bch_fs *c,
831                               const struct bch_stripe *v,
832                               bool enabled,
833                               struct bch_fs_usage *fs_usage,
834                               u64 journal_seq,
835                               bool gc)
836 {
837         unsigned i;
838
839         for (i = 0; i < v->nr_blocks; i++) {
840                 const struct bch_extent_ptr *ptr = v->ptrs + i;
841                 struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
842                 size_t b = PTR_BUCKET_NR(ca, ptr);
843                 struct bucket *g = __bucket(ca, b, gc);
844                 struct bucket_mark new, old;
845
846                 BUG_ON(ptr_stale(ca, ptr));
847
848                 old = bucket_data_cmpxchg(c, ca, fs_usage, g, new, ({
849                         new.dirty                       = true;
850                         new.stripe                      = enabled;
851                         if (journal_seq) {
852                                 new.journal_seq_valid   = 1;
853                                 new.journal_seq         = journal_seq;
854                         }
855                 }));
856         }
857 }
858
859 static int bch2_mark_stripe(struct bch_fs *c, struct bkey_s_c k,
860                             bool inserting,
861                             struct bch_fs_usage *fs_usage,
862                             u64 journal_seq, unsigned flags,
863                             bool gc)
864 {
865         struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);
866         size_t idx = s.k->p.offset;
867         struct stripe *m = genradix_ptr(&c->stripes[gc], idx);
868         unsigned i;
869
870         spin_lock(&c->ec_stripes_heap_lock);
871
872         if (!m || (!inserting && !m->alive)) {
873                 spin_unlock(&c->ec_stripes_heap_lock);
874                 bch_err_ratelimited(c, "error marking nonexistent stripe %zu",
875                                     idx);
876                 return -1;
877         }
878
879         if (m->alive)
880                 bch2_stripes_heap_del(c, m, idx);
881
882         memset(m, 0, sizeof(*m));
883
884         if (inserting) {
885                 m->sectors      = le16_to_cpu(s.v->sectors);
886                 m->algorithm    = s.v->algorithm;
887                 m->nr_blocks    = s.v->nr_blocks;
888                 m->nr_redundant = s.v->nr_redundant;
889
890                 memset(&m->r, 0, sizeof(m->r));
891
892                 m->r.e.data_type        = BCH_DATA_USER;
893                 m->r.e.nr_devs          = s.v->nr_blocks;
894                 m->r.e.nr_required      = s.v->nr_blocks - s.v->nr_redundant;
895
896                 for (i = 0; i < s.v->nr_blocks; i++)
897                         m->r.e.devs[i] = s.v->ptrs[i].dev;
898
899         /*
900          * XXX: account for stripes somehow here
901          */
902 #if 0
903         update_replicas(c, fs_usage, &m->r.e, stripe_sectors);
904 #endif
905
906                 /* gc recalculates these fields: */
907                 if (!(flags & BCH_BUCKET_MARK_GC)) {
908                         for (i = 0; i < s.v->nr_blocks; i++) {
909                                 m->block_sectors[i] =
910                                         stripe_blockcount_get(s.v, i);
911                                 m->blocks_nonempty += !!m->block_sectors[i];
912                         }
913                 }
914
915                 if (!gc)
916                         bch2_stripes_heap_insert(c, m, idx);
917                 else
918                         m->alive = true;
919         }
920
921         spin_unlock(&c->ec_stripes_heap_lock);
922
923         bucket_set_stripe(c, s.v, inserting, fs_usage, 0, gc);
924         return 0;
925 }
926
927 static int __bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
928                            bool inserting, s64 sectors,
929                            struct bch_fs_usage *fs_usage,
930                            unsigned journal_seq, unsigned flags,
931                            bool gc)
932 {
933         int ret = 0;
934
935         preempt_disable();
936
937         if (!fs_usage || gc)
938                 fs_usage = this_cpu_ptr(c->usage[gc]);
939
940         switch (k.k->type) {
941         case KEY_TYPE_alloc:
942                 ret = bch2_mark_alloc(c, k, inserting,
943                                 fs_usage, journal_seq, flags, gc);
944                 break;
945         case KEY_TYPE_btree_ptr:
946                 ret = bch2_mark_extent(c, k, inserting
947                                 ?  c->opts.btree_node_size
948                                 : -c->opts.btree_node_size,
949                                 BCH_DATA_BTREE,
950                                 fs_usage, journal_seq, flags, gc);
951                 break;
952         case KEY_TYPE_extent:
953                 ret = bch2_mark_extent(c, k, sectors, BCH_DATA_USER,
954                                 fs_usage, journal_seq, flags, gc);
955                 break;
956         case KEY_TYPE_stripe:
957                 ret = bch2_mark_stripe(c, k, inserting,
958                                 fs_usage, journal_seq, flags, gc);
959                 break;
960         case KEY_TYPE_inode:
961                 if (inserting)
962                         fs_usage->nr_inodes++;
963                 else
964                         fs_usage->nr_inodes--;
965                 break;
966         case KEY_TYPE_reservation: {
967                 unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas;
968
969                 sectors *= replicas;
970                 replicas = clamp_t(unsigned, replicas, 1,
971                                    ARRAY_SIZE(fs_usage->persistent_reserved));
972
973                 fs_usage->reserved                              += sectors;
974                 fs_usage->persistent_reserved[replicas - 1]     += sectors;
975                 break;
976         }
977         }
978
979         preempt_enable();
980
981         return ret;
982 }
983
984 int bch2_mark_key_locked(struct bch_fs *c,
985                    struct bkey_s_c k,
986                    bool inserting, s64 sectors,
987                    struct gc_pos pos,
988                    struct bch_fs_usage *fs_usage,
989                    u64 journal_seq, unsigned flags)
990 {
991         return do_mark_fn(__bch2_mark_key, c, pos, flags,
992                           k, inserting, sectors, fs_usage,
993                           journal_seq, flags);
994 }
995
996 int bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
997                   bool inserting, s64 sectors,
998                   struct gc_pos pos,
999                   struct bch_fs_usage *fs_usage,
1000                   u64 journal_seq, unsigned flags)
1001 {
1002         int ret;
1003
1004         percpu_down_read_preempt_disable(&c->mark_lock);
1005         ret = bch2_mark_key_locked(c, k, inserting, sectors,
1006                                    pos, fs_usage, journal_seq, flags);
1007         percpu_up_read_preempt_enable(&c->mark_lock);
1008
1009         return ret;
1010 }
1011
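/*
 * Worked example (illustrative) of the extent overlap accounting below:
 * inserting an extent covering sectors [20, 40) over an existing extent
 * [10, 50) is the BCH_EXTENT_OVERLAP_MIDDLE case. The existing key is first
 * marked with +10 sectors for the back split [40, 50) that the update will
 * create, then marked with 20 - 50 = -30 sectors, a net of -20: exactly the
 * overwritten middle. The ALL/BACK/FRONT cases simply mark the existing key
 * with minus the number of overwritten sectors.
 */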
1012 void bch2_mark_update(struct btree_trans *trans,
1013                       struct btree_insert_entry *insert,
1014                       struct bch_fs_usage *fs_usage)
1015 {
1016         struct bch_fs           *c = trans->c;
1017         struct btree_iter       *iter = insert->iter;
1018         struct btree            *b = iter->l[0].b;
1019         struct btree_node_iter  node_iter = iter->l[0].iter;
1020         struct gc_pos           pos = gc_pos_btree_node(b);
1021         struct bkey_packed      *_k;
1022
1023         if (!btree_node_type_needs_gc(iter->btree_id))
1024                 return;
1025
1026         if (!(trans->flags & BTREE_INSERT_NOMARK))
1027                 bch2_mark_key_locked(c, bkey_i_to_s_c(insert->k), true,
1028                         bpos_min(insert->k->k.p, b->key.k.p).offset -
1029                         bkey_start_offset(&insert->k->k),
1030                         pos, fs_usage, trans->journal_res.seq, 0);
1031
1032         while ((_k = bch2_btree_node_iter_peek_filter(&node_iter, b,
1033                                                       KEY_TYPE_discard))) {
1034                 struct bkey             unpacked;
1035                 struct bkey_s_c         k;
1036                 s64                     sectors = 0;
1037
1038                 k = bkey_disassemble(b, _k, &unpacked);
1039
1040                 if (btree_node_is_extents(b)
1041                     ? bkey_cmp(insert->k->k.p, bkey_start_pos(k.k)) <= 0
1042                     : bkey_cmp(insert->k->k.p, k.k->p))
1043                         break;
1044
1045                 if (btree_node_is_extents(b)) {
1046                         switch (bch2_extent_overlap(&insert->k->k, k.k)) {
1047                         case BCH_EXTENT_OVERLAP_ALL:
1048                                 sectors = -((s64) k.k->size);
1049                                 break;
1050                         case BCH_EXTENT_OVERLAP_BACK:
1051                                 sectors = bkey_start_offset(&insert->k->k) -
1052                                         k.k->p.offset;
1053                                 break;
1054                         case BCH_EXTENT_OVERLAP_FRONT:
1055                                 sectors = bkey_start_offset(k.k) -
1056                                         insert->k->k.p.offset;
1057                                 break;
1058                         case BCH_EXTENT_OVERLAP_MIDDLE:
1059                                 sectors = k.k->p.offset - insert->k->k.p.offset;
1060                                 BUG_ON(sectors <= 0);
1061
1062                                 bch2_mark_key_locked(c, k, true, sectors,
1063                                         pos, fs_usage, trans->journal_res.seq, 0);
1064
1065                                 sectors = bkey_start_offset(&insert->k->k) -
1066                                         k.k->p.offset;
1067                                 break;
1068                         }
1069
1070                         BUG_ON(sectors >= 0);
1071                 }
1072
1073                 bch2_mark_key_locked(c, k, false, sectors,
1074                         pos, fs_usage, trans->journal_res.seq, 0);
1075
1076                 bch2_btree_node_iter_advance(&node_iter, b);
1077         }
1078 }
1079
1080 void bch2_trans_fs_usage_apply(struct btree_trans *trans,
1081                                struct bch_fs_usage *fs_usage)
1082 {
1083         struct bch_fs *c = trans->c;
1084         struct btree_insert_entry *i;
1085         static int warned_disk_usage = 0;
1086         u64 disk_res_sectors = trans->disk_res ? trans->disk_res->sectors : 0;
1087         char buf[200];
1088
1089         if (!bch2_fs_usage_apply(c, fs_usage, trans->disk_res) ||
1090             warned_disk_usage ||
1091             xchg(&warned_disk_usage, 1))
1092                 return;
1093
1094         pr_err("disk usage increased more than %llu sectors reserved", disk_res_sectors);
1095
1096         trans_for_each_update_iter(trans, i) {
1097                 struct btree_iter       *iter = i->iter;
1098                 struct btree            *b = iter->l[0].b;
1099                 struct btree_node_iter  node_iter = iter->l[0].iter;
1100                 struct bkey_packed      *_k;
1101
1102                 pr_err("while inserting");
1103                 bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(i->k));
1104                 pr_err("%s", buf);
1105                 pr_err("overlapping with");
1106
1107                 node_iter = iter->l[0].iter;
1108                 while ((_k = bch2_btree_node_iter_peek_filter(&node_iter, b,
1109                                                               KEY_TYPE_discard))) {
1110                         struct bkey             unpacked;
1111                         struct bkey_s_c         k;
1112
1113                         k = bkey_disassemble(b, _k, &unpacked);
1114
1115                         if (btree_node_is_extents(b)
1116                             ? bkey_cmp(i->k->k.p, bkey_start_pos(k.k)) <= 0
1117                             : bkey_cmp(i->k->k.p, k.k->p))
1118                                 break;
1119
1120                         bch2_bkey_val_to_text(&PBUF(buf), c, k);
1121                         pr_err("%s", buf);
1122
1123                         bch2_btree_node_iter_advance(&node_iter, b);
1124                 }
1125         }
1126 }
1127
1128 /* Disk reservations: */
1129
1130 static u64 bch2_recalc_sectors_available(struct bch_fs *c)
1131 {
1132         percpu_u64_set(&c->pcpu->sectors_available, 0);
1133
1134         return avail_factor(__bch2_fs_usage_read_short(c).free);
1135 }
1136
1137 void __bch2_disk_reservation_put(struct bch_fs *c, struct disk_reservation *res)
1138 {
1139         percpu_down_read_preempt_disable(&c->mark_lock);
1140         this_cpu_sub(c->usage[0]->online_reserved,
1141                      res->sectors);
1142         percpu_up_read_preempt_enable(&c->mark_lock);
1143
1144         res->sectors = 0;
1145 }
1146
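/*
 * Reservations are filled from a per-cpu cache of sectors; when it runs dry,
 * SECTORS_CACHE more sectors than requested are taken from the global
 * c->sectors_available counter to refill it, so most reservations avoid
 * touching the shared atomic:
 */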
1147 #define SECTORS_CACHE   1024
1148
1149 int bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res,
1150                               unsigned sectors, int flags)
1151 {
1152         struct bch_fs_pcpu *pcpu;
1153         u64 old, v, get;
1154         s64 sectors_available;
1155         int ret;
1156
1157         percpu_down_read_preempt_disable(&c->mark_lock);
1158         pcpu = this_cpu_ptr(c->pcpu);
1159
1160         if (sectors <= pcpu->sectors_available)
1161                 goto out;
1162
1163         v = atomic64_read(&c->sectors_available);
1164         do {
1165                 old = v;
1166                 get = min((u64) sectors + SECTORS_CACHE, old);
1167
1168                 if (get < sectors) {
1169                         percpu_up_read_preempt_enable(&c->mark_lock);
1170                         goto recalculate;
1171                 }
1172         } while ((v = atomic64_cmpxchg(&c->sectors_available,
1173                                        old, old - get)) != old);
1174
1175         pcpu->sectors_available         += get;
1176
1177 out:
1178         pcpu->sectors_available         -= sectors;
1179         this_cpu_add(c->usage[0]->online_reserved, sectors);
1180         res->sectors                    += sectors;
1181
1182         percpu_up_read_preempt_enable(&c->mark_lock);
1183         return 0;
1184
1185 recalculate:
1186         percpu_down_write(&c->mark_lock);
1187
1188         sectors_available = bch2_recalc_sectors_available(c);
1189
1190         if (sectors <= sectors_available ||
1191             (flags & BCH_DISK_RESERVATION_NOFAIL)) {
1192                 atomic64_set(&c->sectors_available,
1193                              max_t(s64, 0, sectors_available - sectors));
1194                 this_cpu_add(c->usage[0]->online_reserved, sectors);
1195                 res->sectors                    += sectors;
1196                 ret = 0;
1197         } else {
1198                 atomic64_set(&c->sectors_available, sectors_available);
1199                 ret = -ENOSPC;
1200         }
1201
1202         percpu_up_write(&c->mark_lock);
1203
1204         return ret;
1205 }
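/*
 * Illustrative usage sketch (added commentary, not compiled - hence #if 0):
 * reserve space before writing, then release whatever is left. A real caller
 * would normally go through the reservation helpers declared in buckets.h;
 * the zero-initialized reservation here is only for illustration.
 */
#if 0
static int example_reserve_then_write(struct bch_fs *c, unsigned sectors)
{
        struct disk_reservation res = { 0 };
        int ret;

        ret = bch2_disk_reservation_add(c, &res, sectors, 0);
        if (ret)
                return ret;     /* -ENOSPC */

        /* ... do the write; marking the new keys consumes res.sectors ... */

        __bch2_disk_reservation_put(c, &res);
        return 0;
}
#endif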
1206
1207 /* Startup/shutdown: */
1208
1209 static void buckets_free_rcu(struct rcu_head *rcu)
1210 {
1211         struct bucket_array *buckets =
1212                 container_of(rcu, struct bucket_array, rcu);
1213
1214         kvpfree(buckets,
1215                 sizeof(struct bucket_array) +
1216                 buckets->nbuckets * sizeof(struct bucket));
1217 }
1218
1219 int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
1220 {
1221         struct bucket_array *buckets = NULL, *old_buckets = NULL;
1222         unsigned long *buckets_nouse = NULL;
1223         unsigned long *buckets_written = NULL;
1224         alloc_fifo      free[RESERVE_NR];
1225         alloc_fifo      free_inc;
1226         alloc_heap      alloc_heap;
1227         copygc_heap     copygc_heap;
1228
1229         size_t btree_reserve    = DIV_ROUND_UP(BTREE_NODE_RESERVE,
1230                              ca->mi.bucket_size / c->opts.btree_node_size);
1231         /* XXX: these should be tunable */
1232         size_t reserve_none     = max_t(size_t, 1, nbuckets >> 9);
1233         size_t copygc_reserve   = max_t(size_t, 2, nbuckets >> 7);
1234         size_t free_inc_nr      = max(max_t(size_t, 1, nbuckets >> 12),
1235                                       btree_reserve * 2);
1236         bool resize = ca->buckets[0] != NULL,
1237              start_copygc = ca->copygc_thread != NULL;
1238         int ret = -ENOMEM;
1239         unsigned i;
1240
1241         memset(&free,           0, sizeof(free));
1242         memset(&free_inc,       0, sizeof(free_inc));
1243         memset(&alloc_heap,     0, sizeof(alloc_heap));
1244         memset(&copygc_heap,    0, sizeof(copygc_heap));
1245
1246         if (!(buckets           = kvpmalloc(sizeof(struct bucket_array) +
1247                                             nbuckets * sizeof(struct bucket),
1248                                             GFP_KERNEL|__GFP_ZERO)) ||
1249             !(buckets_nouse     = kvpmalloc(BITS_TO_LONGS(nbuckets) *
1250                                             sizeof(unsigned long),
1251                                             GFP_KERNEL|__GFP_ZERO)) ||
1252             !(buckets_written   = kvpmalloc(BITS_TO_LONGS(nbuckets) *
1253                                             sizeof(unsigned long),
1254                                             GFP_KERNEL|__GFP_ZERO)) ||
1255             !init_fifo(&free[RESERVE_BTREE], btree_reserve, GFP_KERNEL) ||
1256             !init_fifo(&free[RESERVE_MOVINGGC],
1257                        copygc_reserve, GFP_KERNEL) ||
1258             !init_fifo(&free[RESERVE_NONE], reserve_none, GFP_KERNEL) ||
1259             !init_fifo(&free_inc,       free_inc_nr, GFP_KERNEL) ||
1260             !init_heap(&alloc_heap,     ALLOC_SCAN_BATCH(ca) << 1, GFP_KERNEL) ||
1261             !init_heap(&copygc_heap,    copygc_reserve, GFP_KERNEL))
1262                 goto err;
1263
1264         buckets->first_bucket   = ca->mi.first_bucket;
1265         buckets->nbuckets       = nbuckets;
1266
1267         bch2_copygc_stop(ca);
1268
1269         if (resize) {
1270                 down_write(&c->gc_lock);
1271                 down_write(&ca->bucket_lock);
1272                 percpu_down_write(&c->mark_lock);
1273         }
1274
1275         old_buckets = bucket_array(ca);
1276
1277         if (resize) {
1278                 size_t n = min(buckets->nbuckets, old_buckets->nbuckets);
1279
1280                 memcpy(buckets->b,
1281                        old_buckets->b,
1282                        n * sizeof(struct bucket));
1283                 memcpy(buckets_nouse,
1284                        ca->buckets_nouse,
1285                        BITS_TO_LONGS(n) * sizeof(unsigned long));
1286                 memcpy(buckets_written,
1287                        ca->buckets_written,
1288                        BITS_TO_LONGS(n) * sizeof(unsigned long));
1289         }
1290
1291         rcu_assign_pointer(ca->buckets[0], buckets);
1292         buckets = old_buckets;
1293
1294         swap(ca->buckets_nouse, buckets_nouse);
1295         swap(ca->buckets_written, buckets_written);
1296
1297         if (resize)
1298                 percpu_up_write(&c->mark_lock);
1299
1300         spin_lock(&c->freelist_lock);
1301         for (i = 0; i < RESERVE_NR; i++) {
1302                 fifo_move(&free[i], &ca->free[i]);
1303                 swap(ca->free[i], free[i]);
1304         }
1305         fifo_move(&free_inc, &ca->free_inc);
1306         swap(ca->free_inc, free_inc);
1307         spin_unlock(&c->freelist_lock);
1308
1309         /* with gc lock held, alloc_heap can't be in use: */
1310         swap(ca->alloc_heap, alloc_heap);
1311
1312         /* and we shut down copygc: */
1313         swap(ca->copygc_heap, copygc_heap);
1314
1315         nbuckets = ca->mi.nbuckets;
1316
1317         if (resize) {
1318                 up_write(&ca->bucket_lock);
1319                 up_write(&c->gc_lock);
1320         }
1321
1322         if (start_copygc &&
1323             bch2_copygc_start(c, ca))
1324                 bch_err(ca, "error restarting copygc thread");
1325
1326         ret = 0;
1327 err:
1328         free_heap(&copygc_heap);
1329         free_heap(&alloc_heap);
1330         free_fifo(&free_inc);
1331         for (i = 0; i < RESERVE_NR; i++)
1332                 free_fifo(&free[i]);
1333         kvpfree(buckets_nouse,
1334                 BITS_TO_LONGS(nbuckets) * sizeof(unsigned long));
1335         kvpfree(buckets_written,
1336                 BITS_TO_LONGS(nbuckets) * sizeof(unsigned long));
1337         if (buckets)
1338                 call_rcu(&buckets->rcu, buckets_free_rcu);
1339
1340         return ret;
1341 }
1342
1343 void bch2_dev_buckets_free(struct bch_dev *ca)
1344 {
1345         unsigned i;
1346
1347         free_heap(&ca->copygc_heap);
1348         free_heap(&ca->alloc_heap);
1349         free_fifo(&ca->free_inc);
1350         for (i = 0; i < RESERVE_NR; i++)
1351                 free_fifo(&ca->free[i]);
1352         kvpfree(ca->buckets_written,
1353                 BITS_TO_LONGS(ca->mi.nbuckets) * sizeof(unsigned long));
1354         kvpfree(ca->buckets_nouse,
1355                 BITS_TO_LONGS(ca->mi.nbuckets) * sizeof(unsigned long));
1356         kvpfree(rcu_dereference_protected(ca->buckets[0], 1),
1357                 sizeof(struct bucket_array) +
1358                 ca->mi.nbuckets * sizeof(struct bucket));
1359
1360         free_percpu(ca->usage[0]);
1361 }
1362
1363 int bch2_dev_buckets_alloc(struct bch_fs *c, struct bch_dev *ca)
1364 {
1365         if (!(ca->usage[0] = alloc_percpu(struct bch_dev_usage)))
1366                 return -ENOMEM;
1367
1368         return bch2_dev_buckets_resize(c, ca, ca->mi.nbuckets);
1369 }