// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2010 Kent Overstreet <kent.overstreet@gmail.com>
 * Copyright (C) 2014 Datera Inc.
 */

#include "bcachefs.h"
#include "alloc_background.h"
#include "alloc_foreground.h"
#include "bkey_methods.h"
#include "bkey_buf.h"
#include "btree_locking.h"
#include "btree_update_interior.h"
#include "btree_io.h"
#include "btree_gc.h"
#include "buckets.h"
#include "clock.h"
#include "debug.h"
#include "ec.h"
#include "error.h"
#include "extents.h"
#include "journal.h"
#include "keylist.h"
#include "move.h"
#include "recovery.h"
#include "replicas.h"
#include "super-io.h"

#include <linux/slab.h>
#include <linux/bitops.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/preempt.h>
#include <linux/rcupdate.h>
#include <linux/sched/task.h>
#include <trace/events/bcachefs.h>

static inline void __gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
{
        preempt_disable();
        write_seqcount_begin(&c->gc_pos_lock);
        c->gc_pos = new_pos;
        write_seqcount_end(&c->gc_pos_lock);
        preempt_enable();
}

static inline void gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
{
        BUG_ON(gc_pos_cmp(new_pos, c->gc_pos) <= 0);
        __gc_pos_set(c, new_pos);
}

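/*
 * Check that the child keys in an interior btree node cover a contiguous
 * range: each child's min_key should be the successor of the previous
 * child's max_key, and the last child's max_key should match the parent's.
 * Mismatches are repaired by rewriting the child's key via the journal and
 * patching the cached btree node, if one is present.
 */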
/*
 * Missing: if an interior btree node is empty, we need to do something -
 * perhaps just kill it
 */
static int bch2_gc_check_topology(struct bch_fs *c,
                                  struct btree *b,
                                  struct bkey_buf *prev,
                                  struct bkey_buf cur,
                                  bool is_last)
{
        struct bpos node_start  = b->data->min_key;
        struct bpos node_end    = b->data->max_key;
        struct bpos expected_start = bkey_deleted(&prev->k->k)
                ? node_start
                : bpos_successor(prev->k->k.p);
        char buf1[200], buf2[200];
        bool update_min = false;
        bool update_max = false;
        int ret = 0;

        if (cur.k->k.type == KEY_TYPE_btree_ptr_v2) {
                struct bkey_i_btree_ptr_v2 *bp = bkey_i_to_btree_ptr_v2(cur.k);

                if (bkey_deleted(&prev->k->k)) {
                        struct printbuf out = PBUF(buf1);
                        pr_buf(&out, "start of node: ");
                        bch2_bpos_to_text(&out, node_start);
                } else {
                        bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(prev->k));
                }

                if (fsck_err_on(bpos_cmp(expected_start, bp->v.min_key), c,
                                "btree node with incorrect min_key at btree %s level %u:\n"
                                "  prev %s\n"
                                "  cur %s",
                                bch2_btree_ids[b->c.btree_id], b->c.level,
                                buf1,
                                (bch2_bkey_val_to_text(&PBUF(buf2), c, bkey_i_to_s_c(cur.k)), buf2)))
                        update_min = true;
        }

        if (fsck_err_on(is_last &&
                        bpos_cmp(cur.k->k.p, node_end), c,
                        "btree node with incorrect max_key at btree %s level %u:\n"
                        "  %s\n"
                        "  expected %s",
                        bch2_btree_ids[b->c.btree_id], b->c.level,
                        (bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(cur.k)), buf1),
                        (bch2_bpos_to_text(&PBUF(buf2), node_end), buf2)))
                update_max = true;

        bch2_bkey_buf_copy(prev, c, cur.k);

        if (update_min || update_max) {
                struct bkey_i *new;
                struct bkey_i_btree_ptr_v2 *bp = NULL;
                struct btree *n;

                if (update_max) {
                        ret = bch2_journal_key_delete(c, b->c.btree_id,
                                                      b->c.level, cur.k->k.p);
                        if (ret)
                                return ret;
                }

                new = kmalloc(bkey_bytes(&cur.k->k), GFP_KERNEL);
                if (!new) {
                        bch_err(c, "%s: error allocating new key", __func__);
                        return -ENOMEM;
                }

                bkey_copy(new, cur.k);

                if (new->k.type == KEY_TYPE_btree_ptr_v2)
                        bp = bkey_i_to_btree_ptr_v2(new);

                if (update_min)
                        bp->v.min_key = expected_start;
                if (update_max)
                        new->k.p = node_end;
                if (bp)
                        SET_BTREE_PTR_RANGE_UPDATED(&bp->v, true);

                ret = bch2_journal_key_insert(c, b->c.btree_id, b->c.level, new);
                if (ret) {
                        kfree(new);
                        return ret;
                }

                n = bch2_btree_node_get_noiter(c, cur.k, b->c.btree_id,
                                               b->c.level - 1, true);
                if (n) {
                        mutex_lock(&c->btree_cache.lock);
                        bch2_btree_node_hash_remove(&c->btree_cache, n);

                        bkey_copy(&n->key, new);
                        if (update_min)
                                n->data->min_key = expected_start;
                        if (update_max)
                                n->data->max_key = node_end;

                        ret = __bch2_btree_node_hash_insert(&c->btree_cache, n);
                        BUG_ON(ret);
                        mutex_unlock(&c->btree_cache.lock);
                        six_unlock_read(&n->c.lock);
                }
        }
fsck_err:
        return ret;
}

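/*
 * Check a key's pointers against the in-memory bucket and stripe state
 * built up by gc: cached pointers with bad gens can be repaired in place by
 * fixing the bucket mark, while anything else gets the key rewritten via
 * the journal, dropping pointers and stripe entries that can no longer be
 * trusted. Btree node pointers are never dropped - only their gens are
 * corrected, and the read path deals with missing nodes.
 */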
static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id,
                               unsigned level, bool is_root,
                               struct bkey_s_c *k)
{
        struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(*k);
        const union bch_extent_entry *entry;
        struct extent_ptr_decoded p = { 0 };
        bool do_update = false;
        int ret = 0;

        bkey_for_each_ptr_decode(k->k, ptrs, p, entry) {
                struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
                struct bucket *g = PTR_BUCKET(ca, &p.ptr, true);
                struct bucket *g2 = PTR_BUCKET(ca, &p.ptr, false);

                if (fsck_err_on(!g->gen_valid, c,
                                "bucket %u:%zu data type %s ptr gen %u missing in alloc btree",
                                p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr),
                                bch2_data_types[ptr_data_type(k->k, &p.ptr)],
                                p.ptr.gen)) {
                        if (p.ptr.cached) {
                                g2->_mark.gen   = g->_mark.gen          = p.ptr.gen;
                                g2->gen_valid   = g->gen_valid          = true;
                                set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags);
                        } else {
                                do_update = true;
                        }
                }

                if (fsck_err_on(gen_cmp(p.ptr.gen, g->mark.gen) > 0, c,
                                "bucket %u:%zu data type %s ptr gen in the future: %u > %u",
                                p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr),
                                bch2_data_types[ptr_data_type(k->k, &p.ptr)],
                                p.ptr.gen, g->mark.gen)) {
                        if (p.ptr.cached) {
                                g2->_mark.gen   = g->_mark.gen  = p.ptr.gen;
                                g2->gen_valid   = g->gen_valid  = true;
                                g2->_mark.data_type             = 0;
                                g2->_mark.dirty_sectors         = 0;
                                g2->_mark.cached_sectors        = 0;
                                set_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags);
                                set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags);
                        } else {
                                do_update = true;
                        }
                }

                if (fsck_err_on(!p.ptr.cached &&
                                gen_cmp(p.ptr.gen, g->mark.gen) < 0, c,
                                "bucket %u:%zu data type %s stale dirty ptr: %u < %u",
                                p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr),
                                bch2_data_types[ptr_data_type(k->k, &p.ptr)],
                                p.ptr.gen, g->mark.gen))
                        do_update = true;

                if (p.has_ec) {
                        struct stripe *m = genradix_ptr(&c->stripes[true], p.ec.idx);

                        if (fsck_err_on(!m || !m->alive, c,
                                        "pointer to nonexistent stripe %llu",
                                        (u64) p.ec.idx))
                                do_update = true;

                        if (fsck_err_on(!bch2_ptr_matches_stripe_m(m, p), c,
                                        "pointer does not match stripe %llu",
                                        (u64) p.ec.idx))
                                do_update = true;
                }
        }

        if (do_update) {
                struct bkey_ptrs ptrs;
                union bch_extent_entry *entry;
                struct bch_extent_ptr *ptr;
                struct bkey_i *new;

                if (is_root) {
                        bch_err(c, "cannot update btree roots yet");
                        return -EINVAL;
                }

                new = kmalloc(bkey_bytes(k->k), GFP_KERNEL);
                if (!new) {
                        bch_err(c, "%s: error allocating new key", __func__);
                        return -ENOMEM;
                }

                bkey_reassemble(new, *k);

                if (level) {
                        /*
                         * We don't want to drop btree node pointers - if the
                         * btree node isn't there anymore, the read path will
                         * sort it out:
                         */
                        ptrs = bch2_bkey_ptrs(bkey_i_to_s(new));
                        bkey_for_each_ptr(ptrs, ptr) {
                                struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
                                struct bucket *g = PTR_BUCKET(ca, ptr, true);

                                ptr->gen = g->mark.gen;
                        }
                } else {
                        bch2_bkey_drop_ptrs(bkey_i_to_s(new), ptr, ({
                                struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
                                struct bucket *g = PTR_BUCKET(ca, ptr, true);

                                (ptr->cached &&
                                 (!g->gen_valid || gen_cmp(ptr->gen, g->mark.gen) > 0)) ||
                                (!ptr->cached &&
                                 gen_cmp(ptr->gen, g->mark.gen) < 0);
                        }));
again:
                        ptrs = bch2_bkey_ptrs(bkey_i_to_s(new));
                        bkey_extent_entry_for_each(ptrs, entry) {
                                if (extent_entry_type(entry) == BCH_EXTENT_ENTRY_stripe_ptr) {
                                        struct stripe *m = genradix_ptr(&c->stripes[true],
                                                                        entry->stripe_ptr.idx);
                                        union bch_extent_entry *next_ptr;

                                        bkey_extent_entry_for_each_from(ptrs, next_ptr, entry)
                                                if (extent_entry_type(next_ptr) == BCH_EXTENT_ENTRY_ptr)
                                                        goto found;
                                        next_ptr = NULL;
found:
                                        if (!next_ptr) {
                                                bch_err(c, "aieee, found stripe ptr with no data ptr");
                                                continue;
                                        }

                                        if (!m || !m->alive ||
                                            !__bch2_ptr_matches_stripe(&m->ptrs[entry->stripe_ptr.block],
                                                                       &next_ptr->ptr,
                                                                       m->sectors)) {
                                                bch2_bkey_extent_entry_drop(new, entry);
                                                goto again;
                                        }
                                }
                        }
                }

                ret = bch2_journal_key_insert(c, btree_id, level, new);
                if (ret)
                        kfree(new);
                else
                        *k = bkey_i_to_s_c(new);
        }
fsck_err:
        return ret;
}

/* marking of btree keys/nodes: */

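/*
 * Mark a single key for gc: in the initial pass this also repairs bad
 * pointers, the recorded key version number and missing replicas entries;
 * the key is then passed to bch2_mark_key() with BTREE_TRIGGER_GC so the
 * gc copies of the bucket and usage counters get updated.
 */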
static int bch2_gc_mark_key(struct bch_fs *c, enum btree_id btree_id,
                            unsigned level, bool is_root,
                            struct bkey_s_c *k,
                            u8 *max_stale, bool initial)
{
        struct bkey_ptrs_c ptrs;
        const struct bch_extent_ptr *ptr;
        unsigned flags =
                BTREE_TRIGGER_GC|
                (initial ? BTREE_TRIGGER_NOATOMIC : 0);
        int ret = 0;

        if (initial) {
                BUG_ON(bch2_journal_seq_verify &&
                       k->k->version.lo > journal_cur_seq(&c->journal));

                ret = bch2_check_fix_ptrs(c, btree_id, level, is_root, k);
                if (ret)
                        goto err;

                if (fsck_err_on(k->k->version.lo > atomic64_read(&c->key_version), c,
                                "key version number higher than recorded: %llu > %llu",
                                k->k->version.lo,
                                atomic64_read(&c->key_version)))
                        atomic64_set(&c->key_version, k->k->version.lo);

                if (test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) ||
                    fsck_err_on(!bch2_bkey_replicas_marked(c, *k), c,
                                "superblock not marked as containing replicas (type %u)",
                                k->k->type)) {
                        ret = bch2_mark_bkey_replicas(c, *k);
                        if (ret) {
                                bch_err(c, "error marking bkey replicas: %i", ret);
                                goto err;
                        }
                }
        }

        ptrs = bch2_bkey_ptrs_c(*k);
        bkey_for_each_ptr(ptrs, ptr) {
                struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
                struct bucket *g = PTR_BUCKET(ca, ptr, true);

                if (gen_after(g->oldest_gen, ptr->gen))
                        g->oldest_gen = ptr->gen;

                *max_stale = max(*max_stale, ptr_stale(ca, ptr));
        }

        bch2_mark_key(c, *k, 0, k->k->size, NULL, 0, flags);
fsck_err:
err:
        if (ret)
                bch_err(c, "%s: ret %i", __func__, ret);
        return ret;
}

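/*
 * Mark every key in a btree node, tracking the worst case number of stale
 * (cached) pointers seen; for interior nodes, also check the topology of
 * the child nodes as we go.
 */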
static int btree_gc_mark_node(struct bch_fs *c, struct btree *b, u8 *max_stale,
                              bool initial)
{
        struct btree_node_iter iter;
        struct bkey unpacked;
        struct bkey_s_c k;
        struct bkey_buf prev, cur;
        int ret = 0;

        *max_stale = 0;

        if (!btree_node_type_needs_gc(btree_node_type(b)))
                return 0;

        bch2_btree_node_iter_init_from_start(&iter, b);
        bch2_bkey_buf_init(&prev);
        bch2_bkey_buf_init(&cur);
        bkey_init(&prev.k->k);

        while ((k = bch2_btree_node_iter_peek_unpack(&iter, b, &unpacked)).k) {
                ret = bch2_gc_mark_key(c, b->c.btree_id, b->c.level, false,
                                       &k, max_stale, initial);
                if (ret)
                        break;

                bch2_btree_node_iter_advance(&iter, b);

                if (b->c.level) {
                        bch2_bkey_buf_reassemble(&cur, c, k);

                        ret = bch2_gc_check_topology(c, b, &prev, cur,
                                        bch2_btree_node_iter_end(&iter));
                        if (ret)
                                break;
                }
        }

        bch2_bkey_buf_exit(&cur, c);
        bch2_bkey_buf_exit(&prev, c);
        return ret;
}

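/*
 * Walk one btree in gc order for the runtime (non-initial) gc pass,
 * marking every node and key and finishing with the btree root; nodes with
 * too many stale pointers are rewritten as we go.
 */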
static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id,
                         bool initial, bool metadata_only)
{
        struct btree_trans trans;
        struct btree_iter *iter;
        struct btree *b;
        unsigned depth = metadata_only                  ? 1
                : bch2_expensive_debug_checks           ? 0
                : !btree_node_type_needs_gc(btree_id)   ? 1
                : 0;
        u8 max_stale = 0;
        int ret = 0;

        bch2_trans_init(&trans, c, 0, 0);

        gc_pos_set(c, gc_pos_btree(btree_id, POS_MIN, 0));

        __for_each_btree_node(&trans, iter, btree_id, POS_MIN,
                              0, depth, BTREE_ITER_PREFETCH, b) {
                bch2_verify_btree_nr_keys(b);

                gc_pos_set(c, gc_pos_btree_node(b));

                ret = btree_gc_mark_node(c, b, &max_stale, initial);
                if (ret)
                        break;

                if (!initial) {
                        if (max_stale > 64)
                                bch2_btree_node_rewrite(c, iter,
                                                b->data->keys.seq,
                                                BTREE_INSERT_NOWAIT|
                                                BTREE_INSERT_GC_LOCK_HELD);
                        else if (!bch2_btree_gc_rewrite_disabled &&
                                 (bch2_btree_gc_always_rewrite || max_stale > 16))
                                bch2_btree_node_rewrite(c, iter,
                                                b->data->keys.seq,
                                                BTREE_INSERT_NOWAIT|
                                                BTREE_INSERT_GC_LOCK_HELD);
                }

                bch2_trans_cond_resched(&trans);
        }
        bch2_trans_iter_put(&trans, iter);

        ret = bch2_trans_exit(&trans) ?: ret;
        if (ret)
                return ret;

        mutex_lock(&c->btree_root_lock);
        b = c->btree_roots[btree_id].b;
        if (!btree_node_fake(b)) {
                struct bkey_s_c k = bkey_i_to_s_c(&b->key);

                ret = bch2_gc_mark_key(c, b->c.btree_id, b->c.level, true,
                                       &k, &max_stale, initial);
        }
        gc_pos_set(c, gc_pos_btree_root(b->c.btree_id));
        mutex_unlock(&c->btree_root_lock);

        return ret;
}

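/*
 * Initial-gc walk of one btree node: keys are read through the combined
 * btree and journal iterator so that updates still sitting in the journal
 * are included. Interior nodes are checked for topology errors and then
 * recursed into down to target_depth; unreadable children are deleted and
 * another gc pass is requested.
 */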
static int bch2_gc_btree_init_recurse(struct bch_fs *c, struct btree *b,
                                      unsigned target_depth)
{
        struct btree_and_journal_iter iter;
        struct bkey_s_c k;
        struct bkey_buf cur, prev;
        u8 max_stale = 0;
        int ret = 0;

        bch2_btree_and_journal_iter_init_node_iter(&iter, c, b);
        bch2_bkey_buf_init(&prev);
        bch2_bkey_buf_init(&cur);
        bkey_init(&prev.k->k);

        while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
                BUG_ON(bpos_cmp(k.k->p, b->data->min_key) < 0);
                BUG_ON(bpos_cmp(k.k->p, b->data->max_key) > 0);

                ret = bch2_gc_mark_key(c, b->c.btree_id, b->c.level, false,
                                       &k, &max_stale, true);
                if (ret) {
                        bch_err(c, "%s: error %i from bch2_gc_mark_key", __func__, ret);
                        break;
                }

                if (b->c.level) {
                        bch2_bkey_buf_reassemble(&cur, c, k);
                        k = bkey_i_to_s_c(cur.k);

                        bch2_btree_and_journal_iter_advance(&iter);

                        ret = bch2_gc_check_topology(c, b,
                                        &prev, cur,
                                        !bch2_btree_and_journal_iter_peek(&iter).k);
                        if (ret)
                                break;
                } else {
                        bch2_btree_and_journal_iter_advance(&iter);
                }
        }

        if (b->c.level > target_depth) {
                bch2_btree_and_journal_iter_exit(&iter);
                bch2_btree_and_journal_iter_init_node_iter(&iter, c, b);

                while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
                        struct btree *child;

                        bch2_bkey_buf_reassemble(&cur, c, k);
                        bch2_btree_and_journal_iter_advance(&iter);

                        child = bch2_btree_node_get_noiter(c, cur.k,
                                                b->c.btree_id, b->c.level - 1,
                                                false);
                        ret = PTR_ERR_OR_ZERO(child);

                        if (fsck_err_on(ret == -EIO, c,
                                        "unreadable btree node")) {
                                ret = bch2_journal_key_delete(c, b->c.btree_id,
                                                              b->c.level, cur.k->k.p);
                                if (ret)
                                        return ret;

                                set_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags);
                                continue;
                        }

                        if (ret) {
                                bch_err(c, "%s: error %i getting btree node",
                                        __func__, ret);
                                break;
                        }

                        ret = bch2_gc_btree_init_recurse(c, child,
                                                         target_depth);
                        six_unlock_read(&child->c.lock);

                        if (ret)
                                break;
                }
        }
fsck_err:
        bch2_bkey_buf_exit(&cur, c);
        bch2_bkey_buf_exit(&prev, c);
        bch2_btree_and_journal_iter_exit(&iter);
        return ret;
}

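/*
 * Initial gc pass for one btree: check the root's min/max keys, recurse
 * through the tree from the root, then mark the root key itself.
 */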
static int bch2_gc_btree_init(struct bch_fs *c,
                              enum btree_id btree_id,
                              bool metadata_only)
{
        struct btree *b;
        unsigned target_depth = metadata_only           ? 1
                : bch2_expensive_debug_checks           ? 0
                : !btree_node_type_needs_gc(btree_id)   ? 1
                : 0;
        u8 max_stale = 0;
        char buf[100];
        int ret = 0;

        b = c->btree_roots[btree_id].b;

        if (btree_node_fake(b))
                return 0;

        six_lock_read(&b->c.lock, NULL, NULL);
        if (fsck_err_on(bpos_cmp(b->data->min_key, POS_MIN), c,
                        "btree root with incorrect min_key: %s",
                        (bch2_bpos_to_text(&PBUF(buf), b->data->min_key), buf))) {
                BUG();
        }

        if (fsck_err_on(bpos_cmp(b->data->max_key, POS_MAX), c,
                        "btree root with incorrect max_key: %s",
                        (bch2_bpos_to_text(&PBUF(buf), b->data->max_key), buf))) {
                BUG();
        }

        if (b->c.level >= target_depth)
                ret = bch2_gc_btree_init_recurse(c, b, target_depth);

        if (!ret) {
                struct bkey_s_c k = bkey_i_to_s_c(&b->key);

                ret = bch2_gc_mark_key(c, b->c.btree_id, b->c.level, true,
                                       &k, &max_stale, true);
        }
fsck_err:
        six_unlock_read(&b->c.lock);

        if (ret)
                bch_err(c, "%s: ret %i", __func__, ret);
        return ret;
}

static inline int btree_id_gc_phase_cmp(enum btree_id l, enum btree_id r)
{
        return  (int) btree_id_to_gc_phase(l) -
                (int) btree_id_to_gc_phase(r);
}

static int bch2_gc_btrees(struct bch_fs *c, bool initial, bool metadata_only)
{
        enum btree_id ids[BTREE_ID_NR];
        unsigned i;

        for (i = 0; i < BTREE_ID_NR; i++)
                ids[i] = i;
        bubble_sort(ids, BTREE_ID_NR, btree_id_gc_phase_cmp);

        for (i = 0; i < BTREE_ID_NR; i++) {
                enum btree_id id = ids[i];
                int ret = initial
                        ? bch2_gc_btree_init(c, id, metadata_only)
                        : bch2_gc_btree(c, id, initial, metadata_only);
                if (ret) {
                        bch_err(c, "%s: ret %i", __func__, ret);
                        return ret;
                }
        }

        return 0;
}

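/* Mark a range of sectors as the given metadata type, bucket by bucket: */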
static void mark_metadata_sectors(struct bch_fs *c, struct bch_dev *ca,
                                  u64 start, u64 end,
                                  enum bch_data_type type,
                                  unsigned flags)
{
        u64 b = sector_to_bucket(ca, start);

        do {
                unsigned sectors =
                        min_t(u64, bucket_to_sector(ca, b + 1), end) - start;

                bch2_mark_metadata_bucket(c, ca, b, type, sectors,
                                          gc_phase(GC_PHASE_SB), flags);
                b++;
                start += sectors;
        } while (start < end);
}

void bch2_mark_dev_superblock(struct bch_fs *c, struct bch_dev *ca,
                              unsigned flags)
{
        struct bch_sb_layout *layout = &ca->disk_sb.sb->layout;
        unsigned i;
        u64 b;

        /*
         * This conditional is kind of gross, but we may be called from the
         * device add path, before the new device has actually been added to the
         * running filesystem:
         */
        if (c) {
                lockdep_assert_held(&c->sb_lock);
                percpu_down_read(&c->mark_lock);
        }

        for (i = 0; i < layout->nr_superblocks; i++) {
                u64 offset = le64_to_cpu(layout->sb_offset[i]);

                if (offset == BCH_SB_SECTOR)
                        mark_metadata_sectors(c, ca, 0, BCH_SB_SECTOR,
                                              BCH_DATA_sb, flags);

                mark_metadata_sectors(c, ca, offset,
                                      offset + (1 << layout->sb_max_size_bits),
                                      BCH_DATA_sb, flags);
        }

        for (i = 0; i < ca->journal.nr; i++) {
                b = ca->journal.buckets[i];
                bch2_mark_metadata_bucket(c, ca, b, BCH_DATA_journal,
                                          ca->mi.bucket_size,
                                          gc_phase(GC_PHASE_SB), flags);
        }

        if (c)
                percpu_up_read(&c->mark_lock);
}

static void bch2_mark_superblocks(struct bch_fs *c)
{
        struct bch_dev *ca;
        unsigned i;

        mutex_lock(&c->sb_lock);
        gc_pos_set(c, gc_phase(GC_PHASE_SB));

        for_each_online_member(ca, c, i)
                bch2_mark_dev_superblock(c, ca, BTREE_TRIGGER_GC);
        mutex_unlock(&c->sb_lock);
}

#if 0
/* Also see bch2_pending_btree_node_free_insert_done() */
static void bch2_mark_pending_btree_node_frees(struct bch_fs *c)
{
        struct btree_update *as;
        struct pending_btree_node_free *d;

        mutex_lock(&c->btree_interior_update_lock);
        gc_pos_set(c, gc_phase(GC_PHASE_PENDING_DELETE));

        for_each_pending_btree_node_free(c, as, d)
                if (d->index_update_done)
                        bch2_mark_key(c, bkey_i_to_s_c(&d->key),
                                      0, 0, NULL, 0,
                                      BTREE_TRIGGER_GC);

        mutex_unlock(&c->btree_interior_update_lock);
}
#endif

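/* Free the gc copies of the bucket arrays, stripes and usage counters: */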
static void bch2_gc_free(struct bch_fs *c)
{
        struct bch_dev *ca;
        unsigned i;

        genradix_free(&c->stripes[1]);

        for_each_member_device(ca, c, i) {
                kvpfree(rcu_dereference_protected(ca->buckets[1], 1),
                        sizeof(struct bucket_array) +
                        ca->mi.nbuckets * sizeof(struct bucket));
                ca->buckets[1] = NULL;

                free_percpu(ca->usage_gc);
                ca->usage_gc = NULL;
        }

        free_percpu(c->usage_gc);
        c->usage_gc = NULL;
}

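/*
 * Compare the counters recomputed by gc against the live ones, report
 * inconsistencies when we're expected to verify, and copy the gc results
 * over the primary copies.
 */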
static int bch2_gc_done(struct bch_fs *c,
                        bool initial, bool metadata_only)
{
        struct bch_dev *ca;
        bool verify = !metadata_only && (!initial ||
                       (c->sb.compat & (1ULL << BCH_COMPAT_alloc_info)));
        unsigned i, dev;
        int ret = 0;

#define copy_field(_f, _msg, ...)                                       \
        if (dst->_f != src->_f) {                                       \
                if (verify)                                             \
                        fsck_err(c, _msg ": got %llu, should be %llu"   \
                                , ##__VA_ARGS__, dst->_f, src->_f);     \
                dst->_f = src->_f;                                      \
                set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags);            \
        }
#define copy_stripe_field(_f, _msg, ...)                                \
        if (dst->_f != src->_f) {                                       \
                if (verify)                                             \
                        fsck_err(c, "stripe %zu has wrong "_msg         \
                                ": got %u, should be %u",               \
                                iter.pos, ##__VA_ARGS__,                \
                                dst->_f, src->_f);                      \
                dst->_f = src->_f;                                      \
                set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags);            \
        }
#define copy_bucket_field(_f)                                           \
        if (dst->b[b].mark._f != src->b[b].mark._f) {                   \
                if (verify)                                             \
                        fsck_err(c, "bucket %u:%zu gen %u data type %s has wrong " #_f  \
                                ": got %u, should be %u", dev, b,       \
                                dst->b[b].mark.gen,                     \
                                bch2_data_types[dst->b[b].mark.data_type],\
                                dst->b[b].mark._f, src->b[b].mark._f);  \
                dst->b[b]._mark._f = src->b[b].mark._f;                 \
                set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags);            \
        }
#define copy_dev_field(_f, _msg, ...)                                   \
        copy_field(_f, "dev %u has wrong " _msg, dev, ##__VA_ARGS__)
#define copy_fs_field(_f, _msg, ...)                                    \
        copy_field(_f, "fs has wrong " _msg, ##__VA_ARGS__)

        if (!metadata_only) {
                struct genradix_iter iter = genradix_iter_init(&c->stripes[1], 0);
                struct stripe *dst, *src;

                while ((src = genradix_iter_peek(&iter, &c->stripes[1]))) {
                        dst = genradix_ptr_alloc(&c->stripes[0], iter.pos, GFP_KERNEL);

                        if (dst->alive          != src->alive ||
                            dst->sectors        != src->sectors ||
                            dst->algorithm      != src->algorithm ||
                            dst->nr_blocks      != src->nr_blocks ||
                            dst->nr_redundant   != src->nr_redundant) {
                                bch_err(c, "unexpected stripe inconsistency at bch2_gc_done, confused");
                                ret = -EINVAL;
                                goto fsck_err;
                        }

                        for (i = 0; i < ARRAY_SIZE(dst->block_sectors); i++)
                                copy_stripe_field(block_sectors[i],
                                                  "block_sectors[%u]", i);

                        dst->blocks_nonempty = 0;
                        for (i = 0; i < dst->nr_blocks; i++)
                                dst->blocks_nonempty += dst->block_sectors[i] != 0;

                        genradix_iter_advance(&iter, &c->stripes[1]);
                }
        }

        for (i = 0; i < ARRAY_SIZE(c->usage); i++)
                bch2_fs_usage_acc_to_base(c, i);

        for_each_member_device(ca, c, dev) {
                struct bucket_array *dst = __bucket_array(ca, 0);
                struct bucket_array *src = __bucket_array(ca, 1);
                size_t b;

                for (b = 0; b < src->nbuckets; b++) {
                        copy_bucket_field(gen);
                        copy_bucket_field(data_type);
                        copy_bucket_field(stripe);
                        copy_bucket_field(dirty_sectors);
                        copy_bucket_field(cached_sectors);

                        dst->b[b].oldest_gen = src->b[b].oldest_gen;
                }

                {
                        struct bch_dev_usage *dst = ca->usage_base;
                        struct bch_dev_usage *src = (void *)
                                bch2_acc_percpu_u64s((void *) ca->usage_gc,
                                                     dev_usage_u64s());

                        copy_dev_field(buckets_ec,              "buckets_ec");
                        copy_dev_field(buckets_unavailable,     "buckets_unavailable");

                        for (i = 0; i < BCH_DATA_NR; i++) {
                                copy_dev_field(d[i].buckets,    "%s buckets", bch2_data_types[i]);
                                copy_dev_field(d[i].sectors,    "%s sectors", bch2_data_types[i]);
                                copy_dev_field(d[i].fragmented, "%s fragmented", bch2_data_types[i]);
                        }
                }
        };

        {
                unsigned nr = fs_usage_u64s(c);
                struct bch_fs_usage *dst = c->usage_base;
                struct bch_fs_usage *src = (void *)
                        bch2_acc_percpu_u64s((void *) c->usage_gc, nr);

                copy_fs_field(hidden,           "hidden");
                copy_fs_field(btree,            "btree");

                if (!metadata_only) {
                        copy_fs_field(data,     "data");
                        copy_fs_field(cached,   "cached");
                        copy_fs_field(reserved, "reserved");
                        copy_fs_field(nr_inodes,"nr_inodes");

                        for (i = 0; i < BCH_REPLICAS_MAX; i++)
                                copy_fs_field(persistent_reserved[i],
                                              "persistent_reserved[%i]", i);
                }

                for (i = 0; i < c->replicas.nr; i++) {
                        struct bch_replicas_entry *e =
                                cpu_replicas_entry(&c->replicas, i);
                        char buf[80];

                        if (metadata_only &&
                            (e->data_type == BCH_DATA_user ||
                             e->data_type == BCH_DATA_cached))
                                continue;

                        bch2_replicas_entry_to_text(&PBUF(buf), e);

                        copy_fs_field(replicas[i], "%s", buf);
                }
        }

#undef copy_fs_field
#undef copy_dev_field
#undef copy_bucket_field
#undef copy_stripe_field
#undef copy_field
fsck_err:
        if (ret)
                bch_err(c, "%s: ret %i", __func__, ret);
        return ret;
}

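/*
 * Allocate and initialize the gc copies of the usage counters and bucket
 * arrays; bucket gens are seeded from the primary copies so that pointer
 * validity checks keep working while gc runs.
 */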
static int bch2_gc_start(struct bch_fs *c,
                         bool metadata_only)
{
        struct bch_dev *ca;
        unsigned i;
        int ret;

        BUG_ON(c->usage_gc);

        c->usage_gc = __alloc_percpu_gfp(fs_usage_u64s(c) * sizeof(u64),
                                         sizeof(u64), GFP_KERNEL);
        if (!c->usage_gc) {
                bch_err(c, "error allocating c->usage_gc");
                return -ENOMEM;
        }

        for_each_member_device(ca, c, i) {
                BUG_ON(ca->buckets[1]);
                BUG_ON(ca->usage_gc);

                ca->buckets[1] = kvpmalloc(sizeof(struct bucket_array) +
                                ca->mi.nbuckets * sizeof(struct bucket),
                                GFP_KERNEL|__GFP_ZERO);
                if (!ca->buckets[1]) {
                        percpu_ref_put(&ca->ref);
                        bch_err(c, "error allocating ca->buckets[gc]");
                        return -ENOMEM;
                }

                ca->usage_gc = alloc_percpu(struct bch_dev_usage);
                if (!ca->usage_gc) {
                        bch_err(c, "error allocating ca->usage_gc");
                        percpu_ref_put(&ca->ref);
                        return -ENOMEM;
                }
        }

        ret = bch2_ec_mem_alloc(c, true);
        if (ret) {
                bch_err(c, "error allocating ec gc mem");
                return ret;
        }

        percpu_down_write(&c->mark_lock);

        /*
         * indicate to stripe code that we need to allocate for the gc stripes
         * radix tree, too
         */
        gc_pos_set(c, gc_phase(GC_PHASE_START));

        for_each_member_device(ca, c, i) {
                struct bucket_array *dst = __bucket_array(ca, 1);
                struct bucket_array *src = __bucket_array(ca, 0);
                size_t b;

                dst->first_bucket       = src->first_bucket;
                dst->nbuckets           = src->nbuckets;

                for (b = 0; b < src->nbuckets; b++) {
                        struct bucket *d = &dst->b[b];
                        struct bucket *s = &src->b[b];

                        d->_mark.gen = dst->b[b].oldest_gen = s->mark.gen;
                        d->gen_valid = s->gen_valid;

                        if (metadata_only &&
                            (s->mark.data_type == BCH_DATA_user ||
                             s->mark.data_type == BCH_DATA_cached))
                                d->_mark = s->mark;
                }
        };

        percpu_up_write(&c->mark_lock);

        return 0;
}

/**
 * bch2_gc - walk _all_ references to buckets, and recompute them:
 *
 * Order matters here:
 *  - Concurrent GC relies on the fact that we have a total ordering for
 *    everything that GC walks - see  gc_will_visit_node(),
 *    gc_will_visit_root()
 *
 *  - also, references move around in the course of index updates and
 *    various other crap: everything needs to agree on the ordering
 *    references are allowed to move around in - e.g., we're allowed to
 *    start with a reference owned by an open_bucket (the allocator) and
 *    move it to the btree, but not the reverse.
 *
 *    This is necessary to ensure that gc doesn't miss references that
 *    move around - if references move backwards in the ordering GC
 *    uses, GC could skip past them
 */
int bch2_gc(struct bch_fs *c, bool initial, bool metadata_only)
{
        struct bch_dev *ca;
        u64 start_time = local_clock();
        unsigned i, iter = 0;
        int ret;

        lockdep_assert_held(&c->state_lock);
        trace_gc_start(c);

        down_write(&c->gc_lock);

        /* flush interior btree updates: */
        closure_wait_event(&c->btree_interior_update_wait,
                           !bch2_btree_interior_updates_nr_pending(c));
again:
        ret = bch2_gc_start(c, metadata_only);
        if (ret)
                goto out;

        bch2_mark_superblocks(c);

        ret = bch2_gc_btrees(c, initial, metadata_only);
        if (ret)
                goto out;

#if 0
        bch2_mark_pending_btree_node_frees(c);
#endif
        c->gc_count++;

        if (test_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags) ||
            (!iter && bch2_test_restart_gc)) {
                /*
                 * XXX: make sure gens we fixed got saved
                 */
                if (iter++ <= 2) {
                        bch_info(c, "Second GC pass needed, restarting:");
                        clear_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags);
                        __gc_pos_set(c, gc_phase(GC_PHASE_NOT_RUNNING));

                        percpu_down_write(&c->mark_lock);
                        bch2_gc_free(c);
                        percpu_up_write(&c->mark_lock);
                        /* flush fsck errors, reset counters */
                        bch2_flush_fsck_errs(c);

                        goto again;
                }

                bch_info(c, "Unable to fix bucket gens, looping");
                ret = -EINVAL;
        }
out:
        if (!ret) {
                bch2_journal_block(&c->journal);

                percpu_down_write(&c->mark_lock);
                ret = bch2_gc_done(c, initial, metadata_only);

                bch2_journal_unblock(&c->journal);
        } else {
                percpu_down_write(&c->mark_lock);
        }

        /* Indicates that gc is no longer in progress: */
        __gc_pos_set(c, gc_phase(GC_PHASE_NOT_RUNNING));

        bch2_gc_free(c);
        percpu_up_write(&c->mark_lock);

        up_write(&c->gc_lock);

        trace_gc_end(c);
        bch2_time_stats_update(&c->times[BCH_TIME_btree_gc], start_time);

        /*
         * Wake up allocator in case it was waiting for buckets
         * because of not being able to inc gens
         */
        for_each_member_device(ca, c, i)
                bch2_wake_allocator(ca);

        /*
         * At startup, allocations can happen directly instead of via the
         * allocator thread - issue wakeup in case they blocked on gc_lock:
         */
        closure_wake_up(&c->freelist_wait);
        return ret;
}

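/*
 * Returns true if the key has a pointer far enough behind its bucket's gen
 * that the key should be rewritten to drop stale cached pointers;
 * otherwise just updates gc_gen, the oldest gen seen pointing into each
 * bucket.
 */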
static bool gc_btree_gens_key(struct bch_fs *c, struct bkey_s_c k)
{
        struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
        const struct bch_extent_ptr *ptr;

        percpu_down_read(&c->mark_lock);
        bkey_for_each_ptr(ptrs, ptr) {
                struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
                struct bucket *g = PTR_BUCKET(ca, ptr, false);

                if (gen_after(g->mark.gen, ptr->gen) > 16) {
                        percpu_up_read(&c->mark_lock);
                        return true;
                }
        }

        bkey_for_each_ptr(ptrs, ptr) {
                struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
                struct bucket *g = PTR_BUCKET(ca, ptr, false);

                if (gen_after(g->gc_gen, ptr->gen))
                        g->gc_gen = ptr->gen;
        }
        percpu_up_read(&c->mark_lock);

        return false;
}

/*
 * For recalculating oldest gen, we only need to walk keys in leaf nodes; btree
 * node pointers currently never have cached pointers that can become stale:
 */
static int bch2_gc_btree_gens(struct bch_fs *c, enum btree_id btree_id)
{
        struct btree_trans trans;
        struct btree_iter *iter;
        struct bkey_s_c k;
        struct bkey_buf sk;
        int ret = 0, commit_err = 0;

        bch2_bkey_buf_init(&sk);
        bch2_trans_init(&trans, c, 0, 0);

        iter = bch2_trans_get_iter(&trans, btree_id, POS_MIN,
                                   BTREE_ITER_PREFETCH|
                                   BTREE_ITER_NOT_EXTENTS|
                                   BTREE_ITER_ALL_SNAPSHOTS);

        while ((k = bch2_btree_iter_peek(iter)).k &&
               !(ret = bkey_err(k))) {
                c->gc_gens_pos = iter->pos;

                if (gc_btree_gens_key(c, k) && !commit_err) {
                        bch2_bkey_buf_reassemble(&sk, c, k);
                        bch2_extent_normalize(c, bkey_i_to_s(sk.k));

                        bch2_trans_update(&trans, iter, sk.k, 0);

                        commit_err = bch2_trans_commit(&trans, NULL, NULL,
                                                       BTREE_INSERT_NOWAIT|
                                                       BTREE_INSERT_NOFAIL);
                        if (commit_err == -EINTR) {
                                commit_err = 0;
                                continue;
                        }
                }

                bch2_btree_iter_advance(iter);
        }
        bch2_trans_iter_put(&trans, iter);

        bch2_trans_exit(&trans);
        bch2_bkey_buf_exit(&sk, c);

        return ret;
}

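/*
 * Lightweight gc pass that only recalculates each bucket's oldest_gen, so
 * that bucket gens can keep being incremented without wrapping; only leaf
 * keys in btrees that contain pointers are walked.
 */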
int bch2_gc_gens(struct bch_fs *c)
{
        struct bch_dev *ca;
        struct bucket_array *buckets;
        struct bucket *g;
        unsigned i;
        int ret;

        /*
         * Ideally we would be using state_lock and not gc_lock here, but that
         * introduces a deadlock in the RO path - we currently take the state
         * lock at the start of going RO, thus the gc thread may get stuck:
         */
        down_read(&c->gc_lock);

        for_each_member_device(ca, c, i) {
                down_read(&ca->bucket_lock);
                buckets = bucket_array(ca);

                for_each_bucket(g, buckets)
                        g->gc_gen = g->mark.gen;
                up_read(&ca->bucket_lock);
        }

        for (i = 0; i < BTREE_ID_NR; i++)
                if ((1 << i) & BTREE_ID_HAS_PTRS) {
                        c->gc_gens_btree = i;
                        c->gc_gens_pos = POS_MIN;
                        ret = bch2_gc_btree_gens(c, i);
                        if (ret) {
                                bch_err(c, "error recalculating oldest_gen: %i", ret);
                                goto err;
                        }
                }

        for_each_member_device(ca, c, i) {
                down_read(&ca->bucket_lock);
                buckets = bucket_array(ca);

                for_each_bucket(g, buckets)
                        g->oldest_gen = g->gc_gen;
                up_read(&ca->bucket_lock);
        }

        c->gc_gens_btree        = 0;
        c->gc_gens_pos          = POS_MIN;

        c->gc_count++;
err:
        up_read(&c->gc_lock);
        return ret;
}

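/*
 * gc thread: runs a gens pass (full gc is currently disabled here, see
 * below) when explicitly kicked via kick_gc or, if btree_gc_periodic is
 * set, once enough IO has happened according to the write clock.
 */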
static int bch2_gc_thread(void *arg)
{
        struct bch_fs *c = arg;
        struct io_clock *clock = &c->io_clock[WRITE];
        unsigned long last = atomic64_read(&clock->now);
        unsigned last_kick = atomic_read(&c->kick_gc);
        int ret;

        set_freezable();

        while (1) {
                while (1) {
                        set_current_state(TASK_INTERRUPTIBLE);

                        if (kthread_should_stop()) {
                                __set_current_state(TASK_RUNNING);
                                return 0;
                        }

                        if (atomic_read(&c->kick_gc) != last_kick)
                                break;

                        if (c->btree_gc_periodic) {
                                unsigned long next = last + c->capacity / 16;

                                if (atomic64_read(&clock->now) >= next)
                                        break;

                                bch2_io_clock_schedule_timeout(clock, next);
                        } else {
                                schedule();
                        }

                        try_to_freeze();
                }
                __set_current_state(TASK_RUNNING);

                last = atomic64_read(&clock->now);
                last_kick = atomic_read(&c->kick_gc);

                /*
                 * Full gc is currently incompatible with btree key cache:
                 */
#if 0
                ret = bch2_gc(c, false, false);
#else
                ret = bch2_gc_gens(c);
#endif
                if (ret < 0)
                        bch_err(c, "btree gc failed: %i", ret);

                debug_check_no_locks_held();
        }

        return 0;
}

void bch2_gc_thread_stop(struct bch_fs *c)
{
        struct task_struct *p;

        p = c->gc_thread;
        c->gc_thread = NULL;

        if (p) {
                kthread_stop(p);
                put_task_struct(p);
        }
}

int bch2_gc_thread_start(struct bch_fs *c)
{
        struct task_struct *p;

        if (c->gc_thread)
                return 0;

        p = kthread_create(bch2_gc_thread, c, "bch-gc/%s", c->name);
        if (IS_ERR(p)) {
                bch_err(c, "error creating gc thread: %li", PTR_ERR(p));
                return PTR_ERR(p);
        }

        get_task_struct(p);
        c->gc_thread = p;
        wake_up_process(p);
        return 0;
}