libbcachefs/btree_gc.c
/*
 * Copyright (C) 2010 Kent Overstreet <kent.overstreet@gmail.com>
 * Copyright (C) 2014 Datera Inc.
 */

#include "bcachefs.h"
#include "alloc_background.h"
#include "alloc_foreground.h"
#include "bkey_methods.h"
#include "btree_locking.h"
#include "btree_update_interior.h"
#include "btree_io.h"
#include "btree_gc.h"
#include "buckets.h"
#include "clock.h"
#include "debug.h"
#include "ec.h"
#include "error.h"
#include "extents.h"
#include "journal.h"
#include "journal_io.h"
#include "keylist.h"
#include "move.h"
#include "replicas.h"
#include "super-io.h"

#include <linux/slab.h>
#include <linux/bitops.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/preempt.h>
#include <linux/rcupdate.h>
#include <linux/sched/task.h>
#include <trace/events/bcachefs.h>

static inline void __gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
{
        write_seqcount_begin(&c->gc_pos_lock);
        c->gc_pos = new_pos;
        write_seqcount_end(&c->gc_pos_lock);
}

static inline void gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
{
        BUG_ON(gc_pos_cmp(new_pos, c->gc_pos) <= 0);
        __gc_pos_set(c, new_pos);
}

/* range_checks - for validating min/max pos of each btree node: */

struct range_checks {
        struct range_level {
                struct bpos     min;
                struct bpos     max;
        }                       l[BTREE_MAX_DEPTH];
        unsigned                depth;
};

static void btree_node_range_checks_init(struct range_checks *r, unsigned depth)
{
        unsigned i;

        for (i = 0; i < BTREE_MAX_DEPTH; i++)
                r->l[i].min = r->l[i].max = POS_MIN;
        r->depth = depth;
}

static void btree_node_range_checks(struct bch_fs *c, struct btree *b,
                                    struct range_checks *r)
{
        struct range_level *l = &r->l[b->level];

        struct bpos expected_min = bkey_cmp(l->min, l->max)
                ? btree_type_successor(b->btree_id, l->max)
                : l->max;

        bch2_fs_inconsistent_on(bkey_cmp(b->data->min_key, expected_min), c,
                "btree node has incorrect min key: %llu:%llu != %llu:%llu",
                b->data->min_key.inode,
                b->data->min_key.offset,
                expected_min.inode,
                expected_min.offset);

        l->max = b->data->max_key;

        if (b->level > r->depth) {
                l = &r->l[b->level - 1];

                bch2_fs_inconsistent_on(bkey_cmp(b->data->min_key, l->min), c,
                        "btree node min doesn't match min of child nodes: %llu:%llu != %llu:%llu",
                        b->data->min_key.inode,
                        b->data->min_key.offset,
                        l->min.inode,
                        l->min.offset);

                bch2_fs_inconsistent_on(bkey_cmp(b->data->max_key, l->max), c,
                        "btree node max doesn't match max of child nodes: %llu:%llu != %llu:%llu",
                        b->data->max_key.inode,
                        b->data->max_key.offset,
                        l->max.inode,
                        l->max.offset);

                if (bkey_cmp(b->data->max_key, POS_MAX))
                        l->min = l->max =
                                btree_type_successor(b->btree_id,
                                                     b->data->max_key);
        }
}

/* marking of btree keys/nodes: */

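/*
 * bch2_gc_mark_key() - mark a single key's pointers for GC.
 *
 * On the initial (startup) pass this also sanity checks key versions against
 * the journal, makes sure the superblock replicas section covers the key, and
 * repairs bucket generations that are missing or in the future (setting
 * BCH_FS_FIXED_GENS so the mark and sweep can be restarted).
 *
 * For every pointer it updates the per-device oldest_gens[] and reports the
 * worst staleness seen via @max_stale, then accounts the key with
 * bch2_mark_key() against the GC copy of the bucket/usage counters.
 */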
static int bch2_gc_mark_key(struct bch_fs *c, struct bkey_s_c k,
                            u8 *max_stale, bool initial)
{
        struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
        const struct bch_extent_ptr *ptr;
        struct gc_pos pos = { 0 };
        unsigned flags =
                BCH_BUCKET_MARK_GC|
                (initial ? BCH_BUCKET_MARK_NOATOMIC : 0);
        int ret = 0;

        if (initial) {
                BUG_ON(journal_seq_verify(c) &&
                       k.k->version.lo > journal_cur_seq(&c->journal));

                if (k.k->version.lo > atomic64_read(&c->key_version))
                        atomic64_set(&c->key_version, k.k->version.lo);

                if (test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) ||
                    fsck_err_on(!bch2_bkey_replicas_marked(c, k, false), c,
                                "superblock not marked as containing replicas (type %u)",
                                k.k->type)) {
                        ret = bch2_mark_bkey_replicas(c, k);
                        if (ret)
                                return ret;
                }

                bkey_for_each_ptr(ptrs, ptr) {
                        struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
                        size_t b = PTR_BUCKET_NR(ca, ptr);
                        struct bucket *g = PTR_BUCKET(ca, ptr);

                        if (mustfix_fsck_err_on(!g->gen_valid, c,
                                        "found ptr with missing gen in alloc btree,\n"
                                        "type %u gen %u",
                                        k.k->type, ptr->gen)) {
                                g->_mark.gen = ptr->gen;
                                g->gen_valid = 1;
                                bucket_set_dirty(ca, b);
                        }

                        if (mustfix_fsck_err_on(gen_cmp(ptr->gen, g->mark.gen) > 0, c,
                                        "%u ptr gen in the future: %u > %u",
                                        k.k->type, ptr->gen, g->mark.gen)) {
                                g->_mark.gen = ptr->gen;
                                g->gen_valid = 1;
                                bucket_set_dirty(ca, b);
                                set_bit(BCH_FS_FIXED_GENS, &c->flags);
                        }
                }
        }

        bkey_for_each_ptr(ptrs, ptr) {
                struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
                size_t b = PTR_BUCKET_NR(ca, ptr);

                if (gen_after(ca->oldest_gens[b], ptr->gen))
                        ca->oldest_gens[b] = ptr->gen;

                *max_stale = max(*max_stale, ptr_stale(ca, ptr));
        }

        bch2_mark_key(c, k, true, k.k->size, pos, NULL, 0, flags);
fsck_err:
        return ret;
}

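/*
 * Walk all keys in a single btree node, running bch2_bkey_debugcheck() on
 * each and marking it via bch2_gc_mark_key(). Btrees whose keys don't
 * reference buckets are skipped entirely.
 */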
static int btree_gc_mark_node(struct bch_fs *c, struct btree *b,
                              u8 *max_stale, bool initial)
{
        struct btree_node_iter iter;
        struct bkey unpacked;
        struct bkey_s_c k;
        int ret = 0;

        *max_stale = 0;

        if (!btree_node_type_needs_gc(btree_node_type(b)))
                return 0;

        for_each_btree_node_key_unpack(b, k, &iter,
                                       &unpacked) {
                bch2_bkey_debugcheck(c, b, k);

                ret = bch2_gc_mark_key(c, k, max_stale, initial);
                if (ret)
                        break;
        }

        return ret;
}

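/*
 * Walk one btree in GC order: visit its nodes (including leaves when the keys
 * need marking, on the initial pass, or when expensive debug checks are on),
 * validate each node's min/max keys, mark its keys, and finally mark the key
 * for the root itself. Outside the initial pass, nodes with too many stale
 * pointers are rewritten along the way.
 */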
static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id,
                         bool initial)
{
        struct btree_iter iter;
        struct btree *b;
        struct range_checks r;
        unsigned depth = btree_node_type_needs_gc(btree_id) ? 0 : 1;
        u8 max_stale;
        int ret = 0;

        gc_pos_set(c, gc_pos_btree(btree_id, POS_MIN, 0));

        /*
         * if expensive_debug_checks is on, run range_checks on all leaf nodes:
         *
         * and on startup, we have to read every btree node (XXX: only if it was
         * an unclean shutdown)
         */
        if (initial || expensive_debug_checks(c))
                depth = 0;

        btree_node_range_checks_init(&r, depth);

        __for_each_btree_node(&iter, c, btree_id, POS_MIN,
                              0, depth, BTREE_ITER_PREFETCH, b) {
                btree_node_range_checks(c, b, &r);

                bch2_verify_btree_nr_keys(b);

                ret = btree_gc_mark_node(c, b, &max_stale, initial);
                if (ret)
                        break;

                gc_pos_set(c, gc_pos_btree_node(b));

                if (!initial) {
                        if (max_stale > 64)
                                bch2_btree_node_rewrite(c, &iter,
                                                b->data->keys.seq,
                                                BTREE_INSERT_USE_RESERVE|
                                                BTREE_INSERT_NOWAIT|
                                                BTREE_INSERT_GC_LOCK_HELD);
                        else if (!btree_gc_rewrite_disabled(c) &&
                                 (btree_gc_always_rewrite(c) || max_stale > 16))
                                bch2_btree_node_rewrite(c, &iter,
                                                b->data->keys.seq,
                                                BTREE_INSERT_NOWAIT|
                                                BTREE_INSERT_GC_LOCK_HELD);
                }

                bch2_btree_iter_cond_resched(&iter);
        }
        ret = bch2_btree_iter_unlock(&iter) ?: ret;
        if (ret)
                return ret;

        mutex_lock(&c->btree_root_lock);

        b = c->btree_roots[btree_id].b;
        if (!btree_node_fake(b))
                bch2_gc_mark_key(c, bkey_i_to_s_c(&b->key),
                                 &max_stale, initial);
        gc_pos_set(c, gc_pos_btree_root(b->btree_id));

        mutex_unlock(&c->btree_root_lock);
        return 0;
}

static inline int btree_id_gc_phase_cmp(enum btree_id l, enum btree_id r)
{
        return  (int) btree_id_to_gc_phase(l) -
                (int) btree_id_to_gc_phase(r);
}

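/*
 * Mark every btree, in the order given by btree_id_to_gc_phase(). For btrees
 * whose keys need marking, keys still sitting in the journal replay list are
 * marked too, since they aren't reachable through the btree yet.
 */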
static int bch2_gc_btrees(struct bch_fs *c, struct list_head *journal,
                          bool initial)
{
        enum btree_id ids[BTREE_ID_NR];
        u8 max_stale;
        unsigned i;

        for (i = 0; i < BTREE_ID_NR; i++)
                ids[i] = i;
        bubble_sort(ids, BTREE_ID_NR, btree_id_gc_phase_cmp);

        for (i = 0; i < BTREE_ID_NR; i++) {
                enum btree_id id = ids[i];
                enum btree_node_type type = __btree_node_type(0, id);

                int ret = bch2_gc_btree(c, id, initial);
                if (ret)
                        return ret;

                if (journal && btree_node_type_needs_gc(type)) {
                        struct bkey_i *k, *n;
                        struct jset_entry *j;
                        struct journal_replay *r;
                        int ret;

                        list_for_each_entry(r, journal, list)
                                for_each_jset_key(k, n, j, &r->j) {
                                        if (type == __btree_node_type(j->level, j->btree_id)) {
                                                ret = bch2_gc_mark_key(c,
                                                        bkey_i_to_s_c(k),
                                                        &max_stale, initial);
                                                if (ret)
                                                        return ret;
                                        }
                                }
                }
        }

        return 0;
}

static void mark_metadata_sectors(struct bch_fs *c, struct bch_dev *ca,
                                  u64 start, u64 end,
                                  enum bch_data_type type,
                                  unsigned flags)
{
        u64 b = sector_to_bucket(ca, start);

        do {
                unsigned sectors =
                        min_t(u64, bucket_to_sector(ca, b + 1), end) - start;

                bch2_mark_metadata_bucket(c, ca, b, type, sectors,
                                          gc_phase(GC_PHASE_SB), flags);
                b++;
                start += sectors;
        } while (start < end);
}

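/*
 * Mark the buckets holding a device's superblocks (including the copy at
 * BCH_SB_SECTOR) and its journal buckets as metadata, so that space is
 * accounted for. May be called with c == NULL from the device add path,
 * before the device belongs to a running filesystem.
 */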
void bch2_mark_dev_superblock(struct bch_fs *c, struct bch_dev *ca,
                              unsigned flags)
{
        struct bch_sb_layout *layout = &ca->disk_sb.sb->layout;
        unsigned i;
        u64 b;

        /*
         * This conditional is kind of gross, but we may be called from the
         * device add path, before the new device has actually been added to the
         * running filesystem:
         */
        if (c) {
                lockdep_assert_held(&c->sb_lock);
                percpu_down_read_preempt_disable(&c->mark_lock);
        } else {
                preempt_disable();
        }

        for (i = 0; i < layout->nr_superblocks; i++) {
                u64 offset = le64_to_cpu(layout->sb_offset[i]);

                if (offset == BCH_SB_SECTOR)
                        mark_metadata_sectors(c, ca, 0, BCH_SB_SECTOR,
                                              BCH_DATA_SB, flags);

                mark_metadata_sectors(c, ca, offset,
                                      offset + (1 << layout->sb_max_size_bits),
                                      BCH_DATA_SB, flags);
        }

        for (i = 0; i < ca->journal.nr; i++) {
                b = ca->journal.buckets[i];
                bch2_mark_metadata_bucket(c, ca, b, BCH_DATA_JOURNAL,
                                          ca->mi.bucket_size,
                                          gc_phase(GC_PHASE_SB), flags);
        }

        if (c) {
                percpu_up_read_preempt_enable(&c->mark_lock);
        } else {
                preempt_enable();
        }
}

static void bch2_mark_superblocks(struct bch_fs *c)
{
        struct bch_dev *ca;
        unsigned i;

        mutex_lock(&c->sb_lock);
        gc_pos_set(c, gc_phase(GC_PHASE_SB));

        for_each_online_member(ca, c, i)
                bch2_mark_dev_superblock(c, ca, BCH_BUCKET_MARK_GC);
        mutex_unlock(&c->sb_lock);
}

/* Also see bch2_pending_btree_node_free_insert_done() */
static void bch2_mark_pending_btree_node_frees(struct bch_fs *c)
{
        struct gc_pos pos = { 0 };
        struct btree_update *as;
        struct pending_btree_node_free *d;

        mutex_lock(&c->btree_interior_update_lock);
        gc_pos_set(c, gc_phase(GC_PHASE_PENDING_DELETE));

        for_each_pending_btree_node_free(c, as, d)
                if (d->index_update_done)
                        bch2_mark_key(c, bkey_i_to_s_c(&d->key),
                                      true, 0,
                                      pos, NULL, 0,
                                      BCH_BUCKET_MARK_GC);

        mutex_unlock(&c->btree_interior_update_lock);
}

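/*
 * Buckets currently owned by the allocator (sitting in free_inc, on the
 * freelists, or referenced by an open_bucket) aren't reachable through btree
 * keys, so mark them here so GC doesn't consider them unused.
 */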
static void bch2_mark_allocator_buckets(struct bch_fs *c)
{
        struct bch_dev *ca;
        struct open_bucket *ob;
        size_t i, j, iter;
        unsigned ci;

        percpu_down_read_preempt_disable(&c->mark_lock);

        spin_lock(&c->freelist_lock);
        gc_pos_set(c, gc_pos_alloc(c, NULL));

        for_each_member_device(ca, c, ci) {
                fifo_for_each_entry(i, &ca->free_inc, iter)
                        bch2_mark_alloc_bucket(c, ca, i, true,
                                               gc_pos_alloc(c, NULL),
                                               BCH_BUCKET_MARK_GC);

                for (j = 0; j < RESERVE_NR; j++)
                        fifo_for_each_entry(i, &ca->free[j], iter)
                                bch2_mark_alloc_bucket(c, ca, i, true,
                                                       gc_pos_alloc(c, NULL),
                                                       BCH_BUCKET_MARK_GC);
        }

        spin_unlock(&c->freelist_lock);

        for (ob = c->open_buckets;
             ob < c->open_buckets + ARRAY_SIZE(c->open_buckets);
             ob++) {
                spin_lock(&ob->lock);
                if (ob->valid) {
                        gc_pos_set(c, gc_pos_alloc(c, ob));
                        ca = bch_dev_bkey_exists(c, ob->ptr.dev);
                        bch2_mark_alloc_bucket(c, ca, PTR_BUCKET_NR(ca, &ob->ptr), true,
                                               gc_pos_alloc(c, ob),
                                               BCH_BUCKET_MARK_GC);
                }
                spin_unlock(&ob->lock);
        }

        percpu_up_read_preempt_enable(&c->mark_lock);
}

static void bch2_gc_free(struct bch_fs *c)
{
        struct bch_dev *ca;
        unsigned i;

        genradix_free(&c->stripes[1]);

        for_each_member_device(ca, c, i) {
                kvpfree(rcu_dereference_protected(ca->buckets[1], 1),
                        sizeof(struct bucket_array) +
                        ca->mi.nbuckets * sizeof(struct bucket));
                ca->buckets[1] = NULL;

                free_percpu(ca->usage[1]);
                ca->usage[1] = NULL;
        }

        free_percpu(c->usage[1]);
        c->usage[1] = NULL;
}

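/*
 * Only the counters starting at s.gc_start are recomputed by GC; these
 * helpers deliberately leave everything before that offset in struct
 * bch_fs_usage untouched.
 */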
static void fs_usage_reset(struct bch_fs_usage *fs_usage)
{
        memset(&fs_usage->s.gc_start[0], 0,
               sizeof(*fs_usage) - offsetof(typeof(*fs_usage), s.gc_start));
}

static void fs_usage_cpy(struct bch_fs_usage *dst,
                         struct bch_fs_usage *src)
{
        memcpy(&dst->s.gc_start[0],
               &src->s.gc_start[0],
               sizeof(*dst) - offsetof(typeof(*dst), s.gc_start));
}

static void bch2_gc_done_nocheck(struct bch_fs *c)
{
        struct bch_dev *ca;
        unsigned i;
        int cpu;

        {
                struct genradix_iter dst_iter = genradix_iter_init(&c->stripes[0], 0);
                struct genradix_iter src_iter = genradix_iter_init(&c->stripes[1], 0);
                struct stripe *dst, *src;

                c->ec_stripes_heap.used = 0;

                while ((dst = genradix_iter_peek(&dst_iter, &c->stripes[0])) &&
                       (src = genradix_iter_peek(&src_iter, &c->stripes[1]))) {
                        *dst = *src;

                        if (dst->alive)
                                bch2_stripes_heap_insert(c, dst, dst_iter.pos);

                        genradix_iter_advance(&dst_iter, &c->stripes[0]);
                        genradix_iter_advance(&src_iter, &c->stripes[1]);
                }
        }

        for_each_member_device(ca, c, i) {
                struct bucket_array *src = __bucket_array(ca, 1);

                memcpy(__bucket_array(ca, 0), src,
                       sizeof(struct bucket_array) +
                       sizeof(struct bucket) * src->nbuckets);
        };

        for_each_member_device(ca, c, i) {
                struct bch_dev_usage *p;

                for_each_possible_cpu(cpu) {
                        p = per_cpu_ptr(ca->usage[0], cpu);
                        memset(p, 0, sizeof(*p));
                }

                preempt_disable();
                *this_cpu_ptr(ca->usage[0]) = __bch2_dev_usage_read(ca, 1);
                preempt_enable();
        }

        {
                struct bch_fs_usage src = __bch2_fs_usage_read(c, 1);

                for_each_possible_cpu(cpu)
                        fs_usage_reset(per_cpu_ptr(c->usage[0], cpu));

                preempt_disable();
                fs_usage_cpy(this_cpu_ptr(c->usage[0]), &src);
                preempt_enable();
        }
}

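/*
 * Fold the GC copies of the accounting (stripes, bucket marks, device and
 * filesystem usage) back into the live copies. On the initial pass the GC
 * numbers are taken as-is; otherwise each field is compared against what
 * runtime accounting had, and any mismatch is reported and fixed.
 */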
static void bch2_gc_done(struct bch_fs *c, bool initial)
{
        struct bch_dev *ca;
        unsigned i;
        int cpu;

#define copy_field(_f, _msg, ...)                                       \
        if (dst._f != src._f) {                                         \
                bch_err(c, _msg ": got %llu, should be %llu, fixing"\
                        , ##__VA_ARGS__, dst._f, src._f);               \
                dst._f = src._f;                                        \
        }
#define copy_stripe_field(_f, _msg, ...)                                \
        if (dst->_f != src->_f) {                                       \
                bch_err_ratelimited(c, "stripe %zu has wrong "_msg      \
                        ": got %u, should be %u, fixing",               \
                        dst_iter.pos, ##__VA_ARGS__,                    \
                        dst->_f, src->_f);                              \
                dst->_f = src->_f;                                      \
        }
#define copy_bucket_field(_f)                                           \
        if (dst->b[b].mark._f != src->b[b].mark._f) {                   \
                bch_err_ratelimited(c, "dev %u bucket %zu has wrong " #_f\
                        ": got %u, should be %u, fixing",               \
                        i, b, dst->b[b].mark._f, src->b[b].mark._f);    \
                dst->b[b]._mark._f = src->b[b].mark._f;                 \
        }
#define copy_dev_field(_f, _msg, ...)                                   \
        copy_field(_f, "dev %u has wrong " _msg, i, ##__VA_ARGS__)
#define copy_fs_field(_f, _msg, ...)                                    \
        copy_field(_f, "fs has wrong " _msg, ##__VA_ARGS__)

        percpu_down_write(&c->mark_lock);

        if (initial) {
                bch2_gc_done_nocheck(c);
                goto out;
        }

        {
                struct genradix_iter dst_iter = genradix_iter_init(&c->stripes[0], 0);
                struct genradix_iter src_iter = genradix_iter_init(&c->stripes[1], 0);
                struct stripe *dst, *src;
                unsigned i;

                c->ec_stripes_heap.used = 0;

                while ((dst = genradix_iter_peek(&dst_iter, &c->stripes[0])) &&
                       (src = genradix_iter_peek(&src_iter, &c->stripes[1]))) {
                        copy_stripe_field(alive,        "alive");
                        copy_stripe_field(sectors,      "sectors");
                        copy_stripe_field(algorithm,    "algorithm");
                        copy_stripe_field(nr_blocks,    "nr_blocks");
                        copy_stripe_field(nr_redundant, "nr_redundant");
                        copy_stripe_field(blocks_nonempty.counter,
                                          "blocks_nonempty");

                        for (i = 0; i < ARRAY_SIZE(dst->block_sectors); i++)
                                copy_stripe_field(block_sectors[i].counter,
                                                  "block_sectors[%u]", i);

                        if (dst->alive)
                                bch2_stripes_heap_insert(c, dst, dst_iter.pos);

                        genradix_iter_advance(&dst_iter, &c->stripes[0]);
                        genradix_iter_advance(&src_iter, &c->stripes[1]);
                }
        }

        for_each_member_device(ca, c, i) {
                struct bucket_array *dst = __bucket_array(ca, 0);
                struct bucket_array *src = __bucket_array(ca, 1);
                size_t b;

                if (initial) {
                        memcpy(dst, src,
                               sizeof(struct bucket_array) +
                               sizeof(struct bucket) * dst->nbuckets);
                }

                for (b = 0; b < src->nbuckets; b++) {
                        copy_bucket_field(gen);
                        copy_bucket_field(data_type);
                        copy_bucket_field(owned_by_allocator);
                        copy_bucket_field(stripe);
                        copy_bucket_field(dirty_sectors);
                        copy_bucket_field(cached_sectors);
                }
        };

        for_each_member_device(ca, c, i) {
                struct bch_dev_usage dst = __bch2_dev_usage_read(ca, 0);
                struct bch_dev_usage src = __bch2_dev_usage_read(ca, 1);
                struct bch_dev_usage *p;
                unsigned b;

                for (b = 0; b < BCH_DATA_NR; b++)
                        copy_dev_field(buckets[b],
                                       "buckets[%s]", bch2_data_types[b]);
                copy_dev_field(buckets_alloc, "buckets_alloc");
                copy_dev_field(buckets_ec, "buckets_ec");

                for (b = 0; b < BCH_DATA_NR; b++)
                        copy_dev_field(sectors[b],
                                       "sectors[%s]", bch2_data_types[b]);
                copy_dev_field(sectors_fragmented,
                               "sectors_fragmented");

                for_each_possible_cpu(cpu) {
                        p = per_cpu_ptr(ca->usage[0], cpu);
                        memset(p, 0, sizeof(*p));
                }

                preempt_disable();
                p = this_cpu_ptr(ca->usage[0]);
                *p = dst;
                preempt_enable();
        }

        {
                struct bch_fs_usage dst = __bch2_fs_usage_read(c, 0);
                struct bch_fs_usage src = __bch2_fs_usage_read(c, 1);
                unsigned r, b;

                copy_fs_field(s.hidden,         "hidden");
                copy_fs_field(s.data,           "data");
                copy_fs_field(s.cached,         "cached");
                copy_fs_field(s.reserved,       "reserved");
                copy_fs_field(s.nr_inodes,      "nr_inodes");

                for (r = 0; r < BCH_REPLICAS_MAX; r++) {
                        for (b = 0; b < BCH_DATA_NR; b++)
                                copy_fs_field(replicas[r].data[b],
                                              "replicas[%i].data[%s]",
                                              r, bch2_data_types[b]);
                        copy_fs_field(replicas[r].ec_data,
                                      "replicas[%i].ec_data", r);
                        copy_fs_field(replicas[r].persistent_reserved,
                                      "replicas[%i].persistent_reserved", r);
                }

                for (b = 0; b < BCH_DATA_NR; b++)
                        copy_fs_field(buckets[b],
                                      "buckets[%s]", bch2_data_types[b]);

                for_each_possible_cpu(cpu)
                        fs_usage_reset(per_cpu_ptr(c->usage[0], cpu));

                preempt_disable();
                fs_usage_cpy(this_cpu_ptr(c->usage[0]), &dst);
                preempt_enable();
        }
out:
        percpu_up_write(&c->mark_lock);

#undef copy_fs_field
#undef copy_dev_field
#undef copy_bucket_field
#undef copy_stripe_field
#undef copy_field
}

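/*
 * Allocate and initialize the GC-side (index 1) copies of the per-device
 * bucket arrays and the device/filesystem usage counters. Everything GC marks
 * goes into these shadow copies, which bch2_gc_done() later compares against
 * and folds back into the live (index 0) versions; only the bucket generation
 * numbers are carried over here.
 */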
static int bch2_gc_start(struct bch_fs *c)
{
        struct bch_dev *ca;
        unsigned i;

        /*
         * indicate to stripe code that we need to allocate for the gc stripes
         * radix tree, too
         */
        gc_pos_set(c, gc_phase(GC_PHASE_START));

        BUG_ON(c->usage[1]);

        c->usage[1] = alloc_percpu(struct bch_fs_usage);
        if (!c->usage[1])
                return -ENOMEM;

        for_each_member_device(ca, c, i) {
                BUG_ON(ca->buckets[1]);
                BUG_ON(ca->usage[1]);

                ca->buckets[1] = kvpmalloc(sizeof(struct bucket_array) +
                                ca->mi.nbuckets * sizeof(struct bucket),
                                GFP_KERNEL|__GFP_ZERO);
                if (!ca->buckets[1]) {
                        percpu_ref_put(&ca->ref);
                        return -ENOMEM;
                }

                ca->usage[1] = alloc_percpu(struct bch_dev_usage);
                if (!ca->usage[1]) {
                        percpu_ref_put(&ca->ref);
                        return -ENOMEM;
                }
        }

        percpu_down_write(&c->mark_lock);

        for_each_member_device(ca, c, i) {
                struct bucket_array *dst = __bucket_array(ca, 1);
                struct bucket_array *src = __bucket_array(ca, 0);
                size_t b;

                dst->first_bucket       = src->first_bucket;
                dst->nbuckets           = src->nbuckets;

                for (b = 0; b < src->nbuckets; b++)
                        dst->b[b]._mark.gen = src->b[b].mark.gen;
        };

        percpu_up_write(&c->mark_lock);

        return bch2_ec_mem_alloc(c, true);
}

/**
 * bch2_gc - walk _all_ references to buckets, and recompute them:
 *
 * Order matters here:
 *  - Concurrent GC relies on the fact that we have a total ordering for
 *    everything that GC walks - see gc_will_visit_node(),
 *    gc_will_visit_root()
 *
 *  - also, references move around in the course of index updates and
 *    various other crap: everything needs to agree on the ordering
 *    references are allowed to move around in - e.g., we're allowed to
 *    start with a reference owned by an open_bucket (the allocator) and
 *    move it to the btree, but not the reverse.
 *
 *    This is necessary to ensure that gc doesn't miss references that
 *    move around - if references move backwards in the ordering GC
 *    uses, GC could skip past them
 */
int bch2_gc(struct bch_fs *c, struct list_head *journal, bool initial)
{
        struct bch_dev *ca;
        u64 start_time = local_clock();
        unsigned i, iter = 0;
        int ret;

        trace_gc_start(c);

        down_write(&c->gc_lock);
again:
        ret = bch2_gc_start(c);
        if (ret)
                goto out;

        bch2_mark_superblocks(c);

        ret = bch2_gc_btrees(c, journal, initial);
        if (ret)
                goto out;

        bch2_mark_pending_btree_node_frees(c);
        bch2_mark_allocator_buckets(c);

        c->gc_count++;
out:
        if (!ret && test_bit(BCH_FS_FIXED_GENS, &c->flags)) {
                /*
                 * XXX: make sure gens we fixed got saved
                 */
                if (iter++ <= 2) {
                        bch_info(c, "Fixed gens, restarting mark and sweep:");
                        clear_bit(BCH_FS_FIXED_GENS, &c->flags);
                        goto again;
                }

                bch_info(c, "Unable to fix bucket gens, looping");
                ret = -EINVAL;
        }

        if (!ret)
                bch2_gc_done(c, initial);

        /* Indicates that gc is no longer in progress: */
        __gc_pos_set(c, gc_phase(GC_PHASE_NOT_RUNNING));

        bch2_gc_free(c);
        up_write(&c->gc_lock);

        if (!ret && initial)
                set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags);

        trace_gc_end(c);
        bch2_time_stats_update(&c->times[BCH_TIME_btree_gc], start_time);

        /*
         * Wake up allocator in case it was waiting for buckets
         * because of not being able to inc gens
         */
        for_each_member_device(ca, c, i)
                bch2_wake_allocator(ca);

        /*
         * At startup, allocations can happen directly instead of via the
         * allocator thread - issue wakeup in case they blocked on gc_lock:
         */
        closure_wake_up(&c->freelist_wait);
        return ret;
}

/* Btree coalescing */

static void recalc_packed_keys(struct btree *b)
{
        struct bset *i = btree_bset_first(b);
        struct bkey_packed *k;

        memset(&b->nr, 0, sizeof(b->nr));

        BUG_ON(b->nsets != 1);

        vstruct_for_each(i, k)
                btree_keys_account_key_add(&b->nr, 0, k);
}

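/*
 * Given a sliding window of up to GC_MERGE_NODES adjacent sibling nodes, try
 * to repack their keys into one fewer node. Conceptually the nodes are
 * concatenated and re-sliced at new boundaries: keys flow from each node into
 * its left-hand neighbour, a node that drains completely is freed, and the
 * surviving nodes plus deletion keys for the old ones are inserted into the
 * parent in a single btree_update.
 */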
static void bch2_coalesce_nodes(struct bch_fs *c, struct btree_iter *iter,
                                struct btree *old_nodes[GC_MERGE_NODES])
{
        struct btree *parent = btree_node_parent(iter, old_nodes[0]);
        unsigned i, nr_old_nodes, nr_new_nodes, u64s = 0;
        unsigned blocks = btree_blocks(c) * 2 / 3;
        struct btree *new_nodes[GC_MERGE_NODES];
        struct btree_update *as;
        struct keylist keylist;
        struct bkey_format_state format_state;
        struct bkey_format new_format;

        memset(new_nodes, 0, sizeof(new_nodes));
        bch2_keylist_init(&keylist, NULL);

        /* Count keys that are not deleted */
        for (i = 0; i < GC_MERGE_NODES && old_nodes[i]; i++)
                u64s += old_nodes[i]->nr.live_u64s;

        nr_old_nodes = nr_new_nodes = i;

        /* Check if all keys in @old_nodes could fit in one fewer node */
        if (nr_old_nodes <= 1 ||
            __vstruct_blocks(struct btree_node, c->block_bits,
                             DIV_ROUND_UP(u64s, nr_old_nodes - 1)) > blocks)
                return;

        /* Find a format that all keys in @old_nodes can pack into */
        bch2_bkey_format_init(&format_state);

        for (i = 0; i < nr_old_nodes; i++)
                __bch2_btree_calc_format(&format_state, old_nodes[i]);

        new_format = bch2_bkey_format_done(&format_state);

        /* Check if repacking would make any nodes too big to fit */
        for (i = 0; i < nr_old_nodes; i++)
                if (!bch2_btree_node_format_fits(c, old_nodes[i], &new_format)) {
                        trace_btree_gc_coalesce_fail(c,
                                        BTREE_GC_COALESCE_FAIL_FORMAT_FITS);
                        return;
                }

        if (bch2_keylist_realloc(&keylist, NULL, 0,
                        (BKEY_U64s + BKEY_EXTENT_U64s_MAX) * nr_old_nodes)) {
                trace_btree_gc_coalesce_fail(c,
                                BTREE_GC_COALESCE_FAIL_KEYLIST_REALLOC);
                return;
        }

        as = bch2_btree_update_start(c, iter->btree_id,
                        btree_update_reserve_required(c, parent) + nr_old_nodes,
                        BTREE_INSERT_NOFAIL|
                        BTREE_INSERT_USE_RESERVE,
                        NULL);
        if (IS_ERR(as)) {
                trace_btree_gc_coalesce_fail(c,
                                BTREE_GC_COALESCE_FAIL_RESERVE_GET);
                bch2_keylist_free(&keylist, NULL);
                return;
        }

        trace_btree_gc_coalesce(c, old_nodes[0]);

        for (i = 0; i < nr_old_nodes; i++)
                bch2_btree_interior_update_will_free_node(as, old_nodes[i]);

        /* Repack everything with @new_format and sort down to one bset */
        for (i = 0; i < nr_old_nodes; i++)
                new_nodes[i] =
                        __bch2_btree_node_alloc_replacement(as, old_nodes[i],
                                                            new_format);

        /*
         * Conceptually we concatenate the nodes together and slice them
         * up at different boundaries.
         */
        for (i = nr_new_nodes - 1; i > 0; --i) {
                struct btree *n1 = new_nodes[i];
                struct btree *n2 = new_nodes[i - 1];

                struct bset *s1 = btree_bset_first(n1);
                struct bset *s2 = btree_bset_first(n2);
                struct bkey_packed *k, *last = NULL;

                /* Calculate how many keys from @n2 we could fit inside @n1 */
                u64s = 0;

                for (k = s2->start;
                     k < vstruct_last(s2) &&
                     vstruct_blocks_plus(n1->data, c->block_bits,
                                         u64s + k->u64s) <= blocks;
                     k = bkey_next(k)) {
                        last = k;
                        u64s += k->u64s;
                }

                if (u64s == le16_to_cpu(s2->u64s)) {
                        /* n2 fits entirely in n1 */
                        n1->key.k.p = n1->data->max_key = n2->data->max_key;

                        memcpy_u64s(vstruct_last(s1),
                                    s2->start,
                                    le16_to_cpu(s2->u64s));
                        le16_add_cpu(&s1->u64s, le16_to_cpu(s2->u64s));

                        set_btree_bset_end(n1, n1->set);

                        six_unlock_write(&n2->lock);
                        bch2_btree_node_free_never_inserted(c, n2);
                        six_unlock_intent(&n2->lock);

                        memmove(new_nodes + i - 1,
                                new_nodes + i,
                                sizeof(new_nodes[0]) * (nr_new_nodes - i));
                        new_nodes[--nr_new_nodes] = NULL;
                } else if (u64s) {
                        /* move part of n2 into n1 */
                        n1->key.k.p = n1->data->max_key =
                                bkey_unpack_pos(n1, last);

                        n2->data->min_key =
                                btree_type_successor(iter->btree_id,
                                                     n1->data->max_key);

                        memcpy_u64s(vstruct_last(s1),
                                    s2->start, u64s);
                        le16_add_cpu(&s1->u64s, u64s);

                        memmove(s2->start,
                                vstruct_idx(s2, u64s),
                                (le16_to_cpu(s2->u64s) - u64s) * sizeof(u64));
                        s2->u64s = cpu_to_le16(le16_to_cpu(s2->u64s) - u64s);

                        set_btree_bset_end(n1, n1->set);
                        set_btree_bset_end(n2, n2->set);
                }
        }

        for (i = 0; i < nr_new_nodes; i++) {
                struct btree *n = new_nodes[i];

                recalc_packed_keys(n);
                btree_node_reset_sib_u64s(n);

                bch2_btree_build_aux_trees(n);
                six_unlock_write(&n->lock);

                bch2_btree_node_write(c, n, SIX_LOCK_intent);
        }

        /*
         * The keys for the old nodes get deleted. We don't want to insert keys
         * that compare equal to the keys for the new nodes we'll also be
         * inserting - we can't because keys on a keylist must be strictly
         * greater than the previous keys, and we also don't need to since the
         * key for the new node will serve the same purpose (overwriting the key
         * for the old node).
         */
        for (i = 0; i < nr_old_nodes; i++) {
                struct bkey_i delete;
                unsigned j;

                for (j = 0; j < nr_new_nodes; j++)
                        if (!bkey_cmp(old_nodes[i]->key.k.p,
                                      new_nodes[j]->key.k.p))
                                goto next;

                bkey_init(&delete.k);
                delete.k.p = old_nodes[i]->key.k.p;
                bch2_keylist_add_in_order(&keylist, &delete);
next:
                i = i;
        }

        /*
         * Keys for the new nodes get inserted: bch2_btree_insert_keys() only
         * does the lookup once and thus expects the keys to be in sorted order
         * so we have to make sure the new keys are correctly ordered with
         * respect to the deleted keys added in the previous loop
         */
        for (i = 0; i < nr_new_nodes; i++)
                bch2_keylist_add_in_order(&keylist, &new_nodes[i]->key);

        /* Insert the newly coalesced nodes */
        bch2_btree_insert_node(as, parent, iter, &keylist, 0);

        BUG_ON(!bch2_keylist_empty(&keylist));

        BUG_ON(iter->l[old_nodes[0]->level].b != old_nodes[0]);

        bch2_btree_iter_node_replace(iter, new_nodes[0]);

        for (i = 0; i < nr_new_nodes; i++)
                bch2_open_buckets_put(c, &new_nodes[i]->ob);

        /* Free the old nodes and update our sliding window */
        for (i = 0; i < nr_old_nodes; i++) {
                bch2_btree_node_free_inmem(c, old_nodes[i], iter);

                /*
                 * the index update might have triggered a split, in which case
                 * the nodes we coalesced - the new nodes we just created -
                 * might not be sibling nodes anymore - don't add them to the
                 * sliding window (except the first):
                 */
                if (!i) {
                        old_nodes[i] = new_nodes[i];
                } else {
                        old_nodes[i] = NULL;
                        if (new_nodes[i])
                                six_unlock_intent(&new_nodes[i]->lock);
                }
        }

        bch2_btree_update_done(as);
        bch2_keylist_free(&keylist, NULL);
}

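/*
 * Walk a btree keeping a sliding window of the last GC_MERGE_NODES nodes
 * (relocked by sequence number on each step) and hand each window to
 * bch2_coalesce_nodes(). If a node can't be relocked, isn't at the same
 * level, or the parent may have been split, the window is reset.
 */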
static int bch2_coalesce_btree(struct bch_fs *c, enum btree_id btree_id)
{
        struct btree_iter iter;
        struct btree *b;
        bool kthread = (current->flags & PF_KTHREAD) != 0;
        unsigned i;

        /* Sliding window of adjacent btree nodes */
        struct btree *merge[GC_MERGE_NODES];
        u32 lock_seq[GC_MERGE_NODES];

        /*
         * XXX: We don't have a good way of positively matching on sibling nodes
         * that have the same parent - this code works by handling the cases
         * where they might not have the same parent, and is thus fragile. Ugh.
         *
         * Perhaps redo this to use multiple linked iterators?
         */
        memset(merge, 0, sizeof(merge));

        __for_each_btree_node(&iter, c, btree_id, POS_MIN,
                              BTREE_MAX_DEPTH, 0,
                              BTREE_ITER_PREFETCH, b) {
                memmove(merge + 1, merge,
                        sizeof(merge) - sizeof(merge[0]));
                memmove(lock_seq + 1, lock_seq,
                        sizeof(lock_seq) - sizeof(lock_seq[0]));

                merge[0] = b;

                for (i = 1; i < GC_MERGE_NODES; i++) {
                        if (!merge[i] ||
                            !six_relock_intent(&merge[i]->lock, lock_seq[i]))
                                break;

                        if (merge[i]->level != merge[0]->level) {
                                six_unlock_intent(&merge[i]->lock);
                                break;
                        }
                }
                memset(merge + i, 0, (GC_MERGE_NODES - i) * sizeof(merge[0]));

                bch2_coalesce_nodes(c, &iter, merge);

                for (i = 1; i < GC_MERGE_NODES && merge[i]; i++) {
                        lock_seq[i] = merge[i]->lock.state.seq;
                        six_unlock_intent(&merge[i]->lock);
                }

                lock_seq[0] = merge[0]->lock.state.seq;

                if (kthread && kthread_should_stop()) {
                        bch2_btree_iter_unlock(&iter);
                        return -ESHUTDOWN;
                }

                bch2_btree_iter_cond_resched(&iter);

                /*
                 * If the parent node wasn't relocked, it might have been split
                 * and the nodes in our sliding window might not have the same
                 * parent anymore - blow away the sliding window:
                 */
                if (btree_iter_node(&iter, iter.level + 1) &&
                    !btree_node_intent_locked(&iter, iter.level + 1))
                        memset(merge + 1, 0,
                               (GC_MERGE_NODES - 1) * sizeof(merge[0]));
        }
        return bch2_btree_iter_unlock(&iter);
}

/**
 * bch2_coalesce - coalesce adjacent btree nodes with low occupancy
 */
void bch2_coalesce(struct bch_fs *c)
{
        enum btree_id id;

        down_read(&c->gc_lock);
        trace_gc_coalesce_start(c);

        for (id = 0; id < BTREE_ID_NR; id++) {
                int ret = c->btree_roots[id].b
                        ? bch2_coalesce_btree(c, id)
                        : 0;

                if (ret) {
                        if (ret != -ESHUTDOWN)
                                bch_err(c, "btree coalescing failed: %d", ret);
                        up_read(&c->gc_lock);
                        return;
                }
        }

        trace_gc_coalesce_end(c);
        up_read(&c->gc_lock);
}

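/*
 * Background GC thread: waits either for an explicit kick (kick_gc) or, when
 * btree_gc_periodic is set, until roughly capacity/16 sectors have been
 * written as measured by the write IO clock, then runs a full mark and sweep.
 */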
static int bch2_gc_thread(void *arg)
{
        struct bch_fs *c = arg;
        struct io_clock *clock = &c->io_clock[WRITE];
        unsigned long last = atomic_long_read(&clock->now);
        unsigned last_kick = atomic_read(&c->kick_gc);
        int ret;

        set_freezable();

        while (1) {
                while (1) {
                        set_current_state(TASK_INTERRUPTIBLE);

                        if (kthread_should_stop()) {
                                __set_current_state(TASK_RUNNING);
                                return 0;
                        }

                        if (atomic_read(&c->kick_gc) != last_kick)
                                break;

                        if (c->btree_gc_periodic) {
                                unsigned long next = last + c->capacity / 16;

                                if (atomic_long_read(&clock->now) >= next)
                                        break;

                                bch2_io_clock_schedule_timeout(clock, next);
                        } else {
                                schedule();
                        }

                        try_to_freeze();
                }
                __set_current_state(TASK_RUNNING);

                last = atomic_long_read(&clock->now);
                last_kick = atomic_read(&c->kick_gc);

                ret = bch2_gc(c, NULL, false);
                if (ret)
                        bch_err(c, "btree gc failed: %i", ret);

                debug_check_no_locks_held();
        }

        return 0;
}

void bch2_gc_thread_stop(struct bch_fs *c)
{
        struct task_struct *p;

        p = c->gc_thread;
        c->gc_thread = NULL;

        if (p) {
                kthread_stop(p);
                put_task_struct(p);
        }
}

int bch2_gc_thread_start(struct bch_fs *c)
{
        struct task_struct *p;

        BUG_ON(c->gc_thread);

        p = kthread_create(bch2_gc_thread, c, "bch_gc");
        if (IS_ERR(p))
                return PTR_ERR(p);

        get_task_struct(p);
        c->gc_thread = p;
        wake_up_process(p);
        return 0;
}

/* Initial GC computes bucket marks during startup */

int bch2_initial_gc(struct bch_fs *c, struct list_head *journal)
{
        int ret = bch2_gc(c, journal, true);

        /*
         * Skip past versions that might have possibly been used (as nonces),
         * but hadn't had their pointers written:
         */
        if (c->sb.encryption_type)
                atomic64_add(1 << 16, &c->key_version);

        return ret;
}