+ start += sectors;
+ } while (start < end);
+}
+
+static void bch2_mark_dev_superblock(struct bch_fs *c, struct bch_dev *ca,
+ unsigned flags)
+{
+ struct bch_sb_layout *layout = &ca->disk_sb.sb->layout;
+ unsigned i;
+ u64 b;
+
+ for (i = 0; i < layout->nr_superblocks; i++) {
+ u64 offset = le64_to_cpu(layout->sb_offset[i]);
+
+ if (offset == BCH_SB_SECTOR)
+ mark_metadata_sectors(c, ca, 0, BCH_SB_SECTOR,
+ BCH_DATA_sb, flags);
+
+ mark_metadata_sectors(c, ca, offset,
+ offset + (1 << layout->sb_max_size_bits),
+ BCH_DATA_sb, flags);
+ }
+
+ for (i = 0; i < ca->journal.nr; i++) {
+ b = ca->journal.buckets[i];
+ bch2_mark_metadata_bucket(c, ca, b, BCH_DATA_journal,
+ ca->mi.bucket_size,
+ gc_phase(GC_PHASE_SB), flags);
+ }
+}
+
+static void bch2_mark_superblocks(struct bch_fs *c)
+{
+ struct bch_dev *ca;
+ unsigned i;
+
+ mutex_lock(&c->sb_lock);
+ gc_pos_set(c, gc_phase(GC_PHASE_SB));
+
+ for_each_online_member(ca, c, i)
+ bch2_mark_dev_superblock(c, ca, BTREE_TRIGGER_GC);
+ mutex_unlock(&c->sb_lock);
+}
+
+#if 0
+/* Also see bch2_pending_btree_node_free_insert_done() */
+static void bch2_mark_pending_btree_node_frees(struct bch_fs *c)
+{
+ struct btree_update *as;
+ struct pending_btree_node_free *d;
+
+ mutex_lock(&c->btree_interior_update_lock);
+ gc_pos_set(c, gc_phase(GC_PHASE_PENDING_DELETE));
+
+ for_each_pending_btree_node_free(c, as, d)
+ if (d->index_update_done)
+ bch2_mark_key(c, bkey_i_to_s_c(&d->key), BTREE_TRIGGER_GC);
+
+ mutex_unlock(&c->btree_interior_update_lock);
+}
+#endif
+
+static void bch2_gc_free(struct bch_fs *c)
+{
+ struct bch_dev *ca;
+ unsigned i;
+
+ genradix_free(&c->reflink_gc_table);
+ genradix_free(&c->gc_stripes);
+
+ for_each_member_device(ca, c, i) {
+ kvpfree(rcu_dereference_protected(ca->buckets_gc, 1),
+ sizeof(struct bucket_array) +
+ ca->mi.nbuckets * sizeof(struct bucket));
+ ca->buckets_gc = NULL;
+
+ free_percpu(ca->usage_gc);
+ ca->usage_gc = NULL;
+ }
+
+ free_percpu(c->usage_gc);
+ c->usage_gc = NULL;
+}
+
+static int bch2_gc_done(struct bch_fs *c,
+ bool initial, bool metadata_only)
+{
+ struct bch_dev *ca = NULL;
+ struct printbuf buf = PRINTBUF;
+ bool verify = !metadata_only &&
+ !c->opts.reconstruct_alloc &&
+ (!initial || (c->sb.compat & (1ULL << BCH_COMPAT_alloc_info)));
+ unsigned i, dev;
+ int ret = 0;
+
+ percpu_down_write(&c->mark_lock);
+
+#define copy_field(_err, _f, _msg, ...) \
+ if (dst->_f != src->_f && \
+ (!verify || \
+ fsck_err(c, _err, _msg ": got %llu, should be %llu" \
+ , ##__VA_ARGS__, dst->_f, src->_f))) \
+ dst->_f = src->_f
+#define copy_dev_field(_err, _f, _msg, ...) \
+ copy_field(_err, _f, "dev %u has wrong " _msg, dev, ##__VA_ARGS__)
+#define copy_fs_field(_err, _f, _msg, ...) \
+ copy_field(_err, _f, "fs has wrong " _msg, ##__VA_ARGS__)
+
+ for (i = 0; i < ARRAY_SIZE(c->usage); i++)
+ bch2_fs_usage_acc_to_base(c, i);
+
+ for_each_member_device(ca, c, dev) {
+ struct bch_dev_usage *dst = ca->usage_base;
+ struct bch_dev_usage *src = (void *)
+ bch2_acc_percpu_u64s((u64 __percpu *) ca->usage_gc,
+ dev_usage_u64s());
+
+ for (i = 0; i < BCH_DATA_NR; i++) {
+ copy_dev_field(dev_usage_buckets_wrong,
+ d[i].buckets, "%s buckets", bch2_data_types[i]);
+ copy_dev_field(dev_usage_sectors_wrong,
+ d[i].sectors, "%s sectors", bch2_data_types[i]);
+ copy_dev_field(dev_usage_fragmented_wrong,
+ d[i].fragmented, "%s fragmented", bch2_data_types[i]);
+ }
+ }
+
+ {
+ unsigned nr = fs_usage_u64s(c);
+ struct bch_fs_usage *dst = c->usage_base;
+ struct bch_fs_usage *src = (void *)
+ bch2_acc_percpu_u64s((u64 __percpu *) c->usage_gc, nr);
+
+ copy_fs_field(fs_usage_hidden_wrong,
+ hidden, "hidden");
+ copy_fs_field(fs_usage_btree_wrong,
+ btree, "btree");
+
+ if (!metadata_only) {
+ copy_fs_field(fs_usage_data_wrong,
+ data, "data");
+ copy_fs_field(fs_usage_cached_wrong,
+ cached, "cached");
+ copy_fs_field(fs_usage_reserved_wrong,
+ reserved, "reserved");
+ copy_fs_field(fs_usage_nr_inodes_wrong,
+ nr_inodes,"nr_inodes");
+
+ for (i = 0; i < BCH_REPLICAS_MAX; i++)
+ copy_fs_field(fs_usage_persistent_reserved_wrong,
+ persistent_reserved[i],
+ "persistent_reserved[%i]", i);
+ }
+
+ for (i = 0; i < c->replicas.nr; i++) {
+ struct bch_replicas_entry_v1 *e =
+ cpu_replicas_entry(&c->replicas, i);
+
+ if (metadata_only &&
+ (e->data_type == BCH_DATA_user ||
+ e->data_type == BCH_DATA_cached))
+ continue;
+
+ printbuf_reset(&buf);
+ bch2_replicas_entry_to_text(&buf, e);
+
+ copy_fs_field(fs_usage_replicas_wrong,
+ replicas[i], "%s", buf.buf);
+ }
+ }
+
+#undef copy_fs_field
+#undef copy_dev_field
+#undef copy_stripe_field
+#undef copy_field
+fsck_err:
+ if (ca)
+ percpu_ref_put(&ca->ref);
+ if (ret)
+ bch_err_fn(c, ret);
+
+ percpu_up_write(&c->mark_lock);
+ printbuf_exit(&buf);
+ return ret;
+}
+
+static int bch2_gc_start(struct bch_fs *c)
+{
+ struct bch_dev *ca = NULL;
+ unsigned i;
+
+ BUG_ON(c->usage_gc);
+
+ c->usage_gc = __alloc_percpu_gfp(fs_usage_u64s(c) * sizeof(u64),
+ sizeof(u64), GFP_KERNEL);
+ if (!c->usage_gc) {
+ bch_err(c, "error allocating c->usage_gc");
+ return -BCH_ERR_ENOMEM_gc_start;
+ }
+
+ for_each_member_device(ca, c, i) {
+ BUG_ON(ca->usage_gc);
+
+ ca->usage_gc = alloc_percpu(struct bch_dev_usage);
+ if (!ca->usage_gc) {
+ bch_err(c, "error allocating ca->usage_gc");
+ percpu_ref_put(&ca->ref);
+ return -BCH_ERR_ENOMEM_gc_start;
+ }
+
+ this_cpu_write(ca->usage_gc->d[BCH_DATA_free].buckets,
+ ca->mi.nbuckets - ca->mi.first_bucket);
+ }
+
+ return 0;
+}
+
+static int bch2_gc_reset(struct bch_fs *c)
+{
+ struct bch_dev *ca;
+ unsigned i;
+
+ for_each_member_device(ca, c, i) {
+ free_percpu(ca->usage_gc);
+ ca->usage_gc = NULL;
+ }
+
+ free_percpu(c->usage_gc);
+ c->usage_gc = NULL;
+
+ return bch2_gc_start(c);
+}
+
+/* returns true if not equal */
+static inline bool bch2_alloc_v4_cmp(struct bch_alloc_v4 l,
+ struct bch_alloc_v4 r)
+{
+ return l.gen != r.gen ||
+ l.oldest_gen != r.oldest_gen ||
+ l.data_type != r.data_type ||
+ l.dirty_sectors != r.dirty_sectors ||
+ l.cached_sectors != r.cached_sectors ||
+ l.stripe_redundancy != r.stripe_redundancy ||
+ l.stripe != r.stripe;
+}
+
+static int bch2_alloc_write_key(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bkey_s_c k,
+ bool metadata_only)
+{
+ struct bch_fs *c = trans->c;
+ struct bch_dev *ca = bch_dev_bkey_exists(c, iter->pos.inode);
+ struct bucket gc, *b;
+ struct bkey_i_alloc_v4 *a;
+ struct bch_alloc_v4 old_convert, new;
+ const struct bch_alloc_v4 *old;
+ enum bch_data_type type;
+ int ret;
+
+ if (bkey_ge(iter->pos, POS(ca->dev_idx, ca->mi.nbuckets)))
+ return 1;
+
+ old = bch2_alloc_to_v4(k, &old_convert);
+ new = *old;
+
+ percpu_down_read(&c->mark_lock);
+ b = gc_bucket(ca, iter->pos.offset);
+
+ /*
+ * b->data_type doesn't yet include need_discard & need_gc_gen states -
+ * fix that here:
+ */
+ type = __alloc_data_type(b->dirty_sectors,
+ b->cached_sectors,
+ b->stripe,
+ *old,
+ b->data_type);
+ if (b->data_type != type) {
+ struct bch_dev_usage *u;
+
+ preempt_disable();
+ u = this_cpu_ptr(ca->usage_gc);
+ u->d[b->data_type].buckets--;
+ b->data_type = type;
+ u->d[b->data_type].buckets++;
+ preempt_enable();
+ }
+
+ gc = *b;
+ percpu_up_read(&c->mark_lock);
+
+ if (metadata_only &&
+ gc.data_type != BCH_DATA_sb &&
+ gc.data_type != BCH_DATA_journal &&
+ gc.data_type != BCH_DATA_btree)
+ return 0;
+
+ if (gen_after(old->gen, gc.gen))
+ return 0;
+
+ if (c->opts.reconstruct_alloc ||
+ fsck_err_on(new.data_type != gc.data_type, c,
+ alloc_key_data_type_wrong,
+ "bucket %llu:%llu gen %u has wrong data_type"
+ ": got %s, should be %s",
+ iter->pos.inode, iter->pos.offset,
+ gc.gen,
+ bch2_data_types[new.data_type],
+ bch2_data_types[gc.data_type]))
+ new.data_type = gc.data_type;
+
+#define copy_bucket_field(_errtype, _f) \
+ if (c->opts.reconstruct_alloc || \
+ fsck_err_on(new._f != gc._f, c, _errtype, \
+ "bucket %llu:%llu gen %u data type %s has wrong " #_f \
+ ": got %u, should be %u", \
+ iter->pos.inode, iter->pos.offset, \
+ gc.gen, \
+ bch2_data_types[gc.data_type], \
+ new._f, gc._f)) \
+ new._f = gc._f; \
+
+ copy_bucket_field(alloc_key_gen_wrong,
+ gen);
+ copy_bucket_field(alloc_key_dirty_sectors_wrong,
+ dirty_sectors);
+ copy_bucket_field(alloc_key_cached_sectors_wrong,
+ cached_sectors);
+ copy_bucket_field(alloc_key_stripe_wrong,
+ stripe);
+ copy_bucket_field(alloc_key_stripe_redundancy_wrong,
+ stripe_redundancy);
+#undef copy_bucket_field
+
+ if (!bch2_alloc_v4_cmp(*old, new))
+ return 0;
+
+ a = bch2_alloc_to_v4_mut(trans, k);
+ ret = PTR_ERR_OR_ZERO(a);
+ if (ret)
+ return ret;
+
+ a->v = new;
+
+ /*
+ * The trigger normally makes sure this is set, but we're not running
+ * triggers:
+ */
+ if (a->v.data_type == BCH_DATA_cached && !a->v.io_time[READ])
+ a->v.io_time[READ] = max_t(u64, 1, atomic64_read(&c->io_clock[READ].now));
+
+ ret = bch2_trans_update(trans, iter, &a->k_i, BTREE_TRIGGER_NORUN);
+fsck_err:
+ return ret;
+}
+
+static int bch2_gc_alloc_done(struct bch_fs *c, bool metadata_only)
+{
+ struct btree_trans *trans = bch2_trans_get(c);
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ struct bch_dev *ca;
+ unsigned i;
+ int ret = 0;
+
+ for_each_member_device(ca, c, i) {
+ ret = for_each_btree_key_commit(trans, iter, BTREE_ID_alloc,
+ POS(ca->dev_idx, ca->mi.first_bucket),
+ BTREE_ITER_SLOTS|BTREE_ITER_PREFETCH, k,
+ NULL, NULL, BCH_TRANS_COMMIT_lazy_rw,
+ bch2_alloc_write_key(trans, &iter, k, metadata_only));
+
+ if (ret < 0) {
+ bch_err_fn(c, ret);
+ percpu_ref_put(&ca->ref);
+ break;
+ }
+ }
+
+ bch2_trans_put(trans);
+ return ret < 0 ? ret : 0;
+}
+
+static int bch2_gc_alloc_start(struct bch_fs *c, bool metadata_only)
+{
+ struct bch_dev *ca;
+ struct btree_trans *trans = bch2_trans_get(c);
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ struct bucket *g;
+ struct bch_alloc_v4 a_convert;
+ const struct bch_alloc_v4 *a;
+ unsigned i;
+ int ret;
+
+ for_each_member_device(ca, c, i) {
+ struct bucket_array *buckets = kvpmalloc(sizeof(struct bucket_array) +
+ ca->mi.nbuckets * sizeof(struct bucket),
+ GFP_KERNEL|__GFP_ZERO);
+ if (!buckets) {
+ percpu_ref_put(&ca->ref);
+ bch_err(c, "error allocating ca->buckets[gc]");
+ ret = -BCH_ERR_ENOMEM_gc_alloc_start;
+ goto err;
+ }
+
+ buckets->first_bucket = ca->mi.first_bucket;
+ buckets->nbuckets = ca->mi.nbuckets;
+ rcu_assign_pointer(ca->buckets_gc, buckets);
+ }
+
+ ret = for_each_btree_key2(trans, iter, BTREE_ID_alloc, POS_MIN,
+ BTREE_ITER_PREFETCH, k, ({
+ ca = bch_dev_bkey_exists(c, k.k->p.inode);
+ g = gc_bucket(ca, k.k->p.offset);
+
+ a = bch2_alloc_to_v4(k, &a_convert);
+
+ g->gen_valid = 1;
+ g->gen = a->gen;
+
+ if (metadata_only &&
+ (a->data_type == BCH_DATA_user ||
+ a->data_type == BCH_DATA_cached ||
+ a->data_type == BCH_DATA_parity)) {
+ g->data_type = a->data_type;
+ g->dirty_sectors = a->dirty_sectors;
+ g->cached_sectors = a->cached_sectors;
+ g->stripe = a->stripe;
+ g->stripe_redundancy = a->stripe_redundancy;
+ }
+
+ 0;
+ }));
+err:
+ bch2_trans_put(trans);
+ if (ret)
+ bch_err_fn(c, ret);
+ return ret;