+/*
+ * Compares the gen, oldest_gen, data_type, dirty_sectors, cached_sectors,
+ * stripe_redundancy and stripe fields of two alloc_v4 values.
+ *
+ * Despite the _cmp name the result is a boolean with inverted sense:
+ * true when any of those fields differ, false when they all match.
+ */
+static inline bool bch2_alloc_v4_cmp(struct bch_alloc_v4 l,
+				     struct bch_alloc_v4 r)
+{
+	bool identical =
+		l.gen			== r.gen &&
+		l.oldest_gen		== r.oldest_gen &&
+		l.data_type		== r.data_type &&
+		l.dirty_sectors		== r.dirty_sectors &&
+		l.cached_sectors	== r.cached_sectors &&
+		l.stripe_redundancy	== r.stripe_redundancy &&
+		l.stripe		== r.stripe;
+
+	return !identical;
+}
+/*
+ * Reconcile one alloc btree key with the bucket state accumulated by gc.
+ *
+ * Looks up the gc copy of the bucket at iter->pos, checks the key's data_type,
+ * gen, dirty_sectors, cached_sectors, stripe and stripe_redundancy against it
+ * and, if any field was replaced, queues a rewritten alloc_v4 key with
+ * bch2_trans_update(), passing BTREE_TRIGGER_NORUN.
+ *
+ * @metadata_only: when set, buckets whose gc data type is not sb, journal or
+ *                 btree are left alone.
+ *
+ * Returns 1 when iter->pos is at or past the device's nbuckets, 0 when no
+ * update is needed, otherwise the result of bch2_alloc_to_v4_mut() /
+ * bch2_trans_update().  NOTE(review): the fsck_err label is presumably the
+ * jump target of fsck_err_on(), which would then also return through ret -
+ * confirm against the macro definition.
+ */
+static int bch2_alloc_write_key(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bkey_s_c k,
+ bool metadata_only)
+{
+ struct bch_fs *c = trans->c;
+ struct bch_dev *ca = bch_dev_bkey_exists(c, iter->pos.inode);
+ struct bucket gc, *b;
+ struct bkey_i_alloc_v4 *a;
+ struct bch_alloc_v4 old_convert, new;
+ const struct bch_alloc_v4 *old;
+ enum bch_data_type type;
+ int ret;
+ /*
+ * Past the last bucket of this device: the positive return is not
+ * treated as an error by bch2_gc_alloc_done(); presumably it ends the
+ * walk of this device's keys - confirm against
+ * for_each_btree_key_commit().
+ */
+ if (bkey_ge(iter->pos, POS(ca->dev_idx, ca->mi.nbuckets)))
+ return 1;
+
+ old = bch2_alloc_to_v4(k, &old_convert);
+ new = *old;
+ /* mark_lock is held for read while the gc bucket is fixed up and copied into gc. */
+ percpu_down_read(&c->mark_lock);
+ b = gc_bucket(ca, iter->pos.offset);
+
+ /*
+ * b->data_type doesn't yet include need_discard & need_gc_gen states -
+ * fix that here, moving the bucket between the per-cpu usage_gc
+ * data type bucket counters to match:
+ */
+ type = __alloc_data_type(b->dirty_sectors,
+ b->cached_sectors,
+ b->stripe,
+ *old,
+ b->data_type);
+ if (b->data_type != type) {
+ struct bch_dev_usage *u;
+
+ preempt_disable();
+ u = this_cpu_ptr(ca->usage_gc);
+ u->d[b->data_type].buckets--;
+ b->data_type = type;
+ u->d[b->data_type].buckets++;
+ preempt_enable();
+ }
+
+ gc = *b;
+ percpu_up_read(&c->mark_lock);
+ /* In metadata_only mode only sb, journal and btree buckets are checked. */
+ if (metadata_only &&
+ gc.data_type != BCH_DATA_sb &&
+ gc.data_type != BCH_DATA_journal &&
+ gc.data_type != BCH_DATA_btree)
+ return 0;
+ /*
+ * Leave the key alone when gen_after(old->gen, gc.gen) holds, i.e.
+ * presumably the key's gen is ahead of the one gc recorded.
+ */
+ if (gen_after(old->gen, gc.gen))
+ return 0;
+ /*
+ * With the reconstruct_alloc option gc's value is taken without
+ * evaluating fsck_err_on(); otherwise it is taken only when
+ * fsck_err_on() returns true for a mismatch.
+ */
+ if (c->opts.reconstruct_alloc ||
+ fsck_err_on(new.data_type != gc.data_type, c,
+ alloc_key_data_type_wrong,
+ "bucket %llu:%llu gen %u has wrong data_type"
+ ": got %s, should be %s",
+ iter->pos.inode, iter->pos.offset,
+ gc.gen,
+ bch2_data_types[new.data_type],
+ bch2_data_types[gc.data_type]))
+ new.data_type = gc.data_type;
+ /* Same check-and-copy as above, for an integer field _f printed with %u. */
+#define copy_bucket_field(_errtype, _f) \
+ if (c->opts.reconstruct_alloc || \
+ fsck_err_on(new._f != gc._f, c, _errtype, \
+ "bucket %llu:%llu gen %u data type %s has wrong " #_f \
+ ": got %u, should be %u", \
+ iter->pos.inode, iter->pos.offset, \
+ gc.gen, \
+ bch2_data_types[gc.data_type], \
+ new._f, gc._f)) \
+ new._f = gc._f; \
+
+ copy_bucket_field(alloc_key_gen_wrong,
+ gen);
+ copy_bucket_field(alloc_key_dirty_sectors_wrong,
+ dirty_sectors);
+ copy_bucket_field(alloc_key_cached_sectors_wrong,
+ cached_sectors);
+ copy_bucket_field(alloc_key_stripe_wrong,
+ stripe);
+ copy_bucket_field(alloc_key_stripe_redundancy_wrong,
+ stripe_redundancy);
+#undef copy_bucket_field
+ /* bch2_alloc_v4_cmp() returns true on a difference; nothing to write if there is none. */
+ if (!bch2_alloc_v4_cmp(*old, new))
+ return 0;
+
+ a = bch2_alloc_to_v4_mut(trans, k);
+ ret = PTR_ERR_OR_ZERO(a);
+ if (ret)
+ return ret;
+
+ a->v = new;
+
+ /*
+ * The trigger normally makes sure this is set, but we're not running
+ * triggers:
+ */
+ if (a->v.data_type == BCH_DATA_cached && !a->v.io_time[READ])
+ a->v.io_time[READ] = max_t(u64, 1, atomic64_read(&c->io_clock[READ].now));
+
+ ret = bch2_trans_update(trans, iter, &a->k_i, BTREE_TRIGGER_NORUN);
+fsck_err:
+ return ret;
+}
+
+/*
+ * For each member device, walk the alloc btree from the device's first
+ * bucket and run bch2_alloc_write_key() on every slot, committing with
+ * BCH_TRANS_COMMIT_lazy_rw.
+ *
+ * A negative result from a walk is logged, the device ref is dropped and the
+ * remaining devices are skipped.  Returns that negative error, or 0;
+ * non-negative walk results are never passed on to the caller.
+ */
+static int bch2_gc_alloc_done(struct bch_fs *c, bool metadata_only)
+{
+	struct btree_trans *trans = bch2_trans_get(c);
+	struct btree_iter alloc_iter;
+	struct bkey_s_c alloc_k;
+	struct bch_dev *dev;
+	unsigned dev_idx;
+	int ret = 0;
+
+	for_each_member_device(dev, c, dev_idx) {
+		ret = for_each_btree_key_commit(trans, alloc_iter, BTREE_ID_alloc,
+				POS(dev->dev_idx, dev->mi.first_bucket),
+				BTREE_ITER_SLOTS|BTREE_ITER_PREFETCH, alloc_k,
+				NULL, NULL, BCH_TRANS_COMMIT_lazy_rw,
+			bch2_alloc_write_key(trans, &alloc_iter, alloc_k,
+					     metadata_only));
+
+		if (ret < 0) {
+			/* Leaving the loop early: drop the ref on dev ourselves. */
+			bch_err_fn(c, ret);
+			percpu_ref_put(&dev->ref);
+			break;
+		}
+	}
+
+	bch2_trans_put(trans);
+
+	if (ret < 0)
+		return ret;
+	return 0;
+}
+
+/*
+ * Allocate a zeroed gc bucket array for every member device, publish it as
+ * ca->buckets_gc, then seed the gc buckets from the alloc btree.
+ *
+ * Every alloc key sets gen_valid and gen on its gc bucket.  In metadata_only
+ * mode, keys typed user, cached or parity additionally copy data_type, the
+ * sector counts and the stripe fields into the gc bucket.
+ *
+ * Returns 0 on success, -BCH_ERR_ENOMEM_gc_alloc_start if an array cannot be
+ * allocated, or the result of the btree walk; any nonzero result is logged.
+ */
+static int bch2_gc_alloc_start(struct bch_fs *c, bool metadata_only)
+{
+	struct btree_trans *trans = bch2_trans_get(c);
+	struct btree_iter alloc_iter;
+	struct bkey_s_c alloc_k;
+	struct bch_dev *dev;
+	unsigned dev_idx;
+	int ret;
+
+	for_each_member_device(dev, c, dev_idx) {
+		struct bucket_array *gc_buckets;
+
+		gc_buckets = kvpmalloc(sizeof(*gc_buckets) +
+				       dev->mi.nbuckets * sizeof(struct bucket),
+				       GFP_KERNEL|__GFP_ZERO);
+		if (!gc_buckets) {
+			/* Leaving the loop early: drop the ref on dev ourselves. */
+			percpu_ref_put(&dev->ref);
+			bch_err(c, "error allocating ca->buckets[gc]");
+			ret = -BCH_ERR_ENOMEM_gc_alloc_start;
+			goto err;
+		}
+
+		gc_buckets->first_bucket	= dev->mi.first_bucket;
+		gc_buckets->nbuckets		= dev->mi.nbuckets;
+		rcu_assign_pointer(dev->buckets_gc, gc_buckets);
+	}
+
+	ret = for_each_btree_key2(trans, alloc_iter, BTREE_ID_alloc, POS_MIN,
+				  BTREE_ITER_PREFETCH, alloc_k, ({
+		struct bch_alloc_v4 scratch;
+		const struct bch_alloc_v4 *v4;
+		struct bucket *gc_b;
+
+		dev	= bch_dev_bkey_exists(c, alloc_k.k->p.inode);
+		gc_b	= gc_bucket(dev, alloc_k.k->p.offset);
+		v4	= bch2_alloc_to_v4(alloc_k, &scratch);
+
+		gc_b->gen_valid	= 1;
+		gc_b->gen	= v4->gen;
+
+		if (metadata_only) {
+			switch (v4->data_type) {
+			case BCH_DATA_user:
+			case BCH_DATA_cached:
+			case BCH_DATA_parity:
+				gc_b->data_type		= v4->data_type;
+				gc_b->dirty_sectors	= v4->dirty_sectors;
+				gc_b->cached_sectors	= v4->cached_sectors;
+				gc_b->stripe		= v4->stripe;
+				gc_b->stripe_redundancy	= v4->stripe_redundancy;
+				break;
+			default:
+				break;
+			}
+		}
+
+		/* value of the statement expression */
+		0;
+	}));
+err:
+	bch2_trans_put(trans);
+	if (ret)
+		bch_err_fn(c, ret);
+	return ret;
+}
+
+/*
+ * Zero data_type, dirty_sectors and cached_sectors of every bucket in each
+ * member device's gc bucket array.  In metadata_only mode, buckets currently
+ * typed user, cached or parity are left untouched.
+ */
+static void bch2_gc_alloc_reset(struct bch_fs *c, bool metadata_only)
+{
+	struct bch_dev *dev;
+	unsigned dev_idx;
+
+	for_each_member_device(dev, c, dev_idx) {
+		struct bucket_array *gc_buckets = gc_bucket_array(dev);
+		struct bucket *b;
+
+		for_each_bucket(b, gc_buckets) {
+			bool holds_data = b->data_type == BCH_DATA_user ||
+					  b->data_type == BCH_DATA_cached ||
+					  b->data_type == BCH_DATA_parity;
+
+			if (!metadata_only || !holds_data) {
+				b->data_type		= 0;
+				b->dirty_sectors	= 0;
+				b->cached_sectors	= 0;
+			}
+		}
+	}
+}
+
+static int bch2_gc_write_reflink_key(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bkey_s_c k,
+ size_t *idx)
+{
+ struct bch_fs *c = trans->c;
+ const __le64 *refcount = bkey_refcount_c(k);
+ struct printbuf buf = PRINTBUF;
+ struct reflink_gc *r;
+ int ret = 0;
+
+ if (!refcount)
+ return 0;
+
+ while ((r = genradix_ptr(&c->reflink_gc_table, *idx)) &&
+ r->offset < k.k->p.offset)
+ ++*idx;
+
+ if (!r ||
+ r->offset != k.k->p.offset ||
+ r->size != k.k->size) {
+ bch_err(c, "unexpected inconsistency walking reflink table at gc finish");
+ return -EINVAL;
+ }
+
+ if (fsck_err_on(r->refcount != le64_to_cpu(*refcount), c,
+ reflink_v_refcount_wrong,
+ "reflink key has wrong refcount:\n"
+ " %s\n"
+ " should be %u",
+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf),
+ r->refcount)) {
+ struct bkey_i *new = bch2_bkey_make_mut(trans, iter, &k, 0);