#undef x
};
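+/*
+ * Buffer big enough to hold any packed alloc key: the v3 key and value, plus
+ * worst case space for every v2 field (the x() macro below sums their sizes).
+ */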
+struct bkey_alloc_buf {
+ struct bkey_i k;
+ struct bch_alloc_v3 v;
+
+#define x(_name, _bits) + _bits / 8
+ u8 _pad[0 + BCH_ALLOC_FIELDS_V2()];
+#undef x
+} __attribute__((packed, aligned(8)));
+
/* Persistent alloc info: */
static inline u64 alloc_field_v1_get(const struct bch_alloc *a,
return ret;
}
-struct bkey_alloc_buf *bch2_alloc_pack(struct btree_trans *trans,
- const struct bkey_alloc_unpacked src)
+static void bch2_alloc_pack(struct bch_fs *c,
+ struct bkey_alloc_buf *dst,
+ const struct bkey_alloc_unpacked src)
{
- struct bkey_alloc_buf *dst;
-
- dst = bch2_trans_kmalloc(trans, sizeof(struct bkey_alloc_buf));
- if (!IS_ERR(dst))
- bch2_alloc_pack_v3(dst, src);
-
- return dst;
+ bch2_alloc_pack_v3(dst, src);
}
int bch2_alloc_write(struct btree_trans *trans, struct btree_iter *iter,
struct bkey_alloc_unpacked *u, unsigned trigger_flags)
{
- struct bkey_alloc_buf *a = bch2_alloc_pack(trans, *u);
+ struct bkey_alloc_buf *a;
+
+ a = bch2_trans_kmalloc(trans, sizeof(struct bkey_alloc_buf));
+ if (IS_ERR(a))
+ return PTR_ERR(a);
- return PTR_ERR_OR_ZERO(a) ?:
- bch2_trans_update(trans, iter, &a->k, trigger_flags);
+ bch2_alloc_pack(trans->c, a, *u);
+ return bch2_trans_update(trans, iter, &a->k, trigger_flags);
}
static unsigned bch_alloc_v1_val_u64s(const struct bch_alloc *a)
#undef x
}
-int bch2_alloc_read(struct bch_fs *c, bool gc, bool metadata_only)
+int bch2_alloc_read(struct bch_fs *c)
{
struct btree_trans trans;
struct btree_iter iter;
int ret;
bch2_trans_init(&trans, c, 0, 0);
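+ /* take gc_lock so our bucket mark updates don't race with gc: */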
+ down_read(&c->gc_lock);
for_each_btree_key(&trans, iter, BTREE_ID_alloc, POS_MIN,
BTREE_ITER_PREFETCH, k, ret) {
+ if (!bkey_is_alloc(k.k))
+ continue;
+
ca = bch_dev_bkey_exists(c, k.k->p.inode);
- g = __bucket(ca, k.k->p.offset, gc);
+ g = bucket(ca, k.k->p.offset);
u = bch2_alloc_unpack(k);
- if (!gc)
- *bucket_gen(ca, k.k->p.offset) = u.gen;
-
+ *bucket_gen(ca, k.k->p.offset) = u.gen;
g->_mark.gen = u.gen;
+ g->_mark.data_type = u.data_type;
+ g->_mark.dirty_sectors = u.dirty_sectors;
+ g->_mark.cached_sectors = u.cached_sectors;
+ g->_mark.stripe = u.stripe != 0;
+ g->stripe = u.stripe;
+ g->stripe_redundancy = u.stripe_redundancy;
g->io_time[READ] = u.read_time;
g->io_time[WRITE] = u.write_time;
- g->oldest_gen = !gc ? u.oldest_gen : u.gen;
+ g->oldest_gen = u.oldest_gen;
g->gen_valid = 1;
-
- if (!gc ||
- (metadata_only &&
- (u.data_type == BCH_DATA_user ||
- u.data_type == BCH_DATA_cached ||
- u.data_type == BCH_DATA_parity))) {
- g->_mark.data_type = u.data_type;
- g->_mark.dirty_sectors = u.dirty_sectors;
- g->_mark.cached_sectors = u.cached_sectors;
- g->_mark.stripe = u.stripe != 0;
- g->stripe = u.stripe;
- g->stripe_redundancy = u.stripe_redundancy;
- }
-
}
bch2_trans_iter_exit(&trans, &iter);
+ up_read(&c->gc_lock);
bch2_trans_exit(&trans);
- if (ret)
+ if (ret) {
bch_err(c, "error reading alloc info: %i", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
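+/*
+ * Flush any dirty key cache entry for this bucket, then compare the alloc key
+ * in the btree against in memory bucket state, rewriting it if they differ:
+ */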
+static int bch2_alloc_write_key(struct btree_trans *trans,
+ struct btree_iter *iter,
+ unsigned flags)
+{
+ struct bch_fs *c = trans->c;
+ struct bkey_s_c k;
+ struct bkey_alloc_unpacked old_u, new_u;
+ int ret;
+retry:
+ bch2_trans_begin(trans);
+ ret = bch2_btree_key_cache_flush(trans,
+ BTREE_ID_alloc, iter->pos);
+ if (ret)
+ goto err;
+
+ k = bch2_btree_iter_peek_slot(iter);
+ ret = bkey_err(k);
+ if (ret)
+ goto err;
+
+ old_u = bch2_alloc_unpack(k);
+ new_u = alloc_mem_to_key(c, iter);
+
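+ /* nothing to write if the btree already matches the in memory state: */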
+ if (!bkey_alloc_unpacked_cmp(old_u, new_u))
+ return 0;
+
+ ret = bch2_alloc_write(trans, iter, &new_u,
+ BTREE_TRIGGER_NORUN) ?:
+ bch2_trans_commit(trans, NULL, NULL,
+ BTREE_INSERT_NOFAIL|flags);
+err:
+ if (ret == -EINTR)
+ goto retry;
+ return ret;
+}
+
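+/* write out in memory bucket state for every bucket of every member device: */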
+int bch2_alloc_write_all(struct bch_fs *c, unsigned flags)
+{
+ struct btree_trans trans;
+ struct btree_iter iter;
+ struct bch_dev *ca;
+ unsigned i;
+ int ret = 0;
+
+ bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
+ bch2_trans_iter_init(&trans, &iter, BTREE_ID_alloc, POS_MIN,
+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
+
+ for_each_member_device(ca, c, i) {
+ bch2_btree_iter_set_pos(&iter,
+ POS(ca->dev_idx, ca->mi.first_bucket));
+
+ while (iter.pos.offset < ca->mi.nbuckets) {
+ ret = bch2_alloc_write_key(&trans, &iter, flags);
+ if (ret) {
+ percpu_ref_put(&ca->ref);
+ goto err;
+ }
+ bch2_btree_iter_advance(&iter);
+ }
+ }
+err:
+ bch2_trans_iter_exit(&trans, &iter);
+ bch2_trans_exit(&trans);
return ret;
}
{
struct bch_fs *c = trans->c;
struct btree_iter iter;
- struct bkey_s_c k;
struct bkey_alloc_unpacked u;
u64 *time, now;
int ret = 0;
bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, POS(dev, bucket_nr),
BTREE_ITER_CACHED|
+ BTREE_ITER_CACHED_NOFILL|
BTREE_ITER_INTENT);
- k = bch2_btree_iter_peek_slot(&iter);
- ret = bkey_err(k);
+ ret = bch2_btree_iter_traverse(&iter);
if (ret)
goto out;
- u = bch2_alloc_unpack(k);
+ u = alloc_mem_to_key(c, &iter);
time = rw == READ ? &u.read_time : &u.write_time;
now = atomic64_read(&c->io_clock[rw].now);
return nr;
}
+/*
+ * Returns the sequence number of the most recent journal entry that updated
+ * this bucket:
+ */
+static u64 bucket_journal_seq(struct bch_fs *c, struct bucket_mark m)
+{
+ if (m.journal_seq_valid) {
+ u64 journal_seq = atomic64_read(&c->journal.seq);
+ u64 bucket_seq = journal_seq;
+
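+ /* splice the bucket's 16 bit sequence number into the current seq: */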
+ bucket_seq &= ~((u64) U16_MAX);
+ bucket_seq |= m.journal_seq;
+
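+ /* if that put us ahead of the current seq, the 16 bit counter wrapped: */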
+ if (bucket_seq > journal_seq)
+ bucket_seq -= 1 << 16;
+
+ return bucket_seq;
+ } else {
+ return 0;
+ }
+}
+
static int bucket_invalidate_btree(struct btree_trans *trans,
- struct bch_dev *ca, u64 b,
- struct bkey_alloc_unpacked *u)
+ struct bch_dev *ca, u64 b)
{
struct bch_fs *c = trans->c;
+ struct bkey_alloc_unpacked u;
struct btree_iter iter;
- struct bkey_s_c k;
int ret;
bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc,
POS(ca->dev_idx, b),
BTREE_ITER_CACHED|
+ BTREE_ITER_CACHED_NOFILL|
BTREE_ITER_INTENT);
- k = bch2_btree_iter_peek_slot(&iter);
- ret = bkey_err(k);
+ ret = bch2_btree_iter_traverse(&iter);
if (ret)
goto err;
- *u = bch2_alloc_unpack(k);
- u->gen++;
- u->data_type = 0;
- u->dirty_sectors = 0;
- u->cached_sectors = 0;
- u->read_time = atomic64_read(&c->io_clock[READ].now);
- u->write_time = atomic64_read(&c->io_clock[WRITE].now);
+ u = alloc_mem_to_key(c, &iter);
+
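+ /* bump the gen to invalidate any live pointers into this bucket: */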
+ u.gen++;
+ u.data_type = 0;
+ u.dirty_sectors = 0;
+ u.cached_sectors = 0;
+ u.read_time = atomic64_read(&c->io_clock[READ].now);
+ u.write_time = atomic64_read(&c->io_clock[WRITE].now);
- ret = bch2_alloc_write(trans, &iter, u,
+ ret = bch2_alloc_write(trans, &iter, &u,
BTREE_TRIGGER_BUCKET_INVALIDATE);
err:
bch2_trans_iter_exit(trans, &iter);
static int bch2_invalidate_one_bucket(struct bch_fs *c, struct bch_dev *ca,
u64 *journal_seq, unsigned flags)
{
- struct bkey_alloc_unpacked u;
+ struct bucket *g;
+ struct bucket_mark m;
size_t b;
int ret = 0;
- /*
- * If the read-only path is trying to shut down, we can't be generating
- * new btree updates:
- */
- if (test_bit(BCH_FS_ALLOCATOR_STOPPING, &c->flags))
- return 1;
-
BUG_ON(!ca->alloc_heap.used ||
!ca->alloc_heap.data[0].nr);
b = ca->alloc_heap.data[0].bucket;
/* first, put on free_inc and mark as owned by allocator: */
percpu_down_read(&c->mark_lock);
+ g = bucket(ca, b);
+ m = READ_ONCE(g->mark);
+
+ BUG_ON(m.dirty_sectors);
bch2_mark_alloc_bucket(c, ca, b, true);
BUG_ON(!fifo_push(&ca->free_inc, b));
spin_unlock(&c->freelist_lock);
+ /*
+ * If we're not invalidating cached data, we only increment the bucket
+ * gen in memory here; the incremented gen will be updated in the btree
+ * by bch2_trans_mark_pointer():
+ */
+ if (!m.cached_sectors &&
+ !bucket_needs_journal_commit(m, c->journal.last_seq_ondisk)) {
+ BUG_ON(m.data_type);
+ bucket_cmpxchg(g, m, m.gen++);
+ *bucket_gen(ca, b) = m.gen;
+ percpu_up_read(&c->mark_lock);
+ goto out;
+ }
+
percpu_up_read(&c->mark_lock);
+ /*
+ * If the read-only path is trying to shut down, we can't be generating
+ * new btree updates:
+ */
+ if (test_bit(BCH_FS_ALLOCATOR_STOPPING, &c->flags)) {
+ ret = 1;
+ goto out;
+ }
+
ret = bch2_trans_do(c, NULL, journal_seq,
BTREE_INSERT_NOCHECK_RW|
BTREE_INSERT_NOFAIL|
BTREE_INSERT_JOURNAL_RESERVED|
flags,
- bucket_invalidate_btree(&trans, ca, b, &u));
-
+ bucket_invalidate_btree(&trans, ca, b));
+out:
if (!ret) {
/* remove from alloc_heap: */
struct alloc_heap_entry e, *top = ca->alloc_heap.data;
* bucket (i.e. deleting the last reference) before writing to
* this bucket again:
*/
- *journal_seq = max(*journal_seq, u.journal_seq);
+ *journal_seq = max(*journal_seq, bucket_journal_seq(c, m));
} else {
size_t b2;
#include "alloc_foreground.h"
#include "bkey_methods.h"
#include "bkey_buf.h"
-#include "btree_key_cache.h"
#include "btree_locking.h"
#include "btree_update_interior.h"
#include "btree_io.h"
bkey_for_each_ptr_decode(k->k, ptrs, p, entry) {
struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
struct bucket *g = PTR_GC_BUCKET(ca, &p.ptr);
+ struct bucket *g2 = PTR_BUCKET(ca, &p.ptr);
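+ /*
+ * g is the gc copy of this bucket, g2 the primary copy; the repairs
+ * below update both:
+ */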
enum bch_data_type data_type = bch2_bkey_ptr_data_type(*k, &entry->ptr);
if (fsck_err_on(!g->gen_valid, c,
p.ptr.gen,
(bch2_bkey_val_to_text(&PBUF(buf), c, *k), buf))) {
if (!p.ptr.cached) {
- g->_mark.gen = p.ptr.gen;
- g->gen_valid = true;
+ g2->_mark.gen = g->_mark.gen = p.ptr.gen;
+ g2->gen_valid = g->gen_valid = true;
+ set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags);
} else {
do_update = true;
}
p.ptr.gen, g->mark.gen,
(bch2_bkey_val_to_text(&PBUF(buf), c, *k), buf))) {
if (!p.ptr.cached) {
- g->_mark.gen = p.ptr.gen;
- g->gen_valid = true;
- g->_mark.data_type = 0;
- g->_mark.dirty_sectors = 0;
- g->_mark.cached_sectors = 0;
+ g2->_mark.gen = g->_mark.gen = p.ptr.gen;
+ g2->gen_valid = g->gen_valid = true;
+ g2->_mark.data_type = 0;
+ g2->_mark.dirty_sectors = 0;
+ g2->_mark.cached_sectors = 0;
set_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags);
+ set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags);
} else {
do_update = true;
}
bch2_data_types[data_type],
(bch2_bkey_val_to_text(&PBUF(buf), c, *k), buf))) {
if (data_type == BCH_DATA_btree) {
- g->_mark.data_type = data_type;
- set_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags);
+ g2->_mark.data_type = g->_mark.data_type = data_type;
+ g2->gen_valid = g->gen_valid = true;
+ set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags);
} else {
do_update = true;
}
unsigned i, dev;
int ret = 0;
- percpu_down_write(&c->mark_lock);
-
#define copy_field(_f, _msg, ...) \
if (dst->_f != src->_f) { \
if (verify) \
fsck_err(c, _msg ": got %llu, should be %llu" \
, ##__VA_ARGS__, dst->_f, src->_f); \
dst->_f = src->_f; \
+ set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags); \
}
#define copy_stripe_field(_f, _msg, ...) \
if (dst->_f != src->_f) { \
iter.pos, ##__VA_ARGS__, \
dst->_f, src->_f); \
dst->_f = src->_f; \
+ set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags); \
+ }
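+/*
+ * Copy a bucket field recomputed by gc (src) over the primary in memory copy
+ * (dst), noting that alloc info will need to be rewritten:
+ */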
+#define copy_bucket_field(_f) \
+ if (dst->b[b]._f != src->b[b]._f) { \
+ if (verify) \
+ fsck_err(c, "bucket %u:%zu gen %u data type %s has wrong " #_f \
+ ": got %u, should be %u", dev, b, \
+ dst->b[b].mark.gen, \
+ bch2_data_types[dst->b[b].mark.data_type],\
+ dst->b[b]._f, src->b[b]._f); \
+ dst->b[b]._f = src->b[b]._f; \
+ set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags); \
}
#define copy_dev_field(_f, _msg, ...) \
copy_field(_f, "dev %u has wrong " _msg, dev, ##__VA_ARGS__)
bch2_fs_usage_acc_to_base(c, i);
for_each_member_device(ca, c, dev) {
- struct bch_dev_usage *dst = ca->usage_base;
- struct bch_dev_usage *src = (void *)
- bch2_acc_percpu_u64s((void *) ca->usage_gc,
- dev_usage_u64s());
-
- copy_dev_field(buckets_ec, "buckets_ec");
- copy_dev_field(buckets_unavailable, "buckets_unavailable");
-
- for (i = 0; i < BCH_DATA_NR; i++) {
- copy_dev_field(d[i].buckets, "%s buckets", bch2_data_types[i]);
- copy_dev_field(d[i].sectors, "%s sectors", bch2_data_types[i]);
- copy_dev_field(d[i].fragmented, "%s fragmented", bch2_data_types[i]);
+ struct bucket_array *dst = __bucket_array(ca, 0);
+ struct bucket_array *src = __bucket_array(ca, 1);
+ size_t b;
+
+ for (b = 0; b < src->nbuckets; b++) {
+ copy_bucket_field(_mark.gen);
+ copy_bucket_field(_mark.data_type);
+ copy_bucket_field(_mark.stripe);
+ copy_bucket_field(_mark.dirty_sectors);
+ copy_bucket_field(_mark.cached_sectors);
+ copy_bucket_field(stripe_redundancy);
+ copy_bucket_field(stripe);
+
+ dst->b[b].oldest_gen = src->b[b].oldest_gen;
+ }
+
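+ /* compare gc's recomputed dev usage against the base counters: */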
+ {
+ struct bch_dev_usage *dst = ca->usage_base;
+ struct bch_dev_usage *src = (void *)
+ bch2_acc_percpu_u64s((void *) ca->usage_gc,
+ dev_usage_u64s());
+
+ copy_dev_field(buckets_ec, "buckets_ec");
+ copy_dev_field(buckets_unavailable, "buckets_unavailable");
+
+ for (i = 0; i < BCH_DATA_NR; i++) {
+ copy_dev_field(d[i].buckets, "%s buckets", bch2_data_types[i]);
+ copy_dev_field(d[i].sectors, "%s sectors", bch2_data_types[i]);
+ copy_dev_field(d[i].fragmented, "%s fragmented", bch2_data_types[i]);
+ }
}
};
#undef copy_fs_field
#undef copy_dev_field
+#undef copy_bucket_field
#undef copy_stripe_field
#undef copy_field
fsck_err:
percpu_ref_put(&ca->ref);
if (ret)
bch_err(c, "%s: ret %i", __func__, ret);
-
- percpu_up_write(&c->mark_lock);
return ret;
}
BUG_ON(ca->buckets[1]);
BUG_ON(ca->usage_gc);
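+ /* allocate a second bucket array for gc to recompute marks into: */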
+ ca->buckets[1] = kvpmalloc(sizeof(struct bucket_array) +
+ ca->mi.nbuckets * sizeof(struct bucket),
+ GFP_KERNEL|__GFP_ZERO);
+ if (!ca->buckets[1]) {
+ percpu_ref_put(&ca->ref);
+ bch_err(c, "error allocating ca->buckets[gc]");
+ return -ENOMEM;
+ }
+
ca->usage_gc = alloc_percpu(struct bch_dev_usage);
if (!ca->usage_gc) {
bch_err(c, "error allocating ca->usage_gc");
}
}
- return 0;
-}
-
-static int bch2_alloc_write_key(struct btree_trans *trans,
- struct btree_iter *iter,
- bool initial, bool metadata_only)
-{
- struct bch_fs *c = trans->c;
- struct bch_dev *ca = bch_dev_bkey_exists(c, iter->pos.inode);
- struct bucket *g;
- struct bkey_s_c k;
- struct bkey_alloc_unpacked old_u, new_u, gc_u;
- struct bkey_alloc_buf *a;
- int ret;
+ percpu_down_write(&c->mark_lock);
/*
- * For this to be correct at runtime, we'll need to figure out a way for
- * it to actually lock the key in the btree key cache:
+ * Indicate to the stripe code that it needs to allocate for the gc stripes
+ * radix tree, too:
*/
-
- if (!initial) {
- ret = bch2_btree_key_cache_flush(trans,
- BTREE_ID_alloc, iter->pos);
- if (ret)
- return ret;
- }
-
- k = bch2_btree_iter_peek_slot(iter);
- ret = bkey_err(k);
- if (ret)
- return ret;
-
- old_u = new_u = bch2_alloc_unpack(k);
-
- percpu_down_read(&c->mark_lock);
- g = gc_bucket(ca, iter->pos.offset);
- gc_u = (struct bkey_alloc_unpacked) {
- .dev = iter->pos.inode,
- .bucket = iter->pos.offset,
- .gen = g->mark.gen,
- .oldest_gen = g->oldest_gen,
- .data_type = g->mark.data_type,
- .dirty_sectors = g->mark.dirty_sectors,
- .cached_sectors = g->mark.cached_sectors,
- .read_time = g->io_time[READ],
- .write_time = g->io_time[WRITE],
- .stripe = g->stripe,
- .stripe_redundancy = g->stripe_redundancy,
- };
- percpu_up_read(&c->mark_lock);
-
- if (metadata_only &&
- gc_u.data_type != BCH_DATA_sb &&
- gc_u.data_type != BCH_DATA_journal &&
- gc_u.data_type != BCH_DATA_btree)
- return 0;
-
- if (!bkey_alloc_unpacked_cmp(old_u, gc_u) ||
- gen_after(old_u.gen, gc_u.gen))
- return 0;
-
-#define copy_bucket_field(_f) \
- if (fsck_err_on(new_u._f != gc_u._f, c, \
- "bucket %llu:%llu gen %u data type %s has wrong " #_f \
- ": got %u, should be %u", \
- iter->pos.inode, iter->pos.offset, \
- new_u.gen, \
- bch2_data_types[new_u.data_type], \
- new_u._f, gc_u._f)) \
- new_u._f = gc_u._f; \
-
- copy_bucket_field(gen);
- copy_bucket_field(data_type);
- copy_bucket_field(stripe);
- copy_bucket_field(dirty_sectors);
- copy_bucket_field(cached_sectors);
- copy_bucket_field(stripe_redundancy);
- copy_bucket_field(stripe);
-#undef copy_bucket_field
-
- new_u.oldest_gen = gc_u.oldest_gen;
-
- if (!bkey_alloc_unpacked_cmp(old_u, new_u))
- return 0;
-
- a = bch2_alloc_pack(trans, new_u);
- if (IS_ERR(a))
- return PTR_ERR(a);
-
- ret = initial
- ? bch2_journal_key_insert(c, BTREE_ID_alloc, 0, &a->k)
- : bch2_trans_update(trans, iter, &a->k, BTREE_TRIGGER_NORUN);
-fsck_err:
- return ret;
-}
-
-static int bch2_gc_alloc_done(struct bch_fs *c, bool initial, bool metadata_only)
-{
- struct btree_trans trans;
- struct btree_iter iter;
- struct bkey_s_c k;
- struct bch_dev *ca;
- unsigned i;
- int ret = 0;
-
- bch2_trans_init(&trans, c, 0, 0);
+ gc_pos_set(c, gc_phase(GC_PHASE_START));
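+
+ /* initialize the gc copy of each device's bucket array: */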
for_each_member_device(ca, c, i) {
- for_each_btree_key(&trans, iter, BTREE_ID_alloc,
- POS(ca->dev_idx, ca->mi.first_bucket),
- BTREE_ITER_SLOTS|
- BTREE_ITER_PREFETCH, k, ret) {
- if (bkey_cmp(iter.pos, POS(ca->dev_idx, ca->mi.nbuckets)) >= 0)
- break;
+ struct bucket_array *dst = __bucket_array(ca, 1);
+ struct bucket_array *src = __bucket_array(ca, 0);
+ size_t b;
- ret = __bch2_trans_do(&trans, NULL, NULL,
- BTREE_INSERT_LAZY_RW,
- bch2_alloc_write_key(&trans, &iter,
- initial, metadata_only));
- if (ret)
- break;
- }
- bch2_trans_iter_exit(&trans, &iter);
+ dst->first_bucket = src->first_bucket;
+ dst->nbuckets = src->nbuckets;
- if (ret) {
- bch_err(c, "error writing alloc info: %i", ret);
- percpu_ref_put(&ca->ref);
- break;
- }
- }
+ for (b = 0; b < src->nbuckets; b++) {
+ struct bucket *d = &dst->b[b];
+ struct bucket *s = &src->b[b];
- bch2_trans_exit(&trans);
- return ret;
-}
-
-static int bch2_gc_alloc_start(struct bch_fs *c, bool initial, bool metadata_only)
-{
- struct bch_dev *ca;
- unsigned i;
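+ /* gc starts with each bucket's oldest_gen set to its current gen: */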
+ d->_mark.gen = dst->b[b].oldest_gen = s->mark.gen;
+ d->gen_valid = s->gen_valid;
- for_each_member_device(ca, c, i) {
- struct bucket_array *buckets = kvpmalloc(sizeof(struct bucket_array) +
- ca->mi.nbuckets * sizeof(struct bucket),
- GFP_KERNEL|__GFP_ZERO);
- if (!buckets) {
- percpu_ref_put(&ca->ref);
- percpu_up_write(&c->mark_lock);
- bch_err(c, "error allocating ca->buckets[gc]");
- return -ENOMEM;
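+ /*
+ * metadata only gc doesn't recompute user/cached data marks, so
+ * carry them over from the primary copy:
+ */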
+ if (metadata_only &&
+ (s->mark.data_type == BCH_DATA_user ||
+ s->mark.data_type == BCH_DATA_cached))
+ d->_mark = s->mark;
}
-
- buckets->first_bucket = ca->mi.first_bucket;
- buckets->nbuckets = ca->mi.nbuckets;
- rcu_assign_pointer(ca->buckets[1], buckets);
};
- return bch2_alloc_read(c, true, metadata_only);
-}
-
-static void bch2_gc_alloc_reset(struct bch_fs *c, bool initial, bool metadata_only)
-{
- struct bch_dev *ca;
- unsigned i;
-
- for_each_member_device(ca, c, i) {
- struct bucket_array *buckets = __bucket_array(ca, true);
- struct bucket *g;
+ percpu_up_write(&c->mark_lock);
- for_each_bucket(g, buckets) {
- if (metadata_only &&
- (g->mark.data_type == BCH_DATA_user ||
- g->mark.data_type == BCH_DATA_cached ||
- g->mark.data_type == BCH_DATA_parity))
- continue;
- g->_mark.dirty_sectors = 0;
- g->_mark.cached_sectors = 0;
- }
- };
+ return 0;
}
static int bch2_gc_reflink_done(struct bch_fs *c, bool initial,
return ret;
}
-static void bch2_gc_reflink_reset(struct bch_fs *c, bool initial,
- bool metadata_only)
-{
- struct genradix_iter iter;
- struct reflink_gc *r;
-
- genradix_for_each(&c->reflink_gc_table, iter, r)
- r->refcount = 0;
-}
-
-static int bch2_gc_reflink_start(struct bch_fs *c, bool initial,
- bool metadata_only)
-{
- struct btree_trans trans;
- struct btree_iter iter;
- struct bkey_s_c k;
- struct reflink_gc *r;
- int ret = 0;
-
- if (metadata_only)
- return 0;
-
- bch2_trans_init(&trans, c, 0, 0);
- c->reflink_gc_nr = 0;
-
- for_each_btree_key(&trans, iter, BTREE_ID_reflink, POS_MIN,
- BTREE_ITER_PREFETCH, k, ret) {
- const __le64 *refcount = bkey_refcount_c(k);
-
- if (!refcount)
- continue;
-
- r = genradix_ptr_alloc(&c->reflink_gc_table, c->reflink_gc_nr++,
- GFP_KERNEL);
- if (!r) {
- ret = -ENOMEM;
- break;
- }
-
- r->offset = k.k->p.offset;
- r->size = k.k->size;
- r->refcount = 0;
- }
- bch2_trans_iter_exit(&trans, &iter);
-
- bch2_trans_exit(&trans);
- return ret;
-}
-
static int bch2_gc_stripes_done(struct bch_fs *c, bool initial,
bool metadata_only)
{
return ret;
}
-static void bch2_gc_stripes_reset(struct bch_fs *c, bool initial,
- bool metadata_only)
+static int bch2_gc_reflink_start(struct bch_fs *c, bool initial,
+ bool metadata_only)
{
- genradix_free(&c->gc_stripes);
+ struct btree_trans trans;
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ struct reflink_gc *r;
+ int ret = 0;
+
+ if (metadata_only)
+ return 0;
+
+ bch2_trans_init(&trans, c, 0, 0);
+ c->reflink_gc_nr = 0;
+
+ for_each_btree_key(&trans, iter, BTREE_ID_reflink, POS_MIN,
+ BTREE_ITER_PREFETCH, k, ret) {
+ const __le64 *refcount = bkey_refcount_c(k);
+
+ if (!refcount)
+ continue;
+
+ r = genradix_ptr_alloc(&c->reflink_gc_table, c->reflink_gc_nr++,
+ GFP_KERNEL);
+ if (!r) {
+ ret = -ENOMEM;
+ break;
+ }
+
+ r->offset = k.k->p.offset;
+ r->size = k.k->size;
+ r->refcount = 0;
+ }
+ bch2_trans_iter_exit(&trans, &iter);
+
+ bch2_trans_exit(&trans);
+ return ret;
}
/**
/* flush interior btree updates: */
closure_wait_event(&c->btree_interior_update_wait,
!bch2_btree_interior_updates_nr_pending(c));
-
+again:
ret = bch2_gc_start(c, metadata_only) ?:
- bch2_gc_alloc_start(c, initial, metadata_only) ?:
bch2_gc_reflink_start(c, initial, metadata_only);
if (ret)
goto out;
-again:
- gc_pos_set(c, gc_phase(GC_PHASE_START));
bch2_mark_superblocks(c);
if (test_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags) ||
(!iter && bch2_test_restart_gc)) {
- if (iter++ > 2) {
- bch_info(c, "Unable to fix bucket gens, looping");
- ret = -EINVAL;
- goto out;
- }
-
/*
* XXX: make sure gens we fixed got saved
*/
- bch_info(c, "Second GC pass needed, restarting:");
- clear_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags);
- __gc_pos_set(c, gc_phase(GC_PHASE_NOT_RUNNING));
+ if (iter++ <= 2) {
+ bch_info(c, "Second GC pass needed, restarting:");
+ clear_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags);
+ __gc_pos_set(c, gc_phase(GC_PHASE_NOT_RUNNING));
+
+ percpu_down_write(&c->mark_lock);
+ bch2_gc_free(c);
+ percpu_up_write(&c->mark_lock);
+ /* flush fsck errors, reset counters */
+ bch2_flush_fsck_errs(c);
- bch2_gc_stripes_reset(c, initial, metadata_only);
- bch2_gc_alloc_reset(c, initial, metadata_only);
- bch2_gc_reflink_reset(c, initial, metadata_only);
+ goto again;
+ }
- /* flush fsck errors, reset counters */
- bch2_flush_fsck_errs(c);
- goto again;
+ bch_info(c, "Unable to fix bucket gens, looping");
+ ret = -EINVAL;
}
out:
if (!ret) {
bch2_journal_block(&c->journal);
- ret = bch2_gc_stripes_done(c, initial, metadata_only) ?:
- bch2_gc_reflink_done(c, initial, metadata_only) ?:
- bch2_gc_alloc_done(c, initial, metadata_only) ?:
+ percpu_down_write(&c->mark_lock);
+ ret = bch2_gc_reflink_done(c, initial, metadata_only) ?:
+ bch2_gc_stripes_done(c, initial, metadata_only) ?:
bch2_gc_done(c, initial, metadata_only);
bch2_journal_unblock(&c->journal);
+ } else {
+ percpu_down_write(&c->mark_lock);
}
- percpu_down_write(&c->mark_lock);
/* Indicates that gc is no longer in progress: */
__gc_pos_set(c, gc_phase(GC_PHASE_NOT_RUNNING));