-199bfbed8a4ebda0ec5c5bd04c742218f2fca586
+dfb7dc100d4bb9c13caa289e6dedd4d0a12f1ecb
struct bkey_s_c *k)
{
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(*k);
- const struct bch_extent_ptr *ptr;
+ const union bch_extent_entry *entry;
+ struct extent_ptr_decoded p;
bool do_update = false;
int ret = 0;
- bkey_for_each_ptr(ptrs, ptr) {
- struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
- struct bucket *g = PTR_BUCKET(ca, ptr, true);
- struct bucket *g2 = PTR_BUCKET(ca, ptr, false);
+ bkey_for_each_ptr_decode(k->k, ptrs, p, entry) {
+ struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
+ struct bucket *g = PTR_BUCKET(ca, &p.ptr, true);
+ struct bucket *g2 = PTR_BUCKET(ca, &p.ptr, false);
if (fsck_err_on(!g->gen_valid, c,
"bucket %u:%zu data type %s ptr gen %u missing in alloc btree",
- ptr->dev, PTR_BUCKET_NR(ca, ptr),
- bch2_data_types[ptr_data_type(k->k, ptr)],
- ptr->gen)) {
- if (!ptr->cached) {
- g2->_mark.gen = g->_mark.gen = ptr->gen;
+ p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr),
+ bch2_data_types[ptr_data_type(k->k, &p.ptr)],
+ p.ptr.gen)) {
+ if (p.ptr.cached) {
+ g2->_mark.gen = g->_mark.gen = p.ptr.gen;
g2->gen_valid = g->gen_valid = true;
set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags);
} else {
}
}
- if (fsck_err_on(gen_cmp(ptr->gen, g->mark.gen) > 0, c,
+ if (fsck_err_on(gen_cmp(p.ptr.gen, g->mark.gen) > 0, c,
"bucket %u:%zu data type %s ptr gen in the future: %u > %u",
- ptr->dev, PTR_BUCKET_NR(ca, ptr),
- bch2_data_types[ptr_data_type(k->k, ptr)],
- ptr->gen, g->mark.gen)) {
- if (!ptr->cached) {
- g2->_mark.gen = g->_mark.gen = ptr->gen;
+ p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr),
+ bch2_data_types[ptr_data_type(k->k, &p.ptr)],
+ p.ptr.gen, g->mark.gen)) {
+ if (p.ptr.cached) {
+ g2->_mark.gen = g->_mark.gen = p.ptr.gen;
g2->gen_valid = g->gen_valid = true;
g2->_mark.data_type = 0;
g2->_mark.dirty_sectors = 0;
}
}
- if (fsck_err_on(!ptr->cached &&
- gen_cmp(ptr->gen, g->mark.gen) < 0, c,
+ if (fsck_err_on(!p.ptr.cached &&
+ gen_cmp(p.ptr.gen, g->mark.gen) < 0, c,
"bucket %u:%zu data type %s stale dirty ptr: %u < %u",
- ptr->dev, PTR_BUCKET_NR(ca, ptr),
- bch2_data_types[ptr_data_type(k->k, ptr)],
- ptr->gen, g->mark.gen))
+ p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr),
+ bch2_data_types[ptr_data_type(k->k, &p.ptr)],
+ p.ptr.gen, g->mark.gen))
do_update = true;
+
+ if (p.has_ec) {
+ struct stripe *m = genradix_ptr(&c->stripes[true], p.ec.idx);
+
+ if (fsck_err_on(!m || !m->alive, c,
+ "pointer to nonexistent stripe %llu",
+ (u64) p.ec.idx))
+ do_update = true;
+ }
}
if (do_update) {
+ struct bkey_ptrs ptrs;
+ union bch_extent_entry *entry;
struct bch_extent_ptr *ptr;
struct bkey_i *new;
(!ptr->cached &&
gen_cmp(ptr->gen, g->mark.gen) < 0);
}));
+again:
+ ptrs = bch2_bkey_ptrs(bkey_i_to_s(new));
+ bkey_extent_entry_for_each(ptrs, entry) {
+ if (extent_entry_type(entry) == BCH_EXTENT_ENTRY_stripe_ptr) {
+ struct stripe *m = genradix_ptr(&c->stripes[true],
+ entry->stripe_ptr.idx);
+
+ if (!m || !m->alive) {
+ bch2_bkey_extent_entry_drop(new, entry);
+ goto again;
+ }
+ }
+ }
ret = bch2_journal_key_insert(c, btree_id, level, new);
if (ret)
return ret;
}
+/*
+ * Build the stripe on @h on top of an already existing stripe.
+ *
+ * Looks up a candidate with get_existing_stripe(), loads its key into
+ * h->s->existing_stripe, issues a read for each of its blocks and finally
+ * copies the existing key into h->s->new_stripe.
+ *
+ * Returns 0 on success, -ENOSPC when no existing stripe could be found, or
+ * the error returned by get_stripe_key().
+ */
+static int __bch2_ec_stripe_head_reuse(struct bch_fs *c,
+				       struct ec_stripe_head *h)
+{
+	s64 stripe_idx = get_existing_stripe(c, h);
+	unsigned block;
+	int err;
+
+	if (stripe_idx < 0) {
+		bch_err(c, "failed to find an existing stripe");
+		return -ENOSPC;
+	}
+
+	h->s->have_existing_stripe = true;
+
+	err = get_stripe_key(c, stripe_idx, &h->s->existing_stripe);
+	if (err) {
+		bch2_fs_fatal_error(c, "error reading stripe key: %i", err);
+		return err;
+	}
+
+	/*
+	 * There is no way back from a failure here: the stripe has already
+	 * been deleted from the stripes heap.
+	 */
+	if (ec_stripe_buf_init(&h->s->existing_stripe, 0, h->blocksize)) {
+		BUG();
+	}
+
+	/* The buffer must match both the head's block size and the key. */
+	BUG_ON(h->s->existing_stripe.size != h->blocksize);
+	BUG_ON(h->s->existing_stripe.size != h->s->existing_stripe.key.v.sectors);
+
+	for (block = 0;
+	     block < h->s->existing_stripe.key.v.nr_blocks;
+	     block++) {
+		/* Blocks with a nonzero blockcount are marked gotten and allocated. */
+		if (stripe_blockcount_get(&h->s->existing_stripe.key.v, block)) {
+			__set_bit(block, h->s->blocks_gotten);
+			__set_bit(block, h->s->blocks_allocated);
+		}
+
+		/* Every block is read, whether or not it is marked above. */
+		ec_block_io(c, &h->s->existing_stripe, READ, block, &h->s->iodone);
+	}
+
+	/* The new stripe starts out as a copy of the existing stripe's key. */
+	bkey_copy(&h->s->new_stripe.key.k_i,
+		  &h->s->existing_stripe.key.k_i);
+
+	return 0;
+}
+
+/*
+ * Take the disk reservation for the stripe being built on @h.
+ *
+ * Returns whatever bch2_disk_reservation_get() returned; a nonzero result
+ * is logged before being passed back to the caller.
+ */
+static int __bch2_ec_stripe_head_reserve(struct bch_fs *c,
+					 struct ec_stripe_head *h)
+{
+	int err = bch2_disk_reservation_get(c, &h->s->res,
+					    h->blocksize,
+					    h->s->nr_parity, 0);
+
+	/*
+	 * A failed reservation means we have to wait for copygc to empty
+	 * out buckets from existing stripes.
+	 */
+	if (err) {
+		bch_err(c, "failed to reserve stripe");
+	}
+
+	return err;
+}
+
struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c,
unsigned target,
unsigned algo,
struct closure *cl)
{
struct ec_stripe_head *h;
- unsigned i;
- s64 idx;
int ret;
+ bool needs_stripe_new;
h = __bch2_ec_stripe_head_get(c, target, algo, redundancy, copygc);
if (!h) {
return NULL;
}
- if (!h->s) {
+ needs_stripe_new = !h->s;
+ if (needs_stripe_new) {
if (ec_new_stripe_alloc(c, h)) {
- bch2_ec_stripe_head_put(c, h);
+ ret = -ENOMEM;
bch_err(c, "failed to allocate new stripe");
- return NULL;
- }
-
- idx = get_existing_stripe(c, h);
- if (idx >= 0) {
- h->s->have_existing_stripe = true;
- ret = get_stripe_key(c, idx, &h->s->existing_stripe);
- if (ret) {
- bch2_fs_fatal_error(c, "error reading stripe key: %i", ret);
- bch2_ec_stripe_head_put(c, h);
- return NULL;
- }
-
- if (ec_stripe_buf_init(&h->s->existing_stripe, 0, h->blocksize)) {
- /*
- * this is a problem: we have deleted from the
- * stripes heap already
- */
- BUG();
- }
-
- BUG_ON(h->s->existing_stripe.size != h->blocksize);
- BUG_ON(h->s->existing_stripe.size != h->s->existing_stripe.key.v.sectors);
-
- for (i = 0; i < h->s->existing_stripe.key.v.nr_blocks; i++) {
- if (stripe_blockcount_get(&h->s->existing_stripe.key.v, i)) {
- __set_bit(i, h->s->blocks_gotten);
- __set_bit(i, h->s->blocks_allocated);
- }
-
- ec_block_io(c, &h->s->existing_stripe, READ, i, &h->s->iodone);
- }
-
- bkey_copy(&h->s->new_stripe.key.k_i,
- &h->s->existing_stripe.key.k_i);
+ goto err;
}
- if (ec_stripe_buf_init(&h->s->new_stripe, 0, h->blocksize)) {
+ if (ec_stripe_buf_init(&h->s->new_stripe, 0, h->blocksize))
BUG();
- }
}
- if (!h->s->allocated) {
- if (!h->s->have_existing_stripe &&
- !h->s->res.sectors) {
- ret = bch2_disk_reservation_get(c, &h->s->res,
- h->blocksize,
- h->s->nr_parity, 0);
- if (ret) {
- /*
- * This means we need to wait for copygc to
- * empty out buckets from existing stripes:
- */
- bch2_ec_stripe_head_put(c, h);
- h = NULL;
- goto out;
- }
- }
+	/*
+	 * Try to reserve a new stripe before reusing an
+	 * existing stripe. This prevents unnecessary
+	 * read amplification during write-oriented workloads.
+	 */
+ ret = 0;
+ if (!h->s->allocated && !h->s->res.sectors && !h->s->have_existing_stripe)
+ ret = __bch2_ec_stripe_head_reserve(c, h);
+ if (ret && needs_stripe_new)
+ ret = __bch2_ec_stripe_head_reuse(c, h);
+ if (ret)
+ goto err;
+ if (!h->s->allocated) {
ret = new_stripe_alloc_buckets(c, h, cl);
- if (ret) {
- bch2_ec_stripe_head_put(c, h);
- h = ERR_PTR(-ret);
- goto out;
- }
+ if (ret)
+ goto err;
h->s->allocated = true;
}
-out:
+
return h;
+
+err:
+ bch2_ec_stripe_head_put(c, h);
+ return ERR_PTR(-ret);
}
void bch2_ec_stop_dev(struct bch_fs *c, struct bch_dev *ca)
}
}
+/*
+ * Drop @entry from the value of @k.
+ *
+ * Everything between the end of @entry and the end of the value is moved
+ * down over @entry, and k->k.u64s is reduced by the size of the entry that
+ * was removed. @entry is expected to point into @k's value.
+ */
+void bch2_bkey_extent_entry_drop(struct bkey_i *k, union bch_extent_entry *entry)
+{
+	union bch_extent_entry *end = bkey_val_end(bkey_i_to_s(k));
+	union bch_extent_entry *next = extent_entry_next(entry);
+	/*
+	 * Measure the entry before moving anything: after the memmove,
+	 * @entry holds whatever used to follow it, and reading its size
+	 * then would give the size of that entry instead.
+	 */
+	unsigned dropped_u64s = extent_entry_u64s(entry);
+
+	memmove_u64s(entry, next, (u64 *) end - (u64 *) next);
+	k->k.u64s -= dropped_u64s;
+}
+
void bch2_bkey_append_ptr(struct bkey_i *k,
struct bch_extent_ptr ptr)
{
void bch2_bkey_mark_replicas_cached(struct bch_fs *, struct bkey_s,
unsigned, unsigned);
+void bch2_bkey_extent_entry_drop(struct bkey_i *, union bch_extent_entry *);
void bch2_bkey_append_ptr(struct bkey_i *, struct bch_extent_ptr);
void bch2_extent_ptr_decoded_append(struct bkey_i *,
struct extent_ptr_decoded *);