Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
-2272c5f5b76a5dc0c925064b3682110218a3e53b
+c28937622fbd373f152df01f29efa2d79af99633
unsigned nr_replicas,
unsigned *nr_effective,
bool *have_cache,
unsigned nr_replicas,
unsigned *nr_effective,
bool *have_cache,
+ enum alloc_reserve reserve,
unsigned flags,
struct closure *cl)
{
unsigned flags,
struct closure *cl)
{
if (ec_open_bucket(c, ptrs))
return 0;
if (ec_open_bucket(c, ptrs))
return 0;
- h = bch2_ec_stripe_head_get(trans, target, 0, nr_replicas - 1,
- wp == &c->copygc_write_point,
- cl);
+ h = bch2_ec_stripe_head_get(trans, target, 0, nr_replicas - 1, reserve, cl);
if (IS_ERR(h))
return PTR_ERR(h);
if (!h)
if (IS_ERR(h))
return PTR_ERR(h);
if (!h)
ret = bucket_alloc_from_stripe(trans, ptrs, wp, &devs,
target,
nr_replicas, nr_effective,
ret = bucket_alloc_from_stripe(trans, ptrs, wp, &devs,
target,
nr_replicas, nr_effective,
- have_cache, flags, _cl);
+ have_cache,
+ reserve, flags, _cl);
} else {
retry_blocking:
/*
} else {
retry_blocking:
/*
x(btree_movinggc) \
x(btree) \
x(movinggc) \
x(btree_movinggc) \
x(btree) \
x(movinggc) \
enum alloc_reserve {
#define x(name) RESERVE_##name,
enum alloc_reserve {
#define x(name) RESERVE_##name,
struct printbuf buf = PRINTBUF;
const struct bch_stripe *s;
struct gc_stripe *m;
struct printbuf buf = PRINTBUF;
const struct bch_stripe *s;
struct gc_stripe *m;
s = bkey_s_c_to_stripe(k).v;
m = genradix_ptr(&c->gc_stripes, k.k->p.offset);
s = bkey_s_c_to_stripe(k).v;
m = genradix_ptr(&c->gc_stripes, k.k->p.offset);
- for (i = 0; i < s->nr_blocks; i++)
- if (stripe_blockcount_get(s, i) != (m ? m->block_sectors[i] : 0))
- goto inconsistent;
- return 0;
-inconsistent:
- if (fsck_err_on(true, c,
- "stripe has wrong block sector count %u:\n"
- " %s\n"
- " should be %u", i,
- (printbuf_reset(&buf),
- bch2_bkey_val_to_text(&buf, c, k), buf.buf),
- m ? m->block_sectors[i] : 0)) {
+ for (i = 0; i < s->nr_blocks; i++) {
+ u32 old = stripe_blockcount_get(s, i);
+ u32 new = (m ? m->block_sectors[i] : 0);
+
+ if (old != new) {
+ prt_printf(&buf, "stripe block %u has wrong sector count: got %u, should be %u\n",
+ i, old, new);
+ bad = true;
+ }
+ }
+
+ if (bad)
+ bch2_bkey_val_to_text(&buf, c, k);
+
+ if (fsck_err_on(bad, c, "%s", buf.buf)) {
struct bkey_i_stripe *new;
new = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
struct bkey_i_stripe *new;
new = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
switch (reserve) {
case RESERVE_NR:
unreachable();
switch (reserve) {
case RESERVE_NR:
unreachable();
+ case RESERVE_stripe:
+ reserved += ca->mi.nbuckets >> 6;
+ fallthrough;
case RESERVE_none:
reserved += ca->mi.nbuckets >> 6;
fallthrough;
case RESERVE_none:
reserved += ca->mi.nbuckets >> 6;
fallthrough;
- for (i = 0; i < new->v.nr_blocks; i++)
- stripe_blockcount_set(&new->v, i, stripe_blockcount_get(old, i));
+ for (i = 0; i < new->v.nr_blocks; i++) {
+ unsigned v = stripe_blockcount_get(old, i);
+
+ if (!v)
+ continue;
+
+ BUG_ON(old->ptrs[i].dev != new->v.ptrs[i].dev ||
+ old->ptrs[i].gen != new->v.ptrs[i].gen ||
+ old->ptrs[i].offset != new->v.ptrs[i].offset);
+
+ stripe_blockcount_set(&new->v, i, v);
+ }
}
ret = bch2_trans_update(trans, &iter, &new->k_i, 0);
}
ret = bch2_trans_update(trans, &iter, &new->k_i, 0);
static struct ec_stripe_head *
ec_new_stripe_head_alloc(struct bch_fs *c, unsigned target,
unsigned algo, unsigned redundancy,
static struct ec_stripe_head *
ec_new_stripe_head_alloc(struct bch_fs *c, unsigned target,
unsigned algo, unsigned redundancy,
+ enum alloc_reserve reserve)
{
struct ec_stripe_head *h;
struct bch_dev *ca;
{
struct ec_stripe_head *h;
struct bch_dev *ca;
h->target = target;
h->algo = algo;
h->redundancy = redundancy;
h->target = target;
h->algo = algo;
h->redundancy = redundancy;
rcu_read_lock();
h->devs = target_rw_devs(c, BCH_DATA_user, target);
rcu_read_lock();
h->devs = target_rw_devs(c, BCH_DATA_user, target);
unsigned target,
unsigned algo,
unsigned redundancy,
unsigned target,
unsigned algo,
unsigned redundancy,
+ enum alloc_reserve reserve)
{
struct bch_fs *c = trans->c;
struct ec_stripe_head *h;
{
struct bch_fs *c = trans->c;
struct ec_stripe_head *h;
if (h->target == target &&
h->algo == algo &&
h->redundancy == redundancy &&
if (h->target == target &&
h->algo == algo &&
h->redundancy == redundancy &&
+ h->reserve == reserve) {
ret = bch2_trans_mutex_lock(trans, &h->lock);
if (ret)
h = ERR_PTR(ret);
goto found;
}
ret = bch2_trans_mutex_lock(trans, &h->lock);
if (ret)
h = ERR_PTR(ret);
goto found;
}
- h = ec_new_stripe_head_alloc(c, target, algo, redundancy, copygc);
+ h = ec_new_stripe_head_alloc(c, target, algo, redundancy, reserve);
found:
mutex_unlock(&c->ec_stripe_head_lock);
return h;
}
static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_head *h,
found:
mutex_unlock(&c->ec_stripe_head_lock);
return h;
}
static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_head *h,
+ enum alloc_reserve reserve, struct closure *cl)
{
struct bch_fs *c = trans->c;
struct bch_devs_mask devs = h->devs;
{
struct bch_fs *c = trans->c;
struct bch_devs_mask devs = h->devs;
bool have_cache = true;
int ret = 0;
bool have_cache = true;
int ret = 0;
- for (i = 0; i < h->s->new_stripe.key.v.nr_blocks; i++) {
- if (test_bit(i, h->s->blocks_gotten)) {
- __clear_bit(h->s->new_stripe.key.v.ptrs[i].dev, devs.d);
- if (i < h->s->nr_data)
- nr_have_data++;
- else
- nr_have_parity++;
- }
+ for_each_set_bit(i, h->s->blocks_gotten, h->s->new_stripe.key.v.nr_blocks) {
+ __clear_bit(h->s->new_stripe.key.v.ptrs[i].dev, devs.d);
+ if (i < h->s->nr_data)
+ nr_have_data++;
+ else
+ nr_have_parity++;
}
BUG_ON(nr_have_data > h->s->nr_data);
}
BUG_ON(nr_have_data > h->s->nr_data);
&nr_have_parity,
&have_cache,
BCH_DATA_parity,
&nr_have_parity,
&have_cache,
BCH_DATA_parity,
- h->copygc
- ? RESERVE_movinggc
- : RESERVE_none,
cl);
open_bucket_for_each(c, &buckets, ob, i) {
cl);
open_bucket_for_each(c, &buckets, ob, i) {
&nr_have_data,
&have_cache,
BCH_DATA_user,
&nr_have_data,
&have_cache,
BCH_DATA_user,
- h->copygc
- ? RESERVE_movinggc
- : RESERVE_none,
cl);
open_bucket_for_each(c, &buckets, ob, i) {
cl);
open_bucket_for_each(c, &buckets, ob, i) {
if (idx < 0)
return -BCH_ERR_ENOSPC_stripe_reuse;
if (idx < 0)
return -BCH_ERR_ENOSPC_stripe_reuse;
- h->s->have_existing_stripe = true;
ret = get_stripe_key_trans(trans, idx, &h->s->existing_stripe);
if (ret) {
ret = get_stripe_key_trans(trans, idx, &h->s->existing_stripe);
if (ret) {
- bch2_fs_fatal_error(c, "error reading stripe key: %i", ret);
+ bch2_stripe_close(c, h->s);
+ if (!bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ bch2_fs_fatal_error(c, "error reading stripe key: %s", bch2_err_str(ret));
BUG_ON(h->s->existing_stripe.size != h->blocksize);
BUG_ON(h->s->existing_stripe.size != h->s->existing_stripe.key.v.sectors);
BUG_ON(h->s->existing_stripe.size != h->blocksize);
BUG_ON(h->s->existing_stripe.size != h->s->existing_stripe.key.v.sectors);
+ /*
+ * Free buckets we initially allocated - they might conflict with
+ * blocks from the stripe we're reusing:
+ */
+ for_each_set_bit(i, h->s->blocks_gotten, h->s->new_stripe.key.v.nr_blocks) {
+ bch2_open_bucket_put(c, c->open_buckets + h->s->blocks[i]);
+ h->s->blocks[i] = 0;
+ }
+ memset(h->s->blocks_gotten, 0, sizeof(h->s->blocks_gotten));
+ memset(h->s->blocks_allocated, 0, sizeof(h->s->blocks_allocated));
+
for (i = 0; i < h->s->existing_stripe.key.v.nr_blocks; i++) {
if (stripe_blockcount_get(&h->s->existing_stripe.key.v, i)) {
__set_bit(i, h->s->blocks_gotten);
for (i = 0; i < h->s->existing_stripe.key.v.nr_blocks; i++) {
if (stripe_blockcount_get(&h->s->existing_stripe.key.v, i)) {
__set_bit(i, h->s->blocks_gotten);
ec_block_io(c, &h->s->existing_stripe, READ, i, &h->s->iodone);
}
ec_block_io(c, &h->s->existing_stripe, READ, i, &h->s->iodone);
}
- bkey_copy(&h->s->new_stripe.key.k_i,
- &h->s->existing_stripe.key.k_i);
+ bkey_copy(&h->s->new_stripe.key.k_i, &h->s->existing_stripe.key.k_i);
+ h->s->have_existing_stripe = true;
+
+ pr_info("reused %llu", h->s->idx);
struct bpos start_pos = bpos_max(min_pos, POS(0, c->ec_stripe_hint));
int ret;
struct bpos start_pos = bpos_max(min_pos, POS(0, c->ec_stripe_hint));
int ret;
- BUG_ON(h->s->res.sectors);
-
- ret = bch2_disk_reservation_get(c, &h->s->res,
+ if (!h->s->res.sectors) {
+ ret = bch2_disk_reservation_get(c, &h->s->res,
- h->s->nr_parity, 0);
- if (ret)
- return ret;
+ h->s->nr_parity,
+ BCH_DISK_RESERVATION_NOFAIL);
+ if (ret)
+ return ret;
+ }
for_each_btree_key_norestart(trans, iter, BTREE_ID_stripes, start_pos,
BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) {
for_each_btree_key_norestart(trans, iter, BTREE_ID_stripes, start_pos,
BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) {
unsigned target,
unsigned algo,
unsigned redundancy,
unsigned target,
unsigned algo,
unsigned redundancy,
+ enum alloc_reserve reserve,
struct closure *cl)
{
struct bch_fs *c = trans->c;
struct ec_stripe_head *h;
struct closure *cl)
{
struct bch_fs *c = trans->c;
struct ec_stripe_head *h;
- h = __bch2_ec_stripe_head_get(trans, target, algo, redundancy, copygc);
+ h = __bch2_ec_stripe_head_get(trans, target, algo, redundancy, reserve);
if (!h)
bch_err(c, "no stripe head");
if (IS_ERR_OR_NULL(h))
return h;
if (!h)
bch_err(c, "no stripe head");
if (IS_ERR_OR_NULL(h))
return h;
- needs_stripe_new = !h->s;
- if (needs_stripe_new) {
if (ec_new_stripe_alloc(c, h)) {
ret = -ENOMEM;
bch_err(c, "failed to allocate new stripe");
if (ec_new_stripe_alloc(c, h)) {
ret = -ENOMEM;
bch_err(c, "failed to allocate new stripe");
- /*
- * Try reserve a new stripe before reusing an
- * existing stripe. This will prevent unnecessary
- * read amplification during write oriented workloads.
- */
- ret = 0;
- if (!h->s->allocated && !h->s->res.sectors && !h->s->have_existing_stripe)
- ret = __bch2_ec_stripe_head_reserve(trans, h);
- if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ if (h->s->allocated)
+ goto allocated;
+
+ if (h->s->idx)
+ goto alloc_existing;
+#if 0
+ /* First, try to allocate a full stripe: */
+ ret = new_stripe_alloc_buckets(trans, h, RESERVE_stripe, NULL) ?:
+ __bch2_ec_stripe_head_reserve(trans, h);
+ if (!ret)
+ goto allocated;
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart) ||
+ bch2_err_matches(ret, ENOMEM))
- if (ret && needs_stripe_new)
- ret = __bch2_ec_stripe_head_reuse(trans, h);
- if (ret) {
- bch_err_ratelimited(c, "failed to get stripe: %s", bch2_err_str(ret));
- goto err;
+ if (ret == -BCH_ERR_open_buckets_empty) {
+ /* don't want to reuse in this case */
-
- if (!h->s->allocated) {
- ret = new_stripe_alloc_buckets(trans, h, cl);
+#endif
+ /*
+ * Not enough buckets available for a full stripe: we must reuse an
+ * existing stripe:
+ */
+ while (1) {
+ ret = __bch2_ec_stripe_head_reuse(trans, h);
+ ret = __bch2_ec_stripe_head_reserve(trans, h);
+ if (!ret)
+ break;
+ pr_info("err %s", bch2_err_str(ret));
+ if (ret == -BCH_ERR_ENOSPC_stripe_reuse && cl)
+ ret = -BCH_ERR_stripe_alloc_blocked;
+ if (waiting || !cl)
- h->s->allocated = true;
+ /* XXX freelist_wait? */
+ closure_wait(&c->freelist_wait, cl);
+ waiting = true;
+ if (waiting)
+ closure_wake_up(&c->freelist_wait);
+alloc_existing:
+ /*
+ * Retry allocating buckets, with the reserve watermark for this
+ * particular write:
+ */
+ ret = new_stripe_alloc_buckets(trans, h, reserve, cl);
+ if (ret)
+ goto err;
+allocated:
+ h->s->allocated = true;
+ BUG_ON(!h->s->idx);
+
BUG_ON(trans->restarted);
return h;
err:
BUG_ON(trans->restarted);
return h;
err:
unsigned target;
unsigned algo;
unsigned redundancy;
unsigned target;
unsigned algo;
unsigned redundancy;
+ enum alloc_reserve reserve;
struct bch_devs_mask devs;
unsigned nr_active_devs;
struct bch_devs_mask devs;
unsigned nr_active_devs;
void bch2_ec_stripe_head_put(struct bch_fs *, struct ec_stripe_head *);
struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *,
void bch2_ec_stripe_head_put(struct bch_fs *, struct ec_stripe_head *);
struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *,
- unsigned, unsigned, unsigned, bool, struct closure *);
+ unsigned, unsigned, unsigned,
+ enum alloc_reserve, struct closure *);
void bch2_stripes_heap_update(struct bch_fs *, struct stripe *, size_t);
void bch2_stripes_heap_del(struct bch_fs *, struct stripe *, size_t);
void bch2_stripes_heap_update(struct bch_fs *, struct stripe *, size_t);
void bch2_stripes_heap_del(struct bch_fs *, struct stripe *, size_t);
x(BCH_ERR_operation_blocked, journal_res_get_blocked) \
x(BCH_ERR_operation_blocked, journal_preres_get_blocked) \
x(BCH_ERR_operation_blocked, bucket_alloc_blocked) \
x(BCH_ERR_operation_blocked, journal_res_get_blocked) \
x(BCH_ERR_operation_blocked, journal_preres_get_blocked) \
x(BCH_ERR_operation_blocked, bucket_alloc_blocked) \
+ x(BCH_ERR_operation_blocked, stripe_alloc_blocked) \
x(BCH_ERR_invalid, invalid_sb) \
x(BCH_ERR_invalid_sb, invalid_sb_magic) \
x(BCH_ERR_invalid_sb, invalid_sb_version) \
x(BCH_ERR_invalid, invalid_sb) \
x(BCH_ERR_invalid_sb, invalid_sb_magic) \
x(BCH_ERR_invalid_sb, invalid_sb_version) \
- pub fn open(devices: &Vec<PathBuf>, opts: c::bch_opts) -> Result<Fs, bch_errcode> {
- let devices: Vec<_> = devices.iter()
- .map(|i| CString::new(i.as_os_str().as_bytes()).unwrap()).collect();
- let dev_c_strs: Vec<_> = devices.iter()
- .map(|i| { let p: *const i8 = i.as_ptr(); p })
+ pub fn open(devs: &Vec<PathBuf>, opts: c::bch_opts) -> Result<Fs, bch_errcode> {
+ let devs: Vec<_> = devs.iter()
+ .map(|i| CString::new(i.as_os_str().as_bytes()).unwrap().into_raw())
- let dev_c_strarray: *const *mut i8 = dev_c_strs[..].as_ptr() as *const *mut i8;
- let ret = unsafe { c::bch2_fs_open(dev_c_strarray, dev_c_strs.len() as u32, opts) };
+ let ret = unsafe { c::bch2_fs_open(devs[..].as_ptr(), devs.len() as u32, opts) };
errptr_to_result(ret).map(|fs| Fs { raw: fs})
}
errptr_to_result(ret).map(|fs| Fs { raw: fs})
}
fn from_str(s: &str) -> Result<Self, Self::Err> {
let s = CString::new(s).unwrap();
fn from_str(s: &str) -> Result<Self, Self::Err> {
let s = CString::new(s).unwrap();
- let p: *const i8 = s.as_ptr();
let v = unsafe {c::match_string(c::bch2_btree_ids[..].as_ptr(), (-(1 as isize)) as usize, p)};
if v >= 0 {
let v = unsafe {c::match_string(c::bch2_btree_ids[..].as_ptr(), (-(1 as isize)) as usize, p)};
if v >= 0 {