-6d1f979bc5cd406925330864d50866b523fc4845
+cdf89ca564aa1916f16a58a06a395bfb3a86d302
POS(ca->dev_idx, ca->mi.first_bucket));
while (iter.pos.offset < ca->mi.nbuckets) {
- bch2_trans_cond_resched(&trans);
-
ret = bch2_alloc_write_key(&trans, &iter, flags);
if (ret) {
percpu_ref_put(&ca->ref);
if (!initial) {
if (max_stale > 64)
- bch2_btree_node_rewrite(&trans, &iter,
- b->data->keys.seq,
+ bch2_btree_node_rewrite(&trans, &iter, b,
BTREE_INSERT_NOWAIT|
BTREE_INSERT_GC_LOCK_HELD);
else if (!bch2_btree_gc_rewrite_disabled &&
(bch2_btree_gc_always_rewrite || max_stale > 16))
bch2_btree_node_rewrite(&trans, &iter,
- b->data->keys.seq,
- BTREE_INSERT_NOWAIT|
+ b, BTREE_INSERT_NOWAIT|
BTREE_INSERT_GC_LOCK_HELD);
}
-
- bch2_trans_cond_resched(&trans);
}
bch2_trans_iter_exit(&trans, &iter);
static struct btree_path *btree_path_alloc(struct btree_trans *, struct btree_path *);
+/*
+ * Unlocks before scheduling
+ * Note: does not revalidate iterator
+ */
+static inline int bch2_trans_cond_resched(struct btree_trans *trans)
+{
+ if (need_resched() || race_fault()) {
+ bch2_trans_unlock(trans);
+ schedule();
+ return bch2_trans_relock(trans) ? 0 : -EINTR;
+ } else {
+ return 0;
+ }
+}
+
static inline int __btree_path_cmp(const struct btree_path *l,
enum btree_id r_btree_id,
bool r_cached,
unsigned depth_want = path->level;
int ret = 0;
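+ /*
+ * A transaction restart is pending: fail with -EINTR so the caller
+ * restarts the transaction before traversing:
+ */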
+ if (unlikely(trans->restarted)) {
+ ret = -EINTR;
+ goto out;
+ }
+
/*
* Ensure we obey path->should_be_locked: if it's set, we can't unlock
* and re-traverse the path without a transaction restart:
struct btree_trans *trans = iter->trans;
struct btree_path *path = iter->path;
struct btree *b = NULL;
+ unsigned l;
int ret;
+ BUG_ON(trans->restarted);
EBUG_ON(iter->path->cached);
bch2_btree_iter_verify(iter);
- /* already got to end? */
+ /* already at end? */
if (!btree_path_node(path, path->level))
- goto out;
+ return NULL;
- btree_node_unlock(path, path->level);
- path->l[path->level].b = BTREE_ITER_NO_NODE_UP;
- path->level++;
+ /* got to end? */
+ if (!btree_path_node(path, path->level + 1)) {
+ btree_node_unlock(path, path->level);
+ path->l[path->level].b = BTREE_ITER_NO_NODE_UP;
+ path->level++;
+ return NULL;
+ }
- btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
- ret = bch2_btree_path_traverse(trans, path, iter->flags);
- if (ret)
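+ /* Couldn't relock the parent node: have to restart the transaction */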
+ if (!bch2_btree_node_relock(trans, path, path->level + 1)) {
+ __bch2_btree_path_unlock(path);
+ path->l[path->level].b = BTREE_ITER_NO_NODE_GET_LOCKS;
+ path->l[path->level + 1].b = BTREE_ITER_NO_NODE_GET_LOCKS;
+ btree_trans_restart(trans);
+ ret = -EINTR;
goto err;
+ }
- /* got to end? */
- b = btree_path_node(path, path->level);
- if (!b)
- goto out;
+ b = btree_path_node(path, path->level + 1);
- if (bpos_cmp(iter->pos, b->key.k.p) < 0) {
+ if (!bpos_cmp(iter->pos, b->key.k.p)) {
+ btree_node_unlock(path, path->level);
+ path->l[path->level].b = BTREE_ITER_NO_NODE_UP;
+ path->level++;
+ } else {
/*
* Haven't gotten to the end of the parent node: go back down to
* the next child node
btree_path_set_pos(trans, path, bpos_successor(iter->pos),
iter->flags & BTREE_ITER_INTENT);
- /* Unlock to avoid screwing up our lock invariants: */
- btree_node_unlock(path, path->level);
-
path->level = iter->min_depth;
+
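+ /* Drop locks on levels we no longer need locked: */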
+ for (l = path->level + 1; l < BTREE_MAX_DEPTH; l++)
+ if (btree_lock_want(path, l) == BTREE_NODE_UNLOCKED)
+ btree_node_unlock(path, l);
+
btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
bch2_btree_iter_verify(iter);
bch2_btree_iter_set_pos(iter, pos);
}
-/*
- * Unlocks before scheduling
- * Note: does not revalidate iterator
- */
-static inline int bch2_trans_cond_resched(struct btree_trans *trans)
-{
- if (need_resched() || race_fault()) {
- bch2_trans_unlock(trans);
- schedule();
- return bch2_trans_relock(trans) ? 0 : -EINTR;
- } else {
- return 0;
- }
-}
-
void bch2_trans_iter_exit(struct btree_trans *, struct btree_iter *);
void bch2_trans_iter_init(struct btree_trans *, struct btree_iter *,
unsigned, struct bpos, unsigned);
struct bpos, struct bpos, u64 *);
int bch2_btree_node_rewrite(struct btree_trans *, struct btree_iter *,
- __le64, unsigned);
+ struct btree *, unsigned);
void bch2_btree_node_rewrite_async(struct bch_fs *, struct btree *);
int bch2_btree_node_update_key(struct btree_trans *, struct btree_iter *,
struct btree *, struct bkey_i *, bool);
*/
int bch2_btree_node_rewrite(struct btree_trans *trans,
struct btree_iter *iter,
- __le64 seq, unsigned flags)
+ struct btree *b,
+ unsigned flags)
{
struct bch_fs *c = trans->c;
- struct btree *b, *n, *parent;
+ struct btree *n, *parent;
struct btree_update *as;
int ret;
flags |= BTREE_INSERT_NOFAIL;
-retry:
- ret = bch2_btree_iter_traverse(iter);
- if (ret)
- goto out;
-
- b = bch2_btree_iter_peek_node(iter);
- ret = PTR_ERR_OR_ZERO(b);
- if (ret)
- goto out;
-
- if (!b || b->data->keys.seq != seq)
- goto out;
parent = btree_node_parent(iter->path, b);
as = bch2_btree_update_start(trans, iter->path, b->c.level,
: 0) + 1,
flags);
ret = PTR_ERR_OR_ZERO(as);
- if (ret == -EINTR)
- goto retry;
if (ret) {
trace_btree_gc_rewrite_node_fail(c, b);
goto out;
__le64 seq;
};
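+
+/*
+ * Look the node up again by position and sequence number: if it was freed or
+ * rewritten since the async rewrite was queued, there's nothing to do:
+ */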
+static int async_btree_node_rewrite_trans(struct btree_trans *trans,
+ struct async_btree_rewrite *a)
+{
+ struct btree_iter iter;
+ struct btree *b;
+ int ret;
+
+ bch2_trans_node_iter_init(trans, &iter, a->btree_id, a->pos,
+ BTREE_MAX_DEPTH, a->level, 0);
+ b = bch2_btree_iter_peek_node(&iter);
+ ret = PTR_ERR_OR_ZERO(b);
+ if (ret)
+ goto out;
+
+ if (!b || b->data->keys.seq != a->seq)
+ goto out;
+
+ ret = bch2_btree_node_rewrite(trans, &iter, b, 0);
+out:
+ bch2_trans_iter_exit(trans, &iter);
+
+ return ret;
+}
+
void async_btree_node_rewrite_work(struct work_struct *work)
{
struct async_btree_rewrite *a =
container_of(work, struct async_btree_rewrite, work);
struct bch_fs *c = a->c;
- struct btree_trans trans;
- struct btree_iter iter;
- bch2_trans_init(&trans, c, 0, 0);
- bch2_trans_node_iter_init(&trans, &iter, a->btree_id, a->pos,
- BTREE_MAX_DEPTH, a->level, 0);
- bch2_btree_node_rewrite(&trans, &iter, a->seq, 0);
- bch2_trans_iter_exit(&trans, &iter);
- bch2_trans_exit(&trans);
+ bch2_trans_do(c, NULL, NULL, 0,
+ async_btree_node_rewrite_trans(&trans, a));
percpu_ref_put(&c->writes);
kfree(a);
}
if (bch2_bkey_maybe_mergable(&insert->k, k.k)) {
ret = check_pos_snapshot_overwritten(trans, btree_id, insert->k.p);
if (ret < 0)
- goto out;
+ goto err;
if (ret)
goto nomerge2;
ret = check_pos_snapshot_overwritten(trans, btree_id, k.k->p);
if (ret < 0)
- goto out;
+ goto err;
if (ret)
goto nomerge2;
BTREE_INSERT_NOFAIL);
if (ret)
break;
-
- bch2_trans_cond_resched(trans);
}
if (ret == -EINTR) {
}
static s64 __bch2_mark_reflink_p(struct bch_fs *c, struct bkey_s_c_reflink_p p,
- u64 idx, unsigned flags, size_t *r_idx)
+ u64 *idx, unsigned flags, size_t r_idx)
{
struct reflink_gc *r;
int add = !(flags & BTREE_TRIGGER_OVERWRITE) ? 1 : -1;
s64 ret = 0;
- while (*r_idx < c->reflink_gc_nr) {
- r = genradix_ptr(&c->reflink_gc_table, *r_idx);
- BUG_ON(!r);
-
- if (idx < r->offset)
- break;
- (*r_idx)++;
- }
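+ /* @r_idx: index into the gc table, found by the caller's binary search */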
+ if (r_idx >= c->reflink_gc_nr)
+ goto not_found;
- if (*r_idx >= c->reflink_gc_nr ||
- idx < r->offset - r->size) {
- ret = p.k->size;
+ r = genradix_ptr(&c->reflink_gc_table, r_idx);
+ if (*idx < r->offset - r->size)
goto not_found;
- }
BUG_ON((s64) r->refcount + add < 0);
r->refcount += add;
- return r->offset - idx;
+ *idx = r->offset;
+ return 0;
not_found:
- if ((flags & BTREE_TRIGGER_GC) &&
- (flags & BTREE_TRIGGER_NOATOMIC)) {
- /*
- * XXX: we're replacing the entire reflink pointer with an error
- * key, we should just be replacing the part that was missing:
- */
- if (fsck_err(c, "%llu:%llu len %u points to nonexistent indirect extent %llu",
- p.k->p.inode, p.k->p.offset, p.k->size, idx)) {
- struct bkey_i_error *new;
-
- new = kmalloc(sizeof(*new), GFP_KERNEL);
- if (!new) {
- bch_err(c, "%s: error allocating new key", __func__);
- return -ENOMEM;
- }
+ *idx = U64_MAX;
+ ret = -EIO;
- bkey_init(&new->k);
- new->k.type = KEY_TYPE_error;
- new->k.p = p.k->p;
- new->k.size = p.k->size;
- ret = bch2_journal_key_insert(c, BTREE_ID_extents, 0, &new->k_i);
+ /*
+ * XXX: we're replacing the entire reflink pointer with an error
+ * key, we should just be replacing the part that was missing:
+ */
+ if (fsck_err(c, "%llu:%llu len %u points to nonexistent indirect extent %llu",
+ p.k->p.inode, p.k->p.offset, p.k->size, *idx)) {
+ struct bkey_i_error *new;
+
+ new = kmalloc(sizeof(*new), GFP_KERNEL);
+ if (!new) {
+ bch_err(c, "%s: error allocating new key", __func__);
+ return -ENOMEM;
}
- } else {
- bch2_fs_inconsistent(c,
- "%llu:%llu len %u points to nonexistent indirect extent %llu",
- p.k->p.inode, p.k->p.offset, p.k->size, idx);
- bch2_inconsistent_error(c);
- ret = -EIO;
+
+ bkey_init(&new->k);
+ new->k.type = KEY_TYPE_error;
+ new->k.p = p.k->p;
+ new->k.size = p.k->size;
+ ret = bch2_journal_key_insert(c, BTREE_ID_extents, 0, &new->k_i);
}
fsck_err:
return ret;
struct reflink_gc *ref;
size_t l, r, m;
u64 idx = le64_to_cpu(p.v->idx) - le32_to_cpu(p.v->front_pad);
- u64 sectors = (u64) le32_to_cpu(p.v->front_pad) +
- le32_to_cpu(p.v->back_pad) +
- p.k->size;
- s64 ret = 0;
+ u64 end_idx = le64_to_cpu(p.v->idx) + p.k->size +
+ le32_to_cpu(p.v->back_pad);
+ int ret = 0;
BUG_ON((flags & (BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE)) ==
(BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE));
r = m;
}
- while (sectors) {
- ret = __bch2_mark_reflink_p(c, p, idx, flags, &l);
- if (ret <= 0)
- return ret;
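+ /*
+ * Each call marks one reflink_gc table entry and advances @idx to the
+ * end of that entry:
+ */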
+ while (idx < end_idx && !ret)
+ ret = __bch2_mark_reflink_p(c, p, &idx, flags, l++);
- ret = min_t(s64, ret, sectors);
- idx += ret;
- sectors -= ret;
- }
-
- return 0;
+ return ret;
}
static int bch2_mark_key_locked(struct bch_fs *c,
static int __bch2_trans_mark_reflink_p(struct btree_trans *trans,
struct bkey_s_c_reflink_p p,
- u64 idx, unsigned flags)
+ u64 *idx, unsigned flags)
{
struct bch_fs *c = trans->c;
struct btree_iter iter;
struct bkey_i *n;
__le64 *refcount;
int add = !(flags & BTREE_TRIGGER_OVERWRITE) ? 1 : -1;
- s64 ret;
+ int ret;
- bch2_trans_iter_init(trans, &iter, BTREE_ID_reflink, POS(0, idx),
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_reflink, POS(0, *idx),
BTREE_ITER_INTENT|
BTREE_ITER_WITH_UPDATES);
k = bch2_btree_iter_peek_slot(&iter);
if (!refcount) {
bch2_fs_inconsistent(c,
"%llu:%llu len %u points to nonexistent indirect extent %llu",
- p.k->p.inode, p.k->p.offset, p.k->size, idx);
+ p.k->p.inode, p.k->p.offset, p.k->size, *idx);
ret = -EIO;
goto err;
}
if (!*refcount && (flags & BTREE_TRIGGER_OVERWRITE)) {
bch2_fs_inconsistent(c,
"%llu:%llu len %u idx %llu indirect extent refcount underflow",
- p.k->p.inode, p.k->p.offset, p.k->size, idx);
+ p.k->p.inode, p.k->p.offset, p.k->size, *idx);
ret = -EIO;
goto err;
}
if (ret)
goto err;
- ret = k.k->p.offset - idx;
+ *idx = k.k->p.offset;
err:
bch2_trans_iter_exit(trans, &iter);
return ret;
struct bkey_s_c k, unsigned flags)
{
struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
- u64 idx, sectors;
- s64 ret = 0;
+ u64 idx, end_idx;
+ int ret = 0;
if (flags & BTREE_TRIGGER_INSERT) {
struct bch_reflink_p *v = (struct bch_reflink_p *) p.v;
v->front_pad = v->back_pad = 0;
}
- idx = le64_to_cpu(p.v->idx) - le32_to_cpu(p.v->front_pad);
- sectors = (u64) le32_to_cpu(p.v->front_pad) +
- le32_to_cpu(p.v->back_pad) +
- p.k->size;
-
- while (sectors) {
- ret = __bch2_trans_mark_reflink_p(trans, p, idx, flags);
- if (ret < 0)
- return ret;
+ idx = le64_to_cpu(p.v->idx) - le32_to_cpu(p.v->front_pad);
+ end_idx = le64_to_cpu(p.v->idx) + p.k->size +
+ le32_to_cpu(p.v->back_pad);
- ret = min_t(s64, ret, sectors);
- idx += ret;
- sectors -= ret;
- }
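+ /* Each call updates one indirect extent and advances @idx past it: */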
+ while (idx < end_idx && !ret)
+ ret = __bch2_trans_mark_reflink_p(trans, p, &idx, flags);
- return 0;
+ return ret;
}
int bch2_trans_mark_key(struct btree_trans *trans, struct bkey_s_c old,
vfs_d_type(dirent.v->d_type)))
break;
ctx->pos = dirent.k->p.offset + 1;
+
+ /*
+ * read_target looks up subvolumes; we can run out of btree paths if
+ * the directory has many subvolumes in it
+ */
+ if (hweight64(trans.paths_allocated) > BTREE_ITER_MAX / 2) {
+ ret = -EINTR;
+ break;
+ }
}
bch2_trans_iter_exit(&trans, &iter);
err:
d.k->p.snapshot);
break;
}
-
- bch2_trans_cond_resched(&trans);
}
bch2_trans_iter_exit(&trans, &iter);
&stats->sectors_seen);
next_nondata:
bch2_btree_iter_advance(&iter);
- bch2_trans_cond_resched(&trans);
}
out:
BUG();
}
- ret = bch2_btree_node_rewrite(&trans, &iter,
- b->data->keys.seq, 0) ?: ret;
+ ret = bch2_btree_node_rewrite(&trans, &iter, b, 0) ?: ret;
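+ /* -EINTR means the transaction was restarted: */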
+ if (ret == -EINTR)
+ continue;
+ if (ret)
+ break;
next:
- bch2_trans_cond_resched(&trans);
bch2_btree_iter_next_node(&iter);
}
if (ret == -EINTR)
if (ret)
bch_err(c, "error %i in bch2_move_btree", ret);
+ /* flush relevant btree updates */
+ closure_wait_event(&c->btree_interior_update_wait,
+ !bch2_btree_interior_updates_nr_pending(c));
+
progress_list_del(c, stats);
return ret;
}
op.start_btree, op.start_pos,
op.end_btree, op.end_pos,
rereplicate_btree_pred, c, stats) ?: ret;
-
- closure_wait_event(&c->btree_interior_update_wait,
- !bch2_btree_interior_updates_nr_pending(c));
-
ret = bch2_replicas_gc2(c) ?: ret;
ret = bch2_move_data(c,
struct bch_dev *ca = NULL;
struct bch_sb_field_members *mi;
struct bch_member dev_mi;
+ struct bucket_array *buckets;
+ struct bucket *g;
unsigned dev_idx, nr_devices, u64s;
int ret;
bch2_dev_usage_journal_reserve(c);
+ /*
+ * Clear marks before marking transactionally in the btree, so that
+ * per-device accounting gets done correctly:
+ */
+ down_read(&ca->bucket_lock);
+ buckets = bucket_array(ca);
+ for_each_bucket(g, buckets)
+ atomic64_set(&g->_mark.v, 0);
+ up_read(&ca->bucket_lock);
+
err = "error marking superblock";
ret = bch2_trans_mark_dev_sb(c, ca);
if (ret)