static enum merge_result bch2_extent_merge(struct bch_fs *, struct btree *,
struct bkey_i *, struct bkey_i *);
-static void sort_key_next(struct btree_node_iter *iter,
+static void sort_key_next(struct btree_node_iter_large *iter,
struct btree *b,
struct btree_node_iter_set *i)
{
?: (l).k - (r).k; \
})
-static inline bool should_drop_next_key(struct btree_node_iter *iter,
+static inline bool should_drop_next_key(struct btree_node_iter_large *iter,
struct btree *b)
{
struct btree_node_iter_set *l = iter->data, *r = iter->data + 1;
}
struct btree_nr_keys bch2_key_sort_fix_overlapping(struct bset *dst,
- struct btree *b,
- struct btree_node_iter *iter)
+ struct btree *b,
+ struct btree_node_iter_large *iter)
{
struct bkey_packed *out = dst->start;
struct btree_nr_keys nr;
heap_resort(iter, key_sort_cmp);
- while (!bch2_btree_node_iter_end(iter)) {
+ while (!bch2_btree_node_iter_large_end(iter)) {
if (!should_drop_next_key(iter, b)) {
struct bkey_packed *k =
__btree_node_offset_to_key(b, iter->data->k);
return NULL;
}
+/*
+ * Drop every pointer to device @dev from extent @e.
+ *
+ * Returns true if at least one pointer was dropped.
+ */
+bool bch2_extent_drop_device(struct bkey_s_extent e, unsigned dev)
+{
+ struct bch_extent_ptr *ptr;
+ bool dropped = false;
+
+ /* iterate backwards so dropping a ptr doesn't shift upcoming entries */
+ extent_for_each_ptr_backwards(e, ptr)
+ if (ptr->dev == dev) {
+ __bch2_extent_drop_ptr(e, ptr);
+ dropped = true;
+ }
+
+ /* clean up crc entries left without users — presumably what
+ * drop_redundant_crcs does; confirm against its definition */
+ if (dropped)
+ bch2_extent_drop_redundant_crcs(e);
+ return dropped;
+}
+
+/*
+ * Return the first pointer in @e whose device belongs to disk group @group,
+ * or NULL if none does.
+ *
+ * NOTE(review): mi.group is compared as (mi.group - 1 == group) and a zero
+ * mi.group is skipped — the stored value appears to be biased by one, with
+ * 0 meaning "no group"; confirm against the member-info definition.
+ */
+const struct bch_extent_ptr *
+bch2_extent_has_group(struct bch_fs *c, struct bkey_s_c_extent e, unsigned group)
+{
+ const struct bch_extent_ptr *ptr;
+
+ extent_for_each_ptr(e, ptr) {
+ struct bch_dev *ca = c->devs[ptr->dev];
+
+ if (ca->mi.group &&
+ ca->mi.group - 1 == group)
+ return ptr;
+ }
+
+ return NULL;
+}
+
+/*
+ * Return the first pointer in @e whose device is a member of @target
+ * (as decided by dev_in_target()), or NULL if no pointer matches.
+ */
+const struct bch_extent_ptr *
+bch2_extent_has_target(struct bch_fs *c, struct bkey_s_c_extent e, unsigned target)
+{
+ const struct bch_extent_ptr *ptr;
+
+ extent_for_each_ptr(e, ptr)
+ if (dev_in_target(c->devs[ptr->dev], target))
+ return ptr;
+
+ return NULL;
+}
+
unsigned bch2_extent_nr_ptrs(struct bkey_s_c_extent e)
{
const struct bch_extent_ptr *ptr;
return nr_ptrs;
}
-unsigned bch2_extent_nr_good_ptrs(struct bch_fs *c, struct bkey_s_c_extent e)
+/*
+ * Durability contributed by a single extent pointer:
+ * - 0 for cached pointers (they don't count toward persistence)
+ * - 0 if the pointer's device is in the FAILED state
+ * - otherwise the device's configured durability (mi.durability)
+ */
+unsigned bch2_extent_ptr_durability(struct bch_fs *c,
+ const struct bch_extent_ptr *ptr)
+{
+ struct bch_dev *ca;
+
+ if (ptr->cached)
+ return 0;
+
+ ca = bch_dev_bkey_exists(c, ptr->dev);
+
+ if (ca->mi.state == BCH_MEMBER_STATE_FAILED)
+ return 0;
+
+ return ca->mi.durability;
+}
+
+unsigned bch2_extent_durability(struct bch_fs *c, struct bkey_s_c_extent e)
{
const struct bch_extent_ptr *ptr;
- unsigned nr_ptrs = 0;
+ unsigned durability = 0;
extent_for_each_ptr(e, ptr)
- nr_ptrs += (!ptr->cached &&
- bch_dev_bkey_exists(c, ptr->dev)->mi.state !=
- BCH_MEMBER_STATE_FAILED);
+ durability += bch2_extent_ptr_durability(c, ptr);
- return nr_ptrs;
+ return durability;
}
unsigned bch2_extent_is_compressed(struct bkey_s_c k)
bch2_extent_drop_redundant_crcs(e);
}
-void bch2_extent_drop_ptr_idx(struct bkey_s_extent e, unsigned idx)
-{
- struct bch_extent_ptr *ptr;
- unsigned i = 0;
-
- extent_for_each_ptr(e, ptr)
- if (i++ == idx)
- goto found;
-
- BUG();
-found:
- bch2_extent_drop_ptr(e, ptr);
-}
-
static inline bool can_narrow_crc(struct bch_extent_crc_unpacked u,
struct bch_extent_crc_unpacked n)
{
unsigned seq;
const char *err;
char buf[160];
- struct bucket *g;
+ struct bucket_mark mark;
struct bch_dev *ca;
unsigned replicas = 0;
bool bad;
extent_for_each_ptr(e, ptr) {
ca = bch_dev_bkey_exists(c, ptr->dev);
- g = PTR_BUCKET(ca, ptr);
replicas++;
if (!test_bit(BCH_FS_ALLOC_READ_DONE, &c->flags))
do {
seq = read_seqcount_begin(&c->gc_pos_lock);
+ mark = ptr_bucket_mark(ca, ptr);
+
bad = gc_pos_cmp(c->gc_pos, gc_pos_btree_node(b)) > 0 &&
- (g->mark.data_type != BCH_DATA_BTREE ||
- g->mark.dirty_sectors < c->opts.btree_node_size);
+ (mark.data_type != BCH_DATA_BTREE ||
+ mark.dirty_sectors < c->opts.btree_node_size);
} while (read_seqcount_retry(&c->gc_pos_lock, seq));
err = "inconsistent";
goto err;
}
- if (!bch2_sb_has_replicas(c, e, BCH_DATA_BTREE)) {
+ if (!bch2_bkey_replicas_marked(c, BCH_DATA_BTREE, e.s_c)) {
bch2_bkey_val_to_text(c, btree_node_type(b),
buf, sizeof(buf), k);
bch2_fs_bug(c,
err:
bch2_bkey_val_to_text(c, btree_node_type(b), buf, sizeof(buf), k);
bch2_fs_bug(c, "%s btree pointer %s: bucket %zi "
- "gen %i last_gc %i mark %08x",
+ "gen %i mark %08x",
err, buf, PTR_BUCKET_NR(ca, ptr),
- PTR_BUCKET(ca, ptr)->mark.gen,
- ca->oldest_gens[PTR_BUCKET_NR(ca, ptr)],
- (unsigned) g->mark.counter);
+ mark.gen, (unsigned) mark.counter);
}
static void bch2_btree_ptr_to_text(struct bch_fs *c, char *buf,
bkey_start_pos(&_ur)) ?: (r).k - (l).k; \
})
-static inline void extent_sort_sift(struct btree_node_iter *iter,
+static inline void extent_sort_sift(struct btree_node_iter_large *iter,
struct btree *b, size_t i)
{
heap_sift_down(iter, i, extent_sort_cmp);
}
-static inline void extent_sort_next(struct btree_node_iter *iter,
+static inline void extent_sort_next(struct btree_node_iter_large *iter,
struct btree *b,
struct btree_node_iter_set *i)
{
struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c,
struct bset *dst,
struct btree *b,
- struct btree_node_iter *iter)
+ struct btree_node_iter_large *iter)
{
struct bkey_format *f = &b->format;
struct btree_node_iter_set *_l = iter->data, *_r;
heap_resort(iter, extent_sort_cmp);
- while (!bch2_btree_node_iter_end(iter)) {
+ while (!bch2_btree_node_iter_large_end(iter)) {
lk = __btree_node_offset_to_key(b, _l->k);
if (iter->used == 1) {
};
static void bch2_add_sectors(struct extent_insert_state *s,
- struct bkey_s_c k, u64 offset, s64 sectors)
+ struct bkey_s_c k, u64 offset, s64 sectors)
{
struct bch_fs *c = s->trans->c;
- struct btree *b = s->insert->iter->nodes[0];
+ struct btree *b = s->insert->iter->l[0].b;
EBUG_ON(bkey_cmp(bkey_start_pos(k.k), b->data->min_key) < 0);
static enum btree_insert_ret
extent_insert_should_stop(struct extent_insert_state *s)
{
- struct btree *b = s->insert->iter->nodes[0];
+ struct btree *b = s->insert->iter->l[0].b;
/*
* Check if we have sufficient space in both the btree node and the
static void extent_bset_insert(struct bch_fs *c, struct btree_iter *iter,
struct bkey_i *insert)
{
- struct btree *b = iter->nodes[0];
- struct btree_node_iter *node_iter = &iter->node_iters[0];
- struct bset_tree *t = bset_tree_last(b);
+ struct btree_iter_level *l = &iter->l[0];
+ struct bset_tree *t = bset_tree_last(l->b);
struct bkey_packed *where =
- bch2_btree_node_iter_bset_pos(node_iter, b, t);
- struct bkey_packed *prev = bch2_bkey_prev(b, t, where);
+ bch2_btree_node_iter_bset_pos(&l->iter, l->b, t);
+ struct bkey_packed *prev = bch2_bkey_prev(l->b, t, where);
struct bkey_packed *next_live_key = where;
unsigned clobber_u64s;
if (prev)
where = bkey_next(prev);
- while (next_live_key != btree_bkey_last(b, t) &&
+ while (next_live_key != btree_bkey_last(l->b, t) &&
bkey_deleted(next_live_key))
next_live_key = bkey_next(next_live_key);
bch2_extent_merge_inline(c, iter, prev, bkey_to_packed(insert), true))
goto drop_deleted_keys;
- if (next_live_key != btree_bkey_last(b, t) &&
+ if (next_live_key != btree_bkey_last(l->b, t) &&
bch2_extent_merge_inline(c, iter, bkey_to_packed(insert),
next_live_key, false))
goto drop_deleted_keys;
- bch2_bset_insert(b, node_iter, where, insert, clobber_u64s);
- bch2_btree_node_iter_fix(iter, b, node_iter, t, where,
+ bch2_bset_insert(l->b, &l->iter, where, insert, clobber_u64s);
+ bch2_btree_node_iter_fix(iter, l->b, &l->iter, t, where,
clobber_u64s, where->u64s);
return;
drop_deleted_keys:
- bch2_bset_delete(b, where, clobber_u64s);
- bch2_btree_node_iter_fix(iter, b, node_iter, t, where, clobber_u64s, 0);
+ bch2_bset_delete(l->b, where, clobber_u64s);
+ bch2_btree_node_iter_fix(iter, l->b, &l->iter, t,
+ where, clobber_u64s, 0);
}
static void extent_insert_committed(struct extent_insert_state *s)
}
if (debug_check_bkeys(c))
- bch2_bkey_debugcheck(c, iter->nodes[iter->level],
- bkey_i_to_s_c(&split.k));
+ bch2_bkey_debugcheck(c, iter->l[0].b, bkey_i_to_s_c(&split.k));
bch2_btree_journal_key(s->trans, iter, &split.k);
static enum btree_insert_ret
extent_insert_advance_pos(struct extent_insert_state *s, struct bkey_s_c k)
{
- struct btree *b = s->insert->iter->nodes[0];
+ struct btree *b = s->insert->iter->l[0].b;
struct bpos next_pos = bpos_min(s->insert->k->k.p,
k.k ? k.k->p : b->key.k.p);
enum btree_insert_ret ret;
switch (bch2_disk_reservation_add(c,
s->trans->disk_res,
- sectors, flags)) {
+ sectors * bch2_extent_nr_dirty_ptrs(k),
+ flags)) {
case 0:
break;
case -ENOSPC:
{
struct bch_fs *c = s->trans->c;
struct btree_iter *iter = s->insert->iter;
- struct btree *b = iter->nodes[0];
- struct btree_node_iter *node_iter = &iter->node_iters[0];
+ struct btree_iter_level *l = &iter->l[0];
+ struct btree *b = l->b;
+ struct btree_node_iter *node_iter = &l->iter;
enum btree_insert_ret ret;
switch (overlap) {
{
struct bch_fs *c = s->trans->c;
struct btree_iter *iter = s->insert->iter;
- struct btree *b = iter->nodes[0];
- struct btree_node_iter *node_iter = &iter->node_iters[0];
+ struct btree_iter_level *l = &iter->l[0];
+ struct btree *b = l->b;
+ struct btree_node_iter *node_iter = &l->iter;
struct bkey_packed *_k;
struct bkey unpacked;
struct bkey_i *insert = s->insert->k;
{
struct bch_fs *c = trans->c;
struct btree_iter *iter = insert->iter;
- struct btree *b = iter->nodes[0];
- struct btree_node_iter *node_iter = &iter->node_iters[0];
+ struct btree_iter_level *l = &iter->l[0];
+ struct btree *b = l->b;
+ struct btree_node_iter *node_iter = &l->iter;
struct bkey_packed *_k;
struct bkey unpacked;
enum btree_insert_ret ret = BTREE_INSERT_OK;
{
const struct bch_extent_ptr *ptr;
struct bch_dev *ca;
- struct bucket *g;
struct bucket_mark mark;
unsigned seq, stale;
char buf[160];
bool bad;
- unsigned ptrs_per_tier[BCH_TIER_MAX];
unsigned replicas = 0;
/*
* going to get overwritten during replay)
*/
- memset(ptrs_per_tier, 0, sizeof(ptrs_per_tier));
-
extent_for_each_ptr(e, ptr) {
ca = bch_dev_bkey_exists(c, ptr->dev);
- g = PTR_BUCKET(ca, ptr);
replicas++;
- ptrs_per_tier[ca->mi.tier]++;
/*
* If journal replay hasn't finished, we might be seeing keys
do {
seq = read_seqcount_begin(&c->gc_pos_lock);
- mark = READ_ONCE(g->mark);
+ mark = ptr_bucket_mark(ca, ptr);
/* between mark and bucket gen */
smp_rmb();
}
if (!bkey_extent_is_cached(e.k) &&
- !bch2_sb_has_replicas(c, e, BCH_DATA_USER)) {
+ !bch2_bkey_replicas_marked(c, BCH_DATA_USER, e.s_c)) {
bch2_bkey_val_to_text(c, btree_node_type(b),
buf, sizeof(buf), e.s_c);
bch2_fs_bug(c,
bch2_bkey_val_to_text(c, btree_node_type(b), buf,
sizeof(buf), e.s_c);
bch2_fs_bug(c, "extent pointer bad gc mark: %s:\nbucket %zu "
- "gen %i last_gc %i type %u",
- buf, PTR_BUCKET_NR(ca, ptr), mark.gen,
- ca->oldest_gens[PTR_BUCKET_NR(ca, ptr)],
- mark.data_type);
+ "gen %i type %u", buf,
+ PTR_BUCKET_NR(ca, ptr), mark.gen, mark.data_type);
return;
}
#undef p
}
-static unsigned PTR_TIER(struct bch_fs *c,
- const struct bch_extent_ptr *ptr)
-{
- return bch_dev_bkey_exists(c, ptr->dev)->mi.tier;
-}
-
static void bch2_extent_crc_init(union bch_extent_crc *crc,
struct bch_extent_crc_unpacked new)
{
}
void bch2_extent_mark_replicas_cached(struct bch_fs *c,
- struct bkey_s_extent e)
+ struct bkey_s_extent e,
+ unsigned nr_desired_replicas,
+ unsigned target)
{
struct bch_extent_ptr *ptr;
- unsigned tier = 0, nr_cached = 0;
- unsigned nr_good = bch2_extent_nr_good_ptrs(c, e.c);
- bool have_higher_tier;
+ int extra = bch2_extent_durability(c, e.c) - nr_desired_replicas;
- if (nr_good <= c->opts.data_replicas)
+ if (extra <= 0)
return;
- nr_cached = nr_good - c->opts.data_replicas;
+ extent_for_each_ptr(e, ptr) {
+ int n = bch2_extent_ptr_durability(c, ptr);
- do {
- have_higher_tier = false;
+ if (n && n <= extra &&
+ !dev_in_target(c->devs[ptr->dev], target)) {
+ ptr->cached = true;
+ extra -= n;
+ }
+ }
- extent_for_each_ptr(e, ptr) {
- if (!ptr->cached &&
- PTR_TIER(c, ptr) == tier) {
- ptr->cached = true;
- nr_cached--;
- if (!nr_cached)
- return;
- }
+ extent_for_each_ptr(e, ptr) {
+ int n = bch2_extent_ptr_durability(c, ptr);
- if (PTR_TIER(c, ptr) > tier)
- have_higher_tier = true;
+ if (n && n <= extra) {
+ ptr->cached = true;
+ extra -= n;
}
-
- tier++;
- } while (have_higher_tier);
+ }
}
/*
- * This picks a non-stale pointer, preferabbly from a device other than
- * avoid. Avoid can be NULL, meaning pick any. If there are no non-stale
- * pointers to other devices, it will still pick a pointer from avoid.
- * Note that it prefers lowered-numbered pointers to higher-numbered pointers
- * as the pointers are sorted by tier, hence preferring pointers to tier 0
- * rather than pointers to tier 1.
+ * This picks a non-stale pointer, preferably from a device other than @avoid.
+ * Avoid can be NULL, meaning pick any. If there are no non-stale pointers to
+ * other devices, it will still pick a pointer from avoid.
*/
void bch2_extent_pick_ptr(struct bch_fs *c, struct bkey_s_c k,
struct bch_devs_mask *avoid,
struct bkey_packed *k, struct bkey uk,
bool check, bool could_pack)
{
- struct btree *b = iter->nodes[0];
- struct btree_node_iter *node_iter = &iter->node_iters[0];
+ struct btree_iter_level *l = &iter->l[0];
BUG_ON(!bkey_deleted(k));
return !bkey_packed(k) || could_pack;
} else {
uk.p = new_pos;
- extent_save(b, node_iter, k, &uk);
- bch2_bset_fix_invalidated_key(b, t, k);
- bch2_btree_node_iter_fix(iter, b, node_iter, t,
- k, k->u64s, k->u64s);
+ extent_save(l->b, &l->iter, k, &uk);
+ bch2_bset_fix_invalidated_key(l->b, t, k);
+ bch2_btree_node_iter_fix(iter, l->b, &l->iter, t,
+ k, k->u64s, k->u64s);
return true;
}
}
static bool extent_merge_do_overlapping(struct btree_iter *iter,
struct bkey *m, bool back_merge)
{
- struct btree *b = iter->nodes[0];
- struct btree_node_iter *node_iter = &iter->node_iters[0];
+ struct btree_iter_level *l = &iter->l[0];
+ struct btree *b = l->b;
+ struct btree_node_iter *node_iter = &l->iter;
struct bset_tree *t;
struct bkey_packed *k;
struct bkey uk;
struct bkey_packed *r,
bool back_merge)
{
- struct btree *b = iter->nodes[0];
- struct btree_node_iter *node_iter = &iter->node_iters[0];
+ struct btree *b = iter->l[0].b;
+ struct btree_node_iter *node_iter = &iter->l[0].iter;
const struct bkey_format *f = &b->format;
struct bset_tree *t = bset_tree_last(b);
struct bkey_packed *m;
if (back_merge)
bch2_btree_iter_set_pos_same_leaf(iter, li.k.k.p);
- bch2_btree_node_iter_fix(iter, iter->nodes[0], node_iter,
- t, m, m->u64s, m->u64s);
+ bch2_btree_node_iter_fix(iter, b, node_iter,
+ t, m, m->u64s, m->u64s);
if (!back_merge)
bkey_copy(packed_to_bkey(l), &li.k);
extent_i_save(b, m, &li.k);
bch2_bset_fix_invalidated_key(b, t, m);
- bch2_btree_node_iter_fix(iter, iter->nodes[0], node_iter,
- t, m, m->u64s, m->u64s);
+ bch2_btree_node_iter_fix(iter, b, node_iter,
+ t, m, m->u64s, m->u64s);
return true;
default:
BUG();
end.offset += size;
for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, pos,
- BTREE_ITER_WITH_HOLES, k) {
+ BTREE_ITER_SLOTS, k) {
if (bkey_cmp(bkey_start_pos(k.k), end) >= 0)
break;