/* Btree node freeing/allocation: */
static bool btree_key_matches(struct bch_fs *c,
- struct bkey_s_c_extent l,
- struct bkey_s_c_extent r)
+ struct bkey_s_c l,
+ struct bkey_s_c r)
{
+ struct bkey_ptrs_c ptrs1 = bch2_bkey_ptrs_c(l);
+ struct bkey_ptrs_c ptrs2 = bch2_bkey_ptrs_c(r);
const struct bch_extent_ptr *ptr1, *ptr2;
- extent_for_each_ptr(l, ptr1)
- extent_for_each_ptr(r, ptr2)
+ bkey_for_each_ptr(ptrs1, ptr1)
+ bkey_for_each_ptr(ptrs2, ptr2)
if (ptr1->dev == ptr2->dev &&
ptr1->gen == ptr2->gen &&
ptr1->offset == ptr2->offset)
{
struct bch_fs *c = as->c;
struct pending_btree_node_free *d;
- unsigned replicas;
-
- /*
- * btree_update lock is only needed here to avoid racing with
- * gc:
- */
- mutex_lock(&c->btree_interior_update_lock);
+ struct gc_pos pos = { 0 };
for (d = as->pending; d < as->pending + as->nr_pending; d++)
if (!bkey_cmp(k.k->p, d->key.k.p) &&
- btree_key_matches(c, bkey_s_c_to_extent(k),
- bkey_i_to_s_c_extent(&d->key)))
+ btree_key_matches(c, k, bkey_i_to_s_c(&d->key)))
goto found;
BUG();
found:
BUG_ON(d->index_update_done);
d->index_update_done = true;
- /*
- * Btree nodes are accounted as freed in bch_alloc_stats when they're
- * freed from the index:
- */
- replicas = bch2_extent_nr_dirty_ptrs(k);
- if (replicas)
- stats->replicas[replicas - 1].data[BCH_DATA_BTREE] -=
- c->opts.btree_node_size * replicas;
-
/*
* We're dropping @k from the btree, but it's still live until the
* index update is persistent so we need to keep a reference around for
* bch2_mark_key() compares the current gc pos to the pos we're
* moving this reference from, hence one comparison here:
*/
- if (gc_pos_cmp(c->gc_pos, gc_phase(GC_PHASE_PENDING_DELETE)) < 0) {
- struct bch_fs_usage tmp = { 0 };
-
- bch2_mark_key(c, BKEY_TYPE_BTREE,
+ if (gc_pos_cmp(c->gc_pos, b
+ ? gc_pos_btree_node(b)
+ : gc_pos_btree_root(as->btree_id)) >= 0 &&
+ gc_pos_cmp(c->gc_pos, gc_phase(GC_PHASE_PENDING_DELETE)) < 0)
+ bch2_mark_key_locked(c,
bkey_i_to_s_c(&d->key),
- false, 0, b
- ? gc_pos_btree_node(b)
- : gc_pos_btree_root(as->btree_id),
- &tmp, 0, 0);
- /*
- * Don't apply tmp - pending deletes aren't tracked in
- * bch_alloc_stats:
- */
- }
-
- mutex_unlock(&c->btree_interior_update_lock);
+ false, 0, pos,
+ NULL, 0, BCH_BUCKET_MARK_GC);
}
static void __btree_node_free(struct bch_fs *c, struct btree *b)
void bch2_btree_node_free_inmem(struct bch_fs *c, struct btree *b,
struct btree_iter *iter)
{
+ struct btree_iter *linked;
+
+ for_each_btree_iter(iter, linked)
+ BUG_ON(linked->l[b->level].b == b);
+
/*
* Is this a node that isn't reachable on disk yet?
*
*/
btree_update_drop_new_node(c, b);
- __bch2_btree_node_lock_write(b, iter);
+ six_lock_write(&b->lock);
__btree_node_free(c, b);
six_unlock_write(&b->lock);
-
- bch2_btree_iter_node_drop(iter, b);
+ six_unlock_intent(&b->lock);
}
static void bch2_btree_node_free_ondisk(struct bch_fs *c,
struct pending_btree_node_free *pending)
{
- struct bch_fs_usage stats = { 0 };
-
BUG_ON(!pending->index_update_done);
- bch2_mark_key(c, BKEY_TYPE_BTREE,
- bkey_i_to_s_c(&pending->key),
+ bch2_mark_key(c, bkey_i_to_s_c(&pending->key),
false, 0,
gc_phase(GC_PHASE_PENDING_DELETE),
- &stats, 0, 0);
- /*
- * Don't apply stats - pending deletes aren't tracked in
- * bch_alloc_stats:
- */
+ NULL, 0, 0);
}
static struct btree *__bch2_btree_node_alloc(struct bch_fs *c,
struct write_point *wp;
struct btree *b;
BKEY_PADDED(k) tmp;
- struct bkey_i_extent *e;
struct open_buckets ob = { .nr = 0 };
struct bch_devs_list devs_have = (struct bch_devs_list) { 0 };
unsigned nr_reserve;
mutex_unlock(&c->btree_reserve_cache_lock);
retry:
- wp = bch2_alloc_sectors_start(c, c->opts.foreground_target,
+ wp = bch2_alloc_sectors_start(c, c->opts.foreground_target, 0,
writepoint_ptr(&c->btree_write_point),
&devs_have,
res->nr_replicas,
goto retry;
}
- e = bkey_extent_init(&tmp.k);
- bch2_alloc_sectors_append_ptrs(c, wp, e, c->opts.btree_node_size);
+ bkey_btree_ptr_init(&tmp.k);
+ bch2_alloc_sectors_append_ptrs(c, wp, &tmp.k, c->opts.btree_node_size);
bch2_open_bucket_get(c, wp, &ob);
bch2_alloc_sectors_done(c, wp);
set_btree_node_accessed(b);
set_btree_node_dirty(b);
+ set_btree_node_need_write(b);
bch2_bset_init_first(b, &b->data->keys);
memset(&b->nr, 0, sizeof(b->nr));
b->data->flags = 0;
SET_BTREE_NODE_ID(b->data, as->btree_id);
SET_BTREE_NODE_LEVEL(b->data, level);
- b->data->ptr = bkey_i_to_extent(&b->key)->v.start->ptr;
+ b->data->ptr = bkey_i_to_btree_ptr(&b->key)->v.start[0];
bch2_btree_build_aux_trees(b);
goto err_free;
}
- ret = bch2_mark_bkey_replicas(c, BKEY_TYPE_BTREE,
- bkey_i_to_s_c(&b->key));
+ ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(&b->key));
if (ret)
goto err_free;
closure_debug_destroy(&as->cl);
mempool_free(as, &c->btree_interior_update_pool);
- percpu_ref_put(&c->writes);
closure_wake_up(&c->btree_interior_update_wait);
mutex_unlock(&c->btree_interior_update_lock);
int ret;
ret = bch2_journal_open_seq_async(&c->journal, as->journal_seq, cl);
- if (ret < 0)
- goto err;
- if (!ret) {
+ if (ret == -EAGAIN) {
continue_at(cl, btree_update_wait_on_journal, system_wq);
return;
}
+ if (ret < 0)
+ goto err;
bch2_journal_flush_seq_async(&c->journal, as->journal_seq, cl);
err:
closure_wait(&btree_current_write(b)->wait, cl);
list_del(&as->write_blocked_list);
+
+ /*
+ * for flush_held_btree_writes() waiting on updates to flush or
+ * nodes to be writeable:
+ */
+ closure_wake_up(&c->btree_interior_update_wait);
mutex_unlock(&c->btree_interior_update_lock);
/*
list_for_each_entry_safe(p, n, &b->write_blocked, write_blocked_list) {
list_del(&p->write_blocked_list);
btree_update_reparent(as, p);
+
+ /*
+ * for flush_held_btree_writes() waiting on updates to flush or
+ * nodes to be writeable:
+ */
+ closure_wake_up(&c->btree_interior_update_wait);
}
clear_btree_node_dirty(b);
struct btree_reserve *reserve;
struct btree_update *as;
- if (unlikely(!percpu_ref_tryget(&c->writes)))
- return ERR_PTR(-EROFS);
-
reserve = bch2_btree_reserve_get(c, nr_nodes, flags, cl);
- if (IS_ERR(reserve)) {
- percpu_ref_put(&c->writes);
+ if (IS_ERR(reserve))
return ERR_CAST(reserve);
- }
as = mempool_alloc(&c->btree_interior_update_pool, GFP_NOIO);
memset(as, 0, sizeof(*as));
{
struct bch_fs *c = as->c;
struct btree *old = btree_node_root(c, b);
- struct bch_fs_usage stats = { 0 };
+ struct bch_fs_usage *fs_usage;
__bch2_btree_set_root_inmem(c, b);
- bch2_mark_key(c, BKEY_TYPE_BTREE,
- bkey_i_to_s_c(&b->key),
+ mutex_lock(&c->btree_interior_update_lock);
+ percpu_down_read_preempt_disable(&c->mark_lock);
+ fs_usage = bch2_fs_usage_get_scratch(c);
+
+ bch2_mark_key_locked(c, bkey_i_to_s_c(&b->key),
true, 0,
gc_pos_btree_root(b->btree_id),
- &stats, 0, 0);
+ fs_usage, 0, 0);
if (old && !btree_node_fake(old))
bch2_btree_node_free_index(as, NULL,
bkey_i_to_s_c(&old->key),
- &stats);
- bch2_fs_usage_apply(c, &stats, &as->reserve->disk_res,
+ fs_usage);
+ bch2_fs_usage_apply(c, fs_usage, &as->reserve->disk_res,
gc_pos_btree_root(b->btree_id));
+
+ percpu_up_read_preempt_enable(&c->mark_lock);
+ mutex_unlock(&c->btree_interior_update_lock);
}
static void bch2_btree_set_root_ondisk(struct bch_fs *c, struct btree *b, int rw)
struct btree_node_iter *node_iter)
{
struct bch_fs *c = as->c;
- struct bch_fs_usage stats = { 0 };
+ struct bch_fs_usage *fs_usage;
struct bkey_packed *k;
struct bkey tmp;
BUG_ON(insert->k.u64s > bch_btree_keys_u64s_remaining(c, b));
- if (bkey_extent_is_data(&insert->k))
- bch2_mark_key(c, BKEY_TYPE_BTREE,
- bkey_i_to_s_c(insert),
- true, 0,
- gc_pos_btree_node(b), &stats, 0, 0);
+ mutex_lock(&c->btree_interior_update_lock);
+ percpu_down_read_preempt_disable(&c->mark_lock);
+ fs_usage = bch2_fs_usage_get_scratch(c);
+
+ bch2_mark_key_locked(c, bkey_i_to_s_c(insert),
+ true, 0,
+ gc_pos_btree_node(b), fs_usage, 0, 0);
while ((k = bch2_btree_node_iter_peek_all(node_iter, b)) &&
bkey_iter_pos_cmp(b, &insert->k.p, k) > 0)
if (k && !bkey_cmp_packed(b, k, &insert->k))
bch2_btree_node_free_index(as, b,
bkey_disassemble(b, k, &tmp),
- &stats);
+ fs_usage);
- bch2_fs_usage_apply(c, &stats, &as->reserve->disk_res,
+ bch2_fs_usage_apply(c, fs_usage, &as->reserve->disk_res,
gc_pos_btree_node(b));
+ percpu_up_read_preempt_enable(&c->mark_lock);
+ mutex_unlock(&c->btree_interior_update_lock);
+
bch2_btree_bset_insert_key(iter, b, node_iter, insert);
set_btree_node_dirty(b);
set_btree_node_need_write(b);
if (n3)
bch2_open_buckets_put(c, &n3->ob);
- /*
- * Note - at this point other linked iterators could still have @b read
- * locked; we're depending on the bch2_btree_iter_node_replace() calls
- * below removing all references to @b so we don't return with other
- * iterators pointing to a node they have locked that's been freed.
- *
- * We have to free the node first because the bch2_iter_node_replace()
- * calls will drop _our_ iterator's reference - and intent lock - to @b.
- */
- bch2_btree_node_free_inmem(c, b, iter);
-
/* Successful split, update the iterator to point to the new nodes: */
+ bch2_btree_iter_node_drop(iter, b);
if (n3)
bch2_btree_iter_node_replace(iter, n3);
if (n2)
bch2_btree_iter_node_replace(iter, n2);
bch2_btree_iter_node_replace(iter, n1);
+ bch2_btree_node_free_inmem(c, b, iter);
+
+ bch2_btree_iter_verify_locks(iter);
+
bch2_time_stats_update(&c->times[BCH_TIME_btree_split], start_time);
}
bch2_btree_insert_node(as, parent, iter, &as->parent_keys, flags);
bch2_open_buckets_put(c, &n->ob);
- bch2_btree_node_free_inmem(c, b, iter);
- bch2_btree_node_free_inmem(c, m, iter);
+
+ bch2_btree_iter_node_drop(iter, b);
bch2_btree_iter_node_replace(iter, n);
bch2_btree_iter_verify(iter, n);
+ bch2_btree_node_free_inmem(c, b, iter);
+ bch2_btree_node_free_inmem(c, m, iter);
+
bch2_btree_update_done(as);
- six_unlock_intent(&m->lock);
up_read(&c->gc_lock);
out:
+ bch2_btree_iter_verify_locks(iter);
+
/*
* Don't downgrade locks here: we're called after successful insert,
* and the caller will downgrade locks after a successful insert
bch2_open_buckets_put(c, &n->ob);
- bch2_btree_node_free_inmem(c, b, iter);
-
+ bch2_btree_iter_node_drop(iter, b);
bch2_btree_iter_node_replace(iter, n);
+ bch2_btree_node_free_inmem(c, b, iter);
bch2_btree_update_done(as);
return 0;
struct btree_update *as,
struct btree_iter *iter,
struct btree *b, struct btree *new_hash,
- struct bkey_i_extent *new_key)
+ struct bkey_i_btree_ptr *new_key)
{
struct btree *parent;
int ret;
btree_interior_update_add_node_reference(as, b);
+ /*
+ * XXX: the rest of the update path treats this like we're actually
+ * inserting a new node and deleting the existing node, so the
+ * reservation needs to include enough space for @b
+ *
+ * that is actually quite fragile, and it is surprising that the code
+ * works this way; this accounting should be revisited and reworked
+ * into something more robust.
+ *
+ * (I think @b is just getting double counted until the btree update
+ * finishes and "deletes" @b on disk)
+ */
+ ret = bch2_disk_reservation_add(c, &as->reserve->disk_res,
+ c->opts.btree_node_size *
+ bch2_bkey_nr_ptrs(bkey_i_to_s_c(&new_key->k_i)),
+ BCH_DISK_RESERVATION_NOFAIL|
+ BCH_DISK_RESERVATION_GC_LOCK_HELD);
+ BUG_ON(ret);
+
parent = btree_node_parent(iter, b);
if (parent) {
if (new_hash) {
bkey_copy(&b->key, &new_key->k_i);
}
} else {
- struct bch_fs_usage stats = { 0 };
+ struct bch_fs_usage *fs_usage;
BUG_ON(btree_node_root(c, b) != b);
bch2_btree_node_lock_write(b, iter);
- bch2_mark_key(c, BKEY_TYPE_BTREE,
- bkey_i_to_s_c(&new_key->k_i),
+ mutex_lock(&c->btree_interior_update_lock);
+ percpu_down_read_preempt_disable(&c->mark_lock);
+ fs_usage = bch2_fs_usage_get_scratch(c);
+
+ bch2_mark_key_locked(c, bkey_i_to_s_c(&new_key->k_i),
true, 0,
gc_pos_btree_root(b->btree_id),
- &stats, 0, 0);
+ fs_usage, 0, 0);
bch2_btree_node_free_index(as, NULL,
bkey_i_to_s_c(&b->key),
- &stats);
- bch2_fs_usage_apply(c, &stats, &as->reserve->disk_res,
+ fs_usage);
+ bch2_fs_usage_apply(c, fs_usage, &as->reserve->disk_res,
gc_pos_btree_root(b->btree_id));
+ percpu_up_read_preempt_enable(&c->mark_lock);
+ mutex_unlock(&c->btree_interior_update_lock);
+
if (PTR_HASH(&new_key->k_i) != PTR_HASH(&b->key)) {
mutex_lock(&c->btree_cache.lock);
bch2_btree_node_hash_remove(&c->btree_cache, b);
}
int bch2_btree_node_update_key(struct bch_fs *c, struct btree_iter *iter,
- struct btree *b, struct bkey_i_extent *new_key)
+ struct btree *b,
+ struct bkey_i_btree_ptr *new_key)
{
struct btree *parent = btree_node_parent(iter, b);
struct btree_update *as = NULL;
goto err;
}
- ret = bch2_mark_bkey_replicas(c, BKEY_TYPE_BTREE,
- extent_i_to_s_c(new_key).s_c);
+ ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(&new_key->k_i));
if (ret)
goto err_free_update;
BUG_ON(btree_node_root(c, b));
__bch2_btree_set_root_inmem(c, b);
- bch2_btree_set_root_ondisk(c, b, READ);
}
void bch2_btree_root_alloc(struct bch_fs *c, enum btree_id id)
b->level = 0;
b->btree_id = id;
- bkey_extent_init(&b->key);
+ bkey_btree_ptr_init(&b->key);
b->key.k.p = POS_MAX;
- bkey_i_to_extent(&b->key)->v._data[0] = U64_MAX - id;
+ PTR_HASH(&b->key) = U64_MAX - id;
bch2_bset_init_first(b, &b->data->keys);
bch2_btree_build_aux_trees(b);
ssize_t bch2_btree_updates_print(struct bch_fs *c, char *buf)
{
- char *out = buf, *end = buf + PAGE_SIZE;
+ struct printbuf out = _PBUF(buf, PAGE_SIZE);
struct btree_update *as;
mutex_lock(&c->btree_interior_update_lock);
list_for_each_entry(as, &c->btree_interior_update_list, list)
- out += scnprintf(out, end - out, "%p m %u w %u r %u j %llu\n",
- as,
- as->mode,
- as->nodes_written,
- atomic_read(&as->cl.remaining) & CLOSURE_REMAINING_MASK,
- as->journal.seq);
+ pr_buf(&out, "%p m %u w %u r %u j %llu\n",
+ as,
+ as->mode,
+ as->nodes_written,
+ atomic_read(&as->cl.remaining) & CLOSURE_REMAINING_MASK,
+ as->journal.seq);
mutex_unlock(&c->btree_interior_update_lock);
- return out - buf;
+ return out.pos - buf;
}
size_t bch2_btree_interior_updates_nr_pending(struct bch_fs *c)