static struct btree *__bch2_btree_node_alloc(struct bch_fs *c,
struct disk_reservation *res,
struct closure *cl,
+ bool interior_node,
unsigned flags)
{
struct write_point *wp;
if (flags & BTREE_INSERT_USE_RESERVE) {
nr_reserve = 0;
- alloc_reserve = RESERVE_BTREE_MOVINGGC;
+ alloc_reserve = RESERVE_btree_movinggc;
} else {
nr_reserve = BTREE_NODE_RESERVE;
- alloc_reserve = RESERVE_BTREE;
+ alloc_reserve = RESERVE_btree;
}
mutex_lock(&c->btree_reserve_cache_lock);
bch2_open_bucket_get(c, wp, &ob);
bch2_alloc_sectors_done(c, wp);
mem_alloc:
- b = bch2_btree_node_mem_alloc(c);
+ b = bch2_btree_node_mem_alloc(c, interior_node);
six_unlock_write(&b->c.lock);
six_unlock_intent(&b->c.lock);
{
struct bch_fs *c = as->c;
struct btree *b;
+ struct prealloc_nodes *p = &as->prealloc_nodes[!!level];
int ret;
BUG_ON(level >= BTREE_MAX_DEPTH);
- BUG_ON(!as->nr_prealloc_nodes);
+ BUG_ON(!p->nr);
- b = as->prealloc_nodes[--as->nr_prealloc_nodes];
+ b = p->b[--p->nr];
six_lock_intent(&b->c.lock, NULL, NULL);
six_lock_write(&b->c.lock, NULL, NULL);
/*
 * Drain every node still held in as->prealloc_nodes.
 *
 * For each node: if c->btree_reserve_cache has a free slot, the node's open
 * buckets (b->ob) and key (b->key) are copied into that slot and b->ob.nr is
 * zeroed; otherwise b->ob is handed to bch2_open_buckets_put(). The node is
 * then passed to __btree_node_free() while its intent and write locks are
 * held.
 *
 * Patch note: the removed ('-') lines drained a single as->prealloc_nodes[]
 * array counted by as->nr_prealloc_nodes and held btree_reserve_cache_lock
 * across the whole loop, node locking included. The added ('+') lines walk
 * each struct prealloc_nodes entry of as->prealloc_nodes and take the mutex
 * only around the reserve cache update.
 */
static void bch2_btree_reserve_put(struct btree_update *as)
{
struct bch_fs *c = as->c;
+ struct prealloc_nodes *p;
- mutex_lock(&c->btree_reserve_cache_lock);
/* Added side: outer loop over every entry of as->prealloc_nodes. */
+ for (p = as->prealloc_nodes;
+ p < as->prealloc_nodes + ARRAY_SIZE(as->prealloc_nodes);
+ p++) {
+ while (p->nr) {
+ struct btree *b = p->b[--p->nr];
- while (as->nr_prealloc_nodes) {
- struct btree *b = as->prealloc_nodes[--as->nr_prealloc_nodes];
/* Added side: the mutex is taken per node, before the node's own locks. */
+ mutex_lock(&c->btree_reserve_cache_lock);
- six_lock_intent(&b->c.lock, NULL, NULL);
- six_lock_write(&b->c.lock, NULL, NULL);
+ if (c->btree_reserve_cache_nr <
+ ARRAY_SIZE(c->btree_reserve_cache)) {
+ struct btree_alloc *a =
+ &c->btree_reserve_cache[c->btree_reserve_cache_nr++];
- if (c->btree_reserve_cache_nr <
- ARRAY_SIZE(c->btree_reserve_cache)) {
- struct btree_alloc *a =
- &c->btree_reserve_cache[c->btree_reserve_cache_nr++];
+ a->ob = b->ob;
+ b->ob.nr = 0;
+ bkey_copy(&a->k, &b->key);
+ } else {
+ bch2_open_buckets_put(c, &b->ob);
+ }
- a->ob = b->ob;
- b->ob.nr = 0;
- bkey_copy(&a->k, &b->key);
- } else {
- bch2_open_buckets_put(c, &b->ob);
- }
/* Added side: mutex dropped before the node is locked and freed. */
+ mutex_unlock(&c->btree_reserve_cache_lock);
- __btree_node_free(c, b);
- six_unlock_write(&b->c.lock);
- six_unlock_intent(&b->c.lock);
+ six_lock_intent(&b->c.lock, NULL, NULL);
+ six_lock_write(&b->c.lock, NULL, NULL);
+ __btree_node_free(c, b);
+ six_unlock_write(&b->c.lock);
+ six_unlock_intent(&b->c.lock);
+ }
}
-
- mutex_unlock(&c->btree_reserve_cache_lock);
}
-static int bch2_btree_reserve_get(struct btree_update *as, unsigned nr_nodes,
+static int bch2_btree_reserve_get(struct btree_update *as,
+ unsigned nr_nodes[2],
unsigned flags)
{
struct bch_fs *c = as->c;
struct closure cl;
struct btree *b;
+ unsigned interior;
int ret;
closure_init_stack(&cl);
retry:
- BUG_ON(nr_nodes > BTREE_RESERVE_MAX);
+ BUG_ON(nr_nodes[0] + nr_nodes[1] > BTREE_RESERVE_MAX);
/*
* Protects reaping from the btree node cache and using the btree node
if (ret)
goto err;
- while (as->nr_prealloc_nodes < nr_nodes) {
- b = __bch2_btree_node_alloc(c, &as->disk_res,
- flags & BTREE_INSERT_NOWAIT
- ? NULL : &cl, flags);
- if (IS_ERR(b)) {
- ret = PTR_ERR(b);
- goto err;
- }
+ for (interior = 0; interior < 2; interior++) {
+ struct prealloc_nodes *p = as->prealloc_nodes + interior;
+
+ while (p->nr < nr_nodes[interior]) {
+ b = __bch2_btree_node_alloc(c, &as->disk_res,
+ flags & BTREE_INSERT_NOWAIT
+ ? NULL : &cl,
+ interior, flags);
+ if (IS_ERR(b)) {
+ ret = PTR_ERR(b);
+ goto err;
+ }
- as->prealloc_nodes[as->nr_prealloc_nodes++] = b;
+ p->b[p->nr++] = b;
+ }
}
bch2_btree_cache_cannibalize_unlock(c);
if (ret == -EAGAIN)
goto retry;
- trace_btree_reserve_get_fail(c, nr_nodes, &cl);
+ trace_btree_reserve_get_fail(c, nr_nodes[0] + nr_nodes[1], &cl);
return ret;
}
mutex_unlock(&c->btree_interior_update_lock);
}
/*
 * Patch note: the two removed helpers, btree_update_will_delete_key() and
 * btree_update_will_add_key(), are replaced by one helper that takes the
 * destination keylist as a parameter.
 *
 * btree_update_add_key() appends a copy of b->key to @keys:
 *   @as:   update whose _old_keys array supplies the capacity bound
 *   @keys: keylist to append to (callers in this patch pass &as->old_keys
 *          or &as->new_keys)
 *   @b:    node whose key is copied
 *
 * The copy's v.mem_ptr field is overwritten with b->c.level + 1; the
 * for_each_keylist_key() loops elsewhere in this patch read v.mem_ptr back
 * as the level argument for bch2_trans_mark_old()/bch2_trans_mark_new().
 *
 * NOTE(review): the BUG_ON bound is ARRAY_SIZE(as->_old_keys) even when
 * @keys is &as->new_keys (the removed code checked as->_new_keys there) -
 * presumably both arrays have the same size; confirm against the struct
 * definition.
 */
-static void btree_update_will_delete_key(struct btree_update *as,
- struct bkey_i *k)
+static void btree_update_add_key(struct btree_update *as,
+ struct keylist *keys, struct btree *b)
{
- BUG_ON(bch2_keylist_u64s(&as->old_keys) + k->k.u64s >
+ struct bkey_i *k = &b->key;
+
+ BUG_ON(bch2_keylist_u64s(keys) + k->k.u64s >
ARRAY_SIZE(as->_old_keys));
- bch2_keylist_add(&as->old_keys, k);
-}
-static void btree_update_will_add_key(struct btree_update *as,
- struct bkey_i *k)
-{
- BUG_ON(bch2_keylist_u64s(&as->new_keys) + k->k.u64s >
- ARRAY_SIZE(as->_new_keys));
- bch2_keylist_add(&as->new_keys, k);
/* Added side: copy into the list's top slot, patch mem_ptr, then push. */
+ bkey_copy(keys->top, k);
+ bkey_i_to_btree_ptr_v2(keys->top)->v.mem_ptr = b->c.level + 1;
+
+ bch2_keylist_push(keys);
}
/*
struct bkey_i *k;
int ret;
- trans->extra_journal_entries = (void *) &as->journal_entries[0];
- trans->extra_journal_entry_u64s = as->journal_u64s;
+ ret = darray_make_room(&trans->extra_journal_entries, as->journal_u64s);
+ if (ret)
+ return ret;
+
+ memcpy(&darray_top(trans->extra_journal_entries),
+ as->journal_entries,
+ as->journal_u64s * sizeof(u64));
+ trans->extra_journal_entries.nr += as->journal_u64s;
+
trans->journal_pin = &as->journal;
- for_each_keylist_key(&as->new_keys, k) {
- ret = bch2_trans_mark_new(trans, k, 0);
+ for_each_keylist_key(&as->old_keys, k) {
+ unsigned level = bkey_i_to_btree_ptr_v2(k)->v.mem_ptr;
+
+ ret = bch2_trans_mark_old(trans, as->btree_id, level, bkey_i_to_s_c(k), 0);
if (ret)
return ret;
}
- for_each_keylist_key(&as->old_keys, k) {
- ret = bch2_trans_mark_old(trans, bkey_i_to_s_c(k), 0);
+ for_each_keylist_key(&as->new_keys, k) {
+ unsigned level = bkey_i_to_btree_ptr_v2(k)->v.mem_ptr;
+
+ ret = bch2_trans_mark_new(trans, as->btree_id, level, k, 0);
if (ret)
return ret;
}
BTREE_INSERT_NOFAIL|
BTREE_INSERT_NOCHECK_RW|
BTREE_INSERT_JOURNAL_RECLAIM|
- BTREE_INSERT_JOURNAL_RESERVED,
+ JOURNAL_WATERMARK_reserved,
btree_update_nodes_written_trans(&trans, as));
bch2_trans_exit(&trans);
if (!ret) {
i->journal_seq = cpu_to_le64(
- max(journal_seq,
- le64_to_cpu(i->journal_seq)));
+ max(journal_seq,
+ le64_to_cpu(i->journal_seq)));
bch2_btree_add_journal_pin(c, b, journal_seq);
} else {
mutex_unlock(&c->btree_interior_update_lock);
- btree_update_will_add_key(as, &b->key);
+ btree_update_add_key(as, &as->new_keys, b);
}
/*
* btree_updates to point to this btree_update:
*/
static void bch2_btree_interior_update_will_free_node(struct btree_update *as,
- struct btree *b)
+ struct btree *b)
{
struct bch_fs *c = as->c;
struct btree_update *p, *n;
*/
btree_update_drop_new_node(c, b);
- btree_update_will_delete_key(as, &b->key);
+ btree_update_add_key(as, &as->old_keys, b);
as->old_nodes[as->nr_old_nodes] = b;
as->old_nodes_seq[as->nr_old_nodes] = b->data->keys.seq;
static struct btree_update *
bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
- unsigned level, unsigned nr_nodes, unsigned flags)
+ unsigned level, bool split, unsigned flags)
{
struct bch_fs *c = trans->c;
struct btree_update *as;
u64 start_time = local_clock();
int disk_res_flags = (flags & BTREE_INSERT_NOFAIL)
? BCH_DISK_RESERVATION_NOFAIL : 0;
- int journal_flags = 0;
+ unsigned nr_nodes[2] = { 0, 0 };
+ unsigned update_level = level;
+ int journal_flags = flags & JOURNAL_WATERMARK_MASK;
int ret = 0;
BUG_ON(!path->should_be_locked);
- if (flags & BTREE_INSERT_JOURNAL_RESERVED)
- journal_flags |= JOURNAL_RES_GET_RESERVED;
if (flags & BTREE_INSERT_JOURNAL_RECLAIM)
journal_flags |= JOURNAL_RES_GET_NONBLOCK;
- /*
- * XXX: figure out how far we might need to split,
- * instead of locking/reserving all the way to the root:
- */
+ while (1) {
+ nr_nodes[!!update_level] += 1 + split;
+ update_level++;
+
+ if (!btree_path_node(path, update_level))
+ break;
+
+ /*
+ * XXX: figure out how far we might need to split,
+ * instead of locking/reserving all the way to the root:
+ */
+ split = update_level + 1 < BTREE_MAX_DEPTH;
+ }
+
+ /* Might have to allocate a new root: */
+ if (update_level < BTREE_MAX_DEPTH)
+ nr_nodes[1] += 1;
+
if (!bch2_btree_path_upgrade(trans, path, U8_MAX)) {
trace_trans_restart_iter_upgrade(trans->fn, _RET_IP_,
path->btree_id, &path->pos);
}
ret = bch2_disk_reservation_get(c, &as->disk_res,
- nr_nodes * btree_sectors(c),
+ (nr_nodes[0] + nr_nodes[1]) * btree_sectors(c),
c->opts.metadata_replicas,
disk_res_flags);
if (ret)
list_del_init(&b->list);
mutex_unlock(&c->btree_cache.lock);
- if (b->c.level)
- six_lock_pcpu_alloc(&b->c.lock);
- else
- six_lock_pcpu_free(&b->c.lock);
-
mutex_lock(&c->btree_root_lock);
BUG_ON(btree_node_root(c, b) &&
(b->c.level < btree_node_root(c, b)->c.level ||
{
struct bch_fs *c = as->c;
struct bkey_packed *k;
- const char *invalid;
+ struct printbuf buf = PRINTBUF;
BUG_ON(insert->k.type == KEY_TYPE_btree_ptr_v2 &&
!btree_ptr_sectors_written(insert));
if (unlikely(!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags)))
bch2_journal_key_overwritten(c, b->c.btree_id, b->c.level, insert->k.p);
- invalid = bch2_bkey_invalid(c, bkey_i_to_s_c(insert), btree_node_type(b)) ?:
- bch2_bkey_in_btree_node(b, bkey_i_to_s_c(insert));
- if (invalid) {
- struct printbuf buf = PRINTBUF;
-
+ if (bch2_bkey_invalid(c, bkey_i_to_s_c(insert),
+ btree_node_type(b), WRITE, &buf) ?:
+ bch2_bkey_in_btree_node(b, bkey_i_to_s_c(insert), &buf)) {
+ printbuf_reset(&buf);
+ pr_buf(&buf, "inserting invalid bkey\n ");
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(insert));
- bch2_fs_inconsistent(c, "inserting invalid bkey %s: %s", buf.buf, invalid);
- printbuf_exit(&buf);
+ pr_buf(&buf, "\n ");
+ bch2_bkey_invalid(c, bkey_i_to_s_c(insert),
+ btree_node_type(b), WRITE, &buf);
+ bch2_bkey_in_btree_node(b, bkey_i_to_s_c(insert), &buf);
+
+ bch2_fs_inconsistent(c, "%s", buf.buf);
dump_stack();
}
bch2_btree_bset_insert_key(trans, path, b, node_iter, insert);
set_btree_node_dirty_acct(c, b);
set_btree_node_need_write(b);
+
+ printbuf_exit(&buf);
}
static void
struct bpos n1_pos;
n2 = bch2_btree_node_alloc(as, n1->c.level);
- bch2_btree_update_add_new_node(as, n2);
n2->data->max_key = n1->data->max_key;
n2->data->format = n1->format;
SET_BTREE_NODE_SEQ(n2->data, BTREE_NODE_SEQ(n1->data));
n2->key.k.p = n1->key.k.p;
+ bch2_btree_update_add_new_node(as, n2);
+
set1 = btree_bset_first(n1);
set2 = btree_bset_first(n2);
bch2_btree_interior_update_will_free_node(as, b);
n1 = bch2_btree_node_alloc_replacement(as, b);
- bch2_btree_update_add_new_node(as, n1);
if (keys)
btree_split_insert_keys(as, trans, path, n1, keys);
six_unlock_write(&n2->c.lock);
six_unlock_write(&n1->c.lock);
+ bch2_btree_update_add_new_node(as, n1);
+
bch2_btree_node_write(c, n1, SIX_LOCK_intent, 0);
bch2_btree_node_write(c, n2, SIX_LOCK_intent, 0);
bch2_btree_build_aux_trees(n1);
six_unlock_write(&n1->c.lock);
+ bch2_btree_update_add_new_node(as, n1);
+
bch2_btree_node_write(c, n1, SIX_LOCK_intent, 0);
if (parent)
struct btree_path *path,
unsigned flags)
{
- struct bch_fs *c = trans->c;
struct btree *b = path_l(path)->b;
struct btree_update *as;
unsigned l;
int ret = 0;
as = bch2_btree_update_start(trans, path, path->level,
- btree_update_reserve_required(c, b), flags);
+ true, flags);
if (IS_ERR(as))
return PTR_ERR(as);
goto out;
parent = btree_node_parent(path, b);
- as = bch2_btree_update_start(trans, path, level,
- btree_update_reserve_required(c, parent) + 1,
- flags|
+ as = bch2_btree_update_start(trans, path, level, false,
BTREE_INSERT_NOFAIL|
- BTREE_INSERT_USE_RESERVE);
+ BTREE_INSERT_USE_RESERVE|
+ flags);
ret = PTR_ERR_OR_ZERO(as);
if (ret)
goto err;
bch2_btree_interior_update_will_free_node(as, m);
n = bch2_btree_node_alloc(as, b->c.level);
- bch2_btree_update_add_new_node(as, n);
SET_BTREE_NODE_SEQ(n->data,
max(BTREE_NODE_SEQ(b->data),
btree_set_min(n, prev->data->min_key);
btree_set_max(n, next->data->max_key);
- n->data->format = new_f;
+ bch2_btree_update_add_new_node(as, n);
+
+ n->data->format = new_f;
btree_node_set_format(n, new_f);
bch2_btree_sort_into(c, n, prev);
parent = btree_node_parent(iter->path, b);
as = bch2_btree_update_start(trans, iter->path, b->c.level,
- (parent
- ? btree_update_reserve_required(c, parent)
- : 0) + 1,
- flags);
+ false, flags);
ret = PTR_ERR_OR_ZERO(as);
- if (ret) {
- trace_btree_gc_rewrite_node_fail(c, b);
+ if (ret)
goto out;
- }
bch2_btree_interior_update_will_free_node(as, b);
bch2_btree_build_aux_trees(n);
six_unlock_write(&n->c.lock);
- trace_btree_gc_rewrite_node(c, b);
+ trace_btree_rewrite(c, b);
bch2_btree_node_write(c, n, SIX_LOCK_intent, 0);
struct bch_fs *c = trans->c;
struct btree_iter iter2 = { NULL };
struct btree *parent;
- u64 journal_entries[BKEY_BTREE_PTR_U64s_MAX];
int ret;
if (!skip_triggers) {
- ret = bch2_trans_mark_new(trans, new_key, 0);
+ ret = bch2_trans_mark_old(trans, b->c.btree_id, b->c.level + 1,
+ bkey_i_to_s_c(&b->key), 0);
if (ret)
return ret;
- ret = bch2_trans_mark_old(trans, bkey_i_to_s_c(&b->key), 0);
+ ret = bch2_trans_mark_new(trans, b->c.btree_id, b->c.level + 1,
+ new_key, 0);
if (ret)
return ret;
}
btree_node_unlock(iter2.path, iter2.path->level);
path_l(iter2.path)->b = BTREE_ITER_NO_NODE_UP;
iter2.path->level++;
+ btree_path_set_dirty(iter2.path, BTREE_ITER_NEED_TRAVERSE);
bch2_btree_path_check_sort(trans, iter2.path, 0);
} else {
BUG_ON(btree_node_root(c, b) != b);
- trans->extra_journal_entries = (void *) &journal_entries[0];
- trans->extra_journal_entry_u64s =
- journal_entry_set((void *) &journal_entries[0],
- BCH_JSET_ENTRY_btree_root,
- b->c.btree_id, b->c.level,
- new_key, new_key->k.u64s);
+ ret = darray_make_room(&trans->extra_journal_entries,
+ jset_u64s(new_key->k.u64s));
+ if (ret)
+ return ret;
+
+ journal_entry_set((void *) &darray_top(trans->extra_journal_entries),
+ BCH_JSET_ENTRY_btree_root,
+ b->c.btree_id, b->c.level,
+ new_key, new_key->k.u64s);
+ trans->extra_journal_entries.nr += jset_u64s(new_key->k.u64s);
}
ret = bch2_trans_commit(trans, NULL, NULL,
BTREE_INSERT_NOCHECK_RW|
BTREE_INSERT_USE_RESERVE|
BTREE_INSERT_JOURNAL_RECLAIM|
- BTREE_INSERT_JOURNAL_RESERVED);
+ JOURNAL_WATERMARK_reserved);
if (ret)
goto err;
return -EINTR;
}
- new_hash = bch2_btree_node_mem_alloc(c);
+ new_hash = bch2_btree_node_mem_alloc(c, false);
}
path->intent_ref++;
closure_sync(&cl);
} while (ret);
- b = bch2_btree_node_mem_alloc(c);
+ b = bch2_btree_node_mem_alloc(c, false);
bch2_btree_cache_cannibalize_unlock(c);
set_btree_node_fake(b);
mutex_unlock(&c->btree_interior_update_lock);
}
/*
 * Report whether c->btree_interior_update_list is non-empty, sampled while
 * holding c->btree_interior_update_lock.
 *
 * Patch note: the removed ('-') version was non-static, walked the list and
 * returned the number of entries as a size_t; the added ('+') version is
 * static and returns only a bool from list_empty().
 */
-size_t bch2_btree_interior_updates_nr_pending(struct bch_fs *c)
+static bool bch2_btree_interior_updates_pending(struct bch_fs *c)
{
- size_t ret = 0;
- struct list_head *i;
+ bool ret;
mutex_lock(&c->btree_interior_update_lock);
- list_for_each(i, &c->btree_interior_update_list)
- ret++;
+ ret = !list_empty(&c->btree_interior_update_list);
mutex_unlock(&c->btree_interior_update_lock);
return ret;
}
/*
 * New function added by this patch.
 *
 * Samples bch2_btree_interior_updates_pending() once; if it was true, calls
 * closure_wait_event() on c->btree_interior_update_wait with the condition
 * that bch2_btree_interior_updates_pending() has become false.
 *
 * Returns the initial sample, i.e. true when updates were pending on entry,
 * false when there was nothing to wait for.
 */
+bool bch2_btree_interior_updates_flush(struct bch_fs *c)
+{
+ bool ret = bch2_btree_interior_updates_pending(c);
+
+ if (ret)
+ closure_wait_event(&c->btree_interior_update_wait,
+ !bch2_btree_interior_updates_pending(c));
+ return ret;
+}
+
void bch2_journal_entries_to_btree_roots(struct bch_fs *c, struct jset *jset)
{
struct btree_root *r;