X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libbcachefs%2Fbtree_update_interior.c;h=66da1da2f0757eaa14b2417d7b305813bb32b77a;hb=17d1c4f4fe29453fbb4087eb5de8aa6f9e6605c7;hp=a40a0f5ca4bdb5d632eb1841fc3d17016042f685;hpb=284c1f5148afb088e64c8a77983a43732c4d499b;p=bcachefs-tools-debian

diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c
index a40a0f5..66da1da 100644
--- a/libbcachefs/btree_update_interior.c
+++ b/libbcachefs/btree_update_interior.c
@@ -11,6 +11,7 @@
 #include "btree_iter.h"
 #include "btree_locking.h"
 #include "buckets.h"
+#include "clock.h"
 #include "error.h"
 #include "extents.h"
 #include "journal.h"
@@ -19,9 +20,9 @@
 #include "recovery.h"
 #include "replicas.h"
 #include "super-io.h"
+#include "trace.h"
 
 #include <linux/random.h>
-#include <trace/events/bcachefs.h>
 
 static int bch2_btree_insert_node(struct btree_update *, struct btree_trans *,
 				  struct btree_path *, struct btree *,
@@ -242,7 +243,7 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans,
 	struct bch_fs *c = trans->c;
 	struct write_point *wp;
 	struct btree *b;
-	__BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX) tmp;
+	BKEY_PADDED_ONSTACK(k, BKEY_BTREE_PTR_VAL_U64s_MAX) tmp;
 	struct open_buckets ob = { .nr = 0 };
 	struct bch_devs_list devs_have = (struct bch_devs_list) { 0 };
 	unsigned nr_reserve;
@@ -300,7 +301,7 @@ retry:
 	bch2_open_bucket_get(c, wp, &ob);
 	bch2_alloc_sectors_done(c, wp);
 mem_alloc:
-	b = bch2_btree_node_mem_alloc(c, interior_node);
+	b = bch2_btree_node_mem_alloc(trans, interior_node);
 	six_unlock_write(&b->c.lock);
 	six_unlock_intent(&b->c.lock);
 
@@ -363,6 +364,7 @@ static struct btree *bch2_btree_node_alloc(struct btree_update *as,
 	BUG_ON(ret);
 
 	trace_and_count(c, btree_node_alloc, c, b);
+	bch2_increment_clock(c, btree_sectors(c), WRITE);
 	return b;
 }
 
@@ -686,7 +688,8 @@ err:
 	bch2_trans_unlock(&trans);
 	btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_intent);
 	mark_btree_node_locked(&trans, path, b->c.level, SIX_LOCK_intent);
-	bch2_btree_path_level_init(&trans, path, b);
+	path->l[b->c.level].lock_seq = six_lock_seq(&b->c.lock);
+	path->l[b->c.level].b = b;
 
 	bch2_btree_node_lock_write_nofail(&trans, path, &b->c);
 
@@ -1080,16 +1083,14 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
 	if (flags & BTREE_INSERT_GC_LOCK_HELD)
 		lockdep_assert_held(&c->gc_lock);
 	else if (!down_read_trylock(&c->gc_lock)) {
-		bch2_trans_unlock(trans);
-		down_read(&c->gc_lock);
-		ret = bch2_trans_relock(trans);
+		ret = drop_locks_do(trans, (down_read(&c->gc_lock), 0));
 		if (ret) {
 			up_read(&c->gc_lock);
 			return ERR_PTR(ret);
 		}
 	}
 
-	as = mempool_alloc(&c->btree_interior_update_pool, GFP_NOIO);
+	as = mempool_alloc(&c->btree_interior_update_pool, GFP_NOFS);
 	memset(as, 0, sizeof(*as));
 	closure_init(&as->cl, NULL);
 	as->c		= c;
@@ -1125,23 +1126,19 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
 				      BTREE_UPDATE_JOURNAL_RES,
 				      journal_flags|JOURNAL_RES_GET_NONBLOCK);
 	if (ret) {
-		bch2_trans_unlock(trans);
-
 		if (flags & BTREE_INSERT_JOURNAL_RECLAIM) {
 			ret = -BCH_ERR_journal_reclaim_would_deadlock;
 			goto err;
 		}
 
-		ret = bch2_journal_preres_get(&c->journal, &as->journal_preres,
+		ret = drop_locks_do(trans,
+			bch2_journal_preres_get(&c->journal, &as->journal_preres,
 					      BTREE_UPDATE_JOURNAL_RES,
-					      journal_flags);
-		if (ret) {
+					      journal_flags));
+		if (ret == -BCH_ERR_journal_preres_get_blocked) {
 			trace_and_count(c, trans_restart_journal_preres_get, trans, _RET_IP_, journal_flags);
 			ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_journal_preres_get);
-			goto err;
 		}
-
-		ret = bch2_trans_relock(trans);
 		if (ret)
 			goto err;
 	}
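[Note: the drop_locks_do() conversions in the two hunks above replace
open-coded unlock/do/relock sequences. As a hedged sketch, the helper is
assumed to be a btree_locking.h macro along these lines (illustrative,
not quoted from this patch):

	/*
	 * Assumed definition: evaluate _do with btree locks dropped,
	 * then retake them; returns the first nonzero result, so a
	 * failed relock surfaces as a transaction-restart error.
	 */
	#define drop_locks_do(_trans, _do)				\
	({								\
		bch2_trans_unlock(_trans);				\
		(_do) ?: bch2_trans_relock(_trans);			\
	})

Hence (down_read(&c->gc_lock), 0) above takes gc_lock while no btree
node locks are held, and bch2_journal_preres_get() can block safely.]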
@@ -1412,7 +1409,7 @@ static void __btree_split_node(struct btree_update *as,
 			out[i]->needs_whiteout = false;
 
 			btree_keys_account_key_add(&n[i]->nr, 0, out[i]);
-			out[i] = bkey_next(out[i]);
+			out[i] = bkey_p_next(out[i]);
 		}
 
 	for (i = 0; i < 2; i++) {
@@ -1677,7 +1674,7 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t
 	BUG_ON(!as || as->b);
 	bch2_verify_keylist_sorted(keys);
 
-	if (!(local_clock() & 63))
+	if ((local_clock() & 63) == 63)
 		return btree_trans_restart(trans, BCH_ERR_transaction_restart_split_race);
 
 	ret = bch2_btree_node_lock_write(trans, path, &b->c);
@@ -1716,8 +1713,10 @@ split:
 	 * We could attempt to avoid the transaction restart, by calling
 	 * bch2_btree_path_upgrade() and allocating more nodes:
 	 */
-	if (b->c.level >= as->update_level)
+	if (b->c.level >= as->update_level) {
+		trace_and_count(c, trans_restart_split_race, trans, _THIS_IP_, b);
 		return btree_trans_restart(trans, BCH_ERR_transaction_restart_split_race);
+	}
 
 	return btree_split(as, trans, path, b, keys, flags);
 }
@@ -1997,6 +1996,7 @@ err:
 struct async_btree_rewrite {
 	struct bch_fs		*c;
 	struct work_struct	work;
+	struct list_head	list;
 	enum btree_id		btree_id;
 	unsigned		level;
 	struct bpos		pos;
@@ -2056,15 +2056,10 @@ void async_btree_node_rewrite_work(struct work_struct *work)
 void bch2_btree_node_rewrite_async(struct bch_fs *c, struct btree *b)
 {
 	struct async_btree_rewrite *a;
-
-	if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_node_rewrite)) {
-		bch_err(c, "%s: error getting c->writes ref", __func__);
-		return;
-	}
+	int ret;
 
 	a = kmalloc(sizeof(*a), GFP_NOFS);
 	if (!a) {
-		bch2_write_ref_put(c, BCH_WRITE_REF_node_rewrite);
 		bch_err(c, "%s: error allocating memory", __func__);
 		return;
 	}
@@ -2074,11 +2069,63 @@ void bch2_btree_node_rewrite_async(struct bch_fs *c, struct btree *b)
 	a->level	= b->c.level;
 	a->pos		= b->key.k.p;
 	a->seq		= b->data->keys.seq;
-
 	INIT_WORK(&a->work, async_btree_node_rewrite_work);
+
+	if (unlikely(!test_bit(BCH_FS_MAY_GO_RW, &c->flags))) {
+		mutex_lock(&c->pending_node_rewrites_lock);
+		list_add(&a->list, &c->pending_node_rewrites);
+		mutex_unlock(&c->pending_node_rewrites_lock);
+		return;
+	}
+
+	if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_node_rewrite)) {
+		if (test_bit(BCH_FS_STARTED, &c->flags)) {
+			bch_err(c, "%s: error getting c->writes ref", __func__);
+			kfree(a);
+			return;
+		}
+
+		ret = bch2_fs_read_write_early(c);
+		if (ret) {
+			bch_err(c, "%s: error going read-write: %s",
+				__func__, bch2_err_str(ret));
+			kfree(a);
+			return;
+		}
+
+		bch2_write_ref_get(c, BCH_WRITE_REF_node_rewrite);
+	}
+
 	queue_work(c->btree_interior_update_worker, &a->work);
 }
 
+void bch2_do_pending_node_rewrites(struct bch_fs *c)
+{
+	struct async_btree_rewrite *a, *n;
+
+	mutex_lock(&c->pending_node_rewrites_lock);
+	list_for_each_entry_safe(a, n, &c->pending_node_rewrites, list) {
+		list_del(&a->list);
+
+		bch2_write_ref_get(c, BCH_WRITE_REF_node_rewrite);
+		queue_work(c->btree_interior_update_worker, &a->work);
+	}
+	mutex_unlock(&c->pending_node_rewrites_lock);
+}
+
+void bch2_free_pending_node_rewrites(struct bch_fs *c)
+{
+	struct async_btree_rewrite *a, *n;
+
+	mutex_lock(&c->pending_node_rewrites_lock);
+	list_for_each_entry_safe(a, n, &c->pending_node_rewrites, list) {
+		list_del(&a->list);
+
+		kfree(a);
+	}
+	mutex_unlock(&c->pending_node_rewrites_lock);
+}
+
 static int __bch2_btree_node_update_key(struct btree_trans *trans,
 					struct btree_iter *iter,
 					struct btree *b, struct btree *new_hash,
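[Note: the pending_node_rewrites list above defers node rewrites that
arrive before the filesystem is allowed to go read-write. A hedged
sketch of the intended pairing; the call sites below are hypothetical
names for illustration, not part of this patch:

	/* Once the filesystem may go RW, requeue deferred rewrites: */
	static void example_go_rw(struct bch_fs *c)
	{
		set_bit(BCH_FS_MAY_GO_RW, &c->flags);
		bch2_do_pending_node_rewrites(c);
	}

	/* On teardown of an fs that never went RW, just free them: */
	static void example_fs_free(struct bch_fs *c)
	{
		bch2_free_pending_node_rewrites(c);
	}

Each drained entry takes a BCH_WRITE_REF_node_rewrite ref before being
queued, presumably matching a put in async_btree_node_rewrite_work().]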
@@ -2203,14 +2250,12 @@ int bch2_btree_node_update_key(struct btree_trans *trans, struct btree_iter *ite
 	if (btree_ptr_hash_val(new_key) != b->hash_val) {
 		ret = bch2_btree_cache_cannibalize_lock(c, &cl);
 		if (ret) {
-			bch2_trans_unlock(trans);
-			closure_sync(&cl);
-			ret = bch2_trans_relock(trans);
+			ret = drop_locks_do(trans, (closure_sync(&cl), 0));
 			if (ret)
 				return ret;
 		}
 
-		new_hash = bch2_btree_node_mem_alloc(c, false);
+		new_hash = bch2_btree_node_mem_alloc(trans, false);
 	}
 
 	path->intent_ref++;
@@ -2273,8 +2318,9 @@ void bch2_btree_set_root_for_read(struct bch_fs *c, struct btree *b)
 	bch2_btree_set_root_inmem(c, b);
 }
 
-void bch2_btree_root_alloc(struct bch_fs *c, enum btree_id id)
+static int __bch2_btree_root_alloc(struct btree_trans *trans, enum btree_id id)
 {
+	struct bch_fs *c = trans->c;
 	struct closure cl;
 	struct btree *b;
 	int ret;
@@ -2286,7 +2332,7 @@ void bch2_btree_root_alloc(struct bch_fs *c, enum btree_id id)
 		closure_sync(&cl);
 	} while (ret);
 
-	b = bch2_btree_node_mem_alloc(c, false);
+	b = bch2_btree_node_mem_alloc(trans, false);
 	bch2_btree_cache_cannibalize_unlock(c);
 
 	set_btree_node_fake(b);
@@ -2315,6 +2361,12 @@ void bch2_btree_root_alloc(struct bch_fs *c, enum btree_id id)
 
 	six_unlock_write(&b->c.lock);
 	six_unlock_intent(&b->c.lock);
+	return 0;
+}
+
+void bch2_btree_root_alloc(struct bch_fs *c, enum btree_id id)
+{
+	bch2_trans_run(c, __bch2_btree_root_alloc(&trans, id));
 }
 
 void bch2_btree_updates_to_text(struct printbuf *out, struct bch_fs *c)
@@ -2353,20 +2405,15 @@ bool bch2_btree_interior_updates_flush(struct bch_fs *c)
 	return ret;
 }
 
-void bch2_journal_entries_to_btree_roots(struct bch_fs *c, struct jset *jset)
+void bch2_journal_entry_to_btree_root(struct bch_fs *c, struct jset_entry *entry)
 {
-	struct btree_root *r;
-	struct jset_entry *entry;
+	struct btree_root *r = &c->btree_roots[entry->btree_id];
 
 	mutex_lock(&c->btree_root_lock);
 
-	vstruct_for_each(jset, entry)
-		if (entry->type == BCH_JSET_ENTRY_btree_root) {
-			r = &c->btree_roots[entry->btree_id];
-			r->level = entry->level;
-			r->alive = true;
-			bkey_copy(&r->key, &entry->start[0]);
-		}
+	r->level = entry->level;
+	r->alive = true;
+	bkey_copy(&r->key, &entry->start[0]);
 
 	mutex_unlock(&c->btree_root_lock);
 }
@@ -2392,7 +2439,7 @@ bch2_btree_roots_to_journal_entries(struct bch_fs *c,
 					  BCH_JSET_ENTRY_btree_root,
 					  i, c->btree_roots[i].level,
 					  &c->btree_roots[i].key,
-					  c->btree_roots[i].key.u64s);
+					  c->btree_roots[i].key.k.u64s);
 			end = vstruct_next(end);
 		}
 
@@ -2416,11 +2463,17 @@ int bch2_fs_btree_interior_update_init(struct bch_fs *c)
 	mutex_init(&c->btree_interior_update_lock);
 	INIT_WORK(&c->btree_interior_update_work, btree_interior_update_work);
 
+	INIT_LIST_HEAD(&c->pending_node_rewrites);
+	mutex_init(&c->pending_node_rewrites_lock);
+
 	c->btree_interior_update_worker =
 		alloc_workqueue("btree_update", WQ_UNBOUND|WQ_MEM_RECLAIM, 1);
 	if (!c->btree_interior_update_worker)
-		return -ENOMEM;
+		return -BCH_ERR_ENOMEM_btree_interior_update_worker_init;
+
+	if (mempool_init_kmalloc_pool(&c->btree_interior_update_pool, 1,
+				      sizeof(struct btree_update)))
+		return -BCH_ERR_ENOMEM_btree_interior_update_pool_init;
 
-	return mempool_init_kmalloc_pool(&c->btree_interior_update_pool, 1,
-					 sizeof(struct btree_update));
+	return 0;
 }
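[Note: the init function above now returns typed -BCH_ERR_ENOMEM_*
codes instead of a bare -ENOMEM, so callers can report exactly which
allocation failed. A hedged usage sketch, reusing bch_err() and
bch2_err_str() as seen elsewhere in this patch (the call site itself is
illustrative):

	ret = bch2_fs_btree_interior_update_init(c);
	if (ret)
		bch_err(c, "error initializing btree interior updates: %s",
			bch2_err_str(ret));
]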