- BUG_ON(c->btree_roots[b->btree_id].as != as);
- c->btree_roots[b->btree_id].as = NULL;
-
- bch2_btree_set_root_ondisk(c, b);
-
- /*
- * We don't have to wait anything anything here (before
- * btree_interior_update_nodes_reachable frees the old nodes
- * ondisk) - we've ensured that the very next journal write will
- * have the pointer to the new root, and before the allocator
- * can reuse the old nodes it'll have to do a journal commit:
- */
- six_unlock_read(&b->lock);
- }
- mutex_unlock(&c->btree_interior_update_lock);
-
- continue_at(cl, btree_interior_update_nodes_reachable, system_wq);
-}
-
-/*
- * We're updating @b with pointers to nodes that haven't finished writing yet:
- * block @b from being written until @as completes
- */
-static void btree_interior_update_updated_btree(struct bch_fs *c,
- struct btree_interior_update *as,
- struct btree *b)
-{
- mutex_lock(&c->btree_interior_update_lock);
-
- /* @as must not already be tracking some other update target: */
- BUG_ON(as->mode != BTREE_INTERIOR_NO_UPDATE);
- BUG_ON(!btree_node_dirty(b));
-
- /* Hook @as onto @b's write_blocked list so writes of @b stall on it: */
- as->mode = BTREE_INTERIOR_UPDATING_NODE;
- as->b = b;
- list_add(&as->write_blocked_list, &b->write_blocked);
-
- mutex_unlock(&c->btree_interior_update_lock);
-
- /*
-  * Wait (via as->cl) until journal sequence as->journal_seq has been
-  * written, then resume in btree_interior_update_nodes_written:
-  */
- bch2_journal_wait_on_seq(&c->journal, as->journal_seq, &as->cl);
-
- continue_at(&as->cl, btree_interior_update_nodes_written,
- system_freezable_wq);
-}
-
-/*
- * The root of @btree_id is being replaced by the node @as created: record
- * @as as the pending update of that root, and redirect any older, still
- * in-flight root update to wait on @as instead.
- */
-static void btree_interior_update_updated_root(struct bch_fs *c,
- struct btree_interior_update *as,
- enum btree_id btree_id)
-{
- struct btree_root *r = &c->btree_roots[btree_id];
-
- mutex_lock(&c->btree_interior_update_lock);
-
- /* @as must not already be tracking some other update target: */
- BUG_ON(as->mode != BTREE_INTERIOR_NO_UPDATE);
-
- /*
- * Old root might not be persistent yet - if so, redirect its
- * btree_interior_update operation to point to us:
- */
- if (r->as) {
- BUG_ON(r->as->mode != BTREE_INTERIOR_UPDATING_ROOT);
-
- /* The older update now waits on @as rather than on the root: */
- r->as->b = NULL;
- r->as->mode = BTREE_INTERIOR_UPDATING_AS;
- r->as->parent_as = as;
- /* Keep @as alive until the redirected update drops its ref: */
- closure_get(&as->cl);
- }
-
- as->mode = BTREE_INTERIOR_UPDATING_ROOT;
- as->b = r->b;
- r->as = as;
-
- mutex_unlock(&c->btree_interior_update_lock);
-
- /*
-  * Wait (via as->cl) until journal sequence as->journal_seq has been
-  * written, then resume in btree_interior_update_nodes_written:
-  */
- bch2_journal_wait_on_seq(&c->journal, as->journal_seq, &as->cl);
-
- continue_at(&as->cl, btree_interior_update_nodes_written,
- system_freezable_wq);
-}
-
-/*
- * Journal pin flush callback: kick off an async flush of the journal up to
- * the sequence number @as's keys were written at, so the pin can be released.
- */
-static void interior_update_flush(struct journal *j, struct journal_entry_pin *pin)
-{
- struct btree_interior_update *as =
- container_of(pin, struct btree_interior_update, journal);
-
- bch2_journal_flush_seq_async(j, as->journal_seq, NULL);
-}
-
-/*
- * @b is being split/rewritten: it may have pointers to not-yet-written btree
- * nodes and thus outstanding btree_interior_updates - redirect @b's
- * btree_interior_updates to point to this btree_interior_update:
- */
-void bch2_btree_interior_update_will_free_node(struct bch_fs *c,
- struct btree_interior_update *as,
- struct btree *b)
-{
- struct btree_interior_update *p, *n;
- struct pending_btree_node_free *d;
- struct bset_tree *t;
-
- /*
- * Does this node have data that hasn't been written in the journal?
- *
- * If so, we have to wait for the corresponding journal entry to be
- * written before making the new nodes reachable - we can't just carry
- * over the bset->journal_seq tracking, since we'll be mixing those keys
- * in with keys that aren't in the journal anymore:
- */
- for_each_bset(b, t)
- as->journal_seq = max(as->journal_seq, bset(b, t)->journal_seq);
-
- /*
- * Does this node have unwritten data that has a pin on the journal?
- *
- * If so, transfer that pin to the btree_interior_update operation -
- * note that if we're freeing multiple nodes, we only need to keep the
- * oldest pin of any of the nodes we're freeing. We'll release the pin
- * when the new nodes are persistent and reachable on disk:
- */
- /* Check both of the node's write buffers for a journal pin: */
- bch2_journal_pin_add_if_older(&c->journal,
- &b->writes[0].journal,
- &as->journal, interior_update_flush);
- bch2_journal_pin_add_if_older(&c->journal,
- &b->writes[1].journal,
- &as->journal, interior_update_flush);
-
- mutex_lock(&c->btree_interior_update_lock);
-
- /*
- * Does this node have any btree_interior_update operations preventing
- * it from being written?
- *
- * If so, redirect them to point to this btree_interior_update: we can
- * write out our new nodes, but we won't make them visible until those
- * operations complete
- */
- list_for_each_entry_safe(p, n, &b->write_blocked, write_blocked_list) {
- BUG_ON(p->mode != BTREE_INTERIOR_UPDATING_NODE);
-
- /* @p now waits on @as instead of blocking @b's writes: */
- p->mode = BTREE_INTERIOR_UPDATING_AS;
- list_del(&p->write_blocked_list);
- p->b = NULL;
- p->parent_as = as;
- /* Keep @as alive until @p drops its reference: */
- closure_get(&as->cl);
- }
-
- /* Add this node to the list of nodes being freed: */
- BUG_ON(as->nr_pending >= ARRAY_SIZE(as->pending));
-
- /* Record enough to identify the node: its seq, id, level and key. */
- d = &as->pending[as->nr_pending++];
- d->index_update_done = false;
- d->seq = b->data->keys.seq;
- d->btree_id = b->btree_id;
- d->level = b->level;
- bkey_copy(&d->key, &b->key);
-
- mutex_unlock(&c->btree_interior_update_lock);
-}
-
-/*
- * Debug assertion: verify that interior node @b's last key compares equal to
- * b->key.k.p and that no key follows it. The disabled #else branch is a
- * verbose variant that dumps the node and the failing condition before
- * BUG()ing.
- */
-static void btree_node_interior_verify(struct btree *b)
-{
- struct btree_node_iter iter;
- struct bkey_packed *k;
-
- /* Only meaningful for interior nodes: */
- BUG_ON(!b->level);
-
- bch2_btree_node_iter_init(&iter, b, b->key.k.p, false, false);
-#if 1
- /* There must be a key at b->key.k.p... */
- BUG_ON(!(k = bch2_btree_node_iter_peek(&iter, b)) ||
- bkey_cmp_left_packed(b, k, &b->key.k.p));
-
- /* ...and it must be the last key in the node: */
- BUG_ON((bch2_btree_node_iter_advance(&iter, b),
- !bch2_btree_node_iter_end(&iter)));
-#else
- const char *msg;
-
- msg = "not found";
- k = bch2_btree_node_iter_peek(&iter, b);
- if (!k)
- goto err;
-
- msg = "isn't what it should be";
- if (bkey_cmp_left_packed(b, k, &b->key.k.p))
- goto err;
-
- bch2_btree_node_iter_advance(&iter, b);
-
- msg = "isn't last key";
- if (!bch2_btree_node_iter_end(&iter))
- goto err;
- return;
-err:
- bch2_dump_btree_node(b);
- printk(KERN_ERR "last key %llu:%llu %s\n", b->key.k.p.inode,
- b->key.k.p.offset, msg);
- BUG();
-#endif
-}
-
-/*
- * Insert all keys from @insert_keys into interior node @b, as part of the
- * btree_interior_update operation @as.
- *
- * Returns BTREE_INSERT_BTREE_NODE_FULL (without inserting anything) if @b
- * doesn't have room for the whole keylist, BTREE_INSERT_OK otherwise.
- */
-static enum btree_insert_ret
-bch2_btree_insert_keys_interior(struct btree *b,
- struct btree_iter *iter,
- struct keylist *insert_keys,
- struct btree_interior_update *as,
- struct btree_reserve *res)
-{
- struct bch_fs *c = iter->c;
- struct btree_iter *linked;
- struct btree_node_iter node_iter;
- struct bkey_i *insert = bch2_keylist_front(insert_keys);
- struct bkey_packed *k;
-
- BUG_ON(!btree_node_intent_locked(iter, btree_node_root(c, b)->level));
- BUG_ON(!b->level);
- BUG_ON(!as || as->b);
- verify_keys_sorted(insert_keys);
-
- btree_node_lock_for_insert(b, iter);
-
- /* Bail out (before modifying anything) if the keys won't all fit: */
- if (bch_keylist_u64s(insert_keys) >
- bch_btree_keys_u64s_remaining(c, b)) {
- bch2_btree_node_unlock_write(b, iter);
- return BTREE_INSERT_BTREE_NODE_FULL;
- }
-
- /* Don't screw up @iter's position: */
- node_iter = iter->node_iters[b->level];
-
- /*
- * btree_split(), btree_gc_coalesce() will insert keys before
- * the iterator's current position - they know the keys go in
- * the node the iterator points to:
- */
- while ((k = bch2_btree_node_iter_prev_all(&node_iter, b)) &&
- (bkey_cmp_packed(b, k, &insert->k) >= 0))
- ;
-
- while (!bch2_keylist_empty(insert_keys)) {
- insert = bch2_keylist_front(insert_keys);
-
- bch2_insert_fixup_btree_ptr(iter, b, insert,
- &node_iter, &res->disk_res);
- bch2_keylist_pop_front(insert_keys);
- }
-
- /* Block writes of @b until @as completes: */
- btree_interior_update_updated_btree(c, as, b);
-
- /* Re-peek all iterators pointing at @b so their positions stay valid: */
- for_each_linked_btree_node(iter, b, linked)
- bch2_btree_node_iter_peek(&linked->node_iters[b->level],
- b);
- bch2_btree_node_iter_peek(&iter->node_iters[b->level], b);
-
- bch2_btree_iter_verify(iter, b);
-
- if (bch2_maybe_compact_whiteouts(c, b))
- bch2_btree_iter_reinit_node(iter, b);
-
- bch2_btree_node_unlock_write(b, iter);
-
- btree_node_interior_verify(b);
- return BTREE_INSERT_OK;
-}
-
-/*
- * Move keys from n1 (original replacement node, now lower node) to n2 (higher
- * node)
- */
-static struct btree *__btree_split_node(struct btree_iter *iter, struct btree *n1,
- struct btree_reserve *reserve)
-{
- size_t nr_packed = 0, nr_unpacked = 0;
- struct btree *n2;
- struct bset *set1, *set2;
- struct bkey_packed *k, *prev = NULL;
-
- n2 = bch2_btree_node_alloc(iter->c, n1->level, iter->btree_id, reserve);
- n2->data->max_key = n1->data->max_key;
- n2->data->format = n1->format;
- n2->key.k.p = n1->key.k.p;
-
- btree_node_set_format(n2, n2->data->format);
-
- set1 = btree_bset_first(n1);
- set2 = btree_bset_first(n2);
-
- /*
- * Has to be a linear search because we don't have an auxiliary
- * search tree yet
- */
- k = set1->start;
- while (1) {
- if (bkey_next(k) == vstruct_last(set1))
- break;
- if (k->_data - set1->_data >= (le16_to_cpu(set1->u64s) * 3) / 5)