+ struct btree_trans trans;
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ struct bkey_ptrs_c ptrs;
+ const struct bch_extent_ptr *ptr;
+ struct {
+ struct bpos b;
+ unsigned gen;
+ struct nocow_lock_bucket *l;
+ } buckets[BCH_REPLICAS_MAX];
+ unsigned nr_buckets = 0;
+ u32 snapshot;
+ int ret, i;
+
+ if (op->flags & BCH_WRITE_MOVE)
+ return;
+
+ bch2_trans_init(&trans, c, 0, 0);
+retry:
+ bch2_trans_begin(&trans);
+
+ ret = bch2_subvolume_get_snapshot(&trans, op->subvol, &snapshot);
+ if (unlikely(ret))
+ goto err;
+
+ bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents,
+ SPOS(op->pos.inode, op->pos.offset, snapshot),
+ BTREE_ITER_SLOTS);
+ while (1) {
+ struct bio *bio = &op->wbio.bio;
+
+ nr_buckets = 0;
+
+ k = bch2_btree_iter_peek_slot(&iter);
+ ret = bkey_err(k);
+ if (ret)
+ break;
+
+ /* fall back to normal cow write path? */
+ if (unlikely(k.k->p.snapshot != snapshot ||
+ !bch2_extent_is_writeable(op, k)))
+ break;
+
+ if (bch2_keylist_realloc(&op->insert_keys,
+ op->inline_keys,
+ ARRAY_SIZE(op->inline_keys),
+ k.k->u64s))
+ break;
+
+ /* Get iorefs before dropping btree locks: */
+ ptrs = bch2_bkey_ptrs_c(k);
+ bkey_for_each_ptr(ptrs, ptr) {
+ buckets[nr_buckets].b = PTR_BUCKET_POS(c, ptr);
+ buckets[nr_buckets].gen = ptr->gen;
+ buckets[nr_buckets].l =
+ bucket_nocow_lock(&c->nocow_locks,
+ bucket_to_u64(buckets[nr_buckets].b));
+
+ prefetch(buckets[nr_buckets].l);
+
+ if (unlikely(!bch2_dev_get_ioref(bch_dev_bkey_exists(c, ptr->dev), WRITE)))
+ goto err_get_ioref;
+
+ nr_buckets++;
+
+ if (ptr->unwritten)
+ op->flags |= BCH_WRITE_CONVERT_UNWRITTEN;
+ }
+
+ /* Unlock before taking nocow locks, doing IO: */
+ bkey_reassemble(op->insert_keys.top, k);
+ bch2_trans_unlock(&trans);
+
+ bch2_cut_front(op->pos, op->insert_keys.top);
+ if (op->flags & BCH_WRITE_CONVERT_UNWRITTEN)
+ bch2_cut_back(POS(op->pos.inode, op->pos.offset + bio_sectors(bio)), op->insert_keys.top);
+
+ for (i = 0; i < nr_buckets; i++) {
+ struct bch_dev *ca = bch_dev_bkey_exists(c, buckets[i].b.inode);
+ struct nocow_lock_bucket *l = buckets[i].l;
+ bool stale;
+
+ __bch2_bucket_nocow_lock(&c->nocow_locks, l,
+ bucket_to_u64(buckets[i].b),
+ BUCKET_NOCOW_LOCK_UPDATE);
+
+ rcu_read_lock();
+ stale = gen_after(*bucket_gen(ca, buckets[i].b.offset), buckets[i].gen);
+ rcu_read_unlock();
+
+ if (unlikely(stale))
+ goto err_bucket_stale;
+ }
+
+ bio = &op->wbio.bio;
+ if (k.k->p.offset < op->pos.offset + bio_sectors(bio)) {
+ bio = bio_split(bio, k.k->p.offset - op->pos.offset,
+ GFP_KERNEL, &c->bio_write);
+ wbio_init(bio)->put_bio = true;
+ bio->bi_opf = op->wbio.bio.bi_opf;
+ } else {
+ op->flags |= BCH_WRITE_DONE;
+ }
+
+ op->pos.offset += bio_sectors(bio);
+ op->written += bio_sectors(bio);
+
+ bio->bi_end_io = bch2_write_endio;
+ bio->bi_private = &op->cl;
+ bio->bi_opf |= REQ_OP_WRITE;
+ closure_get(&op->cl);
+ bch2_submit_wbio_replicas(to_wbio(bio), c, BCH_DATA_user,
+ op->insert_keys.top, true);
+
+ bch2_keylist_push(&op->insert_keys);
+ if (op->flags & BCH_WRITE_DONE)
+ break;
+ bch2_btree_iter_advance(&iter);
+ }
+out:
+ bch2_trans_iter_exit(&trans, &iter);
+err:
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ goto retry;
+
+ if (ret) {
+ bch_err_inum_offset_ratelimited(c,
+ op->pos.inode,
+ op->pos.offset << 9,
+ "%s: btree lookup error %s",
+ __func__, bch2_err_str(ret));
+ op->error = ret;
+ op->flags |= BCH_WRITE_DONE;
+ }
+
+ bch2_trans_exit(&trans);
+
+ /* fallback to cow write path? */
+ if (!(op->flags & BCH_WRITE_DONE)) {
+ closure_sync(&op->cl);
+ __bch2_nocow_write_done(op);
+ op->insert_keys.top = op->insert_keys.keys;
+ } else if (op->flags & BCH_WRITE_SYNC) {
+ closure_sync(&op->cl);
+ bch2_nocow_write_done(&op->cl);
+ } else {
+ /*
+ * XXX
+ * needs to run out of process context because ei_quota_lock is
+ * a mutex
+ */
+ continue_at(&op->cl, bch2_nocow_write_done, index_update_wq(op));
+ }
+ return;
+err_get_ioref:
+ for (i = 0; i < nr_buckets; i++)
+ percpu_ref_put(&bch_dev_bkey_exists(c, buckets[i].b.inode)->io_ref);
+
+ /* Fall back to COW path: */
+ goto out;
+err_bucket_stale:
+ while (--i >= 0)
+ bch2_bucket_nocow_unlock(&c->nocow_locks,
+ buckets[i].b,
+ BUCKET_NOCOW_LOCK_UPDATE);
+ for (i = 0; i < nr_buckets; i++)
+ percpu_ref_put(&bch_dev_bkey_exists(c, buckets[i].b.inode)->io_ref);
+
+ /* We can retry this: */
+ ret = -BCH_ERR_transaction_restart;
+ goto out;
+}
+
+static void __bch2_write(struct bch_write_op *op)
+{
+ struct bch_fs *c = op->c;
+ struct write_point *wp = NULL;
+ struct bio *bio = NULL;