+ if (inode->k.p.snapshot != iter.snapshot) {
+ inode->k.p.snapshot = iter.snapshot;
+ inode_update_flags = 0;
+ }
+
+ ret = bch2_trans_update(trans, &iter, &inode->k_i,
+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|
+ inode_update_flags);
+err:
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+int bch2_extent_update(struct btree_trans *trans,
+ subvol_inum inum,
+ struct btree_iter *iter,
+ struct bkey_i *k,
+ struct disk_reservation *disk_res,
+ u64 new_i_size,
+ s64 *i_sectors_delta_total,
+ bool check_enospc)
+{
+ struct bpos next_pos;
+ bool usage_increasing;
+ s64 i_sectors_delta = 0, disk_sectors_delta = 0;
+ int ret;
+
+ /*
+ * This traverses us the iterator without changing iter->path->pos to
+ * search_key() (which is pos + 1 for extents): we want there to be a
+ * path already traversed at iter->pos because
+ * bch2_trans_extent_update() will use it to attempt extent merging
+ */
+ ret = __bch2_btree_iter_traverse(iter);
+ if (ret)
+ return ret;
+
+ ret = bch2_extent_trim_atomic(trans, iter, k);
+ if (ret)
+ return ret;
+
+ next_pos = k->k.p;
+
+ ret = bch2_sum_sector_overwrites(trans, iter, k,
+ &usage_increasing,
+ &i_sectors_delta,
+ &disk_sectors_delta);
+ if (ret)
+ return ret;
+
+ if (disk_res &&
+ disk_sectors_delta > (s64) disk_res->sectors) {
+ ret = bch2_disk_reservation_add(trans->c, disk_res,
+ disk_sectors_delta - disk_res->sectors,
+ !check_enospc || !usage_increasing
+ ? BCH_DISK_RESERVATION_NOFAIL : 0);
+ if (ret)
+ return ret;
+ }
+
+ /*
+ * Note:
+ * We always have to do an inode update - even when i_size/i_sectors
+ * aren't changing - for fsync to work properly; fsync relies on
+ * inode->bi_journal_seq which is updated by the trigger code:
+ */
+ ret = bch2_extent_update_i_size_sectors(trans, iter,
+ min(k->k.p.offset << 9, new_i_size),
+ i_sectors_delta) ?:
+ bch2_trans_update(trans, iter, k, 0) ?:
+ bch2_trans_commit(trans, disk_res, NULL,
+ BTREE_INSERT_NOCHECK_RW|
+ BTREE_INSERT_NOFAIL);
+ if (unlikely(ret))
+ return ret;
+
+ if (i_sectors_delta_total)
+ *i_sectors_delta_total += i_sectors_delta;
+ bch2_btree_iter_set_pos(iter, next_pos);
+ return 0;
+}
+
+/* Overwrites whatever was present with zeroes: */
+int bch2_extent_fallocate(struct btree_trans *trans,
+ subvol_inum inum,
+ struct btree_iter *iter,
+ unsigned sectors,
+ struct bch_io_opts opts,
+ s64 *i_sectors_delta,
+ struct write_point_specifier write_point)
+{
+ struct bch_fs *c = trans->c;
+ struct disk_reservation disk_res = { 0 };
+ struct closure cl;
+ struct open_buckets open_buckets = { 0 };
+ struct bkey_s_c k;
+ struct bkey_buf old, new;
+ unsigned sectors_allocated = 0;
+ bool have_reservation = false;
+ bool unwritten = opts.nocow &&
+ c->sb.version >= bcachefs_metadata_version_unwritten_extents;
+ int ret;
+
+ bch2_bkey_buf_init(&old);
+ bch2_bkey_buf_init(&new);
+ closure_init_stack(&cl);
+
+ k = bch2_btree_iter_peek_slot(iter);
+ ret = bkey_err(k);
+ if (ret)
+ return ret;
+
+ sectors = min_t(u64, sectors, k.k->p.offset - iter->pos.offset);
+
+ if (!have_reservation) {
+ unsigned new_replicas =
+ max(0, (int) opts.data_replicas -
+ (int) bch2_bkey_nr_ptrs_fully_allocated(k));
+ /*
+ * Get a disk reservation before (in the nocow case) calling
+ * into the allocator:
+ */
+ ret = bch2_disk_reservation_get(c, &disk_res, sectors, new_replicas, 0);
+ if (unlikely(ret))
+ goto err;
+
+ bch2_bkey_buf_reassemble(&old, c, k);
+ }
+
+ if (have_reservation) {
+ if (!bch2_extents_match(k, bkey_i_to_s_c(old.k)))
+ goto err;
+
+ bch2_key_resize(&new.k->k, sectors);
+ } else if (!unwritten) {
+ struct bkey_i_reservation *reservation;
+
+ bch2_bkey_buf_realloc(&new, c, sizeof(*reservation) / sizeof(u64));
+ reservation = bkey_reservation_init(new.k);
+ reservation->k.p = iter->pos;
+ bch2_key_resize(&reservation->k, sectors);
+ reservation->v.nr_replicas = opts.data_replicas;
+ } else {
+ struct bkey_i_extent *e;
+ struct bch_devs_list devs_have;
+ struct write_point *wp;
+ struct bch_extent_ptr *ptr;
+
+ devs_have.nr = 0;
+
+ bch2_bkey_buf_realloc(&new, c, BKEY_EXTENT_U64s_MAX);
+
+ e = bkey_extent_init(new.k);
+ e->k.p = iter->pos;
+
+ ret = bch2_alloc_sectors_start_trans(trans,
+ opts.foreground_target,
+ false,
+ write_point,
+ &devs_have,
+ opts.data_replicas,
+ opts.data_replicas,
+ BCH_WATERMARK_normal, 0, &cl, &wp);
+ if (bch2_err_matches(ret, BCH_ERR_operation_blocked))
+ ret = -BCH_ERR_transaction_restart_nested;
+ if (ret)
+ goto err;
+
+ sectors = min(sectors, wp->sectors_free);
+ sectors_allocated = sectors;
+
+ bch2_key_resize(&e->k, sectors);
+
+ bch2_open_bucket_get(c, wp, &open_buckets);
+ bch2_alloc_sectors_append_ptrs(c, wp, &e->k_i, sectors, false);
+ bch2_alloc_sectors_done(c, wp);
+
+ extent_for_each_ptr(extent_i_to_s(e), ptr)
+ ptr->unwritten = true;
+ }
+
+ have_reservation = true;
+
+ ret = bch2_extent_update(trans, inum, iter, new.k, &disk_res,
+ 0, i_sectors_delta, true);
+err:
+ if (!ret && sectors_allocated)
+ bch2_increment_clock(c, sectors_allocated, WRITE);
+
+ bch2_open_buckets_put(c, &open_buckets);
+ bch2_disk_reservation_put(c, &disk_res);
+ bch2_bkey_buf_exit(&new, c);
+ bch2_bkey_buf_exit(&old, c);
+
+ if (closure_nr_remaining(&cl) != 1) {
+ bch2_trans_unlock(trans);
+ closure_sync(&cl);
+ }
+
+ return ret;
+}
+
+/*
+ * Returns -BCH_ERR_transacton_restart if we had to drop locks:
+ */
+int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter,
+ subvol_inum inum, u64 end,
+ s64 *i_sectors_delta)
+{
+ struct bch_fs *c = trans->c;
+ unsigned max_sectors = KEY_SIZE_MAX & (~0 << c->block_bits);
+ struct bpos end_pos = POS(inum.inum, end);
+ struct bkey_s_c k;
+ int ret = 0, ret2 = 0;
+ u32 snapshot;
+
+ while (!ret ||
+ bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
+ struct disk_reservation disk_res =
+ bch2_disk_reservation_init(c, 0);
+ struct bkey_i delete;
+
+ if (ret)
+ ret2 = ret;
+
+ bch2_trans_begin(trans);
+
+ ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
+ if (ret)
+ continue;
+
+ bch2_btree_iter_set_snapshot(iter, snapshot);
+
+ /*
+ * peek_upto() doesn't have ideal semantics for extents:
+ */
+ k = bch2_btree_iter_peek_upto(iter, end_pos);
+ if (!k.k)
+ break;
+
+ ret = bkey_err(k);
+ if (ret)
+ continue;
+
+ bkey_init(&delete.k);
+ delete.k.p = iter->pos;
+
+ /* create the biggest key we can */
+ bch2_key_resize(&delete.k, max_sectors);
+ bch2_cut_back(end_pos, &delete);
+
+ ret = bch2_extent_update(trans, inum, iter, &delete,
+ &disk_res, 0, i_sectors_delta, false);
+ bch2_disk_reservation_put(c, &disk_res);
+ }
+
+ return ret ?: ret2;
+}
+
+int bch2_fpunch(struct bch_fs *c, subvol_inum inum, u64 start, u64 end,
+ s64 *i_sectors_delta)
+{
+ struct btree_trans trans;
+ struct btree_iter iter;
+ int ret;
+
+ bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024);
+ bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents,
+ POS(inum.inum, start),
+ BTREE_ITER_INTENT);
+
+ ret = bch2_fpunch_at(&trans, &iter, inum, end, i_sectors_delta);
+
+ bch2_trans_iter_exit(&trans, &iter);
+ bch2_trans_exit(&trans);
+
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ ret = 0;
+
+ return ret;
+}
+
+static int bch2_write_index_default(struct bch_write_op *op)
+{
+ struct bch_fs *c = op->c;
+ struct bkey_buf sk;
+ struct keylist *keys = &op->insert_keys;
+ struct bkey_i *k = bch2_keylist_front(keys);
+ struct btree_trans trans;
+ struct btree_iter iter;
+ subvol_inum inum = {
+ .subvol = op->subvol,
+ .inum = k->k.p.inode,
+ };
+ int ret;
+
+ BUG_ON(!inum.subvol);
+
+ bch2_bkey_buf_init(&sk);
+ bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024);
+
+ do {
+ bch2_trans_begin(&trans);
+
+ k = bch2_keylist_front(keys);
+ bch2_bkey_buf_copy(&sk, c, k);
+
+ ret = bch2_subvolume_get_snapshot(&trans, inum.subvol,
+ &sk.k->k.p.snapshot);
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ continue;
+ if (ret)
+ break;
+
+ bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents,
+ bkey_start_pos(&sk.k->k),
+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
+
+ ret = bch2_extent_update(&trans, inum, &iter, sk.k,
+ &op->res,
+ op->new_i_size, &op->i_sectors_delta,
+ op->flags & BCH_WRITE_CHECK_ENOSPC);
+ bch2_trans_iter_exit(&trans, &iter);
+
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ continue;
+ if (ret)
+ break;
+
+ if (bkey_ge(iter.pos, k->k.p))
+ bch2_keylist_pop_front(&op->insert_keys);
+ else
+ bch2_cut_front(iter.pos, k);
+ } while (!bch2_keylist_empty(keys));
+
+ bch2_trans_exit(&trans);
+ bch2_bkey_buf_exit(&sk, c);
+
+ return ret;