+#define WRITE_BATCH_PAGES 32 /* max page cache pages handled by one __bch2_buffered_write() call */
+
+/*
+ * __bch2_buffered_write - copy one batch of user data into the page cache
+ * @inode:	inode being written to
+ * @mapping:	page cache mapping the pages are taken from
+ * @iter:	source of the data; advanced by the number of bytes copied
+ * @pos:	file offset of the first byte to write
+ * @len:	number of bytes to write; must be nonzero and, together with
+ *		the offset of @pos within its page, span at most
+ *		WRITE_BATCH_PAGES pages (both conditions are BUG_ON() checked)
+ *
+ * Phases, in order:
+ *  1. grab every page covering [@pos, @pos + @len); if a page other than the
+ *     first cannot be grabbed, the write is shortened to the pages obtained
+ *  2. make the partially overwritten first/last pages uptodate (read them in,
+ *     or zero the last page when it starts at or beyond i_size)
+ *  3. take a reservation for every byte range that will be written
+ *  4. copy from @iter with the atomic usercopy helper, stopping at the first
+ *     short copy
+ *  5. extend i_size if needed, then mark the copied ranges dirty
+ *
+ * Every page grabbed here is unlocked and released before returning.
+ *
+ * Returns the number of bytes copied when that is nonzero (possibly fewer than
+ * @len); otherwise 0 if nothing could be copied, or a negative error code.
+ */
+static int __bch2_buffered_write(struct bch_inode_info *inode,
+				 struct address_space *mapping,
+				 struct iov_iter *iter,
+				 loff_t pos, unsigned len)
+{
+	struct bch_fs *c = inode->v.i_sb->s_fs_info;
+	struct page *pages[WRITE_BATCH_PAGES];
+	struct bch2_page_reservation res;
+	unsigned long index = pos >> PAGE_SHIFT;
+	unsigned offset = pos & (PAGE_SIZE - 1);
+	unsigned nr_pages = DIV_ROUND_UP(offset + len, PAGE_SIZE);
+	unsigned i, reserved = 0, set_dirty = 0;
+	unsigned copied = 0, nr_pages_copied = 0;
+	int ret = 0;
+
+	BUG_ON(!len);
+	BUG_ON(nr_pages > ARRAY_SIZE(pages));
+
+	bch2_page_reservation_init(c, inode, &res);
+
+	for (i = 0; i < nr_pages; i++) {
+		pages[i] = grab_cache_page_write_begin(mapping, index + i, 0);
+		if (!pages[i]) {
+			/*
+			 * Only the pages grabbed so far take part from here
+			 * on; with none at all there is nothing to write.
+			 */
+			nr_pages = i;
+			if (!i) {
+				ret = -ENOMEM;
+				goto out;
+			}
+			len = min_t(unsigned, len,
+				    nr_pages * PAGE_SIZE - offset);
+			break;
+		}
+	}
+
+	/* Write starts mid-page: the head of the first page must be valid */
+	if (offset && !PageUptodate(pages[0])) {
+		ret = bch2_read_single_page(pages[0], mapping);
+		if (ret)
+			goto out;
+	}
+
+	/* Write ends mid-page: the tail of the last page must be valid */
+	if ((pos + len) & (PAGE_SIZE - 1) &&
+	    !PageUptodate(pages[nr_pages - 1])) {
+		/*
+		 * Widen to loff_t before shifting: the page index is an
+		 * unsigned long, so on 32-bit the byte offset would otherwise
+		 * be truncated for positions at or beyond 4GiB before it is
+		 * compared against the 64-bit i_size.
+		 */
+		loff_t last_pg_start =
+			(loff_t) (index + nr_pages - 1) << PAGE_SHIFT;
+
+		if (last_pg_start >= inode->v.i_size) {
+			/* Page lies wholly at/after i_size: zero, don't read */
+			zero_user(pages[nr_pages - 1], 0, PAGE_SIZE);
+		} else {
+			ret = bch2_read_single_page(pages[nr_pages - 1], mapping);
+			if (ret)
+				goto out;
+		}
+	}
+
+	while (reserved < len) {
+		struct page *page = pages[(offset + reserved) >> PAGE_SHIFT];
+		unsigned pg_offset = (offset + reserved) & (PAGE_SIZE - 1);
+		unsigned pg_len = min_t(unsigned, len - reserved,
+					PAGE_SIZE - pg_offset);
+retry_reservation:
+		ret = bch2_page_reservation_get(c, inode, page, &res,
+						pg_offset, pg_len, true);
+
+		/*
+		 * On failure with a page that is not uptodate, read the page
+		 * in and try once more; once the page is uptodate a failure
+		 * is final.
+		 */
+		if (ret && !PageUptodate(page)) {
+			ret = bch2_read_single_page(page, mapping);
+			if (!ret)
+				goto retry_reservation;
+		}
+
+		if (ret)
+			goto out;
+
+		reserved += pg_len;
+	}
+
+	if (mapping_writably_mapped(mapping))
+		for (i = 0; i < nr_pages; i++)
+			flush_dcache_page(pages[i]);
+
+	while (copied < len) {
+		struct page *page = pages[(offset + copied) >> PAGE_SHIFT];
+		unsigned pg_offset = (offset + copied) & (PAGE_SIZE - 1);
+		unsigned pg_len = min_t(unsigned, len - copied,
+					PAGE_SIZE - pg_offset);
+		unsigned pg_copied = iov_iter_copy_from_user_atomic(page,
+						iter, pg_offset, pg_len);
+
+		if (!pg_copied)
+			break;
+
+		/*
+		 * Less than a full page went into a page that is not uptodate
+		 * and the copy ends below i_size: zero the page and stop
+		 * without counting this page's bytes (iter is not advanced).
+		 */
+		if (!PageUptodate(page) &&
+		    pg_copied != PAGE_SIZE &&
+		    pos + copied + pg_copied < inode->v.i_size) {
+			zero_user(page, 0, PAGE_SIZE);
+			break;
+		}
+
+		flush_dcache_page(page);
+		iov_iter_advance(iter, pg_copied);
+		copied += pg_copied;
+
+		/* Short copy: keep what we got, but go no further */
+		if (pg_copied != pg_len)
+			break;
+	}
+
+	if (!copied)
+		goto out;
+
+	spin_lock(&inode->v.i_lock);
+	if (pos + copied > inode->v.i_size)
+		i_size_write(&inode->v, pos + copied);
+	spin_unlock(&inode->v.i_lock);
+
+	/* Dirty, unlock and release exactly the pages that received data */
+	while (set_dirty < copied) {
+		struct page *page = pages[(offset + set_dirty) >> PAGE_SHIFT];
+		unsigned pg_offset = (offset + set_dirty) & (PAGE_SIZE - 1);
+		unsigned pg_len = min_t(unsigned, copied - set_dirty,
+					PAGE_SIZE - pg_offset);
+
+		if (!PageUptodate(page))
+			SetPageUptodate(page);
+
+		bch2_set_page_dirty(c, inode, page, &res, pg_offset, pg_len);
+		unlock_page(page);
+		put_page(page);
+
+		set_dirty += pg_len;
+	}
+
+	nr_pages_copied = DIV_ROUND_UP(offset + copied, PAGE_SIZE);
+	inode->ei_last_dirtied = (unsigned long) current;
+out:
+	/* Pages before nr_pages_copied were already unlocked and put above */
+	for (i = nr_pages_copied; i < nr_pages; i++) {
+		unlock_page(pages[i]);
+		put_page(pages[i]);
+	}
+
+	bch2_page_reservation_put(c, inode, &res);
+
+	return copied ?: ret;
+}
+
+/*
+ * bch2_buffered_write - write the contents of @iter through the page cache
+ * @iocb:	supplies the file and the starting position (ki_pos is only
+ *		read here, it is not advanced)
+ * @iter:	data to write; consumed as the write progresses
+ *
+ * The data is pushed to __bch2_buffered_write() in batches of at most
+ * WRITE_BATCH_PAGES pages, with inode->ei_pagecache_lock held (via
+ * bch2_pagecache_add_get()) for the whole operation.
+ *
+ * Returns the total number of bytes written if any were written; otherwise
+ * the last status: 0 or a negative error code.
+ */
+static ssize_t bch2_buffered_write(struct kiocb *iocb, struct iov_iter *iter)
+{
+	struct file *filp = iocb->ki_filp;
+	struct address_space *mapping = filp->f_mapping;
+	struct bch_inode_info *inode = file_bch_inode(filp);
+	loff_t pos = iocb->ki_pos;
+	ssize_t total = 0;
+	int rc = 0;
+
+	bch2_pagecache_add_get(&inode->ei_pagecache_lock);
+
+	for (;;) {
+		unsigned pg_off = pos & (PAGE_SIZE - 1);
+		unsigned chunk = min_t(unsigned long, iov_iter_count(iter),
+				       PAGE_SIZE * WRITE_BATCH_PAGES - pg_off);
+
+		/* Retry the same position until at least one byte lands */
+		for (;;) {
+			/*
+			 * Fault in the user memory we are about to copy from
+			 * before touching the destination pages: copying from
+			 * the very page being written to, while it is not yet
+			 * uptodate, would deadlock.  The copy below uses
+			 * atomic usercopies, so this is also where an invalid
+			 * user address gets detected.
+			 *
+			 * If the whole batch can't be faulted in, retry with
+			 * at most the remainder of the current page.
+			 */
+			if (unlikely(iov_iter_fault_in_readable(iter, chunk))) {
+				chunk = min_t(unsigned long,
+					      iov_iter_count(iter),
+					      PAGE_SIZE - pg_off);
+
+				if (unlikely(iov_iter_fault_in_readable(iter,
+									chunk))) {
+					rc = -EFAULT;
+					goto done;
+				}
+			}
+
+			if (unlikely(fatal_signal_pending(current))) {
+				rc = -EINTR;
+				goto done;
+			}
+
+			rc = __bch2_buffered_write(inode, mapping, iter,
+						   pos, chunk);
+			if (unlikely(rc < 0))
+				goto done;
+
+			cond_resched();
+
+			if (unlikely(rc == 0)) {
+				/*
+				 * Nothing was copied.  Drop down to a write
+				 * of a single iovec segment, limited to the
+				 * rest of this page; otherwise we could
+				 * livelock when the segments can't all be
+				 * copied at once without a pagefault.
+				 */
+				chunk = min_t(unsigned long,
+					      PAGE_SIZE - pg_off,
+					      iov_iter_single_seg_count(iter));
+				continue;
+			}
+
+			break;
+		}
+
+		pos += rc;
+		total += rc;
+		rc = 0;
+
+		balance_dirty_pages_ratelimited(mapping);
+
+		if (!iov_iter_count(iter))
+			break;
+	}
+done:
+	bch2_pagecache_add_put(&inode->ei_pagecache_lock);
+
+	if (total)
+		return total;
+
+	return rc;
+}
+