#include "fs-io-buffered.h"
#include "fs-io-direct.h"
#include "fs-io-pagecache.h"
-#include "io.h"
+#include "io_read.h"
+#include "io_write.h"
#include <linux/backing-dev.h>
#include <linux/pagemap.h>
static int readpages_iter_init(struct readpages_iter *iter,
struct readahead_control *ractl)
{
- struct folio **fi;
- int ret;
-
- memset(iter, 0, sizeof(*iter));
+ struct folio *folio;
- iter->mapping = ractl->mapping;
+ *iter = (struct readpages_iter) { ractl->mapping };
- ret = bch2_filemap_get_contig_folios_d(iter->mapping,
- ractl->_index << PAGE_SHIFT,
- (ractl->_index + ractl->_nr_pages) << PAGE_SHIFT,
- 0, mapping_gfp_mask(iter->mapping),
- &iter->folios);
- if (ret)
- return ret;
+ while ((folio = __readahead_folio(ractl))) {
+ if (!bch2_folio_create(folio, GFP_KERNEL) ||
+ darray_push(&iter->folios, folio)) {
+ bch2_folio_release(folio);
+ ractl->_nr_pages += folio_nr_pages(folio);
+ ractl->_index -= folio_nr_pages(folio);
+ return iter->folios.nr ? 0 : -ENOMEM;
+ }
- darray_for_each(iter->folios, fi) {
- ractl->_nr_pages -= 1U << folio_order(*fi);
- __bch2_folio_create(*fi, __GFP_NOFAIL|GFP_KERNEL);
- folio_put(*fi);
- folio_put(*fi);
+ folio_put(folio);
}
return 0;
struct bch_inode_info *inode = to_bch_ei(ractl->mapping->host);
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct bch_io_opts opts;
- struct btree_trans trans;
+ struct btree_trans *trans = bch2_trans_get(c);
struct folio *folio;
struct readpages_iter readpages_iter;
- int ret;
bch2_inode_opts_get(&opts, c, &inode->ei_inode);
- ret = readpages_iter_init(&readpages_iter, ractl);
- BUG_ON(ret);
-
- bch2_trans_init(&trans, c, 0, 0);
+ int ret = readpages_iter_init(&readpages_iter, ractl);
+ if (ret)
+ return;
bch2_pagecache_add_get(inode);
rbio->bio.bi_end_io = bch2_readpages_end_io;
BUG_ON(!bio_add_folio(&rbio->bio, folio, folio_size(folio), 0));
- bchfs_read(&trans, rbio, inode_inum(inode),
+ bchfs_read(trans, rbio, inode_inum(inode),
&readpages_iter);
- bch2_trans_unlock(&trans);
+ bch2_trans_unlock(trans);
}
bch2_pagecache_add_put(inode);
- bch2_trans_exit(&trans);
+ bch2_trans_put(trans);
darray_exit(&readpages_iter.folios);
}
/*
 * Prepare @rbio to read one whole folio and hand it to bchfs_read().
 *
 * @c:     filesystem handle; passed to bch2_trans_run()
 * @rbio:  read bio to fill in (op flags, start sector and the folio itself)
 * @inum:  subvolume/inode number forwarded to bchfs_read()
 * @folio: folio to read into; added to the bio at offset 0 for
 *         folio_size(folio) bytes
 *
 * The function is void: no status is returned to the caller from here.
 * bchfs_read() is called with a NULL last argument, i.e. without the
 * readpages iterator that the readahead path supplies.
 *
 * Patch note: the removed lines used an on-stack struct btree_trans with
 * bch2_trans_init()/bch2_trans_exit(); the added line goes through
 * bch2_trans_run() instead.
 */
static void __bchfs_readfolio(struct bch_fs *c, struct bch_read_bio *rbio,
subvol_inum inum, struct folio *folio)
{
- struct btree_trans trans;
-
/*
 * Return value is not checked here.
 * NOTE(review): presumably this cannot fail with __GFP_NOFAIL; confirm.
 */
bch2_folio_create(folio, __GFP_NOFAIL);
rbio->bio.bi_opf = REQ_OP_READ|REQ_SYNC;
/* The read starts at the sector that backs the start of the folio. */
rbio->bio.bi_iter.bi_sector = folio_sector(folio);
/* Failing to add the folio to the bio is treated as a fatal bug. */
BUG_ON(!bio_add_folio(&rbio->bio, folio, folio_size(folio), 0));
- bch2_trans_init(&trans, c, 0, 0);
- bchfs_read(&trans, rbio, inum, NULL);
- bch2_trans_exit(&trans);
/*
 * The comma expression calls bchfs_read() and then evaluates to 0, which is
 * the value handed to bch2_trans_run(). `trans` is not declared in this
 * function. NOTE(review): presumably it is provided by the bch2_trans_run()
 * macro; confirm against its definition.
 */
+ bch2_trans_run(c, (bchfs_read(trans, rbio, inum, NULL), 0));
}
static void bch2_read_single_folio_end_io(struct bio *bio)
return ret;
}
+/*
+ * Determine when a writepage io is full. We have to limit writepage bios to a
+ * single page per bvec (i.e. 1MB with 4k pages) because that is the limit to
+ * what the bounce path in bch2_write_extent() can handle. In theory we could
+ * loosen this restriction for non-bounce I/O, but we don't have that context
+ * here. Ideally, we can up this limit and make it configurable in the future
+ * when the bounce path can be enhanced to accommodate larger source bios.
+ *
+ * @io:  writepage io whose embedded bio (io->op.wbio.bio) is examined
+ * @len: number of bytes about to be appended; it is added to the bio's
+ *       bi_iter.bi_size for the size check (the visible caller passes
+ *       sectors << 9)
+ *
+ * Returns true when bio_full(bio, @len) returns true, or when the bio's
+ * current size plus @len would exceed BIO_MAX_VECS * PAGE_SIZE. The size
+ * comparison is strict (>), so a bio may grow to exactly
+ * BIO_MAX_VECS * PAGE_SIZE bytes before it is considered full.
+ */
+static inline bool bch_io_full(struct bch_writepage_io *io, unsigned len)
+{
+ struct bio *bio = &io->op.wbio.bio;
+ return bio_full(bio, len) ||
+ (bio->bi_iter.bi_size + len > BIO_MAX_VECS * PAGE_SIZE);
+}
+
static void bch2_writepage_io_done(struct bch_write_op *op)
{
struct bch_writepage_io *io =
if (w->io &&
(w->io->op.res.nr_replicas != nr_replicas_this_write ||
- bio_full(&w->io->op.wbio.bio, sectors << 9) ||
- w->io->op.wbio.bio.bi_iter.bi_size + (sectors << 9) >=
- (BIO_MAX_VECS * PAGE_SIZE) ||
+ bch_io_full(w->io, sectors << 9) ||
bio_end_sector(&w->io->op.wbio.bio) != sector))
bch2_writepage_do_io(w);
/* Check for writing past i_size: */
WARN_ONCE((bio_end_sector(&w->io->op.wbio.bio) << 9) >
round_up(i_size, block_bytes(c)) &&
- !test_bit(BCH_FS_EMERGENCY_RO, &c->flags),
+ !test_bit(BCH_FS_emergency_ro, &c->flags),
"writing past i_size: %llu > %llu (unrounded %llu)\n",
bio_end_sector(&w->io->op.wbio.bio) << 9,
round_up(i_size, block_bytes(c)),
if (IS_ERR_OR_NULL(folio))
goto err_unlock;
- if (folio_test_uptodate(folio))
- goto out;
-
offset = pos - folio_pos(folio);
len = min_t(size_t, len, folio_end_pos(folio) - pos);
+ if (folio_test_uptodate(folio))
+ goto out;
+
/* If we're writing entire folio, don't need to read it in first: */
if (!offset && len == folio_size(folio))
goto out;
return copied;
}
-static noinline void folios_trunc(folios *folios, struct folio **fi)
+static noinline void folios_trunc(folios *fs, struct folio **fi)
{
- while (folios->data + folios->nr > fi) {
- struct folio *f = darray_pop(folios);
+ while (fs->data + fs->nr > fi) {
+ struct folio *f = darray_pop(fs);
folio_unlock(f);
folio_put(f);
{
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct bch2_folio_reservation res;
- folios folios;
- struct folio **fi, *f;
- unsigned copied = 0, f_offset;
- u64 end = pos + len, f_pos;
+ folios fs;
+ struct folio *f;
+ unsigned copied = 0, f_offset, f_copied;
+ u64 end = pos + len, f_pos, f_len;
loff_t last_folio_pos = inode->v.i_size;
int ret = 0;
BUG_ON(!len);
bch2_folio_reservation_init(c, inode, &res);
- darray_init(&folios);
+ darray_init(&fs);
ret = bch2_filemap_get_contig_folios_d(mapping, pos, end,
FGP_LOCK|FGP_WRITE|FGP_STABLE|FGP_CREAT,
mapping_gfp_mask(mapping),
- &folios);
+ &fs);
if (ret)
goto out;
- BUG_ON(!folios.nr);
+ BUG_ON(!fs.nr);
- f = darray_first(folios);
+ f = darray_first(fs);
if (pos != folio_pos(f) && !folio_test_uptodate(f)) {
ret = bch2_read_single_folio(f, mapping);
if (ret)
goto out;
}
- f = darray_last(folios);
+ f = darray_last(fs);
end = min(end, folio_end_pos(f));
last_folio_pos = folio_pos(f);
if (end != folio_end_pos(f) && !folio_test_uptodate(f)) {
}
}
- ret = bch2_folio_set(c, inode_inum(inode), folios.data, folios.nr);
+ ret = bch2_folio_set(c, inode_inum(inode), fs.data, fs.nr);
if (ret)
goto out;
f_pos = pos;
- f_offset = pos - folio_pos(darray_first(folios));
- darray_for_each(folios, fi) {
- struct folio *f = *fi;
- u64 f_len = min(end, folio_end_pos(f)) - f_pos;
+ f_offset = pos - folio_pos(darray_first(fs));
+ darray_for_each(fs, fi) {
+ f = *fi;
+ f_len = min(end, folio_end_pos(f)) - f_pos;
/*
* XXX: per POSIX and fstests generic/275, on -ENOSPC we're
*/
ret = bch2_folio_reservation_get(c, inode, f, &res, f_offset, f_len);
if (unlikely(ret)) {
- folios_trunc(&folios, fi);
- if (!folios.nr)
+ folios_trunc(&fs, fi);
+ if (!fs.nr)
goto out;
- end = min(end, folio_end_pos(darray_last(folios)));
+ end = min(end, folio_end_pos(darray_last(fs)));
break;
}
}
if (mapping_writably_mapped(mapping))
- darray_for_each(folios, fi)
+ darray_for_each(fs, fi)
flush_dcache_folio(*fi);
f_pos = pos;
- f_offset = pos - folio_pos(darray_first(folios));
- darray_for_each(folios, fi) {
- struct folio *f = *fi;
- u64 f_len = min(end, folio_end_pos(f)) - f_pos;
- unsigned f_copied = copy_page_from_iter_atomic(&f->page, f_offset, f_len, iter);
-
+ f_offset = pos - folio_pos(darray_first(fs));
+ darray_for_each(fs, fi) {
+ f = *fi;
+ f_len = min(end, folio_end_pos(f)) - f_pos;
+ f_copied = copy_page_from_iter_atomic(&f->page, f_offset, f_len, iter);
if (!f_copied) {
- folios_trunc(&folios, fi);
+ folios_trunc(&fs, fi);
break;
}
pos + copied + f_copied < inode->v.i_size) {
iov_iter_revert(iter, f_copied);
folio_zero_range(f, 0, folio_size(f));
- folios_trunc(&folios, fi);
+ folios_trunc(&fs, fi);
break;
}
copied += f_copied;
if (f_copied != f_len) {
- folios_trunc(&folios, fi + 1);
+ folios_trunc(&fs, fi + 1);
break;
}
spin_unlock(&inode->v.i_lock);
f_pos = pos;
- f_offset = pos - folio_pos(darray_first(folios));
- darray_for_each(folios, fi) {
- struct folio *f = *fi;
- u64 f_len = min(end, folio_end_pos(f)) - f_pos;
+ f_offset = pos - folio_pos(darray_first(fs));
+ darray_for_each(fs, fi) {
+ f = *fi;
+ f_len = min(end, folio_end_pos(f)) - f_pos;
if (!folio_test_uptodate(f))
folio_mark_uptodate(f);
inode->ei_last_dirtied = (unsigned long) current;
out:
- darray_for_each(folios, fi) {
+ darray_for_each(fs, fi) {
folio_unlock(*fi);
folio_put(*fi);
}
if (last_folio_pos >= inode->v.i_size)
truncate_pagecache(&inode->v, inode->v.i_size);
- darray_exit(&folios);
+ darray_exit(&fs);
bch2_folio_reservation_put(c, inode, &res);
return copied ?: ret;
goto out;
}
- /* We can write back this queue in page reclaim */
- current->backing_dev_info = inode_to_bdi(&inode->v);
inode_lock(&inode->v);
ret = generic_write_checks(iocb, from);
iocb->ki_pos += ret;
unlock:
inode_unlock(&inode->v);
- current->backing_dev_info = NULL;
if (ret > 0)
ret = generic_write_sync(iocb, ret);