#include <libaio.h>
+#ifdef CONFIG_VALGRIND
#include <valgrind/memcheck.h>
+#endif
#include <linux/bio.h>
#include <linux/blkdev.h>
#include "tools-util.h"
+/*
+ * Pluggable IO backend: each backend supplies process setup/teardown and
+ * vectored read/write submission for a struct bio.
+ */
+struct fops {
+	void (*init)(void);
+	void (*cleanup)(void);
+	void (*read)(struct bio *bio, struct iovec * iov, unsigned i);
+	void (*write)(struct bio *bio, struct iovec * iov, unsigned i);
+};
+
+/* Currently active backend; set by blkdev_init() to point into fops_list. */
+static struct fops *fops;
static io_context_t aio_ctx;
+static atomic_t running_requests;
void generic_make_request(struct bio *bio)
{
.iov_len = len,
};
+#ifdef CONFIG_VALGRIND
/* To be pedantic it should only be on IO completion. */
if (bio_op(bio) == REQ_OP_READ)
VALGRIND_MAKE_MEM_DEFINED(start, len);
+#endif
}
- struct iocb iocb = {
- .data = bio,
- .aio_fildes = bio->bi_opf & REQ_FUA
- ? bio->bi_bdev->bd_sync_fd
- : bio->bi_bdev->bd_fd,
- }, *iocbp = &iocb;
-
switch (bio_op(bio)) {
case REQ_OP_READ:
- iocb.aio_lio_opcode = IO_CMD_PREADV;
- iocb.u.v.vec = iov;
- iocb.u.v.nr = i;
- iocb.u.v.offset = bio->bi_iter.bi_sector << 9;
-
- ret = io_submit(aio_ctx, 1, &iocbp);
- if (ret != 1)
- die("io_submit err: %s", strerror(-ret));
+ fops->read(bio, iov, i);
break;
case REQ_OP_WRITE:
- iocb.aio_lio_opcode = IO_CMD_PWRITEV;
- iocb.u.v.vec = iov;
- iocb.u.v.nr = i;
- iocb.u.v.offset = bio->bi_iter.bi_sector << 9;
-
- ret = io_submit(aio_ctx, 1, &iocbp);
- if (ret != 1)
- die("io_submit err: %s", strerror(-ret));
+ fops->write(bio, iov, i);
break;
case REQ_OP_FLUSH:
ret = fsync(bio->bi_bdev->bd_fd);
strncpy(bdev->name, path, sizeof(bdev->name));
bdev->name[sizeof(bdev->name) - 1] = '\0';
+ bdev->bd_dev = xfstat(fd).st_rdev;
bdev->bd_fd = fd;
bdev->bd_sync_fd = sync_fd;
bdev->bd_holder = holder;
BUG();
}
-struct block_device *lookup_bdev(const char *path)
+/*
+ * Stub: resolving a path to a dev_t is not supported in this userspace
+ * shim; always fails with -EINVAL.
+ */
+int lookup_bdev(const char *path, dev_t *dev)
{
-	return ERR_PTR(-EINVAL);
+	return -EINVAL;
}
-static atomic_t aio_thread_stop;
+/*
+ * The active backend failed to come up: advance to the next entry in
+ * fops_list and initialize that one instead.  Dies if the terminator
+ * (init == NULL) is reached.
+ */
+static void io_fallback(void)
+{
+	struct fops *next = fops + 1;
+
+	if (next->init == NULL)
+		die("no fallback possible, something is very wrong");
+
+	fops = next;
+	fops->init();
+}
+
+/*
+ * Common completion path for the synchronous backend: verify a
+ * preadv()/pwritev() result, honor REQ_FUA with fdatasync(), then
+ * complete the bio.
+ *
+ * @bio: the request just serviced
+ * @ret: byte count returned by preadv()/pwritev() (-1 on error)
+ */
+static void sync_check(struct bio *bio, int ret)
+{
+	if (ret != bio->bi_iter.bi_size) {
+		/*
+		 * preadv()/pwritev() return -1 and set errno on failure;
+		 * strerror(-ret) would always have printed EPERM.  A short
+		 * transfer is also treated as fatal here.
+		 */
+		die("IO error: %s\n", strerror(errno));
+	}
+
+	if (bio->bi_opf & REQ_FUA) {
+		ret = fdatasync(bio->bi_bdev->bd_fd);
+		if (ret)
+			/* fdatasync() likewise reports failure via errno */
+			die("fsync error: %s\n", strerror(errno));
+	}
+	bio_endio(bio);
+}
+
+/* The synchronous backend needs no per-process setup. */
+static void sync_init(void) {}
+
+static void sync_cleanup(void)
+{
+	/* Possibly redundant, but flush everything on exit to be safe. */
+	sync();
+}
+
+/* Synchronous backend: service one read bio with a blocking preadv(). */
+static void sync_read(struct bio *bio, struct iovec * iov, unsigned i)
+{
+	/* REQ_FUA requests go through the sync file descriptor. */
+	int fd = (bio->bi_opf & REQ_FUA)
+		? bio->bi_bdev->bd_sync_fd
+		: bio->bi_bdev->bd_fd;
+	ssize_t done = preadv(fd, iov, i, bio->bi_iter.bi_sector << 9);
+
+	sync_check(bio, done);
+}
+
+/* Synchronous backend: service one write bio with a blocking pwritev(). */
+static void sync_write(struct bio *bio, struct iovec * iov, unsigned i)
+{
+	/* REQ_FUA requests go through the sync file descriptor. */
+	int fd = (bio->bi_opf & REQ_FUA)
+		? bio->bi_bdev->bd_sync_fd
+		: bio->bi_bdev->bd_fd;
+	ssize_t done = pwritev(fd, iov, i, bio->bi_iter.bi_sector << 9);
+
+	sync_check(bio, done);
+}
static int aio_completion_thread(void *arg)
{
struct io_event events[8], *ev;
int ret;
+ bool stop = false;
- while (1) {
+ while (!stop) {
ret = io_getevents(aio_ctx, 1, ARRAY_SIZE(events),
events, NULL);
- if (atomic_read(&aio_thread_stop))
- break;
-
if (ret < 0 && ret == -EINTR)
continue;
if (ret < 0)
for (ev = events; ev < events + ret; ev++) {
struct bio *bio = (struct bio *) ev->data;
+ /* This should only happen during blkdev_cleanup() */
+ if (!bio) {
+ BUG_ON(atomic_read(&running_requests) != 0);
+ stop = true;
+ continue;
+ }
+
if (ev->res != bio->bi_iter.bi_size)
bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
+ atomic_dec(&running_requests);
}
}
static struct task_struct *aio_task = NULL;
-__attribute__((constructor(102)))
-static void blkdev_init(void)
+/*
+ * libaio backend setup: create the AIO context and start the completion
+ * thread.  -ENOSYS means the kernel lacks AIO support, so fall back to
+ * the next backend; any other error is fatal.
+ */
+static void aio_init(void)
{
	struct task_struct *p;
+	long err = io_setup(256, &aio_ctx);
+	if (!err) {
+		p = kthread_run(aio_completion_thread, NULL, "aio_completion");
+		BUG_ON(IS_ERR(p));
-	if (io_setup(256, &aio_ctx))
-		die("io_setup() error: %m");
+		aio_task = p;
-	p = kthread_run(aio_completion_thread, NULL, "aio_completion");
-	BUG_ON(IS_ERR(p));
-
-	aio_task = p;
+	} else if (err == -ENOSYS) {
+		io_fallback();
+	} else {
+		/* io_setup() returns -errno; negate it for strerror() */
+		die("io_setup() error: %s", strerror(-err));
+	}
}
-__attribute__((destructor(102)))
-static void blkdev_cleanup(void)
+static void aio_cleanup(void)
{
struct task_struct *p = NULL;
swap(aio_task, p);
get_task_struct(p);
- atomic_set(&aio_thread_stop, 1);
-
/* I mean, really?! IO_CMD_NOOP is even defined, but not implemented. */
int fds[2];
int ret = pipe(fds);
int junk = 0;
struct iocb iocb = {
.aio_lio_opcode = IO_CMD_PWRITE,
+ .data = NULL, /* Signal to stop */
.aio_fildes = fds[1],
.u.c.buf = &junk,
.u.c.nbytes = 1,
close(fds[0]);
close(fds[1]);
}
+
+/*
+ * Submit one vectored async IO for @bio; @opcode is IO_CMD_PREADV or
+ * IO_CMD_PWRITEV.  REQ_FUA requests use the sync file descriptor.
+ * Completions are reaped by aio_completion_thread().
+ */
+static void aio_op(struct bio *bio, struct iovec *iov, unsigned i, int opcode)
+{
+	struct iocb iocb = {
+		.data		= bio,
+		.aio_lio_opcode	= opcode,
+		.aio_fildes	= (bio->bi_opf & REQ_FUA)
+			? bio->bi_bdev->bd_sync_fd
+			: bio->bi_bdev->bd_fd,
+		.u.v.vec	= iov,
+		.u.v.nr		= i,
+		.u.v.offset	= bio->bi_iter.bi_sector << 9,
+	};
+	struct iocb *iocbp = &iocb;
+	ssize_t ret;
+
+	/* Account the request before submission so the completion thread
+	 * never observes a negative in-flight count. */
+	atomic_inc(&running_requests);
+	ret = io_submit(aio_ctx, 1, &iocbp);
+	if (ret != 1)
+		die("io_submit err: %s", strerror(-ret));
+}
+
+/* Backend read hook: submit @bio as an IO_CMD_PREADV. */
+static void aio_read(struct bio *bio, struct iovec *iov, unsigned i)
+{
+	aio_op(bio, iov, i, IO_CMD_PREADV);
+}
+
+/* Backend write hook: submit @bio as an IO_CMD_PWRITEV. */
+static void aio_write(struct bio *bio, struct iovec * iov, unsigned i)
+{
+	aio_op(bio, iov, i, IO_CMD_PWRITEV);
+}
+
+
+/* io_uring backend is not implemented yet: immediately fall back. */
+static void uring_init(void) {
+	io_fallback();
+}
+
+/*
+ * Backends in preference order: io_uring (stub), libaio, synchronous.
+ * blkdev_init() starts at the head; io_fallback() advances on failure.
+ * The empty entry (init == NULL) terminates the list.
+ *
+ * static: the table is only reachable through the file-local fops
+ * pointer, matching the linkage of everything else in this file.
+ */
+static struct fops fops_list[] = {
+	{
+		.init		= uring_init,
+	}, {
+		.init		= aio_init,
+		.cleanup	= aio_cleanup,
+		.read		= aio_read,
+		.write		= aio_write,
+	}, {
+		.init		= sync_init,
+		.cleanup	= sync_cleanup,
+		.read		= sync_read,
+		.write		= sync_write,
+	}, {
+		/* NULL terminator */
+	}
+};
+
+__attribute__((constructor(102)))
+static void blkdev_init(void)
+{
+	/* Start with the most preferred backend; its init() may fall back. */
+	fops = fops_list;
+	fops->init();
+}
+
+__attribute__((destructor(102)))
+static void blkdev_cleanup(void)
+{
+	/* Tear down whichever backend ended up active. */
+	fops->cleanup();
+}