#include <libaio.h>
+#ifdef CONFIG_VALGRIND
+#include <valgrind/memcheck.h>
+#endif
+
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/completion.h>
#include "tools-util.h"
static io_context_t aio_ctx;
+static atomic_t running_requests;
void generic_make_request(struct bio *bio)
{
ret = fdatasync(bio->bi_bdev->bd_fd);
if (ret) {
fprintf(stderr, "fsync error: %m\n");
-	bio->bi_error = -EIO;
+	/* bi_error is gone; report failure via the blk_status_t field. */
+	bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
return;
}
iov = alloca(sizeof(*iov) * i);
i = 0;
-	bio_for_each_segment(bv, bio, iter)
+	bio_for_each_segment(bv, bio, iter) {
+	void *start = page_address(bv.bv_page) + bv.bv_offset;
+	size_t len = bv.bv_len;
+
iov[i++] = (struct iovec) {
-	.iov_base = page_address(bv.bv_page) + bv.bv_offset,
-	.iov_len = bv.bv_len,
+	.iov_base = start,
+	.iov_len = len,
};
+#ifdef CONFIG_VALGRIND
+	/* To be pedantic it should only be on IO completion. */
+	if (bio_op(bio) == REQ_OP_READ)
+	VALGRIND_MAKE_MEM_DEFINED(start, len);
+#endif
+	}
+
struct iocb iocb = {
.data = bio,
.aio_fildes = bio->bi_opf & REQ_FUA
iocb.u.v.nr = i;
iocb.u.v.offset = bio->bi_iter.bi_sector << 9;
-	if (io_submit(aio_ctx, 1, &iocbp) != 1)
-	die("io_submit err: %m");
+	/* Count the request before submitting so the completion thread's
+	 * accounting can never observe a negative in-flight count. */
+	atomic_inc(&running_requests);
+	ret = io_submit(aio_ctx, 1, &iocbp);
+	if (ret != 1)
+	/* io_submit() returns -errno directly; %m (errno) would be wrong. */
+	die("io_submit err: %s", strerror(-ret));
break;
case REQ_OP_WRITE:
iocb.aio_lio_opcode = IO_CMD_PWRITEV;
iocb.u.v.nr = i;
iocb.u.v.offset = bio->bi_iter.bi_sector << 9;
+	/* Same submit path as READ: count first, decode -errno on failure. */
-	if (io_submit(aio_ctx, 1, &iocbp) != 1)
-	die("io_submit err: %m");
+	atomic_inc(&running_requests);
+	ret = io_submit(aio_ctx, 1, &iocbp);
+	if (ret != 1)
+	die("io_submit err: %s", strerror(-ret));
+	break;
+	case REQ_OP_FLUSH:
+	/* Flush has no AIO equivalent here; complete it synchronously. */
+	ret = fsync(bio->bi_bdev->bd_fd);
+	if (ret)
+	die("fsync error: %m");
+	bio_endio(bio);
break;
default:
BUG();
submit_bio(bio);
wait_for_completion(&done);
-	return bio->bi_error;
+	return blk_status_to_errno(bio->bi_status);
}
int blkdev_issue_discard(struct block_device *bdev,
bdev->bd_sync_fd = sync_fd;
bdev->bd_holder = holder;
bdev->bd_disk = &bdev->__bd_disk;
+	/* Wire bd_bdi to the embedded backing_dev_info so queue users see a
+	 * valid BDI.  NOTE(review): assumes __bd_bdi is zero-initialized with
+	 * the containing block_device — confirm at the allocation site. */
+	bdev->bd_bdi = &bdev->__bd_bdi;
+	bdev->queue.backing_dev_info = bdev->bd_bdi;
return bdev;
}
{
struct io_event events[8], *ev;
int ret;
+	bool stop = false;
-	while (1) {
+	while (!stop) {
ret = io_getevents(aio_ctx, 1, ARRAY_SIZE(events),
events, NULL);
-	if (ret < 0 && errno == EINTR)
+	/* libaio returns -errno; it does not set errno. */
+	if (ret < 0 && ret == -EINTR)
continue;
if (ret < 0)
-	die("io_getevents() error: %m");
+	die("io_getevents() error: %s", strerror(-ret));
for (ev = events; ev < events + ret; ev++) {
struct bio *bio = (struct bio *) ev->data;
-	if (ev->res < 0)
-	bio->bi_error = ev->res;
-	else if (ev->res != bio->bi_iter.bi_size)
-	bio->bi_error = -EIO;
+	/* This should only happen during blkdev_cleanup() */
+	if (!bio) {
+	BUG_ON(atomic_read(&running_requests) != 0);
+	stop = true;
+	continue;
+	}
+
+	/* A negative res can never equal bi_size, so short transfers and
+	 * outright errors both land in this single check. */
+	if (ev->res != bio->bi_iter.bi_size)
+	bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
+	atomic_dec(&running_requests);
}
}
return 0;
}
+/* Completion-thread handle; consumed (swapped to NULL) by blkdev_cleanup(). */
+static struct task_struct *aio_task = NULL;
+
__attribute__((constructor(102)))
static void blkdev_init(void)
{
p = kthread_run(aio_completion_thread, NULL, "aio_completion");
BUG_ON(IS_ERR(p));
+
+	aio_task = p;
+}
+
+/*
+ * Tear down the AIO completion thread at process exit.
+ *
+ * There is no way to cancel a blocked io_getevents() (IO_CMD_NOOP exists in
+ * libaio but is unimplemented), so we wake the thread by submitting a
+ * one-byte pipe write whose iocb has .data == NULL — the thread treats a
+ * NULL bio pointer as its stop signal.
+ */
+__attribute__((destructor(102)))
+static void blkdev_cleanup(void)
+{
+	struct task_struct *p = NULL;
+	swap(aio_task, p);
+	get_task_struct(p);
+
+	/* I mean, really?! IO_CMD_NOOP is even defined, but not implemented. */
+	int fds[2];
+	int ret = pipe(fds);
+	if (ret != 0)
+	/* pipe() returns -1 and sets errno; strerror(ret) would decode -1. */
+	die("pipe err: %m");
+
+	/* Wake up the completion thread with spurious work. */
+	int junk = 0;
+	struct iocb iocb = {
+	.aio_lio_opcode = IO_CMD_PWRITE,
+	.data = NULL, /* Signal to stop */
+	.aio_fildes = fds[1],
+	.u.c.buf = &junk,
+	.u.c.nbytes = 1,
+	}, *iocbp = &iocb;
+	/* io_submit() returns -errno on failure, not -1 with errno set. */
+	ret = io_submit(aio_ctx, 1, &iocbp);
+	if (ret != 1)
+	die("io_submit cleanup err: %s", strerror(-ret));
+
+	ret = kthread_stop(p);
+	BUG_ON(ret);
+
+	put_task_struct(p);
+
+	close(fds[0]);
+	close(fds[1]);
+}