13 #ifdef CONFIG_VALGRIND
14 #include <valgrind/memcheck.h>
17 #include <linux/bio.h>
18 #include <linux/blkdev.h>
19 #include <linux/completion.h>
21 #include <linux/kthread.h>
23 #include "tools-util.h"
	/* Optional backend teardown hook. */
	void (*cleanup)(void);
	/* Issue a read/write for @bio, described by @iov[0..i). */
	void (*read)(struct bio *bio, struct iovec * iov, unsigned i);
	void (*write)(struct bio *bio, struct iovec * iov, unsigned i);

/* I/O backend currently in use; selected from fops_list at init time. */
static struct fops *fops;
/* Kernel AIO context — only valid while the aio backend is active. */
static io_context_t aio_ctx;
/* Count of in-flight aio requests; checked by the completion thread. */
static atomic_t running_requests;
/*
 * Userspace stand-in for the kernel's bio submission path: translates the
 * bio's segments into an iovec array and dispatches to the active backend
 * (fops->read / fops->write); flush requests map to fdatasync()/fsync().
 */
void generic_make_request(struct bio *bio)
	struct bvec_iter iter;

	/* REQ_PREFLUSH: flush the device before issuing the IO itself. */
	if (bio->bi_opf & REQ_PREFLUSH) {
		ret = fdatasync(bio->bi_bdev->bd_fd);
		fprintf(stderr, "fsync error: %m\n");
		bio->bi_status = BLK_STS_IOERR;

	/* First pass: count segments so we know how many iovecs we need. */
	bio_for_each_segment(bv, bio, iter)

	/* NOTE(review): alloca() — stack usage is bounded by the bio's
	 * segment count; confirm callers keep bios reasonably sized. */
	iov = alloca(sizeof(*iov) * i);

	/* Second pass: one iovec per bio segment. */
	bio_for_each_segment(bv, bio, iter) {
		void *start = page_address(bv.bv_page) + bv.bv_offset;
		size_t len = bv.bv_len;

		iov[i++] = (struct iovec) {

#ifdef CONFIG_VALGRIND
		/* To be pedantic it should only be on IO completion. */
		if (bio_op(bio) == REQ_OP_READ)
			VALGRIND_MAKE_MEM_DEFINED(start, len);

	/* Dispatch by operation type to the selected backend: */
	switch (bio_op(bio)) {
		fops->read(bio, iov, i);
		fops->write(bio, iov, i);
		/* Full fsync (data + metadata) for flush ops: */
		ret = fsync(bio->bi_bdev->bd_fd);
			die("fsync error: %m");
/* Completion callback for submit_bio_wait(): wake the waiting submitter. */
static void submit_bio_wait_endio(struct bio *bio)
	complete(bio->bi_private);
/*
 * Submit @bio and block until it completes.
 * Returns the bio's status converted to a negative errno (0 on success).
 */
int submit_bio_wait(struct bio *bio)
	struct completion done;

	init_completion(&done);
	bio->bi_private = &done;
	bio->bi_end_io = submit_bio_wait_endio;
	/* REQ_SYNC: mark this IO as one the submitter is waiting on. */
	bio->bi_opf |= REQ_SYNC;
	wait_for_completion(&done);

	return blk_status_to_errno(bio->bi_status);
/* Discard: kernel-API shim (body continues past this view). */
int blkdev_issue_discard(struct block_device *bdev,
			 sector_t sector, sector_t nr_sects,
/* Zeroout: kernel-API shim, currently unimplemented in userspace. */
int blkdev_issue_zeroout(struct block_device *bdev,
			 sector_t sector, sector_t nr_sects,
			 gfp_t gfp_mask, unsigned flags)
	/* Not yet implemented: */
/*
 * Block size of @bdev: st_blksize for regular files, an ioctl query for
 * real block devices.
 */
unsigned bdev_logical_block_size(struct block_device *bdev)
	ret = fstat(bdev->bd_fd, &statbuf);

	/* Regular file: use the filesystem's preferred IO size. */
	if (!S_ISBLK(statbuf.st_mode))
		return statbuf.st_blksize;

	/* NOTE(review): BLKPBSZGET returns the *physical* block size;
	 * BLKSSZGET is the logical one — confirm this mismatch with the
	 * function name is intentional. */
	xioctl(bdev->bd_fd, BLKPBSZGET, &blksize);
/*
 * Device capacity in 512-byte sectors: st_size for regular files,
 * BLKGETSIZE64 (bytes) for real block devices.
 */
sector_t get_capacity(struct gendisk *disk)
	struct block_device *bdev =
		container_of(disk, struct block_device, __bd_disk);

	ret = fstat(bdev->bd_fd, &statbuf);

	if (!S_ISBLK(statbuf.st_mode))
		return statbuf.st_size >> 9;	/* bytes -> 512-byte sectors */

	ret = ioctl(bdev->bd_fd, BLKGETSIZE64, &bytes);
/* Release @bdev: flush outstanding writes, then close its descriptors. */
void blkdev_put(struct block_device *bdev, void *holder)
	fdatasync(bdev->bd_fd);
	close(bdev->bd_sync_fd);
/*
 * Open the device (or regular file) at @path, mirroring the kernel API;
 * returns an ERR_PTR on failure.  Three descriptors are opened: the main
 * fd (O_DIRECT unless BLK_OPEN_BUFFERED), a buffered fd, and an O_SYNC
 * fd used for FUA-style IO.
 */
struct block_device *blkdev_get_by_path(const char *path, blk_mode_t mode,
					void *holder, const struct blk_holder_ops *hop)
	struct block_device *bdev;
	int fd, sync_fd, buffered_fd, flags = 0;

	/* Translate BLK_OPEN_* mode bits into open(2) access flags: */
	if ((mode & (BLK_OPEN_READ|BLK_OPEN_WRITE)) == (BLK_OPEN_READ|BLK_OPEN_WRITE))
	else if (mode & BLK_OPEN_READ)
	else if (mode & BLK_OPEN_WRITE)

	if (!(mode & BLK_OPEN_BUFFERED))

	/* using O_EXCL doesn't work with opening twice for an O_SYNC fd: */
	if (mode & BLK_OPEN_EXCL)

	buffered_fd = open(path, flags & ~O_DIRECT);
		return ERR_PTR(-errno);

	fd = open(path, flags);
		/* presumably the O_DIRECT fallback path — condition not
		 * visible here; verify against full source */
		fd = dup(buffered_fd);
		return ERR_PTR(-errno);

	sync_fd = open(path, flags|O_SYNC);
		/* retry without O_DIRECT for the sync fd: */
		sync_fd = open(path, (flags & ~O_DIRECT)|O_SYNC);
		return ERR_PTR(-errno);

	/* NOTE(review): malloc() result is not checked before use —
	 * confirm the project's allocation-failure policy. */
	bdev = malloc(sizeof(*bdev));
	memset(bdev, 0, sizeof(*bdev));

	strncpy(bdev->name, path, sizeof(bdev->name));
	bdev->name[sizeof(bdev->name) - 1] = '\0';	/* strncpy may not terminate */

	bdev->bd_dev = xfstat(fd).st_rdev;
	bdev->bd_sync_fd = sync_fd;
	bdev->bd_buffered_fd = buffered_fd;
	bdev->bd_holder = holder;
	/* Wire up the embedded gendisk/bdi so kernel-style code can
	 * traverse bdev->bd_disk->bdi as it would in the kernel: */
	bdev->bd_disk = &bdev->__bd_disk;
	bdev->bd_disk->bdi = &bdev->bd_disk->__bdi;
	bdev->queue.backing_dev_info = bdev->bd_disk->bdi;
/* Kernel-compat shim for dropping a block_device ref (body not shown here). */
void bdput(struct block_device *bdev)
/* Resolve @path to a dev_t (kernel-compat shim; body not shown here). */
int lookup_bdev(const char *path, dev_t *dev)
/*
 * Switch to the next I/O backend when the current one cannot be used;
 * a NULL ->init marks the end of fops_list, at which point we give up.
 */
static void io_fallback(void)
	if (fops->init == NULL)
		die("no fallback possible, something is very wrong");
/*
 * Validate the result of a synchronous preadv/pwritev: @ret must equal
 * the bio's size; REQ_FUA bios additionally get an fdatasync().
 */
static void sync_check(struct bio *bio, int ret)
	if (ret != bio->bi_iter.bi_size) {
		/* NOTE(review): on a short (positive) transfer, -ret is not
		 * a valid errno for strerror() — confirm intended. */
		die("IO error: %s\n", strerror(-ret));

	/* FUA: data must be durable before the bio completes. */
	if (bio->bi_opf & REQ_FUA) {
		ret = fdatasync(bio->bi_bdev->bd_fd);
			die("fsync error: %s\n", strerror(-ret));
/* The synchronous backend requires no setup. */
static void sync_init(void)
{
}
/* Tear down the synchronous backend (body continues past this view). */
static void sync_cleanup(void)
/*
 * Synchronous read backend: preadv() directly into the bio's segments.
 * REQ_FUA IO goes through the O_SYNC descriptor.
 */
static void sync_read(struct bio *bio, struct iovec * iov, unsigned i)
	int fd = bio->bi_opf & REQ_FUA
		? bio->bi_bdev->bd_sync_fd
		: bio->bi_bdev->bd_fd;

	/* bi_sector is in 512-byte units; shift to a byte offset. */
	ssize_t ret = preadv(fd, iov, i, bio->bi_iter.bi_sector << 9);
	sync_check(bio, ret);
/*
 * Synchronous write backend: pwritev() straight from the bio's segments.
 * REQ_FUA IO goes through the O_SYNC descriptor.
 */
static void sync_write(struct bio *bio, struct iovec * iov, unsigned i)
	int fd = bio->bi_opf & REQ_FUA
		? bio->bi_bdev->bd_sync_fd
		: bio->bi_bdev->bd_fd;

	/* bi_sector is in 512-byte units; shift to a byte offset. */
	ssize_t ret = pwritev(fd, iov, i, bio->bi_iter.bi_sector << 9);
	sync_check(bio, ret);
/* Submitters wait here when io_submit() returns -EAGAIN (ring full). */
static DECLARE_WAIT_QUEUE_HEAD(aio_events_completed);
/*
 * Reaper thread for the aio backend: pulls completions from aio_ctx,
 * wakes throttled submitters, and finishes the corresponding bios.
 */
static int aio_completion_thread(void *arg)
	struct io_event events[8], *ev;

	ret = io_getevents(aio_ctx, 1, ARRAY_SIZE(events),

	/* Interrupted by a signal — just retry: */
	if (ret < 0 && ret == -EINTR)

		die("io_getevents() error: %s", strerror(-ret));

	/* Slots freed in the ring: let waiting submitters retry. */
	wake_up(&aio_events_completed);

	for (ev = events; ev < events + ret; ev++) {
		struct bio *bio = (struct bio *) ev->data;

		/* This should only happen during blkdev_cleanup() */
		/* (NULL ev->data is the stop sentinel from aio_cleanup()) */
		BUG_ON(atomic_read(&running_requests) != 0);

		/* A short transfer is an IO error for this bio: */
		if (ev->res != bio->bi_iter.bi_size)
			bio->bi_status = BLK_STS_IOERR;

		atomic_dec(&running_requests);
/* Handle to the completion reaper thread, set by aio_init(). */
static struct task_struct *aio_task = NULL;
/*
 * Initialize the aio backend: set up the kernel AIO context and start
 * the completion thread; ENOSYS (kernel without AIO) triggers fallback
 * to the next backend.
 */
static void aio_init(void)
	struct task_struct *p;
	long err = io_setup(256, &aio_ctx);

	p = kthread_run(aio_completion_thread, NULL, "aio_completion");

	} else if (err == -ENOSYS) {
		/* NOTE(review): err is negative here, so strerror(err) gets a
		 * negative argument — should this be strerror(-err)? */
		die("io_setup() error: %s", strerror(err));
/*
 * Shut down the aio backend: nudge the completion thread with a dummy
 * write whose data field is NULL (its stop sentinel), then join it.
 */
static void aio_cleanup(void)
	struct task_struct *p = NULL;

	/* I mean, really?! IO_CMD_NOOP is even defined, but not implemented. */
		die("pipe err: %s", strerror(ret));

	/* Wake up the completion thread with spurious work. */
		.aio_lio_opcode = IO_CMD_PWRITE,
		.data = NULL, /* Signal to stop */
		.aio_fildes = fds[1],

	ret = io_submit(aio_ctx, 1, &iocbp);
		die("io_submit cleanup err: %s", strerror(-ret));

	ret = kthread_stop(p);
/*
 * Submit one aio request for @bio — common path for aio_read/aio_write.
 * If the ring is full (-EAGAIN), wait for the completion thread to drain
 * events and retry the submission.
 */
static void aio_op(struct bio *bio, struct iovec *iov, unsigned i, int opcode)
		/* FUA IO goes through the O_SYNC descriptor: */
		.aio_fildes = bio->bi_opf & REQ_FUA
			? bio->bi_bdev->bd_sync_fd
			: bio->bi_bdev->bd_fd,
		.aio_lio_opcode = opcode,
		/* bi_sector is in 512-byte units */
		.u.c.offset = bio->bi_iter.bi_sector << 9,

	atomic_inc(&running_requests);

	/* Retry until the kernel accepts the iocb (note: stray ';;' below
	 * is harmless — a null statement). */
	wait_event(aio_events_completed,
		   (ret = io_submit(aio_ctx, 1, &iocbp)) != -EAGAIN);;

		die("io_submit err: %s", strerror(-ret));
/* aio backend read: vectored pread via the kernel AIO interface. */
static void aio_read(struct bio *bio, struct iovec *iov, unsigned i)
	aio_op(bio, iov, i, IO_CMD_PREADV);
/* aio backend write: vectored pwrite via the kernel AIO interface. */
static void aio_write(struct bio *bio, struct iovec * iov, unsigned i)
	aio_op(bio, iov, i, IO_CMD_PWRITEV);
/* io_uring backend: placeholder only. */
/* not implemented */
static void uring_init(void) {
/*
 * Available I/O backends in order of preference; io_fallback() walks
 * this list when a backend fails to initialize.
 */
struct fops fops_list[] = {
	.cleanup = aio_cleanup,
	.cleanup = sync_cleanup,
/* Runs before main() (constructor priority 102): block-layer shim setup. */
__attribute__((constructor(102)))
static void blkdev_init(void)
/* Runs at exit (destructor priority 102): block-layer shim teardown. */
__attribute__((destructor(102)))
static void blkdev_cleanup(void)