13 #include <valgrind/memcheck.h>
15 #include <linux/bio.h>
16 #include <linux/blkdev.h>
17 #include <linux/completion.h>
19 #include <linux/kthread.h>
21 #include "tools-util.h"
/* libaio context: set up in blkdev_init(), used by io_submit() and io_getevents() in this file. */
23 static io_context_t aio_ctx;
/*
 * Submit a bio against the file descriptor(s) of bio->bi_bdev.
 *
 * A REQ_PREFLUSH bio is preceded by an fdatasync() of the device fd.  The
 * bio's segments are gathered into an iovec array, and the operation is then
 * dispatched on bio_op(): vectored reads and writes go through io_submit()
 * on aio_ctx; another path calls fsync() on the device fd directly.
 */
25 void generic_make_request(struct bio *bio)
28 struct bvec_iter iter;
/* REQ_PREFLUSH: sync the device fd before the I/O itself is issued. */
33 if (bio->bi_opf & REQ_PREFLUSH) {
34 ret = fdatasync(bio->bi_bdev->bd_fd);
/* Flush failure is reported on stderr and recorded in the bio's status. */
36 fprintf(stderr, "fsync error: %m\n");
37 bio->bi_status = BLK_STS_IOERR;
/* First pass over the segments; presumably counts them into i to size the iovec array below. */
44 bio_for_each_segment(bv, bio, iter)
/* Stack-allocate one iovec per counted segment. */
47 iov = alloca(sizeof(*iov) * i);
/* Second pass: fill in one iovec entry per segment. */
50 bio_for_each_segment(bv, bio, iter) {
/* Address of the segment's data: page mapping plus the offset within the page. */
51 void *start = page_address(bv.bv_page) + bv.bv_offset;
52 size_t len = bv.bv_len;
54 iov[i++] = (struct iovec) {
59 /* To be pedantic it should only be on IO completion. */
/* For reads, mark the destination buffer as defined for Valgrind's memcheck. */
60 if (bio_op(bio) == REQ_OP_READ)
61 VALGRIND_MAKE_MEM_DEFINED(start, len);
/* REQ_FUA requests use the bd_sync_fd descriptor; everything else uses bd_fd. */
66 .aio_fildes = bio->bi_opf & REQ_FUA
67 ? bio->bi_bdev->bd_sync_fd
68 : bio->bi_bdev->bd_fd,
/* Dispatch on the operation type. */
71 switch (bio_op(bio)) {
/* Vectored read via libaio. */
73 iocb.aio_lio_opcode = IO_CMD_PREADV;
/* Sector number -> byte offset (<< 9 multiplies by 512). */
76 iocb.u.v.offset = bio->bi_iter.bi_sector << 9;
78 ret = io_submit(aio_ctx, 1, &iocbp);
/* Submission failure is fatal; ret is negated before being passed to strerror(). */
80 die("io_submit err: %s", strerror(-ret));
/* Vectored write via libaio; same offset conversion and error handling as the read path. */
83 iocb.aio_lio_opcode = IO_CMD_PWRITEV;
86 iocb.u.v.offset = bio->bi_iter.bi_sector << 9;
88 ret = io_submit(aio_ctx, 1, &iocbp);
90 die("io_submit err: %s", strerror(-ret));
/* Synchronous fsync() of the device fd; failure is fatal. */
93 ret = fsync(bio->bi_bdev->bd_fd);
95 die("fsync error: %m");
/*
 * bi_end_io callback installed by submit_bio_wait(): signals the completion
 * object stored in bio->bi_private.
 */
103 static void submit_bio_wait_endio(struct bio *bio)
105 complete(bio->bi_private);
/*
 * Wait synchronously for a bio to complete.
 *
 * Returns bio->bi_status translated by blk_status_to_errno().
 */
108 int submit_bio_wait(struct bio *bio)
/* The completion lives in this function's frame; it is waited on before returning. */
110 struct completion done;
112 init_completion(&done);
/* Hand the completion to the end_io callback through bi_private. */
113 bio->bi_private = &done;
114 bio->bi_end_io = submit_bio_wait_endio;
115 bio->bi_opf |= REQ_SYNC;
/* Block until submit_bio_wait_endio() signals the completion. */
117 wait_for_completion(&done);
119 return blk_status_to_errno(bio->bi_status);
/*
 * Discard request for nr_sects sectors starting at sector on bdev.
 * NOTE(review): described from the signature only; confirm what the body
 * actually does (it may be a stub).
 */
122 int blkdev_issue_discard(struct block_device *bdev,
123 sector_t sector, sector_t nr_sects,
124 gfp_t gfp_mask, unsigned long flags)
/*
 * Block size of the file or device behind bdev->bd_fd.
 *
 * For anything that is not a block device the result is st_blksize >> 9,
 * i.e. expressed in 512-byte units rather than bytes.
 * NOTE(review): confirm callers expect 512-byte units; the in-kernel function
 * of this name returns bytes.
 */
129 unsigned bdev_logical_block_size(struct block_device *bdev)
135 ret = fstat(bdev->bd_fd, &statbuf);
/* Regular files (and other non-block-device fds): derive the size from fstat(). */
138 if (!S_ISBLK(statbuf.st_mode))
139 return statbuf.st_blksize >> 9;
/*
 * Block devices: ask the kernel.
 * NOTE(review): BLKPBSZGET reports the physical block size; BLKSSZGET is the
 * logical sector size ioctl — confirm which one is intended here.
 */
141 ret = ioctl(bdev->bd_fd, BLKPBSZGET, &blksize);
/*
 * Capacity of the device that embeds disk.
 *
 * For non-block-device files the result is st_size >> 9 (size in 512-byte
 * sectors); for block devices the size in bytes is obtained via BLKGETSIZE64.
 */
147 sector_t get_capacity(struct gendisk *disk)
/* disk is the __bd_disk member embedded in a struct block_device; recover the container. */
149 struct block_device *bdev =
150 container_of(disk, struct block_device, __bd_disk);
155 ret = fstat(bdev->bd_fd, &statbuf);
/* Not a block device: use the file size reported by fstat(). */
158 if (!S_ISBLK(statbuf.st_mode))
159 return statbuf.st_size >> 9;
/* Block device: query the size in bytes from the kernel. */
161 ret = ioctl(bdev->bd_fd, BLKGETSIZE64, &bytes);
/*
 * Release a block device obtained from blkdev_get_by_path().
 * The mode argument is not used by the lines shown here.
 */
167 void blkdev_put(struct block_device *bdev, fmode_t mode)
/* Sync outstanding data before the descriptors go away; the result is ignored. */
169 fdatasync(bdev->bd_fd);
170 close(bdev->bd_sync_fd);
/*
 * Open path twice — once normally and once with O_SYNC — and wrap both
 * descriptors in a freshly allocated struct block_device.
 *
 * Returns the new block_device, or ERR_PTR(-errno) if an open() fails.
 */
175 struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
178 struct block_device *bdev;
/* All opens use O_DIRECT; access-mode bits are added below based on mode. */
179 int fd, sync_fd, flags = O_DIRECT;
/* Translate the FMODE_READ/FMODE_WRITE combination into open(2) access flags. */
181 if ((mode & (FMODE_READ|FMODE_WRITE)) == (FMODE_READ|FMODE_WRITE))
183 else if (mode & FMODE_READ)
185 else if (mode & FMODE_WRITE)
189 /* using O_EXCL doesn't work with opening twice for an O_SYNC fd: */
190 if (mode & FMODE_EXCL)
/* Primary descriptor. */
194 fd = open(path, flags);
196 return ERR_PTR(-errno);
/* Second descriptor on the same path, opened with O_SYNC; stored as bd_sync_fd below. */
198 sync_fd = open(path, flags|O_SYNC);
202 return ERR_PTR(-errno);
/* NOTE(review): the malloc() result is passed straight to memset() with no NULL check. */
205 bdev = malloc(sizeof(*bdev));
206 memset(bdev, 0, sizeof(*bdev));
/* strncpy() may leave the name unterminated on truncation, so terminate explicitly. */
208 strncpy(bdev->name, path, sizeof(bdev->name));
209 bdev->name[sizeof(bdev->name) - 1] = '\0';
212 bdev->bd_sync_fd = sync_fd;
213 bdev->bd_holder = holder;
/* Point the disk and bdi pointers at the instances embedded in this struct. */
214 bdev->bd_disk = &bdev->__bd_disk;
215 bdev->bd_bdi = &bdev->__bd_bdi;
216 bdev->queue.backing_dev_info = bdev->bd_bdi;
/*
 * Drop a reference to bdev.
 * NOTE(review): described from the name only; confirm against the body.
 */
221 void bdput(struct block_device *bdev)
/* Path lookup is not provided here: returns ERR_PTR(-EINVAL). */
226 struct block_device *lookup_bdev(const char *path)
228 return ERR_PTR(-EINVAL);
/*
 * Thread function that reaps AIO completions from aio_ctx and records I/O
 * errors on the corresponding bios.  arg is not used by the lines shown here.
 */
231 static int aio_completion_thread(void *arg)
/* Completions are fetched in batches of up to 8. */
233 struct io_event events[8], *ev;
/* Wait for at least one completion, taking up to ARRAY_SIZE(events) at once. */
237 ret = io_getevents(aio_ctx, 1, ARRAY_SIZE(events),
/* Interrupted wait (-EINTR) is handled separately; the "ret < 0" test is redundant with "ret == -EINTR". */
240 if (ret < 0 && ret == -EINTR)
/* Any other io_getevents() failure is fatal. */
243 die("io_getevents() error: %s", strerror(-ret));
/* Process each returned event. */
245 for (ev = events; ev < events + ret; ev++) {
/* ev->data carries the struct bio pointer for this request. */
246 struct bio *bio = (struct bio *) ev->data;
/* A result that differs from the requested size (short transfer or error code) marks the bio failed. */
248 if (ev->res != bio->bi_iter.bi_size)
249 bio->bi_status = BLK_STS_IOERR;
/*
 * Constructor (priority 102): creates the AIO context and starts the
 * completion-reaping thread.
 */
258 __attribute__((constructor(102)))
259 static void blkdev_init(void)
261 struct task_struct *p;
/* Create the AIO context with room for 256 events; failure is fatal. */
263 if (io_setup(256, &aio_ctx))
264 die("io_setup() error: %m");
/* Start the thread that runs aio_completion_thread(). */
266 p = kthread_run(aio_completion_thread, NULL, "aio_completion");