X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=linux%2Fblkdev.c;h=45b03fbac1e9a15a50988c06b886f44fdd41b4ef;hb=d22c79d2fff10dd782caf5668fd019387914a5bb;hp=370f08fc5596e0d877d73de3f82b60885a7f1b7e;hpb=d79d57ef89000f857158875055b977dbc54354da;p=bcachefs-tools-debian

diff --git a/linux/blkdev.c b/linux/blkdev.c
index 370f08f..45b03fb 100644
--- a/linux/blkdev.c
+++ b/linux/blkdev.c
@@ -10,7 +10,9 @@
 #include <libaio.h>
 
+#ifdef CONFIG_VALGRIND
 #include <valgrind/memcheck.h>
+#endif
 
 #include <linux/bio.h>
 #include <linux/blkdev.h>
@@ -20,7 +22,16 @@
 #include "tools-util.h"
 
+struct fops {
+        void (*init)(void);
+        void (*cleanup)(void);
+        void (*read)(struct bio *bio, struct iovec * iov, unsigned i);
+        void (*write)(struct bio *bio, struct iovec * iov, unsigned i);
+};
+
+static struct fops *fops;
 static io_context_t aio_ctx;
+static atomic_t running_requests;
 
 void generic_make_request(struct bio *bio)
 {
@@ -56,38 +67,19 @@ void generic_make_request(struct bio *bio)
                         .iov_len        = len,
                 };
 
+#ifdef CONFIG_VALGRIND
                 /* To be pedantic it should only be on IO completion. */
                 if (bio_op(bio) == REQ_OP_READ)
                         VALGRIND_MAKE_MEM_DEFINED(start, len);
+#endif
         }
 
-        struct iocb iocb = {
-                .data           = bio,
-                .aio_fildes     = bio->bi_opf & REQ_FUA
-                        ? bio->bi_bdev->bd_sync_fd
-                        : bio->bi_bdev->bd_fd,
-        }, *iocbp = &iocb;
-
         switch (bio_op(bio)) {
         case REQ_OP_READ:
-                iocb.aio_lio_opcode     = IO_CMD_PREADV;
-                iocb.u.v.vec            = iov;
-                iocb.u.v.nr             = i;
-                iocb.u.v.offset         = bio->bi_iter.bi_sector << 9;
-
-                ret = io_submit(aio_ctx, 1, &iocbp);
-                if (ret != 1)
-                        die("io_submit err: %s", strerror(-ret));
+                fops->read(bio, iov, i);
                 break;
         case REQ_OP_WRITE:
-                iocb.aio_lio_opcode     = IO_CMD_PWRITEV;
-                iocb.u.v.vec            = iov;
-                iocb.u.v.nr             = i;
-                iocb.u.v.offset         = bio->bi_iter.bi_sector << 9;
-
-                ret = io_submit(aio_ctx, 1, &iocbp);
-                if (ret != 1)
-                        die("io_submit err: %s", strerror(-ret));
+                fops->write(bio, iov, i);
                 break;
         case REQ_OP_FLUSH:
                 ret = fsync(bio->bi_bdev->bd_fd);
@@ -121,11 +113,19 @@ int submit_bio_wait(struct bio *bio)
 
 int blkdev_issue_discard(struct block_device *bdev,
                          sector_t sector, sector_t nr_sects,
-                         gfp_t gfp_mask, unsigned long flags)
+                         gfp_t gfp_mask)
 {
         return 0;
 }
 
+int blkdev_issue_zeroout(struct block_device *bdev,
+                         sector_t sector, sector_t nr_sects,
+                         gfp_t gfp_mask, unsigned flags)
+{
+        /* Not yet implemented: */
+        BUG();
+}
+
 unsigned bdev_logical_block_size(struct block_device *bdev)
 {
         struct stat statbuf;
@@ -136,12 +136,10 @@ unsigned bdev_logical_block_size(struct block_device *bdev)
         BUG_ON(ret);
 
         if (!S_ISBLK(statbuf.st_mode))
-                return statbuf.st_blksize >> 9;
-
-        ret = ioctl(bdev->bd_fd, BLKPBSZGET, &blksize);
-        BUG_ON(ret);
+                return statbuf.st_blksize;
 
-        return blksize >> 9;
+        xioctl(bdev->bd_fd, BLKPBSZGET, &blksize);
+        return blksize;
 }
 
 sector_t get_capacity(struct gendisk *disk)
@@ -176,7 +174,7 @@ struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
                                         void *holder)
 {
         struct block_device *bdev;
-        int fd, sync_fd, flags = O_DIRECT;
+        int fd, sync_fd, buffered_fd, flags = 0;
 
         if ((mode & (FMODE_READ|FMODE_WRITE)) == (FMODE_READ|FMODE_WRITE))
                 flags = O_RDWR;
@@ -190,15 +188,24 @@ struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
         if (mode & FMODE_EXCL)
                 flags |= O_EXCL;
 #endif
+        buffered_fd = open(path, flags);
+        if (buffered_fd < 0)
+                return ERR_PTR(-errno);
 
-        fd = open(path, flags);
+        fd = open(path, flags|O_DIRECT);
         if (fd < 0)
+                fd = dup(buffered_fd);
+        if (fd < 0) {
+                close(buffered_fd);
                 return ERR_PTR(-errno);
+        }
 
-        sync_fd = open(path, flags|O_SYNC);
+        sync_fd = open(path, flags|O_DIRECT|O_SYNC);
+        if (sync_fd < 0)
+                sync_fd = open(path, flags|O_SYNC);
+
         if (sync_fd < 0) {
-                assert(0);
                 close(fd);
+                close(buffered_fd);
                 return ERR_PTR(-errno);
         }
 
@@ -208,12 +215,14 @@ struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
         strncpy(bdev->name, path, sizeof(bdev->name));
         bdev->name[sizeof(bdev->name) - 1] = '\0';
 
+        bdev->bd_dev            = xfstat(fd).st_rdev;
         bdev->bd_fd             = fd;
         bdev->bd_sync_fd        = sync_fd;
+        bdev->bd_buffered_fd    = buffered_fd;
         bdev->bd_holder         = holder;
         bdev->bd_disk           = &bdev->__bd_disk;
-        bdev->bd_bdi            = &bdev->__bd_bdi;
-        bdev->queue.backing_dev_info = bdev->bd_bdi;
+        bdev->bd_disk->bdi      = &bdev->bd_disk->__bdi;
+        bdev->queue.backing_dev_info = bdev->bd_disk->bdi;
 
         return bdev;
 }
@@ -223,25 +232,70 @@ void bdput(struct block_device *bdev)
         BUG();
 }
 
-struct block_device *lookup_bdev(const char *path)
+int lookup_bdev(const char *path, dev_t *dev)
+{
+        return -EINVAL;
+}
+
+static void io_fallback(void)
 {
-        return ERR_PTR(-EINVAL);
+        fops++;
+        if (fops->init == NULL)
+                die("no fallback possible, something is very wrong");
+        fops->init();
 }
 
-static atomic_t aio_thread_stop;
+static void sync_check(struct bio *bio, int ret)
+{
+        if (ret != bio->bi_iter.bi_size) {
+                die("IO error: %s\n", strerror(-ret));
+        }
+
+        if (bio->bi_opf & REQ_FUA) {
+                ret = fdatasync(bio->bi_bdev->bd_fd);
+                if (ret)
+                        die("fsync error: %s\n", strerror(-ret));
+        }
+        bio_endio(bio);
+}
+
+static void sync_init(void) {}
+
+static void sync_cleanup(void)
+{
+        /* not necessary? */
+        sync();
+}
+
+static void sync_read(struct bio *bio, struct iovec * iov, unsigned i)
+{
+
+        int fd = bio->bi_opf & REQ_FUA
+                        ? bio->bi_bdev->bd_sync_fd
+                        : bio->bi_bdev->bd_fd;
+        ssize_t ret = preadv(fd, iov, i, bio->bi_iter.bi_sector << 9);
+        sync_check(bio, ret);
+}
+
+static void sync_write(struct bio *bio, struct iovec * iov, unsigned i)
+{
+        int fd = bio->bi_opf & REQ_FUA
+                        ? bio->bi_bdev->bd_sync_fd
+                        : bio->bi_bdev->bd_fd;
+        ssize_t ret = pwritev(fd, iov, i, bio->bi_iter.bi_sector << 9);
+        sync_check(bio, ret);
+}
 
 static int aio_completion_thread(void *arg)
 {
         struct io_event events[8], *ev;
         int ret;
+        bool stop = false;
 
-        while (1) {
+        while (!stop) {
                 ret = io_getevents(aio_ctx, 1, ARRAY_SIZE(events),
                                    events, NULL);
 
-                if (atomic_read(&aio_thread_stop))
-                        break;
-
                 if (ret < 0 && ret == -EINTR)
                         continue;
                 if (ret < 0)
@@ -250,10 +304,18 @@ static int aio_completion_thread(void *arg)
                 for (ev = events; ev < events + ret; ev++) {
                         struct bio *bio = (struct bio *) ev->data;
 
+                        /* This should only happen during blkdev_cleanup() */
+                        if (!bio) {
+                                BUG_ON(atomic_read(&running_requests) != 0);
+                                stop = true;
+                                continue;
+                        }
+
                         if (ev->res != bio->bi_iter.bi_size)
                                 bio->bi_status = BLK_STS_IOERR;
 
                         bio_endio(bio);
+                        atomic_dec(&running_requests);
                 }
         }
 
@@ -262,27 +324,28 @@ static int aio_completion_thread(void *arg)
 
 static struct task_struct *aio_task = NULL;
 
-__attribute__((constructor(102)))
-static void blkdev_init(void)
+static void aio_init(void)
 {
         struct task_struct *p;
+        long err = io_setup(256, &aio_ctx);
+        if (!err) {
+                p = kthread_run(aio_completion_thread, NULL, "aio_completion");
+                BUG_ON(IS_ERR(p));
 
-        if (io_setup(256, &aio_ctx))
-                die("io_setup() error: %m");
-
-        p = kthread_run(aio_completion_thread, NULL, "aio_completion");
-        BUG_ON(IS_ERR(p));
+                aio_task = p;
 
-        aio_task = p;
+        } else if (err == -ENOSYS) {
+                io_fallback();
+        } else {
+                die("io_setup() error: %s", strerror(err));
+        }
 }
 
-__attribute__((destructor(102)))
-static void blkdev_cleanup(void)
+static void aio_cleanup(void)
 {
         struct task_struct *p = NULL;
         swap(aio_task, p);
-
-        atomic_set(&aio_thread_stop, 1);
+        get_task_struct(p);
 
         /* I mean, really?! IO_CMD_NOOP is even defined, but not implemented. */
         int fds[2];
@@ -294,6 +357,7 @@ static void blkdev_cleanup(void)
         int junk = 0;
         struct iocb iocb = {
                 .aio_lio_opcode = IO_CMD_PWRITE,
+                .data = NULL, /* Signal to stop */
                 .aio_fildes = fds[1],
                 .u.c.buf = &junk,
                 .u.c.nbytes = 1,
@@ -305,6 +369,76 @@ static void blkdev_cleanup(void)
 
         ret = kthread_stop(p);
         BUG_ON(ret);
+        put_task_struct(p);
+
         close(fds[0]);
         close(fds[1]);
 }
+
+static void aio_op(struct bio *bio, struct iovec *iov, unsigned i, int opcode)
+{
+        ssize_t ret;
+        struct iocb iocb = {
+                .data           = bio,
+                .aio_fildes     = bio->bi_opf & REQ_FUA
+                        ? bio->bi_bdev->bd_sync_fd
+                        : bio->bi_bdev->bd_fd,
+                .aio_lio_opcode = opcode,
+                .u.c.buf        = iov,
+                .u.c.nbytes     = i,
+                .u.c.offset     = bio->bi_iter.bi_sector << 9,
+
+        }, *iocbp = &iocb;
+
+        atomic_inc(&running_requests);
+        ret = io_submit(aio_ctx, 1, &iocbp);
+        if (ret != 1)
+                die("io_submit err: %s", strerror(-ret));
+}
+
+static void aio_read(struct bio *bio, struct iovec *iov, unsigned i)
+{
+        aio_op(bio, iov, i, IO_CMD_PREADV);
+}
+
+static void aio_write(struct bio *bio, struct iovec * iov, unsigned i)
+{
+        aio_op(bio, iov, i, IO_CMD_PWRITEV);
+}
+
+
+/* not implemented */
+static void uring_init(void) {
+        io_fallback();
+}
+
+struct fops fops_list[] = {
+        {
+                .init           = uring_init,
+        }, {
+                .init           = aio_init,
+                .cleanup        = aio_cleanup,
+                .read           = aio_read,
+                .write          = aio_write,
+        }, {
+                .init           = sync_init,
+                .cleanup        = sync_cleanup,
+                .read           = sync_read,
+                .write          = sync_write,
+        }, {
+                /* NULL */
+        }
+};
+
+__attribute__((constructor(102)))
+static void blkdev_init(void)
+{
+        fops = fops_list;
+        fops->init();
+}
+
+__attribute__((destructor(102)))
+static void blkdev_cleanup(void)
+{
+        fops->cleanup();
+}
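
The core of this patch is the backend table: generic_make_request() no longer builds libaio iocbs itself, it dispatches through fops->read()/fops->write(), and the constructor walks fops_list (io_uring stub, then libaio, then plain preadv/pwritev) by letting each backend's init() call io_fallback() when it cannot be used. The standalone sketch below illustrates that fallback pattern in isolation; it is not code from the patch, and the fake_* names and the printf output are made up for the example.

/*
 * Minimal sketch of the NULL-terminated backend table with fallback,
 * mirroring the control flow of fops_list/io_fallback() in this patch.
 * All identifiers here are hypothetical; plain C99, no external deps.
 */
#include <stdio.h>
#include <stdlib.h>

struct fops {
        void (*init)(void);
        void (*read)(void);
};

static struct fops *fops;               /* currently selected backend */

static void io_fallback(void)
{
        fops++;                         /* advance to the next, less preferred backend */
        if (fops->init == NULL) {
                fprintf(stderr, "no fallback possible\n");
                exit(1);
        }
        fops->init();
}

/* Preferred backend is "unavailable" here, so it immediately falls back. */
static void fake_uring_init(void) { io_fallback(); }

static void fake_sync_init(void)  { printf("using sync backend\n"); }
static void fake_sync_read(void)  { printf("sync read\n"); }

static struct fops fops_list[] = {
        { .init = fake_uring_init },                            /* most preferred */
        { .init = fake_sync_init, .read = fake_sync_read },     /* last resort */
        { NULL, NULL }                                          /* terminator */
};

int main(void)
{
        /* In the patch this selection runs from a constructor(102). */
        fops = fops_list;
        fops->init();           /* ends up selecting the sync backend */
        fops->read();           /* generic_make_request() dispatches like this */
        return 0;
}

Keeping the chain as a NULL-terminated array means a real io_uring backend can later be slotted into the first entry without touching the dispatch path; the same io_fallback() walk also lets aio_init() degrade to the sync backend when io_setup() returns -ENOSYS.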