X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=cmd_fusemount.c;h=f7e8b0d38f7eb53f5172b133a42329d796428b2a;hb=648b871d3d75c5aca86cdead5af90897b0b9d29a;hp=dc5b563070be7672b6bec03c1b0e3977bdbe6d87;hpb=81551c8f1bcc11526be15b43526fa702f92aa983;p=bcachefs-tools-debian diff --git a/cmd_fusemount.c b/cmd_fusemount.c index dc5b563..f7e8b0d 100644 --- a/cmd_fusemount.c +++ b/cmd_fusemount.c @@ -44,6 +44,7 @@ static struct stat inode_to_stat(struct bch_fs *c, struct bch_inode_unpacked *bi) { return (struct stat) { + .st_ino = unmap_root_ino(bi->bi_inum), .st_size = bi->bi_size, .st_mode = bi->bi_mode, .st_uid = bi->bi_uid, @@ -62,7 +63,7 @@ static struct fuse_entry_param inode_to_entry(struct bch_fs *c, struct bch_inode_unpacked *bi) { return (struct fuse_entry_param) { - .ino = bi->bi_inum, + .ino = unmap_root_ino(bi->bi_inum), .generation = bi->bi_generation, .attr = inode_to_stat(c, bi), .attr_timeout = DBL_MAX, @@ -72,7 +73,11 @@ static struct fuse_entry_param inode_to_entry(struct bch_fs *c, static void bcachefs_fuse_init(void *arg, struct fuse_conn_info *conn) { - conn->want |= FUSE_CAP_WRITEBACK_CACHE; + if (conn->capable & FUSE_CAP_WRITEBACK_CACHE) { + fuse_log(FUSE_LOG_DEBUG, "fuse_init: activating writeback\n"); + conn->want |= FUSE_CAP_WRITEBACK_CACHE; + } else + fuse_log(FUSE_LOG_DEBUG, "fuse_init: writeback not capable\n"); //conn->want |= FUSE_CAP_POSIX_ACL; } @@ -93,6 +98,9 @@ static void bcachefs_fuse_lookup(fuse_req_t req, fuse_ino_t dir, u64 inum; int ret; + fuse_log(FUSE_LOG_DEBUG, "fuse_lookup(dir=%llu name=%s)\n", + dir, name); + dir = map_root_ino(dir); ret = bch2_inode_find_by_inum(c, dir, &bi); @@ -105,20 +113,26 @@ static void bcachefs_fuse_lookup(fuse_req_t req, fuse_ino_t dir, inum = bch2_dirent_lookup(c, dir, &hash_info, &qstr); if (!inum) { - ret = -ENOENT; - goto err; + struct fuse_entry_param e = { + .attr_timeout = DBL_MAX, + .entry_timeout = DBL_MAX, + }; + fuse_reply_entry(req, &e); + return; } ret = bch2_inode_find_by_inum(c, inum, &bi); if (ret) goto err; - bi.bi_inum = unmap_root_ino(bi.bi_inum); + fuse_log(FUSE_LOG_DEBUG, "fuse_lookup ret(inum=%llu)\n", + bi.bi_inum); struct fuse_entry_param e = inode_to_entry(c, &bi); fuse_reply_entry(req, &e); return; err: + fuse_log(FUSE_LOG_DEBUG, "fuse_lookup error %i\n", ret); fuse_reply_err(req, -ret); } @@ -130,15 +144,19 @@ static void bcachefs_fuse_getattr(fuse_req_t req, fuse_ino_t inum, struct stat attr; int ret; + fuse_log(FUSE_LOG_DEBUG, "fuse_getattr(inum=%llu)\n", + inum); + inum = map_root_ino(inum); ret = bch2_inode_find_by_inum(c, inum, &bi); if (ret) { + fuse_log(FUSE_LOG_DEBUG, "fuse_getattr error %i\n", ret); fuse_reply_err(req, -ret); return; } - bi.bi_inum = unmap_root_ino(bi.bi_inum); + fuse_log(FUSE_LOG_DEBUG, "fuse_getattr success\n"); attr = inode_to_stat(c, &bi); fuse_reply_attr(req, &attr, DBL_MAX); @@ -183,6 +201,7 @@ retry: inode_u.bi_atime = now; if (to_set & FUSE_SET_ATTR_MTIME_NOW) inode_u.bi_mtime = now; + /* TODO: CTIME? */ ret = bch2_inode_write(&trans, iter, &inode_u) ?: bch2_trans_commit(&trans, NULL, NULL, @@ -202,15 +221,6 @@ err: } } -static void bcachefs_fuse_readlink(fuse_req_t req, fuse_ino_t inum) -{ - //struct bch_fs *c = fuse_req_userdata(req); - - //char *link = malloc(); - - //fuse_reply_readlink(req, link); -} - static int do_create(struct bch_fs *c, u64 dir, const char *name, mode_t mode, dev_t rdev, struct bch_inode_unpacked *new_inode) @@ -251,6 +261,9 @@ err: static void bcachefs_fuse_mkdir(fuse_req_t req, fuse_ino_t dir, const char *name, mode_t mode) { + BUG_ON(mode & S_IFMT); + + mode |= S_IFDIR; bcachefs_fuse_mknod(req, dir, name, mode, 0); } @@ -279,14 +292,6 @@ static void bcachefs_fuse_rmdir(fuse_req_t req, fuse_ino_t dir, bcachefs_fuse_unlink(req, dir, name); } -#if 0 -static void bcachefs_fuse_symlink(fuse_req_t req, const char *link, - fuse_ino_t parent, const char *name) -{ - struct bch_fs *c = fuse_req_userdata(req); -} -#endif - static void bcachefs_fuse_rename(fuse_req_t req, fuse_ino_t src_dir, const char *srcname, fuse_ino_t dst_dir, const char *dstname, @@ -322,6 +327,8 @@ static void bcachefs_fuse_link(fuse_req_t req, fuse_ino_t inum, struct qstr qstr = QSTR(newname); int ret; + newparent = map_root_ino(newparent); + ret = bch2_trans_do(c, NULL, BTREE_INSERT_ATOMIC, bch2_link_trans(&trans, newparent, inum, &inode_u, &qstr)); @@ -371,51 +378,202 @@ static void bcachefs_fuse_read_endio(struct bio *bio) closure_put(bio->bi_private); } +struct fuse_align_io { + off_t start; + size_t pad_start; + off_t end; + size_t pad_end; + size_t size; +}; + +/* Handle unaligned start and end */ +/* TODO: align to block_bytes, sector size, or page size? */ +static struct fuse_align_io align_io(const struct bch_fs *c, size_t size, + off_t offset) +{ + struct fuse_align_io align; + + BUG_ON(offset < 0); + + align.start = round_down(offset, block_bytes(c)); + align.pad_start = offset - align.start; + + off_t end = offset + size; + align.end = round_up(end, block_bytes(c)); + align.pad_end = align.end - end; + + align.size = align.end - align.start; + + return align; +} + +/* + * Given an aligned number of bytes transferred, figure out how many unaligned + * bytes were transferred. + */ +static size_t align_fix_up_bytes(const struct fuse_align_io *align, + size_t align_bytes) +{ + size_t bytes = 0; + + if (align_bytes > align->pad_start) { + bytes = align_bytes - align->pad_start; + bytes = bytes > align->pad_end ? bytes - align->pad_end : 0; + } + + return bytes; +} + +/* + * Read aligned data. + */ +static int read_aligned(struct bch_fs *c, fuse_ino_t inum, size_t aligned_size, + off_t aligned_offset, void *buf) +{ + BUG_ON(aligned_size & (block_bytes(c) - 1)); + BUG_ON(aligned_offset & (block_bytes(c) - 1)); + + struct bch_io_opts io_opts; + if (get_inode_io_opts(c, inum, &io_opts)) + return -ENOENT; + + struct bch_read_bio rbio; + struct bio_vec bv; + userbio_init(&rbio.bio, &bv, buf, aligned_size); + bio_set_op_attrs(&rbio.bio, REQ_OP_READ, REQ_SYNC); + rbio.bio.bi_iter.bi_sector = aligned_offset >> 9; + + struct closure cl; + closure_init_stack(&cl); + + closure_get(&cl); + rbio.bio.bi_end_io = bcachefs_fuse_read_endio; + rbio.bio.bi_private = &cl; + + bch2_read(c, rbio_init(&rbio.bio, io_opts), inum); + + closure_sync(&cl); + + return -blk_status_to_errno(rbio.bio.bi_status); +} + static void bcachefs_fuse_read(fuse_req_t req, fuse_ino_t inum, size_t size, off_t offset, struct fuse_file_info *fi) { struct bch_fs *c = fuse_req_userdata(req); - if ((size|offset) & block_bytes(c)) { - fuse_reply_err(req, EINVAL); + fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_read(%llu, %zd, %lld)\n", + inum, size, offset); + + /* Check inode size. */ + struct bch_inode_unpacked bi; + int ret = bch2_inode_find_by_inum(c, inum, &bi); + if (ret) { + fuse_reply_err(req, -ret); return; } - struct bch_io_opts io_opts; - if (get_inode_io_opts(c, inum, &io_opts)) { - fuse_reply_err(req, ENOENT); + off_t end = min_t(u64, bi.bi_size, offset + size); + if (end <= offset) { + fuse_reply_buf(req, NULL, 0); return; } + size = end - offset; - void *buf = aligned_alloc(max(PAGE_SIZE, size), size); + struct fuse_align_io align = align_io(c, size, offset); + + void *buf = aligned_alloc(PAGE_SIZE, align.size); if (!buf) { fuse_reply_err(req, ENOMEM); return; } - struct bch_read_bio rbio; + ret = read_aligned(c, inum, align.size, align.start, buf); + + if (likely(!ret)) + fuse_reply_buf(req, buf + align.pad_start, size); + else + fuse_reply_err(req, -ret); + + free(buf); +} + +static int inode_update_times(struct bch_fs *c, fuse_ino_t inum) +{ + struct btree_trans trans; + struct btree_iter *iter; + struct bch_inode_unpacked inode_u; + int ret = 0; + u64 now; + + bch2_trans_init(&trans, c, 0, 0); +retry: + bch2_trans_begin(&trans); + now = bch2_current_time(c); + + iter = bch2_inode_peek(&trans, &inode_u, inum, BTREE_ITER_INTENT); + ret = PTR_ERR_OR_ZERO(iter); + if (ret) + goto err; + + inode_u.bi_mtime = now; + inode_u.bi_ctime = now; + + ret = bch2_inode_write(&trans, iter, &inode_u); + if (ret) + goto err; + + ret = bch2_trans_commit(&trans, NULL, NULL, + BTREE_INSERT_ATOMIC|BTREE_INSERT_NOFAIL); + +err: + if (ret == -EINTR) + goto retry; + + bch2_trans_exit(&trans); + return ret; +} + +static int write_aligned(struct bch_fs *c, fuse_ino_t inum, + struct bch_io_opts io_opts, void *buf, + size_t aligned_size, off_t aligned_offset, + off_t new_i_size, size_t *written_out) +{ + struct bch_write_op op = { 0 }; struct bio_vec bv; struct closure cl; + BUG_ON(aligned_size & (block_bytes(c) - 1)); + BUG_ON(aligned_offset & (block_bytes(c) - 1)); + + *written_out = 0; + closure_init_stack(&cl); - userbio_init(&rbio.bio, &bv, buf, size); - bio_set_op_attrs(&rbio.bio, REQ_OP_READ, REQ_SYNC); - rbio.bio.bi_iter.bi_sector = offset >> 9; - rbio.bio.bi_end_io = bcachefs_fuse_read_endio; - rbio.bio.bi_private = &cl; - bch2_read(c, rbio_init(&rbio.bio, io_opts), inum); + bch2_write_op_init(&op, c, io_opts); /* XXX reads from op?! */ + op.write_point = writepoint_hashed(0); + op.nr_replicas = io_opts.data_replicas; + op.target = io_opts.foreground_target; + op.pos = POS(inum, aligned_offset >> 9); + op.new_i_size = new_i_size; - closure_sync(&cl); + userbio_init(&op.wbio.bio, &bv, buf, aligned_size); + bio_set_op_attrs(&op.wbio.bio, REQ_OP_WRITE, REQ_SYNC); - if (likely(!rbio.bio.bi_status)) { - fuse_reply_buf(req, buf, size); - } else { - fuse_reply_err(req, -blk_status_to_errno(rbio.bio.bi_status)); + if (bch2_disk_reservation_get(c, &op.res, aligned_size >> 9, + op.nr_replicas, 0)) { + /* XXX: use check_range_allocated like dio write path */ + return -ENOSPC; } - free(buf); + closure_call(&op.cl, bch2_write, NULL, &cl); + closure_sync(&cl); + + if (!op.error) + *written_out = op.written << 9; + + return op.error; } static void bcachefs_fuse_write(fuse_req_t req, fuse_ino_t inum, @@ -425,50 +583,175 @@ static void bcachefs_fuse_write(fuse_req_t req, fuse_ino_t inum, { struct bch_fs *c = fuse_req_userdata(req); struct bch_io_opts io_opts; - struct bch_write_op op; - struct bio_vec bv; - struct closure cl; + size_t aligned_written; + int ret = 0; - if ((size|offset) & block_bytes(c)) { - fuse_reply_err(req, EINVAL); - return; - } + fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_write(%llu, %zd, %lld)\n", + inum, size, offset); - closure_init_stack(&cl); + struct fuse_align_io align = align_io(c, size, offset); if (get_inode_io_opts(c, inum, &io_opts)) { - fuse_reply_err(req, ENOENT); - return; + ret = -ENOENT; + goto err; } - bch2_write_op_init(&op, c, io_opts); - op.write_point = writepoint_hashed(0); - op.nr_replicas = io_opts.data_replicas; - op.target = io_opts.foreground_target; + /* Realign the data and read in start and end, if needed */ + void *aligned_buf = aligned_alloc(PAGE_SIZE, align.size); - userbio_init(&op.wbio.bio, &bv, (void *) buf, size); - bio_set_op_attrs(&op.wbio.bio, REQ_OP_WRITE, REQ_SYNC); - op.wbio.bio.bi_iter.bi_sector = offset >> 9; + /* Read partial start data. */ + if (align.pad_start) { + memset(aligned_buf, 0, block_bytes(c)); - if (bch2_disk_reservation_get(c, &op.res, size >> 9, - op.nr_replicas, 0)) { - /* XXX: use check_range_allocated like dio write path */ - fuse_reply_err(req, ENOSPC); - return; + ret = read_aligned(c, inum, block_bytes(c), align.start, + aligned_buf); + if (ret) + goto err; } - closure_call(&op.cl, bch2_write, NULL, &cl); - closure_sync(&cl); + /* + * Read partial end data. If the whole write fits in one block, the + * start data and the end data are the same so this isn't needed. + */ + if (align.pad_end && + !(align.pad_start && align.size == block_bytes(c))) { + off_t partial_end_start = align.end - block_bytes(c); + size_t buf_offset = align.size - block_bytes(c); - if (op.written) { - fuse_reply_write(req, (size_t) op.written << 9); - } else { - BUG_ON(!op.error); - fuse_reply_err(req, -op.error); + memset(aligned_buf + buf_offset, 0, block_bytes(c)); + + ret = read_aligned(c, inum, block_bytes(c), partial_end_start, + aligned_buf + buf_offset); + if (ret) + goto err; + } + + /* Overlay what we want to write. */ + memcpy(aligned_buf + align.pad_start, buf, size); + + /* Actually write. */ + ret = write_aligned(c, inum, io_opts, aligned_buf, + align.size, align.start, + offset + size, &aligned_written); + + /* Figure out how many unaligned bytes were written. */ + size_t written = align_fix_up_bytes(&align, aligned_written); + BUG_ON(written > size); + + fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_write: wrote %zd bytes\n", + written); + + if (written > 0) + ret = 0; + + /* + * Update inode times. + * TODO: Integrate with bch2_extent_update() + */ + if (!ret) + ret = inode_update_times(c, inum); + + if (!ret) { + BUG_ON(written == 0); + fuse_reply_write(req, written); + return; } + +err: + fuse_reply_err(req, -ret); +} + +static void bcachefs_fuse_symlink(fuse_req_t req, const char *link, + fuse_ino_t dir, const char *name) +{ + struct bch_fs *c = fuse_req_userdata(req); + struct bch_inode_unpacked new_inode; + size_t link_len = strlen(link); + int ret; + + dir = map_root_ino(dir); + + ret = do_create(c, dir, name, S_IFLNK|S_IRWXUGO, 0, &new_inode); + if (ret) + goto err; + + struct bch_io_opts io_opts; + ret = get_inode_io_opts(c, new_inode.bi_inum, &io_opts); + if (ret) + goto err; + + struct fuse_align_io align = align_io(c, link_len + 1, 0); + + void *aligned_buf = aligned_alloc(PAGE_SIZE, align.size); + memset(aligned_buf, 0, align.size); + memcpy(aligned_buf, link, link_len); /* already terminated */ + + size_t aligned_written; + ret = write_aligned(c, new_inode.bi_inum, io_opts, aligned_buf, + align.size, align.start, link_len + 1, + &aligned_written); + free(aligned_buf); + + if (ret) + goto err; + + size_t written = align_fix_up_bytes(&align, aligned_written); + BUG_ON(written != link_len + 1); // TODO: handle short + + ret = inode_update_times(c, new_inode.bi_inum); + if (ret) + goto err; + + new_inode.bi_size = written; + + struct fuse_entry_param e = inode_to_entry(c, &new_inode); + fuse_reply_entry(req, &e); + return; + +err: + fuse_reply_err(req, -ret); +} + +static void bcachefs_fuse_readlink(fuse_req_t req, fuse_ino_t inum) +{ + struct bch_fs *c = fuse_req_userdata(req); + char *buf = NULL; + + struct bch_inode_unpacked bi; + int ret = bch2_inode_find_by_inum(c, inum, &bi); + if (ret) + goto err; + + struct fuse_align_io align = align_io(c, bi.bi_size, 0); + + ret = -ENOMEM; + buf = aligned_alloc(PAGE_SIZE, align.size); + if (!buf) + goto err; + + ret = read_aligned(c, inum, align.size, align.start, buf); + if (ret) + goto err; + + BUG_ON(buf[align.size - 1] != 0); + + fuse_reply_readlink(req, buf); + +err: + if (ret) + fuse_reply_err(req, -ret); + + free(buf); } #if 0 +/* + * FUSE flush is essentially the close() call, however it is not guaranteed + * that one flush happens per open/create. + * + * It doesn't have to do anything, and is mostly relevant for NFS-style + * filesystems where close has some relationship to caching. + */ static void bcachefs_fuse_flush(fuse_req_t req, fuse_ino_t inum, struct fuse_file_info *fi) { @@ -494,43 +777,44 @@ static void bcachefs_fuse_opendir(fuse_req_t req, fuse_ino_t inum, } #endif -struct fuse_dir_entry { - u64 ino; - unsigned type; - char name[0]; -}; - struct fuse_dir_context { struct dir_context ctx; fuse_req_t req; char *buf; size_t bufsize; - - struct fuse_dir_entry *prev; }; -static int fuse_send_dir_entry(struct fuse_dir_context *ctx, loff_t pos) -{ - struct fuse_dir_entry *de = ctx->prev; - ctx->prev = NULL; - - struct stat statbuf = { - .st_ino = unmap_root_ino(de->ino), - .st_mode = de->type << 12, - }; +struct fuse_dirent { + uint64_t ino; + uint64_t off; + uint32_t namelen; + uint32_t type; + char name[]; +}; - size_t len = fuse_add_direntry(ctx->req, ctx->buf, ctx->bufsize, - de->name, &statbuf, pos); +#define FUSE_NAME_OFFSET offsetof(struct fuse_dirent, name) +#define FUSE_DIRENT_ALIGN(x) \ + (((x) + sizeof(uint64_t) - 1) & ~(sizeof(uint64_t) - 1)) - free(de); +static size_t fuse_add_direntry2(char *buf, size_t bufsize, + const char *name, int namelen, + const struct stat *stbuf, off_t off) +{ + size_t entlen = FUSE_NAME_OFFSET + namelen; + size_t entlen_padded = FUSE_DIRENT_ALIGN(entlen); + struct fuse_dirent *dirent = (struct fuse_dirent *) buf; - if (len > ctx->bufsize) - return -EINVAL; + if ((buf == NULL) || (entlen_padded > bufsize)) + return entlen_padded; - ctx->buf += len; - ctx->bufsize -= len; + dirent->ino = stbuf->st_ino; + dirent->off = off; + dirent->namelen = namelen; + dirent->type = (stbuf->st_mode & S_IFMT) >> 12; + memcpy(dirent->name, name, namelen); + memset(dirent->name + namelen, 0, entlen_padded - entlen); - return 0; + return entlen_padded; } static int fuse_filldir(struct dir_context *_ctx, @@ -540,47 +824,41 @@ static int fuse_filldir(struct dir_context *_ctx, struct fuse_dir_context *ctx = container_of(_ctx, struct fuse_dir_context, ctx); - fuse_log(FUSE_LOG_DEBUG, "fuse_filldir(ctx={.ctx={.pos=%llu}}, " - "name=%s, namelen=%d, pos=%lld, dir=%llu, type=%u)\n", - ctx->ctx.pos, name, namelen, pos, ino, type); + struct stat statbuf = { + .st_ino = unmap_root_ino(ino), + .st_mode = type << 12, + }; - /* - * We have to emit directory entries after reading the next entry, - * because the previous entry contains a pointer to next. - */ - if (ctx->prev) { - int ret = fuse_send_dir_entry(ctx, pos); - if (ret) - return ret; - } + fuse_log(FUSE_LOG_DEBUG, "fuse_filldir(name=%s inum=%llu pos=%llu)\n", + name, statbuf.st_ino, pos); - struct fuse_dir_entry *cur = malloc(sizeof *cur + namelen + 1); - cur->ino = ino; - cur->type = type; - memcpy(cur->name, name, namelen); - cur->name[namelen] = 0; + size_t len = fuse_add_direntry2(ctx->buf, + ctx->bufsize, + name, + namelen, + &statbuf, + pos + 1); - ctx->prev = cur; + if (len > ctx->bufsize) + return -1; + ctx->buf += len; + ctx->bufsize -= len; return 0; } static bool handle_dots(struct fuse_dir_context *ctx, fuse_ino_t dir) { - int ret = 0; - if (ctx->ctx.pos == 0) { - ret = fuse_filldir(&ctx->ctx, ".", 1, ctx->ctx.pos, - unmap_root_ino(dir), DT_DIR); - if (ret < 0) + if (fuse_filldir(&ctx->ctx, ".", 1, ctx->ctx.pos, + dir, DT_DIR) < 0) return false; ctx->ctx.pos = 1; } if (ctx->ctx.pos == 1) { - ret = fuse_filldir(&ctx->ctx, "..", 2, ctx->ctx.pos, - /*TODO: parent*/ 1, DT_DIR); - if (ret < 0) + if (fuse_filldir(&ctx->ctx, "..", 2, ctx->ctx.pos, + /*TODO: parent*/ 1, DT_DIR) < 0) return false; ctx->ctx.pos = 2; } @@ -622,20 +900,7 @@ static void bcachefs_fuse_readdir(fuse_req_t req, fuse_ino_t dir, goto reply; ret = bch2_readdir(c, dir, &ctx.ctx); - reply: - /* - * If we have something to send, the error above doesn't matter. - * - * Alternatively, if this send fails, but we previously sent something, - * then this is a success. - */ - if (ctx.prev) { - ret = fuse_send_dir_entry(&ctx, ctx.ctx.pos); - if (ret && ctx.buf != buf) - ret = 0; - } - if (!ret) { fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_readdir reply %zd\n", ctx.buf - buf); @@ -762,7 +1027,7 @@ static const struct fuse_lowlevel_ops bcachefs_fuse_ops = { .mkdir = bcachefs_fuse_mkdir, .unlink = bcachefs_fuse_unlink, .rmdir = bcachefs_fuse_rmdir, - //.symlink = bcachefs_fuse_symlink, + .symlink = bcachefs_fuse_symlink, .rename = bcachefs_fuse_rename, .link = bcachefs_fuse_link, .open = bcachefs_fuse_open,