struct bch_inode_unpacked *bi)
{
return (struct stat) {
+ .st_ino = unmap_root_ino(bi->bi_inum),
.st_size = bi->bi_size,
.st_mode = bi->bi_mode,
.st_uid = bi->bi_uid,
struct bch_inode_unpacked *bi)
{
return (struct fuse_entry_param) {
- .ino = bi->bi_inum,
+ .ino = unmap_root_ino(bi->bi_inum),
.generation = bi->bi_generation,
.attr = inode_to_stat(c, bi),
.attr_timeout = DBL_MAX,
static void bcachefs_fuse_init(void *arg, struct fuse_conn_info *conn)
{
- conn->want |= FUSE_CAP_WRITEBACK_CACHE;
+ if (conn->capable & FUSE_CAP_WRITEBACK_CACHE) {
+ fuse_log(FUSE_LOG_DEBUG, "fuse_init: activating writeback\n");
+ conn->want |= FUSE_CAP_WRITEBACK_CACHE;
+ } else
+ fuse_log(FUSE_LOG_DEBUG, "fuse_init: writeback not capable\n");
//conn->want |= FUSE_CAP_POSIX_ACL;
}
u64 inum;
int ret;
+ fuse_log(FUSE_LOG_DEBUG, "fuse_lookup(dir=%llu name=%s)\n",
+ dir, name);
+
dir = map_root_ino(dir);
ret = bch2_inode_find_by_inum(c, dir, &bi);
inum = bch2_dirent_lookup(c, dir, &hash_info, &qstr);
if (!inum) {
- ret = -ENOENT;
- goto err;
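+ /*
+ * Lookup failed: replying with ino == 0 lets the kernel cache the
+ * negative entry for entry_timeout rather than just getting ENOENT.
+ */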
+ struct fuse_entry_param e = {
+ .attr_timeout = DBL_MAX,
+ .entry_timeout = DBL_MAX,
+ };
+ fuse_reply_entry(req, &e);
+ return;
}
ret = bch2_inode_find_by_inum(c, inum, &bi);
if (ret)
goto err;
- bi.bi_inum = unmap_root_ino(bi.bi_inum);
+ fuse_log(FUSE_LOG_DEBUG, "fuse_lookup ret(inum=%llu)\n",
+ bi.bi_inum);
struct fuse_entry_param e = inode_to_entry(c, &bi);
fuse_reply_entry(req, &e);
return;
err:
+ fuse_log(FUSE_LOG_DEBUG, "fuse_lookup error %i\n", ret);
fuse_reply_err(req, -ret);
}
struct stat attr;
int ret;
+ fuse_log(FUSE_LOG_DEBUG, "fuse_getattr(inum=%llu)\n",
+ inum);
+
inum = map_root_ino(inum);
ret = bch2_inode_find_by_inum(c, inum, &bi);
if (ret) {
+ fuse_log(FUSE_LOG_DEBUG, "fuse_getattr error %i\n", ret);
fuse_reply_err(req, -ret);
return;
}
- bi.bi_inum = unmap_root_ino(bi.bi_inum);
+ fuse_log(FUSE_LOG_DEBUG, "fuse_getattr success\n");
attr = inode_to_stat(c, &bi);
fuse_reply_attr(req, &attr, DBL_MAX);
inode_u.bi_atime = now;
if (to_set & FUSE_SET_ATTR_MTIME_NOW)
inode_u.bi_mtime = now;
+ /* TODO: CTIME? */
ret = bch2_inode_write(&trans, iter, &inode_u) ?:
bch2_trans_commit(&trans, NULL, NULL,
}
}
-static void bcachefs_fuse_readlink(fuse_req_t req, fuse_ino_t inum)
-{
- //struct bch_fs *c = fuse_req_userdata(req);
-
- //char *link = malloc();
-
- //fuse_reply_readlink(req, link);
-}
-
static int do_create(struct bch_fs *c, u64 dir,
const char *name, mode_t mode, dev_t rdev,
struct bch_inode_unpacked *new_inode)
static void bcachefs_fuse_mkdir(fuse_req_t req, fuse_ino_t dir,
const char *name, mode_t mode)
{
+ BUG_ON(mode & S_IFMT);
+
+ mode |= S_IFDIR;
bcachefs_fuse_mknod(req, dir, name, mode, 0);
}
bcachefs_fuse_unlink(req, dir, name);
}
-#if 0
-static void bcachefs_fuse_symlink(fuse_req_t req, const char *link,
- fuse_ino_t parent, const char *name)
-{
- struct bch_fs *c = fuse_req_userdata(req);
-}
-#endif
-
static void bcachefs_fuse_rename(fuse_req_t req,
fuse_ino_t src_dir, const char *srcname,
fuse_ino_t dst_dir, const char *dstname,
struct qstr qstr = QSTR(newname);
int ret;
+ newparent = map_root_ino(newparent);
+
ret = bch2_trans_do(c, NULL, BTREE_INSERT_ATOMIC,
bch2_link_trans(&trans, newparent,
inum, &inode_u, &qstr));
closure_put(bio->bi_private);
}
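+/*
+ * How an arbitrary byte range maps onto block-aligned I/O: [start, end) is
+ * the aligned range actually read or written, pad_start/pad_end are the
+ * extra bytes at either edge, and size = end - start.
+ */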
+struct fuse_align_io {
+ off_t start;
+ size_t pad_start;
+ off_t end;
+ size_t pad_end;
+ size_t size;
+};
+
+/* Handle unaligned start and end */
+/* TODO: align to block_bytes, sector size, or page size? */
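+/*
+ * E.g. with 512-byte blocks, offset=100 size=50 gives start=0, pad_start=100,
+ * end=512, pad_end=362, size=512.
+ */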
+static struct fuse_align_io align_io(const struct bch_fs *c, size_t size,
+ off_t offset)
+{
+ struct fuse_align_io align;
+
+ BUG_ON(offset < 0);
+
+ align.start = round_down(offset, block_bytes(c));
+ align.pad_start = offset - align.start;
+
+ off_t end = offset + size;
+ align.end = round_up(end, block_bytes(c));
+ align.pad_end = align.end - end;
+
+ align.size = align.end - align.start;
+
+ return align;
+}
+
+/*
+ * Given an aligned number of bytes transferred, figure out how many unaligned
+ * bytes were transferred.
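+ * E.g. with pad_start=100 and pad_end=362 (as in the align_io() example),
+ * 512 aligned bytes transferred corresponds to 50 user bytes.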
+ */
+static size_t align_fix_up_bytes(const struct fuse_align_io *align,
+ size_t align_bytes)
+{
+ size_t bytes = 0;
+
+ if (align_bytes > align->pad_start) {
+ bytes = align_bytes - align->pad_start;
+ bytes = bytes > align->pad_end ? bytes - align->pad_end : 0;
+ }
+
+ return bytes;
+}
+
+/*
+ * Read aligned data.
+ */
+static int read_aligned(struct bch_fs *c, fuse_ino_t inum, size_t aligned_size,
+ off_t aligned_offset, void *buf)
+{
+ BUG_ON(aligned_size & (block_bytes(c) - 1));
+ BUG_ON(aligned_offset & (block_bytes(c) - 1));
+
+ struct bch_io_opts io_opts;
+ if (get_inode_io_opts(c, inum, &io_opts))
+ return -ENOENT;
+
+ struct bch_read_bio rbio;
+ struct bio_vec bv;
+ userbio_init(&rbio.bio, &bv, buf, aligned_size);
+ bio_set_op_attrs(&rbio.bio, REQ_OP_READ, REQ_SYNC);
+ rbio.bio.bi_iter.bi_sector = aligned_offset >> 9;
+
+ struct closure cl;
+ closure_init_stack(&cl);
+
+ closure_get(&cl);
+ rbio.bio.bi_end_io = bcachefs_fuse_read_endio;
+ rbio.bio.bi_private = &cl;
+
+ bch2_read(c, rbio_init(&rbio.bio, io_opts), inum);
+
+ closure_sync(&cl);
+
+ return -blk_status_to_errno(rbio.bio.bi_status);
+}
+
static void bcachefs_fuse_read(fuse_req_t req, fuse_ino_t inum,
size_t size, off_t offset,
struct fuse_file_info *fi)
{
struct bch_fs *c = fuse_req_userdata(req);
- if ((size|offset) & block_bytes(c)) {
- fuse_reply_err(req, EINVAL);
+ fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_read(%llu, %zd, %lld)\n",
+ inum, size, offset);
+
+ /* Clamp the read to the inode size; reads at or past EOF return an empty buffer. */
+ struct bch_inode_unpacked bi;
+ int ret = bch2_inode_find_by_inum(c, inum, &bi);
+ if (ret) {
+ fuse_reply_err(req, -ret);
return;
}
- struct bch_io_opts io_opts;
- if (get_inode_io_opts(c, inum, &io_opts)) {
- fuse_reply_err(req, ENOENT);
+ off_t end = min_t(u64, bi.bi_size, offset + size);
+ if (end <= offset) {
+ fuse_reply_buf(req, NULL, 0);
return;
}
+ size = end - offset;
- void *buf = aligned_alloc(max(PAGE_SIZE, size), size);
+ struct fuse_align_io align = align_io(c, size, offset);
+
+ void *buf = aligned_alloc(PAGE_SIZE, align.size);
if (!buf) {
fuse_reply_err(req, ENOMEM);
return;
}
- struct bch_read_bio rbio;
+ ret = read_aligned(c, inum, align.size, align.start, buf);
+
+ if (likely(!ret))
+ fuse_reply_buf(req, buf + align.pad_start, size);
+ else
+ fuse_reply_err(req, -ret);
+
+ free(buf);
+}
+
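+/*
+ * Set an inode's mtime and ctime to the current time, retrying the
+ * transaction until it commits.
+ */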
+static int inode_update_times(struct bch_fs *c, fuse_ino_t inum)
+{
+ struct btree_trans trans;
+ struct btree_iter *iter;
+ struct bch_inode_unpacked inode_u;
+ int ret = 0;
+ u64 now;
+
+ bch2_trans_init(&trans, c, 0, 0);
+retry:
+ bch2_trans_begin(&trans);
+ now = bch2_current_time(c);
+
+ iter = bch2_inode_peek(&trans, &inode_u, inum, BTREE_ITER_INTENT);
+ ret = PTR_ERR_OR_ZERO(iter);
+ if (ret)
+ goto err;
+
+ inode_u.bi_mtime = now;
+ inode_u.bi_ctime = now;
+
+ ret = bch2_inode_write(&trans, iter, &inode_u);
+ if (ret)
+ goto err;
+
+ ret = bch2_trans_commit(&trans, NULL, NULL,
+ BTREE_INSERT_ATOMIC|BTREE_INSERT_NOFAIL);
+
+err:
+ if (ret == -EINTR)
+ goto retry;
+
+ bch2_trans_exit(&trans);
+ return ret;
+}
+
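+/*
+ * Write aligned data. aligned_size and aligned_offset must be multiples of
+ * the filesystem block size; *written_out returns how many of those bytes
+ * were actually written.
+ */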
+static int write_aligned(struct bch_fs *c, fuse_ino_t inum,
+ struct bch_io_opts io_opts, void *buf,
+ size_t aligned_size, off_t aligned_offset,
+ off_t new_i_size, size_t *written_out)
+{
+ struct bch_write_op op = { 0 };
struct bio_vec bv;
struct closure cl;
+ BUG_ON(aligned_size & (block_bytes(c) - 1));
+ BUG_ON(aligned_offset & (block_bytes(c) - 1));
+
+ *written_out = 0;
+
closure_init_stack(&cl);
- userbio_init(&rbio.bio, &bv, buf, size);
- bio_set_op_attrs(&rbio.bio, REQ_OP_READ, REQ_SYNC);
- rbio.bio.bi_iter.bi_sector = offset >> 9;
- rbio.bio.bi_end_io = bcachefs_fuse_read_endio;
- rbio.bio.bi_private = &cl;
- bch2_read(c, rbio_init(&rbio.bio, io_opts), inum);
+ bch2_write_op_init(&op, c, io_opts); /* XXX reads from op?! */
+ op.write_point = writepoint_hashed(0);
+ op.nr_replicas = io_opts.data_replicas;
+ op.target = io_opts.foreground_target;
+ op.pos = POS(inum, aligned_offset >> 9);
+ op.new_i_size = new_i_size;
- closure_sync(&cl);
+ userbio_init(&op.wbio.bio, &bv, buf, aligned_size);
+ bio_set_op_attrs(&op.wbio.bio, REQ_OP_WRITE, REQ_SYNC);
- if (likely(!rbio.bio.bi_status)) {
- fuse_reply_buf(req, buf, size);
- } else {
- fuse_reply_err(req, -blk_status_to_errno(rbio.bio.bi_status));
+ if (bch2_disk_reservation_get(c, &op.res, aligned_size >> 9,
+ op.nr_replicas, 0)) {
+ /* XXX: use check_range_allocated like dio write path */
+ return -ENOSPC;
}
- free(buf);
+ closure_call(&op.cl, bch2_write, NULL, &cl);
+ closure_sync(&cl);
+
+ if (!op.error)
+ *written_out = op.written << 9;
+
+ return op.error;
}
static void bcachefs_fuse_write(fuse_req_t req, fuse_ino_t inum,
{
struct bch_fs *c = fuse_req_userdata(req);
struct bch_io_opts io_opts;
- struct bch_write_op op;
- struct bio_vec bv;
- struct closure cl;
+ size_t aligned_written;
+ void *aligned_buf = NULL;
+ int ret = 0;
- if ((size|offset) & block_bytes(c)) {
- fuse_reply_err(req, EINVAL);
- return;
- }
+ fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_write(%llu, %zd, %lld)\n",
+ inum, size, offset);
- closure_init_stack(&cl);
+ struct fuse_align_io align = align_io(c, size, offset);
if (get_inode_io_opts(c, inum, &io_opts)) {
- fuse_reply_err(req, ENOENT);
- return;
+ ret = -ENOENT;
+ goto err;
}
- bch2_write_op_init(&op, c, io_opts);
- op.write_point = writepoint_hashed(0);
- op.nr_replicas = io_opts.data_replicas;
- op.target = io_opts.foreground_target;
+ /* Realign the data and read in the start and end blocks, if needed. */
+ aligned_buf = aligned_alloc(PAGE_SIZE, align.size);
+ if (!aligned_buf) {
+ ret = -ENOMEM;
+ goto err;
+ }
- userbio_init(&op.wbio.bio, &bv, (void *) buf, size);
- bio_set_op_attrs(&op.wbio.bio, REQ_OP_WRITE, REQ_SYNC);
- op.wbio.bio.bi_iter.bi_sector = offset >> 9;
+ /* Read partial start data. */
+ if (align.pad_start) {
+ memset(aligned_buf, 0, block_bytes(c));
- if (bch2_disk_reservation_get(c, &op.res, size >> 9,
- op.nr_replicas, 0)) {
- /* XXX: use check_range_allocated like dio write path */
- fuse_reply_err(req, ENOSPC);
- return;
+ ret = read_aligned(c, inum, block_bytes(c), align.start,
+ aligned_buf);
+ if (ret)
+ goto err;
}
- closure_call(&op.cl, bch2_write, NULL, &cl);
- closure_sync(&cl);
+ /*
+ * Read partial end data. If the whole write fits in one block, the
+ * start data and the end data are the same, so this isn't needed.
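+ *
+ * E.g. with 512-byte blocks, a 600-byte write at offset 100 touches two
+ * blocks, so both edges are read in; a 50-byte write at offset 100 stays
+ * within one block and only needs the read above.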
+ */
+ if (align.pad_end &&
+ !(align.pad_start && align.size == block_bytes(c))) {
+ off_t partial_end_start = align.end - block_bytes(c);
+ size_t buf_offset = align.size - block_bytes(c);
- if (op.written) {
- fuse_reply_write(req, (size_t) op.written << 9);
- } else {
- BUG_ON(!op.error);
- fuse_reply_err(req, -op.error);
+ memset(aligned_buf + buf_offset, 0, block_bytes(c));
+
+ ret = read_aligned(c, inum, block_bytes(c), partial_end_start,
+ aligned_buf + buf_offset);
+ if (ret)
+ goto err;
+ }
+
+ /* Overlay what we want to write. */
+ memcpy(aligned_buf + align.pad_start, buf, size);
+
+ /* Actually write. */
+ ret = write_aligned(c, inum, io_opts, aligned_buf,
+ align.size, align.start,
+ offset + size, &aligned_written);
+
+ /* Figure out how many unaligned bytes were written. */
+ size_t written = align_fix_up_bytes(&align, aligned_written);
+ BUG_ON(written > size);
+
+ fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_write: wrote %zd bytes\n",
+ written);
+
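+ /* Report a short write as success rather than an error. */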
+ if (written > 0)
+ ret = 0;
+
+ /*
+ * Update inode times.
+ * TODO: Integrate with bch2_extent_update()
+ */
+ if (!ret)
+ ret = inode_update_times(c, inum);
+
+ if (!ret) {
+ BUG_ON(written == 0);
+ fuse_reply_write(req, written);
+ free(aligned_buf);
+ return;
}
+
+err:
+ fuse_reply_err(req, -ret);
+ free(aligned_buf);
+}
+
+static void bcachefs_fuse_symlink(fuse_req_t req, const char *link,
+ fuse_ino_t dir, const char *name)
+{
+ struct bch_fs *c = fuse_req_userdata(req);
+ struct bch_inode_unpacked new_inode;
+ size_t link_len = strlen(link);
+ int ret;
+
+ dir = map_root_ino(dir);
+
+ ret = do_create(c, dir, name, S_IFLNK|S_IRWXUGO, 0, &new_inode);
+ if (ret)
+ goto err;
+
+ struct bch_io_opts io_opts;
+ ret = get_inode_io_opts(c, new_inode.bi_inum, &io_opts);
+ if (ret)
+ goto err;
+
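+ /* The symlink target is stored as ordinary inode data, including the nul. */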
+ struct fuse_align_io align = align_io(c, link_len + 1, 0);
+
+ void *aligned_buf = aligned_alloc(PAGE_SIZE, align.size);
+ if (!aligned_buf) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ memset(aligned_buf, 0, align.size);
+ memcpy(aligned_buf, link, link_len); /* nul terminator comes from the memset above */
+
+ size_t aligned_written;
+ ret = write_aligned(c, new_inode.bi_inum, io_opts, aligned_buf,
+ align.size, align.start, link_len + 1,
+ &aligned_written);
+ free(aligned_buf);
+
+ if (ret)
+ goto err;
+
+ size_t written = align_fix_up_bytes(&align, aligned_written);
+ BUG_ON(written != link_len + 1); // TODO: handle short
+
+ ret = inode_update_times(c, new_inode.bi_inum);
+ if (ret)
+ goto err;
+
+ new_inode.bi_size = written;
+
+ struct fuse_entry_param e = inode_to_entry(c, &new_inode);
+ fuse_reply_entry(req, &e);
+ return;
+
+err:
+ fuse_reply_err(req, -ret);
+}
+
+static void bcachefs_fuse_readlink(fuse_req_t req, fuse_ino_t inum)
+{
+ struct bch_fs *c = fuse_req_userdata(req);
+ char *buf = NULL;
+
+ struct bch_inode_unpacked bi;
+ int ret = bch2_inode_find_by_inum(c, inum, &bi);
+ if (ret)
+ goto err;
+
+ struct fuse_align_io align = align_io(c, bi.bi_size, 0);
+
+ ret = -ENOMEM;
+ buf = aligned_alloc(PAGE_SIZE, align.size);
+ if (!buf)
+ goto err;
+
+ ret = read_aligned(c, inum, align.size, align.start, buf);
+ if (ret)
+ goto err;
+
+ BUG_ON(buf[align.size - 1] != 0);
+
+ fuse_reply_readlink(req, buf);
+
+err:
+ if (ret)
+ fuse_reply_err(req, -ret);
+
+ free(buf);
}
#if 0
+/*
+ * FUSE flush is essentially the close() call; however, it is not guaranteed
+ * that exactly one flush happens per open/create.
+ *
+ * It doesn't have to do anything, and is mostly relevant for NFS-style
+ * filesystems where close has some relationship to caching.
+ */
static void bcachefs_fuse_flush(fuse_req_t req, fuse_ino_t inum,
struct fuse_file_info *fi)
{
}
#endif
-struct fuse_dir_entry {
- u64 ino;
- unsigned type;
- char name[0];
-};
-
struct fuse_dir_context {
struct dir_context ctx;
fuse_req_t req;
char *buf;
size_t bufsize;
-
- struct fuse_dir_entry *prev;
};
-static int fuse_send_dir_entry(struct fuse_dir_context *ctx, loff_t pos)
-{
- struct fuse_dir_entry *de = ctx->prev;
- ctx->prev = NULL;
-
- struct stat statbuf = {
- .st_ino = unmap_root_ino(de->ino),
- .st_mode = de->type << 12,
- };
+struct fuse_dirent {
+ uint64_t ino;
+ uint64_t off;
+ uint32_t namelen;
+ uint32_t type;
+ char name[];
+};
- size_t len = fuse_add_direntry(ctx->req, ctx->buf, ctx->bufsize,
- de->name, &statbuf, pos);
+#define FUSE_NAME_OFFSET offsetof(struct fuse_dirent, name)
+#define FUSE_DIRENT_ALIGN(x) \
+ (((x) + sizeof(uint64_t) - 1) & ~(sizeof(uint64_t) - 1))
- free(de);
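+/*
+ * Pack one directory entry into buf in the FUSE wire format, like libfuse's
+ * fuse_add_direntry() but taking an explicit name length. Returns the padded
+ * entry size; if that exceeds bufsize (or buf is NULL), nothing is written.
+ */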
+static size_t fuse_add_direntry2(char *buf, size_t bufsize,
+ const char *name, int namelen,
+ const struct stat *stbuf, off_t off)
+{
+ size_t entlen = FUSE_NAME_OFFSET + namelen;
+ size_t entlen_padded = FUSE_DIRENT_ALIGN(entlen);
+ struct fuse_dirent *dirent = (struct fuse_dirent *) buf;
- if (len > ctx->bufsize)
- return -EINVAL;
+ if ((buf == NULL) || (entlen_padded > bufsize))
+ return entlen_padded;
- ctx->buf += len;
- ctx->bufsize -= len;
+ dirent->ino = stbuf->st_ino;
+ dirent->off = off;
+ dirent->namelen = namelen;
+ dirent->type = (stbuf->st_mode & S_IFMT) >> 12;
+ memcpy(dirent->name, name, namelen);
+ memset(dirent->name + namelen, 0, entlen_padded - entlen);
- return 0;
+ return entlen_padded;
}
static int fuse_filldir(struct dir_context *_ctx,
struct fuse_dir_context *ctx =
container_of(_ctx, struct fuse_dir_context, ctx);
- fuse_log(FUSE_LOG_DEBUG, "fuse_filldir(ctx={.ctx={.pos=%llu}}, "
- "name=%s, namelen=%d, pos=%lld, dir=%llu, type=%u)\n",
- ctx->ctx.pos, name, namelen, pos, ino, type);
+ struct stat statbuf = {
+ .st_ino = unmap_root_ino(ino),
+ .st_mode = type << 12,
+ };
- /*
- * We have to emit directory entries after reading the next entry,
- * because the previous entry contains a pointer to next.
- */
- if (ctx->prev) {
- int ret = fuse_send_dir_entry(ctx, pos);
- if (ret)
- return ret;
- }
+ fuse_log(FUSE_LOG_DEBUG, "fuse_filldir(name=%s inum=%llu pos=%llu)\n",
+ name, statbuf.st_ino, pos);
- struct fuse_dir_entry *cur = malloc(sizeof *cur + namelen + 1);
- cur->ino = ino;
- cur->type = type;
- memcpy(cur->name, name, namelen);
- cur->name[namelen] = 0;
+ size_t len = fuse_add_direntry2(ctx->buf,
+ ctx->bufsize,
+ name,
+ namelen,
+ &statbuf,
+ pos + 1);
- ctx->prev = cur;
+ if (len > ctx->bufsize)
+ return -1;
+ ctx->buf += len;
+ ctx->bufsize -= len;
return 0;
}
static bool handle_dots(struct fuse_dir_context *ctx, fuse_ino_t dir)
{
- int ret = 0;
-
if (ctx->ctx.pos == 0) {
- ret = fuse_filldir(&ctx->ctx, ".", 1, ctx->ctx.pos,
- unmap_root_ino(dir), DT_DIR);
- if (ret < 0)
+ if (fuse_filldir(&ctx->ctx, ".", 1, ctx->ctx.pos,
+ dir, DT_DIR) < 0)
return false;
ctx->ctx.pos = 1;
}
if (ctx->ctx.pos == 1) {
- ret = fuse_filldir(&ctx->ctx, "..", 2, ctx->ctx.pos,
- /*TODO: parent*/ 1, DT_DIR);
- if (ret < 0)
+ if (fuse_filldir(&ctx->ctx, "..", 2, ctx->ctx.pos,
+ /*TODO: parent*/ 1, DT_DIR) < 0)
return false;
ctx->ctx.pos = 2;
}
goto reply;
ret = bch2_readdir(c, dir, &ctx.ctx);
-
reply:
- /*
- * If we have something to send, the error above doesn't matter.
- *
- * Alternatively, if this send fails, but we previously sent something,
- * then this is a success.
- */
- if (ctx.prev) {
- ret = fuse_send_dir_entry(&ctx, ctx.ctx.pos);
- if (ret && ctx.buf != buf)
- ret = 0;
- }
-
if (!ret) {
fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_readdir reply %zd\n",
ctx.buf - buf);
.mkdir = bcachefs_fuse_mkdir,
.unlink = bcachefs_fuse_unlink,
.rmdir = bcachefs_fuse_rmdir,
- //.symlink = bcachefs_fuse_symlink,
+ .symlink = bcachefs_fuse_symlink,
.rename = bcachefs_fuse_rename,
.link = bcachefs_fuse_link,
.open = bcachefs_fuse_open,