]> git.sesse.net Git - bcachefs-tools-debian/blob - cmd_fusemount.c
Update bcachefs sources to 44ac32df8e0c bcachefs: Split brain detection
[bcachefs-tools-debian] / cmd_fusemount.c
1 #ifdef BCACHEFS_FUSE
2
3 #include <errno.h>
4 #include <float.h>
5 #include <getopt.h>
6 #include <stdio.h>
7 #include <sys/statvfs.h>
8
9 #include <fuse_lowlevel.h>
10
11 #include "cmds.h"
12 #include "libbcachefs.h"
13 #include "tools-util.h"
14
15 #include "libbcachefs/bcachefs.h"
16 #include "libbcachefs/alloc_foreground.h"
17 #include "libbcachefs/btree_iter.h"
18 #include "libbcachefs/buckets.h"
19 #include "libbcachefs/dirent.h"
20 #include "libbcachefs/errcode.h"
21 #include "libbcachefs/error.h"
22 #include "libbcachefs/fs-common.h"
23 #include "libbcachefs/inode.h"
24 #include "libbcachefs/io_read.h"
25 #include "libbcachefs/io_write.h"
26 #include "libbcachefs/opts.h"
27 #include "libbcachefs/super.h"
28
29 /* mode_to_type(): */
30 #include "libbcachefs/fs.h"
31
32 #include <linux/dcache.h>
33
/*
 * Initializer for a struct qstr naming the NUL-terminated string @n.
 * XXX: duplicated from fsck.c.
 */
#define QSTR(n) { { { .len = strlen(n) } }, .name = (n) }
36
37 /* used by write_aligned function for waiting on bch2_write closure */
38 struct write_aligned_op_t {
39         struct closure cl;
40
41         /* must be last: */
42         struct bch_write_op             op;
43 };
44
45
46 static inline subvol_inum map_root_ino(u64 ino)
47 {
48         return (subvol_inum) { 1, ino == 1 ? 4096 : ino };
49 }
50
51 static inline u64 unmap_root_ino(u64 ino)
52 {
53         return ino == 4096 ? 1 : ino;
54 }
55
56 static struct stat inode_to_stat(struct bch_fs *c,
57                                  struct bch_inode_unpacked *bi)
58 {
59         return (struct stat) {
60                 .st_ino         = unmap_root_ino(bi->bi_inum),
61                 .st_size        = bi->bi_size,
62                 .st_mode        = bi->bi_mode,
63                 .st_uid         = bi->bi_uid,
64                 .st_gid         = bi->bi_gid,
65                 .st_nlink       = bch2_inode_nlink_get(bi),
66                 .st_rdev        = bi->bi_dev,
67                 .st_blksize     = block_bytes(c),
68                 .st_blocks      = bi->bi_sectors,
69                 .st_atim        = bch2_time_to_timespec(c, bi->bi_atime),
70                 .st_mtim        = bch2_time_to_timespec(c, bi->bi_mtime),
71                 .st_ctim        = bch2_time_to_timespec(c, bi->bi_ctime),
72         };
73 }
74
75 static struct fuse_entry_param inode_to_entry(struct bch_fs *c,
76                                               struct bch_inode_unpacked *bi)
77 {
78         return (struct fuse_entry_param) {
79                 .ino            = unmap_root_ino(bi->bi_inum),
80                 .generation     = bi->bi_generation,
81                 .attr           = inode_to_stat(c, bi),
82                 .attr_timeout   = DBL_MAX,
83                 .entry_timeout  = DBL_MAX,
84         };
85 }
86
87 static void bcachefs_fuse_init(void *arg, struct fuse_conn_info *conn)
88 {
89         if (conn->capable & FUSE_CAP_WRITEBACK_CACHE) {
90                 fuse_log(FUSE_LOG_DEBUG, "fuse_init: activating writeback\n");
91                 conn->want |= FUSE_CAP_WRITEBACK_CACHE;
92         } else
93                 fuse_log(FUSE_LOG_DEBUG, "fuse_init: writeback not capable\n");
94
95         //conn->want |= FUSE_CAP_POSIX_ACL;
96 }
97
/* FUSE destroy callback: stop the filesystem passed as the userdata pointer. */
static void bcachefs_fuse_destroy(void *arg)
{
	bch2_fs_stop((struct bch_fs *) arg);
}
104
105 static void bcachefs_fuse_lookup(fuse_req_t req, fuse_ino_t dir_ino,
106                                  const char *name)
107 {
108         subvol_inum dir = map_root_ino(dir_ino);
109         struct bch_fs *c = fuse_req_userdata(req);
110         struct bch_inode_unpacked bi;
111         struct qstr qstr = QSTR(name);
112         subvol_inum inum;
113         int ret;
114
115         fuse_log(FUSE_LOG_DEBUG, "fuse_lookup(dir=%llu name=%s)\n",
116                  dir.inum, name);
117
118         ret = bch2_inode_find_by_inum(c, dir, &bi);
119         if (ret) {
120                 fuse_reply_err(req, -ret);
121                 return;
122         }
123
124         struct bch_hash_info hash_info = bch2_hash_info_init(c, &bi);
125
126         ret = bch2_dirent_lookup(c, dir, &hash_info, &qstr, &inum);
127         if (ret) {
128                 struct fuse_entry_param e = {
129                         .attr_timeout   = DBL_MAX,
130                         .entry_timeout  = DBL_MAX,
131                 };
132                 fuse_reply_entry(req, &e);
133                 return;
134         }
135
136         ret = bch2_inode_find_by_inum(c, inum, &bi);
137         if (ret)
138                 goto err;
139
140         fuse_log(FUSE_LOG_DEBUG, "fuse_lookup ret(inum=%llu)\n",
141                  bi.bi_inum);
142
143         struct fuse_entry_param e = inode_to_entry(c, &bi);
144         fuse_reply_entry(req, &e);
145         return;
146 err:
147         fuse_log(FUSE_LOG_DEBUG, "fuse_lookup error %i\n", ret);
148         fuse_reply_err(req, -ret);
149 }
150
151 static void bcachefs_fuse_getattr(fuse_req_t req, fuse_ino_t ino,
152                                   struct fuse_file_info *fi)
153 {
154         subvol_inum inum = map_root_ino(ino);
155         struct bch_fs *c = fuse_req_userdata(req);
156         struct bch_inode_unpacked bi;
157         struct stat attr;
158
159         fuse_log(FUSE_LOG_DEBUG, "fuse_getattr(inum=%llu)\n", inum.inum);
160
161         int ret = bch2_inode_find_by_inum(c, inum, &bi);
162         if (ret) {
163                 fuse_log(FUSE_LOG_DEBUG, "fuse_getattr error %i\n", ret);
164                 fuse_reply_err(req, -ret);
165                 return;
166         }
167
168         fuse_log(FUSE_LOG_DEBUG, "fuse_getattr success\n");
169
170         attr = inode_to_stat(c, &bi);
171         fuse_reply_attr(req, &attr, DBL_MAX);
172 }
173
174 static void bcachefs_fuse_setattr(fuse_req_t req, fuse_ino_t ino,
175                                   struct stat *attr, int to_set,
176                                   struct fuse_file_info *fi)
177 {
178         struct bch_fs *c = fuse_req_userdata(req);
179         struct bch_inode_unpacked inode_u;
180         struct btree_trans *trans;
181         struct btree_iter iter;
182         u64 now;
183         int ret;
184
185         subvol_inum inum = map_root_ino(ino);
186
187         fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_setattr(%llu, %x)\n", inum.inum, to_set);
188
189         trans = bch2_trans_get(c);
190 retry:
191         bch2_trans_begin(trans);
192         now = bch2_current_time(c);
193
194         ret = bch2_inode_peek(trans, &iter, &inode_u, inum, BTREE_ITER_INTENT);
195         if (ret)
196                 goto err;
197
198         if (to_set & FUSE_SET_ATTR_MODE)
199                 inode_u.bi_mode = attr->st_mode;
200         if (to_set & FUSE_SET_ATTR_UID)
201                 inode_u.bi_uid  = attr->st_uid;
202         if (to_set & FUSE_SET_ATTR_GID)
203                 inode_u.bi_gid  = attr->st_gid;
204         if (to_set & FUSE_SET_ATTR_SIZE)
205                 inode_u.bi_size = attr->st_size;
206         if (to_set & FUSE_SET_ATTR_ATIME)
207                 inode_u.bi_atime = timespec_to_bch2_time(c, attr->st_atim);
208         if (to_set & FUSE_SET_ATTR_MTIME)
209                 inode_u.bi_mtime = timespec_to_bch2_time(c, attr->st_mtim);
210         if (to_set & FUSE_SET_ATTR_ATIME_NOW)
211                 inode_u.bi_atime = now;
212         if (to_set & FUSE_SET_ATTR_MTIME_NOW)
213                 inode_u.bi_mtime = now;
214         /* TODO: CTIME? */
215
216         ret   = bch2_inode_write(trans, &iter, &inode_u) ?:
217                 bch2_trans_commit(trans, NULL, NULL,
218                                   BCH_TRANS_COMMIT_no_enospc);
219 err:
220         bch2_trans_iter_exit(trans, &iter);
221         if (ret == -EINTR)
222                 goto retry;
223
224         bch2_trans_put(trans);
225
226         if (!ret) {
227                 *attr = inode_to_stat(c, &inode_u);
228                 fuse_reply_attr(req, attr, DBL_MAX);
229         } else {
230                 fuse_reply_err(req, -ret);
231         }
232 }
233
234 static int do_create(struct bch_fs *c, subvol_inum dir,
235                      const char *name, mode_t mode, dev_t rdev,
236                      struct bch_inode_unpacked *new_inode)
237 {
238         struct qstr qstr = QSTR(name);
239         struct bch_inode_unpacked dir_u;
240         uid_t uid = 0;
241         gid_t gid = 0;
242
243         bch2_inode_init_early(c, new_inode);
244
245         return bch2_trans_do(c, NULL, NULL, 0,
246                         bch2_create_trans(trans,
247                                 dir, &dir_u,
248                                 new_inode, &qstr,
249                                 uid, gid, mode, rdev, NULL, NULL,
250                                 (subvol_inum) { 0 }, 0));
251 }
252
253 static void bcachefs_fuse_mknod(fuse_req_t req, fuse_ino_t dir_ino,
254                                 const char *name, mode_t mode,
255                                 dev_t rdev)
256 {
257         subvol_inum dir = map_root_ino(dir_ino);
258         struct bch_fs *c = fuse_req_userdata(req);
259         struct bch_inode_unpacked new_inode;
260         int ret;
261
262         fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_mknod(%llu, %s, %x, %x)\n",
263                  dir.inum, name, mode, rdev);
264
265         ret = do_create(c, dir, name, mode, rdev, &new_inode);
266         if (ret)
267                 goto err;
268
269         struct fuse_entry_param e = inode_to_entry(c, &new_inode);
270         fuse_reply_entry(req, &e);
271         return;
272 err:
273         fuse_reply_err(req, -ret);
274 }
275
276 static void bcachefs_fuse_mkdir(fuse_req_t req, fuse_ino_t dir,
277                                 const char *name, mode_t mode)
278 {
279         fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_mkdir(%llu, %s, %x)\n",
280                  dir, name, mode);
281
282         BUG_ON(mode & S_IFMT);
283
284         mode |= S_IFDIR;
285         bcachefs_fuse_mknod(req, dir, name, mode, 0);
286 }
287
288 static void bcachefs_fuse_unlink(fuse_req_t req, fuse_ino_t dir_ino,
289                                  const char *name)
290 {
291         struct bch_fs *c = fuse_req_userdata(req);
292         struct bch_inode_unpacked dir_u, inode_u;
293         struct qstr qstr = QSTR(name);
294         subvol_inum dir = map_root_ino(dir_ino);
295
296         fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_unlink(%llu, %s)\n", dir.inum, name);
297
298         int ret = bch2_trans_do(c, NULL, NULL,
299                                 BCH_TRANS_COMMIT_no_enospc,
300                             bch2_unlink_trans(trans, dir, &dir_u,
301                                               &inode_u, &qstr, false));
302
303         fuse_reply_err(req, -ret);
304 }
305
306 static void bcachefs_fuse_rmdir(fuse_req_t req, fuse_ino_t dir,
307                                 const char *name)
308 {
309         fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_rmdir(%llu, %s)\n", dir, name);
310
311         bcachefs_fuse_unlink(req, dir, name);
312 }
313
314 static void bcachefs_fuse_rename(fuse_req_t req,
315                                  fuse_ino_t src_dir_ino, const char *srcname,
316                                  fuse_ino_t dst_dir_ino, const char *dstname,
317                                  unsigned flags)
318 {
319         struct bch_fs *c = fuse_req_userdata(req);
320         struct bch_inode_unpacked dst_dir_u, src_dir_u;
321         struct bch_inode_unpacked src_inode_u, dst_inode_u;
322         struct qstr dst_name = QSTR(srcname);
323         struct qstr src_name = QSTR(dstname);
324         subvol_inum src_dir = map_root_ino(src_dir_ino);
325         subvol_inum dst_dir = map_root_ino(dst_dir_ino);
326         int ret;
327
328         fuse_log(FUSE_LOG_DEBUG,
329                  "bcachefs_fuse_rename(%llu, %s, %llu, %s, %x)\n",
330                  src_dir.inum, srcname, dst_dir.inum, dstname, flags);
331
332         /* XXX handle overwrites */
333         ret = bch2_trans_do(c, NULL, NULL, 0,
334                 bch2_rename_trans(trans,
335                                   src_dir, &src_dir_u,
336                                   dst_dir, &dst_dir_u,
337                                   &src_inode_u, &dst_inode_u,
338                                   &src_name, &dst_name,
339                                   BCH_RENAME));
340
341         fuse_reply_err(req, -ret);
342 }
343
344 static void bcachefs_fuse_link(fuse_req_t req, fuse_ino_t ino,
345                                fuse_ino_t newparent_ino, const char *newname)
346 {
347         struct bch_fs *c = fuse_req_userdata(req);
348         struct bch_inode_unpacked dir_u, inode_u;
349         struct qstr qstr = QSTR(newname);
350         subvol_inum newparent   = map_root_ino(newparent_ino);
351         subvol_inum inum        = map_root_ino(ino);
352         int ret;
353
354         fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_link(%llu, %llu, %s)\n",
355                  inum.inum, newparent.inum, newname);
356
357         ret = bch2_trans_do(c, NULL, NULL, 0,
358                             bch2_link_trans(trans, newparent, &dir_u,
359                                             inum, &inode_u, &qstr));
360
361         if (!ret) {
362                 struct fuse_entry_param e = inode_to_entry(c, &inode_u);
363                 fuse_reply_entry(req, &e);
364         } else {
365                 fuse_reply_err(req, -ret);
366         }
367 }
368
369 static void bcachefs_fuse_open(fuse_req_t req, fuse_ino_t inum,
370                                struct fuse_file_info *fi)
371 {
372         fi->direct_io           = false;
373         fi->keep_cache          = true;
374         fi->cache_readdir       = true;
375
376         fuse_reply_open(req, fi);
377 }
378
379 static void userbio_init(struct bio *bio, struct bio_vec *bv,
380                          void *buf, size_t size)
381 {
382         bio_init(bio, NULL, bv, 1, 0);
383         bio->bi_iter.bi_size    = size;
384         bv->bv_page             = buf;
385         bv->bv_len              = size;
386         bv->bv_offset           = 0;
387 }
388
389 static int get_inode_io_opts(struct bch_fs *c, subvol_inum inum, struct bch_io_opts *opts)
390 {
391         struct bch_inode_unpacked inode;
392         if (bch2_inode_find_by_inum(c, inum, &inode))
393                 return -EINVAL;
394
395         bch2_inode_opts_get(opts, c, &inode);
396         return 0;
397 }
398
399 static void bcachefs_fuse_read_endio(struct bio *bio)
400 {
401         closure_put(bio->bi_private);
402 }
403
404
405 static void bcachefs_fuse_write_endio(struct bch_write_op *op)
406 {
407        struct write_aligned_op_t *w = container_of(op,struct write_aligned_op_t,op);
408        closure_put(&w->cl);
409 }
410
411
/* Block-aligned envelope around an unaligned (offset, size) byte range. */
struct fuse_align_io {
	off_t	start;		/* range start rounded down to a block */
	size_t	pad_start;	/* bytes between start and the real offset */
	off_t	end;		/* range end rounded up to a block */
	size_t	pad_end;	/* bytes between the real end and end */
	size_t	size;		/* end - start */
};
419
420 /* Handle unaligned start and end */
421 /* TODO: align to block_bytes, sector size, or page size? */
422 static struct fuse_align_io align_io(const struct bch_fs *c, size_t size,
423                                      off_t offset)
424 {
425         struct fuse_align_io align;
426
427         BUG_ON(offset < 0);
428
429         align.start = round_down(offset, block_bytes(c));
430         align.pad_start = offset - align.start;
431
432         off_t end = offset + size;
433         align.end = round_up(end, block_bytes(c));
434         align.pad_end = align.end - end;
435
436         align.size = align.end - align.start;
437
438         return align;
439 }
440
441 /*
442  * Given an aligned number of bytes transferred, figure out how many unaligned
443  * bytes were transferred.
444  */
445 static size_t align_fix_up_bytes(const struct fuse_align_io *align,
446                                  size_t align_bytes)
447 {
448         size_t bytes = 0;
449
450         if (align_bytes > align->pad_start) {
451                 bytes = align_bytes - align->pad_start;
452                 bytes = bytes > align->pad_end ? bytes - align->pad_end : 0;
453         }
454
455         return bytes;
456 }
457
458 /*
459  * Read aligned data.
460  */
461 static int read_aligned(struct bch_fs *c, subvol_inum inum, size_t aligned_size,
462                         off_t aligned_offset, void *buf)
463 {
464         BUG_ON(aligned_size & (block_bytes(c) - 1));
465         BUG_ON(aligned_offset & (block_bytes(c) - 1));
466
467         struct bch_io_opts io_opts;
468         if (get_inode_io_opts(c, inum, &io_opts))
469                 return -ENOENT;
470
471         struct bch_read_bio rbio;
472         struct bio_vec bv;
473         userbio_init(&rbio.bio, &bv, buf, aligned_size);
474         bio_set_op_attrs(&rbio.bio, REQ_OP_READ, REQ_SYNC);
475         rbio.bio.bi_iter.bi_sector      = aligned_offset >> 9;
476
477         struct closure cl;
478         closure_init_stack(&cl);
479
480         closure_get(&cl);
481         rbio.bio.bi_end_io              = bcachefs_fuse_read_endio;
482         rbio.bio.bi_private             = &cl;
483
484         bch2_read(c, rbio_init(&rbio.bio, io_opts), inum);
485
486         closure_sync(&cl);
487
488         return -blk_status_to_errno(rbio.bio.bi_status);
489 }
490
491 static void bcachefs_fuse_read(fuse_req_t req, fuse_ino_t ino,
492                                size_t size, off_t offset,
493                                struct fuse_file_info *fi)
494 {
495         subvol_inum inum = map_root_ino(ino);
496         struct bch_fs *c = fuse_req_userdata(req);
497
498         fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_read(%llu, %zd, %lld)\n",
499                  inum, size, offset);
500
501         /* Check inode size. */
502         struct bch_inode_unpacked bi;
503         int ret = bch2_inode_find_by_inum(c, inum, &bi);
504         if (ret) {
505                 fuse_reply_err(req, -ret);
506                 return;
507         }
508
509         off_t end = min_t(u64, bi.bi_size, offset + size);
510         if (end <= offset) {
511                 fuse_reply_buf(req, NULL, 0);
512                 return;
513         }
514         size = end - offset;
515
516         struct fuse_align_io align = align_io(c, size, offset);
517
518         void *buf = aligned_alloc(PAGE_SIZE, align.size);
519         if (!buf) {
520                 fuse_reply_err(req, ENOMEM);
521                 return;
522         }
523
524         ret = read_aligned(c, inum, align.size, align.start, buf);
525
526         if (likely(!ret))
527                 fuse_reply_buf(req, buf + align.pad_start, size);
528         else
529                 fuse_reply_err(req, -ret);
530
531         free(buf);
532 }
533
534 static int inode_update_times(struct bch_fs *c, subvol_inum inum)
535 {
536         struct btree_trans *trans;
537         struct btree_iter iter;
538         struct bch_inode_unpacked inode_u;
539         int ret = 0;
540         u64 now;
541
542         trans = bch2_trans_get(c);
543 retry:
544         bch2_trans_begin(trans);
545         now = bch2_current_time(c);
546
547         ret = bch2_inode_peek(trans, &iter, &inode_u, inum, BTREE_ITER_INTENT);
548         if (ret)
549                 goto err;
550
551         inode_u.bi_mtime = now;
552         inode_u.bi_ctime = now;
553
554         ret = bch2_inode_write(trans, &iter, &inode_u);
555         if (ret)
556                 goto err;
557
558         ret = bch2_trans_commit(trans, NULL, NULL,
559                                 BCH_TRANS_COMMIT_no_enospc);
560 err:
561         bch2_trans_iter_exit(trans, &iter);
562         if (ret == -EINTR)
563                 goto retry;
564
565         bch2_trans_put(trans);
566         return ret;
567 }
568
569 static int write_aligned(struct bch_fs *c, subvol_inum inum,
570                          struct bch_io_opts io_opts, void *buf,
571                          size_t aligned_size, off_t aligned_offset,
572                          off_t new_i_size, size_t *written_out)
573 {
574
575         struct write_aligned_op_t w = { 0 }
576 ;
577         struct bch_write_op     *op = &w.op;
578         struct bio_vec          bv;
579
580         BUG_ON(aligned_size & (block_bytes(c) - 1));
581         BUG_ON(aligned_offset & (block_bytes(c) - 1));
582
583         *written_out = 0;
584
585         closure_init_stack(&w.cl);
586
587         bch2_write_op_init(op, c, io_opts); /* XXX reads from op?! */
588         op->write_point = writepoint_hashed(0);
589         op->nr_replicas = io_opts.data_replicas;
590         op->target      = io_opts.foreground_target;
591         op->subvol      = inum.subvol;
592         op->pos         = POS(inum.inum, aligned_offset >> 9);
593         op->new_i_size  = new_i_size;
594         op->end_io = bcachefs_fuse_write_endio;
595
596         userbio_init(&op->wbio.bio, &bv, buf, aligned_size);
597         bio_set_op_attrs(&op->wbio.bio, REQ_OP_WRITE, REQ_SYNC);
598
599         if (bch2_disk_reservation_get(c, &op->res, aligned_size >> 9,
600                                       op->nr_replicas, 0)) {
601                 /* XXX: use check_range_allocated like dio write path */
602                 return -ENOSPC;
603         }
604
605         closure_get(&w.cl);
606
607         closure_call(&op->cl, bch2_write, NULL, NULL);
608
609         closure_sync(&w.cl);
610
611         if (!op->error)
612                 *written_out = op->written << 9;
613
614         return op->error;
615 }
616
617 static void bcachefs_fuse_write(fuse_req_t req, fuse_ino_t ino,
618                                 const char *buf, size_t size,
619                                 off_t offset,
620                                 struct fuse_file_info *fi)
621 {
622         subvol_inum inum = map_root_ino(ino);
623         struct bch_fs *c        = fuse_req_userdata(req);
624         struct bch_io_opts      io_opts;
625         size_t                  aligned_written;
626         int                     ret = 0;
627
628         fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_write(%llu, %zd, %lld)\n",
629                  inum, size, offset);
630
631         struct fuse_align_io align = align_io(c, size, offset);
632         void *aligned_buf = aligned_alloc(PAGE_SIZE, align.size);
633         BUG_ON(!aligned_buf);
634
635         if (get_inode_io_opts(c, inum, &io_opts)) {
636                 ret = -ENOENT;
637                 goto err;
638         }
639
640         /* Realign the data and read in start and end, if needed */
641
642         /* Read partial start data. */
643         if (align.pad_start) {
644                 memset(aligned_buf, 0, block_bytes(c));
645
646                 ret = read_aligned(c, inum, block_bytes(c), align.start,
647                                    aligned_buf);
648                 if (ret)
649                         goto err;
650         }
651
652         /*
653          * Read partial end data. If the whole write fits in one block, the
654          * start data and the end data are the same so this isn't needed.
655          */
656         if (align.pad_end &&
657             !(align.pad_start && align.size == block_bytes(c))) {
658                 off_t partial_end_start = align.end - block_bytes(c);
659                 size_t buf_offset = align.size - block_bytes(c);
660
661                 memset(aligned_buf + buf_offset, 0, block_bytes(c));
662
663                 ret = read_aligned(c, inum, block_bytes(c), partial_end_start,
664                                    aligned_buf + buf_offset);
665                 if (ret)
666                         goto err;
667         }
668
669         /* Overlay what we want to write. */
670         memcpy(aligned_buf + align.pad_start, buf, size);
671
672         /* Actually write. */
673         ret = write_aligned(c, inum, io_opts, aligned_buf,
674                             align.size, align.start,
675                             offset + size, &aligned_written);
676
677         /* Figure out how many unaligned bytes were written. */
678         size_t written = align_fix_up_bytes(&align, aligned_written);
679         BUG_ON(written > size);
680
681         fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_write: wrote %zd bytes\n",
682                  written);
683
684         if (written > 0)
685                 ret = 0;
686
687         /*
688          * Update inode times.
689          * TODO: Integrate with bch2_extent_update()
690          */
691         if (!ret)
692                 ret = inode_update_times(c, inum);
693
694         if (!ret) {
695                 BUG_ON(written == 0);
696                 fuse_reply_write(req, written);
697                 free(aligned_buf);
698                 return;
699         }
700
701 err:
702         fuse_reply_err(req, -ret);
703         free(aligned_buf);
704 }
705
706 static void bcachefs_fuse_symlink(fuse_req_t req, const char *link,
707                                   fuse_ino_t dir_ino, const char *name)
708 {
709         subvol_inum dir = map_root_ino(dir_ino);
710         struct bch_fs *c = fuse_req_userdata(req);
711         struct bch_inode_unpacked new_inode;
712         size_t link_len = strlen(link);
713         int ret;
714
715         fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_symlink(%s, %llu, %s)\n",
716                  link, dir.inum, name);
717
718         ret = do_create(c, dir, name, S_IFLNK|S_IRWXUGO, 0, &new_inode);
719         if (ret)
720                 goto err;
721
722         struct bch_io_opts io_opts;
723         ret = get_inode_io_opts(c, dir, &io_opts);
724         if (ret)
725                 goto err;
726
727         struct fuse_align_io align = align_io(c, link_len + 1, 0);
728
729         void *aligned_buf = aligned_alloc(PAGE_SIZE, align.size);
730         BUG_ON(!aligned_buf);
731
732         memset(aligned_buf, 0, align.size);
733         memcpy(aligned_buf, link, link_len); /* already terminated */
734
735         subvol_inum inum = (subvol_inum) { dir.subvol, new_inode.bi_inum };
736
737         size_t aligned_written;
738         ret = write_aligned(c, inum, io_opts, aligned_buf,
739                             align.size, align.start, link_len + 1,
740                             &aligned_written);
741         free(aligned_buf);
742
743         if (ret)
744                 goto err;
745
746         size_t written = align_fix_up_bytes(&align, aligned_written);
747         BUG_ON(written != link_len + 1); // TODO: handle short
748
749         ret = inode_update_times(c, inum);
750         if (ret)
751                 goto err;
752
753         new_inode.bi_size = written;
754
755         struct fuse_entry_param e = inode_to_entry(c, &new_inode);
756         fuse_reply_entry(req, &e);
757         return;
758
759 err:
760         fuse_reply_err(req, -ret);
761 }
762
763 static void bcachefs_fuse_readlink(fuse_req_t req, fuse_ino_t ino)
764 {
765         subvol_inum inum = map_root_ino(ino);
766         struct bch_fs *c = fuse_req_userdata(req);
767         char *buf = NULL;
768
769         fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_readlink(%llu)\n", inum.inum);
770
771         struct bch_inode_unpacked bi;
772         int ret = bch2_inode_find_by_inum(c, inum, &bi);
773         if (ret)
774                 goto err;
775
776         struct fuse_align_io align = align_io(c, bi.bi_size, 0);
777
778         ret = -ENOMEM;
779         buf = aligned_alloc(PAGE_SIZE, align.size);
780         if (!buf)
781                 goto err;
782
783         ret = read_aligned(c, inum, align.size, align.start, buf);
784         if (ret)
785                 goto err;
786
787         BUG_ON(buf[align.size - 1] != 0);
788
789         fuse_reply_readlink(req, buf);
790
791 err:
792         if (ret)
793                 fuse_reply_err(req, -ret);
794
795         free(buf);
796 }
797
798 #if 0
799 /*
800  * FUSE flush is essentially the close() call, however it is not guaranteed
801  * that one flush happens per open/create.
802  *
803  * It doesn't have to do anything, and is mostly relevant for NFS-style
804  * filesystems where close has some relationship to caching.
805  */
806 static void bcachefs_fuse_flush(fuse_req_t req, fuse_ino_t inum,
807                                 struct fuse_file_info *fi)
808 {
809         struct bch_fs *c = fuse_req_userdata(req);
810 }
811
812 static void bcachefs_fuse_release(fuse_req_t req, fuse_ino_t inum,
813                                   struct fuse_file_info *fi)
814 {
815         struct bch_fs *c = fuse_req_userdata(req);
816 }
817
818 static void bcachefs_fuse_fsync(fuse_req_t req, fuse_ino_t inum, int datasync,
819                                 struct fuse_file_info *fi)
820 {
821         struct bch_fs *c = fuse_req_userdata(req);
822 }
823
824 static void bcachefs_fuse_opendir(fuse_req_t req, fuse_ino_t inum,
825                                   struct fuse_file_info *fi)
826 {
827         struct bch_fs *c = fuse_req_userdata(req);
828 }
829 #endif
830
831 struct fuse_dir_context {
832         struct dir_context      ctx;
833         fuse_req_t              req;
834         char                    *buf;
835         size_t                  bufsize;
836 };
837
/*
 * Directory entry record as laid out in a readdir reply buffer: a fixed
 * header followed by the name, which is not NUL-terminated.
 */
struct fuse_dirent {
	uint64_t	ino;
	uint64_t	off;
	uint32_t	namelen;
	uint32_t	type;
	char		name[];
};

/* Offset of the name within a record, i.e. the size of the fixed header. */
#define FUSE_NAME_OFFSET offsetof(struct fuse_dirent, name)
/* Round x up to a multiple of sizeof(uint64_t). */
#define FUSE_DIRENT_ALIGN(x) \
	(((x) + sizeof(uint64_t) - 1) & ~(sizeof(uint64_t) - 1))

/*
 * Append one directory entry to @buf, taking the name length explicitly
 * instead of requiring a NUL-terminated name.
 *
 * Returns the padded size of the record.  Nothing is written when @buf is
 * NULL or the record does not fit in @bufsize, so a return value larger
 * than @bufsize means "did not fit".
 */
static size_t fuse_add_direntry2(char *buf, size_t bufsize,
				 const char *name, int namelen,
				 const struct stat *stbuf, off_t off)
{
	const size_t raw_len	= FUSE_NAME_OFFSET + namelen;
	const size_t padded_len	= FUSE_DIRENT_ALIGN(raw_len);
	struct fuse_dirent *ent;

	if (!buf || bufsize < padded_len)
		return padded_len;

	ent = (struct fuse_dirent *) buf;
	ent->ino	= stbuf->st_ino;
	ent->off	= off;
	ent->namelen	= namelen;
	/* File-type bits of st_mode shifted down into a d_type-style value. */
	ent->type	= (stbuf->st_mode & S_IFMT) >> 12;

	memcpy(ent->name, name, namelen);
	/* Zero the alignment padding after the name. */
	memset(ent->name + namelen, 0, padded_len - raw_len);

	return padded_len;
}
870
871 static int fuse_filldir(struct dir_context *_ctx,
872                         const char *name, int namelen,
873                         loff_t pos, u64 ino, unsigned type)
874 {
875         struct fuse_dir_context *ctx =
876                 container_of(_ctx, struct fuse_dir_context, ctx);
877
878         struct stat statbuf = {
879                 .st_ino         = unmap_root_ino(ino),
880                 .st_mode        = type << 12,
881         };
882
883         fuse_log(FUSE_LOG_DEBUG, "fuse_filldir(name=%s inum=%llu pos=%llu)\n",
884                  name, statbuf.st_ino, pos);
885
886         size_t len = fuse_add_direntry2(ctx->buf,
887                                         ctx->bufsize,
888                                         name,
889                                         namelen,
890                                         &statbuf,
891                                         pos + 1);
892
893         if (len > ctx->bufsize)
894                 return -1;
895
896         ctx->buf        += len;
897         ctx->bufsize    -= len;
898
899         return 0;
900 }
901
902 static bool handle_dots(struct fuse_dir_context *ctx, fuse_ino_t dir)
903 {
904         if (ctx->ctx.pos == 0) {
905                 if (fuse_filldir(&ctx->ctx, ".", 1, ctx->ctx.pos,
906                                  dir, DT_DIR) < 0)
907                         return false;
908                 ctx->ctx.pos = 1;
909         }
910
911         if (ctx->ctx.pos == 1) {
912                 if (fuse_filldir(&ctx->ctx, "..", 2, ctx->ctx.pos,
913                                  /*TODO: parent*/ 1, DT_DIR) < 0)
914                         return false;
915                 ctx->ctx.pos = 2;
916         }
917
918         return true;
919 }
920
921 static void bcachefs_fuse_readdir(fuse_req_t req, fuse_ino_t dir_ino,
922                                   size_t size, off_t off,
923                                   struct fuse_file_info *fi)
924 {
925         subvol_inum dir = map_root_ino(dir_ino);
926         struct bch_fs *c = fuse_req_userdata(req);
927         struct bch_inode_unpacked bi;
928         char *buf = calloc(size, 1);
929         struct fuse_dir_context ctx = {
930                 .ctx.actor      = fuse_filldir,
931                 .ctx.pos        = off,
932                 .req            = req,
933                 .buf            = buf,
934                 .bufsize        = size,
935         };
936         int ret = 0;
937
938         fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_readdir(dir=%llu, size=%zu, "
939                  "off=%lld)\n", dir.inum, size, off);
940
941         ret = bch2_inode_find_by_inum(c, dir, &bi);
942         if (ret)
943                 goto reply;
944
945         if (!S_ISDIR(bi.bi_mode)) {
946                 ret = -ENOTDIR;
947                 goto reply;
948         }
949
950         if (!handle_dots(&ctx, dir.inum))
951                 goto reply;
952
953         ret = bch2_readdir(c, dir, &ctx.ctx);
954 reply:
955         if (!ret) {
956                 fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_readdir reply %zd\n",
957                                         ctx.buf - buf);
958                 fuse_reply_buf(req, buf, ctx.buf - buf);
959         } else {
960                 fuse_reply_err(req, -ret);
961         }
962
963         free(buf);
964 }
965
#if 0
/*
 * Directory handlers that are not implemented yet.  The whole group is
 * compiled out; the empty bodies only record the expected signatures.
 */
static void bcachefs_fuse_readdirplus(fuse_req_t req,
                                      fuse_ino_t dir,
                                      size_t size,
                                      off_t off,
                                      struct fuse_file_info *fi)
{
}

static void bcachefs_fuse_releasedir(fuse_req_t req,
                                     fuse_ino_t inum,
                                     struct fuse_file_info *fi)
{
        struct bch_fs *c = fuse_req_userdata(req);
}

static void bcachefs_fuse_fsyncdir(fuse_req_t req,
                                   fuse_ino_t inum,
                                   int datasync,
                                   struct fuse_file_info *fi)
{
        struct bch_fs *c = fuse_req_userdata(req);
}
#endif
986
987 static void bcachefs_fuse_statfs(fuse_req_t req, fuse_ino_t inum)
988 {
989         struct bch_fs *c = fuse_req_userdata(req);
990         struct bch_fs_usage_short usage = bch2_fs_usage_read_short(c);
991         unsigned shift = c->block_bits;
992         struct statvfs statbuf = {
993                 .f_bsize        = block_bytes(c),
994                 .f_frsize       = block_bytes(c),
995                 .f_blocks       = usage.capacity >> shift,
996                 .f_bfree        = (usage.capacity - usage.used) >> shift,
997                 //.f_bavail     = statbuf.f_bfree,
998                 .f_files        = usage.nr_inodes,
999                 .f_ffree        = U64_MAX,
1000                 .f_namemax      = BCH_NAME_MAX,
1001         };
1002
1003         fuse_reply_statfs(req, &statbuf);
1004 }
1005
#if 0
/*
 * Extended attribute handlers that are not implemented yet.  The whole
 * group is compiled out.
 */
static void bcachefs_fuse_setxattr(fuse_req_t req,
                                   fuse_ino_t inum,
                                   const char *name,
                                   const char *value,
                                   size_t size,
                                   int flags)
{
        struct bch_fs *c = fuse_req_userdata(req);
}

static void bcachefs_fuse_getxattr(fuse_req_t req,
                                   fuse_ino_t inum,
                                   const char *name,
                                   size_t size)
{
        struct bch_fs *c = fuse_req_userdata(req);

        /* NOTE: the reply's second argument is still missing here. */
        fuse_reply_xattr(req, );
}

static void bcachefs_fuse_listxattr(fuse_req_t req,
                                    fuse_ino_t inum,
                                    size_t size)
{
        struct bch_fs *c = fuse_req_userdata(req);
}

static void bcachefs_fuse_removexattr(fuse_req_t req,
                                      fuse_ino_t inum,
                                      const char *name)
{
        struct bch_fs *c = fuse_req_userdata(req);
}
#endif
1033
1034 static void bcachefs_fuse_create(fuse_req_t req, fuse_ino_t dir_ino,
1035                                  const char *name, mode_t mode,
1036                                  struct fuse_file_info *fi)
1037 {
1038         subvol_inum dir = map_root_ino(dir_ino);
1039         struct bch_fs *c = fuse_req_userdata(req);
1040         struct bch_inode_unpacked new_inode;
1041         int ret;
1042
1043         fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_create(%llu, %s, %x)\n",
1044                  dir.inum, name, mode);
1045
1046         ret = do_create(c, dir, name, mode, 0, &new_inode);
1047         if (ret)
1048                 goto err;
1049
1050         struct fuse_entry_param e = inode_to_entry(c, &new_inode);
1051         fuse_reply_create(req, &e, fi);
1052         return;
1053 err:
1054         fuse_reply_err(req, -ret);
1055 }
1056
#if 0
/*
 * File data handlers that are not implemented yet.  The whole group is
 * compiled out.
 */
static void bcachefs_fuse_write_buf(fuse_req_t req,
                                    fuse_ino_t inum,
                                    struct fuse_bufvec *bufv,
                                    off_t off,
                                    struct fuse_file_info *fi)
{
        struct bch_fs *c = fuse_req_userdata(req);
}

static void bcachefs_fuse_fallocate(fuse_req_t req,
                                    fuse_ino_t inum,
                                    int mode,
                                    off_t offset,
                                    off_t length,
                                    struct fuse_file_info *fi)
{
        struct bch_fs *c = fuse_req_userdata(req);
}
#endif
1072
1073 static const struct fuse_lowlevel_ops bcachefs_fuse_ops = {
1074         .init           = bcachefs_fuse_init,
1075         .destroy        = bcachefs_fuse_destroy,
1076         .lookup         = bcachefs_fuse_lookup,
1077         .getattr        = bcachefs_fuse_getattr,
1078         .setattr        = bcachefs_fuse_setattr,
1079         .readlink       = bcachefs_fuse_readlink,
1080         .mknod          = bcachefs_fuse_mknod,
1081         .mkdir          = bcachefs_fuse_mkdir,
1082         .unlink         = bcachefs_fuse_unlink,
1083         .rmdir          = bcachefs_fuse_rmdir,
1084         .symlink        = bcachefs_fuse_symlink,
1085         .rename         = bcachefs_fuse_rename,
1086         .link           = bcachefs_fuse_link,
1087         .open           = bcachefs_fuse_open,
1088         .read           = bcachefs_fuse_read,
1089         .write          = bcachefs_fuse_write,
1090         //.flush        = bcachefs_fuse_flush,
1091         //.release      = bcachefs_fuse_release,
1092         //.fsync        = bcachefs_fuse_fsync,
1093         //.opendir      = bcachefs_fuse_opendir,
1094         .readdir        = bcachefs_fuse_readdir,
1095         //.readdirplus  = bcachefs_fuse_readdirplus,
1096         //.releasedir   = bcachefs_fuse_releasedir,
1097         //.fsyncdir     = bcachefs_fuse_fsyncdir,
1098         .statfs         = bcachefs_fuse_statfs,
1099         //.setxattr     = bcachefs_fuse_setxattr,
1100         //.getxattr     = bcachefs_fuse_getxattr,
1101         //.listxattr    = bcachefs_fuse_listxattr,
1102         //.removexattr  = bcachefs_fuse_removexattr,
1103         .create         = bcachefs_fuse_create,
1104
1105         /* posix locks: */
1106 #if 0
1107         .getlk          = bcachefs_fuse_getlk,
1108         .setlk          = bcachefs_fuse_setlk,
1109 #endif
1110         //.write_buf    = bcachefs_fuse_write_buf,
1111         //.fallocate    = bcachefs_fuse_fallocate,
1112
1113 };
1114
1115 /*
1116  * Setup and command parsing.
1117  */
1118
/* State collected while parsing the fusemount command line. */
struct bf_context {
        /* First non-option argument, as captured by bf_opt_proc(). */
        char *devices_str;
        /* Individual device paths, filled in by tokenize_devices(). */
        char **devices;
        /* Number of entries in devices. */
        int nr_devices;
};
1124
1125 static void bf_context_free(struct bf_context *ctx)
1126 {
1127         int i;
1128
1129         free(ctx->devices_str);
1130         for (i = 0; i < ctx->nr_devices; ++i)
1131                 free(ctx->devices[i]);
1132         free(ctx->devices);
1133 }
1134
1135 static struct fuse_opt bf_opts[] = {
1136         FUSE_OPT_END
1137 };
1138
1139 /*
1140  * Fuse option parsing helper -- returning 0 means we consumed the argument, 1
1141  * means we did not.
1142  */
1143 static int bf_opt_proc(void *data, const char *arg, int key,
1144     struct fuse_args *outargs)
1145 {
1146         struct bf_context *ctx = data;
1147
1148         switch (key) {
1149         case FUSE_OPT_KEY_NONOPT:
1150                 /* Just extract the first non-option string. */
1151                 if (!ctx->devices_str) {
1152                         ctx->devices_str = strdup(arg);
1153                         return 0;
1154                 }
1155                 return 1;
1156         }
1157
1158         return 1;
1159 }
1160
1161 /*
1162  * dev1:dev2 -> [ dev1, dev2 ]
1163  * dev       -> [ dev ]
1164  */
1165 static void tokenize_devices(struct bf_context *ctx)
1166 {
1167         char *devices_str = strdup(ctx->devices_str);
1168         char *devices_tmp = devices_str;
1169         char **devices = NULL;
1170         int nr = 0;
1171         char *dev = NULL;
1172
1173         while ((dev = strsep(&devices_tmp, ":"))) {
1174                 if (strlen(dev) > 0) {
1175                         devices = realloc(devices, (nr + 1) * sizeof *devices);
1176                         devices[nr] = strdup(dev);
1177                         nr++;
1178                 }
1179         }
1180
1181         if (!devices) {
1182                 devices = malloc(sizeof *devices);
1183                 devices[0] = strdup(ctx->devices_str);
1184                 nr = 1;
1185         }
1186
1187         ctx->devices = devices;
1188         ctx->nr_devices = nr;
1189
1190         free(devices_str);
1191 }
1192
/* Print the fusemount synopsis followed by an empty line to stdout. */
static void usage(char *argv[])
{
        const char *prog = argv[0];

        printf("Usage: %s fusemount [options] <dev>[:dev2:...] <mountpoint>\n",
               prog);
        fputs("\n", stdout);
}
1199
1200 int cmd_fusemount(int argc, char *argv[])
1201 {
1202         struct fuse_args args = FUSE_ARGS_INIT(argc, argv);
1203         struct bch_opts bch_opts = bch2_opts_empty();
1204         struct bf_context ctx = { 0 };
1205         struct bch_fs *c = NULL;
1206         int ret = 0, i;
1207
1208         /* Parse arguments. */
1209         if (fuse_opt_parse(&args, &ctx, bf_opts, bf_opt_proc) < 0)
1210                 die("fuse_opt_parse err: %m");
1211
1212         struct fuse_cmdline_opts fuse_opts;
1213         if (fuse_parse_cmdline(&args, &fuse_opts) < 0)
1214                 die("fuse_parse_cmdline err: %m");
1215
1216         if (fuse_opts.show_help) {
1217                 usage(argv);
1218                 fuse_cmdline_help();
1219                 fuse_lowlevel_help();
1220                 ret = 0;
1221                 goto out;
1222         }
1223         if (fuse_opts.show_version) {
1224                 /* TODO: Show bcachefs version. */
1225                 printf("FUSE library version %s\n", fuse_pkgversion());
1226                 fuse_lowlevel_version();
1227                 ret = 0;
1228                 goto out;
1229         }
1230         if (!fuse_opts.mountpoint) {
1231                 usage(argv);
1232                 printf("Please supply a mountpoint.\n");
1233                 ret = 1;
1234                 goto out;
1235         }
1236         if (!ctx.devices_str) {
1237                 usage(argv);
1238                 printf("Please specify a device or device1:device2:...\n");
1239                 ret = 1;
1240                 goto out;
1241         }
1242         tokenize_devices(&ctx);
1243
1244         struct printbuf fsname = PRINTBUF;
1245         prt_printf(&fsname, "fsname=");
1246         for (i = 0; i < ctx.nr_devices; ++i) {
1247                 if (i)
1248                         prt_str(&fsname, ":");
1249                 prt_str(&fsname, ctx.devices[i]);
1250         }
1251
1252         fuse_opt_add_arg(&args, "-o");
1253         fuse_opt_add_arg(&args, fsname.buf);
1254
1255         /* Open bch */
1256         printf("Opening bcachefs filesystem on:\n");
1257         for (i = 0; i < ctx.nr_devices; ++i)
1258                 printf("\t%s\n", ctx.devices[i]);
1259
1260         c = bch2_fs_open(ctx.devices, ctx.nr_devices, bch_opts);
1261         if (IS_ERR(c))
1262                 die("error opening %s: %s", ctx.devices_str,
1263                     bch2_err_str(PTR_ERR(c)));
1264
1265         /* Fuse */
1266         struct fuse_session *se =
1267                 fuse_session_new(&args, &bcachefs_fuse_ops,
1268                                  sizeof(bcachefs_fuse_ops), c);
1269         if (!se)
1270                 die("fuse_lowlevel_new err: %m");
1271
1272         if (fuse_set_signal_handlers(se) < 0)
1273                 die("fuse_set_signal_handlers err: %m");
1274
1275         if (fuse_session_mount(se, fuse_opts.mountpoint))
1276                 die("fuse_mount err: %m");
1277
1278         /* This print statement is a trigger for tests. */
1279         printf("Fuse mount initialized.\n");
1280
1281         if (fuse_opts.foreground == 0){
1282                 printf("Fuse forcing to foreground mode, due gcc constructors usage.\n");
1283                 fuse_opts.foreground = 1;
1284         }
1285
1286         fuse_daemonize(fuse_opts.foreground);
1287
1288         ret = fuse_session_loop(se);
1289
1290         /* Cleanup */
1291         fuse_session_unmount(se);
1292         fuse_remove_signal_handlers(se);
1293         fuse_session_destroy(se);
1294
1295 out:
1296         free(fuse_opts.mountpoint);
1297         fuse_opt_free_args(&args);
1298         bf_context_free(&ctx);
1299
1300         return ret ? 1 : 0;
1301 }
1302
1303 #endif /* BCACHEFS_FUSE */