]> git.sesse.net Git - bcachefs-tools-debian/blob - libbcachefs/fs.c
43688cd34de9a992a3c7833be717fa08488a998f
[bcachefs-tools-debian] / libbcachefs / fs.c
1 #ifndef NO_BCACHEFS_FS
2
3 #include "bcachefs.h"
4 #include "acl.h"
5 #include "btree_update.h"
6 #include "buckets.h"
7 #include "chardev.h"
8 #include "dirent.h"
9 #include "extents.h"
10 #include "fs.h"
11 #include "fs-io.h"
12 #include "fs-ioctl.h"
13 #include "fsck.h"
14 #include "inode.h"
15 #include "journal.h"
16 #include "keylist.h"
17 #include "super.h"
18 #include "xattr.h"
19
20 #include <linux/aio.h>
21 #include <linux/backing-dev.h>
22 #include <linux/exportfs.h>
23 #include <linux/module.h>
24 #include <linux/posix_acl.h>
25 #include <linux/random.h>
26 #include <linux/statfs.h>
27 #include <linux/xattr.h>
28
29 static struct kmem_cache *bch2_inode_cache;
30
31 static void bch2_vfs_inode_init(struct bch_fs *,
32                                 struct bch_inode_info *,
33                                 struct bch_inode_unpacked *);
34
35 /*
36  * I_SIZE_DIRTY requires special handling:
37  *
38  * To the recovery code, the flag means that there is stale data past i_size
39  * that needs to be deleted; it's used for implementing atomic appends and
40  * truncates.
41  *
42  * On append, we set I_SIZE_DIRTY before doing the write, then after the write
43  * we clear I_SIZE_DIRTY atomically with updating i_size to the new larger size
44  * that exposes the data we just wrote.
45  *
46  * On truncate, it's the reverse: We set I_SIZE_DIRTY atomically with setting
47  * i_size to the new smaller size, then we delete the data that we just made
48  * invisible, and then we clear I_SIZE_DIRTY.
49  *
50  * Because there can be multiple appends in flight at a time, we need a refcount
51  * (i_size_dirty_count) instead of manipulating the flag directly. Nonzero
52  * refcount means I_SIZE_DIRTY is set, zero means it's cleared.
53  *
54  * Because write_inode() can be called at any time, i_size_dirty_count means
55  * something different to the runtime code - it means to write_inode() "don't
56  * update i_size yet".
57  *
58  * We don't clear I_SIZE_DIRTY directly, we let write_inode() clear it when
59  * i_size_dirty_count is zero - but the reverse is not true, I_SIZE_DIRTY must
60  * be set explicitly.
61  */
62
63 int __must_check __bch2_write_inode(struct bch_fs *c,
64                                     struct bch_inode_info *inode,
65                                     inode_set_fn set,
66                                     void *p)
67 {
68         struct btree_iter iter;
69         struct bch_inode_unpacked inode_u;
70         struct bkey_inode_buf inode_p;
71         u64 inum = inode->v.i_ino;
72         unsigned i_nlink = READ_ONCE(inode->v.i_nlink);
73         int ret;
74
75         /*
76          * We can't write an inode with i_nlink == 0 because it's stored biased;
77          * however, we don't need to because if i_nlink is 0 the inode is
78          * getting deleted when it's evicted.
79          */
80         if (!i_nlink)
81                 return 0;
82
83         lockdep_assert_held(&inode->ei_update_lock);
84
85         bch2_btree_iter_init(&iter, c, BTREE_ID_INODES, POS(inum, 0),
86                              BTREE_ITER_INTENT);
87
88         do {
89                 struct bkey_s_c k = bch2_btree_iter_peek_with_holes(&iter);
90
91                 if ((ret = btree_iter_err(k)))
92                         goto out;
93
94                 if (WARN_ONCE(k.k->type != BCH_INODE_FS,
95                               "inode %llu not found when updating", inum)) {
96                         bch2_btree_iter_unlock(&iter);
97                         return -ENOENT;
98                 }
99
100                 ret = bch2_inode_unpack(bkey_s_c_to_inode(k), &inode_u);
101                 if (WARN_ONCE(ret,
102                               "error %i unpacking inode %llu", ret, inum)) {
103                         ret = -ENOENT;
104                         break;
105                 }
106
107                 if (set) {
108                         ret = set(inode, &inode_u, p);
109                         if (ret)
110                                 goto out;
111                 }
112
113                 BUG_ON(i_nlink < nlink_bias(inode->v.i_mode));
114
115                 inode_u.bi_mode = inode->v.i_mode;
116                 inode_u.bi_uid  = i_uid_read(&inode->v);
117                 inode_u.bi_gid  = i_gid_read(&inode->v);
118                 inode_u.bi_nlink= i_nlink - nlink_bias(inode->v.i_mode);
119                 inode_u.bi_dev  = inode->v.i_rdev;
120                 inode_u.bi_atime= timespec_to_bch2_time(c, inode->v.i_atime);
121                 inode_u.bi_mtime= timespec_to_bch2_time(c, inode->v.i_mtime);
122                 inode_u.bi_ctime= timespec_to_bch2_time(c, inode->v.i_ctime);
123
124                 bch2_inode_pack(&inode_p, &inode_u);
125
126                 ret = bch2_btree_insert_at(c, NULL, NULL,
127                                 &inode->ei_journal_seq,
128                                 BTREE_INSERT_ATOMIC|
129                                 BTREE_INSERT_NOFAIL,
130                                 BTREE_INSERT_ENTRY(&iter, &inode_p.inode.k_i));
131         } while (ret == -EINTR);
132
133         if (!ret) {
134                 inode->ei_size  = inode_u.bi_size;
135                 inode->ei_flags = inode_u.bi_flags;
136         }
137 out:
138         bch2_btree_iter_unlock(&iter);
139
140         return ret < 0 ? ret : 0;
141 }
142
143 int __must_check bch2_write_inode(struct bch_fs *c,
144                                   struct bch_inode_info *inode)
145 {
146         return __bch2_write_inode(c, inode, NULL, NULL);
147 }
148
149 int bch2_inc_nlink(struct bch_fs *c, struct bch_inode_info *inode)
150 {
151         int ret;
152
153         mutex_lock(&inode->ei_update_lock);
154         inc_nlink(&inode->v);
155         ret = bch2_write_inode(c, inode);
156         mutex_unlock(&inode->ei_update_lock);
157
158         return ret;
159 }
160
161 int bch2_dec_nlink(struct bch_fs *c, struct bch_inode_info *inode)
162 {
163         int ret = 0;
164
165         mutex_lock(&inode->ei_update_lock);
166         drop_nlink(&inode->v);
167         ret = bch2_write_inode(c, inode);
168         mutex_unlock(&inode->ei_update_lock);
169
170         return ret;
171 }
172
173 static struct inode *bch2_vfs_inode_get(struct bch_fs *c, u64 inum)
174 {
175         struct bch_inode_unpacked inode_u;
176         struct bch_inode_info *inode;
177         int ret;
178
179         inode = to_bch_ei(iget_locked(c->vfs_sb, inum));
180         if (unlikely(!inode))
181                 return ERR_PTR(-ENOMEM);
182         if (!(inode->v.i_state & I_NEW))
183                 return &inode->v;
184
185         ret = bch2_inode_find_by_inum(c, inum, &inode_u);
186         if (ret) {
187                 iget_failed(&inode->v);
188                 return ERR_PTR(ret);
189         }
190
191         bch2_vfs_inode_init(c, inode, &inode_u);
192
193         inode->ei_journal_seq = bch2_inode_journal_seq(&c->journal, inum);
194
195         unlock_new_inode(&inode->v);
196
197         return &inode->v;
198 }
199
200 static struct bch_inode_info *bch2_vfs_inode_create(struct bch_fs *c,
201                                                     struct bch_inode_info *dir,
202                                                     umode_t mode, dev_t rdev)
203 {
204         struct posix_acl *default_acl = NULL, *acl = NULL;
205         struct bch_inode_info *inode;
206         struct bch_inode_unpacked inode_u;
207         int ret;
208
209         inode = to_bch_ei(new_inode(c->vfs_sb));
210         if (unlikely(!inode))
211                 return ERR_PTR(-ENOMEM);
212
213         inode_init_owner(&inode->v, &dir->v, mode);
214
215 #ifdef CONFIG_BCACHEFS_POSIX_ACL
216         ret = posix_acl_create(&dir->v, &inode->v.i_mode, &default_acl, &acl);
217         if (ret) {
218                 make_bad_inode(&inode->v);
219                 goto err;
220         }
221 #endif
222
223         bch2_inode_init(c, &inode_u,
224                         i_uid_read(&inode->v),
225                         i_gid_read(&inode->v),
226                         inode->v.i_mode, rdev);
227         ret = bch2_inode_create(c, &inode_u,
228                                 BLOCKDEV_INODE_MAX, 0,
229                                 &c->unused_inode_hint);
230         if (unlikely(ret)) {
231                 /*
232                  * indicate to bch_evict_inode that the inode was never actually
233                  * created:
234                  */
235                 make_bad_inode(&inode->v);
236                 goto err;
237         }
238
239         bch2_vfs_inode_init(c, inode, &inode_u);
240
241         if (default_acl) {
242                 ret = bch2_set_acl(&inode->v, default_acl, ACL_TYPE_DEFAULT);
243                 if (unlikely(ret))
244                         goto err;
245         }
246
247         if (acl) {
248                 ret = bch2_set_acl(&inode->v, acl, ACL_TYPE_ACCESS);
249                 if (unlikely(ret))
250                         goto err;
251         }
252
253         insert_inode_hash(&inode->v);
254         atomic_long_inc(&c->nr_inodes);
255 out:
256         posix_acl_release(default_acl);
257         posix_acl_release(acl);
258         return inode;
259 err:
260         clear_nlink(&inode->v);
261         iput(&inode->v);
262         inode = ERR_PTR(ret);
263         goto out;
264 }
265
266 static int bch2_vfs_dirent_create(struct bch_fs *c,
267                                   struct bch_inode_info *dir,
268                                   u8 type, const struct qstr *name,
269                                   u64 dst)
270 {
271         int ret;
272
273         ret = bch2_dirent_create(c, dir->v.i_ino, &dir->ei_str_hash,
274                                 type, name, dst,
275                                 &dir->ei_journal_seq,
276                                 BCH_HASH_SET_MUST_CREATE);
277         if (unlikely(ret))
278                 return ret;
279
280         dir->v.i_mtime = dir->v.i_ctime = current_fs_time(c->vfs_sb);
281         mark_inode_dirty_sync(&dir->v);
282         return 0;
283 }
284
285 static int __bch2_create(struct bch_inode_info *dir, struct dentry *dentry,
286                          umode_t mode, dev_t rdev)
287 {
288         struct bch_fs *c = dir->v.i_sb->s_fs_info;
289         struct bch_inode_info *inode;
290         int ret;
291
292         inode = bch2_vfs_inode_create(c, dir, mode, rdev);
293         if (unlikely(IS_ERR(inode)))
294                 return PTR_ERR(inode);
295
296         ret = bch2_vfs_dirent_create(c, dir, mode_to_type(mode),
297                                      &dentry->d_name, inode->v.i_ino);
298         if (unlikely(ret)) {
299                 clear_nlink(&inode->v);
300                 iput(&inode->v);
301                 return ret;
302         }
303
304         if (dir->ei_journal_seq > inode->ei_journal_seq)
305                 inode->ei_journal_seq = dir->ei_journal_seq;
306
307         d_instantiate(dentry, &inode->v);
308         return 0;
309 }
310
311 /* methods */
312
313 static struct dentry *bch2_lookup(struct inode *vdir, struct dentry *dentry,
314                                   unsigned int flags)
315 {
316         struct bch_fs *c = vdir->i_sb->s_fs_info;
317         struct bch_inode_info *dir = to_bch_ei(vdir);
318         struct inode *vinode = NULL;
319         u64 inum;
320
321         inum = bch2_dirent_lookup(c, dir->v.i_ino,
322                                   &dir->ei_str_hash,
323                                   &dentry->d_name);
324
325         if (inum)
326                 vinode = bch2_vfs_inode_get(c, inum);
327
328         return d_splice_alias(vinode, dentry);
329 }
330
331 static int bch2_create(struct inode *vdir, struct dentry *dentry,
332                        umode_t mode, bool excl)
333 {
334         return __bch2_create(to_bch_ei(vdir), dentry, mode|S_IFREG, 0);
335 }
336
337 static int bch2_link(struct dentry *old_dentry, struct inode *vdir,
338                      struct dentry *dentry)
339 {
340         struct bch_fs *c = vdir->i_sb->s_fs_info;
341         struct bch_inode_info *dir = to_bch_ei(vdir);
342         struct bch_inode_info *inode = to_bch_ei(old_dentry->d_inode);
343         int ret;
344
345         lockdep_assert_held(&inode->v.i_rwsem);
346
347         inode->v.i_ctime = current_fs_time(dir->v.i_sb);
348
349         ret = bch2_inc_nlink(c, inode);
350         if (ret)
351                 return ret;
352
353         ihold(&inode->v);
354
355         ret = bch2_vfs_dirent_create(c, dir, mode_to_type(inode->v.i_mode),
356                                      &dentry->d_name, inode->v.i_ino);
357         if (unlikely(ret)) {
358                 bch2_dec_nlink(c, inode);
359                 iput(&inode->v);
360                 return ret;
361         }
362
363         d_instantiate(dentry, &inode->v);
364         return 0;
365 }
366
367 static int bch2_unlink(struct inode *vdir, struct dentry *dentry)
368 {
369         struct bch_fs *c = vdir->i_sb->s_fs_info;
370         struct bch_inode_info *dir = to_bch_ei(vdir);
371         struct bch_inode_info *inode = to_bch_ei(dentry->d_inode);
372         int ret;
373
374         lockdep_assert_held(&inode->v.i_rwsem);
375
376         ret = bch2_dirent_delete(c, dir->v.i_ino, &dir->ei_str_hash,
377                                  &dentry->d_name, &dir->ei_journal_seq);
378         if (ret)
379                 return ret;
380
381         if (dir->ei_journal_seq > inode->ei_journal_seq)
382                 inode->ei_journal_seq = dir->ei_journal_seq;
383
384         inode->v.i_ctime = dir->v.i_ctime;
385
386         if (S_ISDIR(inode->v.i_mode)) {
387                 bch2_dec_nlink(c, dir);
388                 drop_nlink(&inode->v);
389         }
390
391         bch2_dec_nlink(c, inode);
392
393         return 0;
394 }
395
396 static int bch2_symlink(struct inode *vdir, struct dentry *dentry,
397                         const char *symname)
398 {
399         struct bch_fs *c = vdir->i_sb->s_fs_info;
400         struct bch_inode_info *dir = to_bch_ei(vdir), *inode;
401         int ret;
402
403         inode = bch2_vfs_inode_create(c, dir, S_IFLNK|S_IRWXUGO, 0);
404         if (unlikely(IS_ERR(inode)))
405                 return PTR_ERR(inode);
406
407         inode_lock(&inode->v);
408         ret = page_symlink(&inode->v, symname, strlen(symname) + 1);
409         inode_unlock(&inode->v);
410
411         if (unlikely(ret))
412                 goto err;
413
414         ret = filemap_write_and_wait_range(inode->v.i_mapping, 0, LLONG_MAX);
415         if (unlikely(ret))
416                 goto err;
417
418         /* XXX: racy */
419         if (dir->ei_journal_seq < inode->ei_journal_seq)
420                 dir->ei_journal_seq = inode->ei_journal_seq;
421
422         ret = bch2_vfs_dirent_create(c, dir, DT_LNK, &dentry->d_name,
423                                      inode->v.i_ino);
424         if (unlikely(ret))
425                 goto err;
426
427         d_instantiate(dentry, &inode->v);
428         return 0;
429 err:
430         clear_nlink(&inode->v);
431         iput(&inode->v);
432         return ret;
433 }
434
435 static int bch2_mkdir(struct inode *vdir, struct dentry *dentry, umode_t mode)
436 {
437         struct bch_fs *c = vdir->i_sb->s_fs_info;
438         struct bch_inode_info *dir = to_bch_ei(vdir);
439         int ret;
440
441         lockdep_assert_held(&dir->v.i_rwsem);
442
443         ret = __bch2_create(dir, dentry, mode|S_IFDIR, 0);
444         if (unlikely(ret))
445                 return ret;
446
447         bch2_inc_nlink(c, dir);
448
449         return 0;
450 }
451
452 static int bch2_rmdir(struct inode *vdir, struct dentry *dentry)
453 {
454         struct bch_fs *c = vdir->i_sb->s_fs_info;
455
456         if (bch2_empty_dir(c, dentry->d_inode->i_ino))
457                 return -ENOTEMPTY;
458
459         return bch2_unlink(vdir, dentry);
460 }
461
462 static int bch2_mknod(struct inode *vdir, struct dentry *dentry,
463                       umode_t mode, dev_t rdev)
464 {
465         return __bch2_create(to_bch_ei(vdir), dentry, mode, rdev);
466 }
467
468 static int bch2_rename(struct bch_fs *c,
469                        struct bch_inode_info *old_dir,
470                        struct dentry *old_dentry,
471                        struct bch_inode_info *new_dir,
472                        struct dentry *new_dentry)
473 {
474         struct bch_inode_info *old_inode = to_bch_ei(old_dentry->d_inode);
475         struct bch_inode_info *new_inode = to_bch_ei(new_dentry->d_inode);
476         struct timespec now = current_fs_time(old_dir->v.i_sb);
477         int ret;
478
479         lockdep_assert_held(&old_dir->v.i_rwsem);
480         lockdep_assert_held(&new_dir->v.i_rwsem);
481
482         if (new_inode)
483                 filemap_write_and_wait_range(old_inode->v.i_mapping,
484                                              0, LLONG_MAX);
485
486         if (new_inode && S_ISDIR(old_inode->v.i_mode)) {
487                 lockdep_assert_held(&new_inode->v.i_rwsem);
488
489                 if (!S_ISDIR(new_inode->v.i_mode))
490                         return -ENOTDIR;
491
492                 if (bch2_empty_dir(c, new_inode->v.i_ino))
493                         return -ENOTEMPTY;
494
495                 ret = bch2_dirent_rename(c,
496                                 old_dir, &old_dentry->d_name,
497                                 new_dir, &new_dentry->d_name,
498                                 &old_inode->ei_journal_seq, BCH_RENAME_OVERWRITE);
499                 if (unlikely(ret))
500                         return ret;
501
502                 clear_nlink(&new_inode->v);
503                 bch2_dec_nlink(c, old_dir);
504         } else if (new_inode) {
505                 lockdep_assert_held(&new_inode->v.i_rwsem);
506
507                 ret = bch2_dirent_rename(c,
508                                 old_dir, &old_dentry->d_name,
509                                 new_dir, &new_dentry->d_name,
510                                 &old_inode->ei_journal_seq, BCH_RENAME_OVERWRITE);
511                 if (unlikely(ret))
512                         return ret;
513
514                 new_inode->v.i_ctime = now;
515                 bch2_dec_nlink(c, new_inode);
516         } else if (S_ISDIR(old_inode->v.i_mode)) {
517                 ret = bch2_dirent_rename(c,
518                                 old_dir, &old_dentry->d_name,
519                                 new_dir, &new_dentry->d_name,
520                                 &old_inode->ei_journal_seq, BCH_RENAME);
521                 if (unlikely(ret))
522                         return ret;
523
524                 bch2_inc_nlink(c, new_dir);
525                 bch2_dec_nlink(c, old_dir);
526         } else {
527                 ret = bch2_dirent_rename(c,
528                                 old_dir, &old_dentry->d_name,
529                                 new_dir, &new_dentry->d_name,
530                                 &old_inode->ei_journal_seq, BCH_RENAME);
531                 if (unlikely(ret))
532                         return ret;
533         }
534
535         old_dir->v.i_ctime = old_dir->v.i_mtime = now;
536         new_dir->v.i_ctime = new_dir->v.i_mtime = now;
537         mark_inode_dirty_sync(&old_dir->v);
538         mark_inode_dirty_sync(&new_dir->v);
539
540         old_inode->v.i_ctime = now;
541         mark_inode_dirty_sync(&old_inode->v);
542
543         return 0;
544 }
545
546 static int bch2_rename_exchange(struct bch_fs *c,
547                                 struct bch_inode_info *old_dir,
548                                 struct dentry *old_dentry,
549                                 struct bch_inode_info *new_dir,
550                                 struct dentry *new_dentry)
551 {
552         struct bch_inode_info *old_inode = to_bch_ei(old_dentry->d_inode);
553         struct bch_inode_info *new_inode = to_bch_ei(new_dentry->d_inode);
554         struct timespec now = current_fs_time(old_dir->v.i_sb);
555         int ret;
556
557         ret = bch2_dirent_rename(c,
558                                  old_dir, &old_dentry->d_name,
559                                  new_dir, &new_dentry->d_name,
560                                  &old_inode->ei_journal_seq, BCH_RENAME_EXCHANGE);
561         if (unlikely(ret))
562                 return ret;
563
564         if (S_ISDIR(old_inode->v.i_mode) !=
565             S_ISDIR(new_inode->v.i_mode)) {
566                 if (S_ISDIR(old_inode->v.i_mode)) {
567                         bch2_inc_nlink(c, new_dir);
568                         bch2_dec_nlink(c, old_dir);
569                 } else {
570                         bch2_dec_nlink(c, new_dir);
571                         bch2_inc_nlink(c, old_dir);
572                 }
573         }
574
575         old_dir->v.i_ctime = old_dir->v.i_mtime = now;
576         new_dir->v.i_ctime = new_dir->v.i_mtime = now;
577         mark_inode_dirty_sync(&old_dir->v);
578         mark_inode_dirty_sync(&new_dir->v);
579
580         old_inode->v.i_ctime = now;
581         new_inode->v.i_ctime = now;
582         mark_inode_dirty_sync(&old_inode->v);
583         mark_inode_dirty_sync(&new_inode->v);
584
585         return 0;
586 }
587
588 static int bch2_rename2(struct inode *old_vdir, struct dentry *old_dentry,
589                         struct inode *new_vdir, struct dentry *new_dentry,
590                         unsigned flags)
591 {
592         struct bch_fs *c = old_vdir->i_sb->s_fs_info;
593         struct bch_inode_info *old_dir = to_bch_ei(old_vdir);
594         struct bch_inode_info *new_dir = to_bch_ei(new_vdir);
595
596         if (flags & ~(RENAME_NOREPLACE|RENAME_EXCHANGE))
597                 return -EINVAL;
598
599         if (flags & RENAME_EXCHANGE)
600                 return bch2_rename_exchange(c, old_dir, old_dentry,
601                                             new_dir, new_dentry);
602
603         return bch2_rename(c, old_dir, old_dentry, new_dir, new_dentry);
604 }
605
606 static int bch2_setattr(struct dentry *dentry, struct iattr *iattr)
607 {
608         struct bch_inode_info *inode = to_bch_ei(dentry->d_inode);
609         struct bch_fs *c = inode->v.i_sb->s_fs_info;
610         int ret = 0;
611
612         lockdep_assert_held(&inode->v.i_rwsem);
613
614         ret = setattr_prepare(dentry, iattr);
615         if (ret)
616                 return ret;
617
618         if (iattr->ia_valid & ATTR_SIZE) {
619                 ret = bch2_truncate(inode, iattr);
620         } else {
621                 mutex_lock(&inode->ei_update_lock);
622                 setattr_copy(&inode->v, iattr);
623                 ret = bch2_write_inode(c, inode);
624                 mutex_unlock(&inode->ei_update_lock);
625         }
626
627         if (unlikely(ret))
628                 return ret;
629
630         if (iattr->ia_valid & ATTR_MODE)
631                 ret = posix_acl_chmod(&inode->v, inode->v.i_mode);
632
633         return ret;
634 }
635
636 static int bch2_tmpfile(struct inode *vdir, struct dentry *dentry, umode_t mode)
637 {
638         struct bch_fs *c = vdir->i_sb->s_fs_info;
639         struct bch_inode_info *dir = to_bch_ei(vdir);
640         struct bch_inode_info *inode;
641
642         /* XXX: i_nlink should be 0? */
643         inode = bch2_vfs_inode_create(c, dir, mode, 0);
644         if (unlikely(IS_ERR(inode)))
645                 return PTR_ERR(inode);
646
647         d_tmpfile(dentry, &inode->v);
648         return 0;
649 }
650
651 static int bch2_fill_extent(struct fiemap_extent_info *info,
652                             const struct bkey_i *k, unsigned flags)
653 {
654         if (bkey_extent_is_data(&k->k)) {
655                 struct bkey_s_c_extent e = bkey_i_to_s_c_extent(k);
656                 const struct bch_extent_ptr *ptr;
657                 struct bch_extent_crc_unpacked crc;
658                 int ret;
659
660                 extent_for_each_ptr_crc(e, ptr, crc) {
661                         int flags2 = 0;
662                         u64 offset = ptr->offset;
663
664                         if (crc.compression_type)
665                                 flags2 |= FIEMAP_EXTENT_ENCODED;
666                         else
667                                 offset += crc.offset;
668
669                         if ((offset & (PAGE_SECTORS - 1)) ||
670                             (e.k->size & (PAGE_SECTORS - 1)))
671                                 flags2 |= FIEMAP_EXTENT_NOT_ALIGNED;
672
673                         ret = fiemap_fill_next_extent(info,
674                                                       bkey_start_offset(e.k) << 9,
675                                                       offset << 9,
676                                                       e.k->size << 9, flags|flags2);
677                         if (ret)
678                                 return ret;
679                 }
680
681                 return 0;
682         } else if (k->k.type == BCH_RESERVATION) {
683                 return fiemap_fill_next_extent(info,
684                                                bkey_start_offset(&k->k) << 9,
685                                                0, k->k.size << 9,
686                                                flags|
687                                                FIEMAP_EXTENT_DELALLOC|
688                                                FIEMAP_EXTENT_UNWRITTEN);
689         } else {
690                 BUG();
691         }
692 }
693
694 static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
695                        u64 start, u64 len)
696 {
697         struct bch_fs *c = vinode->i_sb->s_fs_info;
698         struct bch_inode_info *ei = to_bch_ei(vinode);
699         struct btree_iter iter;
700         struct bkey_s_c k;
701         BKEY_PADDED(k) tmp;
702         bool have_extent = false;
703         int ret = 0;
704
705         if (start + len < start)
706                 return -EINVAL;
707
708         for_each_btree_key(&iter, c, BTREE_ID_EXTENTS,
709                            POS(ei->v.i_ino, start >> 9), 0, k)
710                 if (bkey_extent_is_data(k.k) ||
711                     k.k->type == BCH_RESERVATION) {
712                         if (bkey_cmp(bkey_start_pos(k.k),
713                                      POS(ei->v.i_ino, (start + len) >> 9)) >= 0)
714                                 break;
715
716                         if (have_extent) {
717                                 ret = bch2_fill_extent(info, &tmp.k, 0);
718                                 if (ret)
719                                         goto out;
720                         }
721
722                         bkey_reassemble(&tmp.k, k);
723                         have_extent = true;
724                 }
725
726         if (have_extent)
727                 ret = bch2_fill_extent(info, &tmp.k, FIEMAP_EXTENT_LAST);
728 out:
729         bch2_btree_iter_unlock(&iter);
730         return ret < 0 ? ret : 0;
731 }
732
733 static const struct vm_operations_struct bch_vm_ops = {
734         .fault          = filemap_fault,
735         .map_pages      = filemap_map_pages,
736         .page_mkwrite   = bch2_page_mkwrite,
737 };
738
739 static int bch2_mmap(struct file *file, struct vm_area_struct *vma)
740 {
741         file_accessed(file);
742
743         vma->vm_ops = &bch_vm_ops;
744         return 0;
745 }
746
747 /* Directories: */
748
749 static loff_t bch2_dir_llseek(struct file *file, loff_t offset, int whence)
750 {
751         return generic_file_llseek_size(file, offset, whence,
752                                         S64_MAX, S64_MAX);
753 }
754
755 static int bch2_vfs_readdir(struct file *file, struct dir_context *ctx)
756 {
757         struct bch_fs *c = file_inode(file)->i_sb->s_fs_info;
758
759         return bch2_readdir(c, file, ctx);
760 }
761
762 static const struct file_operations bch_file_operations = {
763         .llseek         = bch2_llseek,
764         .read_iter      = generic_file_read_iter,
765         .write_iter     = bch2_write_iter,
766         .mmap           = bch2_mmap,
767         .open           = generic_file_open,
768         .fsync          = bch2_fsync,
769         .splice_read    = generic_file_splice_read,
770         .splice_write   = iter_file_splice_write,
771         .fallocate      = bch2_fallocate_dispatch,
772         .unlocked_ioctl = bch2_fs_file_ioctl,
773 #ifdef CONFIG_COMPAT
774         .compat_ioctl   = bch2_compat_fs_ioctl,
775 #endif
776 };
777
778 static const struct inode_operations bch_file_inode_operations = {
779         .setattr        = bch2_setattr,
780         .fiemap         = bch2_fiemap,
781         .listxattr      = bch2_xattr_list,
782 #ifdef CONFIG_BCACHEFS_POSIX_ACL
783         .get_acl        = bch2_get_acl,
784         .set_acl        = bch2_set_acl,
785 #endif
786 };
787
788 static const struct inode_operations bch_dir_inode_operations = {
789         .lookup         = bch2_lookup,
790         .create         = bch2_create,
791         .link           = bch2_link,
792         .unlink         = bch2_unlink,
793         .symlink        = bch2_symlink,
794         .mkdir          = bch2_mkdir,
795         .rmdir          = bch2_rmdir,
796         .mknod          = bch2_mknod,
797         .rename         = bch2_rename2,
798         .setattr        = bch2_setattr,
799         .tmpfile        = bch2_tmpfile,
800         .listxattr      = bch2_xattr_list,
801 #ifdef CONFIG_BCACHEFS_POSIX_ACL
802         .get_acl        = bch2_get_acl,
803         .set_acl        = bch2_set_acl,
804 #endif
805 };
806
807 static const struct file_operations bch_dir_file_operations = {
808         .llseek         = bch2_dir_llseek,
809         .read           = generic_read_dir,
810         .iterate        = bch2_vfs_readdir,
811         .fsync          = bch2_fsync,
812         .unlocked_ioctl = bch2_fs_file_ioctl,
813 #ifdef CONFIG_COMPAT
814         .compat_ioctl   = bch2_compat_fs_ioctl,
815 #endif
816 };
817
818 static const struct inode_operations bch_symlink_inode_operations = {
819         .get_link       = page_get_link,
820         .setattr        = bch2_setattr,
821         .listxattr      = bch2_xattr_list,
822 #ifdef CONFIG_BCACHEFS_POSIX_ACL
823         .get_acl        = bch2_get_acl,
824         .set_acl        = bch2_set_acl,
825 #endif
826 };
827
828 static const struct inode_operations bch_special_inode_operations = {
829         .setattr        = bch2_setattr,
830         .listxattr      = bch2_xattr_list,
831 #ifdef CONFIG_BCACHEFS_POSIX_ACL
832         .get_acl        = bch2_get_acl,
833         .set_acl        = bch2_set_acl,
834 #endif
835 };
836
837 static const struct address_space_operations bch_address_space_operations = {
838         .writepage      = bch2_writepage,
839         .readpage       = bch2_readpage,
840         .writepages     = bch2_writepages,
841         .readpages      = bch2_readpages,
842         .set_page_dirty = bch2_set_page_dirty,
843         .write_begin    = bch2_write_begin,
844         .write_end      = bch2_write_end,
845         .invalidatepage = bch2_invalidatepage,
846         .releasepage    = bch2_releasepage,
847         .direct_IO      = bch2_direct_IO,
848 #ifdef CONFIG_MIGRATION
849         .migratepage    = bch2_migrate_page,
850 #endif
851         .error_remove_page = generic_error_remove_page,
852 };
853
854 static struct inode *bch2_nfs_get_inode(struct super_block *sb,
855                 u64 ino, u32 generation)
856 {
857         struct bch_fs *c = sb->s_fs_info;
858         struct inode *vinode;
859
860         if (ino < BCACHEFS_ROOT_INO)
861                 return ERR_PTR(-ESTALE);
862
863         vinode = bch2_vfs_inode_get(c, ino);
864         if (IS_ERR(vinode))
865                 return ERR_CAST(vinode);
866         if (generation && vinode->i_generation != generation) {
867                 /* we didn't find the right inode.. */
868                 iput(vinode);
869                 return ERR_PTR(-ESTALE);
870         }
871         return vinode;
872 }
873
874 static struct dentry *bch2_fh_to_dentry(struct super_block *sb, struct fid *fid,
875                 int fh_len, int fh_type)
876 {
877         return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
878                                     bch2_nfs_get_inode);
879 }
880
881 static struct dentry *bch2_fh_to_parent(struct super_block *sb, struct fid *fid,
882                 int fh_len, int fh_type)
883 {
884         return generic_fh_to_parent(sb, fid, fh_len, fh_type,
885                                     bch2_nfs_get_inode);
886 }
887
888 static const struct export_operations bch_export_ops = {
889         .fh_to_dentry   = bch2_fh_to_dentry,
890         .fh_to_parent   = bch2_fh_to_parent,
891         //.get_parent   = bch2_get_parent,
892 };
893
894 static void bch2_vfs_inode_init(struct bch_fs *c,
895                                 struct bch_inode_info *inode,
896                                 struct bch_inode_unpacked *bi)
897 {
898         inode->v.i_mode         = bi->bi_mode;
899         i_uid_write(&inode->v, bi->bi_uid);
900         i_gid_write(&inode->v, bi->bi_gid);
901         inode->v.i_blocks       = bi->bi_sectors;
902         inode->v.i_ino          = bi->bi_inum;
903         set_nlink(&inode->v, bi->bi_nlink + nlink_bias(inode->v.i_mode));
904         inode->v.i_rdev         = bi->bi_dev;
905         inode->v.i_generation   = bi->bi_generation;
906         inode->v.i_size         = bi->bi_size;
907         inode->v.i_atime        = bch2_time_to_timespec(c, bi->bi_atime);
908         inode->v.i_mtime        = bch2_time_to_timespec(c, bi->bi_mtime);
909         inode->v.i_ctime        = bch2_time_to_timespec(c, bi->bi_ctime);
910
911         inode->ei_journal_seq   = 0;
912         inode->ei_size          = bi->bi_size;
913         inode->ei_flags         = bi->bi_flags;
914         atomic64_set(&inode->ei_sectors, bi->bi_sectors);
915         inode->ei_str_hash      = bch2_hash_info_init(c, bi);
916
917         bch2_inode_flags_to_vfs(inode);
918
919         inode->v.i_mapping->a_ops = &bch_address_space_operations;
920
921         switch (inode->v.i_mode & S_IFMT) {
922         case S_IFREG:
923                 inode->v.i_op   = &bch_file_inode_operations;
924                 inode->v.i_fop  = &bch_file_operations;
925                 break;
926         case S_IFDIR:
927                 inode->v.i_op   = &bch_dir_inode_operations;
928                 inode->v.i_fop  = &bch_dir_file_operations;
929                 break;
930         case S_IFLNK:
931                 inode_nohighmem(&inode->v);
932                 inode->v.i_op   = &bch_symlink_inode_operations;
933                 break;
934         default:
935                 init_special_inode(&inode->v, inode->v.i_mode, inode->v.i_rdev);
936                 inode->v.i_op   = &bch_special_inode_operations;
937                 break;
938         }
939 }
940
941 static struct inode *bch2_alloc_inode(struct super_block *sb)
942 {
943         struct bch_inode_info *inode;
944
945         inode = kmem_cache_alloc(bch2_inode_cache, GFP_NOFS);
946         if (!inode)
947                 return NULL;
948
949         inode_init_once(&inode->v);
950         mutex_init(&inode->ei_update_lock);
951         inode->ei_journal_seq = 0;
952         atomic_long_set(&inode->ei_size_dirty_count, 0);
953         atomic_long_set(&inode->ei_sectors_dirty_count, 0);
954
955         return &inode->v;
956 }
957
958 static void bch2_i_callback(struct rcu_head *head)
959 {
960         struct inode *vinode = container_of(head, struct inode, i_rcu);
961         struct bch_inode_info *inode = to_bch_ei(vinode);
962
963         kmem_cache_free(bch2_inode_cache, inode);
964 }
965
966 static void bch2_destroy_inode(struct inode *vinode)
967 {
968         call_rcu(&vinode->i_rcu, bch2_i_callback);
969 }
970
971 static int bch2_vfs_write_inode(struct inode *vinode,
972                                 struct writeback_control *wbc)
973 {
974         struct bch_fs *c = vinode->i_sb->s_fs_info;
975         struct bch_inode_info *inode = to_bch_ei(vinode);
976         int ret;
977
978         mutex_lock(&inode->ei_update_lock);
979         ret = bch2_write_inode(c, inode);
980         mutex_unlock(&inode->ei_update_lock);
981
982         if (c->opts.journal_flush_disabled)
983                 return ret;
984
985         if (!ret && wbc->sync_mode == WB_SYNC_ALL)
986                 ret = bch2_journal_flush_seq(&c->journal, inode->ei_journal_seq);
987
988         return ret;
989 }
990
991 static void bch2_evict_inode(struct inode *vinode)
992 {
993         struct bch_fs *c = vinode->i_sb->s_fs_info;
994         struct bch_inode_info *inode = to_bch_ei(vinode);
995
996         truncate_inode_pages_final(&inode->v.i_data);
997
998         if (!bch2_journal_error(&c->journal) && !is_bad_inode(&inode->v)) {
999                 /* XXX - we want to check this stuff iff there weren't IO errors: */
1000                 BUG_ON(atomic_long_read(&inode->ei_sectors_dirty_count));
1001                 BUG_ON(atomic64_read(&inode->ei_sectors) != inode->v.i_blocks);
1002         }
1003
1004         clear_inode(&inode->v);
1005
1006         if (!inode->v.i_nlink && !is_bad_inode(&inode->v)) {
1007                 bch2_inode_rm(c, inode->v.i_ino);
1008                 atomic_long_dec(&c->nr_inodes);
1009         }
1010 }
1011
1012 static int bch2_statfs(struct dentry *dentry, struct kstatfs *buf)
1013 {
1014         struct super_block *sb = dentry->d_sb;
1015         struct bch_fs *c = sb->s_fs_info;
1016         u64 fsid;
1017
1018         buf->f_type     = BCACHEFS_STATFS_MAGIC;
1019         buf->f_bsize    = sb->s_blocksize;
1020         buf->f_blocks   = c->capacity >> PAGE_SECTOR_SHIFT;
1021         buf->f_bfree    = (c->capacity - bch2_fs_sectors_used(c)) >> PAGE_SECTOR_SHIFT;
1022         buf->f_bavail   = buf->f_bfree;
1023         buf->f_files    = atomic_long_read(&c->nr_inodes);
1024         buf->f_ffree    = U64_MAX;
1025
1026         fsid = le64_to_cpup((void *) c->sb.user_uuid.b) ^
1027                le64_to_cpup((void *) c->sb.user_uuid.b + sizeof(u64));
1028         buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
1029         buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
1030         buf->f_namelen  = NAME_MAX;
1031
1032         return 0;
1033 }
1034
1035 static int bch2_sync_fs(struct super_block *sb, int wait)
1036 {
1037         struct bch_fs *c = sb->s_fs_info;
1038
1039         if (!wait) {
1040                 bch2_journal_flush_async(&c->journal, NULL);
1041                 return 0;
1042         }
1043
1044         return bch2_journal_flush(&c->journal);
1045 }
1046
1047 static struct bch_fs *bch2_open_as_blockdevs(const char *_dev_name,
1048                                              struct bch_opts opts)
1049 {
1050         size_t nr_devs = 0, i = 0;
1051         char *dev_name, *s, **devs;
1052         struct bch_fs *c = NULL;
1053         const char *err = "cannot allocate memory";
1054
1055         dev_name = kstrdup(_dev_name, GFP_KERNEL);
1056         if (!dev_name)
1057                 return NULL;
1058
1059         for (s = dev_name; s; s = strchr(s + 1, ':'))
1060                 nr_devs++;
1061
1062         devs = kcalloc(nr_devs, sizeof(const char *), GFP_KERNEL);
1063         if (!devs)
1064                 goto err;
1065
1066         for (i = 0, s = dev_name;
1067              s;
1068              (s = strchr(s, ':')) && (*s++ = '\0'))
1069                 devs[i++] = s;
1070
1071         err = bch2_fs_open(devs, nr_devs, opts, &c);
1072         if (err) {
1073                 /*
1074                  * Already open?
1075                  * Look up each block device, make sure they all belong to a
1076                  * filesystem and they all belong to the _same_ filesystem
1077                  */
1078
1079                 for (i = 0; i < nr_devs; i++) {
1080                         struct block_device *bdev = lookup_bdev(devs[i]);
1081                         struct bch_fs *c2;
1082
1083                         if (IS_ERR(bdev))
1084                                 goto err;
1085
1086                         c2 = bch2_bdev_to_fs(bdev);
1087                         bdput(bdev);
1088
1089                         if (!c)
1090                                 c = c2;
1091                         else if (c2)
1092                                 closure_put(&c2->cl);
1093
1094                         if (!c)
1095                                 goto err;
1096                         if (c != c2) {
1097                                 closure_put(&c->cl);
1098                                 goto err;
1099                         }
1100                 }
1101
1102                 mutex_lock(&c->state_lock);
1103
1104                 if (!bch2_fs_running(c)) {
1105                         mutex_unlock(&c->state_lock);
1106                         closure_put(&c->cl);
1107                         err = "incomplete filesystem";
1108                         c = NULL;
1109                         goto err;
1110                 }
1111
1112                 mutex_unlock(&c->state_lock);
1113         }
1114
1115         set_bit(BCH_FS_BDEV_MOUNTED, &c->flags);
1116 err:
1117         kfree(devs);
1118         kfree(dev_name);
1119
1120         if (!c)
1121                 pr_err("bch_fs_open err %s", err);
1122         return c;
1123 }
1124
1125 static int bch2_remount(struct super_block *sb, int *flags, char *data)
1126 {
1127         struct bch_fs *c = sb->s_fs_info;
1128         struct bch_opts opts = bch2_opts_empty();
1129         int ret;
1130
1131         opt_set(opts, read_only, (*flags & MS_RDONLY) != 0);
1132
1133         ret = bch2_parse_mount_opts(&opts, data);
1134         if (ret)
1135                 return ret;
1136
1137         if (opts.read_only != c->opts.read_only) {
1138                 const char *err = NULL;
1139
1140                 mutex_lock(&c->state_lock);
1141
1142                 if (opts.read_only) {
1143                         bch2_fs_read_only(c);
1144
1145                         sb->s_flags |= MS_RDONLY;
1146                 } else {
1147                         err = bch2_fs_read_write(c);
1148                         if (err) {
1149                                 bch_err(c, "error going rw: %s", err);
1150                                 return -EINVAL;
1151                         }
1152
1153                         sb->s_flags &= ~MS_RDONLY;
1154                 }
1155
1156                 c->opts.read_only = opts.read_only;
1157
1158                 mutex_unlock(&c->state_lock);
1159         }
1160
1161         if (opts.errors >= 0)
1162                 c->opts.errors = opts.errors;
1163
1164         return ret;
1165 }
1166
1167 static int bch2_show_options(struct seq_file *seq, struct dentry *root)
1168 {
1169         struct bch_fs *c = root->d_sb->s_fs_info;
1170         enum bch_opt_id i;
1171
1172         for (i = 0; i < bch2_opts_nr; i++) {
1173                 const struct bch_option *opt = &bch2_opt_table[i];
1174                 u64 v = bch2_opt_get_by_id(&c->opts, i);
1175
1176                 if (opt->mode < OPT_MOUNT)
1177                         continue;
1178
1179                 if (v == bch2_opt_get_by_id(&bch2_opts_default, i))
1180                         continue;
1181
1182                 switch (opt->type) {
1183                 case BCH_OPT_BOOL:
1184                         seq_printf(seq, ",%s%s", v ? "" : "no", opt->attr.name);
1185                         break;
1186                 case BCH_OPT_UINT:
1187                         seq_printf(seq, ",%s=%llu", opt->attr.name, v);
1188                         break;
1189                 case BCH_OPT_STR:
1190                         seq_printf(seq, ",%s=%s", opt->attr.name, opt->choices[v]);
1191                         break;
1192                 }
1193         }
1194
1195         return 0;
1196
1197 }
1198
1199 static const struct super_operations bch_super_operations = {
1200         .alloc_inode    = bch2_alloc_inode,
1201         .destroy_inode  = bch2_destroy_inode,
1202         .write_inode    = bch2_vfs_write_inode,
1203         .evict_inode    = bch2_evict_inode,
1204         .sync_fs        = bch2_sync_fs,
1205         .statfs         = bch2_statfs,
1206         .show_options   = bch2_show_options,
1207         .remount_fs     = bch2_remount,
1208 #if 0
1209         .put_super      = bch2_put_super,
1210         .freeze_fs      = bch2_freeze,
1211         .unfreeze_fs    = bch2_unfreeze,
1212 #endif
1213 };
1214
1215 static int bch2_test_super(struct super_block *s, void *data)
1216 {
1217         return s->s_fs_info == data;
1218 }
1219
1220 static int bch2_set_super(struct super_block *s, void *data)
1221 {
1222         s->s_fs_info = data;
1223         return 0;
1224 }
1225
1226 static struct dentry *bch2_mount(struct file_system_type *fs_type,
1227                                  int flags, const char *dev_name, void *data)
1228 {
1229         struct bch_fs *c;
1230         struct bch_dev *ca;
1231         struct super_block *sb;
1232         struct inode *vinode;
1233         struct bch_opts opts = bch2_opts_empty();
1234         unsigned i;
1235         int ret;
1236
1237         opt_set(opts, read_only, (flags & MS_RDONLY) != 0);
1238
1239         ret = bch2_parse_mount_opts(&opts, data);
1240         if (ret)
1241                 return ERR_PTR(ret);
1242
1243         c = bch2_open_as_blockdevs(dev_name, opts);
1244         if (!c)
1245                 return ERR_PTR(-ENOENT);
1246
1247         sb = sget(fs_type, bch2_test_super, bch2_set_super, flags|MS_NOSEC, c);
1248         if (IS_ERR(sb)) {
1249                 closure_put(&c->cl);
1250                 return ERR_CAST(sb);
1251         }
1252
1253         BUG_ON(sb->s_fs_info != c);
1254
1255         if (sb->s_root) {
1256                 closure_put(&c->cl);
1257
1258                 if ((flags ^ sb->s_flags) & MS_RDONLY) {
1259                         ret = -EBUSY;
1260                         goto err_put_super;
1261                 }
1262                 goto out;
1263         }
1264
1265         /* XXX: blocksize */
1266         sb->s_blocksize         = PAGE_SIZE;
1267         sb->s_blocksize_bits    = PAGE_SHIFT;
1268         sb->s_maxbytes          = MAX_LFS_FILESIZE;
1269         sb->s_op                = &bch_super_operations;
1270         sb->s_export_op         = &bch_export_ops;
1271         sb->s_xattr             = bch2_xattr_handlers;
1272         sb->s_magic             = BCACHEFS_STATFS_MAGIC;
1273         sb->s_time_gran         = c->sb.time_precision;
1274         c->vfs_sb               = sb;
1275         sb->s_bdi               = &c->bdi;
1276         strlcpy(sb->s_id, c->name, sizeof(sb->s_id));
1277
1278         for_each_online_member(ca, c, i) {
1279                 struct block_device *bdev = ca->disk_sb.bdev;
1280
1281                 /* XXX: create an anonymous device for multi device filesystems */
1282                 sb->s_bdev      = bdev;
1283                 sb->s_dev       = bdev->bd_dev;
1284                 percpu_ref_put(&ca->io_ref);
1285                 break;
1286         }
1287
1288 #ifdef CONFIG_BCACHEFS_POSIX_ACL
1289         if (c->opts.acl)
1290                 sb->s_flags     |= MS_POSIXACL;
1291 #endif
1292
1293         vinode = bch2_vfs_inode_get(c, BCACHEFS_ROOT_INO);
1294         if (IS_ERR(vinode)) {
1295                 ret = PTR_ERR(vinode);
1296                 goto err_put_super;
1297         }
1298
1299         sb->s_root = d_make_root(vinode);
1300         if (!sb->s_root) {
1301                 ret = -ENOMEM;
1302                 goto err_put_super;
1303         }
1304
1305         sb->s_flags |= MS_ACTIVE;
1306 out:
1307         return dget(sb->s_root);
1308
1309 err_put_super:
1310         deactivate_locked_super(sb);
1311         return ERR_PTR(ret);
1312 }
1313
1314 static void bch2_kill_sb(struct super_block *sb)
1315 {
1316         struct bch_fs *c = sb->s_fs_info;
1317
1318         generic_shutdown_super(sb);
1319
1320         if (test_bit(BCH_FS_BDEV_MOUNTED, &c->flags))
1321                 bch2_fs_stop(c);
1322         else
1323                 closure_put(&c->cl);
1324 }
1325
1326 static struct file_system_type bcache_fs_type = {
1327         .owner          = THIS_MODULE,
1328         .name           = "bcachefs",
1329         .mount          = bch2_mount,
1330         .kill_sb        = bch2_kill_sb,
1331         .fs_flags       = FS_REQUIRES_DEV,
1332 };
1333
1334 MODULE_ALIAS_FS("bcachefs");
1335
1336 void bch2_vfs_exit(void)
1337 {
1338         unregister_filesystem(&bcache_fs_type);
1339         if (bch2_inode_cache)
1340                 kmem_cache_destroy(bch2_inode_cache);
1341 }
1342
1343 int __init bch2_vfs_init(void)
1344 {
1345         int ret = -ENOMEM;
1346
1347         bch2_inode_cache = KMEM_CACHE(bch_inode_info, 0);
1348         if (!bch2_inode_cache)
1349                 goto err;
1350
1351         ret = register_filesystem(&bcache_fs_type);
1352         if (ret)
1353                 goto err;
1354
1355         return 0;
1356 err:
1357         bch2_vfs_exit();
1358         return ret;
1359 }
1360
1361 #endif /* NO_BCACHEFS_FS */