]> git.sesse.net Git - bcachefs-tools-debian/blob - libbcachefs/fs.c
Update bcachefs sources to 84f132d569 bcachefs: fsck: Break walk_inode() up into...
[bcachefs-tools-debian] / libbcachefs / fs.c
1 // SPDX-License-Identifier: GPL-2.0
2 #ifndef NO_BCACHEFS_FS
3
4 #include "bcachefs.h"
5 #include "acl.h"
6 #include "bkey_buf.h"
7 #include "btree_update.h"
8 #include "buckets.h"
9 #include "chardev.h"
10 #include "dirent.h"
11 #include "errcode.h"
12 #include "extents.h"
13 #include "fs.h"
14 #include "fs-common.h"
15 #include "fs-io.h"
16 #include "fs-ioctl.h"
17 #include "fsck.h"
18 #include "inode.h"
19 #include "io.h"
20 #include "journal.h"
21 #include "keylist.h"
22 #include "quota.h"
23 #include "super.h"
24 #include "xattr.h"
25
26 #include <linux/aio.h>
27 #include <linux/backing-dev.h>
28 #include <linux/exportfs.h>
29 #include <linux/fiemap.h>
30 #include <linux/module.h>
31 #include <linux/pagemap.h>
32 #include <linux/posix_acl.h>
33 #include <linux/random.h>
34 #include <linux/seq_file.h>
35 #include <linux/statfs.h>
36 #include <linux/string.h>
37 #include <linux/xattr.h>
38
39 static struct kmem_cache *bch2_inode_cache;
40
41 static void bch2_vfs_inode_init(struct btree_trans *, subvol_inum,
42                                 struct bch_inode_info *,
43                                 struct bch_inode_unpacked *,
44                                 struct bch_subvolume *);
45
46 void bch2_inode_update_after_write(struct btree_trans *trans,
47                                    struct bch_inode_info *inode,
48                                    struct bch_inode_unpacked *bi,
49                                    unsigned fields)
50 {
51         struct bch_fs *c = trans->c;
52
53         BUG_ON(bi->bi_inum != inode->v.i_ino);
54
55         bch2_assert_pos_locked(trans, BTREE_ID_inodes,
56                                POS(0, bi->bi_inum),
57                                c->opts.inodes_use_key_cache);
58
59         set_nlink(&inode->v, bch2_inode_nlink_get(bi));
60         i_uid_write(&inode->v, bi->bi_uid);
61         i_gid_write(&inode->v, bi->bi_gid);
62         inode->v.i_mode = bi->bi_mode;
63
64         if (fields & ATTR_ATIME)
65                 inode->v.i_atime = bch2_time_to_timespec(c, bi->bi_atime);
66         if (fields & ATTR_MTIME)
67                 inode->v.i_mtime = bch2_time_to_timespec(c, bi->bi_mtime);
68         if (fields & ATTR_CTIME)
69                 inode->v.i_ctime = bch2_time_to_timespec(c, bi->bi_ctime);
70
71         inode->ei_inode         = *bi;
72
73         bch2_inode_flags_to_vfs(inode);
74 }
75
76 int __must_check bch2_write_inode(struct bch_fs *c,
77                                   struct bch_inode_info *inode,
78                                   inode_set_fn set,
79                                   void *p, unsigned fields)
80 {
81         struct btree_trans trans;
82         struct btree_iter iter = { NULL };
83         struct bch_inode_unpacked inode_u;
84         int ret;
85
86         bch2_trans_init(&trans, c, 0, 512);
87 retry:
88         bch2_trans_begin(&trans);
89
90         ret   = bch2_inode_peek(&trans, &iter, &inode_u, inode_inum(inode),
91                                 BTREE_ITER_INTENT) ?:
92                 (set ? set(inode, &inode_u, p) : 0) ?:
93                 bch2_inode_write(&trans, &iter, &inode_u) ?:
94                 bch2_trans_commit(&trans, NULL, NULL, BTREE_INSERT_NOFAIL);
95
96         /*
97          * the btree node lock protects inode->ei_inode, not ei_update_lock;
98          * this is important for inode updates via bchfs_write_index_update
99          */
100         if (!ret)
101                 bch2_inode_update_after_write(&trans, inode, &inode_u, fields);
102
103         bch2_trans_iter_exit(&trans, &iter);
104
105         if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
106                 goto retry;
107
108         bch2_fs_fatal_err_on(bch2_err_matches(ret, ENOENT), c,
109                              "inode %u:%llu not found when updating",
110                              inode_inum(inode).subvol,
111                              inode_inum(inode).inum);
112
113         bch2_trans_exit(&trans);
114         return ret < 0 ? ret : 0;
115 }
116
117 int bch2_fs_quota_transfer(struct bch_fs *c,
118                            struct bch_inode_info *inode,
119                            struct bch_qid new_qid,
120                            unsigned qtypes,
121                            enum quota_acct_mode mode)
122 {
123         unsigned i;
124         int ret;
125
126         qtypes &= enabled_qtypes(c);
127
128         for (i = 0; i < QTYP_NR; i++)
129                 if (new_qid.q[i] == inode->ei_qid.q[i])
130                         qtypes &= ~(1U << i);
131
132         if (!qtypes)
133                 return 0;
134
135         mutex_lock(&inode->ei_quota_lock);
136
137         ret = bch2_quota_transfer(c, qtypes, new_qid,
138                                   inode->ei_qid,
139                                   inode->v.i_blocks +
140                                   inode->ei_quota_reserved,
141                                   mode);
142         if (!ret)
143                 for (i = 0; i < QTYP_NR; i++)
144                         if (qtypes & (1 << i))
145                                 inode->ei_qid.q[i] = new_qid.q[i];
146
147         mutex_unlock(&inode->ei_quota_lock);
148
149         return ret;
150 }
151
152 static int bch2_iget5_test(struct inode *vinode, void *p)
153 {
154         struct bch_inode_info *inode = to_bch_ei(vinode);
155         subvol_inum *inum = p;
156
157         return inode->ei_subvol == inum->subvol &&
158                 inode->ei_inode.bi_inum == inum->inum;
159 }
160
161 static int bch2_iget5_set(struct inode *vinode, void *p)
162 {
163         struct bch_inode_info *inode = to_bch_ei(vinode);
164         subvol_inum *inum = p;
165
166         inode->v.i_ino          = inum->inum;
167         inode->ei_subvol        = inum->subvol;
168         inode->ei_inode.bi_inum = inum->inum;
169         return 0;
170 }
171
172 static unsigned bch2_inode_hash(subvol_inum inum)
173 {
174         return jhash_3words(inum.subvol, inum.inum >> 32, inum.inum, JHASH_INITVAL);
175 }
176
177 struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum)
178 {
179         struct bch_inode_unpacked inode_u;
180         struct bch_inode_info *inode;
181         struct btree_trans trans;
182         struct bch_subvolume subvol;
183         int ret;
184
185         inode = to_bch_ei(iget5_locked(c->vfs_sb,
186                                        bch2_inode_hash(inum),
187                                        bch2_iget5_test,
188                                        bch2_iget5_set,
189                                        &inum));
190         if (unlikely(!inode))
191                 return ERR_PTR(-ENOMEM);
192         if (!(inode->v.i_state & I_NEW))
193                 return &inode->v;
194
195         bch2_trans_init(&trans, c, 8, 0);
196         ret = lockrestart_do(&trans,
197                 bch2_subvolume_get(&trans, inum.subvol, true, 0, &subvol) ?:
198                 bch2_inode_find_by_inum_trans(&trans, inum, &inode_u));
199
200         if (!ret)
201                 bch2_vfs_inode_init(&trans, inum, inode, &inode_u, &subvol);
202         bch2_trans_exit(&trans);
203
204         if (ret) {
205                 iget_failed(&inode->v);
206                 return ERR_PTR(ret);
207         }
208
209         mutex_lock(&c->vfs_inodes_lock);
210         list_add(&inode->ei_vfs_inode_list, &c->vfs_inodes_list);
211         mutex_unlock(&c->vfs_inodes_lock);
212
213         unlock_new_inode(&inode->v);
214
215         return &inode->v;
216 }
217
218 struct bch_inode_info *
219 __bch2_create(struct mnt_idmap *idmap,
220               struct bch_inode_info *dir, struct dentry *dentry,
221               umode_t mode, dev_t rdev, subvol_inum snapshot_src,
222               unsigned flags)
223 {
224         struct bch_fs *c = dir->v.i_sb->s_fs_info;
225         struct btree_trans trans;
226         struct bch_inode_unpacked dir_u;
227         struct bch_inode_info *inode, *old;
228         struct bch_inode_unpacked inode_u;
229         struct posix_acl *default_acl = NULL, *acl = NULL;
230         subvol_inum inum;
231         struct bch_subvolume subvol;
232         u64 journal_seq = 0;
233         int ret;
234
235         /*
236          * preallocate acls + vfs inode before btree transaction, so that
237          * nothing can fail after the transaction succeeds:
238          */
239 #ifdef CONFIG_BCACHEFS_POSIX_ACL
240         ret = posix_acl_create(&dir->v, &mode, &default_acl, &acl);
241         if (ret)
242                 return ERR_PTR(ret);
243 #endif
244         inode = to_bch_ei(new_inode(c->vfs_sb));
245         if (unlikely(!inode)) {
246                 inode = ERR_PTR(-ENOMEM);
247                 goto err;
248         }
249
250         bch2_inode_init_early(c, &inode_u);
251
252         if (!(flags & BCH_CREATE_TMPFILE))
253                 mutex_lock(&dir->ei_update_lock);
254
255         bch2_trans_init(&trans, c, 8,
256                         2048 + (!(flags & BCH_CREATE_TMPFILE)
257                                 ? dentry->d_name.len : 0));
258 retry:
259         bch2_trans_begin(&trans);
260
261         ret   = bch2_create_trans(&trans,
262                                   inode_inum(dir), &dir_u, &inode_u,
263                                   !(flags & BCH_CREATE_TMPFILE)
264                                   ? &dentry->d_name : NULL,
265                                   from_kuid(i_user_ns(&dir->v), current_fsuid()),
266                                   from_kgid(i_user_ns(&dir->v), current_fsgid()),
267                                   mode, rdev,
268                                   default_acl, acl, snapshot_src, flags) ?:
269                 bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, 1,
270                                 KEY_TYPE_QUOTA_PREALLOC);
271         if (unlikely(ret))
272                 goto err_before_quota;
273
274         inum.subvol = inode_u.bi_subvol ?: dir->ei_subvol;
275         inum.inum = inode_u.bi_inum;
276
277         ret   = bch2_subvolume_get(&trans, inum.subvol, true,
278                                    BTREE_ITER_WITH_UPDATES, &subvol) ?:
279                 bch2_trans_commit(&trans, NULL, &journal_seq, 0);
280         if (unlikely(ret)) {
281                 bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, -1,
282                                 KEY_TYPE_QUOTA_WARN);
283 err_before_quota:
284                 if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
285                         goto retry;
286                 goto err_trans;
287         }
288
289         if (!(flags & BCH_CREATE_TMPFILE)) {
290                 bch2_inode_update_after_write(&trans, dir, &dir_u,
291                                               ATTR_MTIME|ATTR_CTIME);
292                 mutex_unlock(&dir->ei_update_lock);
293         }
294
295         bch2_iget5_set(&inode->v, &inum);
296         bch2_vfs_inode_init(&trans, inum, inode, &inode_u, &subvol);
297
298         set_cached_acl(&inode->v, ACL_TYPE_ACCESS, acl);
299         set_cached_acl(&inode->v, ACL_TYPE_DEFAULT, default_acl);
300
301         /*
302          * we must insert the new inode into the inode cache before calling
303          * bch2_trans_exit() and dropping locks, else we could race with another
304          * thread pulling the inode in and modifying it:
305          */
306
307         inode->v.i_state |= I_CREATING;
308
309         old = to_bch_ei(inode_insert5(&inode->v,
310                                       bch2_inode_hash(inum),
311                                       bch2_iget5_test,
312                                       bch2_iget5_set,
313                                       &inum));
314         BUG_ON(!old);
315
316         if (unlikely(old != inode)) {
317                 /*
318                  * We raced, another process pulled the new inode into cache
319                  * before us:
320                  */
321                 make_bad_inode(&inode->v);
322                 iput(&inode->v);
323
324                 inode = old;
325         } else {
326                 mutex_lock(&c->vfs_inodes_lock);
327                 list_add(&inode->ei_vfs_inode_list, &c->vfs_inodes_list);
328                 mutex_unlock(&c->vfs_inodes_lock);
329                 /*
330                  * we really don't want insert_inode_locked2() to be setting
331                  * I_NEW...
332                  */
333                 unlock_new_inode(&inode->v);
334         }
335
336         bch2_trans_exit(&trans);
337 err:
338         posix_acl_release(default_acl);
339         posix_acl_release(acl);
340         return inode;
341 err_trans:
342         if (!(flags & BCH_CREATE_TMPFILE))
343                 mutex_unlock(&dir->ei_update_lock);
344
345         bch2_trans_exit(&trans);
346         make_bad_inode(&inode->v);
347         iput(&inode->v);
348         inode = ERR_PTR(ret);
349         goto err;
350 }
351
352 /* methods */
353
354 static struct dentry *bch2_lookup(struct inode *vdir, struct dentry *dentry,
355                                   unsigned int flags)
356 {
357         struct bch_fs *c = vdir->i_sb->s_fs_info;
358         struct bch_inode_info *dir = to_bch_ei(vdir);
359         struct bch_hash_info hash = bch2_hash_info_init(c, &dir->ei_inode);
360         struct inode *vinode = NULL;
361         subvol_inum inum = { .subvol = 1 };
362         int ret;
363
364         ret = bch2_dirent_lookup(c, inode_inum(dir), &hash,
365                                  &dentry->d_name, &inum);
366
367         if (!ret)
368                 vinode = bch2_vfs_inode_get(c, inum);
369
370         return d_splice_alias(vinode, dentry);
371 }
372
373 static int bch2_mknod(struct mnt_idmap *idmap,
374                       struct inode *vdir, struct dentry *dentry,
375                       umode_t mode, dev_t rdev)
376 {
377         struct bch_inode_info *inode =
378                 __bch2_create(idmap, to_bch_ei(vdir), dentry, mode, rdev,
379                               (subvol_inum) { 0 }, 0);
380
381         if (IS_ERR(inode))
382                 return bch2_err_class(PTR_ERR(inode));
383
384         d_instantiate(dentry, &inode->v);
385         return 0;
386 }
387
388 static int bch2_create(struct mnt_idmap *idmap,
389                        struct inode *vdir, struct dentry *dentry,
390                        umode_t mode, bool excl)
391 {
392         return bch2_mknod(idmap, vdir, dentry, mode|S_IFREG, 0);
393 }
394
395 static int __bch2_link(struct bch_fs *c,
396                        struct bch_inode_info *inode,
397                        struct bch_inode_info *dir,
398                        struct dentry *dentry)
399 {
400         struct btree_trans trans;
401         struct bch_inode_unpacked dir_u, inode_u;
402         int ret;
403
404         mutex_lock(&inode->ei_update_lock);
405         bch2_trans_init(&trans, c, 4, 1024);
406
407         ret = commit_do(&trans, NULL, NULL, 0,
408                         bch2_link_trans(&trans,
409                                         inode_inum(dir),   &dir_u,
410                                         inode_inum(inode), &inode_u,
411                                         &dentry->d_name));
412
413         if (likely(!ret)) {
414                 bch2_inode_update_after_write(&trans, dir, &dir_u,
415                                               ATTR_MTIME|ATTR_CTIME);
416                 bch2_inode_update_after_write(&trans, inode, &inode_u, ATTR_CTIME);
417         }
418
419         bch2_trans_exit(&trans);
420         mutex_unlock(&inode->ei_update_lock);
421         return ret;
422 }
423
424 static int bch2_link(struct dentry *old_dentry, struct inode *vdir,
425                      struct dentry *dentry)
426 {
427         struct bch_fs *c = vdir->i_sb->s_fs_info;
428         struct bch_inode_info *dir = to_bch_ei(vdir);
429         struct bch_inode_info *inode = to_bch_ei(old_dentry->d_inode);
430         int ret;
431
432         lockdep_assert_held(&inode->v.i_rwsem);
433
434         ret = __bch2_link(c, inode, dir, dentry);
435         if (unlikely(ret))
436                 return ret;
437
438         ihold(&inode->v);
439         d_instantiate(dentry, &inode->v);
440         return 0;
441 }
442
443 int __bch2_unlink(struct inode *vdir, struct dentry *dentry,
444                   bool deleting_snapshot)
445 {
446         struct bch_fs *c = vdir->i_sb->s_fs_info;
447         struct bch_inode_info *dir = to_bch_ei(vdir);
448         struct bch_inode_info *inode = to_bch_ei(dentry->d_inode);
449         struct bch_inode_unpacked dir_u, inode_u;
450         struct btree_trans trans;
451         int ret;
452
453         bch2_lock_inodes(INODE_UPDATE_LOCK, dir, inode);
454         bch2_trans_init(&trans, c, 4, 1024);
455
456         ret = commit_do(&trans, NULL, NULL,
457                         BTREE_INSERT_NOFAIL,
458                 bch2_unlink_trans(&trans,
459                                   inode_inum(dir), &dir_u,
460                                   &inode_u, &dentry->d_name,
461                                   deleting_snapshot));
462         if (unlikely(ret))
463                 goto err;
464
465         bch2_inode_update_after_write(&trans, dir, &dir_u,
466                                       ATTR_MTIME|ATTR_CTIME);
467         bch2_inode_update_after_write(&trans, inode, &inode_u,
468                                       ATTR_MTIME);
469
470         if (inode_u.bi_subvol) {
471                 /*
472                  * Subvolume deletion is asynchronous, but we still want to tell
473                  * the VFS that it's been deleted here:
474                  */
475                 set_nlink(&inode->v, 0);
476         }
477 err:
478         bch2_trans_exit(&trans);
479         bch2_unlock_inodes(INODE_UPDATE_LOCK, dir, inode);
480
481         return ret;
482 }
483
484 static int bch2_unlink(struct inode *vdir, struct dentry *dentry)
485 {
486         return __bch2_unlink(vdir, dentry, false);
487 }
488
489 static int bch2_symlink(struct mnt_idmap *idmap,
490                         struct inode *vdir, struct dentry *dentry,
491                         const char *symname)
492 {
493         struct bch_fs *c = vdir->i_sb->s_fs_info;
494         struct bch_inode_info *dir = to_bch_ei(vdir), *inode;
495         int ret;
496
497         inode = __bch2_create(idmap, dir, dentry, S_IFLNK|S_IRWXUGO, 0,
498                               (subvol_inum) { 0 }, BCH_CREATE_TMPFILE);
499         if (IS_ERR(inode))
500                 return bch2_err_class(PTR_ERR(inode));
501
502         inode_lock(&inode->v);
503         ret = page_symlink(&inode->v, symname, strlen(symname) + 1);
504         inode_unlock(&inode->v);
505
506         if (unlikely(ret))
507                 goto err;
508
509         ret = filemap_write_and_wait_range(inode->v.i_mapping, 0, LLONG_MAX);
510         if (unlikely(ret))
511                 goto err;
512
513         ret = __bch2_link(c, inode, dir, dentry);
514         if (unlikely(ret))
515                 goto err;
516
517         d_instantiate(dentry, &inode->v);
518         return 0;
519 err:
520         iput(&inode->v);
521         return ret;
522 }
523
524 static int bch2_mkdir(struct mnt_idmap *idmap,
525                       struct inode *vdir, struct dentry *dentry, umode_t mode)
526 {
527         return bch2_mknod(idmap, vdir, dentry, mode|S_IFDIR, 0);
528 }
529
530 static int bch2_rename2(struct mnt_idmap *idmap,
531                         struct inode *src_vdir, struct dentry *src_dentry,
532                         struct inode *dst_vdir, struct dentry *dst_dentry,
533                         unsigned flags)
534 {
535         struct bch_fs *c = src_vdir->i_sb->s_fs_info;
536         struct bch_inode_info *src_dir = to_bch_ei(src_vdir);
537         struct bch_inode_info *dst_dir = to_bch_ei(dst_vdir);
538         struct bch_inode_info *src_inode = to_bch_ei(src_dentry->d_inode);
539         struct bch_inode_info *dst_inode = to_bch_ei(dst_dentry->d_inode);
540         struct bch_inode_unpacked dst_dir_u, src_dir_u;
541         struct bch_inode_unpacked src_inode_u, dst_inode_u;
542         struct btree_trans trans;
543         enum bch_rename_mode mode = flags & RENAME_EXCHANGE
544                 ? BCH_RENAME_EXCHANGE
545                 : dst_dentry->d_inode
546                 ? BCH_RENAME_OVERWRITE : BCH_RENAME;
547         int ret;
548
549         if (flags & ~(RENAME_NOREPLACE|RENAME_EXCHANGE))
550                 return -EINVAL;
551
552         if (mode == BCH_RENAME_OVERWRITE) {
553                 ret = filemap_write_and_wait_range(src_inode->v.i_mapping,
554                                                    0, LLONG_MAX);
555                 if (ret)
556                         return ret;
557         }
558
559         bch2_trans_init(&trans, c, 8, 2048);
560
561         bch2_lock_inodes(INODE_UPDATE_LOCK,
562                          src_dir,
563                          dst_dir,
564                          src_inode,
565                          dst_inode);
566
567         if (inode_attr_changing(dst_dir, src_inode, Inode_opt_project)) {
568                 ret = bch2_fs_quota_transfer(c, src_inode,
569                                              dst_dir->ei_qid,
570                                              1 << QTYP_PRJ,
571                                              KEY_TYPE_QUOTA_PREALLOC);
572                 if (ret)
573                         goto err;
574         }
575
576         if (mode == BCH_RENAME_EXCHANGE &&
577             inode_attr_changing(src_dir, dst_inode, Inode_opt_project)) {
578                 ret = bch2_fs_quota_transfer(c, dst_inode,
579                                              src_dir->ei_qid,
580                                              1 << QTYP_PRJ,
581                                              KEY_TYPE_QUOTA_PREALLOC);
582                 if (ret)
583                         goto err;
584         }
585
586         ret = commit_do(&trans, NULL, NULL, 0,
587                         bch2_rename_trans(&trans,
588                                           inode_inum(src_dir), &src_dir_u,
589                                           inode_inum(dst_dir), &dst_dir_u,
590                                           &src_inode_u,
591                                           &dst_inode_u,
592                                           &src_dentry->d_name,
593                                           &dst_dentry->d_name,
594                                           mode));
595         if (unlikely(ret))
596                 goto err;
597
598         BUG_ON(src_inode->v.i_ino != src_inode_u.bi_inum);
599         BUG_ON(dst_inode &&
600                dst_inode->v.i_ino != dst_inode_u.bi_inum);
601
602         bch2_inode_update_after_write(&trans, src_dir, &src_dir_u,
603                                       ATTR_MTIME|ATTR_CTIME);
604
605         if (src_dir != dst_dir)
606                 bch2_inode_update_after_write(&trans, dst_dir, &dst_dir_u,
607                                               ATTR_MTIME|ATTR_CTIME);
608
609         bch2_inode_update_after_write(&trans, src_inode, &src_inode_u,
610                                       ATTR_CTIME);
611
612         if (dst_inode)
613                 bch2_inode_update_after_write(&trans, dst_inode, &dst_inode_u,
614                                               ATTR_CTIME);
615 err:
616         bch2_trans_exit(&trans);
617
618         bch2_fs_quota_transfer(c, src_inode,
619                                bch_qid(&src_inode->ei_inode),
620                                1 << QTYP_PRJ,
621                                KEY_TYPE_QUOTA_NOCHECK);
622         if (dst_inode)
623                 bch2_fs_quota_transfer(c, dst_inode,
624                                        bch_qid(&dst_inode->ei_inode),
625                                        1 << QTYP_PRJ,
626                                        KEY_TYPE_QUOTA_NOCHECK);
627
628         bch2_unlock_inodes(INODE_UPDATE_LOCK,
629                            src_dir,
630                            dst_dir,
631                            src_inode,
632                            dst_inode);
633
634         return ret;
635 }
636
637 static void bch2_setattr_copy(struct mnt_idmap *idmap,
638                               struct bch_inode_info *inode,
639                               struct bch_inode_unpacked *bi,
640                               struct iattr *attr)
641 {
642         struct bch_fs *c = inode->v.i_sb->s_fs_info;
643         unsigned int ia_valid = attr->ia_valid;
644
645         if (ia_valid & ATTR_UID)
646                 bi->bi_uid = from_kuid(i_user_ns(&inode->v), attr->ia_uid);
647         if (ia_valid & ATTR_GID)
648                 bi->bi_gid = from_kgid(i_user_ns(&inode->v), attr->ia_gid);
649
650         if (ia_valid & ATTR_SIZE)
651                 bi->bi_size = attr->ia_size;
652
653         if (ia_valid & ATTR_ATIME)
654                 bi->bi_atime = timespec_to_bch2_time(c, attr->ia_atime);
655         if (ia_valid & ATTR_MTIME)
656                 bi->bi_mtime = timespec_to_bch2_time(c, attr->ia_mtime);
657         if (ia_valid & ATTR_CTIME)
658                 bi->bi_ctime = timespec_to_bch2_time(c, attr->ia_ctime);
659
660         if (ia_valid & ATTR_MODE) {
661                 umode_t mode = attr->ia_mode;
662                 kgid_t gid = ia_valid & ATTR_GID
663                         ? attr->ia_gid
664                         : inode->v.i_gid;
665
666                 if (!in_group_p(gid) &&
667                     !capable_wrt_inode_uidgid(idmap, &inode->v, CAP_FSETID))
668                         mode &= ~S_ISGID;
669                 bi->bi_mode = mode;
670         }
671 }
672
673 int bch2_setattr_nonsize(struct mnt_idmap *idmap,
674                          struct bch_inode_info *inode,
675                          struct iattr *attr)
676 {
677         struct bch_fs *c = inode->v.i_sb->s_fs_info;
678         struct bch_qid qid;
679         struct btree_trans trans;
680         struct btree_iter inode_iter = { NULL };
681         struct bch_inode_unpacked inode_u;
682         struct posix_acl *acl = NULL;
683         int ret;
684
685         mutex_lock(&inode->ei_update_lock);
686
687         qid = inode->ei_qid;
688
689         if (attr->ia_valid & ATTR_UID)
690                 qid.q[QTYP_USR] = from_kuid(i_user_ns(&inode->v), attr->ia_uid);
691
692         if (attr->ia_valid & ATTR_GID)
693                 qid.q[QTYP_GRP] = from_kgid(i_user_ns(&inode->v), attr->ia_gid);
694
695         ret = bch2_fs_quota_transfer(c, inode, qid, ~0,
696                                      KEY_TYPE_QUOTA_PREALLOC);
697         if (ret)
698                 goto err;
699
700         bch2_trans_init(&trans, c, 0, 0);
701 retry:
702         bch2_trans_begin(&trans);
703         kfree(acl);
704         acl = NULL;
705
706         ret = bch2_inode_peek(&trans, &inode_iter, &inode_u, inode_inum(inode),
707                               BTREE_ITER_INTENT);
708         if (ret)
709                 goto btree_err;
710
711         bch2_setattr_copy(idmap, inode, &inode_u, attr);
712
713         if (attr->ia_valid & ATTR_MODE) {
714                 ret = bch2_acl_chmod(&trans, inode_inum(inode), &inode_u,
715                                      inode_u.bi_mode, &acl);
716                 if (ret)
717                         goto btree_err;
718         }
719
720         ret =   bch2_inode_write(&trans, &inode_iter, &inode_u) ?:
721                 bch2_trans_commit(&trans, NULL, NULL,
722                                   BTREE_INSERT_NOFAIL);
723 btree_err:
724         bch2_trans_iter_exit(&trans, &inode_iter);
725
726         if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
727                 goto retry;
728         if (unlikely(ret))
729                 goto err_trans;
730
731         bch2_inode_update_after_write(&trans, inode, &inode_u, attr->ia_valid);
732
733         if (acl)
734                 set_cached_acl(&inode->v, ACL_TYPE_ACCESS, acl);
735 err_trans:
736         bch2_trans_exit(&trans);
737 err:
738         mutex_unlock(&inode->ei_update_lock);
739
740         return bch2_err_class(ret);
741 }
742
743 static int bch2_getattr(struct mnt_idmap *idmap,
744                         const struct path *path, struct kstat *stat,
745                         u32 request_mask, unsigned query_flags)
746 {
747         struct bch_inode_info *inode = to_bch_ei(d_inode(path->dentry));
748         struct bch_fs *c = inode->v.i_sb->s_fs_info;
749
750         stat->dev       = inode->v.i_sb->s_dev;
751         stat->ino       = inode->v.i_ino;
752         stat->mode      = inode->v.i_mode;
753         stat->nlink     = inode->v.i_nlink;
754         stat->uid       = inode->v.i_uid;
755         stat->gid       = inode->v.i_gid;
756         stat->rdev      = inode->v.i_rdev;
757         stat->size      = i_size_read(&inode->v);
758         stat->atime     = inode->v.i_atime;
759         stat->mtime     = inode->v.i_mtime;
760         stat->ctime     = inode->v.i_ctime;
761         stat->blksize   = block_bytes(c);
762         stat->blocks    = inode->v.i_blocks;
763
764         if (request_mask & STATX_BTIME) {
765                 stat->result_mask |= STATX_BTIME;
766                 stat->btime = bch2_time_to_timespec(c, inode->ei_inode.bi_otime);
767         }
768
769         if (inode->ei_inode.bi_flags & BCH_INODE_IMMUTABLE)
770                 stat->attributes |= STATX_ATTR_IMMUTABLE;
771         stat->attributes_mask    |= STATX_ATTR_IMMUTABLE;
772
773         if (inode->ei_inode.bi_flags & BCH_INODE_APPEND)
774                 stat->attributes |= STATX_ATTR_APPEND;
775         stat->attributes_mask    |= STATX_ATTR_APPEND;
776
777         if (inode->ei_inode.bi_flags & BCH_INODE_NODUMP)
778                 stat->attributes |= STATX_ATTR_NODUMP;
779         stat->attributes_mask    |= STATX_ATTR_NODUMP;
780
781         return 0;
782 }
783
784 static int bch2_setattr(struct mnt_idmap *idmap,
785                         struct dentry *dentry, struct iattr *iattr)
786 {
787         struct bch_inode_info *inode = to_bch_ei(dentry->d_inode);
788         int ret;
789
790         lockdep_assert_held(&inode->v.i_rwsem);
791
792         ret = setattr_prepare(idmap, dentry, iattr);
793         if (ret)
794                 return ret;
795
796         return iattr->ia_valid & ATTR_SIZE
797                 ? bch2_truncate(idmap, inode, iattr)
798                 : bch2_setattr_nonsize(idmap, inode, iattr);
799 }
800
801 static int bch2_tmpfile(struct mnt_idmap *idmap,
802                         struct inode *vdir, struct file *file, umode_t mode)
803 {
804         struct bch_inode_info *inode =
805                 __bch2_create(idmap, to_bch_ei(vdir),
806                               file->f_path.dentry, mode, 0,
807                               (subvol_inum) { 0 }, BCH_CREATE_TMPFILE);
808
809         if (IS_ERR(inode))
810                 return bch2_err_class(PTR_ERR(inode));
811
812         d_mark_tmpfile(file, &inode->v);
813         d_instantiate(file->f_path.dentry, &inode->v);
814         return finish_open_simple(file, 0);
815 }
816
817 static int bch2_fill_extent(struct bch_fs *c,
818                             struct fiemap_extent_info *info,
819                             struct bkey_s_c k, unsigned flags)
820 {
821         if (bkey_extent_is_direct_data(k.k)) {
822                 struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
823                 const union bch_extent_entry *entry;
824                 struct extent_ptr_decoded p;
825                 int ret;
826
827                 if (k.k->type == KEY_TYPE_reflink_v)
828                         flags |= FIEMAP_EXTENT_SHARED;
829
830                 bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
831                         int flags2 = 0;
832                         u64 offset = p.ptr.offset;
833
834                         if (p.ptr.unwritten)
835                                 flags2 |= FIEMAP_EXTENT_UNWRITTEN;
836
837                         if (p.crc.compression_type)
838                                 flags2 |= FIEMAP_EXTENT_ENCODED;
839                         else
840                                 offset += p.crc.offset;
841
842                         if ((offset & (block_sectors(c) - 1)) ||
843                             (k.k->size & (block_sectors(c) - 1)))
844                                 flags2 |= FIEMAP_EXTENT_NOT_ALIGNED;
845
846                         ret = fiemap_fill_next_extent(info,
847                                                 bkey_start_offset(k.k) << 9,
848                                                 offset << 9,
849                                                 k.k->size << 9, flags|flags2);
850                         if (ret)
851                                 return ret;
852                 }
853
854                 return 0;
855         } else if (bkey_extent_is_inline_data(k.k)) {
856                 return fiemap_fill_next_extent(info,
857                                                bkey_start_offset(k.k) << 9,
858                                                0, k.k->size << 9,
859                                                flags|
860                                                FIEMAP_EXTENT_DATA_INLINE);
861         } else if (k.k->type == KEY_TYPE_reservation) {
862                 return fiemap_fill_next_extent(info,
863                                                bkey_start_offset(k.k) << 9,
864                                                0, k.k->size << 9,
865                                                flags|
866                                                FIEMAP_EXTENT_DELALLOC|
867                                                FIEMAP_EXTENT_UNWRITTEN);
868         } else {
869                 BUG();
870         }
871 }
872
873 static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
874                        u64 start, u64 len)
875 {
876         struct bch_fs *c = vinode->i_sb->s_fs_info;
877         struct bch_inode_info *ei = to_bch_ei(vinode);
878         struct btree_trans trans;
879         struct btree_iter iter;
880         struct bkey_s_c k;
881         struct bkey_buf cur, prev;
882         struct bpos end = POS(ei->v.i_ino, (start + len) >> 9);
883         unsigned offset_into_extent, sectors;
884         bool have_extent = false;
885         u32 snapshot;
886         int ret = 0;
887
888         ret = fiemap_prep(&ei->v, info, start, &len, FIEMAP_FLAG_SYNC);
889         if (ret)
890                 return ret;
891
892         if (start + len < start)
893                 return -EINVAL;
894
895         start >>= 9;
896
897         bch2_bkey_buf_init(&cur);
898         bch2_bkey_buf_init(&prev);
899         bch2_trans_init(&trans, c, 0, 0);
900 retry:
901         bch2_trans_begin(&trans);
902
903         ret = bch2_subvolume_get_snapshot(&trans, ei->ei_subvol, &snapshot);
904         if (ret)
905                 goto err;
906
907         bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents,
908                              SPOS(ei->v.i_ino, start, snapshot), 0);
909
910         while (!(ret = btree_trans_too_many_iters(&trans)) &&
911                (k = bch2_btree_iter_peek_upto(&iter, end)).k &&
912                !(ret = bkey_err(k))) {
913                 enum btree_id data_btree = BTREE_ID_extents;
914
915                 if (!bkey_extent_is_data(k.k) &&
916                     k.k->type != KEY_TYPE_reservation) {
917                         bch2_btree_iter_advance(&iter);
918                         continue;
919                 }
920
921                 offset_into_extent      = iter.pos.offset -
922                         bkey_start_offset(k.k);
923                 sectors                 = k.k->size - offset_into_extent;
924
925                 bch2_bkey_buf_reassemble(&cur, c, k);
926
927                 ret = bch2_read_indirect_extent(&trans, &data_btree,
928                                         &offset_into_extent, &cur);
929                 if (ret)
930                         break;
931
932                 k = bkey_i_to_s_c(cur.k);
933                 bch2_bkey_buf_realloc(&prev, c, k.k->u64s);
934
935                 sectors = min(sectors, k.k->size - offset_into_extent);
936
937                 bch2_cut_front(POS(k.k->p.inode,
938                                    bkey_start_offset(k.k) +
939                                    offset_into_extent),
940                                cur.k);
941                 bch2_key_resize(&cur.k->k, sectors);
942                 cur.k->k.p = iter.pos;
943                 cur.k->k.p.offset += cur.k->k.size;
944
945                 if (have_extent) {
946                         bch2_trans_unlock(&trans);
947                         ret = bch2_fill_extent(c, info,
948                                         bkey_i_to_s_c(prev.k), 0);
949                         if (ret)
950                                 break;
951                 }
952
953                 bkey_copy(prev.k, cur.k);
954                 have_extent = true;
955
956                 bch2_btree_iter_set_pos(&iter,
957                         POS(iter.pos.inode, iter.pos.offset + sectors));
958         }
959         start = iter.pos.offset;
960         bch2_trans_iter_exit(&trans, &iter);
961 err:
962         if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
963                 goto retry;
964
965         if (!ret && have_extent) {
966                 bch2_trans_unlock(&trans);
967                 ret = bch2_fill_extent(c, info, bkey_i_to_s_c(prev.k),
968                                        FIEMAP_EXTENT_LAST);
969         }
970
971         bch2_trans_exit(&trans);
972         bch2_bkey_buf_exit(&cur, c);
973         bch2_bkey_buf_exit(&prev, c);
974         return ret < 0 ? ret : 0;
975 }
976
977 static const struct vm_operations_struct bch_vm_ops = {
978         .fault          = bch2_page_fault,
979         .map_pages      = filemap_map_pages,
980         .page_mkwrite   = bch2_page_mkwrite,
981 };
982
983 static int bch2_mmap(struct file *file, struct vm_area_struct *vma)
984 {
985         file_accessed(file);
986
987         vma->vm_ops = &bch_vm_ops;
988         return 0;
989 }
990
991 /* Directories: */
992
993 static loff_t bch2_dir_llseek(struct file *file, loff_t offset, int whence)
994 {
995         return generic_file_llseek_size(file, offset, whence,
996                                         S64_MAX, S64_MAX);
997 }
998
999 static int bch2_vfs_readdir(struct file *file, struct dir_context *ctx)
1000 {
1001         struct bch_inode_info *inode = file_bch_inode(file);
1002         struct bch_fs *c = inode->v.i_sb->s_fs_info;
1003
1004         if (!dir_emit_dots(file, ctx))
1005                 return 0;
1006
1007         return bch2_readdir(c, inode_inum(inode), ctx);
1008 }
1009
1010 static const struct file_operations bch_file_operations = {
1011         .llseek         = bch2_llseek,
1012         .read_iter      = bch2_read_iter,
1013         .write_iter     = bch2_write_iter,
1014         .mmap           = bch2_mmap,
1015         .open           = generic_file_open,
1016         .fsync          = bch2_fsync,
1017         .splice_read    = filemap_splice_read,
1018         .splice_write   = iter_file_splice_write,
1019         .fallocate      = bch2_fallocate_dispatch,
1020         .unlocked_ioctl = bch2_fs_file_ioctl,
1021 #ifdef CONFIG_COMPAT
1022         .compat_ioctl   = bch2_compat_fs_ioctl,
1023 #endif
1024         .remap_file_range = bch2_remap_file_range,
1025 };
1026
1027 static const struct inode_operations bch_file_inode_operations = {
1028         .getattr        = bch2_getattr,
1029         .setattr        = bch2_setattr,
1030         .fiemap         = bch2_fiemap,
1031         .listxattr      = bch2_xattr_list,
1032 #ifdef CONFIG_BCACHEFS_POSIX_ACL
1033         .get_acl        = bch2_get_acl,
1034         .set_acl        = bch2_set_acl,
1035 #endif
1036 };
1037
1038 static const struct inode_operations bch_dir_inode_operations = {
1039         .lookup         = bch2_lookup,
1040         .create         = bch2_create,
1041         .link           = bch2_link,
1042         .unlink         = bch2_unlink,
1043         .symlink        = bch2_symlink,
1044         .mkdir          = bch2_mkdir,
1045         .rmdir          = bch2_unlink,
1046         .mknod          = bch2_mknod,
1047         .rename         = bch2_rename2,
1048         .getattr        = bch2_getattr,
1049         .setattr        = bch2_setattr,
1050         .tmpfile        = bch2_tmpfile,
1051         .listxattr      = bch2_xattr_list,
1052 #ifdef CONFIG_BCACHEFS_POSIX_ACL
1053         .get_acl        = bch2_get_acl,
1054         .set_acl        = bch2_set_acl,
1055 #endif
1056 };
1057
1058 static const struct file_operations bch_dir_file_operations = {
1059         .llseek         = bch2_dir_llseek,
1060         .read           = generic_read_dir,
1061         .iterate_shared = bch2_vfs_readdir,
1062         .fsync          = bch2_fsync,
1063         .unlocked_ioctl = bch2_fs_file_ioctl,
1064 #ifdef CONFIG_COMPAT
1065         .compat_ioctl   = bch2_compat_fs_ioctl,
1066 #endif
1067 };
1068
1069 static const struct inode_operations bch_symlink_inode_operations = {
1070         .get_link       = page_get_link,
1071         .getattr        = bch2_getattr,
1072         .setattr        = bch2_setattr,
1073         .listxattr      = bch2_xattr_list,
1074 #ifdef CONFIG_BCACHEFS_POSIX_ACL
1075         .get_acl        = bch2_get_acl,
1076         .set_acl        = bch2_set_acl,
1077 #endif
1078 };
1079
1080 static const struct inode_operations bch_special_inode_operations = {
1081         .getattr        = bch2_getattr,
1082         .setattr        = bch2_setattr,
1083         .listxattr      = bch2_xattr_list,
1084 #ifdef CONFIG_BCACHEFS_POSIX_ACL
1085         .get_acl        = bch2_get_acl,
1086         .set_acl        = bch2_set_acl,
1087 #endif
1088 };
1089
1090 static const struct address_space_operations bch_address_space_operations = {
1091         .read_folio     = bch2_read_folio,
1092         .writepages     = bch2_writepages,
1093         .readahead      = bch2_readahead,
1094         .dirty_folio    = filemap_dirty_folio,
1095         .write_begin    = bch2_write_begin,
1096         .write_end      = bch2_write_end,
1097         .invalidate_folio = bch2_invalidate_folio,
1098         .release_folio  = bch2_release_folio,
1099         .direct_IO      = noop_direct_IO,
1100 #ifdef CONFIG_MIGRATION
1101         .migrate_folio  = filemap_migrate_folio,
1102 #endif
1103         .error_remove_page = generic_error_remove_page,
1104 };
1105
/*
 * File handle payload identifying a single inode: inode number, subvolume
 * and inode generation.  Packed; bch2_encode_fh() writes it directly into
 * the u32 handle buffer and bcachefs_fid_valid() measures it in u32 words,
 * so the field layout is part of the handle format.
 */
1106 struct bcachefs_fid {
1107         u64             inum;
1108         u32             subvol;
1109         u32             gen;
1110 } __packed;
1111
/*
 * File handle payload for FILEID_BCACHEFS_WITH_PARENT: the inode's own
 * handle followed by the handle of a directory it is linked in.
 */
1112 struct bcachefs_fid_with_parent {
1113         struct bcachefs_fid     fid;
1114         struct bcachefs_fid     dir;
1115 } __packed;
1116
1117 static int bcachefs_fid_valid(int fh_len, int fh_type)
1118 {
1119         switch (fh_type) {
1120         case FILEID_BCACHEFS_WITHOUT_PARENT:
1121                 return fh_len == sizeof(struct bcachefs_fid) / sizeof(u32);
1122         case FILEID_BCACHEFS_WITH_PARENT:
1123                 return fh_len == sizeof(struct bcachefs_fid_with_parent) / sizeof(u32);
1124         default:
1125                 return false;
1126         }
1127 }
1128
1129 static struct bcachefs_fid bch2_inode_to_fid(struct bch_inode_info *inode)
1130 {
1131         return (struct bcachefs_fid) {
1132                 .inum   = inode->ei_inode.bi_inum,
1133                 .subvol = inode->ei_subvol,
1134                 .gen    = inode->ei_inode.bi_generation,
1135         };
1136 }
1137
1138 static int bch2_encode_fh(struct inode *vinode, u32 *fh, int *len,
1139                           struct inode *vdir)
1140 {
1141         struct bch_inode_info *inode    = to_bch_ei(vinode);
1142         struct bch_inode_info *dir      = to_bch_ei(vdir);
1143
1144         if (*len < sizeof(struct bcachefs_fid_with_parent) / sizeof(u32))
1145                 return FILEID_INVALID;
1146
1147         if (!S_ISDIR(inode->v.i_mode) && dir) {
1148                 struct bcachefs_fid_with_parent *fid = (void *) fh;
1149
1150                 fid->fid = bch2_inode_to_fid(inode);
1151                 fid->dir = bch2_inode_to_fid(dir);
1152
1153                 *len = sizeof(*fid) / sizeof(u32);
1154                 return FILEID_BCACHEFS_WITH_PARENT;
1155         } else {
1156                 struct bcachefs_fid *fid = (void *) fh;
1157
1158                 *fid = bch2_inode_to_fid(inode);
1159
1160                 *len = sizeof(*fid) / sizeof(u32);
1161                 return FILEID_BCACHEFS_WITHOUT_PARENT;
1162         }
1163 }
1164
1165 static struct inode *bch2_nfs_get_inode(struct super_block *sb,
1166                                         struct bcachefs_fid fid)
1167 {
1168         struct bch_fs *c = sb->s_fs_info;
1169         struct inode *vinode = bch2_vfs_inode_get(c, (subvol_inum) {
1170                                     .subvol = fid.subvol,
1171                                     .inum = fid.inum,
1172         });
1173         if (!IS_ERR(vinode) && vinode->i_generation != fid.gen) {
1174                 iput(vinode);
1175                 vinode = ERR_PTR(-ESTALE);
1176         }
1177         return vinode;
1178 }
1179
1180 static struct dentry *bch2_fh_to_dentry(struct super_block *sb, struct fid *_fid,
1181                 int fh_len, int fh_type)
1182 {
1183         struct bcachefs_fid *fid = (void *) _fid;
1184
1185         if (!bcachefs_fid_valid(fh_len, fh_type))
1186                 return NULL;
1187
1188         return d_obtain_alias(bch2_nfs_get_inode(sb, *fid));
1189 }
1190
1191 static struct dentry *bch2_fh_to_parent(struct super_block *sb, struct fid *_fid,
1192                 int fh_len, int fh_type)
1193 {
1194         struct bcachefs_fid_with_parent *fid = (void *) _fid;
1195
1196         if (!bcachefs_fid_valid(fh_len, fh_type) ||
1197             fh_type != FILEID_BCACHEFS_WITH_PARENT)
1198                 return NULL;
1199
1200         return d_obtain_alias(bch2_nfs_get_inode(sb, fid->dir));
1201 }
1202
1203 static struct dentry *bch2_get_parent(struct dentry *child)
1204 {
1205         struct bch_inode_info *inode = to_bch_ei(child->d_inode);
1206         struct bch_fs *c = inode->v.i_sb->s_fs_info;
1207         subvol_inum parent_inum = {
1208                 .subvol = inode->ei_inode.bi_parent_subvol ?:
1209                         inode->ei_subvol,
1210                 .inum = inode->ei_inode.bi_dir,
1211         };
1212
1213         if (!parent_inum.inum)
1214                 return NULL;
1215
1216         return d_obtain_alias(bch2_vfs_inode_get(c, parent_inum));
1217 }
1218
1219 static int bch2_get_name(struct dentry *parent, char *name, struct dentry *child)
1220 {
1221         struct bch_inode_info *inode    = to_bch_ei(child->d_inode);
1222         struct bch_inode_info *dir      = to_bch_ei(parent->d_inode);
1223         struct bch_fs *c = inode->v.i_sb->s_fs_info;
1224         struct btree_trans trans;
1225         struct btree_iter iter1;
1226         struct btree_iter iter2;
1227         struct bkey_s_c k;
1228         struct bkey_s_c_dirent d;
1229         struct bch_inode_unpacked inode_u;
1230         subvol_inum target;
1231         u32 snapshot;
1232         unsigned name_len;
1233         int ret;
1234
1235         if (!S_ISDIR(dir->v.i_mode))
1236                 return -EINVAL;
1237
1238         bch2_trans_init(&trans, c, 0, 0);
1239
1240         bch2_trans_iter_init(&trans, &iter1, BTREE_ID_dirents,
1241                              POS(dir->ei_inode.bi_inum, 0), 0);
1242         bch2_trans_iter_init(&trans, &iter2, BTREE_ID_dirents,
1243                              POS(dir->ei_inode.bi_inum, 0), 0);
1244 retry:
1245         bch2_trans_begin(&trans);
1246
1247         ret = bch2_subvolume_get_snapshot(&trans, dir->ei_subvol, &snapshot);
1248         if (ret)
1249                 goto err;
1250
1251         bch2_btree_iter_set_snapshot(&iter1, snapshot);
1252         bch2_btree_iter_set_snapshot(&iter2, snapshot);
1253
1254         ret = bch2_inode_find_by_inum_trans(&trans, inode_inum(inode), &inode_u);
1255         if (ret)
1256                 goto err;
1257
1258         if (inode_u.bi_dir == dir->ei_inode.bi_inum) {
1259                 bch2_btree_iter_set_pos(&iter1, POS(inode_u.bi_dir, inode_u.bi_dir_offset));
1260
1261                 k = bch2_btree_iter_peek_slot(&iter1);
1262                 ret = bkey_err(k);
1263                 if (ret)
1264                         goto err;
1265
1266                 if (k.k->type != KEY_TYPE_dirent) {
1267                         ret = -BCH_ERR_ENOENT_dirent_doesnt_match_inode;
1268                         goto err;
1269                 }
1270
1271                 d = bkey_s_c_to_dirent(k);
1272                 ret = bch2_dirent_read_target(&trans, inode_inum(dir), d, &target);
1273                 if (ret > 0)
1274                         ret = -BCH_ERR_ENOENT_dirent_doesnt_match_inode;
1275                 if (ret)
1276                         goto err;
1277
1278                 if (target.subvol       == inode->ei_subvol &&
1279                     target.inum         == inode->ei_inode.bi_inum)
1280                         goto found;
1281         } else {
1282                 /*
1283                  * File with multiple hardlinks and our backref is to the wrong
1284                  * directory - linear search:
1285                  */
1286                 for_each_btree_key_continue_norestart(iter2, 0, k, ret) {
1287                         if (k.k->p.inode > dir->ei_inode.bi_inum)
1288                                 break;
1289
1290                         if (k.k->type != KEY_TYPE_dirent)
1291                                 continue;
1292
1293                         d = bkey_s_c_to_dirent(k);
1294                         ret = bch2_dirent_read_target(&trans, inode_inum(dir), d, &target);
1295                         if (ret < 0)
1296                                 break;
1297                         if (ret)
1298                                 continue;
1299
1300                         if (target.subvol       == inode->ei_subvol &&
1301                             target.inum         == inode->ei_inode.bi_inum)
1302                                 goto found;
1303                 }
1304         }
1305
1306         ret = -ENOENT;
1307         goto err;
1308 found:
1309         name_len = min_t(unsigned, bch2_dirent_name_bytes(d), NAME_MAX);
1310
1311         memcpy(name, d.v->d_name, name_len);
1312         name[name_len] = '\0';
1313 err:
1314         if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
1315                 goto retry;
1316
1317         bch2_trans_iter_exit(&trans, &iter1);
1318         bch2_trans_iter_exit(&trans, &iter2);
1319         bch2_trans_exit(&trans);
1320
1321         return ret;
1322 }
1323
1324 static const struct export_operations bch_export_ops = {
1325         .encode_fh      = bch2_encode_fh,
1326         .fh_to_dentry   = bch2_fh_to_dentry,
1327         .fh_to_parent   = bch2_fh_to_parent,
1328         .get_parent     = bch2_get_parent,
1329         .get_name       = bch2_get_name,
1330 };
1331
1332 static void bch2_vfs_inode_init(struct btree_trans *trans, subvol_inum inum,
1333                                 struct bch_inode_info *inode,
1334                                 struct bch_inode_unpacked *bi,
1335                                 struct bch_subvolume *subvol)
1336 {
1337         bch2_inode_update_after_write(trans, inode, bi, ~0);
1338
1339         if (BCH_SUBVOLUME_SNAP(subvol))
1340                 set_bit(EI_INODE_SNAPSHOT, &inode->ei_flags);
1341         else
1342                 clear_bit(EI_INODE_SNAPSHOT, &inode->ei_flags);
1343
1344         inode->v.i_blocks       = bi->bi_sectors;
1345         inode->v.i_ino          = bi->bi_inum;
1346         inode->v.i_rdev         = bi->bi_dev;
1347         inode->v.i_generation   = bi->bi_generation;
1348         inode->v.i_size         = bi->bi_size;
1349
1350         inode->ei_flags         = 0;
1351         inode->ei_quota_reserved = 0;
1352         inode->ei_qid           = bch_qid(bi);
1353         inode->ei_subvol        = inum.subvol;
1354
1355         inode->v.i_mapping->a_ops = &bch_address_space_operations;
1356
1357         switch (inode->v.i_mode & S_IFMT) {
1358         case S_IFREG:
1359                 inode->v.i_op   = &bch_file_inode_operations;
1360                 inode->v.i_fop  = &bch_file_operations;
1361                 break;
1362         case S_IFDIR:
1363                 inode->v.i_op   = &bch_dir_inode_operations;
1364                 inode->v.i_fop  = &bch_dir_file_operations;
1365                 break;
1366         case S_IFLNK:
1367                 inode_nohighmem(&inode->v);
1368                 inode->v.i_op   = &bch_symlink_inode_operations;
1369                 break;
1370         default:
1371                 init_special_inode(&inode->v, inode->v.i_mode, inode->v.i_rdev);
1372                 inode->v.i_op   = &bch_special_inode_operations;
1373                 break;
1374         }
1375
1376         mapping_set_large_folios(inode->v.i_mapping);
1377 }
1378
1379 static struct inode *bch2_alloc_inode(struct super_block *sb)
1380 {
1381         struct bch_inode_info *inode;
1382
1383         inode = kmem_cache_alloc(bch2_inode_cache, GFP_NOFS);
1384         if (!inode)
1385                 return NULL;
1386
1387         inode_init_once(&inode->v);
1388         mutex_init(&inode->ei_update_lock);
1389         two_state_lock_init(&inode->ei_pagecache_lock);
1390         INIT_LIST_HEAD(&inode->ei_vfs_inode_list);
1391         mutex_init(&inode->ei_quota_lock);
1392
1393         return &inode->v;
1394 }
1395
1396 static void bch2_i_callback(struct rcu_head *head)
1397 {
1398         struct inode *vinode = container_of(head, struct inode, i_rcu);
1399         struct bch_inode_info *inode = to_bch_ei(vinode);
1400
1401         kmem_cache_free(bch2_inode_cache, inode);
1402 }
1403
1404 static void bch2_destroy_inode(struct inode *vinode)
1405 {
1406         call_rcu(&vinode->i_rcu, bch2_i_callback);
1407 }
1408
1409 static int inode_update_times_fn(struct bch_inode_info *inode,
1410                                  struct bch_inode_unpacked *bi,
1411                                  void *p)
1412 {
1413         struct bch_fs *c = inode->v.i_sb->s_fs_info;
1414
1415         bi->bi_atime    = timespec_to_bch2_time(c, inode->v.i_atime);
1416         bi->bi_mtime    = timespec_to_bch2_time(c, inode->v.i_mtime);
1417         bi->bi_ctime    = timespec_to_bch2_time(c, inode->v.i_ctime);
1418
1419         return 0;
1420 }
1421
1422 static int bch2_vfs_write_inode(struct inode *vinode,
1423                                 struct writeback_control *wbc)
1424 {
1425         struct bch_fs *c = vinode->i_sb->s_fs_info;
1426         struct bch_inode_info *inode = to_bch_ei(vinode);
1427         int ret;
1428
1429         mutex_lock(&inode->ei_update_lock);
1430         ret = bch2_write_inode(c, inode, inode_update_times_fn, NULL,
1431                                ATTR_ATIME|ATTR_MTIME|ATTR_CTIME);
1432         mutex_unlock(&inode->ei_update_lock);
1433
1434         return bch2_err_class(ret);
1435 }
1436
1437 static void bch2_evict_inode(struct inode *vinode)
1438 {
1439         struct bch_fs *c = vinode->i_sb->s_fs_info;
1440         struct bch_inode_info *inode = to_bch_ei(vinode);
1441
1442         truncate_inode_pages_final(&inode->v.i_data);
1443
1444         clear_inode(&inode->v);
1445
1446         BUG_ON(!is_bad_inode(&inode->v) && inode->ei_quota_reserved);
1447
1448         if (!inode->v.i_nlink && !is_bad_inode(&inode->v)) {
1449                 bch2_quota_acct(c, inode->ei_qid, Q_SPC, -((s64) inode->v.i_blocks),
1450                                 KEY_TYPE_QUOTA_WARN);
1451                 bch2_quota_acct(c, inode->ei_qid, Q_INO, -1,
1452                                 KEY_TYPE_QUOTA_WARN);
1453                 bch2_inode_rm(c, inode_inum(inode));
1454         }
1455
1456         mutex_lock(&c->vfs_inodes_lock);
1457         list_del_init(&inode->ei_vfs_inode_list);
1458         mutex_unlock(&c->vfs_inodes_lock);
1459 }
1460
/*
 * Push the in-memory inodes whose ei_subvol is listed in @s out of the
 * inode cache.
 *
 * Each pass walks c->vfs_inodes_list under c->vfs_inodes_lock.  Matching
 * inodes that are neither I_DONTCACHE nor I_FREEING are grabbed, and after
 * the lock is dropped they are marked with d_mark_dontcache(), have their
 * dentry aliases pruned and are released again.  Passes repeat until two
 * consecutive passes grab nothing; after the first such pass, any matching
 * inode still on the list is waited for before rescanning.
 */
1461 void bch2_evict_subvolume_inodes(struct bch_fs *c, snapshot_id_list *s)
1462 {
1463         struct bch_inode_info *inode, **i;
1464         DARRAY(struct bch_inode_info *) grabbed;
1465         bool clean_pass = false, this_pass_clean;
1466
1467         /*
1468          * Initially, we scan for inodes without I_DONTCACHE, then mark them to
1469          * be pruned with d_mark_dontcache().
1470          *
1471          * Once we've had a clean pass where we didn't find any inodes without
1472          * I_DONTCACHE, we wait for them to be freed:
1473          */
1474
1475         darray_init(&grabbed);
             /* Return value is ignored; pushes below handle their own failure */
1476         darray_make_room(&grabbed, 1024);
1477 again:
1478         cond_resched();
1479         this_pass_clean = true;
1480
1481         mutex_lock(&c->vfs_inodes_lock);
1482         list_for_each_entry(inode, &c->vfs_inodes_list, ei_vfs_inode_list) {
1483                 if (!snapshot_list_has_id(s, inode->ei_subvol))
1484                         continue;
1485
1486                 if (!(inode->v.i_state & I_DONTCACHE) &&
1487                     !(inode->v.i_state & I_FREEING) &&
1488                     igrab(&inode->v)) {
1489                         this_pass_clean = false;
1490
                             /*
                              * If the inode cannot be recorded, give the
                              * reference back and end this pass early; the
                              * pass is not clean, so another one follows.
                              */
1491                         if (darray_push_gfp(&grabbed, inode, GFP_ATOMIC|__GFP_NOWARN)) {
1492                                 iput(&inode->v);
1493                                 break;
1494                         }
1495                 } else if (clean_pass && this_pass_clean) {
                             /*
                              * Previous pass was clean and so is this one so
                              * far: sleep on the __I_NEW bit waitqueue of
                              * this inode (with the list lock dropped), then
                              * rescan from the start.
                              */
1496                         wait_queue_head_t *wq = bit_waitqueue(&inode->v.i_state, __I_NEW);
1497                         DEFINE_WAIT_BIT(wait, &inode->v.i_state, __I_NEW);
1498
1499                         prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
1500                         mutex_unlock(&c->vfs_inodes_lock);
1501
1502                         schedule();
1503                         finish_wait(wq, &wait.wq_entry);
1504                         goto again;
1505                 }
1506         }
1507         mutex_unlock(&c->vfs_inodes_lock);
1508
             /* Outside the list lock: mark, prune and release what we grabbed */
1509         darray_for_each(grabbed, i) {
1510                 inode = *i;
1511                 d_mark_dontcache(&inode->v);
1512                 d_prune_aliases(&inode->v);
1513                 iput(&inode->v);
1514         }
1515         grabbed.nr = 0;
1516
             /* Keep going until two passes in a row were clean */
1517         if (!clean_pass || !this_pass_clean) {
1518                 clean_pass = this_pass_clean;
1519                 goto again;
1520         }
1521
1522         darray_exit(&grabbed);
1523 }
1524
1525 static int bch2_statfs(struct dentry *dentry, struct kstatfs *buf)
1526 {
1527         struct super_block *sb = dentry->d_sb;
1528         struct bch_fs *c = sb->s_fs_info;
1529         struct bch_fs_usage_short usage = bch2_fs_usage_read_short(c);
1530         unsigned shift = sb->s_blocksize_bits - 9;
1531         /*
1532          * this assumes inodes take up 64 bytes, which is a decent average
1533          * number:
1534          */
1535         u64 avail_inodes = ((usage.capacity - usage.used) << 3);
1536         u64 fsid;
1537
1538         buf->f_type     = BCACHEFS_STATFS_MAGIC;
1539         buf->f_bsize    = sb->s_blocksize;
1540         buf->f_blocks   = usage.capacity >> shift;
1541         buf->f_bfree    = usage.free >> shift;
1542         buf->f_bavail   = avail_factor(usage.free) >> shift;
1543
1544         buf->f_files    = usage.nr_inodes + avail_inodes;
1545         buf->f_ffree    = avail_inodes;
1546
1547         fsid = le64_to_cpup((void *) c->sb.user_uuid.b) ^
1548                le64_to_cpup((void *) c->sb.user_uuid.b + sizeof(u64));
1549         buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
1550         buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
1551         buf->f_namelen  = BCH_NAME_MAX;
1552
1553         return 0;
1554 }
1555
1556 static int bch2_sync_fs(struct super_block *sb, int wait)
1557 {
1558         struct bch_fs *c = sb->s_fs_info;
1559         int ret;
1560
1561         if (c->opts.journal_flush_disabled)
1562                 return 0;
1563
1564         if (!wait) {
1565                 bch2_journal_flush_async(&c->journal, NULL);
1566                 return 0;
1567         }
1568
1569         ret = bch2_journal_flush(&c->journal);
1570         return bch2_err_class(ret);
1571 }
1572
1573 static struct bch_fs *bch2_path_to_fs(const char *path)
1574 {
1575         struct bch_fs *c;
1576         dev_t dev;
1577         int ret;
1578
1579         ret = lookup_bdev(path, &dev);
1580         if (ret)
1581                 return ERR_PTR(ret);
1582
1583         c = bch2_dev_to_fs(dev);
1584         if (c)
1585                 closure_put(&c->cl);
1586         return c ?: ERR_PTR(-ENOENT);
1587 }
1588
1589 static char **split_devs(const char *_dev_name, unsigned *nr)
1590 {
1591         char *dev_name = NULL, **devs = NULL, *s;
1592         size_t i, nr_devs = 0;
1593
1594         dev_name = kstrdup(_dev_name, GFP_KERNEL);
1595         if (!dev_name)
1596                 return NULL;
1597
1598         for (s = dev_name; s; s = strchr(s + 1, ':'))
1599                 nr_devs++;
1600
1601         devs = kcalloc(nr_devs + 1, sizeof(const char *), GFP_KERNEL);
1602         if (!devs) {
1603                 kfree(dev_name);
1604                 return NULL;
1605         }
1606
1607         for (i = 0, s = dev_name;
1608              s;
1609              (s = strchr(s, ':')) && (*s++ = '\0'))
1610                 devs[i++] = s;
1611
1612         *nr = nr_devs;
1613         return devs;
1614 }
1615
1616 static int bch2_remount(struct super_block *sb, int *flags, char *data)
1617 {
1618         struct bch_fs *c = sb->s_fs_info;
1619         struct bch_opts opts = bch2_opts_empty();
1620         int ret;
1621
1622         opt_set(opts, read_only, (*flags & SB_RDONLY) != 0);
1623
1624         ret = bch2_parse_mount_opts(c, &opts, data);
1625         if (ret)
1626                 goto err;
1627
1628         if (opts.read_only != c->opts.read_only) {
1629                 down_write(&c->state_lock);
1630
1631                 if (opts.read_only) {
1632                         bch2_fs_read_only(c);
1633
1634                         sb->s_flags |= SB_RDONLY;
1635                 } else {
1636                         ret = bch2_fs_read_write(c);
1637                         if (ret) {
1638                                 bch_err(c, "error going rw: %i", ret);
1639                                 up_write(&c->state_lock);
1640                                 ret = -EINVAL;
1641                                 goto err;
1642                         }
1643
1644                         sb->s_flags &= ~SB_RDONLY;
1645                 }
1646
1647                 c->opts.read_only = opts.read_only;
1648
1649                 up_write(&c->state_lock);
1650         }
1651
1652         if (opts.errors >= 0)
1653                 c->opts.errors = opts.errors;
1654 err:
1655         return bch2_err_class(ret);
1656 }
1657
1658 static int bch2_show_devname(struct seq_file *seq, struct dentry *root)
1659 {
1660         struct bch_fs *c = root->d_sb->s_fs_info;
1661         struct bch_dev *ca;
1662         unsigned i;
1663         bool first = true;
1664
1665         for_each_online_member(ca, c, i) {
1666                 if (!first)
1667                         seq_putc(seq, ':');
1668                 first = false;
1669                 seq_puts(seq, "/dev/");
1670                 seq_puts(seq, ca->name);
1671         }
1672
1673         return 0;
1674 }
1675
1676 static int bch2_show_options(struct seq_file *seq, struct dentry *root)
1677 {
1678         struct bch_fs *c = root->d_sb->s_fs_info;
1679         enum bch_opt_id i;
1680         struct printbuf buf = PRINTBUF;
1681         int ret = 0;
1682
1683         for (i = 0; i < bch2_opts_nr; i++) {
1684                 const struct bch_option *opt = &bch2_opt_table[i];
1685                 u64 v = bch2_opt_get_by_id(&c->opts, i);
1686
1687                 if (!(opt->flags & OPT_MOUNT))
1688                         continue;
1689
1690                 if (v == bch2_opt_get_by_id(&bch2_opts_default, i))
1691                         continue;
1692
1693                 printbuf_reset(&buf);
1694                 bch2_opt_to_text(&buf, c, c->disk_sb.sb, opt, v,
1695                                  OPT_SHOW_MOUNT_STYLE);
1696                 seq_putc(seq, ',');
1697                 seq_puts(seq, buf.buf);
1698         }
1699
1700         if (buf.allocation_failure)
1701                 ret = -ENOMEM;
1702         printbuf_exit(&buf);
1703         return ret;
1704 }
1705
1706 static void bch2_put_super(struct super_block *sb)
1707 {
1708         struct bch_fs *c = sb->s_fs_info;
1709
1710         __bch2_fs_stop(c);
1711 }
1712
1713 static const struct super_operations bch_super_operations = {
1714         .alloc_inode    = bch2_alloc_inode,
1715         .destroy_inode  = bch2_destroy_inode,
1716         .write_inode    = bch2_vfs_write_inode,
1717         .evict_inode    = bch2_evict_inode,
1718         .sync_fs        = bch2_sync_fs,
1719         .statfs         = bch2_statfs,
1720         .show_devname   = bch2_show_devname,
1721         .show_options   = bch2_show_options,
1722         .remount_fs     = bch2_remount,
1723         .put_super      = bch2_put_super,
1724 #if 0
1725         .freeze_fs      = bch2_freeze,
1726         .unfreeze_fs    = bch2_unfreeze,
1727 #endif
1728 };
1729
1730 static int bch2_set_super(struct super_block *s, void *data)
1731 {
1732         s->s_fs_info = data;
1733         return 0;
1734 }
1735
1736 static int bch2_noset_super(struct super_block *s, void *data)
1737 {
1738         return -EBUSY;
1739 }
1740
1741 static int bch2_test_super(struct super_block *s, void *data)
1742 {
1743         struct bch_fs *c = s->s_fs_info;
1744         struct bch_fs **devs = data;
1745         unsigned i;
1746
1747         if (!c)
1748                 return false;
1749
1750         for (i = 0; devs[i]; i++)
1751                 if (c != devs[i])
1752                         return false;
1753         return true;
1754 }
1755
1756 static struct dentry *bch2_mount(struct file_system_type *fs_type,
1757                                  int flags, const char *dev_name, void *data)
1758 {
1759         struct bch_fs *c;
1760         struct bch_dev *ca;
1761         struct super_block *sb;
1762         struct inode *vinode;
1763         struct bch_opts opts = bch2_opts_empty();
1764         char **devs;
1765         struct bch_fs **devs_to_fs = NULL;
1766         unsigned i, nr_devs;
1767         int ret;
1768
1769         opt_set(opts, read_only, (flags & SB_RDONLY) != 0);
1770
1771         ret = bch2_parse_mount_opts(NULL, &opts, data);
1772         if (ret)
1773                 return ERR_PTR(ret);
1774
1775         if (!dev_name || strlen(dev_name) == 0)
1776                 return ERR_PTR(-EINVAL);
1777
1778         devs = split_devs(dev_name, &nr_devs);
1779         if (!devs)
1780                 return ERR_PTR(-ENOMEM);
1781
1782         devs_to_fs = kcalloc(nr_devs + 1, sizeof(void *), GFP_KERNEL);
1783         if (!devs_to_fs) {
1784                 sb = ERR_PTR(-ENOMEM);
1785                 goto got_sb;
1786         }
1787
1788         for (i = 0; i < nr_devs; i++)
1789                 devs_to_fs[i] = bch2_path_to_fs(devs[i]);
1790
1791         sb = sget(fs_type, bch2_test_super, bch2_noset_super,
1792                   flags|SB_NOSEC, devs_to_fs);
1793         if (!IS_ERR(sb))
1794                 goto got_sb;
1795
1796         c = bch2_fs_open(devs, nr_devs, opts);
1797         if (IS_ERR(c)) {
1798                 sb = ERR_CAST(c);
1799                 goto got_sb;
1800         }
1801
1802         /* Some options can't be parsed until after the fs is started: */
1803         ret = bch2_parse_mount_opts(c, &opts, data);
1804         if (ret) {
1805                 bch2_fs_stop(c);
1806                 sb = ERR_PTR(ret);
1807                 goto got_sb;
1808         }
1809
1810         bch2_opts_apply(&c->opts, opts);
1811
1812         sb = sget(fs_type, NULL, bch2_set_super, flags|SB_NOSEC, c);
1813         if (IS_ERR(sb))
1814                 bch2_fs_stop(c);
1815 got_sb:
1816         kfree(devs_to_fs);
1817         kfree(devs[0]);
1818         kfree(devs);
1819
1820         if (IS_ERR(sb)) {
1821                 ret = PTR_ERR(sb);
1822                 ret = bch2_err_class(ret);
1823                 return ERR_PTR(ret);
1824         }
1825
1826         c = sb->s_fs_info;
1827
1828         if (sb->s_root) {
1829                 if ((flags ^ sb->s_flags) & SB_RDONLY) {
1830                         ret = -EBUSY;
1831                         goto err_put_super;
1832                 }
1833                 goto out;
1834         }
1835
1836         sb->s_blocksize         = block_bytes(c);
1837         sb->s_blocksize_bits    = ilog2(block_bytes(c));
1838         sb->s_maxbytes          = MAX_LFS_FILESIZE;
1839         sb->s_op                = &bch_super_operations;
1840         sb->s_export_op         = &bch_export_ops;
1841 #ifdef CONFIG_BCACHEFS_QUOTA
1842         sb->s_qcop              = &bch2_quotactl_operations;
1843         sb->s_quota_types       = QTYPE_MASK_USR|QTYPE_MASK_GRP|QTYPE_MASK_PRJ;
1844 #endif
1845         sb->s_xattr             = bch2_xattr_handlers;
1846         sb->s_magic             = BCACHEFS_STATFS_MAGIC;
1847         sb->s_time_gran         = c->sb.nsec_per_time_unit;
1848         sb->s_time_min          = div_s64(S64_MIN, c->sb.time_units_per_sec) + 1;
1849         sb->s_time_max          = div_s64(S64_MAX, c->sb.time_units_per_sec);
1850         c->vfs_sb               = sb;
1851         strscpy(sb->s_id, c->name, sizeof(sb->s_id));
1852
1853         ret = super_setup_bdi(sb);
1854         if (ret)
1855                 goto err_put_super;
1856
1857         sb->s_bdi->ra_pages             = VM_READAHEAD_PAGES;
1858
1859         for_each_online_member(ca, c, i) {
1860                 struct block_device *bdev = ca->disk_sb.bdev;
1861
1862                 /* XXX: create an anonymous device for multi device filesystems */
1863                 sb->s_bdev      = bdev;
1864                 sb->s_dev       = bdev->bd_dev;
1865                 percpu_ref_put(&ca->io_ref);
1866                 break;
1867         }
1868
1869         c->dev = sb->s_dev;
1870
1871 #ifdef CONFIG_BCACHEFS_POSIX_ACL
1872         if (c->opts.acl)
1873                 sb->s_flags     |= SB_POSIXACL;
1874 #endif
1875
1876         sb->s_shrink.seeks = 0;
1877
1878         vinode = bch2_vfs_inode_get(c, BCACHEFS_ROOT_SUBVOL_INUM);
1879         ret = PTR_ERR_OR_ZERO(vinode);
1880         if (ret) {
1881                 bch_err(c, "error mounting: error getting root inode: %s", bch2_err_str(ret));
1882                 goto err_put_super;
1883         }
1884
1885         sb->s_root = d_make_root(vinode);
1886         if (!sb->s_root) {
1887                 bch_err(c, "error mounting: error allocating root dentry");
1888                 ret = -ENOMEM;
1889                 goto err_put_super;
1890         }
1891
1892         sb->s_flags |= SB_ACTIVE;
1893 out:
1894         return dget(sb->s_root);
1895
1896 err_put_super:
1897         deactivate_locked_super(sb);
1898         return ERR_PTR(bch2_err_class(ret));
1899 }
1900
1901 static void bch2_kill_sb(struct super_block *sb)
1902 {
1903         struct bch_fs *c = sb->s_fs_info;
1904
1905         generic_shutdown_super(sb);
1906         bch2_fs_free(c);
1907 }
1908
1909 static struct file_system_type bcache_fs_type = {
1910         .owner          = THIS_MODULE,
1911         .name           = "bcachefs",
1912         .mount          = bch2_mount,
1913         .kill_sb        = bch2_kill_sb,
1914         .fs_flags       = FS_REQUIRES_DEV,
1915 };
1916
1917 MODULE_ALIAS_FS("bcachefs");
1918
1919 void bch2_vfs_exit(void)
1920 {
1921         unregister_filesystem(&bcache_fs_type);
1922         kmem_cache_destroy(bch2_inode_cache);
1923 }
1924
1925 int __init bch2_vfs_init(void)
1926 {
1927         int ret = -ENOMEM;
1928
1929         bch2_inode_cache = KMEM_CACHE(bch_inode_info, 0);
1930         if (!bch2_inode_cache)
1931                 goto err;
1932
1933         ret = register_filesystem(&bcache_fs_type);
1934         if (ret)
1935                 goto err;
1936
1937         return 0;
1938 err:
1939         bch2_vfs_exit();
1940         return ret;
1941 }
1942
1943 #endif /* NO_BCACHEFS_FS */