]> git.sesse.net Git - bcachefs-tools-debian/blob - libbcachefs/fs.c
Update bcachefs sources to 6a20aede29 bcachefs: Fix quotas + snapshots
[bcachefs-tools-debian] / libbcachefs / fs.c
1 // SPDX-License-Identifier: GPL-2.0
2 #ifndef NO_BCACHEFS_FS
3
4 #include "bcachefs.h"
5 #include "acl.h"
6 #include "bkey_buf.h"
7 #include "btree_update.h"
8 #include "buckets.h"
9 #include "chardev.h"
10 #include "dirent.h"
11 #include "errcode.h"
12 #include "extents.h"
13 #include "fs.h"
14 #include "fs-common.h"
15 #include "fs-io.h"
16 #include "fs-ioctl.h"
17 #include "fsck.h"
18 #include "inode.h"
19 #include "io.h"
20 #include "journal.h"
21 #include "keylist.h"
22 #include "quota.h"
23 #include "super.h"
24 #include "xattr.h"
25
26 #include <linux/aio.h>
27 #include <linux/backing-dev.h>
28 #include <linux/exportfs.h>
29 #include <linux/fiemap.h>
30 #include <linux/module.h>
31 #include <linux/pagemap.h>
32 #include <linux/posix_acl.h>
33 #include <linux/random.h>
34 #include <linux/seq_file.h>
35 #include <linux/statfs.h>
36 #include <linux/string.h>
37 #include <linux/xattr.h>
38
39 static struct kmem_cache *bch2_inode_cache;
40
41 static void bch2_vfs_inode_init(struct btree_trans *, subvol_inum,
42                                 struct bch_inode_info *,
43                                 struct bch_inode_unpacked *,
44                                 struct bch_subvolume *);
45
46 void bch2_inode_update_after_write(struct btree_trans *trans,
47                                    struct bch_inode_info *inode,
48                                    struct bch_inode_unpacked *bi,
49                                    unsigned fields)
50 {
51         struct bch_fs *c = trans->c;
52
53         BUG_ON(bi->bi_inum != inode->v.i_ino);
54
55         bch2_assert_pos_locked(trans, BTREE_ID_inodes,
56                                POS(0, bi->bi_inum),
57                                c->opts.inodes_use_key_cache);
58
59         set_nlink(&inode->v, bch2_inode_nlink_get(bi));
60         i_uid_write(&inode->v, bi->bi_uid);
61         i_gid_write(&inode->v, bi->bi_gid);
62         inode->v.i_mode = bi->bi_mode;
63
64         if (fields & ATTR_ATIME)
65                 inode->v.i_atime = bch2_time_to_timespec(c, bi->bi_atime);
66         if (fields & ATTR_MTIME)
67                 inode->v.i_mtime = bch2_time_to_timespec(c, bi->bi_mtime);
68         if (fields & ATTR_CTIME)
69                 inode->v.i_ctime = bch2_time_to_timespec(c, bi->bi_ctime);
70
71         inode->ei_inode         = *bi;
72
73         bch2_inode_flags_to_vfs(inode);
74 }
75
76 int __must_check bch2_write_inode(struct bch_fs *c,
77                                   struct bch_inode_info *inode,
78                                   inode_set_fn set,
79                                   void *p, unsigned fields)
80 {
81         struct btree_trans trans;
82         struct btree_iter iter = { NULL };
83         struct bch_inode_unpacked inode_u;
84         int ret;
85
86         bch2_trans_init(&trans, c, 0, 512);
87 retry:
88         bch2_trans_begin(&trans);
89
90         ret   = bch2_inode_peek(&trans, &iter, &inode_u, inode_inum(inode),
91                                 BTREE_ITER_INTENT) ?:
92                 (set ? set(inode, &inode_u, p) : 0) ?:
93                 bch2_inode_write(&trans, &iter, &inode_u) ?:
94                 bch2_trans_commit(&trans, NULL, NULL, BTREE_INSERT_NOFAIL);
95
96         /*
97          * the btree node lock protects inode->ei_inode, not ei_update_lock;
98          * this is important for inode updates via bchfs_write_index_update
99          */
100         if (!ret)
101                 bch2_inode_update_after_write(&trans, inode, &inode_u, fields);
102
103         bch2_trans_iter_exit(&trans, &iter);
104
105         if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
106                 goto retry;
107
108         bch2_fs_fatal_err_on(ret == -ENOENT, c,
109                              "inode %u:%llu not found when updating",
110                              inode_inum(inode).subvol,
111                              inode_inum(inode).inum);
112
113         bch2_trans_exit(&trans);
114         return ret < 0 ? ret : 0;
115 }
116
117 int bch2_fs_quota_transfer(struct bch_fs *c,
118                            struct bch_inode_info *inode,
119                            struct bch_qid new_qid,
120                            unsigned qtypes,
121                            enum quota_acct_mode mode)
122 {
123         unsigned i;
124         int ret;
125
126         qtypes &= enabled_qtypes(c);
127
128         for (i = 0; i < QTYP_NR; i++)
129                 if (new_qid.q[i] == inode->ei_qid.q[i])
130                         qtypes &= ~(1U << i);
131
132         if (!qtypes)
133                 return 0;
134
135         mutex_lock(&inode->ei_quota_lock);
136
137         ret = bch2_quota_transfer(c, qtypes, new_qid,
138                                   inode->ei_qid,
139                                   inode->v.i_blocks +
140                                   inode->ei_quota_reserved,
141                                   mode);
142         if (!ret)
143                 for (i = 0; i < QTYP_NR; i++)
144                         if (qtypes & (1 << i))
145                                 inode->ei_qid.q[i] = new_qid.q[i];
146
147         mutex_unlock(&inode->ei_quota_lock);
148
149         return ret;
150 }
151
152 static int bch2_iget5_test(struct inode *vinode, void *p)
153 {
154         struct bch_inode_info *inode = to_bch_ei(vinode);
155         subvol_inum *inum = p;
156
157         return inode->ei_subvol == inum->subvol &&
158                 inode->ei_inode.bi_inum == inum->inum;
159 }
160
161 static int bch2_iget5_set(struct inode *vinode, void *p)
162 {
163         struct bch_inode_info *inode = to_bch_ei(vinode);
164         subvol_inum *inum = p;
165
166         inode->v.i_ino          = inum->inum;
167         inode->ei_subvol        = inum->subvol;
168         inode->ei_inode.bi_inum = inum->inum;
169         return 0;
170 }
171
172 static unsigned bch2_inode_hash(subvol_inum inum)
173 {
174         return jhash_3words(inum.subvol, inum.inum >> 32, inum.inum, JHASH_INITVAL);
175 }
176
177 struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum)
178 {
179         struct bch_inode_unpacked inode_u;
180         struct bch_inode_info *inode;
181         struct btree_trans trans;
182         struct bch_subvolume subvol;
183         int ret;
184
185         inode = to_bch_ei(iget5_locked(c->vfs_sb,
186                                        bch2_inode_hash(inum),
187                                        bch2_iget5_test,
188                                        bch2_iget5_set,
189                                        &inum));
190         if (unlikely(!inode))
191                 return ERR_PTR(-ENOMEM);
192         if (!(inode->v.i_state & I_NEW))
193                 return &inode->v;
194
195         bch2_trans_init(&trans, c, 8, 0);
196         ret = lockrestart_do(&trans,
197                 bch2_subvolume_get(&trans, inum.subvol, true, 0, &subvol) ?:
198                 bch2_inode_find_by_inum_trans(&trans, inum, &inode_u));
199
200         if (!ret)
201                 bch2_vfs_inode_init(&trans, inum, inode, &inode_u, &subvol);
202         bch2_trans_exit(&trans);
203
204         if (ret) {
205                 iget_failed(&inode->v);
206                 return ERR_PTR(ret);
207         }
208
209         mutex_lock(&c->vfs_inodes_lock);
210         list_add(&inode->ei_vfs_inode_list, &c->vfs_inodes_list);
211         mutex_unlock(&c->vfs_inodes_lock);
212
213         unlock_new_inode(&inode->v);
214
215         return &inode->v;
216 }
217
218 struct bch_inode_info *
219 __bch2_create(struct mnt_idmap *idmap,
220               struct bch_inode_info *dir, struct dentry *dentry,
221               umode_t mode, dev_t rdev, subvol_inum snapshot_src,
222               unsigned flags)
223 {
224         struct bch_fs *c = dir->v.i_sb->s_fs_info;
225         struct btree_trans trans;
226         struct bch_inode_unpacked dir_u;
227         struct bch_inode_info *inode, *old;
228         struct bch_inode_unpacked inode_u;
229         struct posix_acl *default_acl = NULL, *acl = NULL;
230         subvol_inum inum;
231         struct bch_subvolume subvol;
232         u64 journal_seq = 0;
233         int ret;
234
235         /*
236          * preallocate acls + vfs inode before btree transaction, so that
237          * nothing can fail after the transaction succeeds:
238          */
239 #ifdef CONFIG_BCACHEFS_POSIX_ACL
240         ret = posix_acl_create(&dir->v, &mode, &default_acl, &acl);
241         if (ret)
242                 return ERR_PTR(ret);
243 #endif
244         inode = to_bch_ei(new_inode(c->vfs_sb));
245         if (unlikely(!inode)) {
246                 inode = ERR_PTR(-ENOMEM);
247                 goto err;
248         }
249
250         bch2_inode_init_early(c, &inode_u);
251
252         if (!(flags & BCH_CREATE_TMPFILE))
253                 mutex_lock(&dir->ei_update_lock);
254
255         bch2_trans_init(&trans, c, 8,
256                         2048 + (!(flags & BCH_CREATE_TMPFILE)
257                                 ? dentry->d_name.len : 0));
258 retry:
259         bch2_trans_begin(&trans);
260
261         ret   = bch2_create_trans(&trans,
262                                   inode_inum(dir), &dir_u, &inode_u,
263                                   !(flags & BCH_CREATE_TMPFILE)
264                                   ? &dentry->d_name : NULL,
265                                   from_kuid(i_user_ns(&dir->v), current_fsuid()),
266                                   from_kgid(i_user_ns(&dir->v), current_fsgid()),
267                                   mode, rdev,
268                                   default_acl, acl, snapshot_src, flags) ?:
269                 bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, 1,
270                                 KEY_TYPE_QUOTA_PREALLOC);
271         if (unlikely(ret))
272                 goto err_before_quota;
273
274         inum.subvol = inode_u.bi_subvol ?: dir->ei_subvol;
275         inum.inum = inode_u.bi_inum;
276
277         ret   = bch2_subvolume_get(&trans, inum.subvol, true,
278                                    BTREE_ITER_WITH_UPDATES, &subvol) ?:
279                 bch2_trans_commit(&trans, NULL, &journal_seq, 0);
280         if (unlikely(ret)) {
281                 bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, -1,
282                                 KEY_TYPE_QUOTA_WARN);
283 err_before_quota:
284                 if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
285                         goto retry;
286                 goto err_trans;
287         }
288
289         if (!(flags & BCH_CREATE_TMPFILE)) {
290                 bch2_inode_update_after_write(&trans, dir, &dir_u,
291                                               ATTR_MTIME|ATTR_CTIME);
292                 mutex_unlock(&dir->ei_update_lock);
293         }
294
295         bch2_iget5_set(&inode->v, &inum);
296         bch2_vfs_inode_init(&trans, inum, inode, &inode_u, &subvol);
297
298         set_cached_acl(&inode->v, ACL_TYPE_ACCESS, acl);
299         set_cached_acl(&inode->v, ACL_TYPE_DEFAULT, default_acl);
300
301         /*
302          * we must insert the new inode into the inode cache before calling
303          * bch2_trans_exit() and dropping locks, else we could race with another
304          * thread pulling the inode in and modifying it:
305          */
306
307         inode->v.i_state |= I_CREATING;
308
309         old = to_bch_ei(inode_insert5(&inode->v,
310                                       bch2_inode_hash(inum),
311                                       bch2_iget5_test,
312                                       bch2_iget5_set,
313                                       &inum));
314         BUG_ON(!old);
315
316         if (unlikely(old != inode)) {
317                 /*
318                  * We raced, another process pulled the new inode into cache
319                  * before us:
320                  */
321                 make_bad_inode(&inode->v);
322                 iput(&inode->v);
323
324                 inode = old;
325         } else {
326                 mutex_lock(&c->vfs_inodes_lock);
327                 list_add(&inode->ei_vfs_inode_list, &c->vfs_inodes_list);
328                 mutex_unlock(&c->vfs_inodes_lock);
329                 /*
330                  * we really don't want insert_inode_locked2() to be setting
331                  * I_NEW...
332                  */
333                 unlock_new_inode(&inode->v);
334         }
335
336         bch2_trans_exit(&trans);
337 err:
338         posix_acl_release(default_acl);
339         posix_acl_release(acl);
340         return inode;
341 err_trans:
342         if (!(flags & BCH_CREATE_TMPFILE))
343                 mutex_unlock(&dir->ei_update_lock);
344
345         bch2_trans_exit(&trans);
346         make_bad_inode(&inode->v);
347         iput(&inode->v);
348         inode = ERR_PTR(ret);
349         goto err;
350 }
351
352 /* methods */
353
354 static struct dentry *bch2_lookup(struct inode *vdir, struct dentry *dentry,
355                                   unsigned int flags)
356 {
357         struct bch_fs *c = vdir->i_sb->s_fs_info;
358         struct bch_inode_info *dir = to_bch_ei(vdir);
359         struct bch_hash_info hash = bch2_hash_info_init(c, &dir->ei_inode);
360         struct inode *vinode = NULL;
361         subvol_inum inum = { .subvol = 1 };
362         int ret;
363
364         ret = bch2_dirent_lookup(c, inode_inum(dir), &hash,
365                                  &dentry->d_name, &inum);
366
367         if (!ret)
368                 vinode = bch2_vfs_inode_get(c, inum);
369
370         return d_splice_alias(vinode, dentry);
371 }
372
373 static int bch2_mknod(struct mnt_idmap *idmap,
374                       struct inode *vdir, struct dentry *dentry,
375                       umode_t mode, dev_t rdev)
376 {
377         struct bch_inode_info *inode =
378                 __bch2_create(idmap, to_bch_ei(vdir), dentry, mode, rdev,
379                               (subvol_inum) { 0 }, 0);
380
381         if (IS_ERR(inode))
382                 return bch2_err_class(PTR_ERR(inode));
383
384         d_instantiate(dentry, &inode->v);
385         return 0;
386 }
387
388 static int bch2_create(struct mnt_idmap *idmap,
389                        struct inode *vdir, struct dentry *dentry,
390                        umode_t mode, bool excl)
391 {
392         return bch2_mknod(idmap, vdir, dentry, mode|S_IFREG, 0);
393 }
394
395 static int __bch2_link(struct bch_fs *c,
396                        struct bch_inode_info *inode,
397                        struct bch_inode_info *dir,
398                        struct dentry *dentry)
399 {
400         struct btree_trans trans;
401         struct bch_inode_unpacked dir_u, inode_u;
402         int ret;
403
404         mutex_lock(&inode->ei_update_lock);
405         bch2_trans_init(&trans, c, 4, 1024);
406
407         ret = commit_do(&trans, NULL, NULL, 0,
408                         bch2_link_trans(&trans,
409                                         inode_inum(dir),   &dir_u,
410                                         inode_inum(inode), &inode_u,
411                                         &dentry->d_name));
412
413         if (likely(!ret)) {
414                 bch2_inode_update_after_write(&trans, dir, &dir_u,
415                                               ATTR_MTIME|ATTR_CTIME);
416                 bch2_inode_update_after_write(&trans, inode, &inode_u, ATTR_CTIME);
417         }
418
419         bch2_trans_exit(&trans);
420         mutex_unlock(&inode->ei_update_lock);
421         return ret;
422 }
423
424 static int bch2_link(struct dentry *old_dentry, struct inode *vdir,
425                      struct dentry *dentry)
426 {
427         struct bch_fs *c = vdir->i_sb->s_fs_info;
428         struct bch_inode_info *dir = to_bch_ei(vdir);
429         struct bch_inode_info *inode = to_bch_ei(old_dentry->d_inode);
430         int ret;
431
432         lockdep_assert_held(&inode->v.i_rwsem);
433
434         ret = __bch2_link(c, inode, dir, dentry);
435         if (unlikely(ret))
436                 return ret;
437
438         ihold(&inode->v);
439         d_instantiate(dentry, &inode->v);
440         return 0;
441 }
442
443 int __bch2_unlink(struct inode *vdir, struct dentry *dentry,
444                   bool deleting_snapshot)
445 {
446         struct bch_fs *c = vdir->i_sb->s_fs_info;
447         struct bch_inode_info *dir = to_bch_ei(vdir);
448         struct bch_inode_info *inode = to_bch_ei(dentry->d_inode);
449         struct bch_inode_unpacked dir_u, inode_u;
450         struct btree_trans trans;
451         int ret;
452
453         bch2_lock_inodes(INODE_UPDATE_LOCK, dir, inode);
454         bch2_trans_init(&trans, c, 4, 1024);
455
456         ret = commit_do(&trans, NULL, NULL,
457                         BTREE_INSERT_NOFAIL,
458                 bch2_unlink_trans(&trans,
459                                   inode_inum(dir), &dir_u,
460                                   &inode_u, &dentry->d_name,
461                                   deleting_snapshot));
462         if (unlikely(ret))
463                 goto err;
464
465         bch2_inode_update_after_write(&trans, dir, &dir_u,
466                                       ATTR_MTIME|ATTR_CTIME);
467         bch2_inode_update_after_write(&trans, inode, &inode_u,
468                                       ATTR_MTIME);
469
470         if (inode_u.bi_subvol) {
471                 /*
472                  * Subvolume deletion is asynchronous, but we still want to tell
473                  * the VFS that it's been deleted here:
474                  */
475                 set_nlink(&inode->v, 0);
476         }
477 err:
478         bch2_trans_exit(&trans);
479         bch2_unlock_inodes(INODE_UPDATE_LOCK, dir, inode);
480
481         return ret;
482 }
483
484 static int bch2_unlink(struct inode *vdir, struct dentry *dentry)
485 {
486         return __bch2_unlink(vdir, dentry, false);
487 }
488
489 static int bch2_symlink(struct mnt_idmap *idmap,
490                         struct inode *vdir, struct dentry *dentry,
491                         const char *symname)
492 {
493         struct bch_fs *c = vdir->i_sb->s_fs_info;
494         struct bch_inode_info *dir = to_bch_ei(vdir), *inode;
495         int ret;
496
497         inode = __bch2_create(idmap, dir, dentry, S_IFLNK|S_IRWXUGO, 0,
498                               (subvol_inum) { 0 }, BCH_CREATE_TMPFILE);
499         if (IS_ERR(inode))
500                 return bch2_err_class(PTR_ERR(inode));
501
502         inode_lock(&inode->v);
503         ret = page_symlink(&inode->v, symname, strlen(symname) + 1);
504         inode_unlock(&inode->v);
505
506         if (unlikely(ret))
507                 goto err;
508
509         ret = filemap_write_and_wait_range(inode->v.i_mapping, 0, LLONG_MAX);
510         if (unlikely(ret))
511                 goto err;
512
513         ret = __bch2_link(c, inode, dir, dentry);
514         if (unlikely(ret))
515                 goto err;
516
517         d_instantiate(dentry, &inode->v);
518         return 0;
519 err:
520         iput(&inode->v);
521         return ret;
522 }
523
524 static int bch2_mkdir(struct mnt_idmap *idmap,
525                       struct inode *vdir, struct dentry *dentry, umode_t mode)
526 {
527         return bch2_mknod(idmap, vdir, dentry, mode|S_IFDIR, 0);
528 }
529
530 static int bch2_rename2(struct mnt_idmap *idmap,
531                         struct inode *src_vdir, struct dentry *src_dentry,
532                         struct inode *dst_vdir, struct dentry *dst_dentry,
533                         unsigned flags)
534 {
535         struct bch_fs *c = src_vdir->i_sb->s_fs_info;
536         struct bch_inode_info *src_dir = to_bch_ei(src_vdir);
537         struct bch_inode_info *dst_dir = to_bch_ei(dst_vdir);
538         struct bch_inode_info *src_inode = to_bch_ei(src_dentry->d_inode);
539         struct bch_inode_info *dst_inode = to_bch_ei(dst_dentry->d_inode);
540         struct bch_inode_unpacked dst_dir_u, src_dir_u;
541         struct bch_inode_unpacked src_inode_u, dst_inode_u;
542         struct btree_trans trans;
543         enum bch_rename_mode mode = flags & RENAME_EXCHANGE
544                 ? BCH_RENAME_EXCHANGE
545                 : dst_dentry->d_inode
546                 ? BCH_RENAME_OVERWRITE : BCH_RENAME;
547         int ret;
548
549         if (flags & ~(RENAME_NOREPLACE|RENAME_EXCHANGE))
550                 return -EINVAL;
551
552         if (mode == BCH_RENAME_OVERWRITE) {
553                 ret = filemap_write_and_wait_range(src_inode->v.i_mapping,
554                                                    0, LLONG_MAX);
555                 if (ret)
556                         return ret;
557         }
558
559         bch2_trans_init(&trans, c, 8, 2048);
560
561         bch2_lock_inodes(INODE_UPDATE_LOCK,
562                          src_dir,
563                          dst_dir,
564                          src_inode,
565                          dst_inode);
566
567         if (inode_attr_changing(dst_dir, src_inode, Inode_opt_project)) {
568                 ret = bch2_fs_quota_transfer(c, src_inode,
569                                              dst_dir->ei_qid,
570                                              1 << QTYP_PRJ,
571                                              KEY_TYPE_QUOTA_PREALLOC);
572                 if (ret)
573                         goto err;
574         }
575
576         if (mode == BCH_RENAME_EXCHANGE &&
577             inode_attr_changing(src_dir, dst_inode, Inode_opt_project)) {
578                 ret = bch2_fs_quota_transfer(c, dst_inode,
579                                              src_dir->ei_qid,
580                                              1 << QTYP_PRJ,
581                                              KEY_TYPE_QUOTA_PREALLOC);
582                 if (ret)
583                         goto err;
584         }
585
586         ret = commit_do(&trans, NULL, NULL, 0,
587                         bch2_rename_trans(&trans,
588                                           inode_inum(src_dir), &src_dir_u,
589                                           inode_inum(dst_dir), &dst_dir_u,
590                                           &src_inode_u,
591                                           &dst_inode_u,
592                                           &src_dentry->d_name,
593                                           &dst_dentry->d_name,
594                                           mode));
595         if (unlikely(ret))
596                 goto err;
597
598         BUG_ON(src_inode->v.i_ino != src_inode_u.bi_inum);
599         BUG_ON(dst_inode &&
600                dst_inode->v.i_ino != dst_inode_u.bi_inum);
601
602         bch2_inode_update_after_write(&trans, src_dir, &src_dir_u,
603                                       ATTR_MTIME|ATTR_CTIME);
604
605         if (src_dir != dst_dir)
606                 bch2_inode_update_after_write(&trans, dst_dir, &dst_dir_u,
607                                               ATTR_MTIME|ATTR_CTIME);
608
609         bch2_inode_update_after_write(&trans, src_inode, &src_inode_u,
610                                       ATTR_CTIME);
611
612         if (dst_inode)
613                 bch2_inode_update_after_write(&trans, dst_inode, &dst_inode_u,
614                                               ATTR_CTIME);
615 err:
616         bch2_trans_exit(&trans);
617
618         bch2_fs_quota_transfer(c, src_inode,
619                                bch_qid(&src_inode->ei_inode),
620                                1 << QTYP_PRJ,
621                                KEY_TYPE_QUOTA_NOCHECK);
622         if (dst_inode)
623                 bch2_fs_quota_transfer(c, dst_inode,
624                                        bch_qid(&dst_inode->ei_inode),
625                                        1 << QTYP_PRJ,
626                                        KEY_TYPE_QUOTA_NOCHECK);
627
628         bch2_unlock_inodes(INODE_UPDATE_LOCK,
629                            src_dir,
630                            dst_dir,
631                            src_inode,
632                            dst_inode);
633
634         return ret;
635 }
636
637 static void bch2_setattr_copy(struct mnt_idmap *idmap,
638                               struct bch_inode_info *inode,
639                               struct bch_inode_unpacked *bi,
640                               struct iattr *attr)
641 {
642         struct bch_fs *c = inode->v.i_sb->s_fs_info;
643         unsigned int ia_valid = attr->ia_valid;
644
645         if (ia_valid & ATTR_UID)
646                 bi->bi_uid = from_kuid(i_user_ns(&inode->v), attr->ia_uid);
647         if (ia_valid & ATTR_GID)
648                 bi->bi_gid = from_kgid(i_user_ns(&inode->v), attr->ia_gid);
649
650         if (ia_valid & ATTR_SIZE)
651                 bi->bi_size = attr->ia_size;
652
653         if (ia_valid & ATTR_ATIME)
654                 bi->bi_atime = timespec_to_bch2_time(c, attr->ia_atime);
655         if (ia_valid & ATTR_MTIME)
656                 bi->bi_mtime = timespec_to_bch2_time(c, attr->ia_mtime);
657         if (ia_valid & ATTR_CTIME)
658                 bi->bi_ctime = timespec_to_bch2_time(c, attr->ia_ctime);
659
660         if (ia_valid & ATTR_MODE) {
661                 umode_t mode = attr->ia_mode;
662                 kgid_t gid = ia_valid & ATTR_GID
663                         ? attr->ia_gid
664                         : inode->v.i_gid;
665
666                 if (!in_group_p(gid) &&
667                     !capable_wrt_inode_uidgid(idmap, &inode->v, CAP_FSETID))
668                         mode &= ~S_ISGID;
669                 bi->bi_mode = mode;
670         }
671 }
672
673 int bch2_setattr_nonsize(struct mnt_idmap *idmap,
674                          struct bch_inode_info *inode,
675                          struct iattr *attr)
676 {
677         struct bch_fs *c = inode->v.i_sb->s_fs_info;
678         struct bch_qid qid;
679         struct btree_trans trans;
680         struct btree_iter inode_iter = { NULL };
681         struct bch_inode_unpacked inode_u;
682         struct posix_acl *acl = NULL;
683         int ret;
684
685         mutex_lock(&inode->ei_update_lock);
686
687         qid = inode->ei_qid;
688
689         if (attr->ia_valid & ATTR_UID)
690                 qid.q[QTYP_USR] = from_kuid(i_user_ns(&inode->v), attr->ia_uid);
691
692         if (attr->ia_valid & ATTR_GID)
693                 qid.q[QTYP_GRP] = from_kgid(i_user_ns(&inode->v), attr->ia_gid);
694
695         ret = bch2_fs_quota_transfer(c, inode, qid, ~0,
696                                      KEY_TYPE_QUOTA_PREALLOC);
697         if (ret)
698                 goto err;
699
700         bch2_trans_init(&trans, c, 0, 0);
701 retry:
702         bch2_trans_begin(&trans);
703         kfree(acl);
704         acl = NULL;
705
706         ret = bch2_inode_peek(&trans, &inode_iter, &inode_u, inode_inum(inode),
707                               BTREE_ITER_INTENT);
708         if (ret)
709                 goto btree_err;
710
711         bch2_setattr_copy(idmap, inode, &inode_u, attr);
712
713         if (attr->ia_valid & ATTR_MODE) {
714                 ret = bch2_acl_chmod(&trans, inode_inum(inode), &inode_u,
715                                      inode_u.bi_mode, &acl);
716                 if (ret)
717                         goto btree_err;
718         }
719
720         ret =   bch2_inode_write(&trans, &inode_iter, &inode_u) ?:
721                 bch2_trans_commit(&trans, NULL, NULL,
722                                   BTREE_INSERT_NOFAIL);
723 btree_err:
724         bch2_trans_iter_exit(&trans, &inode_iter);
725
726         if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
727                 goto retry;
728         if (unlikely(ret))
729                 goto err_trans;
730
731         bch2_inode_update_after_write(&trans, inode, &inode_u, attr->ia_valid);
732
733         if (acl)
734                 set_cached_acl(&inode->v, ACL_TYPE_ACCESS, acl);
735 err_trans:
736         bch2_trans_exit(&trans);
737 err:
738         mutex_unlock(&inode->ei_update_lock);
739
740         return bch2_err_class(ret);
741 }
742
743 static int bch2_getattr(struct mnt_idmap *idmap,
744                         const struct path *path, struct kstat *stat,
745                         u32 request_mask, unsigned query_flags)
746 {
747         struct bch_inode_info *inode = to_bch_ei(d_inode(path->dentry));
748         struct bch_fs *c = inode->v.i_sb->s_fs_info;
749
750         stat->dev       = inode->v.i_sb->s_dev;
751         stat->ino       = inode->v.i_ino;
752         stat->mode      = inode->v.i_mode;
753         stat->nlink     = inode->v.i_nlink;
754         stat->uid       = inode->v.i_uid;
755         stat->gid       = inode->v.i_gid;
756         stat->rdev      = inode->v.i_rdev;
757         stat->size      = i_size_read(&inode->v);
758         stat->atime     = inode->v.i_atime;
759         stat->mtime     = inode->v.i_mtime;
760         stat->ctime     = inode->v.i_ctime;
761         stat->blksize   = block_bytes(c);
762         stat->blocks    = inode->v.i_blocks;
763
764         if (request_mask & STATX_BTIME) {
765                 stat->result_mask |= STATX_BTIME;
766                 stat->btime = bch2_time_to_timespec(c, inode->ei_inode.bi_otime);
767         }
768
769         if (inode->ei_inode.bi_flags & BCH_INODE_IMMUTABLE)
770                 stat->attributes |= STATX_ATTR_IMMUTABLE;
771         stat->attributes_mask    |= STATX_ATTR_IMMUTABLE;
772
773         if (inode->ei_inode.bi_flags & BCH_INODE_APPEND)
774                 stat->attributes |= STATX_ATTR_APPEND;
775         stat->attributes_mask    |= STATX_ATTR_APPEND;
776
777         if (inode->ei_inode.bi_flags & BCH_INODE_NODUMP)
778                 stat->attributes |= STATX_ATTR_NODUMP;
779         stat->attributes_mask    |= STATX_ATTR_NODUMP;
780
781         return 0;
782 }
783
784 static int bch2_setattr(struct mnt_idmap *idmap,
785                         struct dentry *dentry, struct iattr *iattr)
786 {
787         struct bch_inode_info *inode = to_bch_ei(dentry->d_inode);
788         int ret;
789
790         lockdep_assert_held(&inode->v.i_rwsem);
791
792         ret = setattr_prepare(idmap, dentry, iattr);
793         if (ret)
794                 return ret;
795
796         return iattr->ia_valid & ATTR_SIZE
797                 ? bch2_truncate(idmap, inode, iattr)
798                 : bch2_setattr_nonsize(idmap, inode, iattr);
799 }
800
801 static int bch2_tmpfile(struct mnt_idmap *idmap,
802                         struct inode *vdir, struct file *file, umode_t mode)
803 {
804         struct bch_inode_info *inode =
805                 __bch2_create(idmap, to_bch_ei(vdir),
806                               file->f_path.dentry, mode, 0,
807                               (subvol_inum) { 0 }, BCH_CREATE_TMPFILE);
808
809         if (IS_ERR(inode))
810                 return bch2_err_class(PTR_ERR(inode));
811
812         d_mark_tmpfile(file, &inode->v);
813         d_instantiate(file->f_path.dentry, &inode->v);
814         return finish_open_simple(file, 0);
815 }
816
817 static int bch2_fill_extent(struct bch_fs *c,
818                             struct fiemap_extent_info *info,
819                             struct bkey_s_c k, unsigned flags)
820 {
821         if (bkey_extent_is_direct_data(k.k)) {
822                 struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
823                 const union bch_extent_entry *entry;
824                 struct extent_ptr_decoded p;
825                 int ret;
826
827                 if (k.k->type == KEY_TYPE_reflink_v)
828                         flags |= FIEMAP_EXTENT_SHARED;
829
830                 bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
831                         int flags2 = 0;
832                         u64 offset = p.ptr.offset;
833
834                         if (p.ptr.unwritten)
835                                 flags2 |= FIEMAP_EXTENT_UNWRITTEN;
836
837                         if (p.crc.compression_type)
838                                 flags2 |= FIEMAP_EXTENT_ENCODED;
839                         else
840                                 offset += p.crc.offset;
841
842                         if ((offset & (block_sectors(c) - 1)) ||
843                             (k.k->size & (block_sectors(c) - 1)))
844                                 flags2 |= FIEMAP_EXTENT_NOT_ALIGNED;
845
846                         ret = fiemap_fill_next_extent(info,
847                                                 bkey_start_offset(k.k) << 9,
848                                                 offset << 9,
849                                                 k.k->size << 9, flags|flags2);
850                         if (ret)
851                                 return ret;
852                 }
853
854                 return 0;
855         } else if (bkey_extent_is_inline_data(k.k)) {
856                 return fiemap_fill_next_extent(info,
857                                                bkey_start_offset(k.k) << 9,
858                                                0, k.k->size << 9,
859                                                flags|
860                                                FIEMAP_EXTENT_DATA_INLINE);
861         } else if (k.k->type == KEY_TYPE_reservation) {
862                 return fiemap_fill_next_extent(info,
863                                                bkey_start_offset(k.k) << 9,
864                                                0, k.k->size << 9,
865                                                flags|
866                                                FIEMAP_EXTENT_DELALLOC|
867                                                FIEMAP_EXTENT_UNWRITTEN);
868         } else {
869                 BUG();
870         }
871 }
872
873 static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
874                        u64 start, u64 len)
875 {
876         struct bch_fs *c = vinode->i_sb->s_fs_info;
877         struct bch_inode_info *ei = to_bch_ei(vinode);
878         struct btree_trans trans;
879         struct btree_iter iter;
880         struct bkey_s_c k;
881         struct bkey_buf cur, prev;
882         struct bpos end = POS(ei->v.i_ino, (start + len) >> 9);
883         unsigned offset_into_extent, sectors;
884         bool have_extent = false;
885         u32 snapshot;
886         int ret = 0;
887
888         ret = fiemap_prep(&ei->v, info, start, &len, FIEMAP_FLAG_SYNC);
889         if (ret)
890                 return ret;
891
892         if (start + len < start)
893                 return -EINVAL;
894
895         start >>= 9;
896
897         bch2_bkey_buf_init(&cur);
898         bch2_bkey_buf_init(&prev);
899         bch2_trans_init(&trans, c, 0, 0);
900 retry:
901         bch2_trans_begin(&trans);
902
903         ret = bch2_subvolume_get_snapshot(&trans, ei->ei_subvol, &snapshot);
904         if (ret)
905                 goto err;
906
907         bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents,
908                              SPOS(ei->v.i_ino, start, snapshot), 0);
909
910         while (!(ret = btree_trans_too_many_iters(&trans)) &&
911                (k = bch2_btree_iter_peek_upto(&iter, end)).k &&
912                !(ret = bkey_err(k))) {
913                 enum btree_id data_btree = BTREE_ID_extents;
914
915                 if (!bkey_extent_is_data(k.k) &&
916                     k.k->type != KEY_TYPE_reservation) {
917                         bch2_btree_iter_advance(&iter);
918                         continue;
919                 }
920
921                 offset_into_extent      = iter.pos.offset -
922                         bkey_start_offset(k.k);
923                 sectors                 = k.k->size - offset_into_extent;
924
925                 bch2_bkey_buf_reassemble(&cur, c, k);
926
927                 ret = bch2_read_indirect_extent(&trans, &data_btree,
928                                         &offset_into_extent, &cur);
929                 if (ret)
930                         break;
931
932                 k = bkey_i_to_s_c(cur.k);
933                 bch2_bkey_buf_realloc(&prev, c, k.k->u64s);
934
935                 sectors = min(sectors, k.k->size - offset_into_extent);
936
937                 bch2_cut_front(POS(k.k->p.inode,
938                                    bkey_start_offset(k.k) +
939                                    offset_into_extent),
940                                cur.k);
941                 bch2_key_resize(&cur.k->k, sectors);
942                 cur.k->k.p = iter.pos;
943                 cur.k->k.p.offset += cur.k->k.size;
944
945                 if (have_extent) {
946                         ret = bch2_fill_extent(c, info,
947                                         bkey_i_to_s_c(prev.k), 0);
948                         if (ret)
949                                 break;
950                 }
951
952                 bkey_copy(prev.k, cur.k);
953                 have_extent = true;
954
955                 bch2_btree_iter_set_pos(&iter,
956                         POS(iter.pos.inode, iter.pos.offset + sectors));
957         }
958         start = iter.pos.offset;
959         bch2_trans_iter_exit(&trans, &iter);
960 err:
961         if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
962                 goto retry;
963
964         if (!ret && have_extent)
965                 ret = bch2_fill_extent(c, info, bkey_i_to_s_c(prev.k),
966                                        FIEMAP_EXTENT_LAST);
967
968         bch2_trans_exit(&trans);
969         bch2_bkey_buf_exit(&cur, c);
970         bch2_bkey_buf_exit(&prev, c);
971         return ret < 0 ? ret : 0;
972 }
973
974 static const struct vm_operations_struct bch_vm_ops = {
975         .fault          = bch2_page_fault,
976         .map_pages      = filemap_map_pages,
977         .page_mkwrite   = bch2_page_mkwrite,
978 };
979
980 static int bch2_mmap(struct file *file, struct vm_area_struct *vma)
981 {
982         file_accessed(file);
983
984         vma->vm_ops = &bch_vm_ops;
985         return 0;
986 }
987
988 /* Directories: */
989
990 static loff_t bch2_dir_llseek(struct file *file, loff_t offset, int whence)
991 {
992         return generic_file_llseek_size(file, offset, whence,
993                                         S64_MAX, S64_MAX);
994 }
995
996 static int bch2_vfs_readdir(struct file *file, struct dir_context *ctx)
997 {
998         struct bch_inode_info *inode = file_bch_inode(file);
999         struct bch_fs *c = inode->v.i_sb->s_fs_info;
1000
1001         if (!dir_emit_dots(file, ctx))
1002                 return 0;
1003
1004         return bch2_readdir(c, inode_inum(inode), ctx);
1005 }
1006
1007 static const struct file_operations bch_file_operations = {
1008         .llseek         = bch2_llseek,
1009         .read_iter      = bch2_read_iter,
1010         .write_iter     = bch2_write_iter,
1011         .mmap           = bch2_mmap,
1012         .open           = generic_file_open,
1013         .fsync          = bch2_fsync,
1014         .splice_read    = generic_file_splice_read,
1015         .splice_write   = iter_file_splice_write,
1016         .fallocate      = bch2_fallocate_dispatch,
1017         .unlocked_ioctl = bch2_fs_file_ioctl,
1018 #ifdef CONFIG_COMPAT
1019         .compat_ioctl   = bch2_compat_fs_ioctl,
1020 #endif
1021         .remap_file_range = bch2_remap_file_range,
1022 };
1023
1024 static const struct inode_operations bch_file_inode_operations = {
1025         .getattr        = bch2_getattr,
1026         .setattr        = bch2_setattr,
1027         .fiemap         = bch2_fiemap,
1028         .listxattr      = bch2_xattr_list,
1029 #ifdef CONFIG_BCACHEFS_POSIX_ACL
1030         .get_acl        = bch2_get_acl,
1031         .set_acl        = bch2_set_acl,
1032 #endif
1033 };
1034
1035 static const struct inode_operations bch_dir_inode_operations = {
1036         .lookup         = bch2_lookup,
1037         .create         = bch2_create,
1038         .link           = bch2_link,
1039         .unlink         = bch2_unlink,
1040         .symlink        = bch2_symlink,
1041         .mkdir          = bch2_mkdir,
1042         .rmdir          = bch2_unlink,
1043         .mknod          = bch2_mknod,
1044         .rename         = bch2_rename2,
1045         .getattr        = bch2_getattr,
1046         .setattr        = bch2_setattr,
1047         .tmpfile        = bch2_tmpfile,
1048         .listxattr      = bch2_xattr_list,
1049 #ifdef CONFIG_BCACHEFS_POSIX_ACL
1050         .get_acl        = bch2_get_acl,
1051         .set_acl        = bch2_set_acl,
1052 #endif
1053 };
1054
1055 static const struct file_operations bch_dir_file_operations = {
1056         .llseek         = bch2_dir_llseek,
1057         .read           = generic_read_dir,
1058         .iterate_shared = bch2_vfs_readdir,
1059         .fsync          = bch2_fsync,
1060         .unlocked_ioctl = bch2_fs_file_ioctl,
1061 #ifdef CONFIG_COMPAT
1062         .compat_ioctl   = bch2_compat_fs_ioctl,
1063 #endif
1064 };
1065
1066 static const struct inode_operations bch_symlink_inode_operations = {
1067         .get_link       = page_get_link,
1068         .getattr        = bch2_getattr,
1069         .setattr        = bch2_setattr,
1070         .listxattr      = bch2_xattr_list,
1071 #ifdef CONFIG_BCACHEFS_POSIX_ACL
1072         .get_acl        = bch2_get_acl,
1073         .set_acl        = bch2_set_acl,
1074 #endif
1075 };
1076
1077 static const struct inode_operations bch_special_inode_operations = {
1078         .getattr        = bch2_getattr,
1079         .setattr        = bch2_setattr,
1080         .listxattr      = bch2_xattr_list,
1081 #ifdef CONFIG_BCACHEFS_POSIX_ACL
1082         .get_acl        = bch2_get_acl,
1083         .set_acl        = bch2_set_acl,
1084 #endif
1085 };
1086
1087 static const struct address_space_operations bch_address_space_operations = {
1088         .read_folio     = bch2_read_folio,
1089         .writepages     = bch2_writepages,
1090         .readahead      = bch2_readahead,
1091         .dirty_folio    = filemap_dirty_folio,
1092         .write_begin    = bch2_write_begin,
1093         .write_end      = bch2_write_end,
1094         .invalidate_folio = bch2_invalidate_folio,
1095         .release_folio  = bch2_release_folio,
1096         .direct_IO      = noop_direct_IO,
1097 #ifdef CONFIG_MIGRATION
1098         .migrate_folio  = filemap_migrate_folio,
1099 #endif
1100         .error_remove_page = generic_error_remove_page,
1101 };
1102
1103 struct bcachefs_fid {
1104         u64             inum;
1105         u32             subvol;
1106         u32             gen;
1107 } __packed;
1108
1109 struct bcachefs_fid_with_parent {
1110         struct bcachefs_fid     fid;
1111         struct bcachefs_fid     dir;
1112 } __packed;
1113
1114 static int bcachefs_fid_valid(int fh_len, int fh_type)
1115 {
1116         switch (fh_type) {
1117         case FILEID_BCACHEFS_WITHOUT_PARENT:
1118                 return fh_len == sizeof(struct bcachefs_fid) / sizeof(u32);
1119         case FILEID_BCACHEFS_WITH_PARENT:
1120                 return fh_len == sizeof(struct bcachefs_fid_with_parent) / sizeof(u32);
1121         default:
1122                 return false;
1123         }
1124 }
1125
1126 static struct bcachefs_fid bch2_inode_to_fid(struct bch_inode_info *inode)
1127 {
1128         return (struct bcachefs_fid) {
1129                 .inum   = inode->ei_inode.bi_inum,
1130                 .subvol = inode->ei_subvol,
1131                 .gen    = inode->ei_inode.bi_generation,
1132         };
1133 }
1134
1135 static int bch2_encode_fh(struct inode *vinode, u32 *fh, int *len,
1136                           struct inode *vdir)
1137 {
1138         struct bch_inode_info *inode    = to_bch_ei(vinode);
1139         struct bch_inode_info *dir      = to_bch_ei(vdir);
1140
1141         if (*len < sizeof(struct bcachefs_fid_with_parent) / sizeof(u32))
1142                 return FILEID_INVALID;
1143
1144         if (!S_ISDIR(inode->v.i_mode) && dir) {
1145                 struct bcachefs_fid_with_parent *fid = (void *) fh;
1146
1147                 fid->fid = bch2_inode_to_fid(inode);
1148                 fid->dir = bch2_inode_to_fid(dir);
1149
1150                 *len = sizeof(*fid) / sizeof(u32);
1151                 return FILEID_BCACHEFS_WITH_PARENT;
1152         } else {
1153                 struct bcachefs_fid *fid = (void *) fh;
1154
1155                 *fid = bch2_inode_to_fid(inode);
1156
1157                 *len = sizeof(*fid) / sizeof(u32);
1158                 return FILEID_BCACHEFS_WITHOUT_PARENT;
1159         }
1160 }
1161
1162 static struct inode *bch2_nfs_get_inode(struct super_block *sb,
1163                                         struct bcachefs_fid fid)
1164 {
1165         struct bch_fs *c = sb->s_fs_info;
1166         struct inode *vinode = bch2_vfs_inode_get(c, (subvol_inum) {
1167                                     .subvol = fid.subvol,
1168                                     .inum = fid.inum,
1169         });
1170         if (!IS_ERR(vinode) && vinode->i_generation != fid.gen) {
1171                 iput(vinode);
1172                 vinode = ERR_PTR(-ESTALE);
1173         }
1174         return vinode;
1175 }
1176
1177 static struct dentry *bch2_fh_to_dentry(struct super_block *sb, struct fid *_fid,
1178                 int fh_len, int fh_type)
1179 {
1180         struct bcachefs_fid *fid = (void *) _fid;
1181
1182         if (!bcachefs_fid_valid(fh_len, fh_type))
1183                 return NULL;
1184
1185         return d_obtain_alias(bch2_nfs_get_inode(sb, *fid));
1186 }
1187
1188 static struct dentry *bch2_fh_to_parent(struct super_block *sb, struct fid *_fid,
1189                 int fh_len, int fh_type)
1190 {
1191         struct bcachefs_fid_with_parent *fid = (void *) _fid;
1192
1193         if (!bcachefs_fid_valid(fh_len, fh_type) ||
1194             fh_type != FILEID_BCACHEFS_WITH_PARENT)
1195                 return NULL;
1196
1197         return d_obtain_alias(bch2_nfs_get_inode(sb, fid->dir));
1198 }
1199
1200 static struct dentry *bch2_get_parent(struct dentry *child)
1201 {
1202         struct bch_inode_info *inode = to_bch_ei(child->d_inode);
1203         struct bch_fs *c = inode->v.i_sb->s_fs_info;
1204         subvol_inum parent_inum = {
1205                 .subvol = inode->ei_inode.bi_parent_subvol ?:
1206                         inode->ei_subvol,
1207                 .inum = inode->ei_inode.bi_dir,
1208         };
1209
1210         if (!parent_inum.inum)
1211                 return NULL;
1212
1213         return d_obtain_alias(bch2_vfs_inode_get(c, parent_inum));
1214 }
1215
1216 static int bch2_get_name(struct dentry *parent, char *name, struct dentry *child)
1217 {
1218         struct bch_inode_info *inode    = to_bch_ei(child->d_inode);
1219         struct bch_inode_info *dir      = to_bch_ei(parent->d_inode);
1220         struct bch_fs *c = inode->v.i_sb->s_fs_info;
1221         struct btree_trans trans;
1222         struct btree_iter iter1;
1223         struct btree_iter iter2;
1224         struct bkey_s_c k;
1225         struct bkey_s_c_dirent d;
1226         struct bch_inode_unpacked inode_u;
1227         subvol_inum target;
1228         u32 snapshot;
1229         unsigned name_len;
1230         int ret;
1231
1232         if (!S_ISDIR(dir->v.i_mode))
1233                 return -EINVAL;
1234
1235         bch2_trans_init(&trans, c, 0, 0);
1236
1237         bch2_trans_iter_init(&trans, &iter1, BTREE_ID_dirents,
1238                              POS(dir->ei_inode.bi_inum, 0), 0);
1239         bch2_trans_iter_init(&trans, &iter2, BTREE_ID_dirents,
1240                              POS(dir->ei_inode.bi_inum, 0), 0);
1241 retry:
1242         bch2_trans_begin(&trans);
1243
1244         ret = bch2_subvolume_get_snapshot(&trans, dir->ei_subvol, &snapshot);
1245         if (ret)
1246                 goto err;
1247
1248         bch2_btree_iter_set_snapshot(&iter1, snapshot);
1249         bch2_btree_iter_set_snapshot(&iter2, snapshot);
1250
1251         ret = bch2_inode_find_by_inum_trans(&trans, inode_inum(inode), &inode_u);
1252         if (ret)
1253                 goto err;
1254
1255         if (inode_u.bi_dir == dir->ei_inode.bi_inum) {
1256                 bch2_btree_iter_set_pos(&iter1, POS(inode_u.bi_dir, inode_u.bi_dir_offset));
1257
1258                 k = bch2_btree_iter_peek_slot(&iter1);
1259                 ret = bkey_err(k);
1260                 if (ret)
1261                         goto err;
1262
1263                 if (k.k->type != KEY_TYPE_dirent) {
1264                         ret = -ENOENT;
1265                         goto err;
1266                 }
1267
1268                 d = bkey_s_c_to_dirent(k);
1269                 ret = bch2_dirent_read_target(&trans, inode_inum(dir), d, &target);
1270                 if (ret > 0)
1271                         ret = -ENOENT;
1272                 if (ret)
1273                         goto err;
1274
1275                 if (target.subvol       == inode->ei_subvol &&
1276                     target.inum         == inode->ei_inode.bi_inum)
1277                         goto found;
1278         } else {
1279                 /*
1280                  * File with multiple hardlinks and our backref is to the wrong
1281                  * directory - linear search:
1282                  */
1283                 for_each_btree_key_continue_norestart(iter2, 0, k, ret) {
1284                         if (k.k->p.inode > dir->ei_inode.bi_inum)
1285                                 break;
1286
1287                         if (k.k->type != KEY_TYPE_dirent)
1288                                 continue;
1289
1290                         d = bkey_s_c_to_dirent(k);
1291                         ret = bch2_dirent_read_target(&trans, inode_inum(dir), d, &target);
1292                         if (ret < 0)
1293                                 break;
1294                         if (ret)
1295                                 continue;
1296
1297                         if (target.subvol       == inode->ei_subvol &&
1298                             target.inum         == inode->ei_inode.bi_inum)
1299                                 goto found;
1300                 }
1301         }
1302
1303         ret = -ENOENT;
1304         goto err;
1305 found:
1306         name_len = min_t(unsigned, bch2_dirent_name_bytes(d), NAME_MAX);
1307
1308         memcpy(name, d.v->d_name, name_len);
1309         name[name_len] = '\0';
1310 err:
1311         if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
1312                 goto retry;
1313
1314         bch2_trans_iter_exit(&trans, &iter1);
1315         bch2_trans_iter_exit(&trans, &iter2);
1316         bch2_trans_exit(&trans);
1317
1318         return ret;
1319 }
1320
1321 static const struct export_operations bch_export_ops = {
1322         .encode_fh      = bch2_encode_fh,
1323         .fh_to_dentry   = bch2_fh_to_dentry,
1324         .fh_to_parent   = bch2_fh_to_parent,
1325         .get_parent     = bch2_get_parent,
1326         .get_name       = bch2_get_name,
1327 };
1328
1329 static void bch2_vfs_inode_init(struct btree_trans *trans, subvol_inum inum,
1330                                 struct bch_inode_info *inode,
1331                                 struct bch_inode_unpacked *bi,
1332                                 struct bch_subvolume *subvol)
1333 {
1334         bch2_inode_update_after_write(trans, inode, bi, ~0);
1335
1336         if (BCH_SUBVOLUME_SNAP(subvol))
1337                 set_bit(EI_INODE_SNAPSHOT, &inode->ei_flags);
1338         else
1339                 clear_bit(EI_INODE_SNAPSHOT, &inode->ei_flags);
1340
1341         inode->v.i_blocks       = bi->bi_sectors;
1342         inode->v.i_ino          = bi->bi_inum;
1343         inode->v.i_rdev         = bi->bi_dev;
1344         inode->v.i_generation   = bi->bi_generation;
1345         inode->v.i_size         = bi->bi_size;
1346
1347         inode->ei_flags         = 0;
1348         inode->ei_quota_reserved = 0;
1349         inode->ei_qid           = bch_qid(bi);
1350         inode->ei_subvol        = inum.subvol;
1351
1352         inode->v.i_mapping->a_ops = &bch_address_space_operations;
1353
1354         switch (inode->v.i_mode & S_IFMT) {
1355         case S_IFREG:
1356                 inode->v.i_op   = &bch_file_inode_operations;
1357                 inode->v.i_fop  = &bch_file_operations;
1358                 break;
1359         case S_IFDIR:
1360                 inode->v.i_op   = &bch_dir_inode_operations;
1361                 inode->v.i_fop  = &bch_dir_file_operations;
1362                 break;
1363         case S_IFLNK:
1364                 inode_nohighmem(&inode->v);
1365                 inode->v.i_op   = &bch_symlink_inode_operations;
1366                 break;
1367         default:
1368                 init_special_inode(&inode->v, inode->v.i_mode, inode->v.i_rdev);
1369                 inode->v.i_op   = &bch_special_inode_operations;
1370                 break;
1371         }
1372
1373         mapping_set_large_folios(inode->v.i_mapping);
1374 }
1375
1376 static struct inode *bch2_alloc_inode(struct super_block *sb)
1377 {
1378         struct bch_inode_info *inode;
1379
1380         inode = kmem_cache_alloc(bch2_inode_cache, GFP_NOFS);
1381         if (!inode)
1382                 return NULL;
1383
1384         inode_init_once(&inode->v);
1385         mutex_init(&inode->ei_update_lock);
1386         two_state_lock_init(&inode->ei_pagecache_lock);
1387         INIT_LIST_HEAD(&inode->ei_vfs_inode_list);
1388         mutex_init(&inode->ei_quota_lock);
1389
1390         return &inode->v;
1391 }
1392
1393 static void bch2_i_callback(struct rcu_head *head)
1394 {
1395         struct inode *vinode = container_of(head, struct inode, i_rcu);
1396         struct bch_inode_info *inode = to_bch_ei(vinode);
1397
1398         kmem_cache_free(bch2_inode_cache, inode);
1399 }
1400
1401 static void bch2_destroy_inode(struct inode *vinode)
1402 {
1403         call_rcu(&vinode->i_rcu, bch2_i_callback);
1404 }
1405
1406 static int inode_update_times_fn(struct bch_inode_info *inode,
1407                                  struct bch_inode_unpacked *bi,
1408                                  void *p)
1409 {
1410         struct bch_fs *c = inode->v.i_sb->s_fs_info;
1411
1412         bi->bi_atime    = timespec_to_bch2_time(c, inode->v.i_atime);
1413         bi->bi_mtime    = timespec_to_bch2_time(c, inode->v.i_mtime);
1414         bi->bi_ctime    = timespec_to_bch2_time(c, inode->v.i_ctime);
1415
1416         return 0;
1417 }
1418
1419 static int bch2_vfs_write_inode(struct inode *vinode,
1420                                 struct writeback_control *wbc)
1421 {
1422         struct bch_fs *c = vinode->i_sb->s_fs_info;
1423         struct bch_inode_info *inode = to_bch_ei(vinode);
1424         int ret;
1425
1426         mutex_lock(&inode->ei_update_lock);
1427         ret = bch2_write_inode(c, inode, inode_update_times_fn, NULL,
1428                                ATTR_ATIME|ATTR_MTIME|ATTR_CTIME);
1429         mutex_unlock(&inode->ei_update_lock);
1430
1431         return bch2_err_class(ret);
1432 }
1433
1434 static void bch2_evict_inode(struct inode *vinode)
1435 {
1436         struct bch_fs *c = vinode->i_sb->s_fs_info;
1437         struct bch_inode_info *inode = to_bch_ei(vinode);
1438
1439         truncate_inode_pages_final(&inode->v.i_data);
1440
1441         clear_inode(&inode->v);
1442
1443         BUG_ON(!is_bad_inode(&inode->v) && inode->ei_quota_reserved);
1444
1445         if (!inode->v.i_nlink && !is_bad_inode(&inode->v)) {
1446                 bch2_quota_acct(c, inode->ei_qid, Q_SPC, -((s64) inode->v.i_blocks),
1447                                 KEY_TYPE_QUOTA_WARN);
1448                 bch2_quota_acct(c, inode->ei_qid, Q_INO, -1,
1449                                 KEY_TYPE_QUOTA_WARN);
1450                 bch2_inode_rm(c, inode_inum(inode));
1451         }
1452
1453         mutex_lock(&c->vfs_inodes_lock);
1454         list_del_init(&inode->ei_vfs_inode_list);
1455         mutex_unlock(&c->vfs_inodes_lock);
1456 }
1457
1458 void bch2_evict_subvolume_inodes(struct bch_fs *c, snapshot_id_list *s)
1459 {
1460         struct bch_inode_info *inode, **i;
1461         DARRAY(struct bch_inode_info *) grabbed;
1462         bool clean_pass = false, this_pass_clean;
1463
1464         /*
1465          * Initially, we scan for inodes without I_DONTCACHE, then mark them to
1466          * be pruned with d_mark_dontcache().
1467          *
1468          * Once we've had a clean pass where we didn't find any inodes without
1469          * I_DONTCACHE, we wait for them to be freed:
1470          */
1471
1472         darray_init(&grabbed);
1473         darray_make_room(&grabbed, 1024);
1474 again:
1475         cond_resched();
1476         this_pass_clean = true;
1477
1478         mutex_lock(&c->vfs_inodes_lock);
1479         list_for_each_entry(inode, &c->vfs_inodes_list, ei_vfs_inode_list) {
1480                 if (!snapshot_list_has_id(s, inode->ei_subvol))
1481                         continue;
1482
1483                 if (!(inode->v.i_state & I_DONTCACHE) &&
1484                     !(inode->v.i_state & I_FREEING)) {
1485                         this_pass_clean = false;
1486
1487                         d_mark_dontcache(&inode->v);
1488                         d_prune_aliases(&inode->v);
1489
1490                         /*
1491                          * If i_count was zero, we have to take and release a
1492                          * ref in order for I_DONTCACHE to be noticed and the
1493                          * inode to be dropped;
1494                          */
1495
1496                         if (!atomic_read(&inode->v.i_count) &&
1497                             igrab(&inode->v) &&
1498                             darray_push_gfp(&grabbed, inode, GFP_ATOMIC|__GFP_NOWARN))
1499                                 break;
1500                 } else if (clean_pass && this_pass_clean) {
1501                         wait_queue_head_t *wq = bit_waitqueue(&inode->v.i_state, __I_NEW);
1502                         DEFINE_WAIT_BIT(wait, &inode->v.i_state, __I_NEW);
1503
1504                         prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
1505                         mutex_unlock(&c->vfs_inodes_lock);
1506
1507                         schedule();
1508                         finish_wait(wq, &wait.wq_entry);
1509                         goto again;
1510                 }
1511         }
1512         mutex_unlock(&c->vfs_inodes_lock);
1513
1514         darray_for_each(grabbed, i)
1515                 iput(&(*i)->v);
1516         grabbed.nr = 0;
1517
1518         if (!clean_pass || !this_pass_clean) {
1519                 clean_pass = this_pass_clean;
1520                 goto again;
1521         }
1522
1523         darray_exit(&grabbed);
1524 }
1525
1526 static int bch2_statfs(struct dentry *dentry, struct kstatfs *buf)
1527 {
1528         struct super_block *sb = dentry->d_sb;
1529         struct bch_fs *c = sb->s_fs_info;
1530         struct bch_fs_usage_short usage = bch2_fs_usage_read_short(c);
1531         unsigned shift = sb->s_blocksize_bits - 9;
1532         /*
1533          * this assumes inodes take up 64 bytes, which is a decent average
1534          * number:
1535          */
1536         u64 avail_inodes = ((usage.capacity - usage.used) << 3);
1537         u64 fsid;
1538
1539         buf->f_type     = BCACHEFS_STATFS_MAGIC;
1540         buf->f_bsize    = sb->s_blocksize;
1541         buf->f_blocks   = usage.capacity >> shift;
1542         buf->f_bfree    = usage.free >> shift;
1543         buf->f_bavail   = avail_factor(usage.free) >> shift;
1544
1545         buf->f_files    = usage.nr_inodes + avail_inodes;
1546         buf->f_ffree    = avail_inodes;
1547
1548         fsid = le64_to_cpup((void *) c->sb.user_uuid.b) ^
1549                le64_to_cpup((void *) c->sb.user_uuid.b + sizeof(u64));
1550         buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
1551         buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
1552         buf->f_namelen  = BCH_NAME_MAX;
1553
1554         return 0;
1555 }
1556
1557 static int bch2_sync_fs(struct super_block *sb, int wait)
1558 {
1559         struct bch_fs *c = sb->s_fs_info;
1560         int ret;
1561
1562         if (c->opts.journal_flush_disabled)
1563                 return 0;
1564
1565         if (!wait) {
1566                 bch2_journal_flush_async(&c->journal, NULL);
1567                 return 0;
1568         }
1569
1570         ret = bch2_journal_flush(&c->journal);
1571         return bch2_err_class(ret);
1572 }
1573
1574 static struct bch_fs *bch2_path_to_fs(const char *path)
1575 {
1576         struct bch_fs *c;
1577         dev_t dev;
1578         int ret;
1579
1580         ret = lookup_bdev(path, &dev);
1581         if (ret)
1582                 return ERR_PTR(ret);
1583
1584         c = bch2_dev_to_fs(dev);
1585         if (c)
1586                 closure_put(&c->cl);
1587         return c ?: ERR_PTR(-ENOENT);
1588 }
1589
1590 static char **split_devs(const char *_dev_name, unsigned *nr)
1591 {
1592         char *dev_name = NULL, **devs = NULL, *s;
1593         size_t i, nr_devs = 0;
1594
1595         dev_name = kstrdup(_dev_name, GFP_KERNEL);
1596         if (!dev_name)
1597                 return NULL;
1598
1599         for (s = dev_name; s; s = strchr(s + 1, ':'))
1600                 nr_devs++;
1601
1602         devs = kcalloc(nr_devs + 1, sizeof(const char *), GFP_KERNEL);
1603         if (!devs) {
1604                 kfree(dev_name);
1605                 return NULL;
1606         }
1607
1608         for (i = 0, s = dev_name;
1609              s;
1610              (s = strchr(s, ':')) && (*s++ = '\0'))
1611                 devs[i++] = s;
1612
1613         *nr = nr_devs;
1614         return devs;
1615 }
1616
1617 static int bch2_remount(struct super_block *sb, int *flags, char *data)
1618 {
1619         struct bch_fs *c = sb->s_fs_info;
1620         struct bch_opts opts = bch2_opts_empty();
1621         int ret;
1622
1623         opt_set(opts, read_only, (*flags & SB_RDONLY) != 0);
1624
1625         ret = bch2_parse_mount_opts(c, &opts, data);
1626         if (ret)
1627                 goto err;
1628
1629         if (opts.read_only != c->opts.read_only) {
1630                 down_write(&c->state_lock);
1631
1632                 if (opts.read_only) {
1633                         bch2_fs_read_only(c);
1634
1635                         sb->s_flags |= SB_RDONLY;
1636                 } else {
1637                         ret = bch2_fs_read_write(c);
1638                         if (ret) {
1639                                 bch_err(c, "error going rw: %i", ret);
1640                                 up_write(&c->state_lock);
1641                                 ret = -EINVAL;
1642                                 goto err;
1643                         }
1644
1645                         sb->s_flags &= ~SB_RDONLY;
1646                 }
1647
1648                 c->opts.read_only = opts.read_only;
1649
1650                 up_write(&c->state_lock);
1651         }
1652
1653         if (opts.errors >= 0)
1654                 c->opts.errors = opts.errors;
1655 err:
1656         return bch2_err_class(ret);
1657 }
1658
1659 static int bch2_show_devname(struct seq_file *seq, struct dentry *root)
1660 {
1661         struct bch_fs *c = root->d_sb->s_fs_info;
1662         struct bch_dev *ca;
1663         unsigned i;
1664         bool first = true;
1665
1666         for_each_online_member(ca, c, i) {
1667                 if (!first)
1668                         seq_putc(seq, ':');
1669                 first = false;
1670                 seq_puts(seq, "/dev/");
1671                 seq_puts(seq, ca->name);
1672         }
1673
1674         return 0;
1675 }
1676
1677 static int bch2_show_options(struct seq_file *seq, struct dentry *root)
1678 {
1679         struct bch_fs *c = root->d_sb->s_fs_info;
1680         enum bch_opt_id i;
1681         struct printbuf buf = PRINTBUF;
1682         int ret = 0;
1683
1684         for (i = 0; i < bch2_opts_nr; i++) {
1685                 const struct bch_option *opt = &bch2_opt_table[i];
1686                 u64 v = bch2_opt_get_by_id(&c->opts, i);
1687
1688                 if (!(opt->flags & OPT_MOUNT))
1689                         continue;
1690
1691                 if (v == bch2_opt_get_by_id(&bch2_opts_default, i))
1692                         continue;
1693
1694                 printbuf_reset(&buf);
1695                 bch2_opt_to_text(&buf, c, c->disk_sb.sb, opt, v,
1696                                  OPT_SHOW_MOUNT_STYLE);
1697                 seq_putc(seq, ',');
1698                 seq_puts(seq, buf.buf);
1699         }
1700
1701         if (buf.allocation_failure)
1702                 ret = -ENOMEM;
1703         printbuf_exit(&buf);
1704         return ret;
1705 }
1706
1707 static void bch2_put_super(struct super_block *sb)
1708 {
1709         struct bch_fs *c = sb->s_fs_info;
1710
1711         __bch2_fs_stop(c);
1712 }
1713
1714 static const struct super_operations bch_super_operations = {
1715         .alloc_inode    = bch2_alloc_inode,
1716         .destroy_inode  = bch2_destroy_inode,
1717         .write_inode    = bch2_vfs_write_inode,
1718         .evict_inode    = bch2_evict_inode,
1719         .sync_fs        = bch2_sync_fs,
1720         .statfs         = bch2_statfs,
1721         .show_devname   = bch2_show_devname,
1722         .show_options   = bch2_show_options,
1723         .remount_fs     = bch2_remount,
1724         .put_super      = bch2_put_super,
1725 #if 0
1726         .freeze_fs      = bch2_freeze,
1727         .unfreeze_fs    = bch2_unfreeze,
1728 #endif
1729 };
1730
1731 static int bch2_set_super(struct super_block *s, void *data)
1732 {
1733         s->s_fs_info = data;
1734         return 0;
1735 }
1736
/*
 * sget() "set" callback that always fails with -EBUSY.
 * Neither argument is examined.
 */
static int bch2_noset_super(struct super_block *s, void *data)
{
	int err = -EBUSY;

	return err;
}
1741
1742 static int bch2_test_super(struct super_block *s, void *data)
1743 {
1744         struct bch_fs *c = s->s_fs_info;
1745         struct bch_fs **devs = data;
1746         unsigned i;
1747
1748         if (!c)
1749                 return false;
1750
1751         for (i = 0; devs[i]; i++)
1752                 if (c != devs[i])
1753                         return false;
1754         return true;
1755 }
1756
1757 static struct dentry *bch2_mount(struct file_system_type *fs_type,
1758                                  int flags, const char *dev_name, void *data)
1759 {
1760         struct bch_fs *c;
1761         struct bch_dev *ca;
1762         struct super_block *sb;
1763         struct inode *vinode;
1764         struct bch_opts opts = bch2_opts_empty();
1765         char **devs;
1766         struct bch_fs **devs_to_fs = NULL;
1767         unsigned i, nr_devs;
1768         int ret;
1769
1770         opt_set(opts, read_only, (flags & SB_RDONLY) != 0);
1771
1772         ret = bch2_parse_mount_opts(NULL, &opts, data);
1773         if (ret)
1774                 return ERR_PTR(ret);
1775
1776         if (!dev_name || strlen(dev_name) == 0)
1777                 return ERR_PTR(-EINVAL);
1778
1779         devs = split_devs(dev_name, &nr_devs);
1780         if (!devs)
1781                 return ERR_PTR(-ENOMEM);
1782
1783         devs_to_fs = kcalloc(nr_devs + 1, sizeof(void *), GFP_KERNEL);
1784         if (!devs_to_fs) {
1785                 sb = ERR_PTR(-ENOMEM);
1786                 goto got_sb;
1787         }
1788
1789         for (i = 0; i < nr_devs; i++)
1790                 devs_to_fs[i] = bch2_path_to_fs(devs[i]);
1791
1792         sb = sget(fs_type, bch2_test_super, bch2_noset_super,
1793                   flags|SB_NOSEC, devs_to_fs);
1794         if (!IS_ERR(sb))
1795                 goto got_sb;
1796
1797         c = bch2_fs_open(devs, nr_devs, opts);
1798         if (IS_ERR(c)) {
1799                 sb = ERR_CAST(c);
1800                 goto got_sb;
1801         }
1802
1803         /* Some options can't be parsed until after the fs is started: */
1804         ret = bch2_parse_mount_opts(c, &opts, data);
1805         if (ret) {
1806                 bch2_fs_stop(c);
1807                 sb = ERR_PTR(ret);
1808                 goto got_sb;
1809         }
1810
1811         bch2_opts_apply(&c->opts, opts);
1812
1813         sb = sget(fs_type, NULL, bch2_set_super, flags|SB_NOSEC, c);
1814         if (IS_ERR(sb))
1815                 bch2_fs_stop(c);
1816 got_sb:
1817         kfree(devs_to_fs);
1818         kfree(devs[0]);
1819         kfree(devs);
1820
1821         if (IS_ERR(sb)) {
1822                 ret = PTR_ERR(sb);
1823                 ret = bch2_err_class(ret);
1824                 return ERR_PTR(ret);
1825         }
1826
1827         c = sb->s_fs_info;
1828
1829         if (sb->s_root) {
1830                 if ((flags ^ sb->s_flags) & SB_RDONLY) {
1831                         ret = -EBUSY;
1832                         goto err_put_super;
1833                 }
1834                 goto out;
1835         }
1836
1837         sb->s_blocksize         = block_bytes(c);
1838         sb->s_blocksize_bits    = ilog2(block_bytes(c));
1839         sb->s_maxbytes          = MAX_LFS_FILESIZE;
1840         sb->s_op                = &bch_super_operations;
1841         sb->s_export_op         = &bch_export_ops;
1842 #ifdef CONFIG_BCACHEFS_QUOTA
1843         sb->s_qcop              = &bch2_quotactl_operations;
1844         sb->s_quota_types       = QTYPE_MASK_USR|QTYPE_MASK_GRP|QTYPE_MASK_PRJ;
1845 #endif
1846         sb->s_xattr             = bch2_xattr_handlers;
1847         sb->s_magic             = BCACHEFS_STATFS_MAGIC;
1848         sb->s_time_gran         = c->sb.nsec_per_time_unit;
1849         sb->s_time_min          = div_s64(S64_MIN, c->sb.time_units_per_sec) + 1;
1850         sb->s_time_max          = div_s64(S64_MAX, c->sb.time_units_per_sec);
1851         c->vfs_sb               = sb;
1852         strscpy(sb->s_id, c->name, sizeof(sb->s_id));
1853
1854         ret = super_setup_bdi(sb);
1855         if (ret)
1856                 goto err_put_super;
1857
1858         sb->s_bdi->ra_pages             = VM_READAHEAD_PAGES;
1859
1860         for_each_online_member(ca, c, i) {
1861                 struct block_device *bdev = ca->disk_sb.bdev;
1862
1863                 /* XXX: create an anonymous device for multi device filesystems */
1864                 sb->s_bdev      = bdev;
1865                 sb->s_dev       = bdev->bd_dev;
1866                 percpu_ref_put(&ca->io_ref);
1867                 break;
1868         }
1869
1870         c->dev = sb->s_dev;
1871
1872 #ifdef CONFIG_BCACHEFS_POSIX_ACL
1873         if (c->opts.acl)
1874                 sb->s_flags     |= SB_POSIXACL;
1875 #endif
1876
1877         sb->s_shrink.seeks = 0;
1878
1879         vinode = bch2_vfs_inode_get(c, BCACHEFS_ROOT_SUBVOL_INUM);
1880         ret = PTR_ERR_OR_ZERO(vinode);
1881         if (ret) {
1882                 bch_err(c, "error mounting: error getting root inode: %s", bch2_err_str(ret));
1883                 goto err_put_super;
1884         }
1885
1886         sb->s_root = d_make_root(vinode);
1887         if (!sb->s_root) {
1888                 bch_err(c, "error mounting: error allocating root dentry");
1889                 ret = -ENOMEM;
1890                 goto err_put_super;
1891         }
1892
1893         sb->s_flags |= SB_ACTIVE;
1894 out:
1895         return dget(sb->s_root);
1896
1897 err_put_super:
1898         deactivate_locked_super(sb);
1899         return ERR_PTR(bch2_err_class(ret));
1900 }
1901
1902 static void bch2_kill_sb(struct super_block *sb)
1903 {
1904         struct bch_fs *c = sb->s_fs_info;
1905
1906         generic_shutdown_super(sb);
1907         bch2_fs_free(c);
1908 }
1909
1910 static struct file_system_type bcache_fs_type = {
1911         .owner          = THIS_MODULE,
1912         .name           = "bcachefs",
1913         .mount          = bch2_mount,
1914         .kill_sb        = bch2_kill_sb,
1915         .fs_flags       = FS_REQUIRES_DEV,
1916 };
1917
1918 MODULE_ALIAS_FS("bcachefs");
1919
1920 void bch2_vfs_exit(void)
1921 {
1922         unregister_filesystem(&bcache_fs_type);
1923         kmem_cache_destroy(bch2_inode_cache);
1924 }
1925
1926 int __init bch2_vfs_init(void)
1927 {
1928         int ret = -ENOMEM;
1929
1930         bch2_inode_cache = KMEM_CACHE(bch_inode_info, 0);
1931         if (!bch2_inode_cache)
1932                 goto err;
1933
1934         ret = register_filesystem(&bcache_fs_type);
1935         if (ret)
1936                 goto err;
1937
1938         return 0;
1939 err:
1940         bch2_vfs_exit();
1941         return ret;
1942 }
1943
1944 #endif /* NO_BCACHEFS_FS */