]> git.sesse.net Git - bcachefs-tools-debian/blob - libbcachefs/fs.c
Update bcachefs sources to e7f6215768 bcachefs: Fix snapshot_skiplist_good()
[bcachefs-tools-debian] / libbcachefs / fs.c
1 // SPDX-License-Identifier: GPL-2.0
2 #ifndef NO_BCACHEFS_FS
3
4 #include "bcachefs.h"
5 #include "acl.h"
6 #include "bkey_buf.h"
7 #include "btree_update.h"
8 #include "buckets.h"
9 #include "chardev.h"
10 #include "dirent.h"
11 #include "errcode.h"
12 #include "extents.h"
13 #include "fs.h"
14 #include "fs-common.h"
15 #include "fs-io.h"
16 #include "fs-ioctl.h"
17 #include "fs-io-buffered.h"
18 #include "fs-io-direct.h"
19 #include "fs-io-pagecache.h"
20 #include "fsck.h"
21 #include "inode.h"
22 #include "io.h"
23 #include "journal.h"
24 #include "keylist.h"
25 #include "quota.h"
26 #include "snapshot.h"
27 #include "super.h"
28 #include "xattr.h"
29
30 #include <linux/aio.h>
31 #include <linux/backing-dev.h>
32 #include <linux/exportfs.h>
33 #include <linux/fiemap.h>
34 #include <linux/module.h>
35 #include <linux/pagemap.h>
36 #include <linux/posix_acl.h>
37 #include <linux/random.h>
38 #include <linux/seq_file.h>
39 #include <linux/statfs.h>
40 #include <linux/string.h>
41 #include <linux/xattr.h>
42
/*
 * Slab cache handle; no user of it appears in this part of the file.
 * NOTE(review): presumably backs bch_inode_info allocation - confirm.
 */
43 static struct kmem_cache *bch2_inode_cache;
44
/*
 * Forward declaration: bch2_vfs_inode_get() and __bch2_create() below call
 * this before its definition appears in the file.
 */
45 static void bch2_vfs_inode_init(struct btree_trans *, subvol_inum,
46                                 struct bch_inode_info *,
47                                 struct bch_inode_unpacked *,
48                                 struct bch_subvolume *);
49
50 void bch2_inode_update_after_write(struct btree_trans *trans,
51                                    struct bch_inode_info *inode,
52                                    struct bch_inode_unpacked *bi,
53                                    unsigned fields)
54 {
55         struct bch_fs *c = trans->c;
56
57         BUG_ON(bi->bi_inum != inode->v.i_ino);
58
59         bch2_assert_pos_locked(trans, BTREE_ID_inodes,
60                                POS(0, bi->bi_inum),
61                                c->opts.inodes_use_key_cache);
62
63         set_nlink(&inode->v, bch2_inode_nlink_get(bi));
64         i_uid_write(&inode->v, bi->bi_uid);
65         i_gid_write(&inode->v, bi->bi_gid);
66         inode->v.i_mode = bi->bi_mode;
67
68         if (fields & ATTR_ATIME)
69                 inode->v.i_atime = bch2_time_to_timespec(c, bi->bi_atime);
70         if (fields & ATTR_MTIME)
71                 inode->v.i_mtime = bch2_time_to_timespec(c, bi->bi_mtime);
72         if (fields & ATTR_CTIME)
73                 inode->v.i_ctime = bch2_time_to_timespec(c, bi->bi_ctime);
74
75         inode->ei_inode         = *bi;
76
77         bch2_inode_flags_to_vfs(inode);
78 }
79
80 int __must_check bch2_write_inode(struct bch_fs *c,
81                                   struct bch_inode_info *inode,
82                                   inode_set_fn set,
83                                   void *p, unsigned fields)
84 {
85         struct btree_trans trans;
86         struct btree_iter iter = { NULL };
87         struct bch_inode_unpacked inode_u;
88         int ret;
89
90         bch2_trans_init(&trans, c, 0, 512);
91 retry:
92         bch2_trans_begin(&trans);
93
94         ret   = bch2_inode_peek(&trans, &iter, &inode_u, inode_inum(inode),
95                                 BTREE_ITER_INTENT) ?:
96                 (set ? set(&trans, inode, &inode_u, p) : 0) ?:
97                 bch2_inode_write(&trans, &iter, &inode_u) ?:
98                 bch2_trans_commit(&trans, NULL, NULL, BTREE_INSERT_NOFAIL);
99
100         /*
101          * the btree node lock protects inode->ei_inode, not ei_update_lock;
102          * this is important for inode updates via bchfs_write_index_update
103          */
104         if (!ret)
105                 bch2_inode_update_after_write(&trans, inode, &inode_u, fields);
106
107         bch2_trans_iter_exit(&trans, &iter);
108
109         if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
110                 goto retry;
111
112         bch2_fs_fatal_err_on(bch2_err_matches(ret, ENOENT), c,
113                              "inode %u:%llu not found when updating",
114                              inode_inum(inode).subvol,
115                              inode_inum(inode).inum);
116
117         bch2_trans_exit(&trans);
118         return ret < 0 ? ret : 0;
119 }
120
121 int bch2_fs_quota_transfer(struct bch_fs *c,
122                            struct bch_inode_info *inode,
123                            struct bch_qid new_qid,
124                            unsigned qtypes,
125                            enum quota_acct_mode mode)
126 {
127         unsigned i;
128         int ret;
129
130         qtypes &= enabled_qtypes(c);
131
132         for (i = 0; i < QTYP_NR; i++)
133                 if (new_qid.q[i] == inode->ei_qid.q[i])
134                         qtypes &= ~(1U << i);
135
136         if (!qtypes)
137                 return 0;
138
139         mutex_lock(&inode->ei_quota_lock);
140
141         ret = bch2_quota_transfer(c, qtypes, new_qid,
142                                   inode->ei_qid,
143                                   inode->v.i_blocks +
144                                   inode->ei_quota_reserved,
145                                   mode);
146         if (!ret)
147                 for (i = 0; i < QTYP_NR; i++)
148                         if (qtypes & (1 << i))
149                                 inode->ei_qid.q[i] = new_qid.q[i];
150
151         mutex_unlock(&inode->ei_quota_lock);
152
153         return ret;
154 }
155
156 static int bch2_iget5_test(struct inode *vinode, void *p)
157 {
158         struct bch_inode_info *inode = to_bch_ei(vinode);
159         subvol_inum *inum = p;
160
161         return inode->ei_subvol == inum->subvol &&
162                 inode->ei_inode.bi_inum == inum->inum;
163 }
164
165 static int bch2_iget5_set(struct inode *vinode, void *p)
166 {
167         struct bch_inode_info *inode = to_bch_ei(vinode);
168         subvol_inum *inum = p;
169
170         inode->v.i_ino          = inum->inum;
171         inode->ei_subvol        = inum->subvol;
172         inode->ei_inode.bi_inum = inum->inum;
173         return 0;
174 }
175
176 static unsigned bch2_inode_hash(subvol_inum inum)
177 {
178         return jhash_3words(inum.subvol, inum.inum >> 32, inum.inum, JHASH_INITVAL);
179 }
180
181 struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum)
182 {
183         struct bch_inode_unpacked inode_u;
184         struct bch_inode_info *inode;
185         struct btree_trans trans;
186         struct bch_subvolume subvol;
187         int ret;
188
189         inode = to_bch_ei(iget5_locked(c->vfs_sb,
190                                        bch2_inode_hash(inum),
191                                        bch2_iget5_test,
192                                        bch2_iget5_set,
193                                        &inum));
194         if (unlikely(!inode))
195                 return ERR_PTR(-ENOMEM);
196         if (!(inode->v.i_state & I_NEW))
197                 return &inode->v;
198
199         bch2_trans_init(&trans, c, 8, 0);
200         ret = lockrestart_do(&trans,
201                 bch2_subvolume_get(&trans, inum.subvol, true, 0, &subvol) ?:
202                 bch2_inode_find_by_inum_trans(&trans, inum, &inode_u));
203
204         if (!ret)
205                 bch2_vfs_inode_init(&trans, inum, inode, &inode_u, &subvol);
206         bch2_trans_exit(&trans);
207
208         if (ret) {
209                 iget_failed(&inode->v);
210                 return ERR_PTR(bch2_err_class(ret));
211         }
212
213         mutex_lock(&c->vfs_inodes_lock);
214         list_add(&inode->ei_vfs_inode_list, &c->vfs_inodes_list);
215         mutex_unlock(&c->vfs_inodes_lock);
216
217         unlock_new_inode(&inode->v);
218
219         return &inode->v;
220 }
221
/*
 * Create a new inode under @dir and return its in-memory inode, already
 * inserted into the VFS inode cache.
 *
 * @idmap:        not referenced in this function body
 * @dir:          parent directory
 * @dentry:       supplies the new name; only dereferenced when
 *                BCH_CREATE_TMPFILE is not set
 * @mode, @rdev:  passed through to bch2_create_trans()
 * @snapshot_src: passed through to bch2_create_trans()
 * @flags:        BCH_CREATE_* flags.  With BCH_CREATE_TMPFILE, no name is
 *                passed to bch2_create_trans(), dir->ei_update_lock is not
 *                taken and @dir's in-memory inode is not refreshed.
 *
 * Returns the new inode (or, if another thread inserted the same inode into
 * the cache first, that other inode), or an ERR_PTR() on failure.
 */
222 struct bch_inode_info *
223 __bch2_create(struct mnt_idmap *idmap,
224               struct bch_inode_info *dir, struct dentry *dentry,
225               umode_t mode, dev_t rdev, subvol_inum snapshot_src,
226               unsigned flags)
227 {
228         struct bch_fs *c = dir->v.i_sb->s_fs_info;
229         struct btree_trans trans;
230         struct bch_inode_unpacked dir_u;
231         struct bch_inode_info *inode, *old;
232         struct bch_inode_unpacked inode_u;
233         struct posix_acl *default_acl = NULL, *acl = NULL;
234         subvol_inum inum;
235         struct bch_subvolume subvol;
236         u64 journal_seq = 0;
237         int ret;
238
239         /*
240          * preallocate acls + vfs inode before btree transaction, so that
241          * nothing can fail after the transaction succeeds:
242          */
243 #ifdef CONFIG_BCACHEFS_POSIX_ACL
244         ret = posix_acl_create(&dir->v, &mode, &default_acl, &acl);
245         if (ret)
246                 return ERR_PTR(ret);
247 #endif
248         inode = to_bch_ei(new_inode(c->vfs_sb));
249         if (unlikely(!inode)) {
250                 inode = ERR_PTR(-ENOMEM);
251                 goto err;
252         }
253
254         bch2_inode_init_early(c, &inode_u);
255
            /* Held until the dir's in-memory inode is refreshed, or err_trans: */
256         if (!(flags & BCH_CREATE_TMPFILE))
257                 mutex_lock(&dir->ei_update_lock);
258
259         bch2_trans_init(&trans, c, 8,
260                         2048 + (!(flags & BCH_CREATE_TMPFILE)
261                                 ? dentry->d_name.len : 0));
262 retry:
263         bch2_trans_begin(&trans);
264
            /*
             * Queue the create, then charge one inode (Q_INO, +1) to quota;
             * the charge is reversed below if the commit fails.
             */
265         ret   = bch2_create_trans(&trans,
266                                   inode_inum(dir), &dir_u, &inode_u,
267                                   !(flags & BCH_CREATE_TMPFILE)
268                                   ? &dentry->d_name : NULL,
269                                   from_kuid(i_user_ns(&dir->v), current_fsuid()),
270                                   from_kgid(i_user_ns(&dir->v), current_fsgid()),
271                                   mode, rdev,
272                                   default_acl, acl, snapshot_src, flags) ?:
273                 bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, 1,
274                                 KEY_TYPE_QUOTA_PREALLOC);
275         if (unlikely(ret))
276                 goto err_before_quota;
277
            /* bi_subvol is nonzero when set on the new inode; else use the dir's: */
278         inum.subvol = inode_u.bi_subvol ?: dir->ei_subvol;
279         inum.inum = inode_u.bi_inum;
280
281         ret   = bch2_subvolume_get(&trans, inum.subvol, true,
282                                    BTREE_ITER_WITH_UPDATES, &subvol) ?:
283                 bch2_trans_commit(&trans, NULL, &journal_seq, 0);
284         if (unlikely(ret)) {
                    /* Undo the Q_INO charge taken above: */
285                 bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, -1,
286                                 KEY_TYPE_QUOTA_WARN);
                    /*
                     * Jumped to directly when create/quota accounting failed,
                     * which skips the quota undo above:
                     */
287 err_before_quota:
288                 if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
289                         goto retry;
290                 goto err_trans;
291         }
292
293         if (!(flags & BCH_CREATE_TMPFILE)) {
294                 bch2_inode_update_after_write(&trans, dir, &dir_u,
295                                               ATTR_MTIME|ATTR_CTIME);
296                 mutex_unlock(&dir->ei_update_lock);
297         }
298
299         bch2_iget5_set(&inode->v, &inum);
300         bch2_vfs_inode_init(&trans, inum, inode, &inode_u, &subvol);
301
302         set_cached_acl(&inode->v, ACL_TYPE_ACCESS, acl);
303         set_cached_acl(&inode->v, ACL_TYPE_DEFAULT, default_acl);
304
305         /*
306          * we must insert the new inode into the inode cache before calling
307          * bch2_trans_exit() and dropping locks, else we could race with another
308          * thread pulling the inode in and modifying it:
309          */
310
311         inode->v.i_state |= I_CREATING;
312
313         old = to_bch_ei(inode_insert5(&inode->v,
314                                       bch2_inode_hash(inum),
315                                       bch2_iget5_test,
316                                       bch2_iget5_set,
317                                       &inum));
318         BUG_ON(!old);
319
320         if (unlikely(old != inode)) {
321                 /*
322                  * We raced, another process pulled the new inode into cache
323                  * before us:
324                  */
325                 make_bad_inode(&inode->v);
326                 iput(&inode->v);
327
328                 inode = old;
329         } else {
330                 mutex_lock(&c->vfs_inodes_lock);
331                 list_add(&inode->ei_vfs_inode_list, &c->vfs_inodes_list);
332                 mutex_unlock(&c->vfs_inodes_lock);
333                 /*
334                  * we really don't want insert_inode_locked2() to be setting
335                  * I_NEW...
336                  */
337                 unlock_new_inode(&inode->v);
338         }
339
340         bch2_trans_exit(&trans);
            /* Common exit, success and failure: drop our references to the ACLs. */
341 err:
342         posix_acl_release(default_acl);
343         posix_acl_release(acl);
344         return inode;
            /* Failure after the transaction was set up: unlock, discard the new inode. */
345 err_trans:
346         if (!(flags & BCH_CREATE_TMPFILE))
347                 mutex_unlock(&dir->ei_update_lock);
348
349         bch2_trans_exit(&trans);
350         make_bad_inode(&inode->v);
351         iput(&inode->v);
352         inode = ERR_PTR(ret);
353         goto err;
354 }
355
356 /* methods */
357
358 static struct dentry *bch2_lookup(struct inode *vdir, struct dentry *dentry,
359                                   unsigned int flags)
360 {
361         struct bch_fs *c = vdir->i_sb->s_fs_info;
362         struct bch_inode_info *dir = to_bch_ei(vdir);
363         struct bch_hash_info hash = bch2_hash_info_init(c, &dir->ei_inode);
364         struct inode *vinode = NULL;
365         subvol_inum inum = { .subvol = 1 };
366         int ret;
367
368         ret = bch2_dirent_lookup(c, inode_inum(dir), &hash,
369                                  &dentry->d_name, &inum);
370
371         if (!ret)
372                 vinode = bch2_vfs_inode_get(c, inum);
373
374         return d_splice_alias(vinode, dentry);
375 }
376
377 static int bch2_mknod(struct mnt_idmap *idmap,
378                       struct inode *vdir, struct dentry *dentry,
379                       umode_t mode, dev_t rdev)
380 {
381         struct bch_inode_info *inode =
382                 __bch2_create(idmap, to_bch_ei(vdir), dentry, mode, rdev,
383                               (subvol_inum) { 0 }, 0);
384
385         if (IS_ERR(inode))
386                 return bch2_err_class(PTR_ERR(inode));
387
388         d_instantiate(dentry, &inode->v);
389         return 0;
390 }
391
392 static int bch2_create(struct mnt_idmap *idmap,
393                        struct inode *vdir, struct dentry *dentry,
394                        umode_t mode, bool excl)
395 {
396         return bch2_mknod(idmap, vdir, dentry, mode|S_IFREG, 0);
397 }
398
399 static int __bch2_link(struct bch_fs *c,
400                        struct bch_inode_info *inode,
401                        struct bch_inode_info *dir,
402                        struct dentry *dentry)
403 {
404         struct btree_trans trans;
405         struct bch_inode_unpacked dir_u, inode_u;
406         int ret;
407
408         mutex_lock(&inode->ei_update_lock);
409         bch2_trans_init(&trans, c, 4, 1024);
410
411         ret = commit_do(&trans, NULL, NULL, 0,
412                         bch2_link_trans(&trans,
413                                         inode_inum(dir),   &dir_u,
414                                         inode_inum(inode), &inode_u,
415                                         &dentry->d_name));
416
417         if (likely(!ret)) {
418                 bch2_inode_update_after_write(&trans, dir, &dir_u,
419                                               ATTR_MTIME|ATTR_CTIME);
420                 bch2_inode_update_after_write(&trans, inode, &inode_u, ATTR_CTIME);
421         }
422
423         bch2_trans_exit(&trans);
424         mutex_unlock(&inode->ei_update_lock);
425         return ret;
426 }
427
428 static int bch2_link(struct dentry *old_dentry, struct inode *vdir,
429                      struct dentry *dentry)
430 {
431         struct bch_fs *c = vdir->i_sb->s_fs_info;
432         struct bch_inode_info *dir = to_bch_ei(vdir);
433         struct bch_inode_info *inode = to_bch_ei(old_dentry->d_inode);
434         int ret;
435
436         lockdep_assert_held(&inode->v.i_rwsem);
437
438         ret = __bch2_link(c, inode, dir, dentry);
439         if (unlikely(ret))
440                 return ret;
441
442         ihold(&inode->v);
443         d_instantiate(dentry, &inode->v);
444         return 0;
445 }
446
447 int __bch2_unlink(struct inode *vdir, struct dentry *dentry,
448                   bool deleting_snapshot)
449 {
450         struct bch_fs *c = vdir->i_sb->s_fs_info;
451         struct bch_inode_info *dir = to_bch_ei(vdir);
452         struct bch_inode_info *inode = to_bch_ei(dentry->d_inode);
453         struct bch_inode_unpacked dir_u, inode_u;
454         struct btree_trans trans;
455         int ret;
456
457         bch2_lock_inodes(INODE_UPDATE_LOCK, dir, inode);
458         bch2_trans_init(&trans, c, 4, 1024);
459
460         ret = commit_do(&trans, NULL, NULL,
461                         BTREE_INSERT_NOFAIL,
462                 bch2_unlink_trans(&trans,
463                                   inode_inum(dir), &dir_u,
464                                   &inode_u, &dentry->d_name,
465                                   deleting_snapshot));
466         if (unlikely(ret))
467                 goto err;
468
469         bch2_inode_update_after_write(&trans, dir, &dir_u,
470                                       ATTR_MTIME|ATTR_CTIME);
471         bch2_inode_update_after_write(&trans, inode, &inode_u,
472                                       ATTR_MTIME);
473
474         if (inode_u.bi_subvol) {
475                 /*
476                  * Subvolume deletion is asynchronous, but we still want to tell
477                  * the VFS that it's been deleted here:
478                  */
479                 set_nlink(&inode->v, 0);
480         }
481 err:
482         bch2_trans_exit(&trans);
483         bch2_unlock_inodes(INODE_UPDATE_LOCK, dir, inode);
484
485         return ret;
486 }
487
488 static int bch2_unlink(struct inode *vdir, struct dentry *dentry)
489 {
490         return __bch2_unlink(vdir, dentry, false);
491 }
492
493 static int bch2_symlink(struct mnt_idmap *idmap,
494                         struct inode *vdir, struct dentry *dentry,
495                         const char *symname)
496 {
497         struct bch_fs *c = vdir->i_sb->s_fs_info;
498         struct bch_inode_info *dir = to_bch_ei(vdir), *inode;
499         int ret;
500
501         inode = __bch2_create(idmap, dir, dentry, S_IFLNK|S_IRWXUGO, 0,
502                               (subvol_inum) { 0 }, BCH_CREATE_TMPFILE);
503         if (IS_ERR(inode))
504                 return bch2_err_class(PTR_ERR(inode));
505
506         inode_lock(&inode->v);
507         ret = page_symlink(&inode->v, symname, strlen(symname) + 1);
508         inode_unlock(&inode->v);
509
510         if (unlikely(ret))
511                 goto err;
512
513         ret = filemap_write_and_wait_range(inode->v.i_mapping, 0, LLONG_MAX);
514         if (unlikely(ret))
515                 goto err;
516
517         ret = __bch2_link(c, inode, dir, dentry);
518         if (unlikely(ret))
519                 goto err;
520
521         d_instantiate(dentry, &inode->v);
522         return 0;
523 err:
524         iput(&inode->v);
525         return ret;
526 }
527
528 static int bch2_mkdir(struct mnt_idmap *idmap,
529                       struct inode *vdir, struct dentry *dentry, umode_t mode)
530 {
531         return bch2_mknod(idmap, vdir, dentry, mode|S_IFDIR, 0);
532 }
533
534 static int bch2_rename2(struct mnt_idmap *idmap,
535                         struct inode *src_vdir, struct dentry *src_dentry,
536                         struct inode *dst_vdir, struct dentry *dst_dentry,
537                         unsigned flags)
538 {
539         struct bch_fs *c = src_vdir->i_sb->s_fs_info;
540         struct bch_inode_info *src_dir = to_bch_ei(src_vdir);
541         struct bch_inode_info *dst_dir = to_bch_ei(dst_vdir);
542         struct bch_inode_info *src_inode = to_bch_ei(src_dentry->d_inode);
543         struct bch_inode_info *dst_inode = to_bch_ei(dst_dentry->d_inode);
544         struct bch_inode_unpacked dst_dir_u, src_dir_u;
545         struct bch_inode_unpacked src_inode_u, dst_inode_u;
546         struct btree_trans trans;
547         enum bch_rename_mode mode = flags & RENAME_EXCHANGE
548                 ? BCH_RENAME_EXCHANGE
549                 : dst_dentry->d_inode
550                 ? BCH_RENAME_OVERWRITE : BCH_RENAME;
551         int ret;
552
553         if (flags & ~(RENAME_NOREPLACE|RENAME_EXCHANGE))
554                 return -EINVAL;
555
556         if (mode == BCH_RENAME_OVERWRITE) {
557                 ret = filemap_write_and_wait_range(src_inode->v.i_mapping,
558                                                    0, LLONG_MAX);
559                 if (ret)
560                         return ret;
561         }
562
563         bch2_trans_init(&trans, c, 8, 2048);
564
565         bch2_lock_inodes(INODE_UPDATE_LOCK,
566                          src_dir,
567                          dst_dir,
568                          src_inode,
569                          dst_inode);
570
571         if (inode_attr_changing(dst_dir, src_inode, Inode_opt_project)) {
572                 ret = bch2_fs_quota_transfer(c, src_inode,
573                                              dst_dir->ei_qid,
574                                              1 << QTYP_PRJ,
575                                              KEY_TYPE_QUOTA_PREALLOC);
576                 if (ret)
577                         goto err;
578         }
579
580         if (mode == BCH_RENAME_EXCHANGE &&
581             inode_attr_changing(src_dir, dst_inode, Inode_opt_project)) {
582                 ret = bch2_fs_quota_transfer(c, dst_inode,
583                                              src_dir->ei_qid,
584                                              1 << QTYP_PRJ,
585                                              KEY_TYPE_QUOTA_PREALLOC);
586                 if (ret)
587                         goto err;
588         }
589
590         ret = commit_do(&trans, NULL, NULL, 0,
591                         bch2_rename_trans(&trans,
592                                           inode_inum(src_dir), &src_dir_u,
593                                           inode_inum(dst_dir), &dst_dir_u,
594                                           &src_inode_u,
595                                           &dst_inode_u,
596                                           &src_dentry->d_name,
597                                           &dst_dentry->d_name,
598                                           mode));
599         if (unlikely(ret))
600                 goto err;
601
602         BUG_ON(src_inode->v.i_ino != src_inode_u.bi_inum);
603         BUG_ON(dst_inode &&
604                dst_inode->v.i_ino != dst_inode_u.bi_inum);
605
606         bch2_inode_update_after_write(&trans, src_dir, &src_dir_u,
607                                       ATTR_MTIME|ATTR_CTIME);
608
609         if (src_dir != dst_dir)
610                 bch2_inode_update_after_write(&trans, dst_dir, &dst_dir_u,
611                                               ATTR_MTIME|ATTR_CTIME);
612
613         bch2_inode_update_after_write(&trans, src_inode, &src_inode_u,
614                                       ATTR_CTIME);
615
616         if (dst_inode)
617                 bch2_inode_update_after_write(&trans, dst_inode, &dst_inode_u,
618                                               ATTR_CTIME);
619 err:
620         bch2_trans_exit(&trans);
621
622         bch2_fs_quota_transfer(c, src_inode,
623                                bch_qid(&src_inode->ei_inode),
624                                1 << QTYP_PRJ,
625                                KEY_TYPE_QUOTA_NOCHECK);
626         if (dst_inode)
627                 bch2_fs_quota_transfer(c, dst_inode,
628                                        bch_qid(&dst_inode->ei_inode),
629                                        1 << QTYP_PRJ,
630                                        KEY_TYPE_QUOTA_NOCHECK);
631
632         bch2_unlock_inodes(INODE_UPDATE_LOCK,
633                            src_dir,
634                            dst_dir,
635                            src_inode,
636                            dst_inode);
637
638         return ret;
639 }
640
641 static void bch2_setattr_copy(struct mnt_idmap *idmap,
642                               struct bch_inode_info *inode,
643                               struct bch_inode_unpacked *bi,
644                               struct iattr *attr)
645 {
646         struct bch_fs *c = inode->v.i_sb->s_fs_info;
647         unsigned int ia_valid = attr->ia_valid;
648
649         if (ia_valid & ATTR_UID)
650                 bi->bi_uid = from_kuid(i_user_ns(&inode->v), attr->ia_uid);
651         if (ia_valid & ATTR_GID)
652                 bi->bi_gid = from_kgid(i_user_ns(&inode->v), attr->ia_gid);
653
654         if (ia_valid & ATTR_SIZE)
655                 bi->bi_size = attr->ia_size;
656
657         if (ia_valid & ATTR_ATIME)
658                 bi->bi_atime = timespec_to_bch2_time(c, attr->ia_atime);
659         if (ia_valid & ATTR_MTIME)
660                 bi->bi_mtime = timespec_to_bch2_time(c, attr->ia_mtime);
661         if (ia_valid & ATTR_CTIME)
662                 bi->bi_ctime = timespec_to_bch2_time(c, attr->ia_ctime);
663
664         if (ia_valid & ATTR_MODE) {
665                 umode_t mode = attr->ia_mode;
666                 kgid_t gid = ia_valid & ATTR_GID
667                         ? attr->ia_gid
668                         : inode->v.i_gid;
669
670                 if (!in_group_p(gid) &&
671                     !capable_wrt_inode_uidgid(idmap, &inode->v, CAP_FSETID))
672                         mode &= ~S_ISGID;
673                 bi->bi_mode = mode;
674         }
675 }
676
677 int bch2_setattr_nonsize(struct mnt_idmap *idmap,
678                          struct bch_inode_info *inode,
679                          struct iattr *attr)
680 {
681         struct bch_fs *c = inode->v.i_sb->s_fs_info;
682         struct bch_qid qid;
683         struct btree_trans trans;
684         struct btree_iter inode_iter = { NULL };
685         struct bch_inode_unpacked inode_u;
686         struct posix_acl *acl = NULL;
687         int ret;
688
689         mutex_lock(&inode->ei_update_lock);
690
691         qid = inode->ei_qid;
692
693         if (attr->ia_valid & ATTR_UID)
694                 qid.q[QTYP_USR] = from_kuid(i_user_ns(&inode->v), attr->ia_uid);
695
696         if (attr->ia_valid & ATTR_GID)
697                 qid.q[QTYP_GRP] = from_kgid(i_user_ns(&inode->v), attr->ia_gid);
698
699         ret = bch2_fs_quota_transfer(c, inode, qid, ~0,
700                                      KEY_TYPE_QUOTA_PREALLOC);
701         if (ret)
702                 goto err;
703
704         bch2_trans_init(&trans, c, 0, 0);
705 retry:
706         bch2_trans_begin(&trans);
707         kfree(acl);
708         acl = NULL;
709
710         ret = bch2_inode_peek(&trans, &inode_iter, &inode_u, inode_inum(inode),
711                               BTREE_ITER_INTENT);
712         if (ret)
713                 goto btree_err;
714
715         bch2_setattr_copy(idmap, inode, &inode_u, attr);
716
717         if (attr->ia_valid & ATTR_MODE) {
718                 ret = bch2_acl_chmod(&trans, inode_inum(inode), &inode_u,
719                                      inode_u.bi_mode, &acl);
720                 if (ret)
721                         goto btree_err;
722         }
723
724         ret =   bch2_inode_write(&trans, &inode_iter, &inode_u) ?:
725                 bch2_trans_commit(&trans, NULL, NULL,
726                                   BTREE_INSERT_NOFAIL);
727 btree_err:
728         bch2_trans_iter_exit(&trans, &inode_iter);
729
730         if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
731                 goto retry;
732         if (unlikely(ret))
733                 goto err_trans;
734
735         bch2_inode_update_after_write(&trans, inode, &inode_u, attr->ia_valid);
736
737         if (acl)
738                 set_cached_acl(&inode->v, ACL_TYPE_ACCESS, acl);
739 err_trans:
740         bch2_trans_exit(&trans);
741 err:
742         mutex_unlock(&inode->ei_update_lock);
743
744         return bch2_err_class(ret);
745 }
746
747 static int bch2_getattr(struct mnt_idmap *idmap,
748                         const struct path *path, struct kstat *stat,
749                         u32 request_mask, unsigned query_flags)
750 {
751         struct bch_inode_info *inode = to_bch_ei(d_inode(path->dentry));
752         struct bch_fs *c = inode->v.i_sb->s_fs_info;
753
754         stat->dev       = inode->v.i_sb->s_dev;
755         stat->ino       = inode->v.i_ino;
756         stat->mode      = inode->v.i_mode;
757         stat->nlink     = inode->v.i_nlink;
758         stat->uid       = inode->v.i_uid;
759         stat->gid       = inode->v.i_gid;
760         stat->rdev      = inode->v.i_rdev;
761         stat->size      = i_size_read(&inode->v);
762         stat->atime     = inode->v.i_atime;
763         stat->mtime     = inode->v.i_mtime;
764         stat->ctime     = inode->v.i_ctime;
765         stat->blksize   = block_bytes(c);
766         stat->blocks    = inode->v.i_blocks;
767
768         if (request_mask & STATX_BTIME) {
769                 stat->result_mask |= STATX_BTIME;
770                 stat->btime = bch2_time_to_timespec(c, inode->ei_inode.bi_otime);
771         }
772
773         if (inode->ei_inode.bi_flags & BCH_INODE_IMMUTABLE)
774                 stat->attributes |= STATX_ATTR_IMMUTABLE;
775         stat->attributes_mask    |= STATX_ATTR_IMMUTABLE;
776
777         if (inode->ei_inode.bi_flags & BCH_INODE_APPEND)
778                 stat->attributes |= STATX_ATTR_APPEND;
779         stat->attributes_mask    |= STATX_ATTR_APPEND;
780
781         if (inode->ei_inode.bi_flags & BCH_INODE_NODUMP)
782                 stat->attributes |= STATX_ATTR_NODUMP;
783         stat->attributes_mask    |= STATX_ATTR_NODUMP;
784
785         return 0;
786 }
787
788 static int bch2_setattr(struct mnt_idmap *idmap,
789                         struct dentry *dentry, struct iattr *iattr)
790 {
791         struct bch_inode_info *inode = to_bch_ei(dentry->d_inode);
792         int ret;
793
794         lockdep_assert_held(&inode->v.i_rwsem);
795
796         ret = setattr_prepare(idmap, dentry, iattr);
797         if (ret)
798                 return ret;
799
800         return iattr->ia_valid & ATTR_SIZE
801                 ? bch2_truncate(idmap, inode, iattr)
802                 : bch2_setattr_nonsize(idmap, inode, iattr);
803 }
804
805 static int bch2_tmpfile(struct mnt_idmap *idmap,
806                         struct inode *vdir, struct file *file, umode_t mode)
807 {
808         struct bch_inode_info *inode =
809                 __bch2_create(idmap, to_bch_ei(vdir),
810                               file->f_path.dentry, mode, 0,
811                               (subvol_inum) { 0 }, BCH_CREATE_TMPFILE);
812
813         if (IS_ERR(inode))
814                 return bch2_err_class(PTR_ERR(inode));
815
816         d_mark_tmpfile(file, &inode->v);
817         d_instantiate(file->f_path.dentry, &inode->v);
818         return finish_open_simple(file, 0);
819 }
820
821 static int bch2_fill_extent(struct bch_fs *c,
822                             struct fiemap_extent_info *info,
823                             struct bkey_s_c k, unsigned flags)
824 {
825         if (bkey_extent_is_direct_data(k.k)) {
826                 struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
827                 const union bch_extent_entry *entry;
828                 struct extent_ptr_decoded p;
829                 int ret;
830
831                 if (k.k->type == KEY_TYPE_reflink_v)
832                         flags |= FIEMAP_EXTENT_SHARED;
833
834                 bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
835                         int flags2 = 0;
836                         u64 offset = p.ptr.offset;
837
838                         if (p.ptr.unwritten)
839                                 flags2 |= FIEMAP_EXTENT_UNWRITTEN;
840
841                         if (p.crc.compression_type)
842                                 flags2 |= FIEMAP_EXTENT_ENCODED;
843                         else
844                                 offset += p.crc.offset;
845
846                         if ((offset & (block_sectors(c) - 1)) ||
847                             (k.k->size & (block_sectors(c) - 1)))
848                                 flags2 |= FIEMAP_EXTENT_NOT_ALIGNED;
849
850                         ret = fiemap_fill_next_extent(info,
851                                                 bkey_start_offset(k.k) << 9,
852                                                 offset << 9,
853                                                 k.k->size << 9, flags|flags2);
854                         if (ret)
855                                 return ret;
856                 }
857
858                 return 0;
859         } else if (bkey_extent_is_inline_data(k.k)) {
860                 return fiemap_fill_next_extent(info,
861                                                bkey_start_offset(k.k) << 9,
862                                                0, k.k->size << 9,
863                                                flags|
864                                                FIEMAP_EXTENT_DATA_INLINE);
865         } else if (k.k->type == KEY_TYPE_reservation) {
866                 return fiemap_fill_next_extent(info,
867                                                bkey_start_offset(k.k) << 9,
868                                                0, k.k->size << 9,
869                                                flags|
870                                                FIEMAP_EXTENT_DELALLOC|
871                                                FIEMAP_EXTENT_UNWRITTEN);
872         } else {
873                 BUG();
874         }
875 }
876
877 static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
878                        u64 start, u64 len)
879 {
880         struct bch_fs *c = vinode->i_sb->s_fs_info;
881         struct bch_inode_info *ei = to_bch_ei(vinode);
882         struct btree_trans trans;
883         struct btree_iter iter;
884         struct bkey_s_c k;
885         struct bkey_buf cur, prev;
886         struct bpos end = POS(ei->v.i_ino, (start + len) >> 9);
887         unsigned offset_into_extent, sectors;
888         bool have_extent = false;
889         u32 snapshot;
890         int ret = 0;
891
892         ret = fiemap_prep(&ei->v, info, start, &len, FIEMAP_FLAG_SYNC);
893         if (ret)
894                 return ret;
895
896         if (start + len < start)
897                 return -EINVAL;
898
899         start >>= 9;
900
901         bch2_bkey_buf_init(&cur);
902         bch2_bkey_buf_init(&prev);
903         bch2_trans_init(&trans, c, 0, 0);
904 retry:
905         bch2_trans_begin(&trans);
906
907         ret = bch2_subvolume_get_snapshot(&trans, ei->ei_subvol, &snapshot);
908         if (ret)
909                 goto err;
910
911         bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents,
912                              SPOS(ei->v.i_ino, start, snapshot), 0);
913
914         while (!(ret = btree_trans_too_many_iters(&trans)) &&
915                (k = bch2_btree_iter_peek_upto(&iter, end)).k &&
916                !(ret = bkey_err(k))) {
917                 enum btree_id data_btree = BTREE_ID_extents;
918
919                 if (!bkey_extent_is_data(k.k) &&
920                     k.k->type != KEY_TYPE_reservation) {
921                         bch2_btree_iter_advance(&iter);
922                         continue;
923                 }
924
925                 offset_into_extent      = iter.pos.offset -
926                         bkey_start_offset(k.k);
927                 sectors                 = k.k->size - offset_into_extent;
928
929                 bch2_bkey_buf_reassemble(&cur, c, k);
930
931                 ret = bch2_read_indirect_extent(&trans, &data_btree,
932                                         &offset_into_extent, &cur);
933                 if (ret)
934                         break;
935
936                 k = bkey_i_to_s_c(cur.k);
937                 bch2_bkey_buf_realloc(&prev, c, k.k->u64s);
938
939                 sectors = min(sectors, k.k->size - offset_into_extent);
940
941                 bch2_cut_front(POS(k.k->p.inode,
942                                    bkey_start_offset(k.k) +
943                                    offset_into_extent),
944                                cur.k);
945                 bch2_key_resize(&cur.k->k, sectors);
946                 cur.k->k.p = iter.pos;
947                 cur.k->k.p.offset += cur.k->k.size;
948
949                 if (have_extent) {
950                         bch2_trans_unlock(&trans);
951                         ret = bch2_fill_extent(c, info,
952                                         bkey_i_to_s_c(prev.k), 0);
953                         if (ret)
954                                 break;
955                 }
956
957                 bkey_copy(prev.k, cur.k);
958                 have_extent = true;
959
960                 bch2_btree_iter_set_pos(&iter,
961                         POS(iter.pos.inode, iter.pos.offset + sectors));
962         }
963         start = iter.pos.offset;
964         bch2_trans_iter_exit(&trans, &iter);
965 err:
966         if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
967                 goto retry;
968
969         if (!ret && have_extent) {
970                 bch2_trans_unlock(&trans);
971                 ret = bch2_fill_extent(c, info, bkey_i_to_s_c(prev.k),
972                                        FIEMAP_EXTENT_LAST);
973         }
974
975         bch2_trans_exit(&trans);
976         bch2_bkey_buf_exit(&cur, c);
977         bch2_bkey_buf_exit(&prev, c);
978         return ret < 0 ? ret : 0;
979 }
980
981 static const struct vm_operations_struct bch_vm_ops = {
982         .fault          = bch2_page_fault,
983         .map_pages      = filemap_map_pages,
984         .page_mkwrite   = bch2_page_mkwrite,
985 };
986
987 static int bch2_mmap(struct file *file, struct vm_area_struct *vma)
988 {
989         file_accessed(file);
990
991         vma->vm_ops = &bch_vm_ops;
992         return 0;
993 }
994
995 /* Directories: */
996
997 static loff_t bch2_dir_llseek(struct file *file, loff_t offset, int whence)
998 {
999         return generic_file_llseek_size(file, offset, whence,
1000                                         S64_MAX, S64_MAX);
1001 }
1002
1003 static int bch2_vfs_readdir(struct file *file, struct dir_context *ctx)
1004 {
1005         struct bch_inode_info *inode = file_bch_inode(file);
1006         struct bch_fs *c = inode->v.i_sb->s_fs_info;
1007         int ret;
1008
1009         if (!dir_emit_dots(file, ctx))
1010                 return 0;
1011
1012         ret = bch2_readdir(c, inode_inum(inode), ctx);
1013         if (ret)
1014                 bch_err_fn(c, ret);
1015
1016         return bch2_err_class(ret);
1017 }
1018
1019 static const struct file_operations bch_file_operations = {
1020         .llseek         = bch2_llseek,
1021         .read_iter      = bch2_read_iter,
1022         .write_iter     = bch2_write_iter,
1023         .mmap           = bch2_mmap,
1024         .open           = generic_file_open,
1025         .fsync          = bch2_fsync,
1026         .splice_read    = filemap_splice_read,
1027         .splice_write   = iter_file_splice_write,
1028         .fallocate      = bch2_fallocate_dispatch,
1029         .unlocked_ioctl = bch2_fs_file_ioctl,
1030 #ifdef CONFIG_COMPAT
1031         .compat_ioctl   = bch2_compat_fs_ioctl,
1032 #endif
1033         .remap_file_range = bch2_remap_file_range,
1034 };
1035
1036 static const struct inode_operations bch_file_inode_operations = {
1037         .getattr        = bch2_getattr,
1038         .setattr        = bch2_setattr,
1039         .fiemap         = bch2_fiemap,
1040         .listxattr      = bch2_xattr_list,
1041 #ifdef CONFIG_BCACHEFS_POSIX_ACL
1042         .get_acl        = bch2_get_acl,
1043         .set_acl        = bch2_set_acl,
1044 #endif
1045 };
1046
1047 static const struct inode_operations bch_dir_inode_operations = {
1048         .lookup         = bch2_lookup,
1049         .create         = bch2_create,
1050         .link           = bch2_link,
1051         .unlink         = bch2_unlink,
1052         .symlink        = bch2_symlink,
1053         .mkdir          = bch2_mkdir,
1054         .rmdir          = bch2_unlink,
1055         .mknod          = bch2_mknod,
1056         .rename         = bch2_rename2,
1057         .getattr        = bch2_getattr,
1058         .setattr        = bch2_setattr,
1059         .tmpfile        = bch2_tmpfile,
1060         .listxattr      = bch2_xattr_list,
1061 #ifdef CONFIG_BCACHEFS_POSIX_ACL
1062         .get_acl        = bch2_get_acl,
1063         .set_acl        = bch2_set_acl,
1064 #endif
1065 };
1066
1067 static const struct file_operations bch_dir_file_operations = {
1068         .llseek         = bch2_dir_llseek,
1069         .read           = generic_read_dir,
1070         .iterate_shared = bch2_vfs_readdir,
1071         .fsync          = bch2_fsync,
1072         .unlocked_ioctl = bch2_fs_file_ioctl,
1073 #ifdef CONFIG_COMPAT
1074         .compat_ioctl   = bch2_compat_fs_ioctl,
1075 #endif
1076 };
1077
1078 static const struct inode_operations bch_symlink_inode_operations = {
1079         .get_link       = page_get_link,
1080         .getattr        = bch2_getattr,
1081         .setattr        = bch2_setattr,
1082         .listxattr      = bch2_xattr_list,
1083 #ifdef CONFIG_BCACHEFS_POSIX_ACL
1084         .get_acl        = bch2_get_acl,
1085         .set_acl        = bch2_set_acl,
1086 #endif
1087 };
1088
1089 static const struct inode_operations bch_special_inode_operations = {
1090         .getattr        = bch2_getattr,
1091         .setattr        = bch2_setattr,
1092         .listxattr      = bch2_xattr_list,
1093 #ifdef CONFIG_BCACHEFS_POSIX_ACL
1094         .get_acl        = bch2_get_acl,
1095         .set_acl        = bch2_set_acl,
1096 #endif
1097 };
1098
1099 static const struct address_space_operations bch_address_space_operations = {
1100         .read_folio     = bch2_read_folio,
1101         .writepages     = bch2_writepages,
1102         .readahead      = bch2_readahead,
1103         .dirty_folio    = filemap_dirty_folio,
1104         .write_begin    = bch2_write_begin,
1105         .write_end      = bch2_write_end,
1106         .invalidate_folio = bch2_invalidate_folio,
1107         .release_folio  = bch2_release_folio,
1108         .direct_IO      = noop_direct_IO,
1109 #ifdef CONFIG_MIGRATION
1110         .migrate_folio  = filemap_migrate_folio,
1111 #endif
1112         .error_remove_page = generic_error_remove_page,
1113 };
1114
1115 struct bcachefs_fid {
1116         u64             inum;
1117         u32             subvol;
1118         u32             gen;
1119 } __packed;
1120
1121 struct bcachefs_fid_with_parent {
1122         struct bcachefs_fid     fid;
1123         struct bcachefs_fid     dir;
1124 } __packed;
1125
1126 static int bcachefs_fid_valid(int fh_len, int fh_type)
1127 {
1128         switch (fh_type) {
1129         case FILEID_BCACHEFS_WITHOUT_PARENT:
1130                 return fh_len == sizeof(struct bcachefs_fid) / sizeof(u32);
1131         case FILEID_BCACHEFS_WITH_PARENT:
1132                 return fh_len == sizeof(struct bcachefs_fid_with_parent) / sizeof(u32);
1133         default:
1134                 return false;
1135         }
1136 }
1137
1138 static struct bcachefs_fid bch2_inode_to_fid(struct bch_inode_info *inode)
1139 {
1140         return (struct bcachefs_fid) {
1141                 .inum   = inode->ei_inode.bi_inum,
1142                 .subvol = inode->ei_subvol,
1143                 .gen    = inode->ei_inode.bi_generation,
1144         };
1145 }
1146
1147 static int bch2_encode_fh(struct inode *vinode, u32 *fh, int *len,
1148                           struct inode *vdir)
1149 {
1150         struct bch_inode_info *inode    = to_bch_ei(vinode);
1151         struct bch_inode_info *dir      = to_bch_ei(vdir);
1152
1153         if (*len < sizeof(struct bcachefs_fid_with_parent) / sizeof(u32))
1154                 return FILEID_INVALID;
1155
1156         if (!S_ISDIR(inode->v.i_mode) && dir) {
1157                 struct bcachefs_fid_with_parent *fid = (void *) fh;
1158
1159                 fid->fid = bch2_inode_to_fid(inode);
1160                 fid->dir = bch2_inode_to_fid(dir);
1161
1162                 *len = sizeof(*fid) / sizeof(u32);
1163                 return FILEID_BCACHEFS_WITH_PARENT;
1164         } else {
1165                 struct bcachefs_fid *fid = (void *) fh;
1166
1167                 *fid = bch2_inode_to_fid(inode);
1168
1169                 *len = sizeof(*fid) / sizeof(u32);
1170                 return FILEID_BCACHEFS_WITHOUT_PARENT;
1171         }
1172 }
1173
1174 static struct inode *bch2_nfs_get_inode(struct super_block *sb,
1175                                         struct bcachefs_fid fid)
1176 {
1177         struct bch_fs *c = sb->s_fs_info;
1178         struct inode *vinode = bch2_vfs_inode_get(c, (subvol_inum) {
1179                                     .subvol = fid.subvol,
1180                                     .inum = fid.inum,
1181         });
1182         if (!IS_ERR(vinode) && vinode->i_generation != fid.gen) {
1183                 iput(vinode);
1184                 vinode = ERR_PTR(-ESTALE);
1185         }
1186         return vinode;
1187 }
1188
1189 static struct dentry *bch2_fh_to_dentry(struct super_block *sb, struct fid *_fid,
1190                 int fh_len, int fh_type)
1191 {
1192         struct bcachefs_fid *fid = (void *) _fid;
1193
1194         if (!bcachefs_fid_valid(fh_len, fh_type))
1195                 return NULL;
1196
1197         return d_obtain_alias(bch2_nfs_get_inode(sb, *fid));
1198 }
1199
1200 static struct dentry *bch2_fh_to_parent(struct super_block *sb, struct fid *_fid,
1201                 int fh_len, int fh_type)
1202 {
1203         struct bcachefs_fid_with_parent *fid = (void *) _fid;
1204
1205         if (!bcachefs_fid_valid(fh_len, fh_type) ||
1206             fh_type != FILEID_BCACHEFS_WITH_PARENT)
1207                 return NULL;
1208
1209         return d_obtain_alias(bch2_nfs_get_inode(sb, fid->dir));
1210 }
1211
1212 static struct dentry *bch2_get_parent(struct dentry *child)
1213 {
1214         struct bch_inode_info *inode = to_bch_ei(child->d_inode);
1215         struct bch_fs *c = inode->v.i_sb->s_fs_info;
1216         subvol_inum parent_inum = {
1217                 .subvol = inode->ei_inode.bi_parent_subvol ?:
1218                         inode->ei_subvol,
1219                 .inum = inode->ei_inode.bi_dir,
1220         };
1221
1222         if (!parent_inum.inum)
1223                 return NULL;
1224
1225         return d_obtain_alias(bch2_vfs_inode_get(c, parent_inum));
1226 }
1227
1228 static int bch2_get_name(struct dentry *parent, char *name, struct dentry *child)
1229 {
1230         struct bch_inode_info *inode    = to_bch_ei(child->d_inode);
1231         struct bch_inode_info *dir      = to_bch_ei(parent->d_inode);
1232         struct bch_fs *c = inode->v.i_sb->s_fs_info;
1233         struct btree_trans trans;
1234         struct btree_iter iter1;
1235         struct btree_iter iter2;
1236         struct bkey_s_c k;
1237         struct bkey_s_c_dirent d;
1238         struct bch_inode_unpacked inode_u;
1239         subvol_inum target;
1240         u32 snapshot;
1241         struct qstr dirent_name;
1242         unsigned name_len = 0;
1243         int ret;
1244
1245         if (!S_ISDIR(dir->v.i_mode))
1246                 return -EINVAL;
1247
1248         bch2_trans_init(&trans, c, 0, 0);
1249
1250         bch2_trans_iter_init(&trans, &iter1, BTREE_ID_dirents,
1251                              POS(dir->ei_inode.bi_inum, 0), 0);
1252         bch2_trans_iter_init(&trans, &iter2, BTREE_ID_dirents,
1253                              POS(dir->ei_inode.bi_inum, 0), 0);
1254 retry:
1255         bch2_trans_begin(&trans);
1256
1257         ret = bch2_subvolume_get_snapshot(&trans, dir->ei_subvol, &snapshot);
1258         if (ret)
1259                 goto err;
1260
1261         bch2_btree_iter_set_snapshot(&iter1, snapshot);
1262         bch2_btree_iter_set_snapshot(&iter2, snapshot);
1263
1264         ret = bch2_inode_find_by_inum_trans(&trans, inode_inum(inode), &inode_u);
1265         if (ret)
1266                 goto err;
1267
1268         if (inode_u.bi_dir == dir->ei_inode.bi_inum) {
1269                 bch2_btree_iter_set_pos(&iter1, POS(inode_u.bi_dir, inode_u.bi_dir_offset));
1270
1271                 k = bch2_btree_iter_peek_slot(&iter1);
1272                 ret = bkey_err(k);
1273                 if (ret)
1274                         goto err;
1275
1276                 if (k.k->type != KEY_TYPE_dirent) {
1277                         ret = -BCH_ERR_ENOENT_dirent_doesnt_match_inode;
1278                         goto err;
1279                 }
1280
1281                 d = bkey_s_c_to_dirent(k);
1282                 ret = bch2_dirent_read_target(&trans, inode_inum(dir), d, &target);
1283                 if (ret > 0)
1284                         ret = -BCH_ERR_ENOENT_dirent_doesnt_match_inode;
1285                 if (ret)
1286                         goto err;
1287
1288                 if (target.subvol       == inode->ei_subvol &&
1289                     target.inum         == inode->ei_inode.bi_inum)
1290                         goto found;
1291         } else {
1292                 /*
1293                  * File with multiple hardlinks and our backref is to the wrong
1294                  * directory - linear search:
1295                  */
1296                 for_each_btree_key_continue_norestart(iter2, 0, k, ret) {
1297                         if (k.k->p.inode > dir->ei_inode.bi_inum)
1298                                 break;
1299
1300                         if (k.k->type != KEY_TYPE_dirent)
1301                                 continue;
1302
1303                         d = bkey_s_c_to_dirent(k);
1304                         ret = bch2_dirent_read_target(&trans, inode_inum(dir), d, &target);
1305                         if (ret < 0)
1306                                 break;
1307                         if (ret)
1308                                 continue;
1309
1310                         if (target.subvol       == inode->ei_subvol &&
1311                             target.inum         == inode->ei_inode.bi_inum)
1312                                 goto found;
1313                 }
1314         }
1315
1316         ret = -ENOENT;
1317         goto err;
1318 found:
1319         dirent_name = bch2_dirent_get_name(d);
1320
1321         name_len = min_t(unsigned, dirent_name.len, NAME_MAX);
1322         memcpy(name, dirent_name.name, name_len);
1323         name[name_len] = '\0';
1324 err:
1325         if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
1326                 goto retry;
1327
1328         bch2_trans_iter_exit(&trans, &iter1);
1329         bch2_trans_iter_exit(&trans, &iter2);
1330         bch2_trans_exit(&trans);
1331
1332         return ret;
1333 }
1334
1335 static const struct export_operations bch_export_ops = {
1336         .encode_fh      = bch2_encode_fh,
1337         .fh_to_dentry   = bch2_fh_to_dentry,
1338         .fh_to_parent   = bch2_fh_to_parent,
1339         .get_parent     = bch2_get_parent,
1340         .get_name       = bch2_get_name,
1341 };
1342
1343 static void bch2_vfs_inode_init(struct btree_trans *trans, subvol_inum inum,
1344                                 struct bch_inode_info *inode,
1345                                 struct bch_inode_unpacked *bi,
1346                                 struct bch_subvolume *subvol)
1347 {
1348         bch2_inode_update_after_write(trans, inode, bi, ~0);
1349
1350         if (BCH_SUBVOLUME_SNAP(subvol))
1351                 set_bit(EI_INODE_SNAPSHOT, &inode->ei_flags);
1352         else
1353                 clear_bit(EI_INODE_SNAPSHOT, &inode->ei_flags);
1354
1355         inode->v.i_blocks       = bi->bi_sectors;
1356         inode->v.i_ino          = bi->bi_inum;
1357         inode->v.i_rdev         = bi->bi_dev;
1358         inode->v.i_generation   = bi->bi_generation;
1359         inode->v.i_size         = bi->bi_size;
1360
1361         inode->ei_flags         = 0;
1362         inode->ei_quota_reserved = 0;
1363         inode->ei_qid           = bch_qid(bi);
1364         inode->ei_subvol        = inum.subvol;
1365
1366         inode->v.i_mapping->a_ops = &bch_address_space_operations;
1367
1368         switch (inode->v.i_mode & S_IFMT) {
1369         case S_IFREG:
1370                 inode->v.i_op   = &bch_file_inode_operations;
1371                 inode->v.i_fop  = &bch_file_operations;
1372                 break;
1373         case S_IFDIR:
1374                 inode->v.i_op   = &bch_dir_inode_operations;
1375                 inode->v.i_fop  = &bch_dir_file_operations;
1376                 break;
1377         case S_IFLNK:
1378                 inode_nohighmem(&inode->v);
1379                 inode->v.i_op   = &bch_symlink_inode_operations;
1380                 break;
1381         default:
1382                 init_special_inode(&inode->v, inode->v.i_mode, inode->v.i_rdev);
1383                 inode->v.i_op   = &bch_special_inode_operations;
1384                 break;
1385         }
1386
1387         mapping_set_large_folios(inode->v.i_mapping);
1388 }
1389
1390 static struct inode *bch2_alloc_inode(struct super_block *sb)
1391 {
1392         struct bch_inode_info *inode;
1393
1394         inode = kmem_cache_alloc(bch2_inode_cache, GFP_NOFS);
1395         if (!inode)
1396                 return NULL;
1397
1398         inode_init_once(&inode->v);
1399         mutex_init(&inode->ei_update_lock);
1400         two_state_lock_init(&inode->ei_pagecache_lock);
1401         INIT_LIST_HEAD(&inode->ei_vfs_inode_list);
1402         mutex_init(&inode->ei_quota_lock);
1403
1404         return &inode->v;
1405 }
1406
1407 static void bch2_i_callback(struct rcu_head *head)
1408 {
1409         struct inode *vinode = container_of(head, struct inode, i_rcu);
1410         struct bch_inode_info *inode = to_bch_ei(vinode);
1411
1412         kmem_cache_free(bch2_inode_cache, inode);
1413 }
1414
1415 static void bch2_destroy_inode(struct inode *vinode)
1416 {
1417         call_rcu(&vinode->i_rcu, bch2_i_callback);
1418 }
1419
1420 static int inode_update_times_fn(struct btree_trans *trans,
1421                                  struct bch_inode_info *inode,
1422                                  struct bch_inode_unpacked *bi,
1423                                  void *p)
1424 {
1425         struct bch_fs *c = inode->v.i_sb->s_fs_info;
1426
1427         bi->bi_atime    = timespec_to_bch2_time(c, inode->v.i_atime);
1428         bi->bi_mtime    = timespec_to_bch2_time(c, inode->v.i_mtime);
1429         bi->bi_ctime    = timespec_to_bch2_time(c, inode->v.i_ctime);
1430
1431         return 0;
1432 }
1433
1434 static int bch2_vfs_write_inode(struct inode *vinode,
1435                                 struct writeback_control *wbc)
1436 {
1437         struct bch_fs *c = vinode->i_sb->s_fs_info;
1438         struct bch_inode_info *inode = to_bch_ei(vinode);
1439         int ret;
1440
1441         mutex_lock(&inode->ei_update_lock);
1442         ret = bch2_write_inode(c, inode, inode_update_times_fn, NULL,
1443                                ATTR_ATIME|ATTR_MTIME|ATTR_CTIME);
1444         mutex_unlock(&inode->ei_update_lock);
1445
1446         return bch2_err_class(ret);
1447 }
1448
1449 static void bch2_evict_inode(struct inode *vinode)
1450 {
1451         struct bch_fs *c = vinode->i_sb->s_fs_info;
1452         struct bch_inode_info *inode = to_bch_ei(vinode);
1453
1454         truncate_inode_pages_final(&inode->v.i_data);
1455
1456         clear_inode(&inode->v);
1457
1458         BUG_ON(!is_bad_inode(&inode->v) && inode->ei_quota_reserved);
1459
1460         if (!inode->v.i_nlink && !is_bad_inode(&inode->v)) {
1461                 bch2_quota_acct(c, inode->ei_qid, Q_SPC, -((s64) inode->v.i_blocks),
1462                                 KEY_TYPE_QUOTA_WARN);
1463                 bch2_quota_acct(c, inode->ei_qid, Q_INO, -1,
1464                                 KEY_TYPE_QUOTA_WARN);
1465                 bch2_inode_rm(c, inode_inum(inode));
1466         }
1467
1468         mutex_lock(&c->vfs_inodes_lock);
1469         list_del_init(&inode->ei_vfs_inode_list);
1470         mutex_unlock(&c->vfs_inodes_lock);
1471 }
1472
1473 void bch2_evict_subvolume_inodes(struct bch_fs *c, snapshot_id_list *s)
1474 {
1475         struct bch_inode_info *inode, **i;
1476         DARRAY(struct bch_inode_info *) grabbed;
1477         bool clean_pass = false, this_pass_clean;
1478
1479         /*
1480          * Initially, we scan for inodes without I_DONTCACHE, then mark them to
1481          * be pruned with d_mark_dontcache().
1482          *
1483          * Once we've had a clean pass where we didn't find any inodes without
1484          * I_DONTCACHE, we wait for them to be freed:
1485          */
1486
1487         darray_init(&grabbed);
1488         darray_make_room(&grabbed, 1024);
1489 again:
1490         cond_resched();
1491         this_pass_clean = true;
1492
1493         mutex_lock(&c->vfs_inodes_lock);
1494         list_for_each_entry(inode, &c->vfs_inodes_list, ei_vfs_inode_list) {
1495                 if (!snapshot_list_has_id(s, inode->ei_subvol))
1496                         continue;
1497
1498                 if (!(inode->v.i_state & I_DONTCACHE) &&
1499                     !(inode->v.i_state & I_FREEING) &&
1500                     igrab(&inode->v)) {
1501                         this_pass_clean = false;
1502
1503                         if (darray_push_gfp(&grabbed, inode, GFP_ATOMIC|__GFP_NOWARN)) {
1504                                 iput(&inode->v);
1505                                 break;
1506                         }
1507                 } else if (clean_pass && this_pass_clean) {
1508                         wait_queue_head_t *wq = bit_waitqueue(&inode->v.i_state, __I_NEW);
1509                         DEFINE_WAIT_BIT(wait, &inode->v.i_state, __I_NEW);
1510
1511                         prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
1512                         mutex_unlock(&c->vfs_inodes_lock);
1513
1514                         schedule();
1515                         finish_wait(wq, &wait.wq_entry);
1516                         goto again;
1517                 }
1518         }
1519         mutex_unlock(&c->vfs_inodes_lock);
1520
1521         darray_for_each(grabbed, i) {
1522                 inode = *i;
1523                 d_mark_dontcache(&inode->v);
1524                 d_prune_aliases(&inode->v);
1525                 iput(&inode->v);
1526         }
1527         grabbed.nr = 0;
1528
1529         if (!clean_pass || !this_pass_clean) {
1530                 clean_pass = this_pass_clean;
1531                 goto again;
1532         }
1533
1534         darray_exit(&grabbed);
1535 }
1536
1537 static int bch2_statfs(struct dentry *dentry, struct kstatfs *buf)
1538 {
1539         struct super_block *sb = dentry->d_sb;
1540         struct bch_fs *c = sb->s_fs_info;
1541         struct bch_fs_usage_short usage = bch2_fs_usage_read_short(c);
1542         unsigned shift = sb->s_blocksize_bits - 9;
1543         /*
1544          * this assumes inodes take up 64 bytes, which is a decent average
1545          * number:
1546          */
1547         u64 avail_inodes = ((usage.capacity - usage.used) << 3);
1548         u64 fsid;
1549
1550         buf->f_type     = BCACHEFS_STATFS_MAGIC;
1551         buf->f_bsize    = sb->s_blocksize;
1552         buf->f_blocks   = usage.capacity >> shift;
1553         buf->f_bfree    = usage.free >> shift;
1554         buf->f_bavail   = avail_factor(usage.free) >> shift;
1555
1556         buf->f_files    = usage.nr_inodes + avail_inodes;
1557         buf->f_ffree    = avail_inodes;
1558
1559         fsid = le64_to_cpup((void *) c->sb.user_uuid.b) ^
1560                le64_to_cpup((void *) c->sb.user_uuid.b + sizeof(u64));
1561         buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
1562         buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
1563         buf->f_namelen  = BCH_NAME_MAX;
1564
1565         return 0;
1566 }
1567
1568 static int bch2_sync_fs(struct super_block *sb, int wait)
1569 {
1570         struct bch_fs *c = sb->s_fs_info;
1571         int ret;
1572
1573         if (c->opts.journal_flush_disabled)
1574                 return 0;
1575
1576         if (!wait) {
1577                 bch2_journal_flush_async(&c->journal, NULL);
1578                 return 0;
1579         }
1580
1581         ret = bch2_journal_flush(&c->journal);
1582         return bch2_err_class(ret);
1583 }
1584
1585 static struct bch_fs *bch2_path_to_fs(const char *path)
1586 {
1587         struct bch_fs *c;
1588         dev_t dev;
1589         int ret;
1590
1591         ret = lookup_bdev(path, &dev);
1592         if (ret)
1593                 return ERR_PTR(ret);
1594
1595         c = bch2_dev_to_fs(dev);
1596         if (c)
1597                 closure_put(&c->cl);
1598         return c ?: ERR_PTR(-ENOENT);
1599 }
1600
1601 static char **split_devs(const char *_dev_name, unsigned *nr)
1602 {
1603         char *dev_name = NULL, **devs = NULL, *s;
1604         size_t i, nr_devs = 0;
1605
1606         dev_name = kstrdup(_dev_name, GFP_KERNEL);
1607         if (!dev_name)
1608                 return NULL;
1609
1610         for (s = dev_name; s; s = strchr(s + 1, ':'))
1611                 nr_devs++;
1612
1613         devs = kcalloc(nr_devs + 1, sizeof(const char *), GFP_KERNEL);
1614         if (!devs) {
1615                 kfree(dev_name);
1616                 return NULL;
1617         }
1618
1619         for (i = 0, s = dev_name;
1620              s;
1621              (s = strchr(s, ':')) && (*s++ = '\0'))
1622                 devs[i++] = s;
1623
1624         *nr = nr_devs;
1625         return devs;
1626 }
1627
1628 static int bch2_remount(struct super_block *sb, int *flags, char *data)
1629 {
1630         struct bch_fs *c = sb->s_fs_info;
1631         struct bch_opts opts = bch2_opts_empty();
1632         int ret;
1633
1634         opt_set(opts, read_only, (*flags & SB_RDONLY) != 0);
1635
1636         ret = bch2_parse_mount_opts(c, &opts, data);
1637         if (ret)
1638                 goto err;
1639
1640         if (opts.read_only != c->opts.read_only) {
1641                 down_write(&c->state_lock);
1642
1643                 if (opts.read_only) {
1644                         bch2_fs_read_only(c);
1645
1646                         sb->s_flags |= SB_RDONLY;
1647                 } else {
1648                         ret = bch2_fs_read_write(c);
1649                         if (ret) {
1650                                 bch_err(c, "error going rw: %i", ret);
1651                                 up_write(&c->state_lock);
1652                                 ret = -EINVAL;
1653                                 goto err;
1654                         }
1655
1656                         sb->s_flags &= ~SB_RDONLY;
1657                 }
1658
1659                 c->opts.read_only = opts.read_only;
1660
1661                 up_write(&c->state_lock);
1662         }
1663
1664         if (opts.errors >= 0)
1665                 c->opts.errors = opts.errors;
1666 err:
1667         return bch2_err_class(ret);
1668 }
1669
1670 static int bch2_show_devname(struct seq_file *seq, struct dentry *root)
1671 {
1672         struct bch_fs *c = root->d_sb->s_fs_info;
1673         struct bch_dev *ca;
1674         unsigned i;
1675         bool first = true;
1676
1677         for_each_online_member(ca, c, i) {
1678                 if (!first)
1679                         seq_putc(seq, ':');
1680                 first = false;
1681                 seq_puts(seq, "/dev/");
1682                 seq_puts(seq, ca->name);
1683         }
1684
1685         return 0;
1686 }
1687
1688 static int bch2_show_options(struct seq_file *seq, struct dentry *root)
1689 {
1690         struct bch_fs *c = root->d_sb->s_fs_info;
1691         enum bch_opt_id i;
1692         struct printbuf buf = PRINTBUF;
1693         int ret = 0;
1694
1695         for (i = 0; i < bch2_opts_nr; i++) {
1696                 const struct bch_option *opt = &bch2_opt_table[i];
1697                 u64 v = bch2_opt_get_by_id(&c->opts, i);
1698
1699                 if (!(opt->flags & OPT_MOUNT))
1700                         continue;
1701
1702                 if (v == bch2_opt_get_by_id(&bch2_opts_default, i))
1703                         continue;
1704
1705                 printbuf_reset(&buf);
1706                 bch2_opt_to_text(&buf, c, c->disk_sb.sb, opt, v,
1707                                  OPT_SHOW_MOUNT_STYLE);
1708                 seq_putc(seq, ',');
1709                 seq_puts(seq, buf.buf);
1710         }
1711
1712         if (buf.allocation_failure)
1713                 ret = -ENOMEM;
1714         printbuf_exit(&buf);
1715         return ret;
1716 }
1717
1718 static void bch2_put_super(struct super_block *sb)
1719 {
1720         struct bch_fs *c = sb->s_fs_info;
1721
1722         __bch2_fs_stop(c);
1723 }
1724
1725 static const struct super_operations bch_super_operations = {
1726         .alloc_inode    = bch2_alloc_inode,
1727         .destroy_inode  = bch2_destroy_inode,
1728         .write_inode    = bch2_vfs_write_inode,
1729         .evict_inode    = bch2_evict_inode,
1730         .sync_fs        = bch2_sync_fs,
1731         .statfs         = bch2_statfs,
1732         .show_devname   = bch2_show_devname,
1733         .show_options   = bch2_show_options,
1734         .remount_fs     = bch2_remount,
1735         .put_super      = bch2_put_super,
1736 #if 0
1737         .freeze_fs      = bch2_freeze,
1738         .unfreeze_fs    = bch2_unfreeze,
1739 #endif
1740 };
1741
1742 static int bch2_set_super(struct super_block *s, void *data)
1743 {
1744         s->s_fs_info = data;
1745         return 0;
1746 }
1747
/*
 * bch2_noset_super - sget() "set" callback that always refuses
 * @s:    unused
 * @data: unused
 *
 * Unconditionally returns -EBUSY.
 */
static int bch2_noset_super(struct super_block *s, void *data)
{
        const int refused = -EBUSY;

        return refused;
}
1752
1753 static int bch2_test_super(struct super_block *s, void *data)
1754 {
1755         struct bch_fs *c = s->s_fs_info;
1756         struct bch_fs **devs = data;
1757         unsigned i;
1758
1759         if (!c)
1760                 return false;
1761
1762         for (i = 0; devs[i]; i++)
1763                 if (c != devs[i])
1764                         return false;
1765         return true;
1766 }
1767
1768 static struct dentry *bch2_mount(struct file_system_type *fs_type,
1769                                  int flags, const char *dev_name, void *data)
1770 {
1771         struct bch_fs *c;
1772         struct bch_dev *ca;
1773         struct super_block *sb;
1774         struct inode *vinode;
1775         struct bch_opts opts = bch2_opts_empty();
1776         char **devs;
1777         struct bch_fs **devs_to_fs = NULL;
1778         unsigned i, nr_devs;
1779         int ret;
1780
1781         opt_set(opts, read_only, (flags & SB_RDONLY) != 0);
1782
1783         ret = bch2_parse_mount_opts(NULL, &opts, data);
1784         if (ret)
1785                 return ERR_PTR(ret);
1786
1787         if (!dev_name || strlen(dev_name) == 0)
1788                 return ERR_PTR(-EINVAL);
1789
1790         devs = split_devs(dev_name, &nr_devs);
1791         if (!devs)
1792                 return ERR_PTR(-ENOMEM);
1793
1794         devs_to_fs = kcalloc(nr_devs + 1, sizeof(void *), GFP_KERNEL);
1795         if (!devs_to_fs) {
1796                 sb = ERR_PTR(-ENOMEM);
1797                 goto got_sb;
1798         }
1799
1800         for (i = 0; i < nr_devs; i++)
1801                 devs_to_fs[i] = bch2_path_to_fs(devs[i]);
1802
1803         sb = sget(fs_type, bch2_test_super, bch2_noset_super,
1804                   flags|SB_NOSEC, devs_to_fs);
1805         if (!IS_ERR(sb))
1806                 goto got_sb;
1807
1808         c = bch2_fs_open(devs, nr_devs, opts);
1809         if (IS_ERR(c)) {
1810                 sb = ERR_CAST(c);
1811                 goto got_sb;
1812         }
1813
1814         /* Some options can't be parsed until after the fs is started: */
1815         ret = bch2_parse_mount_opts(c, &opts, data);
1816         if (ret) {
1817                 bch2_fs_stop(c);
1818                 sb = ERR_PTR(ret);
1819                 goto got_sb;
1820         }
1821
1822         bch2_opts_apply(&c->opts, opts);
1823
1824         sb = sget(fs_type, NULL, bch2_set_super, flags|SB_NOSEC, c);
1825         if (IS_ERR(sb))
1826                 bch2_fs_stop(c);
1827 got_sb:
1828         kfree(devs_to_fs);
1829         kfree(devs[0]);
1830         kfree(devs);
1831
1832         if (IS_ERR(sb)) {
1833                 ret = PTR_ERR(sb);
1834                 ret = bch2_err_class(ret);
1835                 return ERR_PTR(ret);
1836         }
1837
1838         c = sb->s_fs_info;
1839
1840         if (sb->s_root) {
1841                 if ((flags ^ sb->s_flags) & SB_RDONLY) {
1842                         ret = -EBUSY;
1843                         goto err_put_super;
1844                 }
1845                 goto out;
1846         }
1847
1848         sb->s_blocksize         = block_bytes(c);
1849         sb->s_blocksize_bits    = ilog2(block_bytes(c));
1850         sb->s_maxbytes          = MAX_LFS_FILESIZE;
1851         sb->s_op                = &bch_super_operations;
1852         sb->s_export_op         = &bch_export_ops;
1853 #ifdef CONFIG_BCACHEFS_QUOTA
1854         sb->s_qcop              = &bch2_quotactl_operations;
1855         sb->s_quota_types       = QTYPE_MASK_USR|QTYPE_MASK_GRP|QTYPE_MASK_PRJ;
1856 #endif
1857         sb->s_xattr             = bch2_xattr_handlers;
1858         sb->s_magic             = BCACHEFS_STATFS_MAGIC;
1859         sb->s_time_gran         = c->sb.nsec_per_time_unit;
1860         sb->s_time_min          = div_s64(S64_MIN, c->sb.time_units_per_sec) + 1;
1861         sb->s_time_max          = div_s64(S64_MAX, c->sb.time_units_per_sec);
1862         c->vfs_sb               = sb;
1863         strscpy(sb->s_id, c->name, sizeof(sb->s_id));
1864
1865         ret = super_setup_bdi(sb);
1866         if (ret)
1867                 goto err_put_super;
1868
1869         sb->s_bdi->ra_pages             = VM_READAHEAD_PAGES;
1870
1871         for_each_online_member(ca, c, i) {
1872                 struct block_device *bdev = ca->disk_sb.bdev;
1873
1874                 /* XXX: create an anonymous device for multi device filesystems */
1875                 sb->s_bdev      = bdev;
1876                 sb->s_dev       = bdev->bd_dev;
1877                 percpu_ref_put(&ca->io_ref);
1878                 break;
1879         }
1880
1881         c->dev = sb->s_dev;
1882
1883 #ifdef CONFIG_BCACHEFS_POSIX_ACL
1884         if (c->opts.acl)
1885                 sb->s_flags     |= SB_POSIXACL;
1886 #endif
1887
1888         sb->s_shrink.seeks = 0;
1889
1890         vinode = bch2_vfs_inode_get(c, BCACHEFS_ROOT_SUBVOL_INUM);
1891         ret = PTR_ERR_OR_ZERO(vinode);
1892         if (ret) {
1893                 bch_err(c, "error mounting: error getting root inode: %s", bch2_err_str(ret));
1894                 goto err_put_super;
1895         }
1896
1897         sb->s_root = d_make_root(vinode);
1898         if (!sb->s_root) {
1899                 bch_err(c, "error mounting: error allocating root dentry");
1900                 ret = -ENOMEM;
1901                 goto err_put_super;
1902         }
1903
1904         sb->s_flags |= SB_ACTIVE;
1905 out:
1906         return dget(sb->s_root);
1907
1908 err_put_super:
1909         sb->s_fs_info = NULL;
1910         c->vfs_sb = NULL;
1911         deactivate_locked_super(sb);
1912         bch2_fs_stop(c);
1913         return ERR_PTR(bch2_err_class(ret));
1914 }
1915
1916 static void bch2_kill_sb(struct super_block *sb)
1917 {
1918         struct bch_fs *c = sb->s_fs_info;
1919
1920         if (c)
1921                 c->vfs_sb = NULL;
1922         generic_shutdown_super(sb);
1923         if (c)
1924                 bch2_fs_free(c);
1925 }
1926
1927 static struct file_system_type bcache_fs_type = {
1928         .owner          = THIS_MODULE,
1929         .name           = "bcachefs",
1930         .mount          = bch2_mount,
1931         .kill_sb        = bch2_kill_sb,
1932         .fs_flags       = FS_REQUIRES_DEV,
1933 };
1934
1935 MODULE_ALIAS_FS("bcachefs");
1936
1937 void bch2_vfs_exit(void)
1938 {
1939         unregister_filesystem(&bcache_fs_type);
1940         kmem_cache_destroy(bch2_inode_cache);
1941 }
1942
1943 int __init bch2_vfs_init(void)
1944 {
1945         int ret = -ENOMEM;
1946
1947         bch2_inode_cache = KMEM_CACHE(bch_inode_info, SLAB_RECLAIM_ACCOUNT);
1948         if (!bch2_inode_cache)
1949                 goto err;
1950
1951         ret = register_filesystem(&bcache_fs_type);
1952         if (ret)
1953                 goto err;
1954
1955         return 0;
1956 err:
1957         bch2_vfs_exit();
1958         return ret;
1959 }
1960
1961 #endif /* NO_BCACHEFS_FS */