5 #include "btree_update.h"
20 #include <linux/aio.h>
21 #include <linux/backing-dev.h>
22 #include <linux/exportfs.h>
23 #include <linux/module.h>
24 #include <linux/posix_acl.h>
25 #include <linux/random.h>
26 #include <linux/statfs.h>
27 #include <linux/xattr.h>
/* Slab cache backing struct bch_inode_info (allocated in bch2_alloc_inode()). */
static struct kmem_cache *bch2_inode_cache;

static void bch2_vfs_inode_init(struct bch_fs *,
				struct bch_inode_info *,
				struct bch_inode_unpacked *);
36 * I_SIZE_DIRTY requires special handling:
38 * To the recovery code, the flag means that there is stale data past i_size
39 * that needs to be deleted; it's used for implementing atomic appends and
42 * On append, we set I_SIZE_DIRTY before doing the write, then after the write
43 * we clear I_SIZE_DIRTY atomically with updating i_size to the new larger size
44 * that exposes the data we just wrote.
46 * On truncate, it's the reverse: We set I_SIZE_DIRTY atomically with setting
47 * i_size to the new smaller size, then we delete the data that we just made
48 * invisible, and then we clear I_SIZE_DIRTY.
50 * Because there can be multiple appends in flight at a time, we need a refcount
51 * (i_size_dirty_count) instead of manipulating the flag directly. Nonzero
52 * refcount means I_SIZE_DIRTY is set, zero means it's cleared.
54 * Because write_inode() can be called at any time, i_size_dirty_count means
55 * something different to the runtime code - it means to write_inode() "don't
58 * We don't clear I_SIZE_DIRTY directly, we let write_inode() clear it when
59 * i_size_dirty_count is zero - but the reverse is not true, I_SIZE_DIRTY must
/*
 * Write the in-memory inode back to the inodes btree.
 *
 * Reads and unpacks the current on-disk inode, applies the caller's
 * update callback (the set/p parameters are in a truncated part of the
 * signature in this view), then copies the VFS-owned fields (mode,
 * uid/gid, nlink, rdev, timestamps) from inode->v into the unpacked
 * inode, repacks it and inserts it.  The whole lookup/update/insert
 * sequence is retried on -EINTR (btree transaction restart).
 *
 * Caller must hold inode->ei_update_lock.
 */
int __must_check __bch2_write_inode(struct bch_fs *c,
				    struct bch_inode_info *inode,
	struct btree_iter iter;
	struct bch_inode_unpacked inode_u;
	struct bkey_inode_buf inode_p;
	u64 inum = inode->v.i_ino;
	unsigned i_nlink = READ_ONCE(inode->v.i_nlink);

	/*
	 * We can't write an inode with i_nlink == 0 because it's stored biased;
	 * however, we don't need to because if i_nlink is 0 the inode is
	 * getting deleted when it's evicted.
	 */

	lockdep_assert_held(&inode->ei_update_lock);

	bch2_btree_iter_init(&iter, c, BTREE_ID_INODES, POS(inum, 0),

		struct bkey_s_c k = bch2_btree_iter_peek_with_holes(&iter);

		if ((ret = btree_iter_err(k)))

		/* we only update here; the inode must already exist on disk */
		if (WARN_ONCE(k.k->type != BCH_INODE_FS,
			      "inode %llu not found when updating", inum)) {
			bch2_btree_iter_unlock(&iter);

		ret = bch2_inode_unpack(bkey_s_c_to_inode(k), &inode_u);
			  "error %i unpacking inode %llu", ret, inum)) {

		ret = set(inode, &inode_u, p);

		/* on-disk nlink is stored biased by the mode-implied link count */
		BUG_ON(i_nlink < nlink_bias(inode->v.i_mode));

		inode_u.bi_mode	= inode->v.i_mode;
		inode_u.bi_uid	= i_uid_read(&inode->v);
		inode_u.bi_gid	= i_gid_read(&inode->v);
		inode_u.bi_nlink= i_nlink - nlink_bias(inode->v.i_mode);
		inode_u.bi_dev	= inode->v.i_rdev;
		inode_u.bi_atime= timespec_to_bch2_time(c, inode->v.i_atime);
		inode_u.bi_mtime= timespec_to_bch2_time(c, inode->v.i_mtime);
		inode_u.bi_ctime= timespec_to_bch2_time(c, inode->v.i_ctime);

		bch2_inode_pack(&inode_p, &inode_u);

		ret = bch2_btree_insert_at(c, NULL, NULL,
					   &inode->ei_journal_seq,
					   BTREE_INSERT_ENTRY(&iter, &inode_p.inode.k_i));
	} while (ret == -EINTR);

	/* mirror the fields we just committed into the cached inode */
	inode->ei_size	= inode_u.bi_size;
	inode->ei_flags	= inode_u.bi_flags;

	bch2_btree_iter_unlock(&iter);

	return ret < 0 ? ret : 0;
/* Persist the inode with no additional update callback. */
int __must_check bch2_write_inode(struct bch_fs *c,
				  struct bch_inode_info *inode)
	return __bch2_write_inode(c, inode, NULL, NULL);
/* Bump i_nlink and persist the inode, under ei_update_lock. */
int bch2_inc_nlink(struct bch_fs *c, struct bch_inode_info *inode)
	mutex_lock(&inode->ei_update_lock);
	inc_nlink(&inode->v);
	ret = bch2_write_inode(c, inode);
	mutex_unlock(&inode->ei_update_lock);
/* Drop i_nlink and persist the inode, under ei_update_lock. */
int bch2_dec_nlink(struct bch_fs *c, struct bch_inode_info *inode)
	mutex_lock(&inode->ei_update_lock);
	drop_nlink(&inode->v);
	ret = bch2_write_inode(c, inode);
	mutex_unlock(&inode->ei_update_lock);
/*
 * Get (or create and fill) the VFS inode for @inum: on a cache miss,
 * read the on-disk inode, initialize the VFS inode from it, and pick up
 * any journal sequence number still outstanding for this inode.
 */
static struct inode *bch2_vfs_inode_get(struct bch_fs *c, u64 inum)
	struct bch_inode_unpacked inode_u;
	struct bch_inode_info *inode;

	inode = to_bch_ei(iget_locked(c->vfs_sb, inum));
	if (unlikely(!inode))
		return ERR_PTR(-ENOMEM);
	/* cache hit: inode already initialized */
	if (!(inode->v.i_state & I_NEW))

	ret = bch2_inode_find_by_inum(c, inum, &inode_u);
		iget_failed(&inode->v);

	bch2_vfs_inode_init(c, inode, &inode_u);

	inode->ei_journal_seq = bch2_inode_journal_seq(&c->journal, inum);

	unlock_new_inode(&inode->v);
/*
 * Allocate a new VFS inode, set up ACLs inherited from the parent
 * directory, and create the backing on-disk inode.  On failure the
 * inode is marked bad / unlinked so eviction won't try to delete an
 * on-disk inode that was never created.
 */
static struct bch_inode_info *bch2_vfs_inode_create(struct bch_fs *c,
						    struct bch_inode_info *dir,
						    umode_t mode, dev_t rdev)
	struct posix_acl *default_acl = NULL, *acl = NULL;
	struct bch_inode_info *inode;
	struct bch_inode_unpacked inode_u;

	inode = to_bch_ei(new_inode(c->vfs_sb));
	if (unlikely(!inode))
		return ERR_PTR(-ENOMEM);

	inode_init_owner(&inode->v, &dir->v, mode);

#ifdef CONFIG_BCACHEFS_POSIX_ACL
	ret = posix_acl_create(&dir->v, &inode->v.i_mode, &default_acl, &acl);
		make_bad_inode(&inode->v);

	bch2_inode_init(c, &inode_u,
			i_uid_read(&inode->v),
			i_gid_read(&inode->v),
			inode->v.i_mode, rdev);
	/* allocate an inode number above the range reserved for blockdevs */
	ret = bch2_inode_create(c, &inode_u,
				BLOCKDEV_INODE_MAX, 0,
				&c->unused_inode_hint);
		/*
		 * indicate to bch_evict_inode that the inode was never actually
		 */
		make_bad_inode(&inode->v);

	bch2_vfs_inode_init(c, inode, &inode_u);

	ret = bch2_set_acl(&inode->v, default_acl, ACL_TYPE_DEFAULT);

	ret = bch2_set_acl(&inode->v, acl, ACL_TYPE_ACCESS);

	insert_inode_hash(&inode->v);
	atomic_long_inc(&c->nr_inodes);

	posix_acl_release(default_acl);
	posix_acl_release(acl);

	/* error path: unlink so eviction deletes the inode */
	clear_nlink(&inode->v);

	inode = ERR_PTR(ret);
/*
 * Create a dirent for @name in @dir and update the directory's
 * m/ctime.  BCH_HASH_SET_MUST_CREATE: fail if the name already exists.
 */
static int bch2_vfs_dirent_create(struct bch_fs *c,
				  struct bch_inode_info *dir,
				  u8 type, const struct qstr *name,

	ret = bch2_dirent_create(c, dir->v.i_ino, &dir->ei_str_hash,
				 &dir->ei_journal_seq,
				 BCH_HASH_SET_MUST_CREATE);

	dir->v.i_mtime = dir->v.i_ctime = current_fs_time(c->vfs_sb);
	mark_inode_dirty_sync(&dir->v);
/*
 * Common create path: make the inode, then the dirent pointing at it;
 * on dirent failure the inode's link count is dropped so eviction
 * deletes it.  The directory's journal seq is propagated to the child
 * so an fsync on the child also covers the dirent update.
 */
static int __bch2_create(struct bch_inode_info *dir, struct dentry *dentry,
			 umode_t mode, dev_t rdev)
	struct bch_fs *c = dir->v.i_sb->s_fs_info;
	struct bch_inode_info *inode;

	inode = bch2_vfs_inode_create(c, dir, mode, rdev);
	if (unlikely(IS_ERR(inode)))
		return PTR_ERR(inode);

	ret = bch2_vfs_dirent_create(c, dir, mode_to_type(mode),
				     &dentry->d_name, inode->v.i_ino);
		clear_nlink(&inode->v);

	if (dir->ei_journal_seq > inode->ei_journal_seq)
		inode->ei_journal_seq = dir->ei_journal_seq;

	d_instantiate(dentry, &inode->v);
/* ->lookup: resolve @dentry in @vdir via the dirents btree. */
static struct dentry *bch2_lookup(struct inode *vdir, struct dentry *dentry,
	struct bch_fs *c = vdir->i_sb->s_fs_info;
	struct bch_inode_info *dir = to_bch_ei(vdir);
	struct inode *vinode = NULL;

	inum = bch2_dirent_lookup(c, dir->v.i_ino,

		vinode = bch2_vfs_inode_get(c, inum);

	/* vinode may be NULL (negative dentry) or an ERR_PTR */
	return d_splice_alias(vinode, dentry);
/* ->create: regular-file creation via the common create path. */
static int bch2_create(struct inode *vdir, struct dentry *dentry,
		       umode_t mode, bool excl)
	return __bch2_create(to_bch_ei(vdir), dentry, mode|S_IFREG, 0);
/*
 * ->link: bump the target's link count first, then create the new
 * dirent; the bump is undone if dirent creation fails.
 */
static int bch2_link(struct dentry *old_dentry, struct inode *vdir,
		     struct dentry *dentry)
	struct bch_fs *c = vdir->i_sb->s_fs_info;
	struct bch_inode_info *dir = to_bch_ei(vdir);
	struct bch_inode_info *inode = to_bch_ei(old_dentry->d_inode);

	lockdep_assert_held(&inode->v.i_rwsem);

	inode->v.i_ctime = current_fs_time(dir->v.i_sb);

	ret = bch2_inc_nlink(c, inode);

	ret = bch2_vfs_dirent_create(c, dir, mode_to_type(inode->v.i_mode),
				     &dentry->d_name, inode->v.i_ino);
		bch2_dec_nlink(c, inode);

	d_instantiate(dentry, &inode->v);
/*
 * ->unlink (also used by rmdir): delete the dirent, propagate the
 * directory's journal seq to the victim so fsync on it is correct,
 * then drop link counts — with an extra drop on both the parent and
 * the victim when the victim is a directory ("." and "..").
 */
static int bch2_unlink(struct inode *vdir, struct dentry *dentry)
	struct bch_fs *c = vdir->i_sb->s_fs_info;
	struct bch_inode_info *dir = to_bch_ei(vdir);
	struct bch_inode_info *inode = to_bch_ei(dentry->d_inode);

	lockdep_assert_held(&inode->v.i_rwsem);

	ret = bch2_dirent_delete(c, dir->v.i_ino, &dir->ei_str_hash,
				 &dentry->d_name, &dir->ei_journal_seq);

	if (dir->ei_journal_seq > inode->ei_journal_seq)
		inode->ei_journal_seq = dir->ei_journal_seq;

	inode->v.i_ctime = dir->v.i_ctime;

	if (S_ISDIR(inode->v.i_mode)) {
		bch2_dec_nlink(c, dir);
		drop_nlink(&inode->v);

	bch2_dec_nlink(c, inode);
/*
 * ->symlink: create the inode, write the target through the page cache
 * and flush it out, then create the dirent.  The parent's journal seq
 * is raised to the inode's so fsync on the parent covers the data.
 */
static int bch2_symlink(struct inode *vdir, struct dentry *dentry,
	struct bch_fs *c = vdir->i_sb->s_fs_info;
	struct bch_inode_info *dir = to_bch_ei(vdir), *inode;

	inode = bch2_vfs_inode_create(c, dir, S_IFLNK|S_IRWXUGO, 0);
	if (unlikely(IS_ERR(inode)))
		return PTR_ERR(inode);

	inode_lock(&inode->v);
	ret = page_symlink(&inode->v, symname, strlen(symname) + 1);
	inode_unlock(&inode->v);

	/* make sure the symlink body is on disk before the dirent exists */
	ret = filemap_write_and_wait_range(inode->v.i_mapping, 0, LLONG_MAX);

	if (dir->ei_journal_seq < inode->ei_journal_seq)
		dir->ei_journal_seq = inode->ei_journal_seq;

	ret = bch2_vfs_dirent_create(c, dir, DT_LNK, &dentry->d_name,

	d_instantiate(dentry, &inode->v);

	/* error path: unlink so eviction deletes the inode */
	clear_nlink(&inode->v);
/* ->mkdir: create the directory, then bump the parent's nlink for "..". */
static int bch2_mkdir(struct inode *vdir, struct dentry *dentry, umode_t mode)
	struct bch_fs *c = vdir->i_sb->s_fs_info;
	struct bch_inode_info *dir = to_bch_ei(vdir);

	lockdep_assert_held(&dir->v.i_rwsem);

	ret = __bch2_create(dir, dentry, mode|S_IFDIR, 0);

	bch2_inc_nlink(c, dir);
/* ->rmdir: refuse unless the directory is empty, then share the unlink path. */
static int bch2_rmdir(struct inode *vdir, struct dentry *dentry)
	struct bch_fs *c = vdir->i_sb->s_fs_info;

	/* NOTE(review): truncated view — presumably returns -ENOTEMPTY here */
	if (bch2_empty_dir(c, dentry->d_inode->i_ino))

	return bch2_unlink(vdir, dentry);
/* ->mknod: special files go through the common create path. */
static int bch2_mknod(struct inode *vdir, struct dentry *dentry,
		      umode_t mode, dev_t rdev)
	return __bch2_create(to_bch_ei(vdir), dentry, mode, rdev);
/*
 * Plain rename (no RENAME_EXCHANGE): four cases depending on whether
 * the destination exists and whether a directory is involved.
 * Overwriting a directory requires it to be empty; parent nlink counts
 * are adjusted when a directory changes parents (its ".." moves).
 */
static int bch2_rename(struct bch_fs *c,
		       struct bch_inode_info *old_dir,
		       struct dentry *old_dentry,
		       struct bch_inode_info *new_dir,
		       struct dentry *new_dentry)
	struct bch_inode_info *old_inode = to_bch_ei(old_dentry->d_inode);
	struct bch_inode_info *new_inode = to_bch_ei(new_dentry->d_inode);
	struct timespec now = current_fs_time(old_dir->v.i_sb);

	lockdep_assert_held(&old_dir->v.i_rwsem);
	lockdep_assert_held(&new_dir->v.i_rwsem);

	filemap_write_and_wait_range(old_inode->v.i_mapping,

	if (new_inode && S_ISDIR(old_inode->v.i_mode)) {
		/* directory overwriting an existing entry */
		lockdep_assert_held(&new_inode->v.i_rwsem);

		if (!S_ISDIR(new_inode->v.i_mode))

		if (bch2_empty_dir(c, new_inode->v.i_ino))

		ret = bch2_dirent_rename(c,
					 old_dir, &old_dentry->d_name,
					 new_dir, &new_dentry->d_name,
					 &old_inode->ei_journal_seq, BCH_RENAME_OVERWRITE);

		clear_nlink(&new_inode->v);
		bch2_dec_nlink(c, old_dir);
	} else if (new_inode) {
		/* non-directory overwriting an existing entry */
		lockdep_assert_held(&new_inode->v.i_rwsem);

		ret = bch2_dirent_rename(c,
					 old_dir, &old_dentry->d_name,
					 new_dir, &new_dentry->d_name,
					 &old_inode->ei_journal_seq, BCH_RENAME_OVERWRITE);

		new_inode->v.i_ctime = now;
		bch2_dec_nlink(c, new_inode);
	} else if (S_ISDIR(old_inode->v.i_mode)) {
		/* directory moving to a new name: move its ".." link too */
		ret = bch2_dirent_rename(c,
					 old_dir, &old_dentry->d_name,
					 new_dir, &new_dentry->d_name,
					 &old_inode->ei_journal_seq, BCH_RENAME);

		bch2_inc_nlink(c, new_dir);
		bch2_dec_nlink(c, old_dir);
		/* simple case: non-directory to a fresh name */
		ret = bch2_dirent_rename(c,
					 old_dir, &old_dentry->d_name,
					 new_dir, &new_dentry->d_name,
					 &old_inode->ei_journal_seq, BCH_RENAME);

	old_dir->v.i_ctime = old_dir->v.i_mtime = now;
	new_dir->v.i_ctime = new_dir->v.i_mtime = now;
	mark_inode_dirty_sync(&old_dir->v);
	mark_inode_dirty_sync(&new_dir->v);

	old_inode->v.i_ctime = now;
	mark_inode_dirty_sync(&old_inode->v);
/*
 * RENAME_EXCHANGE: atomically swap the two dirents, then fix up the
 * parents' nlink counts if exactly one of the two inodes is a
 * directory (its ".." link moves between parents).
 */
static int bch2_rename_exchange(struct bch_fs *c,
				struct bch_inode_info *old_dir,
				struct dentry *old_dentry,
				struct bch_inode_info *new_dir,
				struct dentry *new_dentry)
	struct bch_inode_info *old_inode = to_bch_ei(old_dentry->d_inode);
	struct bch_inode_info *new_inode = to_bch_ei(new_dentry->d_inode);
	struct timespec now = current_fs_time(old_dir->v.i_sb);

	ret = bch2_dirent_rename(c,
				 old_dir, &old_dentry->d_name,
				 new_dir, &new_dentry->d_name,
				 &old_inode->ei_journal_seq, BCH_RENAME_EXCHANGE);

	if (S_ISDIR(old_inode->v.i_mode) !=
	    S_ISDIR(new_inode->v.i_mode)) {
		if (S_ISDIR(old_inode->v.i_mode)) {
			bch2_inc_nlink(c, new_dir);
			bch2_dec_nlink(c, old_dir);

			bch2_dec_nlink(c, new_dir);
			bch2_inc_nlink(c, old_dir);

	old_dir->v.i_ctime = old_dir->v.i_mtime = now;
	new_dir->v.i_ctime = new_dir->v.i_mtime = now;
	mark_inode_dirty_sync(&old_dir->v);
	mark_inode_dirty_sync(&new_dir->v);

	old_inode->v.i_ctime = now;
	new_inode->v.i_ctime = now;
	mark_inode_dirty_sync(&old_inode->v);
	mark_inode_dirty_sync(&new_inode->v);
/*
 * ->rename2: dispatch to exchange or plain rename; only
 * RENAME_NOREPLACE and RENAME_EXCHANGE are supported.
 */
static int bch2_rename2(struct inode *old_vdir, struct dentry *old_dentry,
			struct inode *new_vdir, struct dentry *new_dentry,
	struct bch_fs *c = old_vdir->i_sb->s_fs_info;
	struct bch_inode_info *old_dir = to_bch_ei(old_vdir);
	struct bch_inode_info *new_dir = to_bch_ei(new_vdir);

	if (flags & ~(RENAME_NOREPLACE|RENAME_EXCHANGE))

	if (flags & RENAME_EXCHANGE)
		return bch2_rename_exchange(c, old_dir, old_dentry,
					    new_dir, new_dentry);

	return bch2_rename(c, old_dir, old_dentry, new_dir, new_dentry);
/*
 * ->setattr: size changes go through bch2_truncate(); everything else
 * is copied into the VFS inode and written back under ei_update_lock.
 * Mode changes additionally rebuild the POSIX ACL.
 */
static int bch2_setattr(struct dentry *dentry, struct iattr *iattr)
	struct bch_inode_info *inode = to_bch_ei(dentry->d_inode);
	struct bch_fs *c = inode->v.i_sb->s_fs_info;

	lockdep_assert_held(&inode->v.i_rwsem);

	ret = setattr_prepare(dentry, iattr);

	if (iattr->ia_valid & ATTR_SIZE) {
		ret = bch2_truncate(inode, iattr);

		mutex_lock(&inode->ei_update_lock);
		setattr_copy(&inode->v, iattr);
		ret = bch2_write_inode(c, inode);
		mutex_unlock(&inode->ei_update_lock);

	if (iattr->ia_valid & ATTR_MODE)
		ret = posix_acl_chmod(&inode->v, inode->v.i_mode);
/* ->tmpfile: create an unnamed (no dirent) inode and attach it to the dentry. */
static int bch2_tmpfile(struct inode *vdir, struct dentry *dentry, umode_t mode)
	struct bch_fs *c = vdir->i_sb->s_fs_info;
	struct bch_inode_info *dir = to_bch_ei(vdir);
	struct bch_inode_info *inode;

	/* XXX: i_nlink should be 0? */
	inode = bch2_vfs_inode_create(c, dir, mode, 0);
	if (unlikely(IS_ERR(inode)))
		return PTR_ERR(inode);

	d_tmpfile(dentry, &inode->v);
/*
 * Translate one extent key into fiemap_fill_next_extent() calls.
 * Data extents emit one entry per pointer/crc pair (compressed extents
 * flagged ENCODED); reservations emit DELALLOC|UNWRITTEN.
 */
static int bch2_fill_extent(struct fiemap_extent_info *info,
			    const struct bkey_i *k, unsigned flags)
	if (bkey_extent_is_data(&k->k)) {
		struct bkey_s_c_extent e = bkey_i_to_s_c_extent(k);
		const struct bch_extent_ptr *ptr;
		struct bch_extent_crc_unpacked crc;

		extent_for_each_ptr_crc(e, ptr, crc) {
			u64 offset = ptr->offset;

			if (crc.compression_type)
				flags2 |= FIEMAP_EXTENT_ENCODED;
				offset += crc.offset;

			/* flag extents whose sectors aren't page aligned */
			if ((offset & (PAGE_SECTORS - 1)) ||
			    (e.k->size & (PAGE_SECTORS - 1)))
				flags2 |= FIEMAP_EXTENT_NOT_ALIGNED;

			/* << 9: sectors to bytes */
			ret = fiemap_fill_next_extent(info,
						      bkey_start_offset(e.k) << 9,
						      e.k->size << 9, flags|flags2);

	} else if (k->k.type == BCH_RESERVATION) {
		return fiemap_fill_next_extent(info,
					       bkey_start_offset(&k->k) << 9,
					       FIEMAP_EXTENT_DELALLOC|
					       FIEMAP_EXTENT_UNWRITTEN);
/*
 * ->fiemap: walk the extents btree over [start, start+len) and emit an
 * entry per extent.  Emission runs one key behind the iterator so the
 * final entry can carry FIEMAP_EXTENT_LAST.
 */
static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
	struct bch_fs *c = vinode->i_sb->s_fs_info;
	struct bch_inode_info *ei = to_bch_ei(vinode);
	struct btree_iter iter;
	bool have_extent = false;

	/* reject a range that wraps around */
	if (start + len < start)

	for_each_btree_key(&iter, c, BTREE_ID_EXTENTS,
			   POS(ei->v.i_ino, start >> 9), 0, k)
		if (bkey_extent_is_data(k.k) ||
		    k.k->type == BCH_RESERVATION) {
			if (bkey_cmp(bkey_start_pos(k.k),
				     POS(ei->v.i_ino, (start + len) >> 9)) >= 0)

			/* emit the previously saved key */
			ret = bch2_fill_extent(info, &tmp.k, 0);

			/* save this key; it may be the last */
			bkey_reassemble(&tmp.k, k);

	ret = bch2_fill_extent(info, &tmp.k, FIEMAP_EXTENT_LAST);

	bch2_btree_iter_unlock(&iter);
	return ret < 0 ? ret : 0;
/* mmap vm_ops: generic fault paths plus bcachefs page_mkwrite. */
static const struct vm_operations_struct bch_vm_ops = {
	.fault		= filemap_fault,
	.map_pages	= filemap_map_pages,
	.page_mkwrite   = bch2_page_mkwrite,
/* ->mmap: standard mmap, wired to bch_vm_ops for page_mkwrite. */
static int bch2_mmap(struct file *file, struct vm_area_struct *vma)
	vma->vm_ops = &bch_vm_ops;
/* ->llseek for directories: generic seek (size bound truncated in this view). */
static loff_t bch2_dir_llseek(struct file *file, loff_t offset, int whence)
	return generic_file_llseek_size(file, offset, whence,
/* ->iterate: readdir via the dirents btree. */
static int bch2_vfs_readdir(struct file *file, struct dir_context *ctx)
	struct bch_fs *c = file_inode(file)->i_sb->s_fs_info;

	return bch2_readdir(c, file, ctx);
/* file_operations for regular files. */
static const struct file_operations bch_file_operations = {
	.llseek		= bch2_llseek,
	.read_iter	= generic_file_read_iter,
	.write_iter	= bch2_write_iter,
	.open		= generic_file_open,
	.splice_read	= generic_file_splice_read,
	.splice_write	= iter_file_splice_write,
	.fallocate	= bch2_fallocate_dispatch,
	.unlocked_ioctl = bch2_fs_file_ioctl,
	.compat_ioctl	= bch2_compat_fs_ioctl,
/* inode_operations for regular files. */
static const struct inode_operations bch_file_inode_operations = {
	.setattr	= bch2_setattr,
	.fiemap		= bch2_fiemap,
	.listxattr	= bch2_xattr_list,
#ifdef CONFIG_BCACHEFS_POSIX_ACL
	.get_acl	= bch2_get_acl,
	.set_acl	= bch2_set_acl,
/* inode_operations for directories. */
static const struct inode_operations bch_dir_inode_operations = {
	.lookup		= bch2_lookup,
	.create		= bch2_create,
	.unlink		= bch2_unlink,
	.symlink	= bch2_symlink,
	.rename		= bch2_rename2,
	.setattr	= bch2_setattr,
	.tmpfile	= bch2_tmpfile,
	.listxattr	= bch2_xattr_list,
#ifdef CONFIG_BCACHEFS_POSIX_ACL
	.get_acl	= bch2_get_acl,
	.set_acl	= bch2_set_acl,
/* file_operations for directories. */
static const struct file_operations bch_dir_file_operations = {
	.llseek		= bch2_dir_llseek,
	.read		= generic_read_dir,
	.iterate	= bch2_vfs_readdir,
	.unlocked_ioctl = bch2_fs_file_ioctl,
	.compat_ioctl	= bch2_compat_fs_ioctl,
/* inode_operations for symlinks (targets stored via the page cache). */
static const struct inode_operations bch_symlink_inode_operations = {
	.get_link	= page_get_link,
	.setattr	= bch2_setattr,
	.listxattr	= bch2_xattr_list,
#ifdef CONFIG_BCACHEFS_POSIX_ACL
	.get_acl	= bch2_get_acl,
	.set_acl	= bch2_set_acl,
/* inode_operations for special files (device nodes, fifos, sockets). */
static const struct inode_operations bch_special_inode_operations = {
	.setattr	= bch2_setattr,
	.listxattr	= bch2_xattr_list,
#ifdef CONFIG_BCACHEFS_POSIX_ACL
	.get_acl	= bch2_get_acl,
	.set_acl	= bch2_set_acl,
/* Page-cache operations for regular file data. */
static const struct address_space_operations bch_address_space_operations = {
	.writepage	= bch2_writepage,
	.readpage	= bch2_readpage,
	.writepages	= bch2_writepages,
	.readpages	= bch2_readpages,
	.set_page_dirty	= bch2_set_page_dirty,
	.write_begin	= bch2_write_begin,
	.write_end	= bch2_write_end,
	.invalidatepage	= bch2_invalidatepage,
	.releasepage	= bch2_releasepage,
	.direct_IO	= bch2_direct_IO,
#ifdef CONFIG_MIGRATION
	.migratepage	= bch2_migrate_page,
	.error_remove_page = generic_error_remove_page,
/* NFS export: look up an inode by number, validating its generation. */
static struct inode *bch2_nfs_get_inode(struct super_block *sb,
					u64 ino, u32 generation)
	struct bch_fs *c = sb->s_fs_info;
	struct inode *vinode;

	if (ino < BCACHEFS_ROOT_INO)
		return ERR_PTR(-ESTALE);

	vinode = bch2_vfs_inode_get(c, ino);
		return ERR_CAST(vinode);
	if (generation && vinode->i_generation != generation) {
		/* we didn't find the right inode.. */
		return ERR_PTR(-ESTALE);
/* NFS export: decode a file handle into a dentry. */
static struct dentry *bch2_fh_to_dentry(struct super_block *sb, struct fid *fid,
					int fh_len, int fh_type)
	return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
/* NFS export: decode a file handle into the parent dentry. */
static struct dentry *bch2_fh_to_parent(struct super_block *sb, struct fid *fid,
					int fh_len, int fh_type)
	return generic_fh_to_parent(sb, fid, fh_len, fh_type,
/* NFS export operations; get_parent is not implemented yet. */
static const struct export_operations bch_export_ops = {
	.fh_to_dentry	= bch2_fh_to_dentry,
	.fh_to_parent	= bch2_fh_to_parent,
	//.get_parent	= bch2_get_parent,
/*
 * Fill a freshly-allocated VFS inode from the unpacked on-disk inode
 * and wire up the per-filetype inode/file operations.
 */
static void bch2_vfs_inode_init(struct bch_fs *c,
				struct bch_inode_info *inode,
				struct bch_inode_unpacked *bi)
	inode->v.i_mode		= bi->bi_mode;
	i_uid_write(&inode->v, bi->bi_uid);
	i_gid_write(&inode->v, bi->bi_gid);
	inode->v.i_blocks	= bi->bi_sectors;
	inode->v.i_ino		= bi->bi_inum;
	/* on-disk nlink is stored biased by the mode-implied link count */
	set_nlink(&inode->v, bi->bi_nlink + nlink_bias(inode->v.i_mode));
	inode->v.i_rdev		= bi->bi_dev;
	inode->v.i_generation	= bi->bi_generation;
	inode->v.i_size		= bi->bi_size;
	inode->v.i_atime	= bch2_time_to_timespec(c, bi->bi_atime);
	inode->v.i_mtime	= bch2_time_to_timespec(c, bi->bi_mtime);
	inode->v.i_ctime	= bch2_time_to_timespec(c, bi->bi_ctime);

	/* bcachefs-private state mirrored from the on-disk inode */
	inode->ei_journal_seq	= 0;
	inode->ei_size		= bi->bi_size;
	inode->ei_flags		= bi->bi_flags;
	atomic64_set(&inode->ei_sectors, bi->bi_sectors);
	inode->ei_str_hash	= bch2_hash_info_init(c, bi);

	bch2_inode_flags_to_vfs(inode);

	inode->v.i_mapping->a_ops = &bch_address_space_operations;

	switch (inode->v.i_mode & S_IFMT) {
	/* NOTE(review): case labels are truncated in this view; the
	 * assignments below correspond to S_IFREG, S_IFDIR, S_IFLNK and
	 * the default (special files), in that order. */
		inode->v.i_op	= &bch_file_inode_operations;
		inode->v.i_fop	= &bch_file_operations;
		inode->v.i_op	= &bch_dir_inode_operations;
		inode->v.i_fop	= &bch_dir_file_operations;
		inode_nohighmem(&inode->v);
		inode->v.i_op	= &bch_symlink_inode_operations;
		init_special_inode(&inode->v, inode->v.i_mode, inode->v.i_rdev);
		inode->v.i_op	= &bch_special_inode_operations;
/* ->alloc_inode: allocate from the inode slab and init per-inode state. */
static struct inode *bch2_alloc_inode(struct super_block *sb)
	struct bch_inode_info *inode;

	inode = kmem_cache_alloc(bch2_inode_cache, GFP_NOFS);

	inode_init_once(&inode->v);
	mutex_init(&inode->ei_update_lock);
	inode->ei_journal_seq = 0;
	atomic_long_set(&inode->ei_size_dirty_count, 0);
	atomic_long_set(&inode->ei_sectors_dirty_count, 0);
/* RCU callback: actually free the inode after the grace period. */
static void bch2_i_callback(struct rcu_head *head)
	struct inode *vinode = container_of(head, struct inode, i_rcu);
	struct bch_inode_info *inode = to_bch_ei(vinode);

	kmem_cache_free(bch2_inode_cache, inode);
/* ->destroy_inode: defer freeing to RCU (lockless pathwalk may still look). */
static void bch2_destroy_inode(struct inode *vinode)
	call_rcu(&vinode->i_rcu, bch2_i_callback);
/*
 * ->write_inode: persist the inode; for WB_SYNC_ALL also flush its
 * journal entries (unless journal flushing is disabled by option).
 */
static int bch2_vfs_write_inode(struct inode *vinode,
				struct writeback_control *wbc)
	struct bch_fs *c = vinode->i_sb->s_fs_info;
	struct bch_inode_info *inode = to_bch_ei(vinode);

	mutex_lock(&inode->ei_update_lock);
	ret = bch2_write_inode(c, inode);
	mutex_unlock(&inode->ei_update_lock);

	if (c->opts.journal_flush_disabled)

	if (!ret && wbc->sync_mode == WB_SYNC_ALL)
		ret = bch2_journal_flush_seq(&c->journal, inode->ei_journal_seq);
/*
 * ->evict_inode: drop the page cache, sanity-check dirty counters, and
 * if the inode is unlinked (and valid), delete it from disk.
 */
static void bch2_evict_inode(struct inode *vinode)
	struct bch_fs *c = vinode->i_sb->s_fs_info;
	struct bch_inode_info *inode = to_bch_ei(vinode);

	truncate_inode_pages_final(&inode->v.i_data);

	if (!bch2_journal_error(&c->journal) && !is_bad_inode(&inode->v)) {
		/* XXX - we want to check this stuff iff there weren't IO errors: */
		BUG_ON(atomic_long_read(&inode->ei_sectors_dirty_count));
		BUG_ON(atomic64_read(&inode->ei_sectors) != inode->v.i_blocks);

	clear_inode(&inode->v);

	if (!inode->v.i_nlink && !is_bad_inode(&inode->v)) {
		bch2_inode_rm(c, inode->v.i_ino);
		atomic_long_dec(&c->nr_inodes);
/* ->statfs: report capacity/free in PAGE-sized blocks, plus a uuid-derived fsid. */
static int bch2_statfs(struct dentry *dentry, struct kstatfs *buf)
	struct super_block *sb = dentry->d_sb;
	struct bch_fs *c = sb->s_fs_info;

	buf->f_type	= BCACHEFS_STATFS_MAGIC;
	buf->f_bsize	= sb->s_blocksize;
	buf->f_blocks	= c->capacity >> PAGE_SECTOR_SHIFT;
	buf->f_bfree	= (c->capacity - bch2_fs_sectors_used(c)) >> PAGE_SECTOR_SHIFT;
	buf->f_bavail	= buf->f_bfree;
	buf->f_files	= atomic_long_read(&c->nr_inodes);
	buf->f_ffree	= U64_MAX;

	/* fsid: xor of the two 64-bit halves of the user-visible uuid */
	fsid = le64_to_cpup((void *) c->sb.user_uuid.b) ^
	       le64_to_cpup((void *) c->sb.user_uuid.b + sizeof(u64));
	buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
	buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
	buf->f_namelen	= NAME_MAX;
/* ->sync_fs: async journal flush when !wait, synchronous flush otherwise. */
static int bch2_sync_fs(struct super_block *sb, int wait)
	struct bch_fs *c = sb->s_fs_info;

	bch2_journal_flush_async(&c->journal, NULL);

	return bch2_journal_flush(&c->journal);
/*
 * Parse a colon-separated device list, open the devices as one
 * filesystem, and verify every named block device belongs to that
 * same, fully-started filesystem.  Returns the bch_fs with a closure
 * reference held, or NULL on error (err is logged).
 */
static struct bch_fs *bch2_open_as_blockdevs(const char *_dev_name,
					     struct bch_opts opts)
	size_t nr_devs = 0, i = 0;
	char *dev_name, *s, **devs;
	struct bch_fs *c = NULL;
	const char *err = "cannot allocate memory";

	dev_name = kstrdup(_dev_name, GFP_KERNEL);

	/* count ':' separators to size the devs array */
	for (s = dev_name; s; s = strchr(s + 1, ':'))

	devs = kcalloc(nr_devs, sizeof(const char *), GFP_KERNEL);

	/* split dev_name in place on ':' */
	for (i = 0, s = dev_name;
	     (s = strchr(s, ':')) && (*s++ = '\0'))

	err = bch2_fs_open(devs, nr_devs, opts, &c);

	/*
	 * Look up each block device, make sure they all belong to a
	 * filesystem and they all belong to the _same_ filesystem
	 */
	for (i = 0; i < nr_devs; i++) {
		struct block_device *bdev = lookup_bdev(devs[i]);

		c2 = bch2_bdev_to_fs(bdev);

		closure_put(&c2->cl);

	closure_put(&c->cl);

	mutex_lock(&c->state_lock);

	if (!bch2_fs_running(c)) {
		mutex_unlock(&c->state_lock);
		closure_put(&c->cl);
		err = "incomplete filesystem";

	mutex_unlock(&c->state_lock);

	set_bit(BCH_FS_BDEV_MOUNTED, &c->flags);

	pr_err("bch_fs_open err %s", err);
/* ->remount_fs: handle ro<->rw transitions and re-settable options. */
static int bch2_remount(struct super_block *sb, int *flags, char *data)
	struct bch_fs *c = sb->s_fs_info;
	struct bch_opts opts = bch2_opts_empty();

	opt_set(opts, read_only, (*flags & MS_RDONLY) != 0);

	ret = bch2_parse_mount_opts(&opts, data);

	if (opts.read_only != c->opts.read_only) {
		const char *err = NULL;

		mutex_lock(&c->state_lock);

		if (opts.read_only) {
			bch2_fs_read_only(c);

			sb->s_flags |= MS_RDONLY;

			err = bch2_fs_read_write(c);
				bch_err(c, "error going rw: %s", err);

			sb->s_flags &= ~MS_RDONLY;

		c->opts.read_only = opts.read_only;

		mutex_unlock(&c->state_lock);

	/* -1 means "not specified on the command line" */
	if (opts.errors >= 0)
		c->opts.errors = opts.errors;
/* ->show_options: print mount options that differ from the defaults. */
static int bch2_show_options(struct seq_file *seq, struct dentry *root)
	struct bch_fs *c = root->d_sb->s_fs_info;

	for (i = 0; i < bch2_opts_nr; i++) {
		const struct bch_option *opt = &bch2_opt_table[i];
		u64 v = bch2_opt_get_by_id(&c->opts, i);

		/* only options settable at mount time */
		if (opt->mode < OPT_MOUNT)

		/* skip options still at their default value */
		if (v == bch2_opt_get_by_id(&bch2_opts_default, i))

		switch (opt->type) {
			seq_printf(seq, ",%s%s", v ? "" : "no", opt->attr.name);
			seq_printf(seq, ",%s=%llu", opt->attr.name, v);
			seq_printf(seq, ",%s=%s", opt->attr.name, opt->choices[v]);
/* Superblock operations. */
static const struct super_operations bch_super_operations = {
	.alloc_inode	= bch2_alloc_inode,
	.destroy_inode	= bch2_destroy_inode,
	.write_inode	= bch2_vfs_write_inode,
	.evict_inode	= bch2_evict_inode,
	.sync_fs	= bch2_sync_fs,
	.statfs		= bch2_statfs,
	.show_options	= bch2_show_options,
	.remount_fs	= bch2_remount,
	.put_super	= bch2_put_super,
	.freeze_fs	= bch2_freeze,
	.unfreeze_fs	= bch2_unfreeze,
/* sget() match callback: a superblock matches if its fs_info is this bch_fs. */
static int bch2_test_super(struct super_block *s, void *data)
	return s->s_fs_info == data;
/* sget() set callback: attach the bch_fs to a new superblock. */
static int bch2_set_super(struct super_block *s, void *data)
	s->s_fs_info = data;
/*
 * ->mount: parse options, open/attach the underlying devices, set up
 * the superblock and load the root inode.  sget() matches on the
 * bch_fs pointer, so an already-mounted filesystem reuses its
 * existing superblock.
 */
static struct dentry *bch2_mount(struct file_system_type *fs_type,
				 int flags, const char *dev_name, void *data)
	struct super_block *sb;
	struct inode *vinode;
	struct bch_opts opts = bch2_opts_empty();

	opt_set(opts, read_only, (flags & MS_RDONLY) != 0);

	ret = bch2_parse_mount_opts(&opts, data);
		return ERR_PTR(ret);

	c = bch2_open_as_blockdevs(dev_name, opts);
		return ERR_PTR(-ENOENT);

	sb = sget(fs_type, bch2_test_super, bch2_set_super, flags|MS_NOSEC, c);
		closure_put(&c->cl);
		return ERR_CAST(sb);

	BUG_ON(sb->s_fs_info != c);

		/* existing superblock: drop the extra ref taken by open */
		closure_put(&c->cl);

		if ((flags ^ sb->s_flags) & MS_RDONLY) {

	/* XXX: blocksize */
	sb->s_blocksize		= PAGE_SIZE;
	sb->s_blocksize_bits	= PAGE_SHIFT;
	sb->s_maxbytes		= MAX_LFS_FILESIZE;
	sb->s_op		= &bch_super_operations;
	sb->s_export_op		= &bch_export_ops;
	sb->s_xattr		= bch2_xattr_handlers;
	sb->s_magic		= BCACHEFS_STATFS_MAGIC;
	sb->s_time_gran		= c->sb.time_precision;

	sb->s_bdi		= &c->bdi;
	strlcpy(sb->s_id, c->name, sizeof(sb->s_id));

	for_each_online_member(ca, c, i) {
		struct block_device *bdev = ca->disk_sb.bdev;

		/* XXX: create an anonymous device for multi device filesystems */
		sb->s_dev	= bdev->bd_dev;
		percpu_ref_put(&ca->io_ref);

#ifdef CONFIG_BCACHEFS_POSIX_ACL
	sb->s_flags	|= MS_POSIXACL;

	vinode = bch2_vfs_inode_get(c, BCACHEFS_ROOT_INO);
	if (IS_ERR(vinode)) {
		ret = PTR_ERR(vinode);

	sb->s_root = d_make_root(vinode);

	sb->s_flags |= MS_ACTIVE;

	return dget(sb->s_root);

	/* error path */
	deactivate_locked_super(sb);
	return ERR_PTR(ret);
/*
 * ->kill_sb: tear down the superblock, then release our reference on
 * the bch_fs (behavior depends on BCH_FS_BDEV_MOUNTED; the branch body
 * is truncated in this view).
 */
static void bch2_kill_sb(struct super_block *sb)
	struct bch_fs *c = sb->s_fs_info;

	generic_shutdown_super(sb);

	if (test_bit(BCH_FS_BDEV_MOUNTED, &c->flags))

	closure_put(&c->cl);
/* Filesystem type registration for bcachefs. */
static struct file_system_type bcache_fs_type = {
	.owner		= THIS_MODULE,
	.mount		= bch2_mount,
	.kill_sb	= bch2_kill_sb,
	.fs_flags	= FS_REQUIRES_DEV,
1334 MODULE_ALIAS_FS("bcachefs");
1336 void bch2_vfs_exit(void)
1338 unregister_filesystem(&bcache_fs_type);
1339 if (bch2_inode_cache)
1340 kmem_cache_destroy(bch2_inode_cache);
/* Module init: create the inode slab cache, then register the filesystem. */
int __init bch2_vfs_init(void)
	bch2_inode_cache = KMEM_CACHE(bch_inode_info, 0);
	if (!bch2_inode_cache)

	ret = register_filesystem(&bcache_fs_type);
1361 #endif /* NO_BCACHEFS_FS */