9 #include <sys/sysmacros.h>
10 #include <sys/types.h>
14 #include <linux/fiemap.h>
16 #include <linux/stat.h>
18 #include <uuid/uuid.h>
22 #include "libbcachefs.h"
24 #include <linux/dcache.h>
25 #include <linux/generic-radix-tree.h>
26 #include <linux/xattr.h>
27 #include "libbcachefs/bcachefs.h"
28 #include "libbcachefs/alloc_background.h"
29 #include "libbcachefs/alloc_foreground.h"
30 #include "libbcachefs/btree_update.h"
31 #include "libbcachefs/buckets.h"
32 #include "libbcachefs/dirent.h"
33 #include "libbcachefs/fs.h"
34 #include "libbcachefs/inode.h"
35 #include "libbcachefs/io.h"
36 #include "libbcachefs/replicas.h"
37 #include "libbcachefs/str_hash.h"
38 #include "libbcachefs/super.h"
39 #include "libbcachefs/xattr.h"
/*
 * dev_t_to_path() - turn a block device number into a "/dev/..." path.
 *
 * Resolves the /sys/dev/block/<major>:<minor> symlink with readlink() and
 * formats the returned string with mprintf().  The failures visible here are
 * fatal: they end in die() rather than an error return.
 *
 * NOTE(review): the embedded line numbers have gaps, so some statements of
 * this function (e.g. the declaration of ret) are not visible in this view.
 */
41 static char *dev_t_to_path(dev_t dev)
43 char link[PATH_MAX], *p;
46 char *sysfs_dev = mprintf("/sys/dev/block/%u:%u",
47 major(dev), minor(dev));
48 ret = readlink(sysfs_dev, link, sizeof(link));
/* a result that fills the whole buffer is rejected along with real errors */
51 if (ret < 0 || ret >= sizeof(link))
52 die("readlink error while looking up block device: %m");
/* the device name is located via the last '/' of the link target */
56 p = strrchr(link, '/');
58 die("error looking up device name");
61 return mprintf("/dev/%s", p);
/*
 * path_is_fs_root() - check whether @path is listed as a mount point.
 *
 * Reads /proc/self/mountinfo line by line, skips the first four
 * space-separated fields and compares the fifth one against @path with
 * strcmp(), so @path has to match the mount table text exactly.
 * die() is called when the mount table cannot be opened.
 *
 * NOTE(review): the return statements and the cleanup are among the lines
 * missing from this view.
 */
64 static bool path_is_fs_root(const char *path)
66 char *line = NULL, *p, *mount;
71 f = fopen("/proc/self/mountinfo", "r");
73 die("Error getting mount information");
75 while (getline(&line, &n, f) != -1) {
/* strsep() advances p past one field per call; the results are discarded */
78 strsep(&p, " "); /* mount id */
79 strsep(&p, " "); /* parent id */
80 strsep(&p, " "); /* dev */
81 strsep(&p, " "); /* root */
/* fifth field: the mount point */
82 mount = strsep(&p, " ");
85 if (mount && !strcmp(path, mount))
/*
 * mark_unreserved_space() - set the nouse flag on buckets outside @extents.
 *
 * Walks every hole in @extents up to the size of device 0 and sets
 * bucket_mark.nouse on each bucket the hole touches.  Hole bounds are byte
 * offsets: the device size is converted from sectors with "<< 9" and hole
 * starts are converted back to sectors with ">> 9".
 *
 * NOTE(review): presumably this keeps the allocator away from space the
 * reserved file does not cover - confirm against the nouse semantics.
 */
96 static void mark_unreserved_space(struct bch_fs *c, ranges extents)
98 struct bch_dev *ca = c->devs[0];
99 struct hole_iter iter;
102 for_each_hole(iter, extents, bucket_to_sector(ca, ca->mi.nbuckets) << 9, i) {
103 struct bucket_mark new;
/* empty holes are tested for here */
106 if (i.start == i.end)
/* first bucket touched by this hole */
109 b = sector_to_bucket(ca, i.start >> 9);
111 struct bucket *g = bucket(ca, b);
112 bucket_cmpxchg(g, new, new.nouse = 1);
/* keep going while the bucket still starts before the end of the hole */
114 } while (bucket_to_sector(ca, b) << 9 < i.end);
/*
 * update_inode() - write @inode into the inodes btree.
 *
 * Packs the unpacked inode with bch2_inode_pack() and inserts the packed key
 * into BTREE_ID_INODES.  A failure is reported through die(); the message
 * says "creating file" even though callers also use this for updates.
 */
118 static void update_inode(struct bch_fs *c,
119 struct bch_inode_unpacked *inode)
121 struct bkey_inode_buf packed;
124 bch2_inode_pack(&packed, inode);
125 ret = bch2_btree_insert(c, BTREE_ID_INODES, &packed.inode.k_i,
128 die("error creating file: %s", strerror(-ret));
/*
 * create_dirent() - add a directory entry @name -> @inum under @parent.
 *
 * The entry type is derived from @mode with mode_to_type(), and the hash
 * parameters come from the parent inode.  BCH_HASH_SET_MUST_CREATE is passed
 * to bch2_dirent_create(); a failure ends in die().
 */
131 static void create_dirent(struct bch_fs *c,
132 struct bch_inode_unpacked *parent,
133 const char *name, u64 inum, mode_t mode)
135 struct bch_hash_info parent_hash_info = bch2_hash_info_init(c, parent);
/* wrap the C string in a qstr; only len and name are initialised */
136 struct qstr qname = { { { .len = strlen(name), } }, .name = name };
138 int ret = bch2_dirent_create(c, parent->bi_inum, &parent_hash_info,
139 mode_to_type(mode), &qname,
140 inum, NULL, BCH_HASH_SET_MUST_CREATE);
142 die("error creating file: %s", strerror(-ret));
/*
 * create_link() - add another name for an inode that already exists.
 *
 * Looks the inode up by @inum, writes it back with update_inode() and then
 * creates the directory entry @name in @parent.  A failed lookup ends in
 * die().
 *
 * NOTE(review): whatever modifies the inode between the lookup and the
 * write-back (presumably the link count) is on a line missing from this view.
 */
148 static void create_link(struct bch_fs *c,
149 struct bch_inode_unpacked *parent,
150 const char *name, u64 inum, mode_t mode)
152 struct bch_inode_unpacked inode;
153 int ret = bch2_inode_find_by_inum(c, inum, &inode);
155 die("error looking up hardlink: %s", strerror(-ret));
158 update_inode(c, &inode);
160 create_dirent(c, parent, name, inum, mode);
/*
 * create_file() - allocate a new inode and link it into @parent.
 *
 * Initialises an inode from @uid, @gid, @mode and @rdev, creates it with
 * bch2_inode_create() and adds a directory entry for it.  A failed inode
 * creation ends in die().  The function returns a struct bch_inode_unpacked
 * by value.
 *
 * NOTE(review): the name parameter and the return statement are on lines
 * missing from this view; presumably the new inode is what is returned.
 */
163 static struct bch_inode_unpacked create_file(struct bch_fs *c,
164 struct bch_inode_unpacked *parent,
166 uid_t uid, gid_t gid,
167 mode_t mode, dev_t rdev)
169 struct bch_inode_unpacked new_inode;
172 bch2_inode_init(c, &new_inode, uid, gid, mode, rdev, parent);
174 ret = bch2_inode_create(c, &new_inode, BLOCKDEV_INODE_MAX, 0,
175 &c->unused_inode_hint);
177 die("error creating file: %s", strerror(-ret));
179 create_dirent(c, parent, name, new_inode.bi_inum, mode);
/*
 * for_each_xattr_handler() - iterate over an array of xattr handler pointers.
 *
 * Assigns each element of @handlers to @handler in turn.  The macro
 * post-increments @handlers, so the caller's pointer is consumed by the loop.
 * NOTE(review): the loop condition is on a line missing from this view.
 */
184 #define for_each_xattr_handler(handlers, handler) \
186 for ((handler) = *(handlers)++; \
188 (handler) = *(handlers)++)
/*
 * xattr_resolve_name() - find the handler responsible for an xattr name.
 *
 * Tries each entry of bch2_xattr_handlers, matching *name against the
 * handler's prefix with strcmp_prefix().  Returns ERR_PTR(-EINVAL) or
 * ERR_PTR(-EOPNOTSUPP) on the failure paths visible here.
 *
 * NOTE(review): the success path (and any update of *name) is on lines
 * missing from this view.
 */
190 static const struct xattr_handler *xattr_resolve_name(const char **name)
192 const struct xattr_handler **handlers = bch2_xattr_handlers;
193 const struct xattr_handler *handler;
195 for_each_xattr_handler(handlers, handler) {
198 n = strcmp_prefix(*name, xattr_prefix(handler));
/*
 * True when exactly one of these holds: the handler has no prefix, or
 * nothing is left of the name after the matched part.
 */
200 if (!handler->prefix ^ !*n) {
203 return ERR_PTR(-EINVAL);
209 return ERR_PTR(-EOPNOTSUPP);
/*
 * copy_times() - copy access, modification and change times into @dst.
 *
 * Each timespec of the source stat data is converted with
 * timespec_to_bch2_time().  Only the in-memory inode is changed here.
 * NOTE(review): the declaration of the src parameter is on a line missing
 * from this view.
 */
212 static void copy_times(struct bch_fs *c, struct bch_inode_unpacked *dst,
215 dst->bi_atime = timespec_to_bch2_time(c, src->st_atim);
216 dst->bi_mtime = timespec_to_bch2_time(c, src->st_mtim);
217 dst->bi_ctime = timespec_to_bch2_time(c, src->st_ctim);
/*
 * copy_xattrs() - copy all extended attributes of a source path to @dst.
 *
 * Lists the attribute names with llistxattr(), reads each value with
 * lgetxattr() (the l* variants operate on a symlink itself rather than its
 * target), resolves the handler for the name and stores the attribute with
 * bch2_xattr_set().  Any failure visible here ends in die().
 *
 * NOTE(review): the src parameter declaration and the error checks guarding
 * the die() calls are on lines missing from this view.
 */
220 static void copy_xattrs(struct bch_fs *c, struct bch_inode_unpacked *dst,
223 struct bch_hash_info hash_info = bch2_hash_info_init(c, dst);
/* on-stack buffer sized for the largest possible name list */
225 char attrs[XATTR_LIST_MAX];
226 ssize_t attrs_size = llistxattr(src, attrs, sizeof(attrs));
228 die("listxattr error: %m");
/* the list is a sequence of NUL-terminated names packed back to back */
230 const char *next, *attr;
232 attr < attrs + attrs_size;
234 next = attr + strlen(attr) + 1;
236 char val[XATTR_SIZE_MAX];
237 ssize_t val_size = lgetxattr(src, attr, val, sizeof(val));
240 die("error getting xattr val: %m");
/* attr is passed by address, so the resolver is able to modify it */
242 const struct xattr_handler *h = xattr_resolve_name(&attr);
244 int ret = bch2_trans_do(c, NULL, BTREE_INSERT_ATOMIC,
245 bch2_xattr_set(&trans, dst->bi_inum, &hash_info, attr,
246 val, val_size, h->flags, 0));
248 die("error creating xattr: %s", strerror(-ret));
/*
 * Size in bytes of the staging buffer below.  write_data() sizes its bio_vec
 * array from this constant, so the two can never disagree.
 */
#define WRITE_DATA_BUF (1 << 20)

/* Page-aligned staging buffer handed to write_data() by the copy helpers. */
252 static char buf[WRITE_DATA_BUF] __aligned(PAGE_SIZE);
/*
 * write_data() - write @len bytes from @buf into @dst_inode at @dst_offset.
 *
 * @dst_offset and @len are in bytes and must both be multiples of the
 * filesystem block size (enforced with BUG_ON).  A disk reservation is taken
 * for the data, the write is issued through bch2_write(), and the inode's
 * in-memory sector count is bumped; failure to reserve space ends in die().
 *
 * NOTE(review): some lines of this function (the wrapper struct around
 * op/bv, the closure declaration and the wait for completion) are missing
 * from this view.
 */
254 static void write_data(struct bch_fs *c,
255 struct bch_inode_unpacked *dst_inode,
256 u64 dst_offset, void *buf, size_t len)
259 struct bch_write_op op;
/*
 * Inside this function "buf" is the void * parameter, not the file-scope
 * array, so sizeof(buf) would be the size of a pointer and the array would
 * end up with zero elements.  Size it from the buffer constant instead.
 */
260 struct bio_vec bv[WRITE_DATA_BUF / PAGE_SIZE];
264 BUG_ON(dst_offset & (block_bytes(c) - 1));
265 BUG_ON(len & (block_bytes(c) - 1));
267 closure_init_stack(&cl);
/* build a bio that covers the caller's buffer */
269 bio_init(&o.op.wbio.bio, o.bv, ARRAY_SIZE(o.bv));
270 o.op.wbio.bio.bi_iter.bi_size = len;
271 bch2_bio_map(&o.op.wbio.bio, buf);
273 bch2_write_op_init(&o.op, c, bch2_opts_to_inode_opts(c->opts));
274 o.op.write_point = writepoint_hashed(0);
275 o.op.nr_replicas = 1;
/* extent positions are in 512-byte sectors */
276 o.op.pos = POS(dst_inode->bi_inum, dst_offset >> 9);
278 int ret = bch2_disk_reservation_get(c, &o.op.res, len >> 9,
279 c->opts.data_replicas, 0);
281 die("error reserving space in new filesystem: %s", strerror(-ret));
283 closure_call(&o.op.cl, bch2_write, NULL, &cl);
286 dst_inode->bi_sectors += len >> 9;
/*
 * copy_data() - copy the byte range [@start, @end) of @src_fd into @dst_inode.
 *
 * Works in chunks of at most sizeof(buf): each chunk is read with xpread(),
 * zero-padded up to the filesystem block size and passed to write_data() at
 * the same offset it was read from.
 *
 * NOTE(review): the statement advancing start is on a line missing from this
 * view.
 */
289 static void copy_data(struct bch_fs *c,
290 struct bch_inode_unpacked *dst_inode,
291 int src_fd, u64 start, u64 end)
293 while (start < end) {
294 unsigned len = min_t(u64, end - start, sizeof(buf));
/* bytes needed to round this chunk up to a whole block */
295 unsigned pad = round_up(len, block_bytes(c)) - len;
297 xpread(src_fd, buf, len, start);
/* the shared buffer is reused, so clear whatever an earlier chunk left */
298 memset(buf + len, 0, pad);
300 write_data(c, dst_inode, start, buf, len + pad);
/*
 * link_data() - make @dst reference data that is already on the device.
 *
 * Instead of copying, inserts extent keys into BTREE_ID_EXTENTS whose
 * pointers refer to the existing location on device 0.  @logical, @physical
 * and @length must be multiples of the block size (BUG_ON).  The range is
 * handled in pieces that do not cross a bucket boundary; for each piece the
 * bucket is flagged in ca->buckets_dirty, a disk reservation is taken around
 * the insert, and dst->bi_sectors is increased.  Failures end in die().
 *
 * NOTE(review): the loop header, the unit conversions and parts of several
 * calls are on lines missing from this view; the comparison against
 * bucket_to_sector() suggests the values are in sectors by that point -
 * confirm.
 */
305 static void link_data(struct bch_fs *c, struct bch_inode_unpacked *dst,
306 u64 logical, u64 physical, u64 length)
308 struct bch_dev *ca = c->devs[0];
310 BUG_ON(logical & (block_bytes(c) - 1));
311 BUG_ON(physical & (block_bytes(c) - 1));
312 BUG_ON(length & (block_bytes(c) - 1));
/* the range must lie inside the device */
318 BUG_ON(physical + length > bucket_to_sector(ca, ca->mi.nbuckets));
321 struct bkey_i_extent *e;
323 u64 b = sector_to_bucket(ca, physical);
324 struct disk_reservation res;
/* limit this piece to what is left of the current bucket */
328 sectors = min(ca->mi.bucket_size -
329 (physical & (ca->mi.bucket_size - 1)),
332 e = bkey_extent_init(&k.k);
333 e->k.p.inode = dst->bi_inum;
/* the key position is the END of the extent */
334 e->k.p.offset = logical + sectors;
336 bch2_bkey_append_ptr(&e->k_i, (struct bch_extent_ptr) {
/* use the bucket's current generation number for the pointer */
339 .gen = bucket(ca, b)->mark.gen,
342 set_bit(b, ca->buckets_dirty);
344 ret = bch2_disk_reservation_get(c, &res, sectors, 1,
345 BCH_DISK_RESERVATION_NOFAIL);
347 die("error reserving space in new filesystem: %s",
350 bch2_mark_bkey_replicas(c, extent_i_to_s_c(e).s_c);
352 ret = bch2_btree_insert(c, BTREE_ID_EXTENTS, &e->k_i,
355 die("btree insert error %s", strerror(-ret));
357 bch2_disk_reservation_put(c, &res);
359 dst->bi_sectors += sectors;
366 static void copy_link(struct bch_fs *c, struct bch_inode_unpacked *dst,
369 ssize_t ret = readlink(src, buf, sizeof(buf));
371 die("readlink error: %m");
373 write_data(c, dst, 0, buf, round_up(ret, block_bytes(c)));
/*
 * copy_file() - migrate the contents of one regular file into @dst.
 *
 * Iterates over the file's FIEMAP extents.  Extents flagged as unknown,
 * encoded, not aligned or inline, and extents located in the first 1 MB of
 * the device, are copied through copy_data().  Every other extent is recorded
 * in @extents and referenced in place through link_data().  Logical offset,
 * length and physical offset must be multiples of the block size, otherwise
 * the function calls die().
 *
 * NOTE(review): the body of the first fiemap pass and the tails of the
 * copy_data() calls are on lines missing from this view.
 */
376 static void copy_file(struct bch_fs *c, struct bch_inode_unpacked *dst,
377 int src_fd, u64 src_size,
378 char *src_path, ranges *extents)
380 struct fiemap_iter iter;
381 struct fiemap_extent e;
/* first pass: look for extents whose location is not known yet */
383 fiemap_for_each(src_fd, iter, e)
384 if (e.fe_flags & FIEMAP_EXTENT_UNKNOWN) {
/* second pass: copy or link each extent */
389 fiemap_for_each(src_fd, iter, e) {
390 if ((e.fe_logical & (block_bytes(c) - 1)) ||
391 (e.fe_length & (block_bytes(c) - 1)))
392 die("Unaligned extent in %s - can't handle", src_path);
/* data that cannot be referenced where it is has to be copied */
394 if (e.fe_flags & (FIEMAP_EXTENT_UNKNOWN|
395 FIEMAP_EXTENT_ENCODED|
396 FIEMAP_EXTENT_NOT_ALIGNED|
397 FIEMAP_EXTENT_DATA_INLINE)) {
398 copy_data(c, dst, src_fd, e.fe_logical,
399 min(src_size - e.fe_logical,
405 * if the data is below 1 MB, copy it so it doesn't conflict
406 * with bcachefs's potentially larger superblock:
408 if (e.fe_physical < 1 << 20) {
409 copy_data(c, dst, src_fd, e.fe_logical,
410 min(src_size - e.fe_logical,
415 if ((e.fe_physical & (block_bytes(c) - 1)))
416 die("Unaligned extent in %s - can't handle", src_path);
/* remember the physical range as in use, then point the new inode at it */
418 range_add(extents, e.fe_physical, e.fe_length);
419 link_data(c, dst, e.fe_logical, e.fe_physical, e.fe_length);
/*
 * State shared across the recursive directory copy.
 * NOTE(review): only one member is visible in this view; other code in this
 * file also reads bcachefs_inum, dev and extents from this struct.
 */
423 struct copy_fs_state {
/* radix tree of u64 values, indexed by source inode number in copy_dir() */
427 GENRADIX(u64) hardlinks;
/*
 * copy_dir() - recursively copy the directory open at @src_fd into @dst.
 *
 * For each entry other than ".", ".." and the inode s->bcachefs_inum:
 *  - dies if the entry is on a different device than s->dev;
 *  - for regular files, consults s->hardlinks by source inode number and, if
 *    a destination inode is already recorded, only adds a link to it;
 *  - otherwise creates a new inode, copies times and xattrs, then copies the
 *    contents according to the entry type and writes the inode back with
 *    update_inode().
 * A readdir() failure ends in die().
 *
 * NOTE(review): the case labels, the fchdir() call and several guards are on
 * lines missing from this view.
 */
431 static void copy_dir(struct copy_fs_state *s,
433 struct bch_inode_unpacked *dst,
434 int src_fd, const char *src_path)
436 DIR *dir = fdopendir(src_fd);
/* errno is cleared first so that end of directory and an error differ */
439 while ((errno = 0), (d = readdir(dir))) {
440 struct bch_inode_unpacked inode;
444 die("chdir error: %m");
/* stat the entry itself, not the target of a symlink */
447 xfstatat(src_fd, d->d_name, AT_SYMLINK_NOFOLLOW);
449 if (!strcmp(d->d_name, ".") ||
450 !strcmp(d->d_name, "..") ||
451 stat.st_ino == s->bcachefs_inum)
454 char *child_path = mprintf("%s/%s", src_path, d->d_name);
456 if (stat.st_dev != s->dev)
457 die("%s does not have correct st_dev!", child_path);
/* only regular files get a slot in the hardlink table */
459 u64 *dst_inum = S_ISREG(stat.st_mode)
460 ? genradix_ptr_alloc(&s->hardlinks, stat.st_ino, GFP_KERNEL)
/* a non-zero slot means this source inode has been seen before */
463 if (dst_inum && *dst_inum) {
464 create_link(c, dst, d->d_name, *dst_inum, S_IFREG);
468 inode = create_file(c, dst, d->d_name,
469 stat.st_uid, stat.st_gid,
470 stat.st_mode, stat.st_rdev);
/* record the new inode number for later links to the same source inode */
473 *dst_inum = inode.bi_inum;
475 copy_times(c, &inode, &stat);
476 copy_xattrs(c, &inode, d->d_name);
480 switch (mode_to_type(stat.st_mode)) {
/* the entry is opened by relative name */
482 fd = xopen(d->d_name, O_RDONLY|O_NOATIME);
483 copy_dir(s, c, &inode, fd, child_path);
487 inode.bi_size = stat.st_size;
489 fd = xopen(d->d_name, O_RDONLY|O_NOATIME);
490 copy_file(c, &inode, fd, stat.st_size,
491 child_path, &s->extents);
495 inode.bi_size = stat.st_size;
497 copy_link(c, &inode, d->d_name);
504 /* nothing else to copy for these: */
/* write the inode back with the changes made above */
510 update_inode(c, &inode);
516 die("readdir error: %m");
/*
 * reserve_new_fs_space() - create and preallocate the file at @file_path.
 *
 * Opens the file (one of the two open() calls requires with O_EXCL that it
 * does not exist yet), checks that it lives on device @dev, stores its inode
 * number in *@bcachefs_inum and preallocates @size bytes with fallocate().
 * The physical extents of the file are then collected via FIEMAP; extents
 * that are unknown, encoded, unaligned, inline, or not a multiple of
 * @block_size are fatal.  The collected ranges are sorted and merged.
 * Every failure visible here ends in die().
 *
 * NOTE(review): the last parameter, the condition selecting the open() call
 * and the return statement are on lines missing from this view.
 */
519 static ranges reserve_new_fs_space(const char *file_path, unsigned block_size,
520 u64 size, u64 *bcachefs_inum, dev_t dev,
524 ? open(file_path, O_RDWR|O_CREAT, 0600)
525 : open(file_path, O_RDWR|O_CREAT|O_EXCL, 0600);
527 die("Error creating %s for bcachefs metadata: %m",
530 struct stat statbuf = xfstat(fd);
/* the file has to be on the device given by the caller */
532 if (statbuf.st_dev != dev)
533 die("bcachefs file has incorrect device");
535 *bcachefs_inum = statbuf.st_ino;
537 if (fallocate(fd, 0, 0, size))
538 die("Error reserving space for bcachefs metadata: %m");
542 struct fiemap_iter iter;
543 struct fiemap_extent e;
544 ranges extents = { NULL };
546 fiemap_for_each(fd, iter, e) {
547 if (e.fe_flags & (FIEMAP_EXTENT_UNKNOWN|
548 FIEMAP_EXTENT_ENCODED|
549 FIEMAP_EXTENT_NOT_ALIGNED|
550 FIEMAP_EXTENT_DATA_INLINE))
551 die("Unable to continue: metadata file not fully mapped");
/* block_size is used as a mask, so it has to be a power of two */
553 if ((e.fe_physical & (block_size - 1)) ||
554 (e.fe_length & (block_size - 1)))
555 die("Unable to continue: unaligned extents in metadata file");
557 range_add(&extents, e.fe_physical, e.fe_length);
561 ranges_sort_merge(&extents);
/*
 * reserve_old_fs_space() - create a file covering all space outside @extents.
 *
 * Creates the regular file "old_migrated_filesystem" (mode 0400, uid/gid 0)
 * in @root_inode, sets its size to the size of device 0 in bytes, and for
 * every hole in the sorted, merged extent list links the device range into
 * the file at the same offset (logical == physical).  The inode is written
 * back at the end.
 *
 * NOTE(review): the extents parameter declaration is on a line missing from
 * this view.
 */
565 static void reserve_old_fs_space(struct bch_fs *c,
566 struct bch_inode_unpacked *root_inode,
569 struct bch_dev *ca = c->devs[0];
570 struct bch_inode_unpacked dst;
571 struct hole_iter iter;
574 dst = create_file(c, root_inode, "old_migrated_filesystem",
575 0, 0, S_IFREG|0400, 0);
/* sectors to bytes */
576 dst.bi_size = bucket_to_sector(ca, ca->mi.nbuckets) << 9;
578 ranges_sort_merge(extents);
580 for_each_hole(iter, *extents, bucket_to_sector(ca, ca->mi.nbuckets) << 9, i)
581 link_data(c, &dst, i.start, i.start, i.end - i.start);
583 update_inode(c, &dst);
/*
 * copy_fs() - copy the whole source tree into the new filesystem.
 *
 * Looks up the root inode of the new filesystem, copies the times and xattrs
 * of the source root onto it, recursively copies the tree with copy_dir(),
 * creates the file covering the old filesystem's space and writes the root
 * inode back.  The extent list and the hardlink table are freed afterwards.
 * Failures visible here end in die().
 *
 * NOTE(review): the fchdir() call and most initialisers of the state struct
 * are on lines missing from this view.
 */
586 static void copy_fs(struct bch_fs *c, int src_fd, const char *src_path,
587 u64 bcachefs_inum, ranges *extents)
591 struct bch_inode_unpacked root_inode;
592 int ret = bch2_inode_find_by_inum(c, BCACHEFS_ROOT_INO, &root_inode);
594 die("error looking up root directory: %s", strerror(-ret));
597 die("chdir error: %m");
599 struct stat stat = xfstat(src_fd);
600 copy_times(c, &root_inode, &stat);
/* "." is a relative path, like the names used in copy_dir() */
601 copy_xattrs(c, &root_inode, ".");
603 struct copy_fs_state s = {
604 .bcachefs_inum = bcachefs_inum,
610 copy_dir(&s, c, &root_inode, src_fd, src_path);
612 reserve_old_fs_space(c, &root_inode, &s.extents);
614 update_inode(c, &root_inode);
616 darray_free(s.extents);
617 genradix_free(&s.hardlinks);
/*
 * find_superblock_space() - pick a location for the superblock in @extents.
 *
 * For each range, rounds the start (no lower than 256 KiB) up and the end
 * down to a bucket boundary, with dev->bucket_size taken as sectors and
 * shifted to bytes.  When at least 128 KiB remain, sets dev->sb_offset to the
 * start in sectors and dev->sb_end 256 sectors (128 KiB) further.  Calls
 * die() when no range qualifies.
 *
 * NOTE(review): the early return after a successful pick is on a line
 * missing from this view.
 */
622 static void find_superblock_space(ranges extents, struct dev_opts *dev)
626 darray_foreach(i, extents) {
627 u64 start = round_up(max(256ULL << 10, i->start),
628 dev->bucket_size << 9);
629 u64 end = round_down(i->end,
630 dev->bucket_size << 9);
/* 128 << 10 bytes is the same amount as the 256 sectors used for sb_end */
632 if (start + (128 << 10) <= end) {
633 dev->sb_offset = start >> 9;
634 dev->sb_end = dev->sb_offset + 256;
639 die("Couldn't find a valid location for superblock");
/* migrate_usage() - print the help text for "bcachefs migrate" to stdout. */
642 static void migrate_usage(void)
644 puts("bcachefs migrate - migrate an existing filesystem to bcachefs\n"
645 "Usage: bcachefs migrate [OPTION]...\n"
648 " -f fs Root of filesystem to migrate(s)\n"
649 " --encrypted Enable whole filesystem encryption (chacha20/poly1305)\n"
650 " --no_passphrase Don't encrypt master encryption key\n"
651 " -F Force, even if metadata file already exists\n"
652 " -h Display this help and exit\n"
653 "Report bugs to <linux-bcache@vger.kernel.org>");
/*
 * Long options accepted by "bcachefs migrate".  Neither takes an argument;
 * the last field of each entry is the value handed to getopt_long() in
 * cmd_migrate().
 * NOTE(review): the terminating entry is on a line missing from this view.
 */
656 static const struct option migrate_opts[] = {
657 { "encrypted", no_argument, NULL, 'e' },
658 { "no_passphrase", no_argument, NULL, 'p' },
/*
 * migrate_fs() - convert the filesystem mounted at @fs_path to bcachefs.
 *
 * Steps visible here:
 *  1. check that @fs_path is a mount point and a directory, and find the
 *     block device behind it;
 *  2. reserve a fifth of the device in the file <fs_path>/bcachefs and pick
 *     a superblock location inside that space;
 *  3. format a bcachefs filesystem, open it without starting it, flag the
 *     space outside the reserved file as unusable, then start it;
 *  4. copy the source tree into the new filesystem;
 *  5. open the new filesystem again with nochanges set (announced to the user
 *     as an fsck run) and print instructions for finishing the migration.
 * Failures visible here end in die().
 *
 * NOTE(review): the force parameter, the error checks guarding several die()
 * calls, the bch2_fs_stop() calls and the return statement are on lines
 * missing from this view.
 */
662 static int migrate_fs(const char *fs_path,
663 struct format_opts format_opts,
666 if (!path_is_fs_root(fs_path))
667 die("%s is not a filesystem root", fs_path);
669 int fs_fd = xopen(fs_path, O_RDONLY|O_NOATIME);
670 struct stat stat = xfstat(fs_fd);
672 if (!S_ISDIR(stat.st_mode))
673 die("%s is not a directory", fs_path);
675 struct dev_opts dev = dev_opts_default();
/* the device backing the source filesystem becomes the bcachefs device */
677 dev.path = dev_t_to_path(stat.st_dev);
678 dev.fd = xopen(dev.path, O_RDWR);
/* block_size is in bytes here; format_opts.block_size is in sectors */
680 unsigned block_size = get_blocksize(dev.path, dev.fd) << 9;
681 BUG_ON(!is_power_of_2(block_size) || block_size < 512);
682 format_opts.block_size = block_size >> 9;
684 char *file_path = mprintf("%s/bcachefs", fs_path);
685 printf("Creating new filesystem on %s in space reserved at %s\n",
686 dev.path, file_path);
688 bch2_pick_bucket_size(format_opts, &dev);
/* reserve one fifth of the device for the new filesystem */
691 ranges extents = reserve_new_fs_space(file_path,
692 format_opts.block_size << 9,
693 get_size(dev.path, dev.fd) / 5,
694 &bcachefs_inum, stat.st_dev, force);
696 find_superblock_space(extents, &dev);
698 struct bch_sb *sb = bch2_format(format_opts, &dev, 1);
699 u64 sb_offset = le64_to_cpu(sb->layout.sb_offset[0]);
701 if (format_opts.passphrase)
702 bch2_add_key(sb, format_opts.passphrase);
706 struct bch_opts opts = bch2_opts_empty();
707 struct bch_fs *c = NULL;
708 char *path[1] = { dev.path };
/* open at the chosen superblock offset, without starting the filesystem */
710 opt_set(opts, sb, sb_offset);
711 opt_set(opts, nostart, true);
712 opt_set(opts, noexcl, true);
714 c = bch2_fs_open(path, 1, opts);
716 die("Error opening new filesystem: %s", strerror(-PTR_ERR(c)));
/* done between open and start */
718 mark_unreserved_space(c, extents);
720 const char *err = bch2_fs_start(c);
722 die("Error starting new filesystem: %s", err);
724 copy_fs(c, fs_fd, fs_path, bcachefs_inum, &extents);
728 printf("Migrate complete, running fsck:\n");
729 opt_set(opts, nostart, false);
730 opt_set(opts, nochanges, true);
732 c = bch2_fs_open(path, 1, opts);
734 die("Error opening new filesystem: %s", strerror(-PTR_ERR(c)));
737 printf("fsck complete\n");
/*
 * The file name printed below has to match the one created by
 * reserve_old_fs_space(), "old_migrated_filesystem".
 */
739 printf("To mount the new filesystem, run\n"
740 " mount -t bcachefs -o sb=%llu %s dir\n"
742 "After verifying that the new filesystem is correct, to create a\n"
743 "superblock at the default offset and finish the migration run\n"
744 " bcachefs migrate-superblock -d %s -o %llu\n"
746 "The new filesystem will have a file at /old_migrated_filesystem\n"
747 "referencing all disk space that might be used by the existing\n"
748 "filesystem. That file can be deleted once the old filesystem is\n"
749 "no longer needed (and should be deleted prior to running\n"
750 "bcachefs migrate-superblock)\n",
751 sb_offset, dev.path, dev.path, sb_offset);
/*
 * cmd_migrate() - entry point for "bcachefs migrate".
 *
 * Parses the command line with getopt_long() (short options "f:Fh" plus the
 * long options in migrate_opts), reads the passphrase twice when encryption
 * is requested without --no_passphrase, and returns the result of
 * migrate_fs().  A missing filesystem path ends in die().
 *
 * NOTE(review): the switch on the option character and the check guarding
 * die() are on lines missing from this view.
 */
755 int cmd_migrate(int argc, char *argv[])
757 struct format_opts format_opts = format_opts_default();
758 char *fs_path = NULL;
759 bool no_passphrase = false, force = false;
762 while ((opt = getopt_long(argc, argv, "f:Fh",
763 migrate_opts, NULL)) != -1)
769 format_opts.encrypted = true;
772 no_passphrase = true;
783 die("Please specify a filesystem to migrate");
/* a passphrase is only needed when the master key is to be encrypted */
785 if (format_opts.encrypted && !no_passphrase)
786 format_opts.passphrase = read_passphrase_twice("Enter passphrase: ");
788 return migrate_fs(fs_path, format_opts, force);
/*
 * migrate_superblock_usage() - print the help text for
 * "bcachefs migrate-superblock" to stdout.
 */
791 static void migrate_superblock_usage(void)
793 puts("bcachefs migrate-superblock - create default superblock after migrating\n"
794 "Usage: bcachefs migrate-superblock [OPTION]...\n"
797 " -d device Device to create superblock for\n"
798 " -o offset Offset of existing superblock\n"
799 " -h Display this help and exit\n"
800 "Report bugs to <linux-bcache@vger.kernel.org>");
803 int cmd_migrate_superblock(int argc, char *argv[])
809 while ((opt = getopt(argc, argv, "d:o:h")) != -1)
815 ret = kstrtou64(optarg, 10, &offset);
817 die("Invalid offset");
820 migrate_superblock_usage();
825 die("Please specify a device");
828 die("Please specify offset of existing superblock");
830 int fd = xopen(dev, O_RDWR);
831 struct bch_sb *sb = __bch2_super_read(fd, offset);
833 if (sb->layout.nr_superblocks >= ARRAY_SIZE(sb->layout.sb_offset))
834 die("Can't add superblock: no space left in superblock layout");
837 for (i = 0; i < sb->layout.nr_superblocks; i++)
838 if (le64_to_cpu(sb->layout.sb_offset[i]) == BCH_SB_SECTOR)
839 die("Superblock layout already has default superblock");
841 memmove(&sb->layout.sb_offset[1],
842 &sb->layout.sb_offset[0],
843 sb->layout.nr_superblocks * sizeof(u64));
844 sb->layout.nr_superblocks++;
846 sb->layout.sb_offset[0] = cpu_to_le64(BCH_SB_SECTOR);
848 bch2_super_write(fd, sb);