9 #include <sys/sysmacros.h>
10 #include <sys/types.h>
14 #include <linux/fiemap.h>
16 #include <linux/stat.h>
18 #include <uuid/uuid.h>
22 #include "libbcachefs.h"
24 #include <linux/dcache.h>
25 #include <linux/generic-radix-tree.h>
26 #include <linux/xattr.h>
27 #include "libbcachefs/bcachefs.h"
28 #include "libbcachefs/alloc_background.h"
29 #include "libbcachefs/alloc_foreground.h"
30 #include "libbcachefs/btree_update.h"
31 #include "libbcachefs/buckets.h"
32 #include "libbcachefs/dirent.h"
33 #include "libbcachefs/fs-common.h"
34 #include "libbcachefs/inode.h"
35 #include "libbcachefs/io.h"
36 #include "libbcachefs/replicas.h"
37 #include "libbcachefs/str_hash.h"
38 #include "libbcachefs/super.h"
39 #include "libbcachefs/xattr.h"
41 /* XXX cut and pasted from fsck.c */
/*
 * QSTR(n): brace initializer used below for struct qstr variables. It sets
 * .len to strlen(n) and .name to n, so the argument is evaluated twice.
 */
42 #define QSTR(n) { { { .len = strlen(n) } }, .name = n }
/*
 * Build a "/dev/..." path string for the block device number @dev.
 *
 * The symlink /sys/dev/block/<major>:<minor> is read with readlink(), the last
 * '/' in the link text is located with strrchr(), and the result is formatted
 * with mprintf("/dev/%s", ...). die() is called when readlink() fails or
 * returns a length that leaves no spare byte in the buffer.
 */
44 static char *dev_t_to_path(dev_t dev)
46 char link[PATH_MAX], *p;
49 char *sysfs_dev = mprintf("/sys/dev/block/%u:%u",
50 major(dev), minor(dev));
51 ret = readlink(sysfs_dev, link, sizeof(link));
/* a result equal to sizeof(link) may mean the link text was truncated */
54 if (ret < 0 || ret >= sizeof(link))
55 die("readlink error while looking up block device: %m");
/* locate the final path component of the link target */
59 p = strrchr(link, '/');
61 die("error looking up device name");
64 return mprintf("/dev/%s", p);
/*
 * Compare @path against the mount points listed in /proc/self/mountinfo.
 *
 * Every line read with getline() is split on single spaces with strsep(): the
 * first four fields (mount id, parent id, dev, root) are discarded and the
 * fifth is compared to @path with strcmp(), so only an exact string match
 * counts. die() is called when the file cannot be opened.
 */
67 static bool path_is_fs_root(const char *path)
69 char *line = NULL, *p, *mount;
74 f = fopen("/proc/self/mountinfo", "r");
76 die("Error getting mount information");
/* line starts out NULL, so getline() supplies the buffer */
78 while (getline(&line, &n, f) != -1) {
81 strsep(&p, " "); /* mount id */
82 strsep(&p, " "); /* parent id */
83 strsep(&p, " "); /* dev */
84 strsep(&p, " "); /* root */
/* fifth field: the mount point; NULL if the line had fewer fields */
85 mount = strsep(&p, " ");
88 if (mount && !strcmp(path, mount))
/*
 * Set the buckets_nouse bit on device 0 for buckets that overlap the ranges
 * produced by for_each_hole() over @extents, bounded by the device size in
 * bytes (bucket_to_sector(ca, nbuckets) << 9).
 *
 * NOTE(review): for_each_hole() presumably yields the gaps between the given
 * extents, i.e. space not reserved for the new filesystem - confirm.
 */
99 static void mark_unreserved_space(struct bch_fs *c, ranges extents)
101 struct bch_dev *ca = c->devs[0];
102 struct hole_iter iter;
105 for_each_hole(iter, extents, bucket_to_sector(ca, ca->mi.nbuckets) << 9, i) {
/* empty range */
108 if (i.start == i.end)
/* range bounds are bytes; >> 9 converts to 512-byte sectors */
111 b = sector_to_bucket(ca, i.start >> 9);
113 set_bit(b, ca->buckets_nouse);
/* keep going while bucket b still starts before the end of the range */
115 } while (bucket_to_sector(ca, b) << 9 < i.end);
/*
 * Pack @inode with bch2_inode_pack() and insert the packed key into the inodes
 * btree, with the snapshot field of the key position forced to U32_MAX.
 * die() reports the strerror() text of the negated return code.
 */
119 static void update_inode(struct bch_fs *c,
120 struct bch_inode_unpacked *inode)
122 struct bkey_inode_buf packed;
125 bch2_inode_pack(c, &packed, inode);
126 packed.inode.k.p.snapshot = U32_MAX;
127 ret = bch2_btree_insert(c, BTREE_ID_inodes, &packed.inode.k_i,
130 die("error updating inode: %s", strerror(-ret));
/*
 * Add a directory entry @name in @parent that refers to the existing inode
 * @inum, by running bch2_link_trans() under bch2_trans_do().
 *
 * Both the parent and the target are addressed as { 1, inode number }.
 * parent_u and inode receive the unpacked inodes from bch2_link_trans() and
 * are not used afterwards in the lines visible here; the same goes for @mode.
 */
133 static void create_link(struct bch_fs *c,
134 struct bch_inode_unpacked *parent,
135 const char *name, u64 inum, mode_t mode)
137 struct qstr qstr = QSTR(name);
138 struct bch_inode_unpacked parent_u;
139 struct bch_inode_unpacked inode;
141 int ret = bch2_trans_do(c, NULL, NULL, 0,
142 bch2_link_trans(&trans,
143 (subvol_inum) { 1, parent->bi_inum }, &parent_u,
144 (subvol_inum) { 1, inum }, &inode, &qstr));
146 die("error creating hardlink: %s", strerror(-ret));
/*
 * Create a new inode under @parent with the given owner, mode and device
 * number, by running bch2_create_trans() under bch2_trans_do().
 *
 * new_inode is prepared with bch2_inode_init_early() first. @parent is passed
 * to bch2_create_trans() directly, so it may be updated by the call. On
 * failure die() reports @name and the strerror() text.
 *
 * Returns a struct bch_inode_unpacked by value (presumably new_inode; the
 * return statement is not visible here).
 */
149 static struct bch_inode_unpacked create_file(struct bch_fs *c,
150 struct bch_inode_unpacked *parent,
152 uid_t uid, gid_t gid,
153 mode_t mode, dev_t rdev)
155 struct qstr qstr = QSTR(name);
156 struct bch_inode_unpacked new_inode;
158 bch2_inode_init_early(c, &new_inode);
160 int ret = bch2_trans_do(c, NULL, NULL, 0,
161 bch2_create_trans(&trans,
162 (subvol_inum) { 1, parent->bi_inum }, parent,
164 uid, gid, mode, rdev, NULL, NULL,
165 (subvol_inum) {}, 0));
167 die("error creating %s: %s", name, strerror(-ret));
/*
 * Loop helper over an array of xattr handler pointers: @handler is loaded from
 * *@handlers, which is post-incremented, both in the initializer and in the
 * step expression. The macro therefore modifies @handlers.
 */
172 #define for_each_xattr_handler(handlers, handler) \
174 for ((handler) = *(handlers)++; \
176 (handler) = *(handlers)++)
/*
 * Look for the entry of bch2_xattr_handlers that applies to the attribute
 * name *@name, testing each handler with strcmp_prefix() against
 * xattr_prefix(handler).
 *
 * Returns ERR_PTR(-EINVAL) when the check on the matched remainder fails, and
 * ERR_PTR(-EOPNOTSUPP) as the final fallback. @name is passed by address.
 * NOTE(review): it is presumably updated on a successful match - confirm.
 */
178 static const struct xattr_handler *xattr_resolve_name(char **name)
180 const struct xattr_handler **handlers = bch2_xattr_handlers;
181 const struct xattr_handler *handler;
183 for_each_xattr_handler(handlers, handler) {
186 n = strcmp_prefix(*name, xattr_prefix(handler));
/* true when exactly one of these holds: handler->prefix is NULL, *n is NUL */
188 if (!handler->prefix ^ !*n) {
191 return ERR_PTR(-EINVAL);
197 return ERR_PTR(-EOPNOTSUPP);
/*
 * Convert the st_atim, st_mtim and st_ctim timestamps of @src with
 * timespec_to_bch2_time() and store them in @dst as bi_atime, bi_mtime and
 * bi_ctime. Only the in-memory inode is changed here.
 */
200 static void copy_times(struct bch_fs *c, struct bch_inode_unpacked *dst,
203 dst->bi_atime = timespec_to_bch2_time(c, src->st_atim);
204 dst->bi_mtime = timespec_to_bch2_time(c, src->st_mtim);
205 dst->bi_ctime = timespec_to_bch2_time(c, src->st_ctim);
/*
 * Copy the extended attributes of the path @src onto inode @dst.
 *
 * Names are listed with llistxattr() and values fetched with lgetxattr().
 * Each attribute is then stored with bch2_xattr_set() on { 1, dst->bi_inum },
 * using the flags of the handler returned by xattr_resolve_name(). die() is
 * called on any listing, reading or insertion error.
 */
208 static void copy_xattrs(struct bch_fs *c, struct bch_inode_unpacked *dst,
211 struct bch_hash_info hash_info = bch2_hash_info_init(c, dst);
/* local buffer for the complete name list */
213 char attrs[XATTR_LIST_MAX];
214 ssize_t attrs_size = llistxattr(src, attrs, sizeof(attrs));
216 die("listxattr error: %m");
220 attr < attrs + attrs_size;
/* names are stored back to back, each NUL-terminated */
222 next = attr + strlen(attr) + 1;
224 char val[XATTR_SIZE_MAX];
225 ssize_t val_size = lgetxattr(src, attr, val, sizeof(val));
228 die("error getting xattr val: %m");
/* NOTE(review): attr is passed by address and may be adjusted by the callee - confirm */
230 const struct xattr_handler *h = xattr_resolve_name(&attr);
232 int ret = bch2_trans_do(c, NULL, NULL, 0,
233 bch2_xattr_set(&trans,
234 (subvol_inum) { 1, dst->bi_inum },
236 val, val_size, h->flags, 0));
238 die("error creating xattr: %s", strerror(-ret));
/* Size in bytes (1 MiB) of the staging buffer and upper bound for one write_data() call. */
242 #define WRITE_DATA_BUF (1 << 20)
/* File-scope, page-aligned staging buffer shared by copy_data() and copy_link(). */
244 static char buf[WRITE_DATA_BUF] __aligned(PAGE_SIZE);
/*
 * Write @len bytes from @buf to inode @dst_inode at byte offset @dst_offset by
 * issuing a bch2_write operation.
 *
 * @dst_offset and @len must be multiples of block_bytes(c) and @len must not
 * exceed WRITE_DATA_BUF; each condition is enforced with BUG_ON(). A disk
 * reservation of len >> 9 sectors with c->opts.data_replicas replicas is
 * taken first, and die() is called if that fails.
 *
 * dst_inode->bi_sectors is increased by len >> 9. Nothing visible here writes
 * the inode back.
 *
 * Note: the @buf parameter shadows the file-scope buffer of the same name.
 */
246 static void write_data(struct bch_fs *c,
247 struct bch_inode_unpacked *dst_inode,
248 u64 dst_offset, void *buf, size_t len)
250 struct bch_write_op op;
/* one bio_vec per page of the largest permitted write */
251 struct bio_vec bv[WRITE_DATA_BUF / PAGE_SIZE];
254 BUG_ON(dst_offset & (block_bytes(c) - 1));
255 BUG_ON(len & (block_bytes(c) - 1));
256 BUG_ON(len > WRITE_DATA_BUF);
258 closure_init_stack(&cl);
260 bio_init(&op.wbio.bio, bv, ARRAY_SIZE(bv));
261 bch2_bio_map(&op.wbio.bio, buf, len);
263 bch2_write_op_init(&op, c, bch2_opts_to_inode_opts(c->opts));
264 op.write_point = writepoint_hashed(0);
/* byte offset converted to 512-byte sectors */
267 op.pos = SPOS(dst_inode->bi_inum, dst_offset >> 9, U32_MAX);
269 int ret = bch2_disk_reservation_get(c, &op.res, len >> 9,
270 c->opts.data_replicas, 0);
272 die("error reserving space in new filesystem: %s", strerror(-ret));
274 closure_call(&op.cl, bch2_write, NULL, &cl);
277 dst_inode->bi_sectors += len >> 9;
/*
 * Copy bytes from file descriptor @src_fd into inode @dst_inode, looping
 * while @start < @end.
 *
 * Each pass reads at most sizeof(buf) bytes (the file-scope staging buffer)
 * at offset @start with xpread(), zero-fills up to the next multiple of
 * block_bytes(c), and hands the padded chunk to write_data() at the same
 * offset.
 */
280 static void copy_data(struct bch_fs *c,
281 struct bch_inode_unpacked *dst_inode,
282 int src_fd, u64 start, u64 end)
284 while (start < end) {
285 unsigned len = min_t(u64, end - start, sizeof(buf));
/* bytes needed to reach a whole number of blocks */
286 unsigned pad = round_up(len, block_bytes(c)) - len;
288 xpread(src_fd, buf, len, start);
289 memset(buf + len, 0, pad);
291 write_data(c, dst_inode, start, buf, len + pad);
/*
 * Insert extent keys for inode @dst into the extents btree that point at
 * data on device 0 (c->devs[0]), rather than writing new data.
 *
 * @logical, @physical and @length must all be multiples of block_bytes(c), and
 * the physical range must end within the device; each is enforced with
 * BUG_ON(). For every key a disk reservation for one replica is taken with
 * BCH_DISK_RESERVATION_NOFAIL, the key is inserted, the reservation is
 * released, and dst->bi_sectors is increased by the sectors covered.
 */
296 static void link_data(struct bch_fs *c, struct bch_inode_unpacked *dst,
297 u64 logical, u64 physical, u64 length)
299 struct bch_dev *ca = c->devs[0];
301 BUG_ON(logical & (block_bytes(c) - 1));
302 BUG_ON(physical & (block_bytes(c) - 1));
303 BUG_ON(length & (block_bytes(c) - 1));
309 BUG_ON(physical + length > bucket_to_sector(ca, ca->mi.nbuckets));
312 struct bkey_i_extent *e;
313 __BKEY_PADDED(k, BKEY_EXTENT_VAL_U64s_MAX) k;
314 u64 b = sector_to_bucket(ca, physical);
315 struct disk_reservation res;
/* first min() operand: sectors left in the bucket that contains physical */
319 sectors = min(ca->mi.bucket_size -
320 (physical & (ca->mi.bucket_size - 1)),
323 e = bkey_extent_init(&k.k);
324 e->k.p.inode = dst->bi_inum;
/* NOTE(review): offset is the end of the range, presumably because extent keys are indexed by end position - confirm */
325 e->k.p.offset = logical + sectors;
326 e->k.p.snapshot = U32_MAX;
328 bch2_bkey_append_ptr(&e->k_i, (struct bch_extent_ptr) {
/* use the current generation of bucket b */
331 .gen = *bucket_gen(ca, b),
334 ret = bch2_disk_reservation_get(c, &res, sectors, 1,
335 BCH_DISK_RESERVATION_NOFAIL);
337 die("error reserving space in new filesystem: %s",
340 ret = bch2_btree_insert(c, BTREE_ID_extents, &e->k_i,
343 die("btree insert error %s", strerror(-ret));
345 bch2_disk_reservation_put(c, &res);
347 dst->bi_sectors += sectors;
354 static void copy_link(struct bch_fs *c, struct bch_inode_unpacked *dst,
357 ssize_t ret = readlink(src, buf, sizeof(buf));
359 die("readlink error: %m");
361 write_data(c, dst, 0, buf, round_up(ret, block_bytes(c)));
/*
 * Copy the contents of the regular file open on @src_fd into inode @dst,
 * walking its extent map with fiemap_for_each() twice.
 *
 * The first walk looks for extents flagged FIEMAP_EXTENT_UNKNOWN (what is done
 * about them is not visible here). In the second walk each extent is either
 * copied through copy_data() or referenced in place through link_data():
 *   - extents flagged UNKNOWN, ENCODED, NOT_ALIGNED or DATA_INLINE are copied;
 *   - extents whose physical offset is below 1 MiB are copied;
 *   - all others are recorded in @extents with range_add() and linked.
 * die() is called, naming @src_path, when an extent's logical offset or length
 * is not a multiple of block_bytes(c), or when the physical offset of an
 * extent that would be linked is not.
 */
364 static void copy_file(struct bch_fs *c, struct bch_inode_unpacked *dst,
365 int src_fd, u64 src_size,
366 char *src_path, ranges *extents)
368 struct fiemap_iter iter;
369 struct fiemap_extent e;
/* first walk: detect extents whose mapping is not known yet */
371 fiemap_for_each(src_fd, iter, e)
372 if (e.fe_flags & FIEMAP_EXTENT_UNKNOWN) {
/* second walk: copy or link each extent */
377 fiemap_for_each(src_fd, iter, e) {
378 if ((e.fe_logical & (block_bytes(c) - 1)) ||
379 (e.fe_length & (block_bytes(c) - 1)))
380 die("Unaligned extent in %s - can't handle", src_path);
/* these extents cannot be referenced by physical location, so copy them */
382 if (e.fe_flags & (FIEMAP_EXTENT_UNKNOWN|
383 FIEMAP_EXTENT_ENCODED|
384 FIEMAP_EXTENT_NOT_ALIGNED|
385 FIEMAP_EXTENT_DATA_INLINE)) {
386 copy_data(c, dst, src_fd, e.fe_logical,
387 min(src_size - e.fe_logical,
393 * if the data is below 1 MB, copy it so it doesn't conflict
394 * with bcachefs's potentially larger superblock:
396 if (e.fe_physical < 1 << 20) {
397 copy_data(c, dst, src_fd, e.fe_logical,
398 min(src_size - e.fe_logical,
/* linking requires the physical offset to be block-aligned too */
403 if ((e.fe_physical & (block_bytes(c) - 1)))
404 die("Unaligned extent in %s - can't handle", src_path);
/* remember the physical range as in use, then point the new inode at it */
406 range_add(extents, e.fe_physical, e.fe_length);
407 link_data(c, dst, e.fe_logical, e.fe_physical, e.fe_length);
/* State carried through the recursive copy_dir() walk. */
411 struct copy_fs_state {
/* indexed by source st_ino; holds the destination inode number for regular files (see copy_dir) */
415 GENRADIX(u64) hardlinks;
/*
 * Recursively copy the entries of the directory open on @src_fd into the
 * destination directory inode @dst.
 *
 * For each entry returned by readdir():
 *   - ".", "..", "lost+found" and the entry whose st_ino equals
 *     s->bcachefs_inum are skipped;
 *   - die() is called if the entry's st_dev differs from s->dev;
 *   - a regular file whose st_ino already has a destination inode recorded in
 *     s->hardlinks is added with create_link() instead of being copied again;
 *   - otherwise a new inode is created with create_file(), timestamps and
 *     xattrs are copied, type-specific content is copied (recursing through
 *     copy_dir(), or using copy_file() or copy_link()), and the inode is
 *     written with update_inode().
 *
 * NOTE(review): entries are opened by their relative d_name, which presumably
 * relies on the working directory being the source directory; the chdir call
 * itself is not visible here.
 */
419 static void copy_dir(struct copy_fs_state *s,
421 struct bch_inode_unpacked *dst,
422 int src_fd, const char *src_path)
424 DIR *dir = fdopendir(src_fd);
/* errno is reset before each readdir() so an error can be told apart afterwards */
427 while ((errno = 0), (d = readdir(dir))) {
428 struct bch_inode_unpacked inode;
432 die("chdir error: %m");
435 xfstatat(src_fd, d->d_name, AT_SYMLINK_NOFOLLOW);
/* skip self, parent, lost+found and the bcachefs metadata file */
437 if (!strcmp(d->d_name, ".") ||
438 !strcmp(d->d_name, "..") ||
439 !strcmp(d->d_name, "lost+found") ||
440 stat.st_ino == s->bcachefs_inum)
443 char *child_path = mprintf("%s/%s", src_path, d->d_name);
445 if (stat.st_dev != s->dev)
446 die("%s does not have correct st_dev!", child_path);
/* slot for this source inode number; allocated only for regular files */
448 u64 *dst_inum = S_ISREG(stat.st_mode)
449 ? genradix_ptr_alloc(&s->hardlinks, stat.st_ino, GFP_KERNEL)
/* already copied under another name: add a link to the existing inode */
452 if (dst_inum && *dst_inum) {
453 create_link(c, dst, d->d_name, *dst_inum, S_IFREG);
457 inode = create_file(c, dst, d->d_name,
458 stat.st_uid, stat.st_gid,
459 stat.st_mode, stat.st_rdev);
/* record the new inode number for later names of the same source inode */
462 *dst_inum = inode.bi_inum;
464 copy_times(c, &inode, &stat);
465 copy_xattrs(c, &inode, d->d_name);
469 switch (mode_to_type(stat.st_mode)) {
471 fd = xopen(d->d_name, O_RDONLY|O_NOATIME);
472 copy_dir(s, c, &inode, fd, child_path);
476 inode.bi_size = stat.st_size;
478 fd = xopen(d->d_name, O_RDONLY|O_NOATIME);
479 copy_file(c, &inode, fd, stat.st_size,
480 child_path, &s->extents);
484 inode.bi_size = stat.st_size;
486 copy_link(c, &inode, d->d_name);
493 /* nothing else to copy for these: */
/* write back the in-memory inode with the fields updated above */
499 update_inode(c, &inode);
505 die("readdir error: %m");
/*
 * Create (or reuse) the file @file_path, allocate @size bytes for it with
 * fallocate(), and collect the physical ranges backing it.
 *
 * The file is opened with O_RDWR|O_CREAT, with O_EXCL added in one of the two
 * open() variants. die() is called if the file cannot be created, if its
 * st_dev differs from @dev, if fallocate() fails, if any extent is flagged
 * UNKNOWN, ENCODED, NOT_ALIGNED or DATA_INLINE, or if an extent's physical
 * offset or length is not a multiple of @block_size.
 *
 * The file's inode number is stored in *@bcachefs_inum. The collected ranges
 * are passed through ranges_sort_merge().
 *
 * NOTE(review): fe_physical and fe_length are byte quantities, while the
 * caller in this file passes fs_opts.block_size >> 9 as @block_size. Confirm
 * the units intended for the alignment check.
 */
508 static ranges reserve_new_fs_space(const char *file_path, unsigned block_size,
509 u64 size, u64 *bcachefs_inum, dev_t dev,
513 ? open(file_path, O_RDWR|O_CREAT, 0600)
514 : open(file_path, O_RDWR|O_CREAT|O_EXCL, 0600);
516 die("Error creating %s for bcachefs metadata: %m",
519 struct stat statbuf = xfstat(fd);
/* the file must be on the device being migrated */
521 if (statbuf.st_dev != dev)
522 die("bcachefs file has incorrect device");
524 *bcachefs_inum = statbuf.st_ino;
526 if (fallocate(fd, 0, 0, size))
527 die("Error reserving space for bcachefs metadata: %m");
531 struct fiemap_iter iter;
532 struct fiemap_extent e;
533 ranges extents = { NULL };
535 fiemap_for_each(fd, iter, e) {
536 if (e.fe_flags & (FIEMAP_EXTENT_UNKNOWN|
537 FIEMAP_EXTENT_ENCODED|
538 FIEMAP_EXTENT_NOT_ALIGNED|
539 FIEMAP_EXTENT_DATA_INLINE))
540 die("Unable to continue: metadata file not fully mapped");
542 if ((e.fe_physical & (block_size - 1)) ||
543 (e.fe_length & (block_size - 1)))
544 die("Unable to continue: unaligned extents in metadata file");
546 range_add(&extents, e.fe_physical, e.fe_length);
550 ranges_sort_merge(&extents);
/*
 * Create the file "old_migrated_filesystem" under @root_inode (uid 0, gid 0,
 * mode S_IFREG|0400) and give it extents over ranges of device 0.
 *
 * Its size is set to the device size in bytes. After sorting and merging
 * *@extents, every range produced by for_each_hole() is linked with
 * link_data() using the same value for the logical and the physical offset.
 * The inode is then written with update_inode().
 */
554 static void reserve_old_fs_space(struct bch_fs *c,
555 struct bch_inode_unpacked *root_inode,
558 struct bch_dev *ca = c->devs[0];
559 struct bch_inode_unpacked dst;
560 struct hole_iter iter;
563 dst = create_file(c, root_inode, "old_migrated_filesystem",
564 0, 0, S_IFREG|0400, 0);
/* device size: sectors << 9 gives bytes */
565 dst.bi_size = bucket_to_sector(ca, ca->mi.nbuckets) << 9;
567 ranges_sort_merge(extents);
569 for_each_hole(iter, *extents, bucket_to_sector(ca, ca->mi.nbuckets) << 9, i)
/* identity mapping: file offset equals device offset */
570 link_data(c, &dst, i.start, i.start, i.end - i.start);
572 update_inode(c, &dst);
/*
 * Copy the filesystem tree rooted at @src_fd / @src_path into the bcachefs
 * filesystem @c.
 *
 * The destination root inode (BCACHEFS_ROOT_INO in subvolume 1) is looked up.
 * Timestamps and xattrs of the source root are copied onto it, the tree is
 * copied with copy_dir(), reserve_old_fs_space() is called with the extents
 * gathered in the copy state, and the root inode is written with
 * update_inode(). The state's extents and hardlinks containers are freed at
 * the end.
 */
575 static void copy_fs(struct bch_fs *c, int src_fd, const char *src_path,
576 u64 bcachefs_inum, ranges *extents)
580 struct bch_inode_unpacked root_inode;
581 int ret = bch2_inode_find_by_inum(c, (subvol_inum) { 1, BCACHEFS_ROOT_INO },
584 die("error looking up root directory: %s", strerror(-ret));
587 die("chdir error: %m");
589 struct stat stat = xfstat(src_fd);
590 copy_times(c, &root_inode, &stat);
/* "." is a relative path, so it resolves against the current working directory */
591 copy_xattrs(c, &root_inode, ".");
593 struct copy_fs_state s = {
594 .bcachefs_inum = bcachefs_inum,
600 copy_dir(&s, c, &root_inode, src_fd, src_path);
602 reserve_old_fs_space(c, &root_inode, &s.extents);
604 update_inode(c, &root_inode);
606 darray_free(s.extents);
607 genradix_free(&s.hardlinks);
/*
 * Pick a location for the superblocks inside one of the ranges in @extents.
 *
 * For each range, the start is raised to at least 256 KiB and rounded up to a
 * multiple of dev->bucket_size << 9, and the end is rounded down to the same
 * multiple. The first range with room for two superblocks of
 * opts.superblock_size << 9 bytes is used: dev->sb_offset is set to
 * start >> 9 and dev->sb_end to sb_offset plus twice the superblock size.
 * die() is called if no range qualifies.
 */
610 static void find_superblock_space(ranges extents,
611 struct format_opts opts,
612 struct dev_opts *dev)
616 darray_foreach(i, extents) {
617 u64 start = round_up(max(256ULL << 10, i->start),
618 dev->bucket_size << 9);
619 u64 end = round_down(i->end,
620 dev->bucket_size << 9);
622 /* Need space for two superblocks: */
623 if (start + (opts.superblock_size << 9) * 2 <= end) {
624 dev->sb_offset = start >> 9;
625 dev->sb_end = dev->sb_offset + opts.superblock_size * 2;
630 die("Couldn't find a valid location for superblock");
/*
 * Print the help text for "bcachefs migrate" to standard output.
 *
 * The -f description names a single filesystem root, matching cmd_migrate(),
 * which keeps only one fs_path.
 */
633 static void migrate_usage(void)
635 puts("bcachefs migrate - migrate an existing filesystem to bcachefs\n"
636 "Usage: bcachefs migrate [OPTION]...\n"
639 " -f fs Root of filesystem to migrate\n"
640 " --encrypted Enable whole filesystem encryption (chacha20/poly1305)\n"
641 " --no_passphrase Don't encrypt master encryption key\n"
642 " -F Force, even if metadata file already exists\n"
643 " -h Display this help and exit\n"
644 "Report bugs to <linux-bcache@vger.kernel.org>");
/*
 * Long options for "bcachefs migrate": --encrypted is reported as 'e' and
 * --no_passphrase as 'p'; neither takes an argument.
 */
647 static const struct option migrate_opts[] = {
648 { "encrypted", no_argument, NULL, 'e' },
649 { "no_passphrase", no_argument, NULL, 'p' },
/*
 * Migrate the filesystem mounted at @fs_path to bcachefs on the same device.
 *
 * Steps visible here:
 *   1. Check that @fs_path is a mount point and a directory, and resolve and
 *      open its backing block device.
 *   2. Reserve space for the new filesystem in the file "<fs_path>/bcachefs"
 *      (one fifth of the device size) and pick a superblock location inside
 *      that space.
 *   3. Format the device with bch2_format(), adding the passphrase key if one
 *      was given.
 *   4. Open the new filesystem with the nostart and noexcl options, mark the
 *      space outside the reserved extents with mark_unreserved_space(), start
 *      it, and copy the old tree with copy_fs().
 *   5. Re-open the filesystem with nochanges set for the fsck run, then print
 *      follow-up instructions.
 *
 * die() is called on every error path shown here. The printed instructions
 * name the file created by reserve_old_fs_space(), "old_migrated_filesystem".
 */
653 static int migrate_fs(const char *fs_path,
654 struct bch_opt_strs fs_opt_strs,
655 struct bch_opts fs_opts,
656 struct format_opts format_opts,
659 if (!path_is_fs_root(fs_path))
660 die("%s is not a filesystem root", fs_path);
662 int fs_fd = xopen(fs_path, O_RDONLY|O_NOATIME);
663 struct stat stat = xfstat(fs_fd);
665 if (!S_ISDIR(stat.st_mode))
666 die("%s is not a directory", fs_path);
668 struct dev_opts dev = dev_opts_default();
/* block device backing the filesystem being migrated */
670 dev.path = dev_t_to_path(stat.st_dev);
671 dev.fd = xopen(dev.path, O_RDWR);
673 opt_set(fs_opts, block_size, get_blocksize(dev.path, dev.fd));
675 char *file_path = mprintf("%s/bcachefs", fs_path);
676 printf("Creating new filesystem on %s in space reserved at %s\n",
677 dev.path, file_path);
679 bch2_pick_bucket_size(fs_opts, &dev);
/* reserve one fifth of the device size for the new filesystem */
682 ranges extents = reserve_new_fs_space(file_path,
683 fs_opts.block_size >> 9,
684 get_size(dev.path, dev.fd) / 5,
685 &bcachefs_inum, stat.st_dev, force);
687 find_superblock_space(extents, format_opts, &dev);
689 struct bch_sb *sb = bch2_format(fs_opt_strs,
690 fs_opts,format_opts, &dev, 1);
691 u64 sb_offset = le64_to_cpu(sb->layout.sb_offset[0]);
693 if (format_opts.passphrase)
694 bch2_add_key(sb, format_opts.passphrase);
698 struct bch_opts opts = bch2_opts_empty();
699 struct bch_fs *c = NULL;
700 char *path[1] = { dev.path };
/* the superblock is not at the default location, so pass its offset */
702 opt_set(opts, sb, sb_offset);
703 opt_set(opts, nostart, true);
704 opt_set(opts, noexcl, true);
706 c = bch2_fs_open(path, 1, opts);
708 die("Error opening new filesystem: %s", strerror(-PTR_ERR(c)));
/* done after opening but before starting the filesystem */
710 mark_unreserved_space(c, extents);
712 int ret = bch2_fs_start(c);
714 die("Error starting new filesystem: %s", strerror(-ret));
716 copy_fs(c, fs_fd, fs_path, bcachefs_inum, &extents);
720 printf("Migrate complete, running fsck:\n");
721 opt_set(opts, nostart, false);
722 opt_set(opts, nochanges, true);
724 c = bch2_fs_open(path, 1, opts);
726 die("Error opening new filesystem: %s", strerror(-PTR_ERR(c)));
729 printf("fsck complete\n");
731 printf("To mount the new filesystem, run\n"
732 " mount -t bcachefs -o sb=%llu %s dir\n"
734 "After verifying that the new filesystem is correct, to create a\n"
735 "superblock at the default offset and finish the migration run\n"
736 " bcachefs migrate-superblock -d %s -o %llu\n"
738 "The new filesystem will have a file at /old_migrated_filesystem\n"
739 "referencing all disk space that might be used by the existing\n"
740 "filesystem. That file can be deleted once the old filesystem is\n"
741 "no longer needed (and should be deleted prior to running\n"
742 "bcachefs migrate-superblock)\n",
743 sb_offset, dev.path, dev.path, sb_offset);
/*
 * Entry point for "bcachefs migrate".
 *
 * Format-related options are extracted first with bch2_cmdline_opts_get() and
 * parsed into fs_opts. The remaining arguments are handled with
 * getopt_long() using the short options "f:Fh" plus the long options in
 * migrate_opts. die() is called when no filesystem path was given. When
 * encryption is enabled and no_passphrase is not set, a passphrase is read
 * twice from the user. The migration itself is done by migrate_fs(), after
 * which the option strings are freed.
 */
747 int cmd_migrate(int argc, char *argv[])
749 struct format_opts format_opts = format_opts_default();
750 char *fs_path = NULL;
751 bool no_passphrase = false, force = false;
754 struct bch_opt_strs fs_opt_strs =
755 bch2_cmdline_opts_get(&argc, argv, OPT_FORMAT);
756 struct bch_opts fs_opts = bch2_parse_opts(fs_opt_strs);
758 while ((opt = getopt_long(argc, argv, "f:Fh",
759 migrate_opts, NULL)) != -1)
765 format_opts.encrypted = true;
768 no_passphrase = true;
779 die("Please specify a filesystem to migrate");
781 if (format_opts.encrypted && !no_passphrase)
782 format_opts.passphrase = read_passphrase_twice("Enter passphrase: ");
784 int ret = migrate_fs(fs_path,
788 bch2_opt_strs_free(&fs_opt_strs);
/* Print the help text for "bcachefs migrate-superblock" to standard output. */
792 static void migrate_superblock_usage(void)
794 puts("bcachefs migrate-superblock - create default superblock after migrating\n"
795 "Usage: bcachefs migrate-superblock [OPTION]...\n"
798 " -d device Device to create superblock for\n"
799 " -o offset Offset of existing superblock\n"
800 " -h Display this help and exit\n"
801 "Report bugs to <linux-bcache@vger.kernel.org>");
/*
 * Entry point for "bcachefs migrate-superblock".
 *
 * Options are parsed with getopt() using "d:o:h"; the offset is parsed as a
 * base-10 u64 with kstrtou64(). die() is called when the device or the offset
 * is missing. The superblock at the given offset is read from the device, and
 * BCH_SB_SECTOR is inserted as the first entry of its layout's sb_offset
 * array before the superblock is written back with bch2_super_write().
 *
 * die() is also called when the layout has no free slot left or already
 * contains an entry equal to BCH_SB_SECTOR.
 */
804 int cmd_migrate_superblock(int argc, char *argv[])
810 while ((opt = getopt(argc, argv, "d:o:h")) != -1)
816 ret = kstrtou64(optarg, 10, &offset);
818 die("Invalid offset");
821 migrate_superblock_usage();
826 die("Please specify a device");
829 die("Please specify offset of existing superblock");
831 int fd = xopen(dev, O_RDWR);
832 struct bch_sb *sb = __bch2_super_read(fd, offset);
/* the layout array must have room for one more entry */
834 if (sb->layout.nr_superblocks >= ARRAY_SIZE(sb->layout.sb_offset))
835 die("Can't add superblock: no space left in superblock layout");
838 for (i = 0; i < sb->layout.nr_superblocks; i++)
839 if (le64_to_cpu(sb->layout.sb_offset[i]) == BCH_SB_SECTOR)
840 die("Superblock layout already has default superblock");
/* shift the existing entries up by one slot; the regions overlap, hence memmove */
842 memmove(&sb->layout.sb_offset[1],
843 &sb->layout.sb_offset[0],
844 sb->layout.nr_superblocks * sizeof(u64));
845 sb->layout.nr_superblocks++;
/* the default location becomes the first entry */
847 sb->layout.sb_offset[0] = cpu_to_le64(BCH_SB_SECTOR);
849 bch2_super_write(fd, sb);