9 #include <sys/sysmacros.h>
10 #include <sys/types.h>
14 #include <linux/fiemap.h>
16 #include <linux/stat.h>
18 #include <uuid/uuid.h>
22 #include "libbcachefs.h"
24 #include <linux/dcache.h>
25 #include <linux/generic-radix-tree.h>
26 #include <linux/xattr.h>
27 #include "libbcachefs/bcachefs.h"
28 #include "libbcachefs/alloc_background.h"
29 #include "libbcachefs/alloc_foreground.h"
30 #include "libbcachefs/btree_update.h"
31 #include "libbcachefs/buckets.h"
32 #include "libbcachefs/dirent.h"
33 #include "libbcachefs/errcode.h"
34 #include "libbcachefs/fs-common.h"
35 #include "libbcachefs/inode.h"
36 #include "libbcachefs/io_write.h"
37 #include "libbcachefs/replicas.h"
38 #include "libbcachefs/str_hash.h"
39 #include "libbcachefs/super.h"
40 #include "libbcachefs/xattr.h"
42 /* XXX cut and pasted from fsck.c */
/*
 * Brace initializer used for `struct qstr` locals in create_link() and
 * create_file(): fills .len with strlen(n) and .name with n.
 * Note that the argument n is expanded (and so evaluated) twice.
 */
43 #define QSTR(n) { { { .len = strlen(n) } }, .name = n }
/*
 * Build a "/dev/<name>" path for the block device number @dev.
 *
 * Resolves the /sys/dev/block/<major>:<minor> symlink with readlink() and
 * locates the last '/' in the link target with strrchr(); the returned
 * string is formatted by mprintf() from that position.
 *
 * Dies if readlink() fails or the target does not fit in link[], and with
 * "error looking up device name" on the name lookup failure path.
 *
 * NOTE(review): both sysfs_dev and the return value come from mprintf(),
 * which presumably heap-allocates - confirm who is expected to free them.
 */
45 static char *dev_t_to_path(dev_t dev)
47 char link[PATH_MAX], *p;
50 char *sysfs_dev = mprintf("/sys/dev/block/%u:%u",
51 major(dev), minor(dev));
52 ret = readlink(sysfs_dev, link, sizeof(link));
/* ret == sizeof(link) is rejected too: the target may have been truncated */
55 if (ret < 0 || ret >= sizeof(link))
56 die("readlink error while looking up block device: %m");
60 p = strrchr(link, '/');
62 die("error looking up device name");
65 return mprintf("/dev/%s", p);
/*
 * Check @path against the mount points listed in /proc/self/mountinfo.
 *
 * Each line is read with getline(); the first four space-separated fields
 * (mount id, parent id, dev, root) are skipped and the fifth is taken as the
 * mount point. The comparison is an exact strcmp(), so @path must be spelled
 * exactly as it appears in mountinfo (no normalisation is done here).
 *
 * Dies if /proc/self/mountinfo cannot be opened.
 */
68 static bool path_is_fs_root(const char *path)
70 char *line = NULL, *p, *mount;
75 f = fopen("/proc/self/mountinfo", "r");
77 die("Error getting mount information");
79 while (getline(&line, &n, f) != -1) {
82 strsep(&p, " "); /* mount id */
83 strsep(&p, " "); /* parent id */
84 strsep(&p, " "); /* dev */
85 strsep(&p, " "); /* root */
86 mount = strsep(&p, " ");
/* mount is NULL when the line had fewer than five fields */
89 if (mount && !strcmp(path, mount))
/*
 * Mark buckets of device 0 that fall in the holes between @extents as
 * not-to-be-used by setting their bits in ca->buckets_nouse.
 *
 * Holes are iterated over the byte range [0, device size), where the device
 * size is bucket_to_sector(ca, nbuckets) << 9 (sectors to bytes).
 */
100 static void mark_unreserved_space(struct bch_fs *c, ranges extents)
102 struct bch_dev *ca = c->devs[0];
103 struct hole_iter iter;
106 for_each_hole(iter, extents, bucket_to_sector(ca, ca->mi.nbuckets) << 9, i) {
/* empty hole: nothing to mark */
109 if (i.start == i.end)
/* first bucket touched by the hole; i.start is in bytes, hence >> 9 */
112 b = sector_to_bucket(ca, i.start >> 9);
114 set_bit(b, ca->buckets_nouse);
/* keep going while the bucket's starting byte is still inside the hole */
116 } while (bucket_to_sector(ca, b) << 9 < i.end);
/*
 * Write the in-memory @inode back to the inodes btree.
 *
 * The inode is packed with bch2_inode_pack(), its key position's snapshot
 * field is set to U32_MAX, and the key is inserted with bch2_btree_insert().
 * Dies with "error updating inode" on failure.
 */
120 static void update_inode(struct bch_fs *c,
121 struct bch_inode_unpacked *inode)
123 struct bkey_inode_buf packed;
126 bch2_inode_pack(&packed, inode);
127 packed.inode.k.p.snapshot = U32_MAX;
128 ret = bch2_btree_insert(c, BTREE_ID_inodes, &packed.inode.k_i,
131 die("error updating inode: %s", bch2_err_str(ret));
/*
 * Create a hard link called @name in directory @parent that refers to the
 * existing inode number @inum.
 *
 * Runs bch2_link_trans() inside bch2_trans_do(); both the directory and the
 * target are addressed as subvol_inum { 1, <inode number> }. parent_u and
 * inode are scratch outputs of the transaction and are not used afterwards
 * on the lines shown. Dies with "error creating hardlink" on failure.
 *
 * NOTE(review): @mode is not referenced on any visible line of this function.
 */
134 static void create_link(struct bch_fs *c,
135 struct bch_inode_unpacked *parent,
136 const char *name, u64 inum, mode_t mode)
138 struct qstr qstr = QSTR(name);
139 struct bch_inode_unpacked parent_u;
140 struct bch_inode_unpacked inode;
142 int ret = bch2_trans_do(c, NULL, NULL, 0,
143 bch2_link_trans(trans,
144 (subvol_inum) { 1, parent->bi_inum }, &parent_u,
145 (subvol_inum) { 1, inum }, &inode, &qstr));
147 die("error creating hardlink: %s", bch2_err_str(ret));
/*
 * Create a new inode (of the type encoded in @mode) in directory @parent
 * with the given owner and device number.
 *
 * new_inode is prepared with bch2_inode_init_early() and the creation is done
 * by bch2_create_trans() inside bch2_trans_do(); the parent is addressed as
 * subvol_inum { 1, parent->bi_inum } and @parent itself is passed to the
 * transaction. Dies with "error creating <name>" on failure.
 *
 * Returns a struct bch_inode_unpacked by value (presumably new_inode - the
 * return statement is not among the lines shown).
 */
150 static struct bch_inode_unpacked create_file(struct bch_fs *c,
151 struct bch_inode_unpacked *parent,
153 uid_t uid, gid_t gid,
154 mode_t mode, dev_t rdev)
156 struct qstr qstr = QSTR(name);
157 struct bch_inode_unpacked new_inode;
159 bch2_inode_init_early(c, &new_inode);
161 int ret = bch2_trans_do(c, NULL, NULL, 0,
162 bch2_create_trans(trans,
163 (subvol_inum) { 1, parent->bi_inum }, parent,
165 uid, gid, mode, rdev, NULL, NULL,
166 (subvol_inum) {}, 0));
168 die("error creating %s: %s", name, bch2_err_str(ret));
/*
 * Iterate over an array of xattr handler pointers, loading each element into
 * @handler. The macro post-increments @handlers itself, so the pointer
 * variable passed in is advanced by the loop.
 * NOTE(review): the loop condition is not among the lines shown; presumably
 * the array is NULL-terminated - confirm.
 */
173 #define for_each_xattr_handler(handlers, handler) \
175 for ((handler) = *(handlers)++; \
177 (handler) = *(handlers)++)
/*
 * Find the handler in bch2_xattr_handlers whose prefix matches *@name.
 *
 * Each handler's prefix (via xattr_prefix()) is tested against *@name with
 * strcmp_prefix(). Returns ERR_PTR(-EINVAL) from inside the loop in the case
 * guarded by the `!handler->prefix ^ !*n` test, and ERR_PTR(-EOPNOTSUPP) when
 * the loop finishes without returning a handler. Callers must therefore
 * treat the result as a possible error pointer.
 *
 * NOTE(review): @name is passed by reference, so *@name is presumably
 * advanced past the matched prefix on success - confirm.
 */
179 static const struct xattr_handler *xattr_resolve_name(char **name)
181 const struct xattr_handler **handlers = bch2_xattr_handlers;
182 const struct xattr_handler *handler;
184 for_each_xattr_handler(handlers, handler) {
187 n = strcmp_prefix(*name, xattr_prefix(handler));
189 if (!handler->prefix ^ !*n) {
192 return ERR_PTR(-EINVAL);
198 return ERR_PTR(-EOPNOTSUPP);
/*
 * Copy the access, modification and change timestamps from @src into the
 * in-memory inode @dst, converting each with timespec_to_bch2_time().
 * Only @dst is modified; nothing is written to the filesystem here.
 */
201 static void copy_times(struct bch_fs *c, struct bch_inode_unpacked *dst,
204 dst->bi_atime = timespec_to_bch2_time(c, src->st_atim);
205 dst->bi_mtime = timespec_to_bch2_time(c, src->st_mtim);
206 dst->bi_ctime = timespec_to_bch2_time(c, src->st_ctim);
/*
 * Copy every extended attribute of the source path @src onto inode @dst in
 * the new filesystem.
 *
 * The attribute names are fetched in one llistxattr() call as a sequence of
 * NUL-terminated strings; each value is read with lgetxattr() and stored via
 * bch2_xattr_set() inside bch2_trans_do(), using the flags of the handler
 * returned by xattr_resolve_name(). Dies on any list/get/set failure.
 */
209 static void copy_xattrs(struct bch_fs *c, struct bch_inode_unpacked *dst,
212 struct bch_hash_info hash_info = bch2_hash_info_init(c, dst);
/* on-stack buffer sized for the largest possible name list */
214 char attrs[XATTR_LIST_MAX];
215 ssize_t attrs_size = llistxattr(src, attrs, sizeof(attrs));
217 die("listxattr error: %m");
221 attr < attrs + attrs_size;
/* names are packed back to back, each followed by its NUL */
223 next = attr + strlen(attr) + 1;
225 char val[XATTR_SIZE_MAX];
226 ssize_t val_size = lgetxattr(src, attr, val, sizeof(val));
229 die("error getting xattr val: %m");
/*
 * NOTE(review): xattr_resolve_name() can return ERR_PTR(-EINVAL) or
 * ERR_PTR(-EOPNOTSUPP); no IS_ERR() check is visible before h->flags is
 * dereferenced below - confirm one exists or add it.
 */
231 const struct xattr_handler *h = xattr_resolve_name(&attr);
232 struct bch_inode_unpacked inode_u;
234 int ret = bch2_trans_do(c, NULL, NULL, 0,
235 bch2_xattr_set(trans,
236 (subvol_inum) { 1, dst->bi_inum },
237 &inode_u, &hash_info, attr,
238 val, val_size, h->flags, 0));
240 die("error creating xattr: %s", bch2_err_str(ret));
/* Largest amount of data handed to a single write_data() call: 1 MiB. */
244 #define WRITE_DATA_BUF (1 << 20)
/*
 * Page-aligned staging buffer shared by copy_data() and copy_link().
 * Being a single static buffer, it holds whatever the previous user left.
 */
246 static char buf[WRITE_DATA_BUF] __aligned(PAGE_SIZE);
/*
 * Write @len bytes from @buf into @dst_inode at byte offset @dst_offset of
 * the new filesystem, using a synchronous (BCH_WRITE_SYNC) write op.
 *
 * @dst_offset and @len must be multiples of the block size and @len must not
 * exceed WRITE_DATA_BUF; violations hit BUG_ON(). Dies if disk space cannot
 * be reserved. dst_inode->bi_sectors is increased by len >> 9 in memory only;
 * callers persist the inode separately (see update_inode()).
 *
 * Note: the @buf parameter shadows the file-scope buf[] inside this function.
 */
248 static void write_data(struct bch_fs *c,
249 struct bch_inode_unpacked *dst_inode,
250 u64 dst_offset, void *buf, size_t len)
252 struct bch_write_op op;
/* one bio_vec per page of the largest permitted write */
253 struct bio_vec bv[WRITE_DATA_BUF / PAGE_SIZE];
256 BUG_ON(dst_offset & (block_bytes(c) - 1));
257 BUG_ON(len & (block_bytes(c) - 1));
258 BUG_ON(len > WRITE_DATA_BUF);
260 closure_init_stack(&cl);
262 bio_init(&op.wbio.bio, NULL, bv, ARRAY_SIZE(bv), 0);
263 bch2_bio_map(&op.wbio.bio, buf, len);
265 bch2_write_op_init(&op, c, bch2_opts_to_inode_opts(c->opts));
266 op.write_point = writepoint_hashed(0);
/* position is (inode, offset in 512-byte sectors, snapshot U32_MAX) */
269 op.pos = SPOS(dst_inode->bi_inum, dst_offset >> 9, U32_MAX);
270 op.flags |= BCH_WRITE_SYNC;
272 int ret = bch2_disk_reservation_get(c, &op.res, len >> 9,
273 c->opts.data_replicas, 0);
275 die("error reserving space in new filesystem: %s", bch2_err_str(ret));
277 closure_call(&op.cl, bch2_write, NULL, &cl);
279 dst_inode->bi_sectors += len >> 9;
/*
 * Copy the byte range [@start, @end) of @src_fd into @dst_inode at the same
 * file offsets, by reading into the shared buf[] and calling write_data().
 *
 * Each pass handles at most sizeof(buf) bytes. A chunk that is not a
 * multiple of the block size is zero-padded up to the next block boundary
 * before being written, so write_data()'s alignment requirement on the
 * length holds.
 */
282 static void copy_data(struct bch_fs *c,
283 struct bch_inode_unpacked *dst_inode,
284 int src_fd, u64 start, u64 end)
286 while (start < end) {
287 unsigned len = min_t(u64, end - start, sizeof(buf));
/* bytes needed to round len up to a whole block */
288 unsigned pad = round_up(len, block_bytes(c)) - len;
290 xpread(src_fd, buf, len, start);
291 memset(buf + len, 0, pad);
293 write_data(c, dst_inode, start, buf, len + pad);
/*
 * Make @dst reference data that is already on device 0 instead of copying
 * it: extent keys are inserted into the extents btree with a pointer built
 * by bch2_bkey_append_ptr() using the generation of the bucket containing
 * @physical.
 *
 * @logical, @physical and @length must be block aligned, and the physical
 * range must lie within the device; violations hit BUG_ON().
 * NOTE(review): the alignment checks mask with block_bytes() while the range
 * check compares against bucket_to_sector(), so the three values are
 * presumably converted from bytes to sectors in between - confirm.
 *
 * Each inserted extent covers at most the remainder of the bucket containing
 * @physical. dst->bi_sectors is increased in memory for every extent added.
 * Dies on reservation or btree insert failure.
 */
298 static void link_data(struct bch_fs *c, struct bch_inode_unpacked *dst,
299 u64 logical, u64 physical, u64 length)
301 struct bch_dev *ca = c->devs[0];
303 BUG_ON(logical & (block_bytes(c) - 1));
304 BUG_ON(physical & (block_bytes(c) - 1));
305 BUG_ON(length & (block_bytes(c) - 1));
311 BUG_ON(physical + length > bucket_to_sector(ca, ca->mi.nbuckets));
314 struct bkey_i_extent *e;
315 BKEY_PADDED_ONSTACK(k, BKEY_EXTENT_VAL_U64s_MAX) k;
316 u64 b = sector_to_bucket(ca, physical);
317 struct disk_reservation res;
/* never let one extent run past the end of its bucket */
321 sectors = min(ca->mi.bucket_size -
322 (physical & (ca->mi.bucket_size - 1)),
325 e = bkey_extent_init(&k.k);
326 e->k.p.inode = dst->bi_inum;
/* the key's offset is set to the end of the range it covers */
327 e->k.p.offset = logical + sectors;
328 e->k.p.snapshot = U32_MAX;
330 bch2_bkey_append_ptr(&e->k_i, (struct bch_extent_ptr) {
333 .gen = *bucket_gen(ca, b),
/* single replica, and the reservation is requested with NOFAIL */
336 ret = bch2_disk_reservation_get(c, &res, sectors, 1,
337 BCH_DISK_RESERVATION_NOFAIL);
339 die("error reserving space in new filesystem: %s",
342 ret = bch2_btree_insert(c, BTREE_ID_extents, &e->k_i, &res, 0);
344 die("btree insert error %s", bch2_err_str(ret));
346 bch2_disk_reservation_put(c, &res);
348 dst->bi_sectors += sectors;
/*
 * Store the target of the symlink at path @src as the data of inode @dst.
 *
 * The target is read into the shared buf[] with readlink() and written at
 * offset 0 with its length rounded up to a whole block. Dies with
 * "readlink error" on failure.
 *
 * NOTE(review): no zeroing of buf[] between the end of the target and the
 * rounded-up length is visible here, so the padding may contain whatever an
 * earlier copy_data()/copy_link() call left in buf[] - confirm.
 */
355 static void copy_link(struct bch_fs *c, struct bch_inode_unpacked *dst,
358 ssize_t ret = readlink(src, buf, sizeof(buf));
360 die("readlink error: %m");
362 write_data(c, dst, 0, buf, round_up(ret, block_bytes(c)));
/*
 * Migrate the contents of one regular file (@src_fd, @src_size bytes) into
 * inode @dst, walking its extents with FIEMAP twice.
 *
 * First pass: looks for extents flagged FIEMAP_EXTENT_UNKNOWN.
 * NOTE(review): the reaction to such an extent is not among the lines shown.
 *
 * Second pass, per extent:
 *  - dies if the logical offset or length is not block aligned;
 *  - extents flagged UNKNOWN, ENCODED, NOT_ALIGNED or DATA_INLINE are copied
 *    with copy_data();
 *  - extents whose physical offset is below 1 MiB are copied as well (see
 *    the comment in the body);
 *  - anything else must have a block-aligned physical offset (else die); its
 *    physical range is recorded in @extents and the data is referenced in
 *    place with link_data().
 *
 * @src_path is used only in error messages.
 */
365 static void copy_file(struct bch_fs *c, struct bch_inode_unpacked *dst,
366 int src_fd, u64 src_size,
367 char *src_path, ranges *extents)
369 struct fiemap_iter iter;
370 struct fiemap_extent e;
372 fiemap_for_each(src_fd, iter, e)
373 if (e.fe_flags & FIEMAP_EXTENT_UNKNOWN) {
378 fiemap_for_each(src_fd, iter, e) {
379 if ((e.fe_logical & (block_bytes(c) - 1)) ||
380 (e.fe_length & (block_bytes(c) - 1)))
381 die("Unaligned extent in %s - can't handle", src_path);
/* extents that cannot be referenced in place are copied instead */
383 if (e.fe_flags & (FIEMAP_EXTENT_UNKNOWN|
384 FIEMAP_EXTENT_ENCODED|
385 FIEMAP_EXTENT_NOT_ALIGNED|
386 FIEMAP_EXTENT_DATA_INLINE)) {
387 copy_data(c, dst, src_fd, e.fe_logical,
388 min(src_size - e.fe_logical,
394 * if the data is below 1 MB, copy it so it doesn't conflict
395 * with bcachefs's potentially larger superblock:
397 if (e.fe_physical < 1 << 20) {
398 copy_data(c, dst, src_fd, e.fe_logical,
399 min(src_size - e.fe_logical,
404 if ((e.fe_physical & (block_bytes(c) - 1)))
405 die("Unaligned extent in %s - can't handle", src_path);
/* remember the physical range as in use, then reference it in place */
407 range_add(extents, e.fe_physical, e.fe_length);
408 link_data(c, dst, e.fe_logical, e.fe_physical, e.fe_length);
/*
 * State carried through the recursive copy_dir() walk.
 * Besides the member shown below, the code in this file also uses
 * s->bcachefs_inum (source inode number to skip), s->dev (expected st_dev)
 * and s->extents (physical ranges referenced by migrated files).
 */
412 struct copy_fs_state {
/* indexed by source st_ino; holds the new inode number once a file is created */
416 GENRADIX(u64) hardlinks;
/*
 * Recursively migrate the directory open on @src_fd (named @src_path in
 * messages) into the new-filesystem directory inode @dst.
 *
 * For every entry except ".", "..", "lost+found" and the entry whose inode
 * number equals s->bcachefs_inum:
 *  - dies if the entry lives on a different device than s->dev;
 *  - a regular file whose source inode number already has a destination
 *    inode recorded in s->hardlinks is added with create_link();
 *  - otherwise a new inode is made with create_file(), its times and xattrs
 *    are copied, and then by type: directories recurse, regular files go
 *    through copy_file(), symlinks through copy_link(), other types need no
 *    data; finally the inode is written with update_inode().
 *
 * Entries are opened by bare name (xopen(d->d_name, ...)), i.e. relative to
 * the current working directory; the "chdir error" path below belongs to the
 * directory change that makes this work.
 * Dies with "readdir error" if readdir() stops with errno set.
 */
420 static void copy_dir(struct copy_fs_state *s,
422 struct bch_inode_unpacked *dst,
423 int src_fd, const char *src_path)
425 DIR *dir = fdopendir(src_fd);
/* errno is cleared first so end-of-directory and error can be told apart */
428 while ((errno = 0), (d = readdir(dir))) {
429 struct bch_inode_unpacked inode;
433 die("chdir error: %m");
436 xfstatat(src_fd, d->d_name, AT_SYMLINK_NOFOLLOW);
438 if (!strcmp(d->d_name, ".") ||
439 !strcmp(d->d_name, "..") ||
440 !strcmp(d->d_name, "lost+found") ||
441 stat.st_ino == s->bcachefs_inum)
444 char *child_path = mprintf("%s/%s", src_path, d->d_name);
446 if (stat.st_dev != s->dev)
447 die("%s does not have correct st_dev!", child_path);
/* only regular files are tracked for hard-link detection */
449 u64 *dst_inum = S_ISREG(stat.st_mode)
450 ? genradix_ptr_alloc(&s->hardlinks, stat.st_ino, GFP_KERNEL)
/* a non-zero slot means this source inode was already migrated */
453 if (dst_inum && *dst_inum) {
454 create_link(c, dst, d->d_name, *dst_inum, S_IFREG);
458 inode = create_file(c, dst, d->d_name,
459 stat.st_uid, stat.st_gid,
460 stat.st_mode, stat.st_rdev);
463 *dst_inum = inode.bi_inum;
465 copy_times(c, &inode, &stat);
466 copy_xattrs(c, &inode, d->d_name);
470 switch (mode_to_type(stat.st_mode)) {
472 fd = xopen(d->d_name, O_RDONLY|O_NOATIME);
473 copy_dir(s, c, &inode, fd, child_path);
477 inode.bi_size = stat.st_size;
479 fd = xopen(d->d_name, O_RDONLY|O_NOATIME);
480 copy_file(c, &inode, fd, stat.st_size,
481 child_path, &s->extents);
485 inode.bi_size = stat.st_size;
487 copy_link(c, &inode, d->d_name);
494 /* nothing else to copy for these: */
/* persist size/sector-count/time changes made to the in-memory inode */
500 update_inode(c, &inode);
506 die("readdir error: %m");
/*
 * Create the file at @file_path on the source filesystem, preallocate @size
 * bytes for it with fallocate(), and collect the physical ranges backing it.
 *
 * One of the two open() calls below is used; they differ only in O_EXCL
 * (without it an already existing file is accepted). The file must be on
 * device @dev, and its inode number is stored in *@bcachefs_inum. Every
 * FIEMAP extent must be free of the UNKNOWN/ENCODED/NOT_ALIGNED/DATA_INLINE
 * flags and aligned to @block_size, otherwise the function dies. The
 * collected ranges are sorted and merged.
 *
 * NOTE(review): fe_physical/fe_length are byte quantities, but the caller in
 * migrate_fs() passes `fs_opts.block_size >> 9` as @block_size; confirm the
 * unit of @block_size, since a sector count here would make the alignment
 * check weaker than intended.
 */
509 static ranges reserve_new_fs_space(const char *file_path, unsigned block_size,
510 u64 size, u64 *bcachefs_inum, dev_t dev,
514 ? open(file_path, O_RDWR|O_CREAT, 0600)
515 : open(file_path, O_RDWR|O_CREAT|O_EXCL, 0600);
517 die("Error creating %s for bcachefs metadata: %m",
520 struct stat statbuf = xfstat(fd);
522 if (statbuf.st_dev != dev)
523 die("bcachefs file has incorrect device");
525 *bcachefs_inum = statbuf.st_ino;
527 if (fallocate(fd, 0, 0, size))
528 die("Error reserving space for bcachefs metadata: %m");
532 struct fiemap_iter iter;
533 struct fiemap_extent e;
534 ranges extents = { 0 };
536 fiemap_for_each(fd, iter, e) {
537 if (e.fe_flags & (FIEMAP_EXTENT_UNKNOWN|
538 FIEMAP_EXTENT_ENCODED|
539 FIEMAP_EXTENT_NOT_ALIGNED|
540 FIEMAP_EXTENT_DATA_INLINE))
541 die("Unable to continue: metadata file not fully mapped");
543 if ((e.fe_physical & (block_size - 1)) ||
544 (e.fe_length & (block_size - 1)))
545 die("Unable to continue: unaligned extents in metadata file");
547 range_add(&extents, e.fe_physical, e.fe_length);
551 ranges_sort_merge(&extents);
/*
 * Create the file "old_migrated_filesystem" (regular, mode 0400, uid/gid 0)
 * in @root_inode and make it reference every part of device 0 that is NOT
 * covered by @extents.
 *
 * The file's size is the device size in bytes. @extents is sorted and merged
 * first; each hole between the ranges is then passed to link_data() with the
 * file offset equal to the device offset. The inode is written back with
 * update_inode() at the end.
 */
555 static void reserve_old_fs_space(struct bch_fs *c,
556 struct bch_inode_unpacked *root_inode,
559 struct bch_dev *ca = c->devs[0];
560 struct bch_inode_unpacked dst;
561 struct hole_iter iter;
564 dst = create_file(c, root_inode, "old_migrated_filesystem",
565 0, 0, S_IFREG|0400, 0);
566 dst.bi_size = bucket_to_sector(ca, ca->mi.nbuckets) << 9;
568 ranges_sort_merge(extents);
/* identity mapping: logical offset in the file == offset on the device */
570 for_each_hole(iter, *extents, bucket_to_sector(ca, ca->mi.nbuckets) << 9, i)
571 link_data(c, &dst, i.start, i.start, i.end - i.start);
573 update_inode(c, &dst);
/*
 * Migrate the whole source tree rooted at @src_fd / @src_path into the new
 * filesystem @c.
 *
 * Steps on the lines shown: look up the new root inode (subvol 1,
 * BCACHEFS_ROOT_INO); copy the source root's times and xattrs onto it; walk
 * the tree with copy_dir(); call reserve_old_fs_space() with the extents
 * gathered during the walk; write the root inode back; release the walk
 * state. @bcachefs_inum identifies the metadata file to skip during the walk.
 * Dies on root lookup or chdir failure.
 */
576 static void copy_fs(struct bch_fs *c, int src_fd, const char *src_path,
577 u64 bcachefs_inum, ranges *extents)
581 struct bch_inode_unpacked root_inode;
582 int ret = bch2_inode_find_by_inum(c, (subvol_inum) { 1, BCACHEFS_ROOT_INO },
585 die("error looking up root directory: %s", bch2_err_str(ret));
588 die("chdir error: %m");
590 struct stat stat = xfstat(src_fd);
591 copy_times(c, &root_inode, &stat);
/* "." is resolved relative to the current working directory */
592 copy_xattrs(c, &root_inode, ".");
594 struct copy_fs_state s = {
595 .bcachefs_inum = bcachefs_inum,
601 copy_dir(&s, c, &root_inode, src_fd, src_path);
603 reserve_old_fs_space(c, &root_inode, &s.extents);
605 update_inode(c, &root_inode);
607 darray_exit(&s.extents);
608 genradix_free(&s.hardlinks);
/*
 * Choose where the new filesystem's superblocks go: the first range in
 * @extents that, after alignment, has room for two superblocks.
 *
 * For each range the start is raised to at least 256 KiB and rounded up to a
 * bucket boundary, and the end is rounded down to a bucket boundary
 * (dev->bucket_size << 9 is the bucket size in bytes). On success
 * dev->sb_offset and dev->sb_end are set, both in 512-byte sectors.
 * Dies with "Couldn't find a valid location for superblock" otherwise.
 */
611 static void find_superblock_space(ranges extents,
612 struct format_opts opts,
613 struct dev_opts *dev)
615 darray_for_each(extents, i) {
616 u64 start = round_up(max(256ULL << 10, i->start),
617 dev->bucket_size << 9);
618 u64 end = round_down(i->end,
619 dev->bucket_size << 9);
621 /* Need space for two superblocks: */
622 if (start + (opts.superblock_size << 9) * 2 <= end) {
623 dev->sb_offset = start >> 9;
624 dev->sb_end = dev->sb_offset + opts.superblock_size * 2;
629 die("Couldn't find a valid location for superblock");
/* Print the help text for `bcachefs migrate` to stdout. */
632 static void migrate_usage(void)
634 puts("bcachefs migrate - migrate an existing filesystem to bcachefs\n"
635 "Usage: bcachefs migrate [OPTION]...\n"
638 " -f fs Root of filesystem to migrate(s)\n"
639 " --encrypted Enable whole filesystem encryption (chacha20/poly1305)\n"
640 " --no_passphrase Don't encrypt master encryption key\n"
641 " -F Force, even if metadata file already exists\n"
642 " -h Display this help and exit\n"
643 "Report bugs to <linux-bcachefs@vger.kernel.org>");
/*
 * Long options for cmd_migrate(): getopt_long() reports --encrypted as 'e'
 * and --no_passphrase as 'p'; neither takes an argument.
 */
646 static const struct option migrate_opts[] = {
647 { "encrypted", no_argument, NULL, 'e' },
648 { "no_passphrase", no_argument, NULL, 'p' },
/*
 * Convert the filesystem mounted at @fs_path to bcachefs in place.
 *
 * Outline of the visible steps:
 *  1. Verify @fs_path is a mount point and a directory, and find the block
 *     device backing it.
 *  2. Reserve space for the new filesystem's metadata in the file
 *     "<fs_path>/bcachefs" (one fifth of the device size) and pick a
 *     superblock location inside that space.
 *  3. Format, open the new filesystem without starting it, mark everything
 *     outside the reserved space as unusable, then start it.
 *  4. Copy/link the old tree into it with copy_fs().
 *  5. Re-open with nochanges set (reported to the user as fsck) and print
 *     follow-up instructions.
 *
 * Every failure path shown terminates the process via die().
 */
652 static int migrate_fs(const char *fs_path,
653 struct bch_opt_strs fs_opt_strs,
654 struct bch_opts fs_opts,
655 struct format_opts format_opts,
658 if (!path_is_fs_root(fs_path))
659 die("%s is not a filesystem root", fs_path);
661 int fs_fd = xopen(fs_path, O_RDONLY|O_NOATIME);
662 struct stat stat = xfstat(fs_fd);
664 if (!S_ISDIR(stat.st_mode))
665 die("%s is not a directory", fs_path);
667 struct dev_opts dev = dev_opts_default();
/* st_dev of the mount point identifies the underlying block device */
669 dev.path = dev_t_to_path(stat.st_dev);
670 dev.bdev = blkdev_get_by_path(dev.path, BLK_OPEN_READ|BLK_OPEN_WRITE, &dev, NULL);
672 opt_set(fs_opts, block_size, get_blocksize(dev.bdev->bd_buffered_fd));
674 char *file_path = mprintf("%s/bcachefs", fs_path);
675 printf("Creating new filesystem on %s in space reserved at %s\n",
676 dev.path, file_path);
678 dev.size = get_size(dev.bdev->bd_buffered_fd);
679 dev.bucket_size = bch2_pick_bucket_size(fs_opts, &dev);
680 dev.nbuckets = dev.size / dev.bucket_size;
682 bch2_check_bucket_size(fs_opts, &dev);
/*
 * NOTE(review): reserve_new_fs_space() masks byte offsets with
 * (block_size - 1); confirm that `block_size >> 9` is the intended unit.
 */
685 ranges extents = reserve_new_fs_space(file_path,
686 fs_opts.block_size >> 9,
687 get_size(dev.bdev->bd_buffered_fd) / 5,
688 &bcachefs_inum, stat.st_dev, force);
690 find_superblock_space(extents, format_opts, &dev);
692 struct bch_sb *sb = bch2_format(fs_opt_strs,
693 fs_opts,format_opts, &dev, 1);
/* the superblock is not at the default location, so remember where it is */
694 u64 sb_offset = le64_to_cpu(sb->layout.sb_offset[0]);
696 if (format_opts.passphrase)
697 bch2_add_key(sb, "user", "user", format_opts.passphrase);
701 struct bch_opts opts = bch2_opts_empty();
702 struct bch_fs *c = NULL;
703 char *path[1] = { dev.path };
705 opt_set(opts, sb, sb_offset);
/* open without starting so buckets can be marked before any allocation */
706 opt_set(opts, nostart, true);
707 opt_set(opts, noexcl, true);
708 opt_set(opts, buckets_nouse, true);
710 c = bch2_fs_open(path, 1, opts);
712 die("Error opening new filesystem: %s", bch2_err_str(PTR_ERR(c)));
714 mark_unreserved_space(c, extents);
716 int ret = bch2_fs_start(c);
718 die("Error starting new filesystem: %s", bch2_err_str(ret));
720 copy_fs(c, fs_fd, fs_path, bcachefs_inum, &extents);
724 printf("Migrate complete, running fsck:\n");
725 opt_set(opts, nostart, false);
726 opt_set(opts, nochanges, true);
728 c = bch2_fs_open(path, 1, opts);
730 die("Error opening new filesystem: %s", bch2_err_str(PTR_ERR(c)));
733 printf("fsck complete\n");
/* the file name below must match the one created by reserve_old_fs_space() */
735 printf("To mount the new filesystem, run\n"
736 " mount -t bcachefs -o sb=%llu %s dir\n"
738 "After verifying that the new filesystem is correct, to create a\n"
739 "superblock at the default offset and finish the migration run\n"
740 " bcachefs migrate-superblock -d %s -o %llu\n"
742 "The new filesystem will have a file at /old_migrated_filesystem\n"
743 "referencing all disk space that might be used by the existing\n"
744 "filesystem. That file can be deleted once the old filesystem is\n"
745 "no longer needed (and should be deleted prior to running\n"
746 "bcachefs migrate-superblock)\n",
747 sb_offset, dev.path, dev.path, sb_offset);
/*
 * Entry point for `bcachefs migrate`.
 *
 * Filesystem format options are pulled out of argv first by
 * bch2_cmdline_opts_get(); the remaining options are parsed with
 * getopt_long() using the short options "f:Fh" plus migrate_opts. A
 * filesystem path is mandatory. When encryption is requested without
 * --no_passphrase, the passphrase is prompted for twice. The work itself is
 * done by migrate_fs().
 */
751 int cmd_migrate(int argc, char *argv[])
753 struct format_opts format_opts = format_opts_default();
754 char *fs_path = NULL;
755 bool no_passphrase = false, force = false;
758 struct bch_opt_strs fs_opt_strs =
759 bch2_cmdline_opts_get(&argc, argv, OPT_FORMAT);
760 struct bch_opts fs_opts = bch2_parse_opts(fs_opt_strs);
762 while ((opt = getopt_long(argc, argv, "f:Fh",
763 migrate_opts, NULL)) != -1)
769 format_opts.encrypted = true;
772 no_passphrase = true;
783 die("Please specify a filesystem to migrate");
785 if (format_opts.encrypted && !no_passphrase)
786 format_opts.passphrase = read_passphrase_twice("Enter passphrase: ");
788 int ret = migrate_fs(fs_path,
792 bch2_opt_strs_free(&fs_opt_strs);
/* Print the help text for `bcachefs migrate-superblock` to stdout. */
796 static void migrate_superblock_usage(void)
798 puts("bcachefs migrate-superblock - create default superblock after migrating\n"
799 "Usage: bcachefs migrate-superblock [OPTION]...\n"
802 " -d device Device to create superblock for\n"
803 " -o offset Offset of existing superblock\n"
804 " -h Display this help and exit\n"
805 "Report bugs to <linux-bcachefs@vger.kernel.org>");
/*
 * Entry point for `bcachefs migrate-superblock`: add a superblock at the
 * default location (BCH_SB_SECTOR) to a migrated filesystem.
 *
 * Options: -d <device>, -o <offset of the existing superblock, decimal>,
 * -h for help; device and offset are both required. The existing superblock
 * is read from the given offset, and BCH_SB_SECTOR is inserted as entry 0 of
 * its layout before the superblock is written out again.
 *
 * Dies if the layout has no free slot or already lists BCH_SB_SECTOR.
 */
808 int cmd_migrate_superblock(int argc, char *argv[])
814 while ((opt = getopt(argc, argv, "d:o:h")) != -1)
820 ret = kstrtou64(optarg, 10, &offset);
822 die("Invalid offset");
825 migrate_superblock_usage();
830 die("Please specify a device");
833 die("Please specify offset of existing superblock");
835 int fd = xopen(dev, O_RDWR);
836 struct bch_sb *sb = __bch2_super_read(fd, offset);
838 if (sb->layout.nr_superblocks >= ARRAY_SIZE(sb->layout.sb_offset))
839 die("Can't add superblock: no space left in superblock layout");
842 for (i = 0; i < sb->layout.nr_superblocks; i++)
843 if (le64_to_cpu(sb->layout.sb_offset[i]) == BCH_SB_SECTOR)
844 die("Superblock layout already has default superblock");
/* shift the existing entries up by one; regions overlap, hence memmove */
846 memmove(&sb->layout.sb_offset[1],
847 &sb->layout.sb_offset[0],
848 sb->layout.nr_superblocks * sizeof(u64));
849 sb->layout.nr_superblocks++;
/* the default location becomes the first entry of the layout */
851 sb->layout.sb_offset[0] = cpu_to_le64(BCH_SB_SECTOR);
853 bch2_super_write(fd, sb);