9 #include <sys/sysmacros.h>
10 #include <sys/types.h>
14 #include <linux/fiemap.h>
16 #include <linux/stat.h>
18 #include <uuid/uuid.h>
22 #include "libbcachefs.h"
24 #include <linux/dcache.h>
25 #include <linux/generic-radix-tree.h>
26 #include <linux/xattr.h>
27 #include "libbcachefs/bcachefs.h"
28 #include "libbcachefs/alloc_background.h"
29 #include "libbcachefs/alloc_foreground.h"
30 #include "libbcachefs/btree_update.h"
31 #include "libbcachefs/buckets.h"
32 #include "libbcachefs/dirent.h"
33 #include "libbcachefs/fs-common.h"
34 #include "libbcachefs/inode.h"
35 #include "libbcachefs/io.h"
36 #include "libbcachefs/replicas.h"
37 #include "libbcachefs/str_hash.h"
38 #include "libbcachefs/super.h"
39 #include "libbcachefs/xattr.h"
41 /* XXX cut and pasted from fsck.c */
/*
 * QSTR(n) expands to a brace initializer (used below for struct qstr) whose
 * .len is strlen(n) and whose .name is n. The argument is expanded twice, so
 * it should not have side effects.
 */
42 #define QSTR(n) { { { .len = strlen(n) } }, .name = n }
/*
 * dev_t_to_path() - translate a device number into a "/dev/..." path.
 *
 * Reads the symlink /sys/dev/block/<major>:<minor>, finds the last '/' in its
 * target and formats the result with mprintf(). Failure to read the link, or
 * a target that fills the whole buffer, is fatal (die()).
 */
44 static char *dev_t_to_path(dev_t dev)
46 char link[PATH_MAX], *p;
49 char *sysfs_dev = mprintf("/sys/dev/block/%u:%u",
50 major(dev), minor(dev));
51 ret = readlink(sysfs_dev, link, sizeof(link));
/* ret == sizeof(link) would mean the target may have been truncated */
54 if (ret < 0 || ret >= sizeof(link))
55 die("readlink error while looking up block device: %m");
/* the device name is the last path component of the link target */
59 p = strrchr(link, '/');
61 die("error looking up device name");
64 return mprintf("/dev/%s", p);
/*
 * path_is_fs_root() - check @path against the mount points listed in
 * /proc/self/mountinfo.
 *
 * Every line is split on single spaces; the first four fields are discarded
 * and the fifth (the mount point) is compared with @path using strcmp(), so
 * @path has to be spelled exactly as the kernel reports it.
 */
67 static bool path_is_fs_root(const char *path)
69 char *line = NULL, *p, *mount;
74 f = fopen("/proc/self/mountinfo", "r");
76 die("Error getting mount information");
78 while (getline(&line, &n, f) != -1) {
/* strsep() consumes one space-separated field per call */
81 strsep(&p, " "); /* mount id */
82 strsep(&p, " "); /* parent id */
83 strsep(&p, " "); /* dev */
84 strsep(&p, " "); /* root */
85 mount = strsep(&p, " ");
/* mount is NULL when the line had fewer than five fields */
88 if (mount && !strcmp(path, mount))
/*
 * mark_unreserved_space() - set the buckets_nouse bit for buckets of device 0
 * that lie in the holes between the ranges in @extents.
 *
 * for_each_hole() is bounded by the device size in bytes
 * (bucket_to_sector(nbuckets) << 9). Hole boundaries are byte offsets, hence
 * the >> 9 and << 9 conversions to and from sectors.
 */
99 static void mark_unreserved_space(struct bch_fs *c, ranges extents)
101 struct bch_dev *ca = c->devs[0];
102 struct hole_iter iter;
105 for_each_hole(iter, extents, bucket_to_sector(ca, ca->mi.nbuckets) << 9, i) {
/* empty hole: nothing to mark */
108 if (i.start == i.end)
/* first bucket touched by the hole */
111 b = sector_to_bucket(ca, i.start >> 9);
113 set_bit(b, ca->buckets_nouse);
/* keep going while the bucket's start is still before the hole's end */
115 } while (bucket_to_sector(ca, b) << 9 < i.end);
/*
 * update_inode() - pack @inode and insert it into the inodes btree.
 * An error from bch2_btree_insert() is fatal and reported via strerror().
 */
119 static void update_inode(struct bch_fs *c,
120 struct bch_inode_unpacked *inode)
122 struct bkey_inode_buf packed;
125 bch2_inode_pack(c, &packed, inode);
126 ret = bch2_btree_insert(c, BTREE_ID_inodes, &packed.inode.k_i,
/* ret is negated before being handed to strerror() */
129 die("error updating inode: %s", strerror(-ret));
/*
 * create_link() - create a hard link called @name in directory @parent that
 * refers to the existing inode @inum.
 *
 * Runs bch2_link_trans() inside bch2_trans_do(); both inodes are addressed in
 * subvolume 1. parent_u and inode receive the unpacked inodes from the
 * transaction. Failure is fatal.
 */
132 static void create_link(struct bch_fs *c,
133 struct bch_inode_unpacked *parent,
134 const char *name, u64 inum, mode_t mode)
136 struct qstr qstr = QSTR(name);
137 struct bch_inode_unpacked parent_u;
138 struct bch_inode_unpacked inode;
140 int ret = bch2_trans_do(c, NULL, NULL, 0,
141 bch2_link_trans(&trans,
142 (subvol_inum) { 1, parent->bi_inum }, &parent_u,
143 (subvol_inum) { 1, inum }, &inode, &qstr));
145 die("error creating hardlink: %s", strerror(-ret));
/*
 * create_file() - create a new inode under @parent with the given owner,
 * mode and device number, using bch2_create_trans() inside bch2_trans_do().
 *
 * The parent is addressed in subvolume 1 and @parent itself is passed to the
 * transaction as the unpacked directory inode. Returns a
 * struct bch_inode_unpacked by value; failure is fatal.
 */
148 static struct bch_inode_unpacked create_file(struct bch_fs *c,
149 struct bch_inode_unpacked *parent,
151 uid_t uid, gid_t gid,
152 mode_t mode, dev_t rdev)
154 struct qstr qstr = QSTR(name);
155 struct bch_inode_unpacked new_inode;
157 int ret = bch2_trans_do(c, NULL, NULL, 0,
158 bch2_create_trans(&trans,
159 (subvol_inum) { 1, parent->bi_inum }, parent,
161 uid, gid, mode, rdev, NULL, NULL,
162 (subvol_inum) {}, 0));
164 die("error creating file: %s", strerror(-ret));
/*
 * Loop helper: assigns successive elements of the array @handlers to
 * @handler. Note that @handlers itself is advanced by the loop, so callers
 * should pass a scratch copy of the array pointer.
 */
169 #define for_each_xattr_handler(handlers, handler) \
171 for ((handler) = *(handlers)++; \
173 (handler) = *(handlers)++)
/*
 * xattr_resolve_name() - find the entry of bch2_xattr_handlers whose prefix
 * matches *@name.
 *
 * Returns the handler, or an ERR_PTR(): -EINVAL for a malformed name (see the
 * check below), -EOPNOTSUPP otherwise (presumably when no handler matched).
 */
175 static const struct xattr_handler *xattr_resolve_name(char **name)
177 const struct xattr_handler **handlers = bch2_xattr_handlers;
178 const struct xattr_handler *handler;
180 for_each_xattr_handler(handlers, handler) {
/* n is the result of matching the handler's prefix against the name */
183 n = strcmp_prefix(*name, xattr_prefix(handler));
/*
 * True when exactly one of "handler has no ->prefix" and "nothing is left at
 * n" holds.
 */
185 if (!handler->prefix ^ !*n) {
188 return ERR_PTR(-EINVAL);
194 return ERR_PTR(-EOPNOTSUPP);
/*
 * copy_times() - set @dst's atime, mtime and ctime from the corresponding
 * st_atim/st_mtim/st_ctim fields of @src, converted with
 * timespec_to_bch2_time().
 */
197 static void copy_times(struct bch_fs *c, struct bch_inode_unpacked *dst,
200 dst->bi_atime = timespec_to_bch2_time(c, src->st_atim);
201 dst->bi_mtime = timespec_to_bch2_time(c, src->st_mtim);
202 dst->bi_ctime = timespec_to_bch2_time(c, src->st_ctim);
/*
 * copy_xattrs() - copy the extended attributes of the path @src onto the
 * inode @dst.
 *
 * Names are listed with llistxattr() and values fetched with lgetxattr()
 * (the variants that operate on a symlink itself rather than its target).
 * Each name is resolved to a handler and stored with bch2_xattr_set() in
 * subvolume 1. The name list and value are held in on-stack arrays of
 * XATTR_LIST_MAX and XATTR_SIZE_MAX bytes. Any failure is fatal.
 */
205 static void copy_xattrs(struct bch_fs *c, struct bch_inode_unpacked *dst,
208 struct bch_hash_info hash_info = bch2_hash_info_init(c, dst);
210 char attrs[XATTR_LIST_MAX];
211 ssize_t attrs_size = llistxattr(src, attrs, sizeof(attrs));
213 die("listxattr error: %m");
/* attrs holds attrs_size bytes of back-to-back NUL-terminated names */
217 attr < attrs + attrs_size;
219 next = attr + strlen(attr) + 1;
221 char val[XATTR_SIZE_MAX];
222 ssize_t val_size = lgetxattr(src, attr, val, sizeof(val));
225 die("error getting xattr val: %m");
/* attr is passed by address, so the lookup may modify it */
227 const struct xattr_handler *h = xattr_resolve_name(&attr);
229 int ret = bch2_trans_do(c, NULL, NULL, 0,
230 bch2_xattr_set(&trans,
231 (subvol_inum) { 1, dst->bi_inum },
233 val, val_size, h->flags, 0));
235 die("error creating xattr: %s", strerror(-ret));
239 static char buf[1 << 20] __aligned(PAGE_SIZE);
241 static void write_data(struct bch_fs *c,
242 struct bch_inode_unpacked *dst_inode,
243 u64 dst_offset, void *buf, size_t len)
246 struct bch_write_op op;
247 struct bio_vec bv[sizeof(buf) / PAGE_SIZE];
251 BUG_ON(dst_offset & (block_bytes(c) - 1));
252 BUG_ON(len & (block_bytes(c) - 1));
254 closure_init_stack(&cl);
256 bio_init(&o.op.wbio.bio, o.bv, ARRAY_SIZE(o.bv));
257 bch2_bio_map(&o.op.wbio.bio, buf, len);
259 bch2_write_op_init(&o.op, c, bch2_opts_to_inode_opts(c->opts));
260 o.op.write_point = writepoint_hashed(0);
261 o.op.nr_replicas = 1;
262 o.op.pos = POS(dst_inode->bi_inum, dst_offset >> 9);
264 int ret = bch2_disk_reservation_get(c, &o.op.res, len >> 9,
265 c->opts.data_replicas, 0);
267 die("error reserving space in new filesystem: %s", strerror(-ret));
269 closure_call(&o.op.cl, bch2_write, NULL, &cl);
272 dst_inode->bi_sectors += len >> 9;
/*
 * copy_data() - copy the byte range [@start, @end) of @src_fd into
 * @dst_inode at the same offsets, through the static bounce buffer buf.
 *
 * Each chunk is at most sizeof(buf) bytes. A chunk whose length is not a
 * multiple of the block size is zero-padded up to the next block boundary
 * before being handed to write_data().
 */
275 static void copy_data(struct bch_fs *c,
276 struct bch_inode_unpacked *dst_inode,
277 int src_fd, u64 start, u64 end)
279 while (start < end) {
280 unsigned len = min_t(u64, end - start, sizeof(buf));
/* bytes needed to reach the next block boundary */
281 unsigned pad = round_up(len, block_bytes(c)) - len;
283 xpread(src_fd, buf, len, start);
284 memset(buf + len, 0, pad);
286 write_data(c, dst_inode, start, buf, len + pad);
/*
 * link_data() - make @dst reference data that already exists on device 0,
 * by inserting extent keys instead of copying.
 *
 * @logical, @physical and @length must all be multiples of the block size.
 * NOTE(review): the alignment checks mask with block_bytes() (bytes) while
 * the bound check compares against a sector count; presumably the arguments
 * are converted to sectors in between - confirm.
 *
 * The range is inserted piecewise: each extent is limited to what remains of
 * the bucket containing @physical (the mask assumes a power-of-two bucket
 * size). @dst->bi_sectors is increased by every piece inserted. Reservation
 * and btree insert failures are fatal.
 */
291 static void link_data(struct bch_fs *c, struct bch_inode_unpacked *dst,
292 u64 logical, u64 physical, u64 length)
294 struct bch_dev *ca = c->devs[0];
296 BUG_ON(logical & (block_bytes(c) - 1));
297 BUG_ON(physical & (block_bytes(c) - 1));
298 BUG_ON(length & (block_bytes(c) - 1));
/* the range must not run past the last bucket of the device */
304 BUG_ON(physical + length > bucket_to_sector(ca, ca->mi.nbuckets));
307 struct bkey_i_extent *e;
308 __BKEY_PADDED(k, BKEY_EXTENT_VAL_U64s_MAX) k;
309 u64 b = sector_to_bucket(ca, physical);
310 struct disk_reservation res;
/* sectors left in the current bucket */
314 sectors = min(ca->mi.bucket_size -
315 (physical & (ca->mi.bucket_size - 1)),
318 e = bkey_extent_init(&k.k);
319 e->k.p.inode = dst->bi_inum;
/* the key position is the END of the extent */
320 e->k.p.offset = logical + sectors;
322 bch2_bkey_append_ptr(&e->k_i, (struct bch_extent_ptr) {
/* pointer generation is taken from the bucket's current mark */
325 .gen = bucket(ca, b)->mark.gen,
/* one replica, and the reservation is requested with the NOFAIL flag */
328 ret = bch2_disk_reservation_get(c, &res, sectors, 1,
329 BCH_DISK_RESERVATION_NOFAIL);
331 die("error reserving space in new filesystem: %s",
334 bch2_mark_bkey_replicas(c, extent_i_to_s_c(e).s_c);
336 ret = bch2_btree_insert(c, BTREE_ID_extents, &e->k_i,
339 die("btree insert error %s", strerror(-ret));
341 bch2_disk_reservation_put(c, &res);
343 dst->bi_sectors += sectors;
350 static void copy_link(struct bch_fs *c, struct bch_inode_unpacked *dst,
353 ssize_t ret = readlink(src, buf, sizeof(buf));
355 die("readlink error: %m");
357 write_data(c, dst, 0, buf, round_up(ret, block_bytes(c)));
/*
 * copy_file() - bring the contents of the regular file @src_fd into @dst,
 * extent by extent as reported by FIEMAP.
 *
 * The extent map is walked twice: a first pass only looks for extents
 * flagged FIEMAP_EXTENT_UNKNOWN, the second does the work. In the second
 * pass an extent is either
 *  - copied with copy_data() (clamped to @src_size) when it is flagged
 *    unknown/encoded/not-aligned/inline, or when its physical offset is below
 *    1 MB; or
 *  - recorded in @extents and referenced in place with link_data().
 * Extents whose logical offset, length or (for linked extents) physical
 * offset is not block aligned are fatal; @src_path is used in that message.
 */
360 static void copy_file(struct bch_fs *c, struct bch_inode_unpacked *dst,
361 int src_fd, u64 src_size,
362 char *src_path, ranges *extents)
364 struct fiemap_iter iter;
365 struct fiemap_extent e;
/* first pass: detect extents whose location is not yet known */
367 fiemap_for_each(src_fd, iter, e)
368 if (e.fe_flags & FIEMAP_EXTENT_UNKNOWN) {
/* second pass: copy or link each extent */
373 fiemap_for_each(src_fd, iter, e) {
374 if ((e.fe_logical & (block_bytes(c) - 1)) ||
375 (e.fe_length & (block_bytes(c) - 1)))
376 die("Unaligned extent in %s - can't handle", src_path);
/* these extents cannot be referenced by physical location: copy them */
378 if (e.fe_flags & (FIEMAP_EXTENT_UNKNOWN|
379 FIEMAP_EXTENT_ENCODED|
380 FIEMAP_EXTENT_NOT_ALIGNED|
381 FIEMAP_EXTENT_DATA_INLINE)) {
382 copy_data(c, dst, src_fd, e.fe_logical,
383 min(src_size - e.fe_logical,
389 * if the data is below 1 MB, copy it so it doesn't conflict
390 * with bcachefs's potentially larger superblock:
392 if (e.fe_physical < 1 << 20) {
393 copy_data(c, dst, src_fd, e.fe_logical,
394 min(src_size - e.fe_logical,
399 if ((e.fe_physical & (block_bytes(c) - 1)))
400 die("Unaligned extent in %s - can't handle", src_path);
/* remember the physical range, then reference it in place */
402 range_add(extents, e.fe_physical, e.fe_length);
403 link_data(c, dst, e.fe_logical, e.fe_physical, e.fe_length);
/* State carried through the recursive copy_dir() walk. */
407 struct copy_fs_state {
/*
 * Indexed by the source file's st_ino; holds the inode number already created
 * for it in the new filesystem (0 when none has been recorded).
 */
411 GENRADIX(u64) hardlinks;
/*
 * copy_dir() - recursively copy the directory open at @src_fd into the
 * directory inode @dst of the new filesystem.
 *
 * Entries ".", ".." and the inode recorded in s->bcachefs_inum are skipped.
 * For every other entry an inode is created with the source's owner, mode and
 * rdev, and its times and xattrs are copied. Then, depending on the file
 * type, the function recurses, copies file data with copy_file(), or copies
 * a symlink target with copy_link(). The inode is written back with
 * update_inode().
 *
 * Regular files are tracked in s->hardlinks by source inode number, so a
 * further name for an already-copied inode becomes a hard link rather than a
 * second copy. An entry whose st_dev differs from s->dev is fatal.
 *
 * Children are opened by their bare d_name, i.e. relative to the current
 * working directory (presumably set to @src_fd on each iteration - see the
 * "chdir error" message).
 */
415 static void copy_dir(struct copy_fs_state *s,
417 struct bch_inode_unpacked *dst,
418 int src_fd, const char *src_path)
420 DIR *dir = fdopendir(src_fd);
/* errno is cleared first so a NULL from readdir() can be told apart from EOF */
423 while ((errno = 0), (d = readdir(dir))) {
424 struct bch_inode_unpacked inode;
428 die("chdir error: %m");
/* AT_SYMLINK_NOFOLLOW: describe a symlink itself, not its target */
431 xfstatat(src_fd, d->d_name, AT_SYMLINK_NOFOLLOW);
433 if (!strcmp(d->d_name, ".") ||
434 !strcmp(d->d_name, "..") ||
435 stat.st_ino == s->bcachefs_inum)
438 char *child_path = mprintf("%s/%s", src_path, d->d_name);
440 if (stat.st_dev != s->dev)
441 die("%s does not have correct st_dev!", child_path);
/* only regular files get a slot in the hardlink table */
443 u64 *dst_inum = S_ISREG(stat.st_mode)
444 ? genradix_ptr_alloc(&s->hardlinks, stat.st_ino, GFP_KERNEL)
/* a non-zero slot means this source inode was already copied: just link it */
447 if (dst_inum && *dst_inum) {
448 create_link(c, dst, d->d_name, *dst_inum, S_IFREG);
452 inode = create_file(c, dst, d->d_name,
453 stat.st_uid, stat.st_gid,
454 stat.st_mode, stat.st_rdev);
/* remember the new inode number for later names of the same source inode */
457 *dst_inum = inode.bi_inum;
459 copy_times(c, &inode, &stat);
460 copy_xattrs(c, &inode, d->d_name);
464 switch (mode_to_type(stat.st_mode)) {
/* recurse into the child directory */
466 fd = xopen(d->d_name, O_RDONLY|O_NOATIME);
467 copy_dir(s, c, &inode, fd, child_path);
/* file data: size comes from stat, contents from copy_file() */
471 inode.bi_size = stat.st_size;
473 fd = xopen(d->d_name, O_RDONLY|O_NOATIME);
474 copy_file(c, &inode, fd, stat.st_size,
475 child_path, &s->extents);
/* symlink: the target string is stored as the inode's data */
479 inode.bi_size = stat.st_size;
481 copy_link(c, &inode, d->d_name);
488 /* nothing else to copy for these: */
/* persist the size, sector count and times accumulated above */
494 update_inode(c, &inode);
500 die("readdir error: %m");
/*
 * reserve_new_fs_space() - create and preallocate the file at @file_path
 * that reserves room for the new filesystem, and return the physical ranges
 * backing it.
 *
 * The file must reside on device @dev; its inode number is stored in
 * *@bcachefs_inum. @size bytes are allocated with fallocate(), then the
 * FIEMAP extent list is walked and each extent's physical range collected.
 * Extents flagged unknown/encoded/not-aligned/inline, or not aligned to
 * @block_size (a power of two, in bytes), are fatal. The result is sorted
 * and merged before being returned.
 */
503 static ranges reserve_new_fs_space(const char *file_path, unsigned block_size,
504 u64 size, u64 *bcachefs_inum, dev_t dev,
/*
 * The first variant may reuse an existing file, the second (O_EXCL) fails if
 * the file already exists; presumably selected by the force flag.
 */
508 ? open(file_path, O_RDWR|O_CREAT, 0600)
509 : open(file_path, O_RDWR|O_CREAT|O_EXCL, 0600);
511 die("Error creating %s for bcachefs metadata: %m",
514 struct stat statbuf = xfstat(fd);
516 if (statbuf.st_dev != dev)
517 die("bcachefs file has incorrect device");
519 *bcachefs_inum = statbuf.st_ino;
521 if (fallocate(fd, 0, 0, size))
522 die("Error reserving space for bcachefs metadata: %m");
526 struct fiemap_iter iter;
527 struct fiemap_extent e;
528 ranges extents = { NULL };
530 fiemap_for_each(fd, iter, e) {
/* every extent must have a plain, known physical location */
531 if (e.fe_flags & (FIEMAP_EXTENT_UNKNOWN|
532 FIEMAP_EXTENT_ENCODED|
533 FIEMAP_EXTENT_NOT_ALIGNED|
534 FIEMAP_EXTENT_DATA_INLINE))
535 die("Unable to continue: metadata file not fully mapped");
537 if ((e.fe_physical & (block_size - 1)) ||
538 (e.fe_length & (block_size - 1)))
539 die("Unable to continue: unaligned extents in metadata file");
541 range_add(&extents, e.fe_physical, e.fe_length);
545 ranges_sort_merge(&extents);
/*
 * reserve_old_fs_space() - create the file "old_migrated_filesystem" in the
 * root directory, mapping device space not covered by @extents.
 *
 * The file is owned by uid/gid 0 with mode S_IFREG|0400 and its size is the
 * device size in bytes. After sorting and merging @extents, every hole
 * between them (up to the device size) is linked into the file with
 * link_data() at a logical offset equal to its physical offset.
 */
549 static void reserve_old_fs_space(struct bch_fs *c,
550 struct bch_inode_unpacked *root_inode,
553 struct bch_dev *ca = c->devs[0];
554 struct bch_inode_unpacked dst;
555 struct hole_iter iter;
558 dst = create_file(c, root_inode, "old_migrated_filesystem",
559 0, 0, S_IFREG|0400, 0);
/* sectors -> bytes */
560 dst.bi_size = bucket_to_sector(ca, ca->mi.nbuckets) << 9;
562 ranges_sort_merge(extents);
564 for_each_hole(iter, *extents, bucket_to_sector(ca, ca->mi.nbuckets) << 9, i)
/* identity mapping: logical offset == physical offset */
565 link_data(c, &dst, i.start, i.start, i.end - i.start);
567 update_inode(c, &dst);
/*
 * copy_fs() - populate the new filesystem @c from the directory tree rooted
 * at @src_fd / @src_path.
 *
 * Looks up the root inode (BCACHEFS_ROOT_INO in subvolume 1), copies the
 * source root's times and xattrs onto it, walks the tree with copy_dir(),
 * creates the file covering the old filesystem's space with
 * reserve_old_fs_space(), writes the root inode back, frees the walk state
 * and finally calls bch2_alloc_write(). @bcachefs_inum is the source inode
 * that copy_dir() must skip.
 */
570 static void copy_fs(struct bch_fs *c, int src_fd, const char *src_path,
571 u64 bcachefs_inum, ranges *extents)
575 struct bch_inode_unpacked root_inode;
576 int ret = bch2_inode_find_by_inum(c, (subvol_inum) { 1, BCACHEFS_ROOT_INO },
579 die("error looking up root directory: %s", strerror(-ret));
582 die("chdir error: %m");
584 struct stat stat = xfstat(src_fd);
585 copy_times(c, &root_inode, &stat);
/* "." is resolved against the current working directory */
586 copy_xattrs(c, &root_inode, ".");
588 struct copy_fs_state s = {
589 .bcachefs_inum = bcachefs_inum,
595 copy_dir(&s, c, &root_inode, src_fd, src_path);
597 reserve_old_fs_space(c, &root_inode, &s.extents);
599 update_inode(c, &root_inode);
601 darray_free(s.extents);
602 genradix_free(&s.hardlinks);
604 bch2_alloc_write(c, false);
/*
 * find_superblock_space() - pick a location for the superblocks inside one of
 * the reserved @extents and record it in @dev.
 *
 * For each extent the usable window starts at the extent start or 256 KiB,
 * whichever is larger, rounded up to a bucket boundary, and ends at the
 * extent end rounded down to a bucket boundary (extent bounds are bytes;
 * bucket and superblock sizes are in sectors, hence the << 9). An extent
 * whose window holds two superblocks sets dev->sb_offset and dev->sb_end
 * (both in sectors). It is fatal if no extent qualifies.
 */
607 static void find_superblock_space(ranges extents,
608 struct format_opts opts,
609 struct dev_opts *dev)
613 darray_foreach(i, extents) {
614 u64 start = round_up(max(256ULL << 10, i->start),
615 dev->bucket_size << 9);
616 u64 end = round_down(i->end,
617 dev->bucket_size << 9);
619 /* Need space for two superblocks: */
620 if (start + (opts.superblock_size << 9) * 2 <= end) {
/* bytes -> sectors */
621 dev->sb_offset = start >> 9;
622 dev->sb_end = dev->sb_offset + opts.superblock_size * 2;
627 die("Couldn't find a valid location for superblock");
/* Print the help text for "bcachefs migrate" to stdout. */
630 static void migrate_usage(void)
632 puts("bcachefs migrate - migrate an existing filesystem to bcachefs\n"
633 "Usage: bcachefs migrate [OPTION]...\n"
636 " -f fs Root of filesystem to migrate(s)\n"
637 " --encrypted Enable whole filesystem encryption (chacha20/poly1305)\n"
638 " --no_passphrase Don't encrypt master encryption key\n"
639 " -F Force, even if metadata file already exists\n"
640 " -h Display this help and exit\n"
641 "Report bugs to <linux-bcache@vger.kernel.org>");
/*
 * Long options for "bcachefs migrate". Both take no argument and are
 * reported by getopt_long() as 'e' and 'p' respectively.
 */
644 static const struct option migrate_opts[] = {
645 { "encrypted", no_argument, NULL, 'e' },
646 { "no_passphrase", no_argument, NULL, 'p' },
650 static int migrate_fs(const char *fs_path,
651 struct bch_opt_strs fs_opt_strs,
652 struct bch_opts fs_opts,
653 struct format_opts format_opts,
656 if (!path_is_fs_root(fs_path))
657 die("%s is not a filysestem root", fs_path);
659 int fs_fd = xopen(fs_path, O_RDONLY|O_NOATIME);
660 struct stat stat = xfstat(fs_fd);
662 if (!S_ISDIR(stat.st_mode))
663 die("%s is not a directory", fs_path);
665 struct dev_opts dev = dev_opts_default();
667 dev.path = dev_t_to_path(stat.st_dev);
668 dev.fd = xopen(dev.path, O_RDWR);
670 opt_set(fs_opts, block_size, get_blocksize(dev.path, dev.fd));
672 char *file_path = mprintf("%s/bcachefs", fs_path);
673 printf("Creating new filesystem on %s in space reserved at %s\n",
674 dev.path, file_path);
676 bch2_pick_bucket_size(fs_opts, &dev);
679 ranges extents = reserve_new_fs_space(file_path,
680 fs_opts.block_size << 9,
681 get_size(dev.path, dev.fd) / 5,
682 &bcachefs_inum, stat.st_dev, force);
684 find_superblock_space(extents, format_opts, &dev);
686 struct bch_sb *sb = bch2_format(fs_opt_strs,
687 fs_opts,format_opts, &dev, 1);
688 u64 sb_offset = le64_to_cpu(sb->layout.sb_offset[0]);
690 if (format_opts.passphrase)
691 bch2_add_key(sb, format_opts.passphrase);
695 struct bch_opts opts = bch2_opts_empty();
696 struct bch_fs *c = NULL;
697 char *path[1] = { dev.path };
699 opt_set(opts, sb, sb_offset);
700 opt_set(opts, nostart, true);
701 opt_set(opts, noexcl, true);
703 c = bch2_fs_open(path, 1, opts);
705 die("Error opening new filesystem: %s", strerror(-PTR_ERR(c)));
707 mark_unreserved_space(c, extents);
709 int ret = bch2_fs_start(c);
711 die("Error starting new filesystem: %s", strerror(-ret));
713 copy_fs(c, fs_fd, fs_path, bcachefs_inum, &extents);
717 printf("Migrate complete, running fsck:\n");
718 opt_set(opts, nostart, false);
719 opt_set(opts, nochanges, true);
721 c = bch2_fs_open(path, 1, opts);
723 die("Error opening new filesystem: %s", strerror(-PTR_ERR(c)));
726 printf("fsck complete\n");
728 printf("To mount the new filesystem, run\n"
729 " mount -t bcachefs -o sb=%llu %s dir\n"
731 "After verifying that the new filesystem is correct, to create a\n"
732 "superblock at the default offset and finish the migration run\n"
733 " bcachefs migrate-superblock -d %s -o %llu\n"
735 "The new filesystem will have a file at /old_migrated_filestem\n"
736 "referencing all disk space that might be used by the existing\n"
737 "filesystem. That file can be deleted once the old filesystem is\n"
738 "no longer needed (and should be deleted prior to running\n"
739 "bcachefs migrate-superblock)\n",
740 sb_offset, dev.path, dev.path, sb_offset);
/*
 * cmd_migrate() - entry point of "bcachefs migrate".
 *
 * Filesystem options are first pulled out of argv with
 * bch2_cmdline_opts_get() and parsed; the remaining options are handled by
 * getopt_long() ("f:Fh" plus the long options in migrate_opts). A filesystem
 * path is mandatory. When encryption is requested without --no_passphrase,
 * the passphrase is read twice from the user. The work is done by
 * migrate_fs().
 */
744 int cmd_migrate(int argc, char *argv[])
746 struct format_opts format_opts = format_opts_default();
747 char *fs_path = NULL;
748 bool no_passphrase = false, force = false;
751 struct bch_opt_strs fs_opt_strs =
752 bch2_cmdline_opts_get(&argc, argv, OPT_FORMAT);
753 struct bch_opts fs_opts = bch2_parse_opts(fs_opt_strs);
755 while ((opt = getopt_long(argc, argv, "f:Fh",
756 migrate_opts, NULL)) != -1)
762 format_opts.encrypted = true;
765 no_passphrase = true;
776 die("Please specify a filesystem to migrate");
778 if (format_opts.encrypted && !no_passphrase)
779 format_opts.passphrase = read_passphrase_twice("Enter passphrase: ");
781 int ret = migrate_fs(fs_path,
785 bch2_opt_strs_free(&fs_opt_strs);
/* Print the help text for "bcachefs migrate-superblock" to stdout. */
789 static void migrate_superblock_usage(void)
791 puts("bcachefs migrate-superblock - create default superblock after migrating\n"
792 "Usage: bcachefs migrate-superblock [OPTION]...\n"
795 " -d device Device to create superblock for\n"
796 " -o offset Offset of existing superblock\n"
797 " -h Display this help and exit\n"
798 "Report bugs to <linux-bcache@vger.kernel.org>");
/*
 * cmd_migrate_superblock() - entry point of "bcachefs migrate-superblock".
 *
 * Options: -d device, -o offset of the existing superblock (parsed as a
 * base-10 u64), -h. Both -d and -o are required.
 *
 * Reads the superblock at the given offset and adds BCH_SB_SECTOR as the
 * first entry of its layout, shifting the existing entries up by one, then
 * writes the superblock out. It is fatal if the layout is already full or
 * already lists BCH_SB_SECTOR.
 */
801 int cmd_migrate_superblock(int argc, char *argv[])
807 while ((opt = getopt(argc, argv, "d:o:h")) != -1)
813 ret = kstrtou64(optarg, 10, &offset);
815 die("Invalid offset");
818 migrate_superblock_usage();
823 die("Please specify a device");
826 die("Please specify offset of existing superblock");
828 int fd = xopen(dev, O_RDWR);
829 struct bch_sb *sb = __bch2_super_read(fd, offset);
/* there must be a free slot for the entry added below */
831 if (sb->layout.nr_superblocks >= ARRAY_SIZE(sb->layout.sb_offset))
832 die("Can't add superblock: no space left in superblock layout");
835 for (i = 0; i < sb->layout.nr_superblocks; i++)
836 if (le64_to_cpu(sb->layout.sb_offset[i]) == BCH_SB_SECTOR)
837 die("Superblock layout already has default superblock");
/* overlapping move: shift every existing offset up by one slot */
839 memmove(&sb->layout.sb_offset[1],
840 &sb->layout.sb_offset[0],
841 sb->layout.nr_superblocks * sizeof(u64));
842 sb->layout.nr_superblocks++;
/* layout offsets are stored little-endian */
844 sb->layout.sb_offset[0] = cpu_to_le64(BCH_SB_SECTOR);
846 bch2_super_write(fd, sb);