9 #include <sys/sysmacros.h>
10 #include <sys/types.h>
14 #include <linux/fiemap.h>
16 #include <linux/stat.h>
18 #include <uuid/uuid.h>
22 #include "libbcachefs.h"
24 #include <linux/dcache.h>
25 #include <linux/generic-radix-tree.h>
26 #include <linux/xattr.h>
27 #include "libbcachefs/bcachefs.h"
28 #include "libbcachefs/alloc_background.h"
29 #include "libbcachefs/alloc_foreground.h"
30 #include "libbcachefs/btree_update.h"
31 #include "libbcachefs/buckets.h"
32 #include "libbcachefs/dirent.h"
33 #include "libbcachefs/fs-common.h"
34 #include "libbcachefs/inode.h"
35 #include "libbcachefs/io.h"
36 #include "libbcachefs/replicas.h"
37 #include "libbcachefs/str_hash.h"
38 #include "libbcachefs/super.h"
39 #include "libbcachefs/xattr.h"
41 /* XXX cut and pasted from fsck.c */
/*
 * Brace initializer for a struct qstr: .len is strlen(n) and .name is n.
 * The argument is expanded twice, so pass a plain pointer expression.
 */
42 #define QSTR(n) { { { .len = strlen(n) } }, .name = n }
/*
 * Translate a block device number into a device node path.
 *
 * Reads the /sys/dev/block/<major>:<minor> symlink and formats a "/dev/..."
 * path from the part of the link target located via its last '/'.  Every
 * failure is fatal (die()).  The string is produced by mprintf(); presumably
 * the caller owns it -- confirm against mprintf()'s contract.
 */
44 static char *dev_t_to_path(dev_t dev)
46 char link[PATH_MAX], *p;
49 char *sysfs_dev = mprintf("/sys/dev/block/%u:%u",
50 major(dev), minor(dev));
51 ret = readlink(sysfs_dev, link, sizeof(link));
/* a result that fills the whole buffer is rejected along with real errors */
54 if (ret < 0 || ret >= sizeof(link))
55 die("readlink error while looking up block device: %m");
/* find the last '/' in the link target */
59 p = strrchr(link, '/');
61 die("error looking up device name");
64 return mprintf("/dev/%s", p);
/*
 * Check whether @path is listed as a mount point in /proc/self/mountinfo.
 *
 * Each line is split on single spaces; the first four fields (mount id,
 * parent id, dev, root) are discarded and the fifth is compared to @path
 * with strcmp(), so @path has to match the spelling used in mountinfo
 * exactly.  migrate_fs() refuses to continue when this returns false.
 * Failing to open mountinfo is fatal (die()).
 */
67 static bool path_is_fs_root(const char *path)
69 char *line = NULL, *p, *mount;
74 f = fopen("/proc/self/mountinfo", "r");
76 die("Error getting mount information");
/* getline() (re)allocates line as needed */
78 while (getline(&line, &n, f) != -1) {
81 strsep(&p, " "); /* mount id */
82 strsep(&p, " "); /* parent id */
83 strsep(&p, " "); /* dev */
84 strsep(&p, " "); /* root */
85 mount = strsep(&p, " ");
/* mount is NULL when the line had fewer than five fields */
88 if (mount && !strcmp(path, mount))
/*
 * Set the buckets_nouse bit for buckets of device 0 that lie in the holes
 * between @extents.
 *
 * Holes are iterated up to the device end expressed in bytes
 * (bucket_to_sector(ca, nbuckets) << 9); hole bounds are therefore byte
 * offsets and are shifted by 9 to get sectors.
 */
99 static void mark_unreserved_space(struct bch_fs *c, ranges extents)
101 struct bch_dev *ca = c->devs[0];
102 struct hole_iter iter;
105 for_each_hole(iter, extents, bucket_to_sector(ca, ca->mi.nbuckets) << 9, i) {
/* zero-length holes get special handling (presumably skipped -- confirm) */
108 if (i.start == i.end)
/* byte offset -> sector -> index of the bucket containing the hole start */
111 b = sector_to_bucket(ca, i.start >> 9);
113 set_bit(b, ca->buckets_nouse);
/* keep going while bucket b still starts (in bytes) before the hole's end */
115 } while (bucket_to_sector(ca, b) << 9 < i.end);
/*
 * Pack @inode with bch2_inode_pack() and insert the packed key into the
 * inodes btree (BTREE_ID_inodes).  An insert error is reported through
 * die() with strerror(-ret).
 */
119 static void update_inode(struct bch_fs *c,
120 struct bch_inode_unpacked *inode)
122 struct bkey_inode_buf packed;
125 bch2_inode_pack(c, &packed, inode);
126 ret = bch2_btree_insert(c, BTREE_ID_inodes, &packed.inode.k_i,
129 die("error updating inode: %s", strerror(-ret));
/*
 * Create a hardlink called @name in directory @parent that refers to the
 * already existing inode @inum, by running bch2_link_trans() inside
 * bch2_trans_do().  Failure is fatal (die()).
 *
 * parent_u and inode only receive the unpacked inodes from
 * bch2_link_trans(); nothing visible here reads them afterwards.
 * NOTE(review): @mode is not referenced by any visible statement.
 */
132 static void create_link(struct bch_fs *c,
133 struct bch_inode_unpacked *parent,
134 const char *name, u64 inum, mode_t mode)
136 struct qstr qstr = QSTR(name);
137 struct bch_inode_unpacked parent_u;
138 struct bch_inode_unpacked inode;
140 int ret = bch2_trans_do(c, NULL, NULL, 0,
141 bch2_link_trans(&trans, parent->bi_inum, inum,
142 &parent_u, &inode, &qstr));
144 die("error creating hardlink: %s", strerror(-ret));
/*
 * Create a new inode under @parent with the given @uid, @gid, @mode and
 * @rdev by running bch2_create_trans() inside bch2_trans_do().  Failure is
 * fatal (die()).
 *
 * Returns a struct bch_inode_unpacked by value; callers read .bi_inum from
 * it (see copy_dir()), so it is presumably the newly created inode --
 * confirm against the return statement.
 */
147 static struct bch_inode_unpacked create_file(struct bch_fs *c,
148 struct bch_inode_unpacked *parent,
150 uid_t uid, gid_t gid,
151 mode_t mode, dev_t rdev)
153 struct qstr qstr = QSTR(name);
154 struct bch_inode_unpacked new_inode;
156 int ret = bch2_trans_do(c, NULL, NULL, 0,
157 bch2_create_trans(&trans,
158 parent->bi_inum, parent,
160 uid, gid, mode, rdev, NULL, NULL));
162 die("error creating file: %s", strerror(-ret));
/*
 * Walk an array of xattr handler pointers: each step loads *(handlers) into
 * (handler) and post-increments (handlers).  The macro advances the
 * handlers pointer itself, so pass a scratch copy rather than the table.
 */
167 #define for_each_xattr_handler(handlers, handler) \
169 for ((handler) = *(handlers)++; \
171 (handler) = *(handlers)++)
/*
 * Look up the entry of bch2_xattr_handlers responsible for the attribute
 * name *@name, comparing against each handler's xattr_prefix() with
 * strcmp_prefix().
 *
 * Can return ERR_PTR(-EINVAL), or ERR_PTR(-EOPNOTSUPP) once the handler
 * loop is exhausted, so callers must test the result with IS_ERR().
 * @name is passed by reference; presumably it is advanced past the matched
 * prefix on success -- confirm.
 */
173 static const struct xattr_handler *xattr_resolve_name(char **name)
175 const struct xattr_handler **handlers = bch2_xattr_handlers;
176 const struct xattr_handler *handler;
178 for_each_xattr_handler(handlers, handler) {
181 n = strcmp_prefix(*name, xattr_prefix(handler));
/* true when exactly one of "handler->prefix is NULL" / "*n is '\0'" holds */
183 if (!handler->prefix ^ !*n) {
186 return ERR_PTR(-EINVAL);
192 return ERR_PTR(-EOPNOTSUPP);
/*
 * Copy the access, modification and change timestamps of @src (st_atim,
 * st_mtim, st_ctim) into @dst's bi_atime, bi_mtime and bi_ctime, converting
 * each with timespec_to_bch2_time().  Only the in-memory inode is changed.
 */
195 static void copy_times(struct bch_fs *c, struct bch_inode_unpacked *dst,
198 dst->bi_atime = timespec_to_bch2_time(c, src->st_atim);
199 dst->bi_mtime = timespec_to_bch2_time(c, src->st_mtim);
200 dst->bi_ctime = timespec_to_bch2_time(c, src->st_ctim);
203 static void copy_xattrs(struct bch_fs *c, struct bch_inode_unpacked *dst,
206 struct bch_hash_info hash_info = bch2_hash_info_init(c, dst);
208 char attrs[XATTR_LIST_MAX];
209 ssize_t attrs_size = llistxattr(src, attrs, sizeof(attrs));
211 die("listxattr error: %m");
215 attr < attrs + attrs_size;
217 next = attr + strlen(attr) + 1;
219 char val[XATTR_SIZE_MAX];
220 ssize_t val_size = lgetxattr(src, attr, val, sizeof(val));
223 die("error getting xattr val: %m");
225 const struct xattr_handler *h = xattr_resolve_name(&attr);
227 int ret = bch2_trans_do(c, NULL, NULL, 0,
228 bch2_xattr_set(&trans, dst->bi_inum, &hash_info, attr,
229 val, val_size, h->flags, 0));
231 die("error creating xattr: %s", strerror(-ret));
235 static char buf[1 << 20] __aligned(PAGE_SIZE);
237 static void write_data(struct bch_fs *c,
238 struct bch_inode_unpacked *dst_inode,
239 u64 dst_offset, void *buf, size_t len)
242 struct bch_write_op op;
243 struct bio_vec bv[sizeof(buf) / PAGE_SIZE];
247 BUG_ON(dst_offset & (block_bytes(c) - 1));
248 BUG_ON(len & (block_bytes(c) - 1));
250 closure_init_stack(&cl);
252 bio_init(&o.op.wbio.bio, o.bv, ARRAY_SIZE(o.bv));
253 bch2_bio_map(&o.op.wbio.bio, buf, len);
255 bch2_write_op_init(&o.op, c, bch2_opts_to_inode_opts(c->opts));
256 o.op.write_point = writepoint_hashed(0);
257 o.op.nr_replicas = 1;
258 o.op.pos = POS(dst_inode->bi_inum, dst_offset >> 9);
260 int ret = bch2_disk_reservation_get(c, &o.op.res, len >> 9,
261 c->opts.data_replicas, 0);
263 die("error reserving space in new filesystem: %s", strerror(-ret));
265 closure_call(&o.op.cl, bch2_write, NULL, &cl);
268 dst_inode->bi_sectors += len >> 9;
/*
 * Copy the byte range [@start, @end) of @src_fd into @dst_inode through the
 * file-scope bounce buffer.
 *
 * Data is read with xpread() in chunks of at most sizeof(buf) and written
 * at the same offset it was read from.  Each chunk is zero-padded up to a
 * multiple of block_bytes(c) because write_data() requires block-aligned
 * lengths.
 */
271 static void copy_data(struct bch_fs *c,
272 struct bch_inode_unpacked *dst_inode,
273 int src_fd, u64 start, u64 end)
275 while (start < end) {
276 unsigned len = min_t(u64, end - start, sizeof(buf));
/* bytes needed to round this chunk up to a whole block */
277 unsigned pad = round_up(len, block_bytes(c)) - len;
279 xpread(src_fd, buf, len, start);
280 memset(buf + len, 0, pad);
282 write_data(c, dst_inode, start, buf, len + pad);
/*
 * Make @dst reference data that already sits on device 0 instead of copying
 * it: extent keys are built by hand, given a pointer into the bucket that
 * contains @physical, and inserted into the extents btree.
 *
 * @logical, @physical and @length must all be multiples of block_bytes(c)
 * (BUG_ON otherwise).  Callers pass byte values (fiemap offsets, hole
 * bounds); the later comparison against bucket_to_sector() implies they are
 * converted to sectors before that point -- confirm.  dst->bi_sectors is
 * increased by the sectors linked; the inode is not written back here.
 * Reservation or insert failures are fatal (die()).
 */
287 static void link_data(struct bch_fs *c, struct bch_inode_unpacked *dst,
288 u64 logical, u64 physical, u64 length)
290 struct bch_dev *ca = c->devs[0];
292 BUG_ON(logical & (block_bytes(c) - 1));
293 BUG_ON(physical & (block_bytes(c) - 1));
294 BUG_ON(length & (block_bytes(c) - 1));
/* the linked range must end within the device */
300 BUG_ON(physical + length > bucket_to_sector(ca, ca->mi.nbuckets));
303 struct bkey_i_extent *e;
304 __BKEY_PADDED(k, BKEY_EXTENT_VAL_U64s_MAX) k;
305 u64 b = sector_to_bucket(ca, physical);
306 struct disk_reservation res;
/*
 * Limit this extent to what is left of the current bucket.  bucket_size - 1
 * is used as a mask, which assumes a power-of-two bucket size -- confirm.
 */
310 sectors = min(ca->mi.bucket_size -
311 (physical & (ca->mi.bucket_size - 1)),
314 e = bkey_extent_init(&k.k);
315 e->k.p.inode = dst->bi_inum;
/* the key position is the END of the range it covers */
316 e->k.p.offset = logical + sectors;
318 bch2_bkey_append_ptr(&e->k_i, (struct bch_extent_ptr) {
321 .gen = bucket(ca, b)->mark.gen,
324 ret = bch2_disk_reservation_get(c, &res, sectors, 1,
325 BCH_DISK_RESERVATION_NOFAIL);
327 die("error reserving space in new filesystem: %s",
330 bch2_mark_bkey_replicas(c, extent_i_to_s_c(e).s_c);
332 ret = bch2_btree_insert(c, BTREE_ID_extents, &e->k_i,
335 die("btree insert error %s", strerror(-ret));
/* the reservation is released once the insert has been done */
337 bch2_disk_reservation_put(c, &res);
339 dst->bi_sectors += sectors;
346 static void copy_link(struct bch_fs *c, struct bch_inode_unpacked *dst,
349 ssize_t ret = readlink(src, buf, sizeof(buf));
351 die("readlink error: %m");
353 write_data(c, dst, 0, buf, round_up(ret, block_bytes(c)));
/*
 * Bring the contents of the regular file @src_fd (@src_size bytes, named
 * @src_path in diagnostics) into inode @dst.
 *
 * The file's extents are walked twice with fiemap_for_each().  The first
 * pass only looks for FIEMAP_EXTENT_UNKNOWN extents.  In the second pass
 * each extent is handled in one of two ways:
 *  - copied through copy_data() when it is flagged UNKNOWN, ENCODED,
 *    NOT_ALIGNED or DATA_INLINE, or when it lives in the first 1 MiB of the
 *    device;
 *  - otherwise recorded in @extents and referenced in place with
 *    link_data().
 * Extents whose logical offset, length or (for linked extents) physical
 * offset is not block-aligned are fatal (die()).
 */
356 static void copy_file(struct bch_fs *c, struct bch_inode_unpacked *dst,
357 int src_fd, u64 src_size,
358 char *src_path, ranges *extents)
360 struct fiemap_iter iter;
361 struct fiemap_extent e;
363 fiemap_for_each(src_fd, iter, e)
364 if (e.fe_flags & FIEMAP_EXTENT_UNKNOWN) {
369 fiemap_for_each(src_fd, iter, e) {
370 if ((e.fe_logical & (block_bytes(c) - 1)) ||
371 (e.fe_length & (block_bytes(c) - 1)))
372 die("Unaligned extent in %s - can't handle", src_path);
/* these extents cannot be referenced in place, so their data is copied */
374 if (e.fe_flags & (FIEMAP_EXTENT_UNKNOWN|
375 FIEMAP_EXTENT_ENCODED|
376 FIEMAP_EXTENT_NOT_ALIGNED|
377 FIEMAP_EXTENT_DATA_INLINE)) {
378 copy_data(c, dst, src_fd, e.fe_logical,
379 min(src_size - e.fe_logical,
385 * if the data is below 1 MB, copy it so it doesn't conflict
386 * with bcachefs's potentially larger superblock:
388 if (e.fe_physical < 1 << 20) {
389 copy_data(c, dst, src_fd, e.fe_logical,
390 min(src_size - e.fe_logical,
395 if ((e.fe_physical & (block_bytes(c) - 1)))
396 die("Unaligned extent in %s - can't handle", src_path);
/* remember the physical range as in use, then point dst at it */
398 range_add(extents, e.fe_physical, e.fe_length);
399 link_data(c, dst, e.fe_logical, e.fe_physical, e.fe_length);
/*
 * State carried through the recursive copy_dir() walk.
 *
 * hardlinks is a radix tree indexed by source st_ino; copy_dir() stores the
 * destination inode number there for regular files so that a later
 * directory entry with the same st_ino becomes a hardlink.
 */
403 struct copy_fs_state {
407 GENRADIX(u64) hardlinks;
/*
 * Recursively copy the directory open at @src_fd (named @src_path in
 * diagnostics) into the bcachefs directory inode @dst.
 *
 * For every entry returned by readdir():
 *  - ".", ".." and the entry whose st_ino equals s->bcachefs_inum are
 *    tested for together (presumably to skip them -- confirm);
 *  - the entry must live on device s->dev, otherwise die();
 *  - a regular file whose source inode was already copied becomes a
 *    hardlink via create_link();
 *  - otherwise a new inode is created with the source's uid, gid, mode and
 *    rdev, its times and xattrs are copied, its contents are brought over
 *    according to mode_to_type(st_mode), and it is written with
 *    update_inode().
 * Entry names are opened relative to the current working directory.
 * readdir() errors are detected through errno, which is cleared before each
 * call.
 */
411 static void copy_dir(struct copy_fs_state *s,
413 struct bch_inode_unpacked *dst,
414 int src_fd, const char *src_path)
416 DIR *dir = fdopendir(src_fd);
419 while ((errno = 0), (d = readdir(dir))) {
420 struct bch_inode_unpacked inode;
424 die("chdir error: %m");
427 xfstatat(src_fd, d->d_name, AT_SYMLINK_NOFOLLOW);
429 if (!strcmp(d->d_name, ".") ||
430 !strcmp(d->d_name, "..") ||
431 stat.st_ino == s->bcachefs_inum)
434 char *child_path = mprintf("%s/%s", src_path, d->d_name);
436 if (stat.st_dev != s->dev)
437 die("%s does not have correct st_dev!", child_path);
/* only regular files get a slot in the hardlink table, keyed by st_ino */
439 u64 *dst_inum = S_ISREG(stat.st_mode)
440 ? genradix_ptr_alloc(&s->hardlinks, stat.st_ino, GFP_KERNEL)
/* a non-zero slot means this source inode was already created: link to it */
443 if (dst_inum && *dst_inum) {
444 create_link(c, dst, d->d_name, *dst_inum, S_IFREG);
448 inode = create_file(c, dst, d->d_name,
449 stat.st_uid, stat.st_gid,
450 stat.st_mode, stat.st_rdev);
/* remember the new inode number for later links to the same source inode */
453 *dst_inum = inode.bi_inum;
455 copy_times(c, &inode, &stat);
456 copy_xattrs(c, &inode, d->d_name);
460 switch (mode_to_type(stat.st_mode)) {
/* recurse into the entry with copy_dir() */
462 fd = xopen(d->d_name, O_RDONLY|O_NOATIME);
463 copy_dir(s, c, &inode, fd, child_path);
/* file contents: record the size, then copy or link the extents */
467 inode.bi_size = stat.st_size;
469 fd = xopen(d->d_name, O_RDONLY|O_NOATIME);
470 copy_file(c, &inode, fd, stat.st_size,
471 child_path, &s->extents);
/* link target stored as data via copy_link() */
475 inode.bi_size = stat.st_size;
477 copy_link(c, &inode, d->d_name);
484 /* nothing else to copy for these: */
/* persist bi_size / bi_sectors / times accumulated above */
490 update_inode(c, &inode);
496 die("readdir error: %m");
/*
 * Create the file @file_path on the filesystem being migrated and reserve
 * @size bytes in it with fallocate(); its extents are the space the new
 * bcachefs filesystem is allowed to use.
 *
 * The file is opened with O_RDWR|O_CREAT, with O_EXCL in one of the two
 * open() variants (selected by a condition on the preceding line,
 * presumably the force flag -- confirm).  It must live on device @dev.
 * Its inode number is stored in *@bcachefs_inum.  Every extent has to be
 * fully mapped (no UNKNOWN, ENCODED, NOT_ALIGNED or DATA_INLINE flag) and
 * aligned to @block_size, which is used as a mask and so is assumed to be
 * a power of two.  Any violation or system call failure is fatal (die()).
 *
 * The physical ranges are collected, sorted and merged; presumably this
 * list is the return value -- confirm.
 */
499 static ranges reserve_new_fs_space(const char *file_path, unsigned block_size,
500 u64 size, u64 *bcachefs_inum, dev_t dev,
504 ? open(file_path, O_RDWR|O_CREAT, 0600)
505 : open(file_path, O_RDWR|O_CREAT|O_EXCL, 0600);
507 die("Error creating %s for bcachefs metadata: %m",
510 struct stat statbuf = xfstat(fd);
512 if (statbuf.st_dev != dev)
513 die("bcachefs file has incorrect device");
515 *bcachefs_inum = statbuf.st_ino;
517 if (fallocate(fd, 0, 0, size))
518 die("Error reserving space for bcachefs metadata: %m");
522 struct fiemap_iter iter;
523 struct fiemap_extent e;
524 ranges extents = { NULL };
526 fiemap_for_each(fd, iter, e) {
527 if (e.fe_flags & (FIEMAP_EXTENT_UNKNOWN|
528 FIEMAP_EXTENT_ENCODED|
529 FIEMAP_EXTENT_NOT_ALIGNED|
530 FIEMAP_EXTENT_DATA_INLINE))
531 die("Unable to continue: metadata file not fully mapped");
533 if ((e.fe_physical & (block_size - 1)) ||
534 (e.fe_length & (block_size - 1)))
535 die("Unable to continue: unaligned extents in metadata file");
537 range_add(&extents, e.fe_physical, e.fe_length);
541 ranges_sort_merge(&extents);
/*
 * Create the file "old_migrated_filesystem" in @root_inode (uid 0, gid 0,
 * mode S_IFREG|0400) to pin the device space not covered by @extents.
 *
 * Its size is the device size in bytes.  After sorting and merging
 * @extents, every hole between them is linked into the file with
 * link_data() at a logical offset equal to its physical offset.  The
 * inode is then written with update_inode().
 */
545 static void reserve_old_fs_space(struct bch_fs *c,
546 struct bch_inode_unpacked *root_inode,
549 struct bch_dev *ca = c->devs[0];
550 struct bch_inode_unpacked dst;
551 struct hole_iter iter;
554 dst = create_file(c, root_inode, "old_migrated_filesystem",
555 0, 0, S_IFREG|0400, 0);
/* sectors -> bytes */
556 dst.bi_size = bucket_to_sector(ca, ca->mi.nbuckets) << 9;
558 ranges_sort_merge(extents);
/* identity mapping: logical offset in the file == physical offset on disk */
560 for_each_hole(iter, *extents, bucket_to_sector(ca, ca->mi.nbuckets) << 9, i)
561 link_data(c, &dst, i.start, i.start, i.end - i.start);
563 update_inode(c, &dst);
/*
 * Copy the whole directory tree open at @src_fd into the new filesystem @c.
 *
 * The bcachefs root inode (BCACHEFS_ROOT_INO) is looked up, the source
 * root's times and xattrs are applied to it, and the tree is copied with
 * copy_dir().  The remaining old-filesystem space is then pinned with
 * reserve_old_fs_space(), the root inode is written back, the walk state
 * is freed and bch2_alloc_write() is called.  @bcachefs_inum is stored in
 * the walk state; copy_dir() compares each entry's st_ino against it.
 * Lookup failures are fatal (die()).
 */
566 static void copy_fs(struct bch_fs *c, int src_fd, const char *src_path,
567 u64 bcachefs_inum, ranges *extents)
571 struct bch_inode_unpacked root_inode;
572 int ret = bch2_inode_find_by_inum(c, BCACHEFS_ROOT_INO, &root_inode);
574 die("error looking up root directory: %s", strerror(-ret));
577 die("chdir error: %m");
579 struct stat stat = xfstat(src_fd);
580 copy_times(c, &root_inode, &stat);
/* "." resolves against the current working directory */
581 copy_xattrs(c, &root_inode, ".");
583 struct copy_fs_state s = {
584 .bcachefs_inum = bcachefs_inum,
590 copy_dir(&s, c, &root_inode, src_fd, src_path);
592 reserve_old_fs_space(c, &root_inode, &s.extents);
594 update_inode(c, &root_inode);
596 darray_free(s.extents);
597 genradix_free(&s.hardlinks);
599 bch2_alloc_write(c, false);
/*
 * Choose where the new superblock goes, inside one of the reserved
 * @extents, and record it in @dev.
 *
 * For each extent the candidate start is the extent start, but no lower
 * than 256 KiB, rounded up to a bucket boundary; the end is rounded down to
 * a bucket boundary.  An extent qualifies when at least 128 KiB fit between
 * them.  dev->sb_offset is then the start in 512-byte sectors and
 * dev->sb_end is 256 sectors (128 KiB) later.  If no extent qualifies the
 * program dies.
 */
602 static void find_superblock_space(ranges extents, struct dev_opts *dev)
606 darray_foreach(i, extents) {
/* dev->bucket_size << 9 converts the bucket size from sectors to bytes */
607 u64 start = round_up(max(256ULL << 10, i->start),
608 dev->bucket_size << 9);
609 u64 end = round_down(i->end,
610 dev->bucket_size << 9);
612 if (start + (128 << 10) <= end) {
613 dev->sb_offset = start >> 9;
614 dev->sb_end = dev->sb_offset + 256;
619 die("Couldn't find a valid location for superblock");
/* Print the help text of "bcachefs migrate" to stdout. */
622 static void migrate_usage(void)
624 puts("bcachefs migrate - migrate an existing filesystem to bcachefs\n"
625 "Usage: bcachefs migrate [OPTION]...\n"
628 " -f fs Root of filesystem to migrate\n"
629 " --encrypted Enable whole filesystem encryption (chacha20/poly1305)\n"
630 " --no_passphrase Don't encrypt master encryption key\n"
631 " -F Force, even if metadata file already exists\n"
632 " -h Display this help and exit\n"
633 "Report bugs to <linux-bcache@vger.kernel.org>");
/*
 * Long options understood by cmd_migrate(): --encrypted is reported by
 * getopt_long() as 'e' and --no_passphrase as 'p'; neither takes an
 * argument.
 */
636 static const struct option migrate_opts[] = {
637 { "encrypted", no_argument, NULL, 'e' },
638 { "no_passphrase", no_argument, NULL, 'p' },
/*
 * Migrate the filesystem mounted at @fs_path to bcachefs in place.
 *
 * Steps, in order:
 *  1. @fs_path must be a mount point and a directory; the backing block
 *     device is derived from its st_dev and opened read-write.
 *  2. The block size is taken from the device and a bucket size is picked.
 *  3. Space for the new filesystem is reserved in the file
 *     "<fs_path>/bcachefs" (one fifth of the device size) and a superblock
 *     location is chosen inside that space.
 *  4. The device is formatted; when a passphrase is set the key is added
 *     with bch2_add_key().
 *  5. The new filesystem is opened at the chosen superblock offset with
 *     nostart and noexcl set, everything outside the reserved extents is
 *     marked unusable, the filesystem is started and the old tree is
 *     copied in with copy_fs().
 *  6. The filesystem is opened once more with nochanges set as an fsck
 *     pass, and instructions for mounting and finishing the migration are
 *     printed.
 * Every failure is fatal (die()).
 */
642 static int migrate_fs(const char *fs_path,
643 struct bch_opt_strs fs_opt_strs,
644 struct bch_opts fs_opts,
645 struct format_opts format_opts,
648 if (!path_is_fs_root(fs_path))
649 die("%s is not a filesystem root", fs_path);
651 int fs_fd = xopen(fs_path, O_RDONLY|O_NOATIME);
652 struct stat stat = xfstat(fs_fd);
654 if (!S_ISDIR(stat.st_mode))
655 die("%s is not a directory", fs_path);
657 struct dev_opts dev = dev_opts_default();
659 dev.path = dev_t_to_path(stat.st_dev);
660 dev.fd = xopen(dev.path, O_RDWR);
662 opt_set(fs_opts, block_size, get_blocksize(dev.path, dev.fd));
664 char *file_path = mprintf("%s/bcachefs", fs_path);
665 printf("Creating new filesystem on %s in space reserved at %s\n",
666 dev.path, file_path);
668 bch2_pick_bucket_size(fs_opts, &dev);
/* block_size << 9 converts sectors to bytes; a fifth of the device is reserved */
671 ranges extents = reserve_new_fs_space(file_path,
672 fs_opts.block_size << 9,
673 get_size(dev.path, dev.fd) / 5,
674 &bcachefs_inum, stat.st_dev, force);
676 find_superblock_space(extents, &dev);
678 struct bch_sb *sb = bch2_format(fs_opt_strs,
679 fs_opts,format_opts, &dev, 1);
680 u64 sb_offset = le64_to_cpu(sb->layout.sb_offset[0]);
682 if (format_opts.passphrase)
683 bch2_add_key(sb, format_opts.passphrase);
687 struct bch_opts opts = bch2_opts_empty();
688 struct bch_fs *c = NULL;
689 char *path[1] = { dev.path };
/* the superblock is not at the default location, so pass its offset */
691 opt_set(opts, sb, sb_offset);
692 opt_set(opts, nostart, true);
693 opt_set(opts, noexcl, true);
695 c = bch2_fs_open(path, 1, opts);
697 die("Error opening new filesystem: %s", strerror(-PTR_ERR(c)));
/* done before bch2_fs_start() so space still used by the old fs is never allocated */
699 mark_unreserved_space(c, extents);
701 int ret = bch2_fs_start(c);
703 die("Error starting new filesystem: %s", strerror(-ret));
705 copy_fs(c, fs_fd, fs_path, bcachefs_inum, &extents);
709 printf("Migrate complete, running fsck:\n");
710 opt_set(opts, nostart, false);
711 opt_set(opts, nochanges, true);
713 c = bch2_fs_open(path, 1, opts);
715 die("Error opening new filesystem: %s", strerror(-PTR_ERR(c)));
718 printf("fsck complete\n");
720 printf("To mount the new filesystem, run\n"
721 " mount -t bcachefs -o sb=%llu %s dir\n"
723 "After verifying that the new filesystem is correct, to create a\n"
724 "superblock at the default offset and finish the migration run\n"
725 " bcachefs migrate-superblock -d %s -o %llu\n"
727 "The new filesystem will have a file at /old_migrated_filesystem\n"
728 "referencing all disk space that might be used by the existing\n"
729 "filesystem. That file can be deleted once the old filesystem is\n"
730 "no longer needed (and should be deleted prior to running\n"
731 "bcachefs migrate-superblock)\n",
732 sb_offset, dev.path, dev.path, sb_offset);
/*
 * Entry point of "bcachefs migrate".
 *
 * Filesystem options are first pulled out of argv with
 * bch2_cmdline_opts_get(..., OPT_FORMAT) and parsed; the remaining
 * arguments go through getopt_long() with the short options "f:Fh" and the
 * long options in migrate_opts.  A filesystem path is mandatory (die()
 * otherwise).  When encryption is requested without --no_passphrase, the
 * passphrase is read twice from the user.  The result of migrate_fs() is
 * returned.
 */
736 int cmd_migrate(int argc, char *argv[])
738 struct format_opts format_opts = format_opts_default();
739 char *fs_path = NULL;
740 bool no_passphrase = false, force = false;
743 struct bch_opt_strs fs_opt_strs =
744 bch2_cmdline_opts_get(&argc, argv, OPT_FORMAT);
745 struct bch_opts fs_opts = bch2_parse_opts(fs_opt_strs);
747 while ((opt = getopt_long(argc, argv, "f:Fh",
748 migrate_opts, NULL)) != -1)
754 format_opts.encrypted = true;
757 no_passphrase = true;
768 die("Please specify a filesystem to migrate");
770 if (format_opts.encrypted && !no_passphrase)
771 format_opts.passphrase = read_passphrase_twice("Enter passphrase: ");
773 return migrate_fs(fs_path,
/* Print the help text of "bcachefs migrate-superblock" to stdout. */
779 static void migrate_superblock_usage(void)
781 puts("bcachefs migrate-superblock - create default superblock after migrating\n"
782 "Usage: bcachefs migrate-superblock [OPTION]...\n"
785 " -d device Device to create superblock for\n"
786 " -o offset Offset of existing superblock\n"
787 " -h Display this help and exit\n"
788 "Report bugs to <linux-bcache@vger.kernel.org>");
/*
 * Entry point of "bcachefs migrate-superblock": add a superblock at the
 * default location (BCH_SB_SECTOR) to a migrated filesystem.
 *
 * Options are parsed with getopt() using "d:o:h"; the offset is parsed as
 * a decimal u64.  Both a device and the offset of the existing superblock
 * are required.  The superblock found at that offset is read, the program
 * dies if its layout has no free slot or already lists BCH_SB_SECTOR, and
 * otherwise the existing offsets are shifted up by one slot, BCH_SB_SECTOR
 * is placed first and the superblock is written out with
 * bch2_super_write().
 */
791 int cmd_migrate_superblock(int argc, char *argv[])
797 while ((opt = getopt(argc, argv, "d:o:h")) != -1)
803 ret = kstrtou64(optarg, 10, &offset);
805 die("Invalid offset");
808 migrate_superblock_usage();
813 die("Please specify a device");
816 die("Please specify offset of existing superblock");
818 int fd = xopen(dev, O_RDWR);
819 struct bch_sb *sb = __bch2_super_read(fd, offset);
/* there must be room for one more entry in the layout */
821 if (sb->layout.nr_superblocks >= ARRAY_SIZE(sb->layout.sb_offset))
822 die("Can't add superblock: no space left in superblock layout");
825 for (i = 0; i < sb->layout.nr_superblocks; i++)
826 if (le64_to_cpu(sb->layout.sb_offset[i]) == BCH_SB_SECTOR)
827 die("Superblock layout already has default superblock");
/* source and destination overlap, hence memmove() */
829 memmove(&sb->layout.sb_offset[1],
830 &sb->layout.sb_offset[0],
831 sb->layout.nr_superblocks * sizeof(u64));
832 sb->layout.nr_superblocks++;
834 sb->layout.sb_offset[0] = cpu_to_le64(BCH_SB_SECTOR);
836 bch2_super_write(fd, sb);