9 #include <sys/sysmacros.h>
10 #include <sys/types.h>
14 #include <linux/fiemap.h>
16 #include <linux/stat.h>
18 #include <uuid/uuid.h>
22 #include "libbcachefs.h"
24 #include <linux/dcache.h>
25 #include <linux/generic-radix-tree.h>
26 #include <linux/xattr.h>
27 #include "libbcachefs/bcachefs.h"
28 #include "libbcachefs/alloc_background.h"
29 #include "libbcachefs/alloc_foreground.h"
30 #include "libbcachefs/btree_update.h"
31 #include "libbcachefs/buckets.h"
32 #include "libbcachefs/dirent.h"
33 #include "libbcachefs/fs-common.h"
34 #include "libbcachefs/inode.h"
35 #include "libbcachefs/io.h"
36 #include "libbcachefs/replicas.h"
37 #include "libbcachefs/str_hash.h"
38 #include "libbcachefs/super.h"
39 #include "libbcachefs/xattr.h"
41 /* XXX cut and pasted from fsck.c */
/*
 * QSTR(n): brace initializer that sets a .len member to strlen(n) and a
 * .name member to n. It is used below to initialize `struct qstr` locals.
 * The argument is expanded twice, so it should be free of side effects.
 */
42 #define QSTR(n) { { { .len = strlen(n) } }, .name = n }
/*
 * Build a "/dev/<name>" path string for the block device number @dev.
 *
 * The symlink /sys/dev/block/<major>:<minor> is read with readlink(), the
 * last '/' in its target is located with strrchr(), and the result of
 * mprintf("/dev/%s", p) is returned. Failures are reported through die().
 *
 * NOTE(review): this listing has gaps (no braces, no visible declaration of
 * `ret`, no visible termination of `link`, and nothing visible between the
 * strrchr() call and the use of `p`) - read the complete function before
 * relying on the details above.
 */
44 static char *dev_t_to_path(dev_t dev)
46 char link[PATH_MAX], *p;
49 char *sysfs_dev = mprintf("/sys/dev/block/%u:%u",
50 major(dev), minor(dev));
51 ret = readlink(sysfs_dev, link, sizeof(link));
/* a length that fills the whole buffer is rejected along with errors */
54 if (ret < 0 || ret >= sizeof(link))
55 die("readlink error while looking up block device: %m");
59 p = strrchr(link, '/');
61 die("error looking up device name");
64 return mprintf("/dev/%s", p);
/*
 * Look for @path among the mount points listed in /proc/self/mountinfo.
 *
 * Each line read with getline() is split on single spaces with strsep():
 * four fields are skipped and the fifth is compared to @path with strcmp(),
 * i.e. an exact string match.
 *
 * NOTE(review): the lines that initialize `p`, act on a match and produce
 * the return value are not visible in this listing.
 */
67 static bool path_is_fs_root(const char *path)
69 char *line = NULL, *p, *mount;
74 f = fopen("/proc/self/mountinfo", "r");
76 die("Error getting mount information");
78 while (getline(&line, &n, f) != -1) {
81 strsep(&p, " "); /* mount id */
82 strsep(&p, " "); /* parent id */
83 strsep(&p, " "); /* dev */
84 strsep(&p, " "); /* root */
/* fifth field; strsep() yields NULL if the line ran out of fields */
85 mount = strsep(&p, " ");
88 if (mount && !strcmp(path, mount))
/*
 * Set bits in ca->buckets_nouse (ca = c->devs[0]) for the buckets touched
 * by each range that for_each_hole() yields over @extents, up to the byte
 * offset bucket_to_sector(ca, ca->mi.nbuckets) << 9.
 *
 * The iterator's start/end values are byte offsets: they are shifted right
 * by 9 before sector_to_bucket(), and bucket starts are shifted left by 9
 * before being compared with i.end.
 *
 * NOTE(review): presumably for_each_hole() yields the gaps between the
 * ranges in @extents - confirm against its definition. The declarations of
 * `i` and `b`, the action taken for empty ranges and the increment of `b`
 * are not visible in this listing.
 */
99 static void mark_unreserved_space(struct bch_fs *c, ranges extents)
101 struct bch_dev *ca = c->devs[0];
102 struct hole_iter iter;
105 for_each_hole(iter, extents, bucket_to_sector(ca, ca->mi.nbuckets) << 9, i) {
108 if (i.start == i.end)
111 b = sector_to_bucket(ca, i.start >> 9);
113 set_bit(b, ca->buckets_nouse);
115 } while (bucket_to_sector(ca, b) << 9 < i.end);
/*
 * Pack @inode with bch2_inode_pack() and insert the packed key into
 * BTREE_ID_inodes with bch2_btree_insert(). A failure is reported through
 * die() with strerror(-ret).
 *
 * NOTE(review): the remaining bch2_btree_insert() arguments and the error
 * test are not visible in this listing.
 */
119 static void update_inode(struct bch_fs *c,
120 struct bch_inode_unpacked *inode)
122 struct bkey_inode_buf packed;
125 bch2_inode_pack(c, &packed, inode);
126 ret = bch2_btree_insert(c, BTREE_ID_inodes, &packed.inode.k_i,
129 die("error updating inode: %s", strerror(-ret));
/*
 * Create a directory entry @name under @parent that refers to the existing
 * inode number @inum, by running bch2_link_trans() inside bch2_trans_do().
 *
 * Both inodes are addressed as (subvol_inum) { 1, <inode number> }.
 * parent_u and inode are local outputs of the call; nothing visible here
 * reads them afterwards, and @mode is not referenced on any visible line.
 * A failure is reported through die() with strerror(-ret).
 */
132 static void create_link(struct bch_fs *c,
133 struct bch_inode_unpacked *parent,
134 const char *name, u64 inum, mode_t mode)
136 struct qstr qstr = QSTR(name);
137 struct bch_inode_unpacked parent_u;
138 struct bch_inode_unpacked inode;
140 int ret = bch2_trans_do(c, NULL, NULL, 0,
141 bch2_link_trans(&trans,
142 (subvol_inum) { 1, parent->bi_inum }, &parent_u,
143 (subvol_inum) { 1, inum }, &inode, &qstr));
145 die("error creating hardlink: %s", strerror(-ret));
/*
 * Create a new inode under @parent by running bch2_create_trans() inside
 * bch2_trans_do(), passing @uid, @gid, @mode and @rdev through.
 *
 * The parent is addressed as (subvol_inum) { 1, parent->bi_inum } and
 * @parent itself is handed to the transaction as well. A failure is
 * reported through die() with strerror(-ret).
 *
 * NOTE(review): the parameter that supplies `name`, one argument line of
 * the bch2_create_trans() call and the return statement are not visible in
 * this listing; presumably new_inode is what gets returned - confirm.
 */
148 static struct bch_inode_unpacked create_file(struct bch_fs *c,
149 struct bch_inode_unpacked *parent,
151 uid_t uid, gid_t gid,
152 mode_t mode, dev_t rdev)
154 struct qstr qstr = QSTR(name);
155 struct bch_inode_unpacked new_inode;
157 int ret = bch2_trans_do(c, NULL, NULL, 0,
158 bch2_create_trans(&trans,
159 (subvol_inum) { 1, parent->bi_inum }, parent,
161 uid, gid, mode, rdev, NULL, NULL,
162 (subvol_inum) {}, 0));
164 die("error creating file: %s", strerror(-ret));
/*
 * Loop header that assigns successive elements of the array @handlers
 * points into to @handler. @handlers itself is post-incremented on every
 * step, so the caller's pointer is consumed by the loop.
 *
 * NOTE(review): the loop condition line is not visible in this listing.
 */
169 #define for_each_xattr_handler(handlers, handler) \
171 for ((handler) = *(handlers)++; \
173 (handler) = *(handlers)++)
/*
 * Find the entry of bch2_xattr_handlers that applies to the attribute name
 * *@name, by running strcmp_prefix() against each handler's
 * xattr_prefix().
 *
 * Two failure values are visible: ERR_PTR(-EINVAL) and
 * ERR_PTR(-EOPNOTSUPP).
 *
 * NOTE(review): @name is passed by reference, so presumably it is advanced
 * past the matched prefix on success - the lines that do this, the
 * declaration of `n`, and the success return are not visible in this
 * listing.
 */
175 static const struct xattr_handler *xattr_resolve_name(char **name)
177 const struct xattr_handler **handlers = bch2_xattr_handlers;
178 const struct xattr_handler *handler;
180 for_each_xattr_handler(handlers, handler) {
183 n = strcmp_prefix(*name, xattr_prefix(handler));
/*
 * '!' binds tighter than '^': true when exactly one of "handler->prefix is
 * unset" and "nothing follows the prefix (*n == 0)" holds.
 */
185 if (!handler->prefix ^ !*n) {
188 return ERR_PTR(-EINVAL);
194 return ERR_PTR(-EOPNOTSUPP);
/*
 * Set @dst's bi_atime, bi_mtime and bi_ctime from src->st_atim, st_mtim
 * and st_ctim, converting each with timespec_to_bch2_time().
 *
 * NOTE(review): the declaration of the `src` parameter is not visible in
 * this listing.
 */
197 static void copy_times(struct bch_fs *c, struct bch_inode_unpacked *dst,
200 dst->bi_atime = timespec_to_bch2_time(c, src->st_atim);
201 dst->bi_mtime = timespec_to_bch2_time(c, src->st_mtim);
202 dst->bi_ctime = timespec_to_bch2_time(c, src->st_ctim);
/*
 * Copy the extended attributes of the path `src` onto inode @dst.
 *
 * The attribute names are listed with llistxattr() into a XATTR_LIST_MAX
 * byte stack buffer and walked as consecutive NUL-terminated strings (the
 * next name starts strlen(attr) + 1 bytes on). Each value is read with
 * lgetxattr() into a XATTR_SIZE_MAX byte stack buffer, the name is resolved
 * with xattr_resolve_name(), and the value is stored with bch2_xattr_set()
 * inside bch2_trans_do(), using h->flags. The l*xattr() variants operate on
 * a symbolic link itself rather than its target. Errors go through die().
 *
 * NOTE(review): the `src` parameter declaration, the error tests, one
 * argument line of the bch2_xattr_set() call and any check of `h` for an
 * ERR_PTR value before h->flags is read are not visible in this listing.
 */
205 static void copy_xattrs(struct bch_fs *c, struct bch_inode_unpacked *dst,
208 struct bch_hash_info hash_info = bch2_hash_info_init(c, dst);
210 char attrs[XATTR_LIST_MAX];
211 ssize_t attrs_size = llistxattr(src, attrs, sizeof(attrs));
213 die("listxattr error: %m");
217 attr < attrs + attrs_size;
219 next = attr + strlen(attr) + 1;
221 char val[XATTR_SIZE_MAX];
222 ssize_t val_size = lgetxattr(src, attr, val, sizeof(val));
225 die("error getting xattr val: %m");
227 const struct xattr_handler *h = xattr_resolve_name(&attr);
229 int ret = bch2_trans_do(c, NULL, NULL, 0,
230 bch2_xattr_set(&trans,
231 (subvol_inum) { 1, dst->bi_inum },
233 val, val_size, h->flags, 0));
235 die("error creating xattr: %s", strerror(-ret));
/*
 * 1 MiB (1 << 20 bytes), PAGE_SIZE-aligned staging buffer. copy_data()
 * reads file contents into it and copy_link() reads symlink targets into
 * it; both then pass it to write_data(), whose own `buf` parameter shadows
 * this name inside that function.
 */
239 static char buf[1 << 20] __aligned(PAGE_SIZE);
/*
 * Write @len bytes from @buf into the new filesystem as data of inode
 * @dst_inode at byte offset @dst_offset, then add len >> 9 to
 * dst_inode->bi_sectors.
 *
 * @dst_offset and @len must be multiples of block_bytes(c) (BUG_ON
 * otherwise). The write is issued as a single bch2_write() operation with
 * one replica, after reserving len >> 9 sectors with
 * bch2_disk_reservation_get(); a failed reservation goes through die().
 *
 * NOTE(review): this listing has gaps - the declaration of `o` (which the
 * `op` and `bv` members below belong to), the declaration of `cl`, the
 * error test and the wait for completion are not visible here.
 */
241 static void write_data(struct bch_fs *c,
242 struct bch_inode_unpacked *dst_inode,
243 u64 dst_offset, void *buf, size_t len)
246 struct bch_write_op op;
/*
 * One bio_vec per page of the 1 MiB (1 << 20) file-scope staging buffer
 * that the callers pass in. sizeof(buf) must not be used for this: inside
 * this function `buf` is the void * parameter, so sizeof(buf) / PAGE_SIZE
 * is the size of a pointer divided by PAGE_SIZE, i.e. a zero-length array.
 */
247 struct bio_vec bv[(1 << 20) / PAGE_SIZE];
251 BUG_ON(dst_offset & (block_bytes(c) - 1));
252 BUG_ON(len & (block_bytes(c) - 1));
254 closure_init_stack(&cl);
256 bio_init(&o.op.wbio.bio, o.bv, ARRAY_SIZE(o.bv));
257 bch2_bio_map(&o.op.wbio.bio, buf, len);
259 bch2_write_op_init(&o.op, c, bch2_opts_to_inode_opts(c->opts));
260 o.op.write_point = writepoint_hashed(0);
261 o.op.nr_replicas = 1;
/* key position: inode number, offset in 512-byte sectors */
262 o.op.pos = POS(dst_inode->bi_inum, dst_offset >> 9);
264 int ret = bch2_disk_reservation_get(c, &o.op.res, len >> 9,
265 c->opts.data_replicas, 0);
267 die("error reserving space in new filesystem: %s", strerror(-ret));
269 closure_call(&o.op.cl, bch2_write, NULL, &cl);
272 dst_inode->bi_sectors += len >> 9;
/*
 * Copy the byte range [@start, @end) of @src_fd into @dst_inode at the
 * same byte offsets.
 *
 * Data moves through the file-scope `buf` in chunks of at most sizeof(buf)
 * bytes: each chunk is read with xpread(), zero-padded up to a multiple of
 * block_bytes(c), and handed to write_data().
 *
 * NOTE(review): the line that advances `start` is not visible in this
 * listing.
 */
275 static void copy_data(struct bch_fs *c,
276 struct bch_inode_unpacked *dst_inode,
277 int src_fd, u64 start, u64 end)
279 while (start < end) {
280 unsigned len = min_t(u64, end - start, sizeof(buf));
/* bytes needed to round this chunk up to a whole block */
281 unsigned pad = round_up(len, block_bytes(c)) - len;
283 xpread(src_fd, buf, len, start);
284 memset(buf + len, 0, pad);
286 write_data(c, dst_inode, start, buf, len + pad);
/*
 * Make inode @dst reference data that already sits on device 0
 * (c->devs[0]) by inserting extent keys into BTREE_ID_extents, instead of
 * copying the data.
 *
 * @logical, @physical and @length must each be a multiple of
 * block_bytes(c) (BUG_ON otherwise). Every key built here covers `sectors`
 * sectors, at most the distance from `physical` to the end of its bucket;
 * its position is set to inode dst->bi_inum, offset logical + sectors, and
 * it carries one pointer whose generation is taken from the bucket
 * containing `physical`. For each key, `sectors` sectors are reserved with
 * BCH_DISK_RESERVATION_NOFAIL, the key is inserted, the reservation is
 * released, and dst->bi_sectors grows by `sectors`. Errors go through
 * die().
 *
 * NOTE(review): this listing has gaps. By the range check against
 * bucket_to_sector() the values are being treated as sector counts, so a
 * bytes-to-sectors conversion presumably sits in the missing lines; the
 * loop construct, the remaining pointer fields and the advance of
 * logical/physical/length are not visible either. The mask arithmetic on
 * ca->mi.bucket_size assumes it is a power of two - confirm.
 */
291 static void link_data(struct bch_fs *c, struct bch_inode_unpacked *dst,
292 u64 logical, u64 physical, u64 length)
294 struct bch_dev *ca = c->devs[0];
296 BUG_ON(logical & (block_bytes(c) - 1));
297 BUG_ON(physical & (block_bytes(c) - 1));
298 BUG_ON(length & (block_bytes(c) - 1));
304 BUG_ON(physical + length > bucket_to_sector(ca, ca->mi.nbuckets));
307 struct bkey_i_extent *e;
308 __BKEY_PADDED(k, BKEY_EXTENT_VAL_U64s_MAX) k;
309 u64 b = sector_to_bucket(ca, physical);
310 struct disk_reservation res;
314 sectors = min(ca->mi.bucket_size -
315 (physical & (ca->mi.bucket_size - 1)),
318 e = bkey_extent_init(&k.k);
319 e->k.p.inode = dst->bi_inum;
320 e->k.p.offset = logical + sectors;
322 bch2_bkey_append_ptr(&e->k_i, (struct bch_extent_ptr) {
325 .gen = bucket(ca, b)->mark.gen,
328 ret = bch2_disk_reservation_get(c, &res, sectors, 1,
329 BCH_DISK_RESERVATION_NOFAIL);
331 die("error reserving space in new filesystem: %s",
334 ret = bch2_btree_insert(c, BTREE_ID_extents, &e->k_i,
337 die("btree insert error %s", strerror(-ret));
339 bch2_disk_reservation_put(c, &res);
341 dst->bi_sectors += sectors;
348 static void copy_link(struct bch_fs *c, struct bch_inode_unpacked *dst,
351 ssize_t ret = readlink(src, buf, sizeof(buf));
353 die("readlink error: %m");
355 write_data(c, dst, 0, buf, round_up(ret, block_bytes(c)));
/*
 * Bring the contents of the regular file @src_fd (@src_size bytes, named
 * @src_path in messages) into inode @dst, extent by extent as reported by
 * fiemap_for_each().
 *
 * A first pass over the extents tests for FIEMAP_EXTENT_UNKNOWN. In the
 * second pass every extent must have fe_logical and fe_length aligned to
 * block_bytes(c), otherwise die(). Then:
 *  - extents flagged UNKNOWN, ENCODED, NOT_ALIGNED or DATA_INLINE are
 *    copied with copy_data();
 *  - extents whose fe_physical is below 1 << 20 are copied with
 *    copy_data() as well (see the comment inside the loop);
 *  - all other extents must have fe_physical aligned to block_bytes(c);
 *    their physical range is added to @extents and they are referenced in
 *    place with link_data().
 *
 * NOTE(review): what the first pass does on a match, and the second
 * operand of each min() that bounds the copied range, are not visible in
 * this listing.
 */
358 static void copy_file(struct bch_fs *c, struct bch_inode_unpacked *dst,
359 int src_fd, u64 src_size,
360 char *src_path, ranges *extents)
362 struct fiemap_iter iter;
363 struct fiemap_extent e;
365 fiemap_for_each(src_fd, iter, e)
366 if (e.fe_flags & FIEMAP_EXTENT_UNKNOWN) {
371 fiemap_for_each(src_fd, iter, e) {
372 if ((e.fe_logical & (block_bytes(c) - 1)) ||
373 (e.fe_length & (block_bytes(c) - 1)))
374 die("Unaligned extent in %s - can't handle", src_path);
376 if (e.fe_flags & (FIEMAP_EXTENT_UNKNOWN|
377 FIEMAP_EXTENT_ENCODED|
378 FIEMAP_EXTENT_NOT_ALIGNED|
379 FIEMAP_EXTENT_DATA_INLINE)) {
380 copy_data(c, dst, src_fd, e.fe_logical,
381 min(src_size - e.fe_logical,
387 * if the data is below 1 MB, copy it so it doesn't conflict
388 * with bcachefs's potentially larger superblock:
390 if (e.fe_physical < 1 << 20) {
391 copy_data(c, dst, src_fd, e.fe_logical,
392 min(src_size - e.fe_logical,
397 if ((e.fe_physical & (block_bytes(c) - 1)))
398 die("Unaligned extent in %s - can't handle", src_path);
400 range_add(extents, e.fe_physical, e.fe_length);
401 link_data(c, dst, e.fe_logical, e.fe_physical, e.fe_length);
/*
 * State shared by one copy_fs() run and its copy_dir() recursion.
 *
 * NOTE(review): only `hardlinks` is visible in this listing; copy_dir()
 * also reads members named bcachefs_inum, dev and extents.
 */
405 struct copy_fs_state {
/* indexed by source st_ino in copy_dir(); holds the destination inode number */
409 GENRADIX(u64) hardlinks;
/*
 * Recursively copy the directory open at @src_fd (called @src_path in
 * messages) into the directory inode @dst of the new filesystem.
 *
 * For each readdir() entry:
 *  - the entry is stat'ed relative to @src_fd without following symlinks;
 *  - ".", ".." and the entry whose st_ino equals s->bcachefs_inum are
 *    tested for first;
 *  - an entry whose st_dev differs from s->dev is fatal;
 *  - for regular files, s->hardlinks is consulted by st_ino: a non-zero
 *    slot means that inode was created earlier, so only a new link is
 *    made with create_link(); otherwise the inode number of the file
 *    created below is recorded in the slot;
 *  - a new inode is created with the source's uid, gid, mode and rdev,
 *    then times and xattrs are copied;
 *  - by mode_to_type(): directories recurse through copy_dir(), regular
 *    files get bi_size set and go through copy_file(), symlinks get
 *    bi_size set and go through copy_link(); some types have nothing
 *    further to copy;
 *  - the inode is written back with update_inode().
 *
 * errno is cleared before every readdir() call so that a NULL return can
 * be told apart from an error ("readdir error").
 *
 * NOTE(review): this listing has gaps - the `c` parameter declaration, the
 * call that produces "chdir error" (entries are later opened by bare
 * d_name, so presumably the working directory is switched to @src_fd),
 * the skip action, the case labels, and any cleanup of `child_path`, `fd`
 * and `dir` are not visible here.
 */
413 static void copy_dir(struct copy_fs_state *s,
415 struct bch_inode_unpacked *dst,
416 int src_fd, const char *src_path)
418 DIR *dir = fdopendir(src_fd);
421 while ((errno = 0), (d = readdir(dir))) {
422 struct bch_inode_unpacked inode;
426 die("chdir error: %m");
429 xfstatat(src_fd, d->d_name, AT_SYMLINK_NOFOLLOW);
431 if (!strcmp(d->d_name, ".") ||
432 !strcmp(d->d_name, "..") ||
433 stat.st_ino == s->bcachefs_inum)
436 char *child_path = mprintf("%s/%s", src_path, d->d_name);
438 if (stat.st_dev != s->dev)
439 die("%s does not have correct st_dev!", child_path);
441 u64 *dst_inum = S_ISREG(stat.st_mode)
442 ? genradix_ptr_alloc(&s->hardlinks, stat.st_ino, GFP_KERNEL)
445 if (dst_inum && *dst_inum) {
446 create_link(c, dst, d->d_name, *dst_inum, S_IFREG);
450 inode = create_file(c, dst, d->d_name,
451 stat.st_uid, stat.st_gid,
452 stat.st_mode, stat.st_rdev);
455 *dst_inum = inode.bi_inum;
457 copy_times(c, &inode, &stat);
458 copy_xattrs(c, &inode, d->d_name);
462 switch (mode_to_type(stat.st_mode)) {
464 fd = xopen(d->d_name, O_RDONLY|O_NOATIME);
465 copy_dir(s, c, &inode, fd, child_path);
469 inode.bi_size = stat.st_size;
471 fd = xopen(d->d_name, O_RDONLY|O_NOATIME);
472 copy_file(c, &inode, fd, stat.st_size,
473 child_path, &s->extents);
477 inode.bi_size = stat.st_size;
479 copy_link(c, &inode, d->d_name);
486 /* nothing else to copy for these: */
492 update_inode(c, &inode);
498 die("readdir error: %m");
/*
 * Create the file @file_path on the filesystem being migrated, allocate
 * @size bytes for it, and collect the physical extents backing it.
 *
 * The file is opened O_RDWR|O_CREAT with mode 0600, with O_EXCL added in
 * one arm of a conditional. It must live on device @dev, otherwise die();
 * its inode number is stored in *@bcachefs_inum. Space is allocated with
 * fallocate(fd, 0, 0, size). Every extent reported by fiemap_for_each()
 * must be free of the UNKNOWN/ENCODED/NOT_ALIGNED/DATA_INLINE flags and
 * have fe_physical and fe_length aligned to @block_size (tested with a
 * `block_size - 1` mask), otherwise die(); each one is added to a `ranges`
 * list that is finally passed through ranges_sort_merge().
 *
 * NOTE(review): the last parameter, the condition that selects between
 * the two open() calls (the caller passes its `force` flag last), the
 * error test after open() and the return statement are not visible in
 * this listing; presumably `extents` is returned - confirm.
 */
501 static ranges reserve_new_fs_space(const char *file_path, unsigned block_size,
502 u64 size, u64 *bcachefs_inum, dev_t dev,
506 ? open(file_path, O_RDWR|O_CREAT, 0600)
507 : open(file_path, O_RDWR|O_CREAT|O_EXCL, 0600);
509 die("Error creating %s for bcachefs metadata: %m",
512 struct stat statbuf = xfstat(fd);
514 if (statbuf.st_dev != dev)
515 die("bcachefs file has incorrect device");
517 *bcachefs_inum = statbuf.st_ino;
519 if (fallocate(fd, 0, 0, size))
520 die("Error reserving space for bcachefs metadata: %m");
524 struct fiemap_iter iter;
525 struct fiemap_extent e;
526 ranges extents = { NULL };
528 fiemap_for_each(fd, iter, e) {
529 if (e.fe_flags & (FIEMAP_EXTENT_UNKNOWN|
530 FIEMAP_EXTENT_ENCODED|
531 FIEMAP_EXTENT_NOT_ALIGNED|
532 FIEMAP_EXTENT_DATA_INLINE))
533 die("Unable to continue: metadata file not fully mapped");
535 if ((e.fe_physical & (block_size - 1)) ||
536 (e.fe_length & (block_size - 1)))
537 die("Unable to continue: unaligned extents in metadata file");
539 range_add(&extents, e.fe_physical, e.fe_length);
543 ranges_sort_merge(&extents);
/*
 * Create the file "old_migrated_filesystem" (owner 0:0, mode S_IFREG|0400)
 * under @root_inode and link into it every range that for_each_hole()
 * yields over *extents, up to the byte size of device 0.
 *
 * The file's bi_size is set to bucket_to_sector(ca, ca->mi.nbuckets) << 9.
 * Each range is linked at a logical offset equal to its physical offset
 * (i.start is passed for both), so file offsets mirror device offsets. The
 * inode is written back with update_inode() at the end.
 *
 * NOTE(review): presumably for_each_hole() yields the gaps between the
 * ranges in *extents - confirm against its definition. The `extents`
 * parameter declaration and the declaration of `i` are not visible in
 * this listing.
 */
547 static void reserve_old_fs_space(struct bch_fs *c,
548 struct bch_inode_unpacked *root_inode,
551 struct bch_dev *ca = c->devs[0];
552 struct bch_inode_unpacked dst;
553 struct hole_iter iter;
556 dst = create_file(c, root_inode, "old_migrated_filesystem",
557 0, 0, S_IFREG|0400, 0);
558 dst.bi_size = bucket_to_sector(ca, ca->mi.nbuckets) << 9;
560 ranges_sort_merge(extents);
562 for_each_hole(iter, *extents, bucket_to_sector(ca, ca->mi.nbuckets) << 9, i)
563 link_data(c, &dst, i.start, i.start, i.end - i.start);
565 update_inode(c, &dst);
/*
 * Copy the directory tree open at @src_fd (called @src_path in messages)
 * into the root directory of the new filesystem @c.
 *
 * Steps visible here: look up the root inode (subvolume 1,
 * BCACHEFS_ROOT_INO), copy the source root's times and xattrs onto it, run
 * copy_dir() over the tree, create the placeholder file via
 * reserve_old_fs_space(), write the root inode back, free the extent list
 * and hardlink table held in the copy_fs_state, and finally call
 * bch2_alloc_write(c, false).
 *
 * NOTE(review): the call that produces "chdir error" (the root's xattrs
 * are read via ".", so presumably the working directory is switched to
 * @src_fd), the error tests, and most copy_fs_state initializers are not
 * visible in this listing.
 */
568 static void copy_fs(struct bch_fs *c, int src_fd, const char *src_path,
569 u64 bcachefs_inum, ranges *extents)
573 struct bch_inode_unpacked root_inode;
574 int ret = bch2_inode_find_by_inum(c, (subvol_inum) { 1, BCACHEFS_ROOT_INO },
577 die("error looking up root directory: %s", strerror(-ret));
580 die("chdir error: %m");
582 struct stat stat = xfstat(src_fd);
583 copy_times(c, &root_inode, &stat);
584 copy_xattrs(c, &root_inode, ".");
586 struct copy_fs_state s = {
587 .bcachefs_inum = bcachefs_inum,
593 copy_dir(&s, c, &root_inode, src_fd, src_path);
595 reserve_old_fs_space(c, &root_inode, &s.extents);
597 update_inode(c, &root_inode);
599 darray_free(s.extents);
600 genradix_free(&s.hardlinks);
602 bch2_alloc_write(c, false);
/*
 * Choose where the new filesystem's superblocks go and record it in @dev.
 *
 * For each range in @extents (byte offsets), the start is raised to at
 * least 256 KiB (256ULL << 10) and rounded up to a bucket boundary
 * (dev->bucket_size << 9 bytes); the end is rounded down to a bucket
 * boundary. A range that still holds two superblocks of
 * opts.superblock_size sectors each is used: dev->sb_offset and
 * dev->sb_end are set, both in 512-byte sectors. If the code after the
 * loop is reached, die() is called.
 *
 * NOTE(review): the declaration of `i` and the statement that leaves the
 * function once a range has been chosen are not visible in this listing.
 */
605 static void find_superblock_space(ranges extents,
606 struct format_opts opts,
607 struct dev_opts *dev)
611 darray_foreach(i, extents) {
612 u64 start = round_up(max(256ULL << 10, i->start),
613 dev->bucket_size << 9);
614 u64 end = round_down(i->end,
615 dev->bucket_size << 9);
617 /* Need space for two superblocks: */
618 if (start + (opts.superblock_size << 9) * 2 <= end) {
619 dev->sb_offset = start >> 9;
620 dev->sb_end = dev->sb_offset + opts.superblock_size * 2;
625 die("Couldn't find a valid location for superblock");
/*
 * Print the help text for "bcachefs migrate" to stdout with puts().
 *
 * NOTE(review): some lines of the text are not visible in this listing.
 */
628 static void migrate_usage(void)
630 puts("bcachefs migrate - migrate an existing filesystem to bcachefs\n"
631 "Usage: bcachefs migrate [OPTION]...\n"
634 " -f fs Root of filesystem to migrate\n"
635 " --encrypted Enable whole filesystem encryption (chacha20/poly1305)\n"
636 " --no_passphrase Don't encrypt master encryption key\n"
637 " -F Force, even if metadata file already exists\n"
638 " -h Display this help and exit\n"
639 "Report bugs to <linux-bcache@vger.kernel.org>");
/*
 * Long options accepted by cmd_migrate() through getopt_long(): neither
 * takes an argument, and they are returned as 'e' and 'p' respectively.
 *
 * NOTE(review): the remaining entries and the terminating entry are not
 * visible in this listing.
 */
642 static const struct option migrate_opts[] = {
643 { "encrypted", no_argument, NULL, 'e' },
644 { "no_passphrase", no_argument, NULL, 'p' },
/*
 * Migrate the filesystem mounted at @fs_path to bcachefs in place.
 *
 * Steps visible here:
 *  1. @fs_path must be a mount point (path_is_fs_root()) and a directory.
 *  2. The backing block device is found from the directory's st_dev and
 *     opened O_RDWR; its block size becomes the block_size option.
 *  3. A file "<fs_path>/bcachefs" sized at one fifth of the device is
 *     reserved with reserve_new_fs_space(), and a superblock location
 *     inside it is chosen with find_superblock_space().
 *  4. The device is formatted with bch2_format(); if a passphrase was
 *     supplied, bch2_add_key() is called with it.
 *  5. The new filesystem is opened with the sb/nostart/noexcl options,
 *     mark_unreserved_space() is applied, the filesystem is started, and
 *     copy_fs() copies the tree.
 *  6. The filesystem is opened a second time with nostart cleared and
 *     nochanges set ("running fsck"), and follow-up instructions are
 *     printed.
 *
 * Errors go through die().
 *
 * NOTE(review): this listing has gaps - the final parameter (`force` is
 * used below), the declaration of `bcachefs_inum`, the error tests after
 * bch2_fs_open()/bch2_fs_start(), whatever shuts the filesystem down
 * between the two opens, and the return statement are not visible here.
 */
648 static int migrate_fs(const char *fs_path,
649 struct bch_opt_strs fs_opt_strs,
650 struct bch_opts fs_opts,
651 struct format_opts format_opts,
654 if (!path_is_fs_root(fs_path))
655 die("%s is not a filesystem root", fs_path);
657 int fs_fd = xopen(fs_path, O_RDONLY|O_NOATIME);
658 struct stat stat = xfstat(fs_fd);
660 if (!S_ISDIR(stat.st_mode))
661 die("%s is not a directory", fs_path);
663 struct dev_opts dev = dev_opts_default();
665 dev.path = dev_t_to_path(stat.st_dev);
666 dev.fd = xopen(dev.path, O_RDWR);
668 opt_set(fs_opts, block_size, get_blocksize(dev.path, dev.fd));
670 char *file_path = mprintf("%s/bcachefs", fs_path);
671 printf("Creating new filesystem on %s in space reserved at %s\n",
672 dev.path, file_path);
674 bch2_pick_bucket_size(fs_opts, &dev);
677 ranges extents = reserve_new_fs_space(file_path,
678 fs_opts.block_size << 9,
679 get_size(dev.path, dev.fd) / 5,
680 &bcachefs_inum, stat.st_dev, force);
682 find_superblock_space(extents, format_opts, &dev);
684 struct bch_sb *sb = bch2_format(fs_opt_strs,
685 fs_opts,format_opts, &dev, 1);
/* first superblock offset in the layout; reused for the sb= option below */
686 u64 sb_offset = le64_to_cpu(sb->layout.sb_offset[0]);
688 if (format_opts.passphrase)
689 bch2_add_key(sb, format_opts.passphrase);
693 struct bch_opts opts = bch2_opts_empty();
694 struct bch_fs *c = NULL;
695 char *path[1] = { dev.path };
697 opt_set(opts, sb, sb_offset);
698 opt_set(opts, nostart, true);
699 opt_set(opts, noexcl, true);
701 c = bch2_fs_open(path, 1, opts);
703 die("Error opening new filesystem: %s", strerror(-PTR_ERR(c)));
705 mark_unreserved_space(c, extents);
707 int ret = bch2_fs_start(c);
709 die("Error starting new filesystem: %s", strerror(-ret));
711 copy_fs(c, fs_fd, fs_path, bcachefs_inum, &extents);
715 printf("Migrate complete, running fsck:\n");
716 opt_set(opts, nostart, false);
717 opt_set(opts, nochanges, true);
719 c = bch2_fs_open(path, 1, opts);
721 die("Error opening new filesystem: %s", strerror(-PTR_ERR(c)));
724 printf("fsck complete\n");
726 printf("To mount the new filesystem, run\n"
727 " mount -t bcachefs -o sb=%llu %s dir\n"
729 "After verifying that the new filesystem is correct, to create a\n"
730 "superblock at the default offset and finish the migration run\n"
731 " bcachefs migrate-superblock -d %s -o %llu\n"
733 "The new filesystem will have a file at /old_migrated_filesystem\n"
734 "referencing all disk space that might be used by the existing\n"
735 "filesystem. That file can be deleted once the old filesystem is\n"
736 "no longer needed (and should be deleted prior to running\n"
737 "bcachefs migrate-superblock)\n",
738 sb_offset, dev.path, dev.path, sb_offset);
/*
 * Entry point of the "migrate" command.
 *
 * Filesystem options are first pulled out of the command line with
 * bch2_cmdline_opts_get(..., OPT_FORMAT) and parsed; the remaining
 * arguments go through getopt_long() with the short options "f:Fh" plus
 * the long options in migrate_opts. If encryption was requested and
 * no_passphrase was not set, a passphrase is read with
 * read_passphrase_twice(). migrate_fs() then does the work, and the option
 * strings are freed afterwards.
 *
 * NOTE(review): the option switch (only two assignments from it are
 * visible), the test guarding "Please specify a filesystem to migrate",
 * the remaining migrate_fs() arguments and the return statement are not
 * visible in this listing.
 */
742 int cmd_migrate(int argc, char *argv[])
744 struct format_opts format_opts = format_opts_default();
745 char *fs_path = NULL;
746 bool no_passphrase = false, force = false;
749 struct bch_opt_strs fs_opt_strs =
750 bch2_cmdline_opts_get(&argc, argv, OPT_FORMAT);
751 struct bch_opts fs_opts = bch2_parse_opts(fs_opt_strs);
753 while ((opt = getopt_long(argc, argv, "f:Fh",
754 migrate_opts, NULL)) != -1)
760 format_opts.encrypted = true;
763 no_passphrase = true;
774 die("Please specify a filesystem to migrate");
776 if (format_opts.encrypted && !no_passphrase)
777 format_opts.passphrase = read_passphrase_twice("Enter passphrase: ");
779 int ret = migrate_fs(fs_path,
783 bch2_opt_strs_free(&fs_opt_strs);
/*
 * Print the help text for "bcachefs migrate-superblock" to stdout with
 * puts().
 *
 * NOTE(review): some lines of the text are not visible in this listing.
 */
787 static void migrate_superblock_usage(void)
789 puts("bcachefs migrate-superblock - create default superblock after migrating\n"
790 "Usage: bcachefs migrate-superblock [OPTION]...\n"
793 " -d device Device to create superblock for\n"
794 " -o offset Offset of existing superblock\n"
795 " -h Display this help and exit\n"
796 "Report bugs to <linux-bcache@vger.kernel.org>");
/*
 * Entry point of the "migrate-superblock" command: add a superblock at the
 * default location (BCH_SB_SECTOR) to a migrated filesystem.
 *
 * Options are parsed with getopt() ("d:o:h"); the offset is parsed as a
 * base-10 u64 with kstrtou64(). The device is opened O_RDWR and the
 * existing superblock is read from the given offset with
 * __bch2_super_read(). The command dies if the layout's sb_offset array is
 * already full or already contains BCH_SB_SECTOR. Otherwise
 * BCH_SB_SECTOR is inserted as the first layout entry and the superblock
 * is written out with bch2_super_write().
 *
 * NOTE(review): the declarations of dev/offset/opt/ret/i, the option
 * switch, the tests guarding the "Please specify ..." messages, and
 * anything after bch2_super_write() are not visible in this listing.
 */
799 int cmd_migrate_superblock(int argc, char *argv[])
805 while ((opt = getopt(argc, argv, "d:o:h")) != -1)
811 ret = kstrtou64(optarg, 10, &offset);
813 die("Invalid offset");
816 migrate_superblock_usage();
821 die("Please specify a device");
824 die("Please specify offset of existing superblock");
826 int fd = xopen(dev, O_RDWR);
827 struct bch_sb *sb = __bch2_super_read(fd, offset);
829 if (sb->layout.nr_superblocks >= ARRAY_SIZE(sb->layout.sb_offset))
830 die("Can't add superblock: no space left in superblock layout");
833 for (i = 0; i < sb->layout.nr_superblocks; i++)
834 if (le64_to_cpu(sb->layout.sb_offset[i]) == BCH_SB_SECTOR)
835 die("Superblock layout already has default superblock");
/* shift the existing offsets up one slot so the default one can go first */
837 memmove(&sb->layout.sb_offset[1],
838 &sb->layout.sb_offset[0],
839 sb->layout.nr_superblocks * sizeof(u64));
840 sb->layout.nr_superblocks++;
842 sb->layout.sb_offset[0] = cpu_to_le64(BCH_SB_SECTOR);
844 bch2_super_write(fd, sb);