1 #include </usr/include/dirent.h>
8 #include <sys/sysmacros.h>
12 #include <attr/xattr.h>
14 #include <linux/fiemap.h>
16 #include <linux/stat.h>
18 #include <uuid/uuid.h>
22 #include "libbcachefs.h"
24 #include <linux/dcache.h>
25 #include <linux/generic-radix-tree.h>
26 #include <linux/xattr.h>
27 #include "btree_update.h"
/*
 * dev_t_to_path - map a block device number to a "/dev/<name>" path.
 *
 * Builds "/sys/dev/block/<major>:<minor>", readlink()s it and formats the
 * text found at the last '/' of the link target into "/dev/%s" via
 * mprintf().  Dies when readlink() fails or its result does not fit link[].
 *
 * NOTE(review): readlink() does not NUL-terminate its output and strrchr()
 * may return NULL.  The termination, the NULL test guarding the second
 * die(), any step past the '/' and the release of sysfs_dev are not visible
 * in this excerpt - confirm they are present.
 */
37 static char *dev_t_to_path(dev_t dev)
39 char link[PATH_MAX], *p;
42 char *sysfs_dev = mprintf("/sys/dev/block/%u:%u",
43 major(dev), minor(dev));
44 ret = readlink(sysfs_dev, link, sizeof(link));
/* ret == sizeof(link) is rejected too: the target may have been truncated */
47 if (ret < 0 || ret >= sizeof(link))
48 die("readlink error while looking up block device: %m");
52 p = strrchr(link, '/');
54 die("error looking up device name");
57 return mprintf("/dev/%s", p);
/*
 * path_is_fs_root - check whether @path is a mount point of this process.
 *
 * Reads /proc/self/mountinfo line by line, skips the first four
 * space-separated fields and compares the fifth one (the mount point)
 * against @path with strcmp().  Dies if mountinfo cannot be opened.
 *
 * NOTE(review): the comparison is exact, so a trailing '/' or a relative
 * @path will not match.  mountinfo presumably escapes special characters
 * (e.g. space as \040) - verify against proc(5).  The result handling and
 * the cleanup of `f`/`line` are not visible in this excerpt.
 */
60 static bool path_is_fs_root(char *path)
62 char *line = NULL, *p, *mount;
67 f = fopen("/proc/self/mountinfo", "r");
69 die("Error getting mount information");
71 while (getline(&line, &n, f) != -1) {
74 strsep(&p, " "); /* mount id */
75 strsep(&p, " "); /* parent id */
76 strsep(&p, " "); /* dev */
77 strsep(&p, " "); /* root */
78 mount = strsep(&p, " ");
/* mount is NULL when the line had fewer than five fields */
81 if (mount && !strcmp(path, mount))
/*
 * mark_unreserved_space - flag buckets outside @extents as not usable.
 *
 * Walks every hole between the ranges in @extents, up to the size of device
 * 0 in bytes (bucket_to_sector(nbuckets) << 9), and sets `nouse` in the
 * bucket mark of each bucket starting at the one containing the hole's
 * first byte, until the bucket start reaches the end of the hole.
 *
 * NOTE(review): the statement executed for empty holes and the increment of
 * `b` inside the do/while are not visible in this excerpt.
 */
92 static void mark_unreserved_space(struct bch_fs *c, ranges extents)
94 struct bch_dev *ca = c->devs[0];
95 struct hole_iter iter;
98 for_each_hole(iter, extents, bucket_to_sector(ca, ca->mi.nbuckets) << 9, i) {
99 struct bucket_mark new;
/* zero-length hole: nothing to mark */
102 if (i.start == i.end)
/* hole offsets are bytes; >> 9 converts to 512-byte sectors */
105 b = sector_to_bucket(ca, i.start >> 9);
107 bucket_cmpxchg(&ca->buckets[b], new, new.nouse = 1);
109 } while (bucket_to_sector(ca, b) << 9 < i.end);
/*
 * update_inode - pack @inode and write it to the inodes btree.
 *
 * Dies with the strerror() text of the negated return code on failure.
 *
 * NOTE(review): the message says "error creating file" although this is an
 * update of an existing inode - consider a more specific message.
 */
113 static void update_inode(struct bch_fs *c,
114 struct bch_inode_unpacked *inode)
116 struct bkey_inode_buf packed;
119 bch2_inode_pack(&packed, inode);
120 ret = bch2_btree_update(c, BTREE_ID_INODES, &packed.inode.k_i, NULL);
122 die("error creating file: %s", strerror(-ret));
/*
 * create_dirent - add directory entry @name -> @inum under @parent.
 *
 * The entry type is derived from @mode with mode_to_type().  The dirent is
 * created with BCH_HASH_SET_MUST_CREATE, so presumably an already existing
 * name is reported as an error - confirm against bch2_dirent_create().
 * Dies on failure.
 */
125 static void create_dirent(struct bch_fs *c,
126 struct bch_inode_unpacked *parent,
127 const char *name, u64 inum, mode_t mode)
129 struct bch_hash_info parent_hash_info = bch2_hash_info_init(c, parent);
130 struct qstr qname = { { { .len = strlen(name), } }, .name = name };
132 int ret = bch2_dirent_create(c, parent->inum, &parent_hash_info,
133 mode_to_type(mode), &qname,
134 inum, NULL, BCH_HASH_SET_MUST_CREATE);
136 die("error creating file: %s", strerror(-ret));
/*
 * create_link - add another name for the already copied inode @inum.
 *
 * Looks the inode up by number, writes it back with update_inode() and then
 * creates the dirent @name under @parent.
 *
 * NOTE(review): whatever modifies the inode between lookup and write-back
 * (presumably a link count bump) is not visible in this excerpt.
 */
142 static void create_link(struct bch_fs *c,
143 struct bch_inode_unpacked *parent,
144 const char *name, u64 inum, mode_t mode)
146 struct bch_inode_unpacked inode;
147 int ret = bch2_inode_find_by_inum(c, inum, &inode);
149 die("error looking up hardlink: %s", strerror(-ret));
152 update_inode(c, &inode);
154 create_dirent(c, parent, name, inum, mode);
/*
 * create_file - allocate a new inode and link it into @parent.
 *
 * Initialises an unpacked inode from @uid/@gid/@mode/@rdev, packs it and
 * hands it to bch2_inode_create() (minimum BLOCKDEV_INODE_MAX, hint taken
 * from c->unused_inode_hint).  The inode number chosen is read back from the
 * packed key and a dirent for `name` is created under @parent.
 *
 * Returns the unpacked inode by value (the return statement and the `name`
 * parameter line are not visible in this excerpt).  Dies on failure.
 */
157 static struct bch_inode_unpacked create_file(struct bch_fs *c,
158 struct bch_inode_unpacked *parent,
160 uid_t uid, gid_t gid,
161 mode_t mode, dev_t rdev)
163 struct bch_inode_unpacked new_inode;
164 struct bkey_inode_buf packed;
167 bch2_inode_init(c, &new_inode, uid, gid, mode, rdev);
168 bch2_inode_pack(&packed, &new_inode);
170 ret = bch2_inode_create(c, &packed.inode.k_i, BLOCKDEV_INODE_MAX, 0,
171 &c->unused_inode_hint);
173 die("error creating file: %s", strerror(-ret));
/* bch2_inode_create() filled in the key position; copy the number back */
175 new_inode.inum = packed.inode.k.p.inode;
176 create_dirent(c, parent, name, new_inode.inum, mode);
/*
 * for_each_xattr_handler - iterate over an array of xattr handler pointers.
 *
 * Loads *(handlers)++ into @handler before the first iteration and again
 * after each one, so @handlers itself is advanced (pass a scratch copy).
 *
 * NOTE(review): the loop-condition line is not visible in this excerpt;
 * presumably iteration stops at a NULL entry - confirm.
 */
181 #define for_each_xattr_handler(handlers, handler) \
183 for ((handler) = *(handlers)++; \
185 (handler) = *(handlers)++)
/*
 * xattr_resolve_name - find the handler responsible for xattr *@name.
 *
 * Walks bch2_xattr_handlers and matches *@name against each handler's
 * prefix using strcmp_prefix().  Returns ERR_PTR(-EINVAL) or
 * ERR_PTR(-EOPNOTSUPP) on the failure paths visible here; callers must
 * check the result with IS_ERR() before dereferencing it.
 *
 * NOTE(review): @name is passed by reference, presumably so it can be
 * advanced past the matched prefix; that update and the success return are
 * not visible in this excerpt.
 */
187 static const struct xattr_handler *xattr_resolve_name(const char **name)
189 const struct xattr_handler **handlers = bch2_xattr_handlers;
190 const struct xattr_handler *handler;
192 for_each_xattr_handler(handlers, handler) {
195 n = strcmp_prefix(*name, xattr_prefix(handler));
/* true when exactly one of "handler has no prefix" / "nothing follows the prefix" holds */
197 if (!handler->prefix ^ !*n) {
200 return ERR_PTR(-EINVAL);
206 return ERR_PTR(-EOPNOTSUPP);
/*
 * copy_times - copy access/modify/change timestamps from a struct stat.
 *
 * Converts src->st_atim, st_mtim and st_ctim with timespec_to_bch2_time()
 * and stores them in @dst's i_atime, i_mtime and i_ctime.  Only the
 * in-memory inode is changed; nothing is written to the filesystem here.
 */
209 static void copy_times(struct bch_fs *c, struct bch_inode_unpacked *dst,
212 dst->i_atime = timespec_to_bch2_time(c, src->st_atim);
213 dst->i_mtime = timespec_to_bch2_time(c, src->st_mtim);
214 dst->i_ctime = timespec_to_bch2_time(c, src->st_ctim);
/*
 * copy_xattrs - copy all extended attributes of path `src` to inode @dst.
 *
 * Lists the attribute names with llistxattr() (no symlink following) into a
 * XATTR_LIST_MAX stack buffer, then for every NUL-terminated name fetches
 * the value with lgetxattr() into a XATTR_SIZE_MAX stack buffer and stores
 * it with __bch2_xattr_set() using the flags of the resolved handler.
 * Dies on any error.
 *
 * NOTE(review): xattr_resolve_name() can return an ERR_PTR; no IS_ERR()
 * check is visible before `h->flags` is dereferenced - confirm, otherwise
 * an attribute with an unsupported prefix crashes the migration.
 */
217 static void copy_xattrs(struct bch_fs *c, struct bch_inode_unpacked *dst,
220 struct bch_hash_info hash_info = bch2_hash_info_init(c, dst);
222 char attrs[XATTR_LIST_MAX];
223 ssize_t attrs_size = llistxattr(src, attrs, sizeof(attrs));
225 die("listxattr error: %m");
227 for (const char *next, *attr = attrs;
228 attr < attrs + attrs_size;
/* names are packed back to back, each terminated by '\0' */
230 next = attr + strlen(attr) + 1;
232 char val[XATTR_SIZE_MAX];
233 ssize_t val_size = lgetxattr(src, attr, val, sizeof(val));
236 die("error getting xattr val: %m");
238 const struct xattr_handler *h = xattr_resolve_name(&attr);
240 int ret = __bch2_xattr_set(c, dst->inum, &hash_info, attr,
241 val, val_size, 0, h->flags, NULL);
243 die("error creating xattr: %s", strerror(-ret));
/*
 * write_data - write @len bytes from @buf into @dst_inode at @dst_offset.
 *
 * Both @dst_offset and @len must be multiples of the filesystem block size
 * (enforced with BUG_ON).  A single-bvec bio is mapped over @buf, a disk
 * reservation for len >> 9 sectors is taken and bch2_write() is started
 * through a closure at position (inum, dst_offset >> 9).  Afterwards the
 * in-memory inode's i_sectors is increased by len >> 9.
 *
 * NOTE(review): the wait for the write to complete (on the stack closure
 * `cl`) is not visible in this excerpt - confirm the write is synchronous
 * before `op`, which lives on the stack, goes out of scope.
 */
247 static void write_data(struct bch_fs *c,
248 struct bch_inode_unpacked *dst_inode,
249 u64 dst_offset, void *buf, size_t len)
251 struct disk_reservation res;
252 struct bch_write_op op;
256 BUG_ON(dst_offset & (block_bytes(c) - 1));
257 BUG_ON(len & (block_bytes(c) - 1));
259 closure_init_stack(&cl);
261 bio_init(&op.wbio.bio, &bv, 1);
262 op.wbio.bio.bi_iter.bi_size = len;
263 bch2_bio_map(&op.wbio.bio, buf);
/* reservation is counted in 512-byte sectors */
265 int ret = bch2_disk_reservation_get(c, &res, len >> 9, 0);
267 die("error reserving space in new filesystem: %s", strerror(-ret));
269 bch2_write_op_init(&op, c, res, c->write_points,
270 POS(dst_inode->inum, dst_offset >> 9), NULL, 0);
271 closure_call(&op.cl, bch2_write, NULL, &cl);
274 dst_inode->i_sectors += len >> 9;
/*
 * Page-aligned 1 MiB (1 << 20 bytes) scratch buffer.  In this file it is
 * the staging area for copy_data() and the readlink() target in
 * copy_link().  Being a single static buffer, its contents persist between
 * calls.
 */
277 static char buf[1 << 20] __aligned(PAGE_SIZE);
/*
 * copy_data - copy the byte range [@start, @end) of @src_fd into @dst_inode.
 *
 * Data is moved through the static `buf` in chunks of at most sizeof(buf):
 * each chunk is read with xpread() at offset `start` and written to the
 * same offset in the destination with write_data(), which requires block
 * aligned offsets and lengths.
 *
 * NOTE(review): the statement advancing `start` is not visible in this
 * excerpt.
 */
279 static void copy_data(struct bch_fs *c,
280 struct bch_inode_unpacked *dst_inode,
281 int src_fd, u64 start, u64 end)
283 while (start < end) {
284 unsigned len = min_t(u64, end - start, sizeof(buf));
286 xpread(src_fd, buf, len, start);
287 write_data(c, dst_inode, start, buf, len);
/*
 * link_data - map existing on-disk data into @dst without copying it.
 *
 * Inserts extent keys for inode @dst that point at space already occupied
 * on device 0.  @logical, @physical and @length must be block aligned
 * (BUG_ON otherwise) and the physical range must lie inside the device.
 * The range is split so that each extent stays within one bucket; every
 * extent takes a NOFAIL disk reservation, is marked in the superblock and
 * inserted into BTREE_ID_EXTENTS, and dst->i_sectors grows accordingly.
 *
 * NOTE(review): the alignment checks treat the arguments as byte values
 * while the bounds check and the key offset treat them as sectors, so a
 * byte->sector conversion presumably sits on lines not visible here.  If
 * so, `physical >> 9` in the bucket lookup shifts twice and selects the
 * wrong bucket (and thus the wrong pointer gen) - verify.
 * The bucket-remainder computation masks with (bucket_size - 1), which
 * assumes a power-of-two bucket size - TODO confirm.
 */
292 static void link_data(struct bch_fs *c, struct bch_inode_unpacked *dst,
293 u64 logical, u64 physical, u64 length)
295 struct bch_dev *ca = c->devs[0];
297 BUG_ON(logical & (block_bytes(c) - 1));
298 BUG_ON(physical & (block_bytes(c) - 1));
299 BUG_ON(length & (block_bytes(c) - 1));
305 BUG_ON(physical + length > bucket_to_sector(ca, ca->mi.nbuckets));
308 struct bkey_i_extent *e;
310 u64 b = sector_to_bucket(ca, physical >> 9);
311 struct disk_reservation res;
/* never let a single extent cross a bucket boundary */
315 sectors = min(ca->mi.bucket_size -
316 (physical & (ca->mi.bucket_size - 1)),
319 e = bkey_extent_init(&k.k);
320 e->k.p.inode = dst->inum;
/* the key position is the END of the extent */
321 e->k.p.offset = logical + sectors;
323 extent_ptr_append(e, (struct bch_extent_ptr) {
326 .gen = ca->buckets[b].mark.gen,
329 ret = bch2_disk_reservation_get(c, &res, sectors,
330 BCH_DISK_RESERVATION_NOFAIL);
332 die("error reserving space in new filesystem: %s",
335 bch2_check_mark_super(c, extent_i_to_s_c(e), false);
337 ret = bch2_btree_insert(c, BTREE_ID_EXTENTS, &e->k_i,
338 &res, NULL, NULL, 0);
340 die("btree insert error %s", strerror(-ret));
342 bch2_disk_reservation_put(c, &res);
344 dst->i_sectors += sectors;
/*
 * copy_link - store the target of symlink `src` as the data of @dst.
 *
 * Reads the link target into the static `buf` and writes it at offset 0,
 * with the length rounded up to the filesystem block size as write_data()
 * requires.  Dies if readlink() fails.
 *
 * NOTE(review): nothing visible here clears the bytes between the link
 * length and the rounded-up length, so the padding is whatever `buf` held
 * from an earlier copy - confirm whether it should be zeroed first.
 */
351 static void copy_link(struct bch_fs *c, struct bch_inode_unpacked *dst,
354 ssize_t ret = readlink(src, buf, sizeof(buf));
356 die("readlink error: %m");
358 write_data(c, dst, 0, buf, round_up(ret, block_bytes(c)));
/*
 * copy_file - migrate the contents of regular file @src into @dst.
 *
 * Makes two passes over the file's FIEMAP extents.  The first looks for
 * extents flagged FIEMAP_EXTENT_UNKNOWN (the reaction is not visible in
 * this excerpt).  The second handles each extent:
 *  - logical offset or length not block aligned: die;
 *  - flagged UNKNOWN / ENCODED / NOT_ALIGNED / DATA_INLINE, or located in
 *    the first 1 MiB of the device: handled with the block-rounded logical
 *    range (presumably copied with copy_data() - call not visible);
 *  - physical offset not block aligned: die;
 *  - otherwise the physical range is recorded in @extents and the data is
 *    linked in place with link_data().
 */
361 static void copy_file(struct bch_fs *c, struct bch_inode_unpacked *dst,
362 int src, char *src_path, ranges *extents)
364 struct fiemap_iter iter;
365 struct fiemap_extent e;
367 fiemap_for_each(src, iter, e)
368 if (e.fe_flags & FIEMAP_EXTENT_UNKNOWN) {
373 fiemap_for_each(src, iter, e) {
374 if ((e.fe_logical & (block_bytes(c) - 1)) ||
375 (e.fe_length & (block_bytes(c) - 1)))
376 die("Unaligned extent in %s - can't handle", src_path);
/* extents whose physical location cannot be referenced directly */
378 if (e.fe_flags & (FIEMAP_EXTENT_UNKNOWN|
379 FIEMAP_EXTENT_ENCODED|
380 FIEMAP_EXTENT_NOT_ALIGNED|
381 FIEMAP_EXTENT_DATA_INLINE)) {
384 round_down(e.fe_logical, block_bytes(c)),
385 round_up(e.fe_logical + e.fe_length,
/* data located in the first 1 MiB of the device */
390 if (e.fe_physical < 1 << 20) {
393 round_down(e.fe_logical, block_bytes(c)),
394 round_up(e.fe_logical + e.fe_length,
399 if ((e.fe_physical & (block_bytes(c) - 1)))
400 die("Unaligned extent in %s - can't handle", src_path);
402 range_add(extents, e.fe_physical, e.fe_length);
403 link_data(c, dst, e.fe_logical, e.fe_physical, e.fe_length);
/*
 * State shared by the recursive directory copy.
 *
 * Only `hardlinks` is visible in this excerpt: copy_dir() indexes it by the
 * source st_ino and stores the inode number created in the new filesystem,
 * so later names of the same source inode become hard links.  Other code in
 * this file also uses members named bcachefs_inum, dev and extents.
 */
407 struct copy_fs_state {
411 GENRADIX(u64) hardlinks;
/*
 * copy_dir - recursively copy the directory open at @src_fd into @dst.
 *
 * For every entry returned by readdir():
 *  - it is stat'ed without following symlinks;
 *  - ".", ".." and the entry whose inode number equals s->bcachefs_inum
 *    (the file holding the new filesystem) are filtered out;
 *  - entries on a different st_dev abort the migration;
 *  - regular files are tracked in s->hardlinks by st_ino; if the inode was
 *    already copied only a new link is created;
 *  - otherwise a new inode is created with the source's owner, mode and
 *    rdev, times and xattrs are copied, and depending on the file type the
 *    directory is recursed into, file data is copied/linked, or the symlink
 *    target is stored; the inode is then written with update_inode().
 *
 * NOTE(review): the case labels of the switch, the directory change that
 * precedes the "chdir error" message, and the statements following the
 * filters are not visible in this excerpt.  Entries are opened by name
 * relative to the current directory, so the chdir ordering matters when
 * returning from a recursive call - confirm.
 */
415 static void copy_dir(struct copy_fs_state *s,
417 struct bch_inode_unpacked *dst,
418 int src_fd, const char *src_path)
420 DIR *dir = fdopendir(src_fd);
/* errno is cleared so end-of-directory can be told apart from an error */
423 while ((errno = 0), (d = readdir(dir))) {
424 struct bch_inode_unpacked inode;
428 die("chdir error: %m");
431 xfstatat(src_fd, d->d_name, AT_SYMLINK_NOFOLLOW);
433 if (!strcmp(d->d_name, ".") ||
434 !strcmp(d->d_name, "..") ||
435 stat.st_ino == s->bcachefs_inum)
438 char *child_path = mprintf("%s/%s", src_path, d->d_name);
440 if (stat.st_dev != s->dev)
441 die("%s does not have correct st_dev!", child_path);
/* only regular files take part in hard link tracking */
443 u64 *dst_inum = S_ISREG(stat.st_mode)
444 ? genradix_ptr_alloc(&s->hardlinks, stat.st_ino, GFP_KERNEL)
/* non-zero slot: this source inode has been copied before */
447 if (dst_inum && *dst_inum) {
448 create_link(c, dst, d->d_name, *dst_inum, S_IFREG);
452 inode = create_file(c, dst, d->d_name,
453 stat.st_uid, stat.st_gid,
454 stat.st_mode, stat.st_rdev);
457 *dst_inum = inode.inum;
459 copy_times(c, &inode, &stat);
460 copy_xattrs(c, &inode, d->d_name);
464 switch (mode_to_type(stat.st_mode)) {
466 fd = xopen(d->d_name, O_RDONLY|O_NOATIME);
467 copy_dir(s, c, &inode, fd, child_path);
471 inode.i_size = stat.st_size;
473 fd = xopen(d->d_name, O_RDONLY|O_NOATIME);
474 copy_file(c, &inode, fd, child_path, &s->extents);
478 inode.i_size = stat.st_size;
480 copy_link(c, &inode, d->d_name);
487 /* nothing else to copy for these: */
/* persist size, sector count, times set above */
493 update_inode(c, &inode);
499 die("readdir error: %m");
/*
 * reserve_new_fs_space - allocate the file that will hold the new
 * filesystem's metadata and report where it lives on disk.
 *
 * Opens/creates @file_path (mode 0600); one of the two variants adds
 * O_EXCL so an existing file is refused (the selecting condition is not
 * visible here).  The file must be on device @dev; its inode number is
 * returned through @bcachefs_inum.  @size bytes are reserved with
 * fallocate(), then the FIEMAP extents are walked: any extent that is
 * unknown, encoded, not aligned or inline, or whose physical offset/length
 * is not a multiple of @block_size, is fatal.  The physical ranges are
 * collected, sorted and merged.
 *
 * NOTE(review): the mask test with (block_size - 1) requires @block_size to
 * be a power of two.  The return statement is not visible in this excerpt.
 */
502 static ranges reserve_new_fs_space(const char *file_path, unsigned block_size,
503 u64 size, u64 *bcachefs_inum, dev_t dev,
507 ? open(file_path, O_RDWR|O_CREAT, 0600)
508 : open(file_path, O_RDWR|O_CREAT|O_EXCL, 0600);
510 die("Error creating %s for bcachefs metadata: %m",
513 struct stat statbuf = xfstat(fd);
515 if (statbuf.st_dev != dev)
516 die("bcachefs file has incorrect device");
518 *bcachefs_inum = statbuf.st_ino;
520 if (fallocate(fd, 0, 0, size))
521 die("Error reserving space for bcachefs metadata: %m");
525 struct fiemap_iter iter;
526 struct fiemap_extent e;
527 ranges extents = { NULL };
529 fiemap_for_each(fd, iter, e) {
530 if (e.fe_flags & (FIEMAP_EXTENT_UNKNOWN|
531 FIEMAP_EXTENT_ENCODED|
532 FIEMAP_EXTENT_NOT_ALIGNED|
533 FIEMAP_EXTENT_DATA_INLINE))
534 die("Unable to continue: metadata file not fully mapped");
536 if ((e.fe_physical & (block_size - 1)) ||
537 (e.fe_length & (block_size - 1)))
538 die("Unable to continue: unaligned extents in metadata file");
540 range_add(&extents, e.fe_physical, e.fe_length);
544 ranges_sort_merge(&extents);
/*
 * reserve_old_fs_space - pin all space not accounted for in `extents`.
 *
 * Creates the file "old_migrated_filesystem" (root owned, S_IFREG|0400) in
 * @root_inode with a size equal to device 0's size in bytes.  After sorting
 * and merging the extent list, every hole in it is linked into that file at
 * a logical offset equal to its physical offset, so the new filesystem
 * treats the space as in use.  The inode is then written out.
 */
548 static void reserve_old_fs_space(struct bch_fs *c,
549 struct bch_inode_unpacked *root_inode,
552 struct bch_dev *ca = c->devs[0];
553 struct bch_inode_unpacked dst;
554 struct hole_iter iter;
557 dst = create_file(c, root_inode, "old_migrated_filesystem",
558 0, 0, S_IFREG|0400, 0);
559 dst.i_size = bucket_to_sector(ca, ca->mi.nbuckets) << 9;
561 ranges_sort_merge(extents);
/* identity mapping: logical offset in the file == physical offset on disk */
563 for_each_hole(iter, *extents, bucket_to_sector(ca, ca->mi.nbuckets) << 9, i)
564 link_data(c, &dst, i.start, i.start, i.end - i.start);
566 update_inode(c, &dst);
/*
 * copy_fs - copy the whole source tree rooted at @src_fd into @c.
 *
 * Looks up the new filesystem's root inode, copies times and xattrs of the
 * source root (addressed as "." after a directory change whose call is not
 * visible here), runs copy_dir() over the tree, then reserves everything
 * not referenced by copied data via reserve_old_fs_space() and writes the
 * root inode.  The extent list and the hard link table of the copy state
 * are released at the end.
 *
 * NOTE(review): most initialisers of the state struct are not visible;
 * confirm how the `extents` argument is carried into `s.extents` and who
 * owns it after darray_free().
 */
569 static void copy_fs(struct bch_fs *c, int src_fd, const char *src_path,
570 u64 bcachefs_inum, ranges *extents)
574 struct bch_inode_unpacked root_inode;
575 int ret = bch2_inode_find_by_inum(c, BCACHE_ROOT_INO, &root_inode);
577 die("error looking up root directory: %s", strerror(-ret));
580 die("chdir error: %m");
582 struct stat stat = xfstat(src_fd);
583 copy_times(c, &root_inode, &stat);
584 copy_xattrs(c, &root_inode, ".");
586 struct copy_fs_state s = {
587 .bcachefs_inum = bcachefs_inum,
593 copy_dir(&s, c, &root_inode, src_fd, src_path);
595 reserve_old_fs_space(c, &root_inode, &s.extents);
597 update_inode(c, &root_inode);
599 darray_free(s.extents);
600 genradix_free(&s.hardlinks);
/*
 * find_superblock_space - choose a location for the superblock.
 *
 * Scans @extents (byte ranges) for one that, after rounding its start up to
 * a bucket boundary (and to at least 256 KiB) and its end down to a bucket
 * boundary, still has 128 KiB available.  On success dev->sb_offset is set
 * to the start in 512-byte sectors and dev->sb_end to 256 sectors (128 KiB)
 * past it.  Dies when no range qualifies.
 *
 * NOTE(review): the exit taken after a match is not visible in this
 * excerpt.
 */
603 static void find_superblock_space(ranges extents, struct dev_opts *dev)
607 darray_foreach(i, extents) {
/* dev->bucket_size is in sectors; << 9 converts to bytes */
608 u64 start = round_up(max(256ULL << 10, i->start),
609 dev->bucket_size << 9);
610 u64 end = round_down(i->end,
611 dev->bucket_size << 9);
613 if (start + (128 << 10) <= end) {
614 dev->sb_offset = start >> 9;
615 dev->sb_end = dev->sb_offset + 256;
620 die("Couldn't find a valid location for superblock");
/*
 * migrate_usage - print the help text of "bcachefs migrate" to stdout.
 *
 * NOTE(review): "Root of filesystem to migrate(s)" looks like a typo for
 * "Root of filesystem to migrate".
 */
623 static void migrate_usage(void)
625 puts("bcachefs migrate - migrate an existing filesystem to bcachefs\n"
626 "Usage: bcachefs migrate [OPTION]...\n"
629 " -f fs Root of filesystem to migrate(s)\n"
630 " --encrypted Enable whole filesystem encryption (chacha20/poly1305)\n"
631 " --no_passphrase Don't encrypt master encryption key\n"
632 " -F Force, even if metadata file already exists\n"
633 " -h Display this help and exit\n"
634 "Report bugs to <linux-bcache@vger.kernel.org>");
/*
 * Long options accepted by cmd_migrate(): both take no argument and are
 * reported by getopt_long() as the characters 'e' and 'p'.
 * NOTE(review): the terminating entry is not visible in this excerpt.
 */
637 static const struct option migrate_opts[] = {
638 { "encrypted", no_argument, NULL, 'e' },
639 { "no_passphrase", no_argument, NULL, 'p' },
/*
 * cmd_migrate - entry point of "bcachefs migrate".
 *
 * Steps visible in this function:
 *  1. parse options (-f, -F, -h, --encrypted, --no_passphrase);
 *  2. require that the given path is a mount point and a directory, and
 *     open the block device backing it;
 *  3. pick the block size (smaller of st_blksize and the device block
 *     size) and a bucket size;
 *  4. reserve one fifth of the device size in the file "<fs>/bcachefs" and
 *     find room for the superblock inside it;
 *  5. optionally read (and, on a tty, confirm) the passphrase;
 *  6. format the device, print follow-up instructions, open the new
 *     filesystem, mark space outside the reserved extents unusable, start
 *     it and copy the old tree;
 *  7. reopen the filesystem with nochanges set as a final fsck pass.
 *
 * NOTE(review): the case labels of the option switch, several guards and
 * the shutdown of the filesystem between step 6 and 7 are not visible in
 * this excerpt.
 */
643 int cmd_migrate(int argc, char *argv[])
645 struct format_opts format_opts = format_opts_default();
646 char *fs_path = NULL;
648 bool no_passphrase = false, force = false;
651 while ((opt = getopt_long(argc, argv, "f:Fh",
652 migrate_opts, NULL)) != -1)
658 format_opts.encrypted = true;
661 no_passphrase = true;
/* NOTE(review): typo in message - "filesytem" should read "filesystem" */
672 die("Please specify a filesytem to migrate");
674 if (!path_is_fs_root(fs_path))
/* NOTE(review): typo in message - "filysestem" should read "filesystem" */
675 die("%s is not a filysestem root", fs_path);
677 int fs_fd = xopen(fs_path, O_RDONLY|O_NOATIME);
678 struct stat stat = xfstat(fs_fd);
680 if (!S_ISDIR(stat.st_mode))
681 die("%s is not a directory", fs_path);
683 struct dev_opts dev = { 0 };
685 dev.path = dev_t_to_path(stat.st_dev);
686 dev.fd = xopen(dev.path, O_RDWR);
/* block_size is in bytes here; format_opts wants 512-byte sectors */
688 block_size = min_t(unsigned, stat.st_blksize,
689 get_blocksize(dev.path, dev.fd) << 9);
691 BUG_ON(!is_power_of_2(block_size) || block_size < 512);
692 format_opts.block_size = block_size >> 9;
695 char *file_path = mprintf("%s/bcachefs", fs_path);
697 bch2_pick_bucket_size(format_opts, &dev);
/* one fifth of the device is set aside for the new filesystem */
699 ranges extents = reserve_new_fs_space(file_path,
700 block_size, get_size(dev.path, dev.fd) / 5,
701 &bcachefs_inum, stat.st_dev, force);
703 find_superblock_space(extents, &dev);
705 if (format_opts.encrypted && !no_passphrase) {
706 format_opts.passphrase = read_passphrase("Enter passphrase: ");
/* only ask for confirmation when a human is typing */
708 if (isatty(STDIN_FILENO)) {
710 read_passphrase("Enter same passphrase again: ");
712 if (strcmp(format_opts.passphrase, pass2)) {
713 memzero_explicit(format_opts.passphrase,
714 strlen(format_opts.passphrase));
715 memzero_explicit(pass2, strlen(pass2));
716 die("Passphrases do not match");
719 memzero_explicit(pass2, strlen(pass2));
724 struct bch_sb *sb = bch2_format(format_opts, &dev, 1);
725 u64 sb_offset = le64_to_cpu(sb->layout.sb_offset[0]);
727 if (format_opts.passphrase)
728 bch2_add_key(sb, format_opts.passphrase);
/*
 * NOTE(review): the text below names "/old_migrated_filestem", but
 * reserve_old_fs_space() creates "old_migrated_filesystem" - the message
 * appears to be misspelled.
 */
732 printf("Creating new filesystem on %s in space reserved at %s\n"
734 " mount -t bcachefs -o sb=%llu %s dir\n"
736 "After verifying that the new filesystem is correct, to create a\n"
737 "superblock at the default offset and finish the migration run\n"
738 " bcachefs migrate_superblock -d %s -o %llu\n"
740 "The new filesystem will have a file at /old_migrated_filestem\n"
741 "referencing all disk space that might be used by the existing\n"
742 "filesystem. That file can be deleted once the old filesystem is\n"
743 "no longer needed (and should be deleted prior to running\n"
744 "bcachefs migrate_superblock)\n",
745 dev.path, file_path, sb_offset, dev.path,
746 dev.path, sb_offset);
748 struct bch_opts opts = bch2_opts_empty();
749 struct bch_fs *c = NULL;
750 char *path[1] = { dev.path };
757 err = bch2_fs_open(path, 1, opts, &c);
759 die("Error opening new filesystem: %s", err);
/* must run before the filesystem starts allocating */
761 mark_unreserved_space(c, extents);
763 err = bch2_fs_start(c);
765 die("Error starting new filesystem: %s", err);
767 copy_fs(c, fs_fd, fs_path, bcachefs_inum, &extents);
771 printf("Migrate complete, running fsck:\n");
772 opts.nostart = false;
773 opts.nochanges = true;
775 err = bch2_fs_open(path, 1, opts, &c);
777 die("Error opening new filesystem: %s", err);
780 printf("fsck complete\n");
/*
 * migrate_superblock_usage - print the help text of
 * "bcachefs migrate_superblock" to stdout.
 */
784 static void migrate_superblock_usage(void)
786 puts("bcachefs migrate_superblock - create default superblock after migrating\n"
787 "Usage: bcachefs migrate_superblock [OPTION]...\n"
790 " -d device Device to create superblock for\n"
791 " -o offset Offset of existing superblock\n"
792 " -h Display this help and exit\n"
793 "Report bugs to <linux-bcache@vger.kernel.org>");
/*
 * cmd_migrate_superblock - entry point of "bcachefs migrate_superblock".
 *
 * Parses -d <device> and -o <offset> (decimal, via kstrtou64), reads the
 * existing superblock from that offset and prepends BCH_SB_SECTOR to its
 * layout: the current offsets are shifted up by one slot, nr_superblocks
 * is incremented and slot 0 receives the default location.  The updated
 * superblock is then written with bch2_super_write().
 *
 * Dies when the layout is already full or already lists BCH_SB_SECTOR.
 *
 * NOTE(review): the option switch labels, the guards in front of the two
 * "Please specify" messages and the return value are not visible in this
 * excerpt.
 */
796 int cmd_migrate_superblock(int argc, char *argv[])
802 while ((opt = getopt(argc, argv, "d:o:h")) != -1)
808 ret = kstrtou64(optarg, 10, &offset);
810 die("Invalid offset");
813 migrate_superblock_usage();
818 die("Please specify a device");
821 die("Please specify offset of existing superblock");
823 int fd = xopen(dev, O_RDWR);
824 struct bch_sb *sb = __bch2_super_read(fd, offset);
/* checked before the shift below so the array cannot overflow */
826 if (sb->layout.nr_superblocks >= ARRAY_SIZE(sb->layout.sb_offset))
827 die("Can't add superblock: no space left in superblock layout");
829 for (unsigned i = 0; i < sb->layout.nr_superblocks; i++)
830 if (le64_to_cpu(sb->layout.sb_offset[i]) == BCH_SB_SECTOR)
831 die("Superblock layout already has default superblock");
/* regions overlap, hence memmove rather than memcpy */
833 memmove(&sb->layout.sb_offset[1],
834 &sb->layout.sb_offset[0],
835 sb->layout.nr_superblocks * sizeof(u64));
836 sb->layout.nr_superblocks++;
838 sb->layout.sb_offset[0] = cpu_to_le64(BCH_SB_SECTOR);
840 bch2_super_write(fd, sb);