8 #include <sys/sysmacros.h>
12 #include <attr/xattr.h>
14 #include <linux/fiemap.h>
16 #include <linux/stat.h>
18 #include <uuid/uuid.h>
22 #include "libbcachefs.h"
24 #include <linux/dcache.h>
25 #include <linux/generic-radix-tree.h>
26 #include <linux/xattr.h>
27 #include "libbcachefs/bcachefs.h"
28 #include "libbcachefs/btree_update.h"
29 #include "libbcachefs/buckets.h"
30 #include "libbcachefs/dirent.h"
31 #include "libbcachefs/fs.h"
32 #include "libbcachefs/inode.h"
33 #include "libbcachefs/io.h"
34 #include "libbcachefs/str_hash.h"
35 #include "libbcachefs/super.h"
36 #include "libbcachefs/xattr.h"
/*
 * Map a block device number to a "/dev/<name>" path string.
 *
 * Resolves the /sys/dev/block/<major>:<minor> symlink with readlink() and
 * formats the part of the target located by strrchr(link, '/') into a
 * "/dev/%s" string built by mprintf().  Dies if readlink() fails or if the
 * target fills the whole buffer.
 *
 * NOTE(review): readlink() does not NUL-terminate its output; confirm that
 * link[] is terminated before strrchr() is applied to it.
 */
38 static char *dev_t_to_path(dev_t dev)
40 char link[PATH_MAX], *p;
43 char *sysfs_dev = mprintf("/sys/dev/block/%u:%u",
44 major(dev), minor(dev));
45 ret = readlink(sysfs_dev, link, sizeof(link));
/* ret == sizeof(link) means the target may have been truncated */
48 if (ret < 0 || ret >= sizeof(link))
49 die("readlink error while looking up block device: %m");
/* the device name is looked up from the last '/' of the link target */
53 p = strrchr(link, '/');
55 die("error looking up device name");
58 return mprintf("/dev/%s", p);
/*
 * Check whether @path appears as a mount point in /proc/self/mountinfo.
 *
 * Every line is split on single spaces with strsep(): the first four fields
 * are discarded and the fifth is compared to @path with strcmp(), so @path
 * has to match the mount point text exactly.
 *
 * Dies with "Error getting mount information" (presumably when fopen()
 * fails — confirm).
 */
61 static bool path_is_fs_root(char *path)
63 char *line = NULL, *p, *mount;
68 f = fopen("/proc/self/mountinfo", "r");
70 die("Error getting mount information");
/* getline() (re)allocates @line as needed for each record */
72 while (getline(&line, &n, f) != -1) {
75 strsep(&p, " "); /* mount id */
76 strsep(&p, " "); /* parent id */
77 strsep(&p, " "); /* dev */
78 strsep(&p, " "); /* root */
/* fifth field: the mount point */
79 mount = strsep(&p, " ");
82 if (mount && !strcmp(path, mount))
/*
 * Set nouse = 1 on the buckets of device 0 that lie in the holes of @extents.
 *
 * Holes are enumerated with for_each_hole() up to
 * bucket_to_sector(ca, ca->mi.nbuckets) << 9, i.e. the range offsets are
 * handled as bytes and shifted down by 9 to get sectors for the bucket
 * lookup.  Empty holes are skipped.
 */
93 static void mark_unreserved_space(struct bch_fs *c, ranges extents)
95 struct bch_dev *ca = c->devs[0];
96 struct hole_iter iter;
99 for_each_hole(iter, extents, bucket_to_sector(ca, ca->mi.nbuckets) << 9, i) {
100 struct bucket_mark new;
103 if (i.start == i.end)
/* first bucket touched by this hole (byte offset -> sector -> bucket) */
106 b = sector_to_bucket(ca, i.start >> 9);
108 struct bucket *g = bucket(ca, b);
109 bucket_cmpxchg(g, new, new.nouse = 1);
/* keep going while the bucket's starting byte is still inside the hole */
111 } while (bucket_to_sector(ca, b) << 9 < i.end);
/*
 * Write an unpacked inode back to the inodes btree.
 *
 * The inode is packed with bch2_inode_pack() and stored through
 * bch2_btree_update() on BTREE_ID_INODES.  Failure is fatal; the message
 * says "error creating file" even though this is also used for plain
 * updates of existing inodes.
 */
115 static void update_inode(struct bch_fs *c,
116 struct bch_inode_unpacked *inode)
118 struct bkey_inode_buf packed;
121 bch2_inode_pack(&packed, inode);
122 ret = bch2_btree_update(c, BTREE_ID_INODES, &packed.inode.k_i, NULL);
/* ret is negated for strerror(), i.e. it is a negative errno value */
124 die("error creating file: %s", strerror(-ret));
/*
 * Add a directory entry @name -> @inum to directory @parent.
 *
 * The entry type is derived from @mode with mode_to_type() and the entry is
 * hashed with the parent's hash info.  BCH_HASH_SET_MUST_CREATE is passed to
 * bch2_dirent_create(); a failure is fatal.
 */
127 static void create_dirent(struct bch_fs *c,
128 struct bch_inode_unpacked *parent,
129 const char *name, u64 inum, mode_t mode)
131 struct bch_hash_info parent_hash_info = bch2_hash_info_init(c, parent);
/* wrap the C string in the qstr that bch2_dirent_create() takes */
132 struct qstr qname = { { { .len = strlen(name), } }, .name = name };
134 int ret = bch2_dirent_create(c, parent->bi_inum, &parent_hash_info,
135 mode_to_type(mode), &qname,
136 inum, NULL, BCH_HASH_SET_MUST_CREATE);
138 die("error creating file: %s", strerror(-ret));
/*
 * Create an additional name (hard link) @name in @parent for the existing
 * inode @inum.
 *
 * The target inode is looked up with bch2_inode_find_by_inum(), written back
 * with update_inode() and then a dirent pointing at it is created.
 *
 * NOTE(review): presumably the inode's link count is adjusted between the
 * lookup and the write-back — confirm.
 */
144 static void create_link(struct bch_fs *c,
145 struct bch_inode_unpacked *parent,
146 const char *name, u64 inum, mode_t mode)
148 struct bch_inode_unpacked inode;
149 int ret = bch2_inode_find_by_inum(c, inum, &inode);
151 die("error looking up hardlink: %s", strerror(-ret));
154 update_inode(c, &inode);
156 create_dirent(c, parent, name, inum, mode);
/*
 * Create a new inode plus its directory entry in @parent.
 *
 * The inode is initialised with bch2_inode_init() from @uid, @gid, @mode,
 * @rdev and @parent, allocated with bch2_inode_create() (inode numbers start
 * at BLOCKDEV_INODE_MAX, using c->unused_inode_hint as the search hint) and
 * then linked into @parent via create_dirent().  Any failure is fatal.
 *
 * Returns the unpacked inode by value (per the declared return type).
 */
159 static struct bch_inode_unpacked create_file(struct bch_fs *c,
160 struct bch_inode_unpacked *parent,
162 uid_t uid, gid_t gid,
163 mode_t mode, dev_t rdev)
165 struct bch_inode_unpacked new_inode;
168 bch2_inode_init(c, &new_inode, uid, gid, mode, rdev, parent);
170 ret = bch2_inode_create(c, &new_inode, BLOCKDEV_INODE_MAX, 0,
171 &c->unused_inode_hint);
173 die("error creating file: %s", strerror(-ret));
175 create_dirent(c, parent, name, new_inode.bi_inum, mode);
/*
 * Iterate over an array of xattr handler pointers.
 *
 * @handlers is advanced by the macro itself (post-increment), so the caller
 * must pass a scratch pointer it does not need afterwards; @handler receives
 * each element in turn.
 */
180 #define for_each_xattr_handler(handlers, handler) \
182 for ((handler) = *(handlers)++; \
184 (handler) = *(handlers)++)
/*
 * Find the bcachefs xattr handler responsible for *@name.
 *
 * Walks bch2_xattr_handlers and matches each handler's prefix against *@name
 * with strcmp_prefix().
 *
 * Returns ERR_PTR(-EINVAL) from inside the loop for a prefix/suffix mismatch
 * and ERR_PTR(-EOPNOTSUPP) once the handler list is exhausted, so callers
 * must be prepared for an error pointer.
 */
186 static const struct xattr_handler *xattr_resolve_name(const char **name)
188 const struct xattr_handler **handlers = bch2_xattr_handlers;
189 const struct xattr_handler *handler;
191 for_each_xattr_handler(handlers, handler) {
/* n: remainder of *name after the handler prefix, if the prefix matched */
194 n = strcmp_prefix(*name, xattr_prefix(handler));
/* exactly one of "handler has no prefix" / "nothing follows the prefix" */
196 if (!handler->prefix ^ !*n) {
199 return ERR_PTR(-EINVAL);
205 return ERR_PTR(-EOPNOTSUPP);
/*
 * Copy access, modification and change times from a struct stat (@src) into
 * the unpacked inode @dst, converting each timespec with
 * timespec_to_bch2_time().
 */
208 static void copy_times(struct bch_fs *c, struct bch_inode_unpacked *dst,
211 dst->bi_atime = timespec_to_bch2_time(c, src->st_atim);
212 dst->bi_mtime = timespec_to_bch2_time(c, src->st_mtim);
213 dst->bi_ctime = timespec_to_bch2_time(c, src->st_ctim);
/*
 * Copy all extended attributes of the file at path @src onto inode @dst.
 *
 * The attribute names are listed with llistxattr() (no symlink following)
 * into a NUL-separated list, each value is read with lgetxattr() and stored
 * with __bch2_xattr_set() using the flags of the handler resolved for the
 * name.  Any failure is fatal.
 *
 * NOTE(review): xattr_resolve_name() can return an ERR_PTR(), yet h->flags is
 * read below without a visible IS_ERR() check — confirm that unsupported
 * attribute namespaces cannot reach this point.
 */
216 static void copy_xattrs(struct bch_fs *c, struct bch_inode_unpacked *dst,
219 struct bch_hash_info hash_info = bch2_hash_info_init(c, dst);
/* on-stack buffers sized for the largest list/value (XATTR_LIST_MAX / XATTR_SIZE_MAX) */
221 char attrs[XATTR_LIST_MAX];
222 ssize_t attrs_size = llistxattr(src, attrs, sizeof(attrs));
224 die("listxattr error: %m");
226 const char *next, *attr;
228 attr < attrs + attrs_size;
/* names are stored back to back, each terminated by a NUL byte */
230 next = attr + strlen(attr) + 1;
232 char val[XATTR_SIZE_MAX];
233 ssize_t val_size = lgetxattr(src, attr, val, sizeof(val));
236 die("error getting xattr val: %m");
/* takes &attr, so the resolver is able to modify the name pointer */
238 const struct xattr_handler *h = xattr_resolve_name(&attr);
240 int ret = __bch2_xattr_set(c, dst->bi_inum, &hash_info, attr,
241 val, val_size, 0, h->flags, NULL);
243 die("error creating xattr: %s", strerror(-ret));
/*
 * Write @len bytes from @buf into @dst_inode at byte offset @dst_offset.
 *
 * Both @dst_offset and @len must be multiples of the filesystem block size
 * (enforced with BUG_ON).  A single-bvec bio is mapped over @buf, a disk
 * reservation for len >> 9 sectors is taken (fatal on failure) and the write
 * is issued through bch2_write() with the on-stack closure as parent.
 * dst_inode->bi_sectors is increased by the number of sectors written; the
 * caller is responsible for persisting the inode.
 */
247 static void write_data(struct bch_fs *c,
248 struct bch_inode_unpacked *dst_inode,
249 u64 dst_offset, void *buf, size_t len)
251 struct bch_write_op op;
255 BUG_ON(dst_offset & (block_bytes(c) - 1));
256 BUG_ON(len & (block_bytes(c) - 1));
258 closure_init_stack(&cl);
/* one bio vector covering the whole of @buf */
260 bio_init(&op.wbio.bio, &bv, 1);
261 op.wbio.bio.bi_iter.bi_size = len;
262 bch2_bio_map(&op.wbio.bio, buf);
264 bch2_write_op_init(&op, c);
266 op.write_point = writepoint_hashed(0);
/* extent position: inode number and offset in 512-byte sectors */
267 op.pos = POS(dst_inode->bi_inum, dst_offset >> 9);
269 int ret = bch2_disk_reservation_get(c, &op.res, len >> 9, 0);
271 die("error reserving space in new filesystem: %s", strerror(-ret));
273 closure_call(&op.cl, bch2_write, NULL, &cl);
276 dst_inode->bi_sectors += len >> 9;
/*
 * 1 MiB page-aligned bounce buffer shared by copy_data() and copy_link();
 * being a single static buffer it is not reentrant.
 */
279 static char buf[1 << 20] __aligned(PAGE_SIZE);
/*
 * Copy the byte range [@start, @end) of @src_fd into @dst_inode at the same
 * file offsets.
 *
 * Data is moved through the static bounce buffer in pieces of at most
 * sizeof(buf) bytes: xpread() reads a piece, write_data() stores it.  Since
 * write_data() insists on block alignment, callers must pass block-aligned
 * @start and @end.
 */
281 static void copy_data(struct bch_fs *c,
282 struct bch_inode_unpacked *dst_inode,
283 int src_fd, u64 start, u64 end)
285 while (start < end) {
/* never more than one buffer's worth per iteration */
286 unsigned len = min_t(u64, end - start, sizeof(buf));
288 xpread(src_fd, buf, len, start);
289 write_data(c, dst_inode, start, buf, len);
/*
 * Make @dst reference data that already sits on device 0, without copying.
 *
 * @logical, @physical and @length arrive as byte values and must be
 * multiples of the filesystem block size (enforced with BUG_ON).  By the time
 * of the range check against bucket_to_sector(ca, ca->mi.nbuckets) and
 * inside the loop they are handled as 512-byte sector counts.
 *
 * The range is split at bucket boundaries; for every piece an extent key is
 * built whose pointer carries the current generation of the bucket that
 * holds it, a NOFAIL disk reservation is taken, the superblock is marked as
 * containing user data on the referenced devices and the key is inserted
 * into the extents btree.  dst->bi_sectors is increased by each piece; the
 * caller persists the inode.  Insert/reservation failures are fatal.
 */
294 static void link_data(struct bch_fs *c, struct bch_inode_unpacked *dst,
295 u64 logical, u64 physical, u64 length)
297 struct bch_dev *ca = c->devs[0];
299 BUG_ON(logical & (block_bytes(c) - 1));
300 BUG_ON(physical & (block_bytes(c) - 1));
301 BUG_ON(length & (block_bytes(c) - 1));
/* from here on the values are compared against sector counts */
307 BUG_ON(physical + length > bucket_to_sector(ca, ca->mi.nbuckets));
310 struct bkey_i_extent *e;
/*
 * physical is already a sector number at this point (see the range check
 * above and the bucket_size masking below), so it must not be shifted by 9
 * again: doing so looked up the generation of the wrong bucket.
 */
312 u64 b = sector_to_bucket(ca, physical);
313 struct disk_reservation res;
/* clamp the piece so that it does not cross a bucket boundary */
317 sectors = min(ca->mi.bucket_size -
318 (physical & (ca->mi.bucket_size - 1)),
321 e = bkey_extent_init(&k.k);
322 e->k.p.inode = dst->bi_inum;
/* extent keys are positioned at the END of the range they cover */
323 e->k.p.offset = logical + sectors;
325 extent_ptr_append(e, (struct bch_extent_ptr) {
328 .gen = bucket(ca, b)->mark.gen,
331 ret = bch2_disk_reservation_get(c, &res, sectors,
332 BCH_DISK_RESERVATION_NOFAIL);
334 die("error reserving space in new filesystem: %s",
337 bch2_check_mark_super(c, BCH_DATA_USER,
338 bch2_bkey_devs(extent_i_to_s_c(e).s_c));
340 ret = bch2_btree_insert(c, BTREE_ID_EXTENTS, &e->k_i,
341 &res, NULL, NULL, 0);
343 die("btree insert error %s", strerror(-ret));
345 bch2_disk_reservation_put(c, &res);
347 dst->bi_sectors += sectors;
/*
 * Store the target of the symlink at path @src as the data of inode @dst.
 *
 * The target is read with readlink() into the static bounce buffer and
 * written at offset 0, with the length rounded up to the block size because
 * write_data() only accepts whole blocks.
 *
 * NOTE(review): the bytes between the link length and the rounded-up length
 * are whatever the shared buffer held before — confirm that this padding is
 * acceptable or cleared elsewhere.
 */
354 static void copy_link(struct bch_fs *c, struct bch_inode_unpacked *dst,
357 ssize_t ret = readlink(src, buf, sizeof(buf));
359 die("readlink error: %m");
361 write_data(c, dst, 0, buf, round_up(ret, block_bytes(c)));
/*
 * Migrate the contents of regular file @src (open fd, path @src_path for
 * messages) into inode @dst, extent by extent.
 *
 * The file's FIEMAP extents are walked twice.  The first pass only looks for
 * extents flagged FIEMAP_EXTENT_UNKNOWN.  The second pass handles each
 * extent:
 *   - logical offset or length not block aligned: fatal;
 *   - flagged UNKNOWN, ENCODED, NOT_ALIGNED or DATA_INLINE, or located in
 *     the first 1 MiB of the device: handled using the block-rounded logical
 *     range (presumably copied through copy_data() — confirm);
 *   - otherwise the physical range is recorded in @extents and the data is
 *     referenced in place with link_data(); an unaligned physical offset is
 *     fatal.
 */
364 static void copy_file(struct bch_fs *c, struct bch_inode_unpacked *dst,
365 int src, char *src_path, ranges *extents)
367 struct fiemap_iter iter;
368 struct fiemap_extent e;
/* pass 1: detect extents whose location is not yet known */
370 fiemap_for_each(src, iter, e)
371 if (e.fe_flags & FIEMAP_EXTENT_UNKNOWN) {
/* pass 2: copy or link every extent */
376 fiemap_for_each(src, iter, e) {
377 if ((e.fe_logical & (block_bytes(c) - 1)) ||
378 (e.fe_length & (block_bytes(c) - 1)))
379 die("Unaligned extent in %s - can't handle", src_path);
/* extents that cannot be referenced in place */
381 if (e.fe_flags & (FIEMAP_EXTENT_UNKNOWN|
382 FIEMAP_EXTENT_ENCODED|
383 FIEMAP_EXTENT_NOT_ALIGNED|
384 FIEMAP_EXTENT_DATA_INLINE)) {
387 round_down(e.fe_logical, block_bytes(c)),
388 round_up(e.fe_logical + e.fe_length,
/* data in the first 1 MiB of the device gets the same treatment */
393 if (e.fe_physical < 1 << 20) {
396 round_down(e.fe_logical, block_bytes(c)),
397 round_up(e.fe_logical + e.fe_length,
402 if ((e.fe_physical & (block_bytes(c) - 1)))
403 die("Unaligned extent in %s - can't handle", src_path);
/* remember the space as in use by the old data, then point at it */
405 range_add(extents, e.fe_physical, e.fe_length);
406 link_data(c, dst, e.fe_logical, e.fe_physical, e.fe_length);
/*
 * State shared by the recursive directory copy (see copy_dir()/copy_fs()).
 * Besides the member shown here, the code in this file also accesses
 * ->bcachefs_inum, ->dev and ->extents.
 */
410 struct copy_fs_state {
/* source inode number -> destination inode number (0 = not seen yet) */
414 GENRADIX(u64) hardlinks;
/*
 * Recursively copy the directory open at @src_fd (path @src_path, used for
 * building child paths and messages) into the bcachefs directory @dst.
 *
 * For every entry returned by readdir():
 *   - ".", ".." and the entry whose inode number equals s->bcachefs_inum
 *     are excluded;
 *   - an entry on a different device than s->dev is fatal;
 *   - regular files are tracked in s->hardlinks by source inode number; if a
 *     destination inode is already recorded only a new link is created;
 *   - otherwise a new inode is created, times and xattrs are copied, the
 *     type-specific content is copied (sub-directory recursion, file data,
 *     symlink target) and the inode is written back with update_inode().
 *
 * Entry names are opened relative to the current working directory
 * (xopen(d->d_name, ...)), which is why a chdir is involved per entry.
 * A readdir() failure is detected through errno, which is reset before
 * every call.
 */
418 static void copy_dir(struct copy_fs_state *s,
420 struct bch_inode_unpacked *dst,
421 int src_fd, const char *src_path)
423 DIR *dir = fdopendir(src_fd);
/* errno is cleared first so that end-of-directory and error can be told apart */
426 while ((errno = 0), (d = readdir(dir))) {
427 struct bch_inode_unpacked inode;
431 die("chdir error: %m");
434 xfstatat(src_fd, d->d_name, AT_SYMLINK_NOFOLLOW);
436 if (!strcmp(d->d_name, ".") ||
437 !strcmp(d->d_name, "..") ||
438 stat.st_ino == s->bcachefs_inum)
441 char *child_path = mprintf("%s/%s", src_path, d->d_name);
443 if (stat.st_dev != s->dev)
444 die("%s does not have correct st_dev!", child_path);
/* hard link tracking applies to regular files only */
446 u64 *dst_inum = S_ISREG(stat.st_mode)
447 ? genradix_ptr_alloc(&s->hardlinks, stat.st_ino, GFP_KERNEL)
/* source inode already migrated: just add another name for it */
450 if (dst_inum && *dst_inum) {
451 create_link(c, dst, d->d_name, *dst_inum, S_IFREG);
455 inode = create_file(c, dst, d->d_name,
456 stat.st_uid, stat.st_gid,
457 stat.st_mode, stat.st_rdev);
460 *dst_inum = inode.bi_inum;
462 copy_times(c, &inode, &stat);
463 copy_xattrs(c, &inode, d->d_name);
467 switch (mode_to_type(stat.st_mode)) {
469 fd = xopen(d->d_name, O_RDONLY|O_NOATIME);
470 copy_dir(s, c, &inode, fd, child_path);
474 inode.bi_size = stat.st_size;
476 fd = xopen(d->d_name, O_RDONLY|O_NOATIME);
477 copy_file(c, &inode, fd, child_path, &s->extents);
481 inode.bi_size = stat.st_size;
483 copy_link(c, &inode, d->d_name);
490 /* nothing else to copy for these: */
/* persist size, sector count and times accumulated above */
496 update_inode(c, &inode);
502 die("readdir error: %m");
/*
 * Create (or reuse) the file at @file_path, preallocate @size bytes for it
 * and return the physical extents it occupies.
 *
 * The file is opened with O_CREAT, and additionally with O_EXCL in one of
 * the two open() variants, so that an existing file is rejected there.  It
 * must live on device @dev; its inode number is returned through
 * @bcachefs_inum.  Space is reserved with fallocate() and then mapped with
 * FIEMAP: any extent flagged UNKNOWN, ENCODED, NOT_ALIGNED or DATA_INLINE,
 * or whose physical offset/length is not a multiple of @block_size, is
 * fatal.  The collected ranges are sorted and merged.
 *
 * @block_size is used as a mask (block_size - 1) and therefore has to be a
 * power of two.
 */
505 static ranges reserve_new_fs_space(const char *file_path, unsigned block_size,
506 u64 size, u64 *bcachefs_inum, dev_t dev,
510 ? open(file_path, O_RDWR|O_CREAT, 0600)
511 : open(file_path, O_RDWR|O_CREAT|O_EXCL, 0600);
513 die("Error creating %s for bcachefs metadata: %m",
516 struct stat statbuf = xfstat(fd);
518 if (statbuf.st_dev != dev)
519 die("bcachefs file has incorrect device");
521 *bcachefs_inum = statbuf.st_ino;
523 if (fallocate(fd, 0, 0, size))
524 die("Error reserving space for bcachefs metadata: %m");
528 struct fiemap_iter iter;
529 struct fiemap_extent e;
530 ranges extents = { NULL };
532 fiemap_for_each(fd, iter, e) {
533 if (e.fe_flags & (FIEMAP_EXTENT_UNKNOWN|
534 FIEMAP_EXTENT_ENCODED|
535 FIEMAP_EXTENT_NOT_ALIGNED|
536 FIEMAP_EXTENT_DATA_INLINE))
537 die("Unable to continue: metadata file not fully mapped");
539 if ((e.fe_physical & (block_size - 1)) ||
540 (e.fe_length & (block_size - 1)))
541 die("Unable to continue: unaligned extents in metadata file");
543 range_add(&extents, e.fe_physical, e.fe_length);
547 ranges_sort_merge(&extents);
/*
 * Create the file "old_migrated_filesystem" in @root_inode and make it
 * reference every part of device 0 that is not covered by @extents.
 *
 * The file is owned by uid/gid 0 with mode S_IFREG|0400 and its size is set
 * to the device size in bytes.  @extents is sorted and merged first; every
 * hole between the ranges is then mapped 1:1 (logical offset == physical
 * offset) with link_data(), and the inode is written back.
 */
551 static void reserve_old_fs_space(struct bch_fs *c,
552 struct bch_inode_unpacked *root_inode,
555 struct bch_dev *ca = c->devs[0];
556 struct bch_inode_unpacked dst;
557 struct hole_iter iter;
560 dst = create_file(c, root_inode, "old_migrated_filesystem",
561 0, 0, S_IFREG|0400, 0);
/* sectors -> bytes */
562 dst.bi_size = bucket_to_sector(ca, ca->mi.nbuckets) << 9;
564 ranges_sort_merge(extents);
566 for_each_hole(iter, *extents, bucket_to_sector(ca, ca->mi.nbuckets) << 9, i)
567 link_data(c, &dst, i.start, i.start, i.end - i.start);
569 update_inode(c, &dst);
/*
 * Copy the whole source filesystem rooted at @src_fd / @src_path into the
 * bcachefs filesystem @c.
 *
 * The bcachefs root inode is looked up and receives the times and xattrs of
 * the source root.  copy_dir() then migrates the tree, with @bcachefs_inum
 * stored in the copy state so that copy_dir() can skip that inode.
 * Afterwards reserve_old_fs_space() is run on the ranges collected during
 * the copy and the root inode is written back.  The collected ranges and the
 * hard link table are freed at the end.
 */
572 static void copy_fs(struct bch_fs *c, int src_fd, const char *src_path,
573 u64 bcachefs_inum, ranges *extents)
577 struct bch_inode_unpacked root_inode;
578 int ret = bch2_inode_find_by_inum(c, BCACHEFS_ROOT_INO, &root_inode);
580 die("error looking up root directory: %s", strerror(-ret));
583 die("chdir error: %m");
585 struct stat stat = xfstat(src_fd);
586 copy_times(c, &root_inode, &stat);
/* "." is resolved relative to the current working directory */
587 copy_xattrs(c, &root_inode, ".");
589 struct copy_fs_state s = {
590 .bcachefs_inum = bcachefs_inum,
596 copy_dir(&s, c, &root_inode, src_fd, src_path);
598 reserve_old_fs_space(c, &root_inode, &s.extents);
600 update_inode(c, &root_inode);
602 darray_free(s.extents);
603 genradix_free(&s.hardlinks);
/*
 * Pick a superblock location for @dev inside one of the ranges in @extents.
 *
 * For each range the usable window is narrowed to whole buckets: the start
 * is rounded up to a bucket boundary and is never below 256 KiB, the end is
 * rounded down to a bucket boundary.  A window with room for 128 KiB is
 * accepted: dev->sb_offset is set to its start in 512-byte sectors and
 * dev->sb_end 256 sectors (128 KiB) later.  Dies if no range qualifies.
 */
606 static void find_superblock_space(ranges extents, struct dev_opts *dev)
610 darray_foreach(i, extents) {
/* range offsets are bytes; bucket_size is in sectors, hence << 9 */
611 u64 start = round_up(max(256ULL << 10, i->start),
612 dev->bucket_size << 9);
613 u64 end = round_down(i->end,
614 dev->bucket_size << 9);
616 if (start + (128 << 10) <= end) {
617 dev->sb_offset = start >> 9;
618 dev->sb_end = dev->sb_offset + 256;
623 die("Couldn't find a valid location for superblock");
/* Print the help text of the "bcachefs migrate" command to stdout. */
626 static void migrate_usage(void)
628 puts("bcachefs migrate - migrate an existing filesystem to bcachefs\n"
629 "Usage: bcachefs migrate [OPTION]...\n"
632 " -f fs Root of filesystem to migrate(s)\n"
633 " --encrypted Enable whole filesystem encryption (chacha20/poly1305)\n"
634 " --no_passphrase Don't encrypt master encryption key\n"
635 " -F Force, even if metadata file already exists\n"
636 " -h Display this help and exit\n"
637 "Report bugs to <linux-bcache@vger.kernel.org>");
/*
 * Long options accepted by cmd_migrate(); both take no argument and are
 * reported by getopt_long() as 'e' and 'p' respectively.
 */
640 static const struct option migrate_opts[] = {
641 { "encrypted", no_argument, NULL, 'e' },
642 { "no_passphrase", no_argument, NULL, 'p' },
/*
 * "bcachefs migrate": convert an existing, mounted filesystem to bcachefs in
 * place.
 *
 * Steps performed by the code below:
 *   1. parse options (-f fs, -F, -h, --encrypted, --no_passphrase);
 *   2. verify that the given path is a mount point and a directory, and
 *      derive the backing block device from its st_dev;
 *   3. choose the block size as the smaller of the filesystem's st_blksize
 *      and the device block size;
 *   4. reserve a fifth of the device size in the file "<fs>/bcachefs" and
 *      pick a superblock location inside that reserved space;
 *   5. format a bcachefs filesystem on the device with that superblock,
 *      optionally encrypted with a passphrase;
 *   6. open it without starting, mark everything outside the reserved space
 *      as not usable, start it and copy the old filesystem into it;
 *   7. open it again with nochanges set (read-only fsck run).
 *
 * @argc/@argv: command line as passed to the sub-command.
 */
646 int cmd_migrate(int argc, char *argv[])
648 struct format_opts format_opts = format_opts_default();
649 char *fs_path = NULL;
651 bool no_passphrase = false, force = false;
654 while ((opt = getopt_long(argc, argv, "f:Fh",
655 migrate_opts, NULL)) != -1)
661 format_opts.encrypted = true;
664 no_passphrase = true;
675 die("Please specify a filesystem to migrate");
677 if (!path_is_fs_root(fs_path))
678 die("%s is not a filesystem root", fs_path);
680 int fs_fd = xopen(fs_path, O_RDONLY|O_NOATIME);
681 struct stat stat = xfstat(fs_fd);
683 if (!S_ISDIR(stat.st_mode))
684 die("%s is not a directory", fs_path);
686 struct dev_opts dev = dev_opts_default();
/* the block device backing the filesystem being migrated */
688 dev.path = dev_t_to_path(stat.st_dev);
689 dev.fd = xopen(dev.path, O_RDWR);
/* get_blocksize() is scaled by << 9 here, i.e. treated as a sector count */
691 block_size = min_t(unsigned, stat.st_blksize,
692 get_blocksize(dev.path, dev.fd) << 9);
694 BUG_ON(!is_power_of_2(block_size) || block_size < 512);
695 format_opts.block_size = block_size >> 9;
/* file inside the old filesystem that holds all new bcachefs data */
698 char *file_path = mprintf("%s/bcachefs", fs_path);
700 bch2_pick_bucket_size(format_opts, &dev);
702 ranges extents = reserve_new_fs_space(file_path,
703 block_size, get_size(dev.path, dev.fd) / 5,
704 &bcachefs_inum, stat.st_dev, force);
706 find_superblock_space(extents, &dev);
708 if (format_opts.encrypted && !no_passphrase)
709 format_opts.passphrase = read_passphrase_twice("Enter passphrase: ");
711 struct bch_sb *sb = bch2_format(format_opts, &dev, 1);
712 u64 sb_offset = le64_to_cpu(sb->layout.sb_offset[0]);
714 if (format_opts.passphrase)
715 bch2_add_key(sb, format_opts.passphrase);
/*
 * The file name printed here must match the one created by
 * reserve_old_fs_space() ("old_migrated_filesystem").
 */
719 printf("Creating new filesystem on %s in space reserved at %s\n"
721 " mount -t bcachefs -o sb=%llu %s dir\n"
723 "After verifying that the new filesystem is correct, to create a\n"
724 "superblock at the default offset and finish the migration run\n"
725 " bcachefs migrate_superblock -d %s -o %llu\n"
727 "The new filesystem will have a file at /old_migrated_filesystem\n"
728 "referencing all disk space that might be used by the existing\n"
729 "filesystem. That file can be deleted once the old filesystem is\n"
730 "no longer needed (and should be deleted prior to running\n"
731 "bcachefs migrate_superblock)\n",
732 dev.path, file_path, sb_offset, dev.path,
733 dev.path, sb_offset);
735 struct bch_opts opts = bch2_opts_empty();
736 struct bch_fs *c = NULL;
737 char *path[1] = { dev.path };
/* open via the non-default superblock, non-exclusively, without starting */
739 opt_set(opts, sb, sb_offset);
740 opt_set(opts, nostart, true);
741 opt_set(opts, noexcl, true);
743 c = bch2_fs_open(path, 1, opts);
745 die("Error opening new filesystem: %s", strerror(-PTR_ERR(c)));
/* must happen before the filesystem starts */
747 mark_unreserved_space(c, extents);
749 const char *err = bch2_fs_start(c);
751 die("Error starting new filesystem: %s", err);
753 copy_fs(c, fs_fd, fs_path, bcachefs_inum, &extents);
/* second open: started normally, but with nochanges set */
757 printf("Migrate complete, running fsck:\n");
758 opt_set(opts, nostart, false);
759 opt_set(opts, nochanges, true);
761 c = bch2_fs_open(path, 1, opts);
763 die("Error opening new filesystem: %s", strerror(-PTR_ERR(c)));
766 printf("fsck complete\n");
/* Print the help text of the "bcachefs migrate_superblock" command to stdout. */
770 static void migrate_superblock_usage(void)
772 puts("bcachefs migrate_superblock - create default superblock after migrating\n"
773 "Usage: bcachefs migrate_superblock [OPTION]...\n"
776 " -d device Device to create superblock for\n"
777 " -o offset Offset of existing superblock\n"
778 " -h Display this help and exit\n"
779 "Report bugs to <linux-bcache@vger.kernel.org>");
/*
 * "bcachefs migrate_superblock": add a superblock at the default location
 * (BCH_SB_SECTOR) to a device that was migrated with a superblock at a
 * non-default offset.
 *
 * Options: -d device, -o offset (decimal, parsed with kstrtou64), -h.
 * Both -d and -o are required.  The existing superblock is read from
 * @offset; the command dies if the layout has no free slot or already
 * contains BCH_SB_SECTOR.  Otherwise the existing offsets are shifted up by
 * one slot, BCH_SB_SECTOR is stored in slot 0 and the superblock is written
 * with bch2_super_write().
 */
782 int cmd_migrate_superblock(int argc, char *argv[])
788 while ((opt = getopt(argc, argv, "d:o:h")) != -1)
794 ret = kstrtou64(optarg, 10, &offset);
796 die("Invalid offset");
799 migrate_superblock_usage();
804 die("Please specify a device");
807 die("Please specify offset of existing superblock");
809 int fd = xopen(dev, O_RDWR);
810 struct bch_sb *sb = __bch2_super_read(fd, offset);
/* the layout's offset array must have room for one more entry */
812 if (sb->layout.nr_superblocks >= ARRAY_SIZE(sb->layout.sb_offset))
813 die("Can't add superblock: no space left in superblock layout");
816 for (i = 0; i < sb->layout.nr_superblocks; i++)
817 if (le64_to_cpu(sb->layout.sb_offset[i]) == BCH_SB_SECTOR)
818 die("Superblock layout already has default superblock");
/* regions overlap, hence memmove(): shift all entries up by one slot */
820 memmove(&sb->layout.sb_offset[1],
821 &sb->layout.sb_offset[0],
822 sb->layout.nr_superblocks * sizeof(u64));
823 sb->layout.nr_superblocks++;
/* offsets are stored little-endian on disk */
825 sb->layout.sb_offset[0] = cpu_to_le64(BCH_SB_SECTOR);
827 bch2_super_write(fd, sb);