9 #include <sys/sysmacros.h>
10 #include <sys/types.h>
14 #include <linux/fiemap.h>
16 #include <linux/stat.h>
18 #include <uuid/uuid.h>
22 #include "libbcachefs.h"
24 #include <linux/dcache.h>
25 #include <linux/generic-radix-tree.h>
26 #include <linux/xattr.h>
27 #include "libbcachefs/bcachefs.h"
28 #include "libbcachefs/alloc_background.h"
29 #include "libbcachefs/alloc_foreground.h"
30 #include "libbcachefs/btree_update.h"
31 #include "libbcachefs/buckets.h"
32 #include "libbcachefs/dirent.h"
33 #include "libbcachefs/fs.h"
34 #include "libbcachefs/inode.h"
35 #include "libbcachefs/io.h"
36 #include "libbcachefs/replicas.h"
37 #include "libbcachefs/str_hash.h"
38 #include "libbcachefs/super.h"
39 #include "libbcachefs/xattr.h"
/*
 * Map a block device number to a "/dev/..." path.
 *
 * Resolves the /sys/dev/block/<major>:<minor> symlink with readlink() and
 * formats the part of the target located by strrchr(link, '/') into a
 * "/dev/%s" string built by mprintf().
 *
 * NOTE(review): the embedded line numbers skip inside this function, so the
 * declaration of ret and the condition guarding the second die() are not
 * visible in this view.
 */
41 static char *dev_t_to_path(dev_t dev)
43 char link[PATH_MAX], *p;
46 char *sysfs_dev = mprintf("/sys/dev/block/%u:%u",
47 major(dev), minor(dev));
48 ret = readlink(sysfs_dev, link, sizeof(link));
/* a length equal to sizeof(link) is rejected as well as a negative result */
51 if (ret < 0 || ret >= sizeof(link))
52 die("readlink error while looking up block device: %m");
/* find the last '/' of the link target */
56 p = strrchr(link, '/');
58 die("error looking up device name");
61 return mprintf("/dev/%s", p);
/*
 * Check whether @path appears as a mount point in /proc/self/mountinfo.
 *
 * Every line read with getline() is split on single spaces with strsep():
 * four leading fields are skipped and the fifth is compared against @path
 * with strcmp().
 *
 * NOTE(review): the statements that record and return the result, and the
 * condition in front of the die() call, are not part of this view.
 */
64 static bool path_is_fs_root(const char *path)
66 char *line = NULL, *p, *mount;
71 f = fopen("/proc/self/mountinfo", "r");
73 die("Error getting mount information");
75 while (getline(&line, &n, f) != -1) {
78 strsep(&p, " "); /* mount id */
79 strsep(&p, " "); /* parent id */
80 strsep(&p, " "); /* dev */
81 strsep(&p, " "); /* root */
82 mount = strsep(&p, " ");
/* exact string match only; @path is not normalised here */
85 if (mount && !strcmp(path, mount))
/*
 * Walk the holes between @extents on device 0 (c->devs[0]) and set the
 * corresponding bits in ca->buckets_nouse.
 *
 * The hole iteration is bounded by bucket_to_sector(ca, nbuckets) << 9.
 * Hole offsets are shifted right by 9 before being handed to
 * sector_to_bucket(), i.e. they are treated as byte offsets.
 */
96 static void mark_unreserved_space(struct bch_fs *c, ranges extents)
98 struct bch_dev *ca = c->devs[0];
99 struct hole_iter iter;
102 for_each_hole(iter, extents, bucket_to_sector(ca, ca->mi.nbuckets) << 9, i) {
/* empty hole: the statement taken in this case is not shown in this view */
105 if (i.start == i.end)
/* first bucket of the hole */
108 b = sector_to_bucket(ca, i.start >> 9);
110 set_bit(b, ca->buckets_nouse);
/* keep going while bucket b still starts before the end of the hole */
112 } while (bucket_to_sector(ca, b) << 9 < i.end);
/*
 * Pack @inode with bch2_inode_pack() and insert the packed key into the
 * inodes btree (BTREE_ID_INODES).
 *
 * ret is passed negated to strerror(), i.e. it is treated as a negative
 * errno value. The condition guarding die() is not shown in this view.
 */
116 static void update_inode(struct bch_fs *c,
117 struct bch_inode_unpacked *inode)
119 struct bkey_inode_buf packed;
122 bch2_inode_pack(&packed, inode);
123 ret = bch2_btree_insert(c, BTREE_ID_INODES, &packed.inode.k_i,
126 die("error creating file: %s", strerror(-ret));
/*
 * Create a directory entry called @name in directory @parent that refers
 * to inode number @inum.
 *
 * The entry type is derived from @mode via mode_to_type(), and the hash
 * parameters come from the parent inode (bch2_hash_info_init()).
 * BCH_HASH_SET_MUST_CREATE is passed to bch2_dirent_create() - presumably
 * this makes an already existing name an error; confirm against the
 * str_hash code.
 */
129 static void create_dirent(struct bch_fs *c,
130 struct bch_inode_unpacked *parent,
131 const char *name, u64 inum, mode_t mode)
133 struct bch_hash_info parent_hash_info = bch2_hash_info_init(c, parent);
/* wrap the C string in the qstr that bch2_dirent_create() takes */
134 struct qstr qname = { { { .len = strlen(name), } }, .name = name };
136 int ret = bch2_dirent_create(c, parent->bi_inum, &parent_hash_info,
137 mode_to_type(mode), &qname,
138 inum, NULL, BCH_HASH_SET_MUST_CREATE);
140 die("error creating file: %s", strerror(-ret));
/*
 * Add a further directory entry @name in @parent for the already existing
 * inode @inum: the inode is looked up, written back with update_inode(),
 * and the dirent is created.
 *
 * NOTE(review): the lines between the lookup and update_inode() are not
 * shown in this view; presumably the inode's link count is adjusted
 * there - confirm.
 */
146 static void create_link(struct bch_fs *c,
147 struct bch_inode_unpacked *parent,
148 const char *name, u64 inum, mode_t mode)
150 struct bch_inode_unpacked inode;
151 int ret = bch2_inode_find_by_inum(c, inum, &inode);
153 die("error looking up hardlink: %s", strerror(-ret));
156 update_inode(c, &inode);
158 create_dirent(c, parent, name, inum, mode);
/*
 * Create a new inode owned by @uid/@gid with @mode and @rdev, and link it
 * into @parent under the given name.
 *
 * The inode is initialised with bch2_inode_init() (which also receives
 * @parent), created with bch2_inode_create(), and then create_dirent()
 * adds the directory entry. The function returns a
 * struct bch_inode_unpacked by value; the return statement itself is not
 * shown in this view.
 */
161 static struct bch_inode_unpacked create_file(struct bch_fs *c,
162 struct bch_inode_unpacked *parent,
164 uid_t uid, gid_t gid,
165 mode_t mode, dev_t rdev)
167 struct bch_inode_unpacked new_inode;
170 bch2_inode_init(c, &new_inode, uid, gid, mode, rdev, parent);
/* BLOCKDEV_INODE_MAX and 0 are passed as the two arguments after the inode */
172 ret = bch2_inode_create(c, &new_inode, BLOCKDEV_INODE_MAX, 0,
173 &c->unused_inode_hint);
175 die("error creating file: %s", strerror(-ret));
177 create_dirent(c, parent, name, new_inode.bi_inum, mode);
/*
 * Iterate over an array of xattr_handler pointers: each step loads
 * *(handlers) into (handler) and post-increments (handlers), so the
 * caller's handlers variable is modified by the loop.
 *
 * NOTE(review): the loop-condition line of the macro is not shown in
 * this view.
 */
182 #define for_each_xattr_handler(handlers, handler) \
184 for ((handler) = *(handlers)++; \
186 (handler) = *(handlers)++)
/*
 * Look for the handler in bch2_xattr_handlers that applies to *name.
 *
 * Each handler's prefix (xattr_prefix()) is matched against *name with
 * strcmp_prefix(). The failure paths shown return ERR_PTR(-EINVAL) and
 * ERR_PTR(-EOPNOTSUPP), so callers must test the result with IS_ERR()
 * before dereferencing it.
 *
 * NOTE(review): the success path (and any update of *name) is not shown
 * in this view.
 */
188 static const struct xattr_handler *xattr_resolve_name(char **name)
190 const struct xattr_handler **handlers = bch2_xattr_handlers;
191 const struct xattr_handler *handler;
193 for_each_xattr_handler(handlers, handler) {
196 n = strcmp_prefix(*name, xattr_prefix(handler));
/*
 * True when exactly one of "handler->prefix is NULL" and "nothing is left
 * after the prefix" holds.
 */
198 if (!handler->prefix ^ !*n) {
201 return ERR_PTR(-EINVAL);
207 return ERR_PTR(-EOPNOTSUPP);
/*
 * Copy the access, modification and change timestamps of src (st_atim,
 * st_mtim, st_ctim) into @dst, converting each one with
 * timespec_to_bch2_time().
 */
210 static void copy_times(struct bch_fs *c, struct bch_inode_unpacked *dst,
213 dst->bi_atime = timespec_to_bch2_time(c, src->st_atim);
214 dst->bi_mtime = timespec_to_bch2_time(c, src->st_mtim);
215 dst->bi_ctime = timespec_to_bch2_time(c, src->st_ctim);
218 static void copy_xattrs(struct bch_fs *c, struct bch_inode_unpacked *dst,
221 struct bch_hash_info hash_info = bch2_hash_info_init(c, dst);
223 char attrs[XATTR_LIST_MAX];
224 ssize_t attrs_size = llistxattr(src, attrs, sizeof(attrs));
226 die("listxattr error: %m");
230 attr < attrs + attrs_size;
232 next = attr + strlen(attr) + 1;
234 char val[XATTR_SIZE_MAX];
235 ssize_t val_size = lgetxattr(src, attr, val, sizeof(val));
238 die("error getting xattr val: %m");
240 const struct xattr_handler *h = xattr_resolve_name(&attr);
242 int ret = bch2_trans_do(c, NULL, BTREE_INSERT_ATOMIC,
243 bch2_xattr_set(&trans, dst->bi_inum, &hash_info, attr,
244 val, val_size, h->flags, 0));
246 die("error creating xattr: %s", strerror(-ret));
250 static char buf[1 << 20] __aligned(PAGE_SIZE);
252 static void write_data(struct bch_fs *c,
253 struct bch_inode_unpacked *dst_inode,
254 u64 dst_offset, void *buf, size_t len)
257 struct bch_write_op op;
258 struct bio_vec bv[sizeof(buf) / PAGE_SIZE];
262 BUG_ON(dst_offset & (block_bytes(c) - 1));
263 BUG_ON(len & (block_bytes(c) - 1));
265 closure_init_stack(&cl);
267 bio_init(&o.op.wbio.bio, o.bv, ARRAY_SIZE(o.bv));
268 bch2_bio_map(&o.op.wbio.bio, buf, len);
270 bch2_write_op_init(&o.op, c, bch2_opts_to_inode_opts(c->opts));
271 o.op.write_point = writepoint_hashed(0);
272 o.op.nr_replicas = 1;
273 o.op.pos = POS(dst_inode->bi_inum, dst_offset >> 9);
275 int ret = bch2_disk_reservation_get(c, &o.op.res, len >> 9,
276 c->opts.data_replicas, 0);
278 die("error reserving space in new filesystem: %s", strerror(-ret));
280 closure_call(&o.op.cl, bch2_write, NULL, &cl);
283 dst_inode->bi_sectors += len >> 9;
/*
 * Copy the byte range [start, end) of @src_fd into @dst_inode at the same
 * offsets, going through the file-scope bounce buffer buf.
 *
 * Each iteration reads at most sizeof(buf) bytes with xpread(), zero-fills
 * up to the next block boundary, and hands the padded chunk to
 * write_data().
 *
 * NOTE(review): the statement advancing start is not shown in this view.
 */
286 static void copy_data(struct bch_fs *c,
287 struct bch_inode_unpacked *dst_inode,
288 int src_fd, u64 start, u64 end)
290 while (start < end) {
291 unsigned len = min_t(u64, end - start, sizeof(buf));
/* bytes needed to round this chunk up to a whole block */
292 unsigned pad = round_up(len, block_bytes(c)) - len;
294 xpread(src_fd, buf, len, start);
/* clear the padding so no stale buffer contents are written out */
295 memset(buf + len, 0, pad);
297 write_data(c, dst_inode, start, buf, len + pad);
/*
 * Insert extents into the extents btree so that inode @dst, starting at
 * @logical, points at space beginning at @physical on device 0
 * (c->devs[0]), for @length in total.
 *
 * @logical, @physical and @length must be block aligned (BUG_ON
 * otherwise). The range is split so that no extent crosses a bucket
 * boundary: each piece is at most the remainder of the current bucket.
 * Every piece takes a disk reservation, is marked via
 * bch2_mark_bkey_replicas(), inserted, and its sectors are added to
 * dst->bi_sectors.
 *
 * NOTE(review): several lines are not shown in this view, including any
 * unit conversion between the alignment checks and the range check, the
 * loop header, and the statements advancing the position.
 */
302 static void link_data(struct bch_fs *c, struct bch_inode_unpacked *dst,
303 u64 logical, u64 physical, u64 length)
305 struct bch_dev *ca = c->devs[0];
307 BUG_ON(logical & (block_bytes(c) - 1));
308 BUG_ON(physical & (block_bytes(c) - 1));
309 BUG_ON(length & (block_bytes(c) - 1));
/* the linked range must lie within the device */
315 BUG_ON(physical + length > bucket_to_sector(ca, ca->mi.nbuckets));
318 struct bkey_i_extent *e;
320 u64 b = sector_to_bucket(ca, physical);
321 struct disk_reservation res;
/* sectors left in the current bucket, counted from physical */
325 sectors = min(ca->mi.bucket_size -
326 (physical & (ca->mi.bucket_size - 1)),
329 e = bkey_extent_init(&k.k);
330 e->k.p.inode = dst->bi_inum;
/* the key position is the END of the extent */
331 e->k.p.offset = logical + sectors;
333 bch2_bkey_append_ptr(&e->k_i, (struct bch_extent_ptr) {
336 .gen = bucket(ca, b)->mark.gen,
339 ret = bch2_disk_reservation_get(c, &res, sectors, 1,
340 BCH_DISK_RESERVATION_NOFAIL);
342 die("error reserving space in new filesystem: %s",
345 bch2_mark_bkey_replicas(c, extent_i_to_s_c(e).s_c);
347 ret = bch2_btree_insert(c, BTREE_ID_EXTENTS, &e->k_i,
350 die("btree insert error %s", strerror(-ret));
352 bch2_disk_reservation_put(c, &res);
354 dst->bi_sectors += sectors;
361 static void copy_link(struct bch_fs *c, struct bch_inode_unpacked *dst,
364 ssize_t ret = readlink(src, buf, sizeof(buf));
366 die("readlink error: %m");
368 write_data(c, dst, 0, buf, round_up(ret, block_bytes(c)));
/*
 * Populate inode @dst with the contents of the regular file @src_fd.
 *
 * The file's extents are walked with fiemap_for_each(). Extents flagged
 * unknown, encoded, not aligned or inline, and extents whose physical
 * offset is below 1 << 20, are copied through copy_data(). Every other
 * extent is recorded in @extents with range_add() and linked in place
 * with link_data().
 *
 * Dies if a logical offset, length or (for linked extents) physical
 * offset is not block aligned; @src_path is used only in those messages.
 *
 * NOTE(review): the body of the first fiemap loop and the statements
 * that follow each copy_data() call are not shown in this view.
 */
371 static void copy_file(struct bch_fs *c, struct bch_inode_unpacked *dst,
372 int src_fd, u64 src_size,
373 char *src_path, ranges *extents)
375 struct fiemap_iter iter;
376 struct fiemap_extent e;
/* first pass: look for extents whose mapping is reported as unknown */
378 fiemap_for_each(src_fd, iter, e)
379 if (e.fe_flags & FIEMAP_EXTENT_UNKNOWN) {
/* second pass: copy or link each extent */
384 fiemap_for_each(src_fd, iter, e) {
385 if ((e.fe_logical & (block_bytes(c) - 1)) ||
386 (e.fe_length & (block_bytes(c) - 1)))
387 die("Unaligned extent in %s - can't handle", src_path);
389 if (e.fe_flags & (FIEMAP_EXTENT_UNKNOWN|
390 FIEMAP_EXTENT_ENCODED|
391 FIEMAP_EXTENT_NOT_ALIGNED|
392 FIEMAP_EXTENT_DATA_INLINE)) {
393 copy_data(c, dst, src_fd, e.fe_logical,
394 min(src_size - e.fe_logical,
400 * if the data is below 1 MB, copy it so it doesn't conflict
401 * with bcachefs's potentially larger superblock:
403 if (e.fe_physical < 1 << 20) {
404 copy_data(c, dst, src_fd, e.fe_logical,
405 min(src_size - e.fe_logical,
410 if ((e.fe_physical & (block_bytes(c) - 1)))
411 die("Unaligned extent in %s - can't handle", src_path);
/* remember the physical range as in use, then point the new inode at it */
413 range_add(extents, e.fe_physical, e.fe_length);
414 link_data(c, dst, e.fe_logical, e.fe_physical, e.fe_length);
/*
 * State shared by the recursive copy_dir() walk.
 *
 * hardlinks is indexed by the source file's st_ino and holds the inode
 * number already created for it in the new filesystem (0 when none yet),
 * as used in copy_dir().
 *
 * NOTE(review): the remaining members (copy_dir() and copy_fs() also use
 * bcachefs_inum, dev and extents) are not shown in this view.
 */
418 struct copy_fs_state {
422 GENRADIX(u64) hardlinks;
/*
 * Recursively copy the directory open at @src_fd (path @src_path, used for
 * messages and for building child paths) into the directory inode @dst.
 *
 * For every entry other than ".", ".." and the inode recorded in
 * s->bcachefs_inum:
 *  - the entry must live on device s->dev, otherwise die();
 *  - a regular file whose st_ino already has an entry in s->hardlinks
 *    only gets an additional link (create_link());
 *  - otherwise a new inode is created, times and xattrs are copied, and
 *    the contents are handled according to mode_to_type(st_mode):
 *    subdirectories recurse, regular files go through copy_file(),
 *    symlinks through copy_link();
 *  - finally the inode is written with update_inode().
 *
 * NOTE(review): the result of fdopendir() is not checked in the lines
 * shown, and no closedir() is visible. Many lines, including the case
 * labels and the close of the per-entry descriptors, are not part of
 * this view.
 */
426 static void copy_dir(struct copy_fs_state *s,
428 struct bch_inode_unpacked *dst,
429 int src_fd, const char *src_path)
431 DIR *dir = fdopendir(src_fd);
/*
 * errno is cleared before each readdir() call; presumably so that the
 * "readdir error" die() after the loop can tell end-of-directory from
 * failure - the check itself is not shown.
 */
434 while ((errno = 0), (d = readdir(dir))) {
435 struct bch_inode_unpacked inode;
439 die("chdir error: %m");
/* lstat-style lookup: a symlink entry is examined itself, not its target */
442 xfstatat(src_fd, d->d_name, AT_SYMLINK_NOFOLLOW);
444 if (!strcmp(d->d_name, ".") ||
445 !strcmp(d->d_name, "..") ||
446 stat.st_ino == s->bcachefs_inum)
449 char *child_path = mprintf("%s/%s", src_path, d->d_name);
451 if (stat.st_dev != s->dev)
452 die("%s does not have correct st_dev!", child_path);
/* only regular files are tracked for hard links */
454 u64 *dst_inum = S_ISREG(stat.st_mode)
455 ? genradix_ptr_alloc(&s->hardlinks, stat.st_ino, GFP_KERNEL)
/* a non-zero slot means this source inode was already copied */
458 if (dst_inum && *dst_inum) {
459 create_link(c, dst, d->d_name, *dst_inum, S_IFREG);
463 inode = create_file(c, dst, d->d_name,
464 stat.st_uid, stat.st_gid,
465 stat.st_mode, stat.st_rdev);
468 *dst_inum = inode.bi_inum;
470 copy_times(c, &inode, &stat);
471 copy_xattrs(c, &inode, d->d_name);
475 switch (mode_to_type(stat.st_mode)) {
477 fd = xopen(d->d_name, O_RDONLY|O_NOATIME);
478 copy_dir(s, c, &inode, fd, child_path);
482 inode.bi_size = stat.st_size;
484 fd = xopen(d->d_name, O_RDONLY|O_NOATIME);
485 copy_file(c, &inode, fd, stat.st_size,
486 child_path, &s->extents);
490 inode.bi_size = stat.st_size;
492 copy_link(c, &inode, d->d_name);
499 /* nothing else to copy for these: */
/* persist size, sector count and times accumulated on the in-memory inode */
505 update_inode(c, &inode);
511 die("readdir error: %m");
/*
 * Create the file at @file_path on the filesystem being migrated and
 * preallocate @size bytes in it with fallocate(); its extents are the
 * space the new filesystem will be created in.
 *
 * One of the two open() calls below is chosen by a condition that is not
 * shown in this view; the second one adds O_EXCL, so it fails if the file
 * already exists. The file must reside on device @dev, and its inode
 * number is stored in *bcachefs_inum.
 *
 * Returns the file's physical extents as a sorted, merged range list.
 * Dies if an extent is flagged unknown, encoded, not aligned or inline,
 * or if an extent is not aligned to @block_size.
 *
 * NOTE(review): no close() of fd is visible in the lines shown.
 */
514 static ranges reserve_new_fs_space(const char *file_path, unsigned block_size,
515 u64 size, u64 *bcachefs_inum, dev_t dev,
519 ? open(file_path, O_RDWR|O_CREAT, 0600)
520 : open(file_path, O_RDWR|O_CREAT|O_EXCL, 0600);
522 die("Error creating %s for bcachefs metadata: %m",
525 struct stat statbuf = xfstat(fd);
527 if (statbuf.st_dev != dev)
528 die("bcachefs file has incorrect device");
530 *bcachefs_inum = statbuf.st_ino;
532 if (fallocate(fd, 0, 0, size))
533 die("Error reserving space for bcachefs metadata: %m");
537 struct fiemap_iter iter;
538 struct fiemap_extent e;
539 ranges extents = { NULL };
541 fiemap_for_each(fd, iter, e) {
542 if (e.fe_flags & (FIEMAP_EXTENT_UNKNOWN|
543 FIEMAP_EXTENT_ENCODED|
544 FIEMAP_EXTENT_NOT_ALIGNED|
545 FIEMAP_EXTENT_DATA_INLINE))
546 die("Unable to continue: metadata file not fully mapped");
/* block_size - 1 is used as a mask, so block_size must be a power of two */
548 if ((e.fe_physical & (block_size - 1)) ||
549 (e.fe_length & (block_size - 1)))
550 die("Unable to continue: unaligned extents in metadata file");
552 range_add(&extents, e.fe_physical, e.fe_length);
556 ranges_sort_merge(&extents);
/*
 * Create the file "old_migrated_filesystem" (mode S_IFREG|0400, uid and
 * gid 0) in @root_inode, with bi_size set to the size of device 0.
 *
 * After the extent list is sorted and merged, every hole between the
 * recorded extents is linked into that file at the same logical and
 * physical offset via link_data().
 */
560 static void reserve_old_fs_space(struct bch_fs *c,
561 struct bch_inode_unpacked *root_inode,
564 struct bch_dev *ca = c->devs[0];
565 struct bch_inode_unpacked dst;
566 struct hole_iter iter;
569 dst = create_file(c, root_inode, "old_migrated_filesystem",
570 0, 0, S_IFREG|0400, 0);
/* the file is as large as the device: number of buckets -> sectors -> bytes */
571 dst.bi_size = bucket_to_sector(ca, ca->mi.nbuckets) << 9;
573 ranges_sort_merge(extents);
/* identity mapping: logical offset == physical offset */
575 for_each_hole(iter, *extents, bucket_to_sector(ca, ca->mi.nbuckets) << 9, i)
576 link_data(c, &dst, i.start, i.start, i.end - i.start);
578 update_inode(c, &dst);
/*
 * Copy the whole directory tree rooted at @src_fd / @src_path into the
 * root directory (BCACHEFS_ROOT_INO) of filesystem @c.
 *
 * Steps visible here: look up the root inode, copy times and xattrs of
 * the source root onto it, copy the tree with copy_dir(), create the
 * file covering the remaining space with reserve_old_fs_space(), write
 * the root inode back, free the temporary state, and call
 * bch2_alloc_write().
 *
 * @bcachefs_inum is stored in the copy state, where copy_dir() uses it
 * to skip that inode.
 */
581 static void copy_fs(struct bch_fs *c, int src_fd, const char *src_path,
582 u64 bcachefs_inum, ranges *extents)
586 struct bch_inode_unpacked root_inode;
587 int ret = bch2_inode_find_by_inum(c, BCACHEFS_ROOT_INO, &root_inode);
589 die("error looking up root directory: %s", strerror(-ret));
592 die("chdir error: %m");
594 struct stat stat = xfstat(src_fd);
595 copy_times(c, &root_inode, &stat);
/* "." is a relative path; the "chdir error" die() above suggests the */
/* working directory was switched to the source root first - confirm */
596 copy_xattrs(c, &root_inode, ".");
598 struct copy_fs_state s = {
599 .bcachefs_inum = bcachefs_inum,
605 copy_dir(&s, c, &root_inode, src_fd, src_path);
607 reserve_old_fs_space(c, &root_inode, &s.extents);
609 update_inode(c, &root_inode);
611 darray_free(s.extents);
612 genradix_free(&s.hardlinks);
615 bch2_alloc_write(c, false, &wrote);
/*
 * Choose a superblock location inside @extents and store it in @dev.
 *
 * For each range, the start is raised to at least 256 << 10 and rounded
 * up to a multiple of dev->bucket_size << 9, and the end is rounded down
 * to the same multiple. The first range with at least 128 << 10 left
 * between the two wins: sb_offset is start >> 9 and sb_end is
 * sb_offset + 256.
 *
 * Calls die() with "Couldn't find a valid location for superblock" as the
 * last statement shown.
 */
618 static void find_superblock_space(ranges extents, struct dev_opts *dev)
622 darray_foreach(i, extents) {
623 u64 start = round_up(max(256ULL << 10, i->start),
624 dev->bucket_size << 9);
625 u64 end = round_down(i->end,
626 dev->bucket_size << 9);
/* 128 << 10 bytes equals the 256 units of 512 bytes added to sb_offset below */
628 if (start + (128 << 10) <= end) {
629 dev->sb_offset = start >> 9;
630 dev->sb_end = dev->sb_offset + 256;
635 die("Couldn't find a valid location for superblock");
/*
 * Print the help text of "bcachefs migrate" to stdout with puts().
 *
 * NOTE(review): the "-f fs" line reads "Root of filesystem to
 * migrate(s)"; the trailing "(s)" looks misplaced.
 */
638 static void migrate_usage(void)
640 puts("bcachefs migrate - migrate an existing filesystem to bcachefs\n"
641 "Usage: bcachefs migrate [OPTION]...\n"
644 " -f fs Root of filesystem to migrate(s)\n"
645 " --encrypted Enable whole filesystem encryption (chacha20/poly1305)\n"
646 " --no_passphrase Don't encrypt master encryption key\n"
647 " -F Force, even if metadata file already exists\n"
648 " -h Display this help and exit\n"
649 "Report bugs to <linux-bcache@vger.kernel.org>");
/*
 * Long options for cmd_migrate(): --encrypted is reported by
 * getopt_long() as 'e' and --no_passphrase as 'p'. Neither takes an
 * argument.
 */
652 static const struct option migrate_opts[] = {
653 { "encrypted", no_argument, NULL, 'e' },
654 { "no_passphrase", no_argument, NULL, 'p' },
/*
 * Migrate the filesystem mounted at @fs_path to bcachefs in place.
 *
 * Steps visible in this view:
 *  1. check that @fs_path is a mount point and a directory, and find and
 *     open the block device backing it;
 *  2. take the block size from the device and pick a bucket size;
 *  3. reserve one fifth of the device size in the file "<fs_path>/bcachefs"
 *     and choose a superblock location inside that space;
 *  4. format, then open the new filesystem with sb, nostart and noexcl
 *     set, mark the unreserved space, and start it;
 *  5. copy the old filesystem's contents with copy_fs();
 *  6. reopen the new filesystem with nostart cleared and nochanges set
 *     (announced as "running fsck") and print follow-up instructions.
 *
 * NOTE(review): two user-visible strings look misspelled - "filysestem"
 * in the first die() message, and "/old_migrated_filestem" in the final
 * printf, while reserve_old_fs_space() creates "old_migrated_filesystem".
 * Also, the conditions in front of several die() calls and the shutdown
 * of the filesystem between the two opens are not shown in this view.
 */
658 static int migrate_fs(const char *fs_path,
659 struct bch_opt_strs fs_opt_strs,
660 struct bch_opts fs_opts,
661 struct format_opts format_opts,
664 if (!path_is_fs_root(fs_path))
665 die("%s is not a filysestem root", fs_path);
667 int fs_fd = xopen(fs_path, O_RDONLY|O_NOATIME);
668 struct stat stat = xfstat(fs_fd);
670 if (!S_ISDIR(stat.st_mode))
671 die("%s is not a directory", fs_path);
673 struct dev_opts dev = dev_opts_default();
/* st_dev of the mount point identifies the backing block device */
675 dev.path = dev_t_to_path(stat.st_dev);
676 dev.fd = xopen(dev.path, O_RDWR);
678 opt_set(fs_opts, block_size, get_blocksize(dev.path, dev.fd));
680 char *file_path = mprintf("%s/bcachefs", fs_path);
681 printf("Creating new filesystem on %s in space reserved at %s\n",
682 dev.path, file_path);
684 bch2_pick_bucket_size(fs_opts, &dev);
/* one fifth of the device size is reserved for the new filesystem */
687 ranges extents = reserve_new_fs_space(file_path,
688 fs_opts.block_size << 9,
689 get_size(dev.path, dev.fd) / 5,
690 &bcachefs_inum, stat.st_dev, force);
692 find_superblock_space(extents, &dev);
694 struct bch_sb *sb = bch2_format(fs_opt_strs,
695 fs_opts,format_opts, &dev, 1);
/* remember where the superblock was placed; it is passed as sb= below */
696 u64 sb_offset = le64_to_cpu(sb->layout.sb_offset[0]);
698 if (format_opts.passphrase)
699 bch2_add_key(sb, format_opts.passphrase);
703 struct bch_opts opts = bch2_opts_empty();
704 struct bch_fs *c = NULL;
705 char *path[1] = { dev.path };
707 opt_set(opts, sb, sb_offset);
708 opt_set(opts, nostart, true);
709 opt_set(opts, noexcl, true);
711 c = bch2_fs_open(path, 1, opts);
713 die("Error opening new filesystem: %s", strerror(-PTR_ERR(c)));
/* done before bch2_fs_start(), while the filesystem is open but not started */
715 mark_unreserved_space(c, extents);
717 int ret = bch2_fs_start(c);
719 die("Error starting new filesystem: %s", strerror(-ret));
721 copy_fs(c, fs_fd, fs_path, bcachefs_inum, &extents);
725 printf("Migrate complete, running fsck:\n");
726 opt_set(opts, nostart, false);
727 opt_set(opts, nochanges, true);
729 c = bch2_fs_open(path, 1, opts);
731 die("Error opening new filesystem: %s", strerror(-PTR_ERR(c)));
734 printf("fsck complete\n");
736 printf("To mount the new filesystem, run\n"
737 " mount -t bcachefs -o sb=%llu %s dir\n"
739 "After verifying that the new filesystem is correct, to create a\n"
740 "superblock at the default offset and finish the migration run\n"
741 " bcachefs migrate-superblock -d %s -o %llu\n"
743 "The new filesystem will have a file at /old_migrated_filestem\n"
744 "referencing all disk space that might be used by the existing\n"
745 "filesystem. That file can be deleted once the old filesystem is\n"
746 "no longer needed (and should be deleted prior to running\n"
747 "bcachefs migrate-superblock)\n",
748 sb_offset, dev.path, dev.path, sb_offset);
/*
 * Entry point of "bcachefs migrate".
 *
 * Format options are first pulled out of argv by bch2_cmdline_opts_get()
 * and parsed; the remaining options are handled with getopt_long() using
 * the short option string "f:Fh" and the migrate_opts long options. If
 * encryption was requested without --no_passphrase, the passphrase is
 * read twice from the user. The return value is that of migrate_fs().
 *
 * NOTE(review): the switch over opt and the condition guarding the
 * "Please specify a filesystem to migrate" die() are not shown in this
 * view.
 */
752 int cmd_migrate(int argc, char *argv[])
754 struct format_opts format_opts = format_opts_default();
755 char *fs_path = NULL;
756 bool no_passphrase = false, force = false;
759 struct bch_opt_strs fs_opt_strs =
760 bch2_cmdline_opts_get(&argc, argv, OPT_FORMAT);
761 struct bch_opts fs_opts = bch2_parse_opts(fs_opt_strs);
763 while ((opt = getopt_long(argc, argv, "f:Fh",
764 migrate_opts, NULL)) != -1)
770 format_opts.encrypted = true;
773 no_passphrase = true;
784 die("Please specify a filesystem to migrate");
786 if (format_opts.encrypted && !no_passphrase)
787 format_opts.passphrase = read_passphrase_twice("Enter passphrase: ");
789 return migrate_fs(fs_path,
/* Print the help text of "bcachefs migrate-superblock" to stdout with puts(). */
795 static void migrate_superblock_usage(void)
797 puts("bcachefs migrate-superblock - create default superblock after migrating\n"
798 "Usage: bcachefs migrate-superblock [OPTION]...\n"
801 " -d device Device to create superblock for\n"
802 " -o offset Offset of existing superblock\n"
803 " -h Display this help and exit\n"
804 "Report bugs to <linux-bcache@vger.kernel.org>");
/*
 * Entry point of "bcachefs migrate-superblock": add a superblock at the
 * default location (BCH_SB_SECTOR) to a migrated filesystem.
 *
 * Options are parsed with getopt() ("d:o:h"); the offset is parsed as a
 * base-10 u64 with kstrtou64(). The existing superblock is read from that
 * offset with __bch2_super_read(). Unless the layout is already full or
 * already lists BCH_SB_SECTOR, the existing offsets are shifted up by one
 * slot, BCH_SB_SECTOR is stored in slot 0, and the superblock is written
 * with bch2_super_write().
 *
 * NOTE(review): the switch over opt and the conditions in front of the
 * "Please specify ..." die() calls are not shown in this view.
 */
807 int cmd_migrate_superblock(int argc, char *argv[])
813 while ((opt = getopt(argc, argv, "d:o:h")) != -1)
819 ret = kstrtou64(optarg, 10, &offset);
821 die("Invalid offset");
824 migrate_superblock_usage();
829 die("Please specify a device");
832 die("Please specify offset of existing superblock");
834 int fd = xopen(dev, O_RDWR);
835 struct bch_sb *sb = __bch2_super_read(fd, offset);
/* one free slot is needed for the new entry */
837 if (sb->layout.nr_superblocks >= ARRAY_SIZE(sb->layout.sb_offset))
838 die("Can't add superblock: no space left in superblock layout");
841 for (i = 0; i < sb->layout.nr_superblocks; i++)
842 if (le64_to_cpu(sb->layout.sb_offset[i]) == BCH_SB_SECTOR)
843 die("Superblock layout already has default superblock");
/* overlapping move: shift every existing offset up by one slot */
845 memmove(&sb->layout.sb_offset[1],
846 &sb->layout.sb_offset[0],
847 sb->layout.nr_superblocks * sizeof(u64));
848 sb->layout.nr_superblocks++;
/* the default location becomes the first entry of the layout */
850 sb->layout.sb_offset[0] = cpu_to_le64(BCH_SB_SECTOR);
852 bch2_super_write(fd, sb);