9 #include <sys/sysmacros.h>
10 #include <sys/types.h>
14 #include <linux/fiemap.h>
16 #include <linux/stat.h>
18 #include <uuid/uuid.h>
22 #include "libbcachefs.h"
24 #include <linux/dcache.h>
25 #include <linux/generic-radix-tree.h>
26 #include <linux/xattr.h>
27 #include "libbcachefs/bcachefs.h"
28 #include "libbcachefs/alloc_background.h"
29 #include "libbcachefs/alloc_foreground.h"
30 #include "libbcachefs/btree_update.h"
31 #include "libbcachefs/buckets.h"
32 #include "libbcachefs/dirent.h"
33 #include "libbcachefs/errcode.h"
34 #include "libbcachefs/fs-common.h"
35 #include "libbcachefs/inode.h"
36 #include "libbcachefs/io_write.h"
37 #include "libbcachefs/replicas.h"
38 #include "libbcachefs/str_hash.h"
39 #include "libbcachefs/super.h"
40 #include "libbcachefs/xattr.h"
42 /* XXX cut and pasted from fsck.c */
/*
 * Brace initializer for a struct qstr: .len is set to strlen(n) and .name
 * to n.  The argument is expanded twice, so it must not have side effects.
 */
43 #define QSTR(n) { { { .len = strlen(n) } }, .name = n }
/*
 * Translate a device number into a "/dev/<name>" path.
 *
 * Reads the symlink /sys/dev/block/<major>:<minor> with readlink(), locates
 * the last '/' of the link target with strrchr(), and returns a string
 * formatted as "/dev/%s" by mprintf().  Failures are fatal via die().
 *
 * NOTE(review): the returned string presumably has to be freed by the
 * caller - confirm against mprintf().
 */
45 static char *dev_t_to_path(dev_t dev)
47 char link[PATH_MAX], *p;
50 char *sysfs_dev = mprintf("/sys/dev/block/%u:%u",
51 major(dev), minor(dev));
52 ret = readlink(sysfs_dev, link, sizeof(link));
/* a result that fills the whole buffer is rejected together with errors */
55 if (ret < 0 || ret >= sizeof(link))
56 die("readlink error while looking up block device: %m");
60 p = strrchr(link, '/');
62 die("error looking up device name");
65 return mprintf("/dev/%s", p);
/*
 * Check whether path appears as a mount point in /proc/self/mountinfo.
 *
 * Every line is split on single spaces with strsep(): the first four fields
 * (mount id, parent id, dev, root) are discarded and the fifth is compared
 * with path using strcmp(), so path has to match that field exactly.
 * Failure to open the mountinfo file is fatal via die().
 *
 * NOTE(review): the return statements are not visible here; presumably a
 * matching mount point yields true - confirm.
 */
68 static bool path_is_fs_root(const char *path)
70 char *line = NULL, *p, *mount;
75 f = fopen("/proc/self/mountinfo", "r");
77 die("Error getting mount information");
79 while (getline(&line, &n, f) != -1) {
82 strsep(&p, " "); /* mount id */
83 strsep(&p, " "); /* parent id */
84 strsep(&p, " "); /* dev */
85 strsep(&p, " "); /* root */
86 mount = strsep(&p, " ");
/* mount is NULL when the line had fewer than five fields */
89 if (mount && !strcmp(path, mount))
/*
 * Set bits in ca->buckets_nouse (device 0) for the buckets overlapping each
 * hole that for_each_hole() reports for extents, bounded by the device size.
 *
 * Range offsets are in bytes: ">> 9" and "<< 9" convert between bytes and
 * 512-byte sectors around the sector_to_bucket()/bucket_to_sector() helpers.
 *
 * NOTE(review): "hole" presumably means the byte ranges NOT covered by
 * extents - confirm against for_each_hole().
 */
100 static void mark_unreserved_space(struct bch_fs *c, ranges extents)
102 struct bch_dev *ca = c->devs[0];
103 struct hole_iter iter;
106 for_each_hole(iter, extents, bucket_to_sector(ca, ca->mi.nbuckets) << 9, i) {
109 if (i.start == i.end)
112 b = sector_to_bucket(ca, i.start >> 9);
114 set_bit(b, ca->buckets_nouse);
/* keep going while bucket b starts before the end of the hole */
116 } while (bucket_to_sector(ca, b) << 9 < i.end);
/*
 * Write an unpacked inode back to the inodes btree.
 *
 * The inode is packed with bch2_inode_pack(), its key's snapshot field is
 * forced to U32_MAX, and the key is inserted into BTREE_ID_inodes with
 * bch2_btree_insert().  An insert error is fatal via die().
 */
120 static void update_inode(struct bch_fs *c,
121 struct bch_inode_unpacked *inode)
123 struct bkey_inode_buf packed;
126 bch2_inode_pack(&packed, inode);
127 packed.inode.k.p.snapshot = U32_MAX;
128 ret = bch2_btree_insert(c, BTREE_ID_inodes, &packed.inode.k_i,
131 die("error updating inode: %s", bch2_err_str(ret));
/*
 * Add a directory entry called name in parent that refers to the existing
 * inode inum, by running bch2_link_trans() inside bch2_trans_do().
 *
 * Both the parent and the target are addressed in subvolume 1.  parent_u
 * and inode only receive the inodes read by the transaction; they are not
 * used afterwards in the lines shown.  Errors are fatal via die().
 *
 * NOTE(review): mode is not referenced in the visible lines - confirm it is
 * unused.
 */
134 static void create_link(struct bch_fs *c,
135 struct bch_inode_unpacked *parent,
136 const char *name, u64 inum, mode_t mode)
138 struct qstr qstr = QSTR(name);
139 struct bch_inode_unpacked parent_u;
140 struct bch_inode_unpacked inode;
142 int ret = bch2_trans_do(c, NULL, NULL, 0,
143 bch2_link_trans(trans,
144 (subvol_inum) { 1, parent->bi_inum }, &parent_u,
145 (subvol_inum) { 1, inum }, &inode, &qstr));
147 die("error creating hardlink: %s", bch2_err_str(ret));
/*
 * Create a new inode called name inside parent and return it by value.
 *
 * new_inode is prepared with bch2_inode_init_early() and then created with
 * bch2_create_trans() inside bch2_trans_do(), using subvolume 1 for the
 * parent and the given uid, gid, mode and rdev.  parent itself is handed to
 * the transaction, so it may be updated by it.  Errors are fatal via die().
 *
 * NOTE(review): the return statement is not visible; presumably new_inode is
 * what is returned - confirm.
 */
150 static struct bch_inode_unpacked create_file(struct bch_fs *c,
151 struct bch_inode_unpacked *parent,
153 uid_t uid, gid_t gid,
154 mode_t mode, dev_t rdev)
156 struct qstr qstr = QSTR(name);
157 struct bch_inode_unpacked new_inode;
159 bch2_inode_init_early(c, &new_inode);
161 int ret = bch2_trans_do(c, NULL, NULL, 0,
162 bch2_create_trans(trans,
163 (subvol_inum) { 1, parent->bi_inum }, parent,
165 uid, gid, mode, rdev, NULL, NULL,
166 (subvol_inum) {}, 0));
168 die("error creating %s: %s", name, bch2_err_str(ret));
/*
 * Loop header that walks an array of xattr handler pointers: handler takes
 * the value of each element in turn.  The handlers pointer itself is
 * post-incremented, so the variable passed in is consumed by the loop.
 *
 * NOTE(review): the loop condition is not visible here; presumably the array
 * is NULL-terminated - confirm.
 */
173 #define for_each_xattr_handler(handlers, handler) \
175 for ((handler) = *(handlers)++; \
177 (handler) = *(handlers)++)
/*
 * Look up the handler in bch2_xattr_handlers that matches *name.
 *
 * Each handler's prefix (xattr_prefix()) is tested against *name with
 * strcmp_prefix().  Failure is reported as an error pointer, either
 * ERR_PTR(-EINVAL) or ERR_PTR(-EOPNOTSUPP), so callers must check the result
 * with IS_ERR() before dereferencing it.
 *
 * NOTE(review): name is passed by reference; presumably *name is advanced
 * past the matched prefix on success - the assignment is not visible here.
 */
179 static const struct xattr_handler *xattr_resolve_name(char **name)
181 const struct xattr_handler **handlers = bch2_xattr_handlers;
182 const struct xattr_handler *handler;
184 for_each_xattr_handler(handlers, handler) {
187 n = strcmp_prefix(*name, xattr_prefix(handler));
/*
 * Mismatch test: true when exactly one of "handler has no ->prefix" and
 * "nothing is left after the prefix" holds.
 */
189 if (!handler->prefix ^ !*n) {
192 return ERR_PTR(-EINVAL);
198 return ERR_PTR(-EOPNOTSUPP);
/*
 * Copy the access, modification and change timestamps of src (st_atim,
 * st_mtim, st_ctim) into dst, converting each with timespec_to_bch2_time().
 * Only the in-memory inode is changed; nothing is written to the btree here.
 */
201 static void copy_times(struct bch_fs *c, struct bch_inode_unpacked *dst,
204 dst->bi_atime = timespec_to_bch2_time(c, src->st_atim);
205 dst->bi_mtime = timespec_to_bch2_time(c, src->st_mtim);
206 dst->bi_ctime = timespec_to_bch2_time(c, src->st_ctim);
209 static void copy_xattrs(struct bch_fs *c, struct bch_inode_unpacked *dst,
212 struct bch_hash_info hash_info = bch2_hash_info_init(c, dst);
214 char attrs[XATTR_LIST_MAX];
215 ssize_t attrs_size = llistxattr(src, attrs, sizeof(attrs));
217 die("listxattr error: %m");
221 attr < attrs + attrs_size;
223 next = attr + strlen(attr) + 1;
225 char val[XATTR_SIZE_MAX];
226 ssize_t val_size = lgetxattr(src, attr, val, sizeof(val));
229 die("error getting xattr val: %m");
231 const struct xattr_handler *h = xattr_resolve_name(&attr);
232 struct bch_inode_unpacked inode_u;
234 int ret = bch2_trans_do(c, NULL, NULL, 0,
235 bch2_xattr_set(trans,
236 (subvol_inum) { 1, dst->bi_inum },
237 &inode_u, &hash_info, attr,
238 val, val_size, h->flags, 0));
240 die("error creating xattr: %s", bch2_err_str(ret));
/* Size in bytes (1 MiB) of the staging buffer used when copying data. */
244 #define WRITE_DATA_BUF (1 << 20)
/* File-scope, page-aligned staging buffer shared by the copy helpers below. */
246 static char buf[WRITE_DATA_BUF] __aligned(PAGE_SIZE);
/*
 * Write len bytes from buf into dst_inode at byte offset dst_offset.
 *
 * dst_offset and len must be multiples of block_bytes(c) and len must not
 * exceed WRITE_DATA_BUF; violations trip BUG_ON().  The buffer is mapped
 * into an on-stack bio, space for len >> 9 sectors is reserved with
 * c->opts.data_replicas replicas, and the write is issued through
 * closure_call() on bch2_write with BCH_WRITE_SYNC set.  On completion
 * dst_inode->bi_sectors is increased by len >> 9 (in memory only).
 * Reservation and write errors are fatal via die().
 *
 * Note that the buf parameter shadows the file-scope buf array.
 */
248 static void write_data(struct bch_fs *c,
249 struct bch_inode_unpacked *dst_inode,
250 u64 dst_offset, void *buf, size_t len)
252 struct bch_write_op op;
/* one bio_vec per page of the largest permitted write */
253 struct bio_vec bv[WRITE_DATA_BUF / PAGE_SIZE];
255 BUG_ON(dst_offset & (block_bytes(c) - 1));
256 BUG_ON(len & (block_bytes(c) - 1));
257 BUG_ON(len > WRITE_DATA_BUF);
259 bio_init(&op.wbio.bio, NULL, bv, ARRAY_SIZE(bv), 0);
260 bch2_bio_map(&op.wbio.bio, buf, len);
262 bch2_write_op_init(&op, c, bch2_opts_to_inode_opts(c->opts));
263 op.write_point = writepoint_hashed(0);
/* key position: inode number, offset in 512-byte sectors, snapshot U32_MAX */
266 op.pos = SPOS(dst_inode->bi_inum, dst_offset >> 9, U32_MAX);
267 op.flags |= BCH_WRITE_SYNC;
269 int ret = bch2_disk_reservation_get(c, &op.res, len >> 9,
270 c->opts.data_replicas, 0);
272 die("error reserving space in new filesystem: %s", bch2_err_str(ret));
274 closure_call(&op.cl, bch2_write, NULL, NULL);
276 BUG_ON(!(op.flags & BCH_WRITE_DONE));
277 dst_inode->bi_sectors += len >> 9;
280 die("write error: %s", bch2_err_str(op.error));
/*
 * Copy the byte range [start, end) of src_fd into dst_inode at the same
 * offsets, going through the file-scope buf.
 *
 * Each pass reads at most sizeof(buf) bytes with xpread(), zero-fills the
 * tail up to the next multiple of block_bytes(c), and hands the padded
 * length to write_data().
 *
 * NOTE(review): the statement advancing start is not visible here;
 * presumably it moves forward by len each pass - confirm.
 */
283 static void copy_data(struct bch_fs *c,
284 struct bch_inode_unpacked *dst_inode,
285 int src_fd, u64 start, u64 end)
287 while (start < end) {
288 unsigned len = min_t(u64, end - start, sizeof(buf));
289 unsigned pad = round_up(len, block_bytes(c)) - len;
291 xpread(src_fd, buf, len, start);
292 memset(buf + len, 0, pad);
294 write_data(c, dst_inode, start, buf, len + pad);
/*
 * Make dst reference data that already sits on device 0 instead of copying
 * it: extent keys are inserted whose pointers name the existing location.
 *
 * logical, physical and length must all be multiples of block_bytes(c), and
 * physical + length must not pass the end of the device (BUG_ON otherwise).
 * Each key covers at most the remainder of the bucket containing physical
 * and is positioned at the END of the range it covers (logical + sectors),
 * with snapshot U32_MAX.  A one-replica reservation is taken with
 * BCH_DISK_RESERVATION_NOFAIL, the key is inserted into BTREE_ID_extents,
 * the reservation is released, and dst->bi_sectors is increased.
 * Reservation and insert errors are fatal via die().
 *
 * NOTE(review): the alignment checks use byte masks while the bounds check
 * compares against a sector count; presumably the arguments are converted
 * to sectors in between (not visible here) - confirm.
 */
299 static void link_data(struct bch_fs *c, struct bch_inode_unpacked *dst,
300 u64 logical, u64 physical, u64 length)
302 struct bch_dev *ca = c->devs[0];
304 BUG_ON(logical & (block_bytes(c) - 1));
305 BUG_ON(physical & (block_bytes(c) - 1));
306 BUG_ON(length & (block_bytes(c) - 1));
312 BUG_ON(physical + length > bucket_to_sector(ca, ca->mi.nbuckets));
315 struct bkey_i_extent *e;
316 BKEY_PADDED_ONSTACK(k, BKEY_EXTENT_VAL_U64s_MAX) k;
317 u64 b = sector_to_bucket(ca, physical);
318 struct disk_reservation res;
/* distance to the end of the current bucket; the mask assumes a power-of-two bucket size */
322 sectors = min(ca->mi.bucket_size -
323 (physical & (ca->mi.bucket_size - 1)),
326 e = bkey_extent_init(&k.k);
327 e->k.p.inode = dst->bi_inum;
328 e->k.p.offset = logical + sectors;
329 e->k.p.snapshot = U32_MAX;
331 bch2_bkey_append_ptr(&e->k_i, (struct bch_extent_ptr) {
334 .gen = *bucket_gen(ca, b),
337 ret = bch2_disk_reservation_get(c, &res, sectors, 1,
338 BCH_DISK_RESERVATION_NOFAIL);
340 die("error reserving space in new filesystem: %s",
343 ret = bch2_btree_insert(c, BTREE_ID_extents, &e->k_i, &res, 0);
345 die("btree insert error %s", bch2_err_str(ret));
347 bch2_disk_reservation_put(c, &res);
349 dst->bi_sectors += sectors;
356 static void copy_link(struct bch_fs *c, struct bch_inode_unpacked *dst,
359 ssize_t ret = readlink(src, buf, sizeof(buf));
361 die("readlink error: %m");
363 write_data(c, dst, 0, buf, round_up(ret, block_bytes(c)));
/*
 * Populate dst with the contents of the regular file open at src_fd, using
 * its FIEMAP extent list.
 *
 * The extents are walked twice.  The first walk looks for extents flagged
 * FIEMAP_EXTENT_UNKNOWN (what happens on a hit is not visible here).  In the
 * second walk every extent must have a logical offset and length aligned to
 * block_bytes(c), otherwise die().  Then, per extent:
 *   - flagged UNKNOWN, ENCODED, NOT_ALIGNED or DATA_INLINE: the data is
 *     copied with copy_data(), clamped to src_size;
 *   - physical offset below 1 MiB: the data is likewise copied;
 *   - otherwise: the physical offset must be block aligned (die() if not),
 *     the physical range is recorded in *extents, and the extent is
 *     referenced in place with link_data().
 */
366 static void copy_file(struct bch_fs *c, struct bch_inode_unpacked *dst,
367 int src_fd, u64 src_size,
368 char *src_path, ranges *extents)
370 struct fiemap_iter iter;
371 struct fiemap_extent e;
373 fiemap_for_each(src_fd, iter, e)
374 if (e.fe_flags & FIEMAP_EXTENT_UNKNOWN) {
378 fiemap_iter_exit(&iter);
380 fiemap_for_each(src_fd, iter, e) {
381 if ((e.fe_logical & (block_bytes(c) - 1)) ||
382 (e.fe_length & (block_bytes(c) - 1)))
383 die("Unaligned extent in %s - can't handle", src_path);
385 if (e.fe_flags & (FIEMAP_EXTENT_UNKNOWN|
386 FIEMAP_EXTENT_ENCODED|
387 FIEMAP_EXTENT_NOT_ALIGNED|
388 FIEMAP_EXTENT_DATA_INLINE)) {
389 copy_data(c, dst, src_fd, e.fe_logical,
390 min(src_size - e.fe_logical,
396 * if the data is below 1 MB, copy it so it doesn't conflict
397 * with bcachefs's potentially larger superblock:
399 if (e.fe_physical < 1 << 20) {
400 copy_data(c, dst, src_fd, e.fe_logical,
401 min(src_size - e.fe_logical,
406 if ((e.fe_physical & (block_bytes(c) - 1)))
407 die("Unaligned extent in %s - can't handle", src_path);
409 range_add(extents, e.fe_physical, e.fe_length);
410 link_data(c, dst, e.fe_logical, e.fe_physical, e.fe_length);
412 fiemap_iter_exit(&iter);
/* State shared across the recursive directory walk done by copy_dir(). */
415 struct copy_fs_state {
/*
 * Indexed by source inode number (st_ino); holds the inode number created
 * for it in the new filesystem, with 0 meaning not created yet.
 */
419 GENRADIX(u64) hardlinks;
/*
 * Recursively copy the directory open at src_fd into the inode dst.
 *
 * Entries are read with readdir(); errno is cleared before each call so an
 * error can be told apart from end-of-directory afterwards.  Entries named
 * ".", ".." and "lost+found", and the entry whose inode number equals
 * s->bcachefs_inum, are tested together (the action taken is not visible;
 * presumably they are skipped).  Every other entry must live on device
 * s->dev, otherwise die().
 *
 * Hard links: regular files get a slot in s->hardlinks keyed by st_ino.  If
 * the slot already holds an inode number the entry is created with
 * create_link(); otherwise a new inode is made with create_file() and its
 * number is stored in the slot.  Timestamps and xattrs are then copied, the
 * type-specific payload is handled by the switch, and the inode is written
 * back with update_inode().
 *
 * Child paths handed to xopen()/copy_link()/copy_xattrs() are bare entry
 * names, i.e. they are resolved relative to the current working directory.
 *
 * NOTE(review): the case labels of the switch are not visible; judging by
 * the calls, the arms are directory (recurse), regular file (copy_file) and
 * symlink (copy_link) - confirm.
 */
423 static void copy_dir(struct copy_fs_state *s,
425 struct bch_inode_unpacked *dst,
426 int src_fd, const char *src_path)
428 DIR *dir = fdopendir(src_fd);
431 while ((errno = 0), (d = readdir(dir))) {
432 struct bch_inode_unpacked inode;
436 die("chdir error: %m");
439 xfstatat(src_fd, d->d_name, AT_SYMLINK_NOFOLLOW);
441 if (!strcmp(d->d_name, ".") ||
442 !strcmp(d->d_name, "..") ||
443 !strcmp(d->d_name, "lost+found") ||
444 stat.st_ino == s->bcachefs_inum)
447 char *child_path = mprintf("%s/%s", src_path, d->d_name);
449 if (stat.st_dev != s->dev)
450 die("%s does not have correct st_dev!", child_path);
452 u64 *dst_inum = S_ISREG(stat.st_mode)
453 ? genradix_ptr_alloc(&s->hardlinks, stat.st_ino, GFP_KERNEL)
/* a non-zero slot means this source inode was already created: link to it */
456 if (dst_inum && *dst_inum) {
457 create_link(c, dst, d->d_name, *dst_inum, S_IFREG);
461 inode = create_file(c, dst, d->d_name,
462 stat.st_uid, stat.st_gid,
463 stat.st_mode, stat.st_rdev);
466 *dst_inum = inode.bi_inum;
468 copy_times(c, &inode, &stat);
469 copy_xattrs(c, &inode, d->d_name);
473 switch (mode_to_type(stat.st_mode)) {
475 fd = xopen(d->d_name, O_RDONLY|O_NOATIME);
476 copy_dir(s, c, &inode, fd, child_path);
480 inode.bi_size = stat.st_size;
482 fd = xopen(d->d_name, O_RDONLY|O_NOATIME);
483 copy_file(c, &inode, fd, stat.st_size,
484 child_path, &s->extents);
488 inode.bi_size = stat.st_size;
490 copy_link(c, &inode, d->d_name);
497 /* nothing else to copy for these: */
503 update_inode(c, &inode);
509 die("readdir error: %m");
/*
 * Create (or reopen) the file at file_path that will hold the new
 * filesystem's metadata, allocate size bytes for it, and collect the
 * physical ranges it occupies on disk.
 *
 * The file is opened O_RDWR|O_CREAT with mode 0600, with or without O_EXCL
 * depending on a condition that is not visible here (presumably the force
 * flag).  It must reside on device dev; its inode number is stored in
 * *bcachefs_inum.  Space is allocated with fallocate(), then the FIEMAP
 * extents are walked: an extent flagged UNKNOWN, ENCODED, NOT_ALIGNED or
 * DATA_INLINE, or one whose physical offset or length is not a multiple of
 * block_size, is fatal via die().  Each accepted extent is added to the
 * range list, which is finally sorted and merged.
 *
 * NOTE(review): the return statement is not visible; presumably the merged
 * range list is returned - confirm.
 */
513 static ranges reserve_new_fs_space(const char *file_path, unsigned block_size,
514 u64 size, u64 *bcachefs_inum, dev_t dev,
518 ? open(file_path, O_RDWR|O_CREAT, 0600)
519 : open(file_path, O_RDWR|O_CREAT|O_EXCL, 0600);
521 die("Error creating %s for bcachefs metadata: %m",
524 struct stat statbuf = xfstat(fd);
526 if (statbuf.st_dev != dev)
527 die("bcachefs file has incorrect device");
529 *bcachefs_inum = statbuf.st_ino;
531 if (fallocate(fd, 0, 0, size))
532 die("Error reserving space for bcachefs metadata: %m");
536 struct fiemap_iter iter;
537 struct fiemap_extent e;
538 ranges extents = { 0 };
540 fiemap_for_each(fd, iter, e) {
541 if (e.fe_flags & (FIEMAP_EXTENT_UNKNOWN|
542 FIEMAP_EXTENT_ENCODED|
543 FIEMAP_EXTENT_NOT_ALIGNED|
544 FIEMAP_EXTENT_DATA_INLINE))
545 die("Unable to continue: metadata file not fully mapped");
/* the masks assume block_size is a power of two */
547 if ((e.fe_physical & (block_size - 1)) ||
548 (e.fe_length & (block_size - 1)))
549 die("Unable to continue: unaligned extents in metadata file");
551 range_add(&extents, e.fe_physical, e.fe_length);
553 fiemap_iter_exit(&iter);
556 ranges_sort_merge(&extents);
/*
 * Create the file "old_migrated_filesystem" in root_inode (uid 0, gid 0,
 * mode S_IFREG|0400) and make it reference every hole in *extents.
 *
 * Its size is set to the size of device 0 in bytes.  *extents is sorted and
 * merged, then each hole reported by for_each_hole() is linked with
 * link_data() using the same value for the logical and the physical offset,
 * so offsets inside the file equal offsets on the device.  The inode is then
 * written back with update_inode().
 *
 * NOTE(review): "hole" presumably means the device ranges NOT listed in
 * *extents - confirm against for_each_hole().
 */
560 static void reserve_old_fs_space(struct bch_fs *c,
561 struct bch_inode_unpacked *root_inode,
564 struct bch_dev *ca = c->devs[0];
565 struct bch_inode_unpacked dst;
566 struct hole_iter iter;
569 dst = create_file(c, root_inode, "old_migrated_filesystem",
570 0, 0, S_IFREG|0400, 0);
571 dst.bi_size = bucket_to_sector(ca, ca->mi.nbuckets) << 9;
573 ranges_sort_merge(extents);
575 for_each_hole(iter, *extents, bucket_to_sector(ca, ca->mi.nbuckets) << 9, i)
576 link_data(c, &dst, i.start, i.start, i.end - i.start);
578 update_inode(c, &dst);
/*
 * Copy the directory tree rooted at src_fd / src_path into the root
 * directory of the new filesystem c.
 *
 * The root inode (BCACHEFS_ROOT_INO in subvolume 1) is looked up, then given
 * the timestamps and xattrs of the source root.  copy_dir() walks the tree
 * with a copy_fs_state carrying bcachefs_inum, reserve_old_fs_space() adds
 * the file that references the remaining space, and the root inode is
 * written back with update_inode().  The state's extent list and hardlink
 * table are released at the end.  Lookup and chdir failures are fatal via
 * die().
 */
581 static void copy_fs(struct bch_fs *c, int src_fd, const char *src_path,
582 u64 bcachefs_inum, ranges *extents)
586 struct bch_inode_unpacked root_inode;
587 int ret = bch2_inode_find_by_inum(c, (subvol_inum) { 1, BCACHEFS_ROOT_INO },
590 die("error looking up root directory: %s", bch2_err_str(ret));
593 die("chdir error: %m");
595 struct stat stat = xfstat(src_fd);
596 copy_times(c, &root_inode, &stat);
/* "." is resolved relative to the current working directory */
597 copy_xattrs(c, &root_inode, ".");
599 struct copy_fs_state s = {
600 .bcachefs_inum = bcachefs_inum,
606 copy_dir(&s, c, &root_inode, src_fd, src_path);
608 reserve_old_fs_space(c, &root_inode, &s.extents);
610 update_inode(c, &root_inode);
612 darray_exit(&s.extents);
613 genradix_free(&s.hardlinks);
/*
 * Choose where the new filesystem's superblocks go and record it in dev.
 *
 * For each range in extents (byte offsets), the usable window starts at the
 * later of 256 KiB and the range start, rounded up to a bucket boundary, and
 * ends at the range end rounded down to a bucket boundary.  The first window
 * with room for two superblocks of opts.superblock_size sectors each is
 * used: dev->sb_offset and dev->sb_end are set, both in 512-byte sectors.
 * If no range qualifies, die().
 */
616 static void find_superblock_space(ranges extents,
617 struct format_opts opts,
618 struct dev_opts *dev)
620 darray_for_each(extents, i) {
/* dev->bucket_size is in sectors; "<< 9" turns it into bytes */
621 u64 start = round_up(max(256ULL << 10, i->start),
622 dev->bucket_size << 9);
623 u64 end = round_down(i->end,
624 dev->bucket_size << 9);
626 /* Need space for two superblocks: */
627 if (start + (opts.superblock_size << 9) * 2 <= end) {
628 dev->sb_offset = start >> 9;
629 dev->sb_end = dev->sb_offset + opts.superblock_size * 2;
634 die("Couldn't find a valid location for superblock");
/* Print the help text for "bcachefs migrate" to standard output. */
637 static void migrate_usage(void)
639 puts("bcachefs migrate - migrate an existing filesystem to bcachefs\n"
640 "Usage: bcachefs migrate [OPTION]...\n"
643 " -f fs Root of filesystem to migrate\n"
644 " --encrypted Enable whole filesystem encryption (chacha20/poly1305)\n"
645 " --no_passphrase Don't encrypt master encryption key\n"
646 " -F Force, even if metadata file already exists\n"
647 " -h Display this help and exit\n"
648 "Report bugs to <linux-bcachefs@vger.kernel.org>");
/*
 * Long options accepted by "bcachefs migrate"; neither takes an argument.
 * The last field is the value getopt_long() returns for the option
 * ('e' for --encrypted, 'p' for --no_passphrase).
 */
651 static const struct option migrate_opts[] = {
652 { "encrypted", no_argument, NULL, 'e' },
653 { "no_passphrase", no_argument, NULL, 'p' },
/*
 * Convert the filesystem mounted at fs_path to bcachefs in place.
 *
 * Steps, in order:
 *   1. fs_path must be a mount point (path_is_fs_root()) and a directory.
 *   2. The backing block device is found from st_dev and opened read/write;
 *      the block size option is taken from the device, and the bucket size
 *      and bucket count are derived from the device size.
 *   3. A file "<fs_path>/bcachefs" sized at one fifth of the device is
 *      reserved with reserve_new_fs_space(), and a superblock location
 *      inside it is chosen with find_superblock_space().
 *   4. The device is formatted with bch2_format(); the offset of the first
 *      superblock in its layout is remembered, and the passphrase, if any,
 *      is added as a key with bch2_add_key().
 *   5. The new filesystem is opened at that superblock offset without
 *      starting it, mark_unreserved_space() is applied, the filesystem is
 *      started, and copy_fs() copies the tree.
 *   6. The filesystem is opened again with nochanges set for an fsck pass,
 *      and the follow-up instructions are printed.
 *
 * Every failure is fatal via die().
 *
 * NOTE(review): block_size is passed to reserve_new_fs_space() as
 * fs_opts.block_size >> 9, while that function masks byte offsets with it -
 * confirm the intended unit.
 */
657 static int migrate_fs(const char *fs_path,
658 struct bch_opt_strs fs_opt_strs,
659 struct bch_opts fs_opts,
660 struct format_opts format_opts,
663 if (!path_is_fs_root(fs_path))
664 die("%s is not a filesystem root", fs_path);
666 int fs_fd = xopen(fs_path, O_RDONLY|O_NOATIME);
667 struct stat stat = xfstat(fs_fd);
669 if (!S_ISDIR(stat.st_mode))
670 die("%s is not a directory", fs_path);
672 struct dev_opts dev = dev_opts_default();
674 dev.path = dev_t_to_path(stat.st_dev);
675 dev.bdev = blkdev_get_by_path(dev.path, BLK_OPEN_READ|BLK_OPEN_WRITE, &dev, NULL);
677 opt_set(fs_opts, block_size, get_blocksize(dev.bdev->bd_buffered_fd));
679 char *file_path = mprintf("%s/bcachefs", fs_path);
680 printf("Creating new filesystem on %s in space reserved at %s\n",
681 dev.path, file_path);
683 dev.size = get_size(dev.bdev->bd_buffered_fd);
684 dev.bucket_size = bch2_pick_bucket_size(fs_opts, &dev);
685 dev.nbuckets = dev.size / dev.bucket_size;
687 bch2_check_bucket_size(fs_opts, &dev);
690 ranges extents = reserve_new_fs_space(file_path,
691 fs_opts.block_size >> 9,
692 get_size(dev.bdev->bd_buffered_fd) / 5,
693 &bcachefs_inum, stat.st_dev, force);
695 find_superblock_space(extents, format_opts, &dev);
697 struct bch_sb *sb = bch2_format(fs_opt_strs,
698 fs_opts,format_opts, &dev, 1);
699 u64 sb_offset = le64_to_cpu(sb->layout.sb_offset[0]);
701 if (format_opts.passphrase)
702 bch2_add_key(sb, "user", "user", format_opts.passphrase);
706 struct bch_opts opts = bch2_opts_empty();
707 struct bch_fs *c = NULL;
708 char *path[1] = { dev.path };
710 opt_set(opts, sb, sb_offset);
711 opt_set(opts, nostart, true);
712 opt_set(opts, noexcl, true);
713 opt_set(opts, buckets_nouse, true);
715 c = bch2_fs_open(path, 1, opts);
717 die("Error opening new filesystem: %s", bch2_err_str(PTR_ERR(c)));
719 mark_unreserved_space(c, extents);
721 int ret = bch2_fs_start(c);
723 die("Error starting new filesystem: %s", bch2_err_str(ret));
725 copy_fs(c, fs_fd, fs_path, bcachefs_inum, &extents);
729 printf("Migrate complete, running fsck:\n");
730 opt_set(opts, nostart, false);
731 opt_set(opts, nochanges, true);
733 c = bch2_fs_open(path, 1, opts);
735 die("Error opening new filesystem: %s", bch2_err_str(PTR_ERR(c)));
738 printf("fsck complete\n");
/* the file named below is the one created by reserve_old_fs_space() */
740 printf("To mount the new filesystem, run\n"
741 " mount -t bcachefs -o sb=%llu %s dir\n"
743 "After verifying that the new filesystem is correct, to create a\n"
744 "superblock at the default offset and finish the migration run\n"
745 " bcachefs migrate-superblock -d %s -o %llu\n"
747 "The new filesystem will have a file at /old_migrated_filesystem\n"
748 "referencing all disk space that might be used by the existing\n"
749 "filesystem. That file can be deleted once the old filesystem is\n"
750 "no longer needed (and should be deleted prior to running\n"
751 "bcachefs migrate-superblock)\n",
752 sb_offset, dev.path, dev.path, sb_offset);
/*
 * Entry point for "bcachefs migrate".
 *
 * Filesystem options in OPT_FORMAT are first pulled out of argv with
 * bch2_cmdline_opts_get() and parsed; the remaining arguments go through
 * getopt_long() with the short options "f:Fh" plus the long options in
 * migrate_opts.  A missing filesystem path is fatal via die().  When
 * encryption is requested and the no-passphrase flag is not set, the
 * passphrase is read twice from the user.  The work itself is done by
 * migrate_fs(), after which the option strings are freed.
 */
756 int cmd_migrate(int argc, char *argv[])
758 struct format_opts format_opts = format_opts_default();
759 char *fs_path = NULL;
760 bool no_passphrase = false, force = false;
763 struct bch_opt_strs fs_opt_strs =
764 bch2_cmdline_opts_get(&argc, argv, OPT_FORMAT);
765 struct bch_opts fs_opts = bch2_parse_opts(fs_opt_strs);
767 while ((opt = getopt_long(argc, argv, "f:Fh",
768 migrate_opts, NULL)) != -1)
774 format_opts.encrypted = true;
777 no_passphrase = true;
788 die("Please specify a filesystem to migrate");
790 if (format_opts.encrypted && !no_passphrase)
791 format_opts.passphrase = read_passphrase_twice("Enter passphrase: ");
793 int ret = migrate_fs(fs_path,
797 bch2_opt_strs_free(&fs_opt_strs);
/* Print the help text for "bcachefs migrate-superblock" to standard output. */
801 static void migrate_superblock_usage(void)
803 puts("bcachefs migrate-superblock - create default superblock after migrating\n"
804 "Usage: bcachefs migrate-superblock [OPTION]...\n"
807 " -d device Device to create superblock for\n"
808 " -o offset Offset of existing superblock\n"
809 " -h Display this help and exit\n"
810 "Report bugs to <linux-bcachefs@vger.kernel.org>");
/*
 * Entry point for "bcachefs migrate-superblock": add a superblock at the
 * default location (BCH_SB_SECTOR) to an already-migrated device.
 *
 * Options are parsed with getopt() using "d:o:h"; the offset is parsed as a
 * decimal u64 with kstrtou64().  A missing device or offset is fatal via
 * die().  The existing superblock is read from the given offset, and the
 * command refuses to continue (die()) when the layout has no free slot or
 * already lists BCH_SB_SECTOR.  Otherwise the existing offsets are shifted
 * up one slot with memmove(), the count is incremented, BCH_SB_SECTOR is
 * stored in slot 0, and the superblock is written with bch2_super_write().
 */
813 int cmd_migrate_superblock(int argc, char *argv[])
819 while ((opt = getopt(argc, argv, "d:o:h")) != -1)
825 ret = kstrtou64(optarg, 10, &offset);
827 die("Invalid offset");
830 migrate_superblock_usage();
835 die("Please specify a device");
838 die("Please specify offset of existing superblock");
840 int fd = xopen(dev, O_RDWR);
841 struct bch_sb *sb = __bch2_super_read(fd, offset);
/* the layout's offset array must have room for one more entry */
843 if (sb->layout.nr_superblocks >= ARRAY_SIZE(sb->layout.sb_offset))
844 die("Can't add superblock: no space left in superblock layout");
847 for (i = 0; i < sb->layout.nr_superblocks; i++)
848 if (le64_to_cpu(sb->layout.sb_offset[i]) == BCH_SB_SECTOR)
849 die("Superblock layout already has default superblock");
/* source and destination overlap, hence memmove() rather than memcpy() */
851 memmove(&sb->layout.sb_offset[1],
852 &sb->layout.sb_offset[0],
853 sb->layout.nr_superblocks * sizeof(u64));
854 sb->layout.nr_superblocks++;
856 sb->layout.sb_offset[0] = cpu_to_le64(BCH_SB_SECTOR);
858 bch2_super_write(fd, sb);