9 #include <sys/sysmacros.h>
10 #include <sys/types.h>
14 #include <linux/fiemap.h>
16 #include <linux/stat.h>
18 #include <uuid/uuid.h>
22 #include "libbcachefs.h"
24 #include <linux/dcache.h>
25 #include <linux/generic-radix-tree.h>
26 #include <linux/xattr.h>
27 #include "libbcachefs/bcachefs.h"
28 #include "libbcachefs/alloc_background.h"
29 #include "libbcachefs/alloc_foreground.h"
30 #include "libbcachefs/btree_update.h"
31 #include "libbcachefs/buckets.h"
32 #include "libbcachefs/dirent.h"
33 #include "libbcachefs/errcode.h"
34 #include "libbcachefs/fs-common.h"
35 #include "libbcachefs/inode.h"
36 #include "libbcachefs/io_write.h"
37 #include "libbcachefs/replicas.h"
38 #include "libbcachefs/str_hash.h"
39 #include "libbcachefs/super.h"
40 #include "libbcachefs/xattr.h"
42 /* XXX cut and pasted from fsck.c */
/*
 * Brace initializer for a struct qstr built from the C string n: the length
 * field is set to strlen(n) and .name points at n itself (no copy is made).
 * n is evaluated twice.
 */
43 #define QSTR(n) { { { .len = strlen(n) } }, .name = n }
/*
 * Translate a device number into a "/dev/<name>" path by reading the
 * /sys/dev/block/<major>:<minor> symlink and looking at what follows its last
 * '/'. Dies on readlink failure or if the device name cannot be located.
 * The result is built with mprintf().
 * NOTE(review): this listing omits some of the function's lines (the embedded
 * line numbers skip), so only the visible statements are described.
 */
45 static char *dev_t_to_path(dev_t dev)
47 char link[PATH_MAX], *p;
50 char *sysfs_dev = mprintf("/sys/dev/block/%u:%u",
51 major(dev), minor(dev));
52 ret = readlink(sysfs_dev, link, sizeof(link));
/* A result that fills the whole buffer is treated as an error too. */
55 if (ret < 0 || ret >= sizeof(link))
56 die("readlink error while looking up block device: %m");
/* Find the last path separator in the link target. */
60 p = strrchr(link, '/');
62 die("error looking up device name");
65 return mprintf("/dev/%s", p);
/*
 * Scan /proc/self/mountinfo line by line and compare path against the fifth
 * space-separated field of each line (the mount point). Dies if the mount
 * information cannot be read.
 * NOTE(review): the return statements are not visible in this listing;
 * presumably a match yields true — confirm.
 */
68 static bool path_is_fs_root(const char *path)
70 char *line = NULL, *p, *mount;
75 f = fopen("/proc/self/mountinfo", "r");
77 die("Error getting mount information");
79 while (getline(&line, &n, f) != -1) {
/* Discard the first four fields; the next one is the mount point. */
82 strsep(&p, " "); /* mount id */
83 strsep(&p, " "); /* parent id */
84 strsep(&p, " "); /* dev */
85 strsep(&p, " "); /* root */
86 mount = strsep(&p, " ");
/* strsep() yields NULL when the line had fewer fields than expected. */
89 if (mount && !strcmp(path, mount))
/*
 * For every hole between the given extents on device 0 (the hole walk is
 * bounded by the device's last bucket, expressed in bytes), set the
 * buckets_nouse bit of the buckets covering that hole.
 */
100 static void mark_unreserved_space(struct bch_fs *c, ranges extents)
102 struct bch_dev *ca = c->devs[0];
103 struct hole_iter iter;
/* bucket_to_sector() gives sectors; << 9 converts to a byte offset. */
106 for_each_hole(iter, extents, bucket_to_sector(ca, ca->mi.nbuckets) << 9, i) {
/* Zero-length hole. */
109 if (i.start == i.end)
/* Byte offset -> sector -> index of the bucket containing it. */
112 b = sector_to_bucket(ca, i.start >> 9);
114 set_bit(b, ca->buckets_nouse);
/* Continue while the current bucket still starts before the hole's end. */
116 } while (bucket_to_sector(ca, b) << 9 < i.end);
/*
 * Pack the unpacked inode and insert it into the inodes btree, with the key's
 * snapshot field forced to U32_MAX. Dies with the bcachefs error string on
 * failure.
 */
120 static void update_inode(struct bch_fs *c,
121 struct bch_inode_unpacked *inode)
123 struct bkey_inode_buf packed;
126 bch2_inode_pack(&packed, inode);
127 packed.inode.k.p.snapshot = U32_MAX;
128 ret = bch2_btree_insert(c, BTREE_ID_inodes, &packed.inode.k_i,
131 die("error updating inode: %s", bch2_err_str(ret));
/*
 * Create a hard link called name in directory parent that refers to the
 * existing inode inum, by running bch2_link_trans() inside a transaction.
 * Both inodes are addressed as { 1, inum } subvol_inum pairs. Dies on error.
 * NOTE(review): mode is not referenced by any line visible in this listing.
 */
134 static void create_link(struct bch_fs *c,
135 struct bch_inode_unpacked *parent,
136 const char *name, u64 inum, mode_t mode)
138 struct qstr qstr = QSTR(name);
/* Output slots passed to bch2_link_trans(). */
139 struct bch_inode_unpacked parent_u;
140 struct bch_inode_unpacked inode;
142 int ret = bch2_trans_do(c, NULL, NULL, 0,
143 bch2_link_trans(trans,
144 (subvol_inum) { 1, parent->bi_inum }, &parent_u,
145 (subvol_inum) { 1, inum }, &inode, &qstr));
147 die("error creating hardlink: %s", bch2_err_str(ret));
/*
 * Create a new inode under parent with the given ownership, mode and rdev by
 * running bch2_create_trans() inside a transaction. The new inode is
 * initialised with bch2_inode_init_early() first. Dies, naming the file, on
 * error.
 * NOTE(review): the return statement is not visible in this listing; the
 * return type and the callers suggest the new unpacked inode is returned.
 */
150 static struct bch_inode_unpacked create_file(struct bch_fs *c,
151 struct bch_inode_unpacked *parent,
153 uid_t uid, gid_t gid,
154 mode_t mode, dev_t rdev)
156 struct qstr qstr = QSTR(name);
157 struct bch_inode_unpacked new_inode;
159 bch2_inode_init_early(c, &new_inode);
161 int ret = bch2_trans_do(c, NULL, NULL, 0,
162 bch2_create_trans(trans,
163 (subvol_inum) { 1, parent->bi_inum }, parent,
165 uid, gid, mode, rdev, NULL, NULL,
166 (subvol_inum) {}, 0));
168 die("error creating %s: %s", name, bch2_err_str(ret));
/*
 * Iterate over an array of xattr handler pointers: handler receives each
 * entry in turn while handlers is post-incremented, so the caller's handlers
 * pointer is consumed by the loop.
 * NOTE(review): the loop-termination line is not visible in this listing.
 */
173 #define for_each_xattr_handler(handlers, handler) \
175 for ((handler) = *(handlers)++; \
177 (handler) = *(handlers)++)
/*
 * Find the handler in bch2_xattr_handlers whose prefix matches *name.
 * Returns ERR_PTR(-EINVAL) or ERR_PTR(-EOPNOTSUPP) on failure, so callers
 * must test the result with IS_ERR() before dereferencing it.
 * NOTE(review): the success path (and any update of *name) is not visible in
 * this listing.
 */
179 static const struct xattr_handler *xattr_resolve_name(char **name)
181 const struct xattr_handler **handlers = bch2_xattr_handlers;
182 const struct xattr_handler *handler;
184 for_each_xattr_handler(handlers, handler) {
/* n is whatever strcmp_prefix() reports for this handler's prefix. */
187 n = strcmp_prefix(*name, xattr_prefix(handler));
/* True when exactly one of "handler has no prefix" / "*n is NUL" holds. */
189 if (!handler->prefix ^ !*n) {
192 return ERR_PTR(-EINVAL);
198 return ERR_PTR(-EOPNOTSUPP);
/*
 * Copy the access, modification and change timestamps from a struct stat
 * (src) into the bcachefs inode dst, converting each with
 * timespec_to_bch2_time().
 */
201 static void copy_times(struct bch_fs *c, struct bch_inode_unpacked *dst,
204 dst->bi_atime = timespec_to_bch2_time(c, src->st_atim);
205 dst->bi_mtime = timespec_to_bch2_time(c, src->st_mtim);
206 dst->bi_ctime = timespec_to_bch2_time(c, src->st_ctim);
209 static void copy_xattrs(struct bch_fs *c, struct bch_inode_unpacked *dst,
212 struct bch_hash_info hash_info = bch2_hash_info_init(c, dst);
214 char attrs[XATTR_LIST_MAX];
215 ssize_t attrs_size = llistxattr(src, attrs, sizeof(attrs));
217 die("listxattr error: %m");
221 attr < attrs + attrs_size;
223 next = attr + strlen(attr) + 1;
225 char val[XATTR_SIZE_MAX];
226 ssize_t val_size = lgetxattr(src, attr, val, sizeof(val));
229 die("error getting xattr val: %m");
231 const struct xattr_handler *h = xattr_resolve_name(&attr);
232 struct bch_inode_unpacked inode_u;
234 int ret = bch2_trans_do(c, NULL, NULL, 0,
235 bch2_xattr_set(trans,
236 (subvol_inum) { 1, dst->bi_inum },
237 &inode_u, &hash_info, attr,
238 val, val_size, h->flags, 0));
240 die("error creating xattr: %s", bch2_err_str(ret));
/* Size in bytes (1 MiB) of the shared staging buffer used when copying data. */
244 #define WRITE_DATA_BUF (1 << 20)
/*
 * File-scope, page-aligned staging buffer. It is reused across calls, so its
 * contents persist from one copy to the next.
 */
246 static char buf[WRITE_DATA_BUF] __aligned(PAGE_SIZE);
/*
 * Write len bytes from buf into the file dst_inode at byte offset dst_offset
 * using a bch_write_op with BCH_WRITE_SYNC set, then add the written sectors
 * to dst_inode->bi_sectors.
 * dst_offset and len must be multiples of the filesystem block size and len
 * must not exceed WRITE_DATA_BUF (all enforced with BUG_ON). Dies if the disk
 * reservation or the write fails.
 * Note that the buf parameter shadows the file-scope buf.
 */
248 static void write_data(struct bch_fs *c,
249 struct bch_inode_unpacked *dst_inode,
250 u64 dst_offset, void *buf, size_t len)
252 struct bch_write_op op;
/* One bio_vec per page of the largest permitted write. */
253 struct bio_vec bv[WRITE_DATA_BUF / PAGE_SIZE];
255 BUG_ON(dst_offset & (block_bytes(c) - 1));
256 BUG_ON(len & (block_bytes(c) - 1));
257 BUG_ON(len > WRITE_DATA_BUF);
259 bio_init(&op.wbio.bio, NULL, bv, ARRAY_SIZE(bv), 0);
260 bch2_bio_map(&op.wbio.bio, buf, len);
262 bch2_write_op_init(&op, c, bch2_opts_to_inode_opts(c->opts));
263 op.write_point = writepoint_hashed(0);
/* Key position: inode number, offset in 512-byte sectors, snapshot U32_MAX. */
266 op.pos = SPOS(dst_inode->bi_inum, dst_offset >> 9, U32_MAX);
267 op.flags |= BCH_WRITE_SYNC;
/* Reserve len >> 9 sectors at the configured data replica count. */
269 int ret = bch2_disk_reservation_get(c, &op.res, len >> 9,
270 c->opts.data_replicas, 0);
272 die("error reserving space in new filesystem: %s", bch2_err_str(ret));
274 closure_call(&op.cl, bch2_write, NULL, NULL);
/* The write is expected to have completed by the time closure_call returns. */
276 BUG_ON(!(op.flags & BCH_WRITE_DONE));
277 dst_inode->bi_sectors += len >> 9;
280 die("write error: %s", bch2_err_str(op.error));
/*
 * Copy the byte range [start, end) of src_fd into dst_inode at the same
 * offsets, at most sizeof(buf) bytes per iteration, staging through the
 * file-scope buf. Each chunk is zero-padded up to a block-size multiple
 * before being handed to write_data().
 * NOTE(review): the statement advancing start is not visible in this listing.
 */
283 static void copy_data(struct bch_fs *c,
284 struct bch_inode_unpacked *dst_inode,
285 int src_fd, u64 start, u64 end)
287 while (start < end) {
288 unsigned len = min_t(u64, end - start, sizeof(buf));
/* Bytes needed to round this chunk up to a whole block. */
289 unsigned pad = round_up(len, block_bytes(c)) - len;
291 xpread(src_fd, buf, len, start);
/* Clear the padding so stale buffer contents are not written out. */
292 memset(buf + len, 0, pad);
294 write_data(c, dst_inode, start, buf, len + pad);
/*
 * Make dst reference existing on-disk data: insert extent keys into the
 * extents btree for dst's inode, each carrying a pointer whose generation is
 * taken from the containing bucket on device 0, and add the covered sectors
 * to dst->bi_sectors. No data write is issued by the visible code.
 * logical, physical and length must each be block-aligned (BUG_ON).
 * Dies if the reservation or btree insert fails.
 * NOTE(review): the alignment checks use block_bytes() while later lines
 * compare against sector counts, so a byte -> sector conversion presumably
 * happens on lines omitted from this listing — confirm.
 */
299 static void link_data(struct bch_fs *c, struct bch_inode_unpacked *dst,
300 u64 logical, u64 physical, u64 length)
302 struct bch_dev *ca = c->devs[0];
304 BUG_ON(logical & (block_bytes(c) - 1));
305 BUG_ON(physical & (block_bytes(c) - 1));
306 BUG_ON(length & (block_bytes(c) - 1));
/* The range must end within the device's buckets. */
312 BUG_ON(physical + length > bucket_to_sector(ca, ca->mi.nbuckets));
315 struct bkey_i_extent *e;
316 BKEY_PADDED_ONSTACK(k, BKEY_EXTENT_VAL_U64s_MAX) k;
317 u64 b = sector_to_bucket(ca, physical);
318 struct disk_reservation res;
/*
 * Clamp to what remains of the current bucket; the mask arithmetic assumes
 * bucket_size is a power of two.
 */
322 sectors = min(ca->mi.bucket_size -
323 (physical & (ca->mi.bucket_size - 1)),
326 e = bkey_extent_init(&k.k);
327 e->k.p.inode = dst->bi_inum;
/* The key position is the END of the extent. */
328 e->k.p.offset = logical + sectors;
329 e->k.p.snapshot = U32_MAX;
331 bch2_bkey_append_ptr(&e->k_i, (struct bch_extent_ptr) {
334 .gen = *bucket_gen(ca, b),
337 ret = bch2_disk_reservation_get(c, &res, sectors, 1,
338 BCH_DISK_RESERVATION_NOFAIL);
340 die("error reserving space in new filesystem: %s",
343 ret = bch2_btree_insert(c, BTREE_ID_extents, &e->k_i, &res, 0);
345 die("btree insert error %s", bch2_err_str(ret));
347 bch2_disk_reservation_put(c, &res);
349 dst->bi_sectors += sectors;
356 static void copy_link(struct bch_fs *c, struct bch_inode_unpacked *dst,
359 ssize_t ret = readlink(src, buf, sizeof(buf));
361 die("readlink error: %m");
363 write_data(c, dst, 0, buf, round_up(ret, block_bytes(c)));
/*
 * Bring the contents of the regular file src_fd (src_size bytes) into dst by
 * walking its FIEMAP extents twice:
 *   - a first pass that looks for extents flagged FIEMAP_EXTENT_UNKNOWN (the
 *     action taken for them is not visible in this listing);
 *   - a second pass that, per extent, either copies the data with copy_data()
 *     or records the physical range in *extents and references it in place
 *     with link_data().
 * Data is copied when the extent is flagged unknown/encoded/not-aligned/inline
 * or when it lies physically below 1 MB. Dies on extents that are not
 * block-aligned.
 */
366 static void copy_file(struct bch_fs *c, struct bch_inode_unpacked *dst,
367 int src_fd, u64 src_size,
368 char *src_path, ranges *extents)
370 struct fiemap_iter iter;
371 struct fiemap_extent e;
373 fiemap_for_each(src_fd, iter, e)
374 if (e.fe_flags & FIEMAP_EXTENT_UNKNOWN) {
378 fiemap_iter_exit(&iter);
380 fiemap_for_each(src_fd, iter, e) {
/* File size rounded up to a whole block. */
381 u64 src_max = roundup(src_size, block_bytes(c));
/* Trim extents that run past the rounded-up end of file. */
383 e.fe_length = min(e.fe_length, src_max - e.fe_logical);
385 if ((e.fe_logical & (block_bytes(c) - 1)) ||
386 (e.fe_length & (block_bytes(c) - 1)))
387 die("Unaligned extent in %s - can't handle", src_path);
/* Extents with any of these flags are copied instead of linked. */
389 if (e.fe_flags & (FIEMAP_EXTENT_UNKNOWN|
390 FIEMAP_EXTENT_ENCODED|
391 FIEMAP_EXTENT_NOT_ALIGNED|
392 FIEMAP_EXTENT_DATA_INLINE)) {
393 copy_data(c, dst, src_fd, e.fe_logical,
394 min(src_size - e.fe_logical,
400 * if the data is below 1 MB, copy it so it doesn't conflict
401 * with bcachefs's potentially larger superblock:
403 if (e.fe_physical < 1 << 20) {
404 copy_data(c, dst, src_fd, e.fe_logical,
405 min(src_size - e.fe_logical,
410 if ((e.fe_physical & (block_bytes(c) - 1)))
411 die("Unaligned extent in %s - can't handle", src_path);
/* Record the physical range as in use, then reference it from dst. */
413 range_add(extents, e.fe_physical, e.fe_length);
414 link_data(c, dst, e.fe_logical, e.fe_physical, e.fe_length);
416 fiemap_iter_exit(&iter);
/*
 * State shared across the recursive directory copy.
 * NOTE(review): only the hardlinks member is visible in this listing; other
 * members (bcachefs_inum, dev, extents) are referenced by code elsewhere in
 * the file.
 */
419 struct copy_fs_state {
/* Indexed by source st_ino; holds the bcachefs inode number already created. */
423 GENRADIX(u64) hardlinks;
427 static void copy_dir(struct copy_fs_state *s,
429 struct bch_inode_unpacked *dst,
430 int src_fd, const char *src_path)
432 DIR *dir = fdopendir(src_fd);
435 while ((errno = 0), (d = readdir(dir))) {
436 struct bch_inode_unpacked inode;
440 die("chdir error: %m");
443 xfstatat(src_fd, d->d_name, AT_SYMLINK_NOFOLLOW);
445 if (!strcmp(d->d_name, ".") ||
446 !strcmp(d->d_name, "..") ||
447 !strcmp(d->d_name, "lost+found") ||
448 stat.st_ino == s->bcachefs_inum)
451 char *child_path = mprintf("%s/%s", src_path, d->d_name);
453 if (stat.st_dev != s->dev)
454 die("%s does not have correct st_dev!", child_path);
456 u64 *dst_inum = S_ISREG(stat.st_mode)
457 ? genradix_ptr_alloc(&s->hardlinks, stat.st_ino, GFP_KERNEL)
460 if (dst_inum && *dst_inum) {
461 create_link(c, dst, d->d_name, *dst_inum, S_IFREG);
465 inode = create_file(c, dst, d->d_name,
466 stat.st_uid, stat.st_gid,
467 stat.st_mode, stat.st_rdev);
470 *dst_inum = inode.bi_inum;
472 copy_times(c, &inode, &stat);
473 copy_xattrs(c, &inode, d->d_name);
477 switch (mode_to_type(stat.st_mode)) {
479 fd = xopen(d->d_name, O_RDONLY|O_NOATIME);
480 copy_dir(s, c, &inode, fd, child_path);
484 inode.bi_size = stat.st_size;
486 fd = xopen(d->d_name, O_RDONLY|O_NOATIME);
487 copy_file(c, &inode, fd, stat.st_size,
488 child_path, &s->extents);
492 inode.bi_size = stat.st_size;
494 copy_link(c, &inode, d->d_name);
501 /* nothing else to copy for these: */
507 update_inode(c, &inode);
513 die("readdir error: %m");
/*
 * Create (or, in the first branch of the conditional, reuse) the file at
 * file_path, fallocate() size bytes for it, and collect the physical extents
 * backing it via FIEMAP into a sorted, merged range list.
 * The file's inode number is stored in *bcachefs_inum. Dies if the file cannot
 * be created, is not on device dev, cannot be allocated, or has extents that
 * are unknown/encoded/not-aligned/inline or not aligned to block_size.
 * NOTE(review): the condition selecting between the two open() calls and the
 * return statement are not visible in this listing.
 */
517 static ranges reserve_new_fs_space(const char *file_path, unsigned block_size,
518 u64 size, u64 *bcachefs_inum, dev_t dev,
522 ? open(file_path, O_RDWR|O_CREAT, 0600)
523 : open(file_path, O_RDWR|O_CREAT|O_EXCL, 0600);
525 die("Error creating %s for bcachefs metadata: %m",
528 struct stat statbuf = xfstat(fd);
530 if (statbuf.st_dev != dev)
531 die("bcachefs file has incorrect device");
533 *bcachefs_inum = statbuf.st_ino;
535 if (fallocate(fd, 0, 0, size))
536 die("Error reserving space for bcachefs metadata: %m");
540 struct fiemap_iter iter;
541 struct fiemap_extent e;
542 ranges extents = { 0 };
544 fiemap_for_each(fd, iter, e) {
/* Every extent must be a plain, directly addressable mapping. */
545 if (e.fe_flags & (FIEMAP_EXTENT_UNKNOWN|
546 FIEMAP_EXTENT_ENCODED|
547 FIEMAP_EXTENT_NOT_ALIGNED|
548 FIEMAP_EXTENT_DATA_INLINE))
549 die("Unable to continue: metadata file not fully mapped");
/* The mask test assumes block_size is a power of two. */
551 if ((e.fe_physical & (block_size - 1)) ||
552 (e.fe_length & (block_size - 1)))
553 die("Unable to continue: unaligned extents in metadata file");
555 range_add(&extents, e.fe_physical, e.fe_length);
557 fiemap_iter_exit(&iter);
560 ranges_sort_merge(&extents);
/*
 * Create the file "old_migrated_filesystem" (mode 0400, uid/gid 0) under
 * root_inode, size it to the whole of device 0, and link into it — at
 * identical logical and physical offsets — every hole between the given
 * extents, then write the inode back with update_inode().
 */
564 static void reserve_old_fs_space(struct bch_fs *c,
565 struct bch_inode_unpacked *root_inode,
568 struct bch_dev *ca = c->devs[0];
569 struct bch_inode_unpacked dst;
570 struct hole_iter iter;
573 dst = create_file(c, root_inode, "old_migrated_filesystem",
574 0, 0, S_IFREG|0400, 0);
/* Device size in bytes: sector of the last bucket boundary, times 512. */
575 dst.bi_size = bucket_to_sector(ca, ca->mi.nbuckets) << 9;
577 ranges_sort_merge(extents);
579 for_each_hole(iter, *extents, bucket_to_sector(ca, ca->mi.nbuckets) << 9, i)
580 link_data(c, &dst, i.start, i.start, i.end - i.start);
582 update_inode(c, &dst);
/*
 * Copy the filesystem tree rooted at src_fd/src_path into the bcachefs
 * filesystem c: look up the bcachefs root inode, copy the source root's times
 * and xattrs onto it, recurse with copy_dir(), create the file covering the
 * old filesystem's space with reserve_old_fs_space(), write the root inode
 * back, and release the per-copy state.
 * Dies on root lookup or chdir failure.
 * NOTE(review): parts of the copy_fs_state initialiser are not visible in this
 * listing, so how the extents argument reaches s.extents is not shown.
 */
585 static void copy_fs(struct bch_fs *c, int src_fd, const char *src_path,
586 u64 bcachefs_inum, ranges *extents)
590 struct bch_inode_unpacked root_inode;
591 int ret = bch2_inode_find_by_inum(c, (subvol_inum) { 1, BCACHEFS_ROOT_INO },
594 die("error looking up root directory: %s", bch2_err_str(ret));
597 die("chdir error: %m");
599 struct stat stat = xfstat(src_fd);
600 copy_times(c, &root_inode, &stat);
/* "." is resolved relative to the current working directory. */
601 copy_xattrs(c, &root_inode, ".");
603 struct copy_fs_state s = {
604 .bcachefs_inum = bcachefs_inum,
610 copy_dir(&s, c, &root_inode, src_fd, src_path);
612 reserve_old_fs_space(c, &root_inode, &s.extents);
614 update_inode(c, &root_inode);
616 darray_exit(&s.extents);
617 genradix_free(&s.hardlinks);
/*
 * Pick a location for the superblocks inside the reserved extents: for each
 * extent, take the bucket-aligned sub-range starting no lower than 256 KiB
 * and accept it if two superblocks of opts.superblock_size sectors fit.
 * On success dev->sb_offset and dev->sb_end are set (in 512-byte sectors).
 * Dies if no extent is large enough.
 * NOTE(review): the statement that leaves the loop on success is not visible
 * in this listing.
 */
620 static void find_superblock_space(ranges extents,
621 struct format_opts opts,
622 struct dev_opts *dev)
624 darray_for_each(extents, i) {
/* Extent bounds are bytes; bucket_size is sectors, hence the << 9. */
625 u64 start = round_up(max(256ULL << 10, i->start),
626 dev->bucket_size << 9);
627 u64 end = round_down(i->end,
628 dev->bucket_size << 9);
630 /* Need space for two superblocks: */
631 if (start + (opts.superblock_size << 9) * 2 <= end) {
632 dev->sb_offset = start >> 9;
633 dev->sb_end = dev->sb_offset + opts.superblock_size * 2;
638 die("Couldn't find a valid location for superblock");
/* Print the help text for "bcachefs migrate" to standard output. */
641 static void migrate_usage(void)
643 puts("bcachefs migrate - migrate an existing filesystem to bcachefs\n"
644 "Usage: bcachefs migrate [OPTION]...\n"
647 " -f fs Root of filesystem to migrate(s)\n"
648 " --encrypted Enable whole filesystem encryption (chacha20/poly1305)\n"
649 " --no_passphrase Don't encrypt master encryption key\n"
650 " -F Force, even if metadata file already exists\n"
651 " -h Display this help and exit\n"
652 "Report bugs to <linux-bcachefs@vger.kernel.org>");
/*
 * Long options accepted by cmd_migrate(); each maps to the short option
 * character returned by getopt_long() ('e' and 'p').
 */
655 static const struct option migrate_opts[] = {
656 { "encrypted", no_argument, NULL, 'e' },
657 { "no_passphrase", no_argument, NULL, 'p' },
/*
 * Perform an in-place migration of the filesystem mounted at fs_path:
 *   1. verify fs_path is a mount point and a directory, and open the block
 *      device backing it;
 *   2. reserve space for bcachefs metadata in the file <fs_path>/bcachefs and
 *      find room for the superblocks inside it;
 *   3. format a bcachefs filesystem into that space, open it without
 *      starting, mark space via mark_unreserved_space(), then start it;
 *   4. copy the old filesystem's tree into it with copy_fs();
 *   5. reopen it with nochanges/read_only set (announced to the user as the
 *      fsck run) and print follow-up instructions.
 * Dies on any failure.
 * NOTE(review): this listing omits some of the function's lines, including the
 * return statement.
 */
661 static int migrate_fs(const char *fs_path,
662 struct bch_opt_strs fs_opt_strs,
663 struct bch_opts fs_opts,
664 struct format_opts format_opts,
667 if (!path_is_fs_root(fs_path))
668 die("%s is not a filesystem root", fs_path);
670 int fs_fd = xopen(fs_path, O_RDONLY|O_NOATIME);
671 struct stat stat = xfstat(fs_fd);
673 if (!S_ISDIR(stat.st_mode))
674 die("%s is not a directory", fs_path);
676 struct dev_opts dev = dev_opts_default();
/* Resolve the device the filesystem lives on and open it read/write. */
678 dev.path = dev_t_to_path(stat.st_dev);
679 dev.handle = bdev_open_by_path(dev.path, BLK_OPEN_READ|BLK_OPEN_WRITE, &dev, NULL);
681 int ret = PTR_ERR_OR_ZERO(dev.handle);
683 die("Error opening device to format %s: %s", dev.path, strerror(-ret));
684 dev.bdev = dev.handle->bdev;
686 opt_set(fs_opts, block_size, get_blocksize(dev.bdev->bd_fd));
688 char *file_path = mprintf("%s/bcachefs", fs_path);
689 printf("Creating new filesystem on %s in space reserved at %s\n",
690 dev.path, file_path);
692 dev.size = get_size(dev.bdev->bd_fd);
693 dev.bucket_size = bch2_pick_bucket_size(fs_opts, &dev);
694 dev.nbuckets = dev.size / dev.bucket_size;
696 bch2_check_bucket_size(fs_opts, &dev);
/* Reserve one fifth of the device size for the new filesystem's metadata. */
699 ranges extents = reserve_new_fs_space(file_path,
700 fs_opts.block_size >> 9,
701 get_size(dev.bdev->bd_fd) / 5,
702 &bcachefs_inum, stat.st_dev, force);
704 find_superblock_space(extents, format_opts, &dev);
706 struct bch_sb *sb = bch2_format(fs_opt_strs,
707 fs_opts, format_opts, &dev, 1);
708 u64 sb_offset = le64_to_cpu(sb->layout.sb_offset[0]);
710 if (format_opts.passphrase)
711 bch2_add_key(sb, "user", "user", format_opts.passphrase);
715 struct bch_opts opts = bch2_opts_empty();
716 struct bch_fs *c = NULL;
717 char *path[1] = { dev.path };
/* Open at the non-default superblock offset, without starting yet. */
719 opt_set(opts, sb, sb_offset);
720 opt_set(opts, nostart, true);
721 opt_set(opts, noexcl, true);
722 opt_set(opts, buckets_nouse, true);
724 c = bch2_fs_open(path, 1, opts);
726 die("Error opening new filesystem: %s", bch2_err_str(PTR_ERR(c)));
/* Must happen before bch2_fs_start() is called. */
728 mark_unreserved_space(c, extents);
730 ret = bch2_fs_start(c);
732 die("Error starting new filesystem: %s", bch2_err_str(ret));
734 copy_fs(c, fs_fd, fs_path, bcachefs_inum, &extents);
738 printf("Migrate complete, running fsck:\n");
739 opt_set(opts, nostart, false);
740 opt_set(opts, nochanges, true);
741 opt_set(opts, read_only, true);
743 c = bch2_fs_open(path, 1, opts);
745 die("Error opening new filesystem: %s", bch2_err_str(PTR_ERR(c)));
748 printf("fsck complete\n");
/* The file name below must match the one created by reserve_old_fs_space(). */
750 printf("To mount the new filesystem, run\n"
751 " mount -t bcachefs -o sb=%llu %s dir\n"
753 "After verifying that the new filesystem is correct, to create a\n"
754 "superblock at the default offset and finish the migration run\n"
755 " bcachefs migrate-superblock -d %s -o %llu\n"
757 "The new filesystem will have a file at /old_migrated_filesystem\n"
758 "referencing all disk space that might be used by the existing\n"
759 "filesystem. That file can be deleted once the old filesystem is\n"
760 "no longer needed (and should be deleted prior to running\n"
761 "bcachefs migrate-superblock)\n",
762 sb_offset, dev.path, dev.path, sb_offset);
/*
 * Entry point for "bcachefs migrate": pull the format-related filesystem
 * options out of argv, parse the remaining command-line switches, prompt for
 * a passphrase when encryption is requested without --no_passphrase, and run
 * migrate_fs(). Dies if no filesystem path was given.
 * NOTE(review): the switch over the parsed option and the return statement
 * are not visible in this listing.
 */
766 int cmd_migrate(int argc, char *argv[])
768 struct format_opts format_opts = format_opts_default();
769 char *fs_path = NULL;
770 bool no_passphrase = false, force = false;
/* bch2_cmdline_opts_get() is passed &argc, so it may adjust argc/argv. */
773 struct bch_opt_strs fs_opt_strs =
774 bch2_cmdline_opts_get(&argc, argv, OPT_FORMAT);
775 struct bch_opts fs_opts = bch2_parse_opts(fs_opt_strs);
777 while ((opt = getopt_long(argc, argv, "f:Fh",
778 migrate_opts, NULL)) != -1)
784 format_opts.encrypted = true;
787 no_passphrase = true;
798 die("Please specify a filesystem to migrate");
800 if (format_opts.encrypted && !no_passphrase)
801 format_opts.passphrase = read_passphrase_twice("Enter passphrase: ");
803 int ret = migrate_fs(fs_path,
807 bch2_opt_strs_free(&fs_opt_strs);
/* Print the help text for "bcachefs migrate-superblock" to standard output. */
811 static void migrate_superblock_usage(void)
813 puts("bcachefs migrate-superblock - create default superblock after migrating\n"
814 "Usage: bcachefs migrate-superblock [OPTION]...\n"
817 " -d device Device to create superblock for\n"
818 " -o offset Offset of existing superblock\n"
819 " -h Display this help and exit\n"
820 "Report bugs to <linux-bcachefs@vger.kernel.org>");
/*
 * Entry point for "bcachefs migrate-superblock": read the superblock found at
 * the given offset on the given device, prepend BCH_SB_SECTOR to its layout's
 * list of superblock offsets, and write the superblock back.
 * Dies if the device or offset is missing or invalid, if the layout has no
 * free slot, or if the layout already lists BCH_SB_SECTOR.
 * NOTE(review): the option switch and the end of the function are not visible
 * in this listing.
 */
823 int cmd_migrate_superblock(int argc, char *argv[])
829 while ((opt = getopt(argc, argv, "d:o:h")) != -1)
/* The offset is parsed as base-10. */
835 ret = kstrtou64(optarg, 10, &offset);
837 die("Invalid offset");
840 migrate_superblock_usage();
845 die("Please specify a device");
848 die("Please specify offset of existing superblock");
850 int fd = xopen(dev, O_RDWR);
851 struct bch_sb *sb = __bch2_super_read(fd, offset);
/* There must be a free slot for one more offset. */
853 if (sb->layout.nr_superblocks >= ARRAY_SIZE(sb->layout.sb_offset))
854 die("Can't add superblock: no space left in superblock layout");
857 for (i = 0; i < sb->layout.nr_superblocks; i++)
858 if (le64_to_cpu(sb->layout.sb_offset[i]) == BCH_SB_SECTOR)
859 die("Superblock layout already has default superblock");
/* Shift the existing offsets up by one; the regions overlap, hence memmove. */
861 memmove(&sb->layout.sb_offset[1],
862 &sb->layout.sb_offset[0],
863 sb->layout.nr_superblocks * sizeof(u64));
864 sb->layout.nr_superblocks++;
/* The default location becomes the first entry. */
866 sb->layout.sb_offset[0] = cpu_to_le64(BCH_SB_SECTOR);
868 bch2_super_write(fd, sb);