9 #include <sys/sysmacros.h>
10 #include <sys/types.h>
14 #include <linux/fiemap.h>
16 #include <linux/stat.h>
18 #include <uuid/uuid.h>
22 #include "libbcachefs.h"
24 #include <linux/dcache.h>
25 #include <linux/generic-radix-tree.h>
26 #include <linux/xattr.h>
27 #include "libbcachefs/bcachefs.h"
28 #include "libbcachefs/alloc_background.h"
29 #include "libbcachefs/alloc_foreground.h"
30 #include "libbcachefs/btree_update.h"
31 #include "libbcachefs/buckets.h"
32 #include "libbcachefs/dirent.h"
33 #include "libbcachefs/fs.h"
34 #include "libbcachefs/inode.h"
35 #include "libbcachefs/io.h"
36 #include "libbcachefs/replicas.h"
37 #include "libbcachefs/str_hash.h"
38 #include "libbcachefs/super.h"
39 #include "libbcachefs/xattr.h"
/*
 * dev_t_to_path() - build a /dev/... path for a block device number.
 *
 * Resolves the sysfs symlink /sys/dev/block/<major>:<minor> with readlink()
 * and formats the result from the last slash-separated component of the
 * link target (located with strrchr()).  Every failure is fatal via die().
 *
 * Returns the string produced by mprintf(); presumably heap-allocated and
 * owned by the caller - TODO confirm against the definition of mprintf().
 */
41 static char *dev_t_to_path(dev_t dev)
43 char link[PATH_MAX], *p;
46 char *sysfs_dev = mprintf("/sys/dev/block/%u:%u",
47 major(dev), minor(dev));
48 ret = readlink(sysfs_dev, link, sizeof(link));
/* a result that fills the whole buffer is rejected as well as an error return */
51 if (ret < 0 || ret >= sizeof(link))
52 die("readlink error while looking up block device: %m");
/* the device name is taken from the final path component of the link target */
56 p = strrchr(link, '/');
58 die("error looking up device name");
61 return mprintf("/dev/%s", p);
/*
 * path_is_fs_root() - check @path against the mount points of this process.
 *
 * Reads /proc/self/mountinfo line by line with getline() and compares @path
 * with the fifth space-separated field of each line (the field that follows
 * mount id, parent id, dev and root).  The comparison is an exact strcmp(),
 * so @path has to be spelled exactly as it appears in mountinfo.
 *
 * Failing to open the mountinfo file is fatal.
 */
64 static bool path_is_fs_root(const char *path)
66 char *line = NULL, *p, *mount;
71 f = fopen("/proc/self/mountinfo", "r");
73 die("Error getting mount information");
75 while (getline(&line, &n, f) != -1) {
/* skip the four leading fields; the fifth one is the mount point */
78 strsep(&p, " "); /* mount id */
79 strsep(&p, " "); /* parent id */
80 strsep(&p, " "); /* dev */
81 strsep(&p, " "); /* root */
82 mount = strsep(&p, " ");
/* mount is NULL when the line had fewer fields than expected */
85 if (mount && !strcmp(path, mount))
/*
 * mark_unreserved_space() - flag buckets outside the reserved extents.
 *
 * Walks the holes of @extents on device 0 of @c, bounded by the device size
 * (nbuckets converted to sectors and then to bytes with << 9), and sets the
 * corresponding bits in ca->buckets_nouse.  The hole boundaries are byte
 * offsets: they are shifted right by 9 to obtain sectors before being
 * converted to a bucket index.
 *
 * NOTE(review): for_each_hole() presumably yields the gaps NOT covered by
 * @extents - confirm against its definition.
 */
96 static void mark_unreserved_space(struct bch_fs *c, ranges extents)
98 struct bch_dev *ca = c->devs[0];
99 struct hole_iter iter;
102 for_each_hole(iter, extents, bucket_to_sector(ca, ca->mi.nbuckets) << 9, i) {
/* an empty hole (start == end) is special-cased */
105 if (i.start == i.end)
108 b = sector_to_bucket(ca, i.start >> 9);
110 set_bit(b, ca->buckets_nouse);
/* keep going while the byte offset of bucket b is still inside the hole */
112 } while (bucket_to_sector(ca, b) << 9 < i.end);
/*
 * update_inode() - write an unpacked inode to the inodes btree.
 *
 * Packs @inode with bch2_inode_pack() and inserts the packed key into
 * BTREE_ID_INODES.  A failed insert is fatal; note that the message says
 * creating file even when the caller is updating an existing inode.
 */
116 static void update_inode(struct bch_fs *c,
117 struct bch_inode_unpacked *inode)
119 struct bkey_inode_buf packed;
122 bch2_inode_pack(&packed, inode);
123 ret = bch2_btree_insert(c, BTREE_ID_INODES, &packed.inode.k_i,
126 die("error creating file: %s", strerror(-ret));
/*
 * create_dirent() - add a directory entry called @name to directory @parent.
 *
 * The entry refers to inode number @inum and its type is derived from @mode
 * with mode_to_type().  The hash parameters come from @parent via
 * bch2_hash_info_init().  BCH_HASH_SET_MUST_CREATE is passed to
 * bch2_dirent_create(); presumably an already existing entry of the same
 * name is then reported as an error - TODO confirm.  Errors are fatal.
 */
129 static void create_dirent(struct bch_fs *c,
130 struct bch_inode_unpacked *parent,
131 const char *name, u64 inum, mode_t mode)
133 struct bch_hash_info parent_hash_info = bch2_hash_info_init(c, parent);
/* only the length and the name pointer of the qstr are filled in */
134 struct qstr qname = { { { .len = strlen(name), } }, .name = name };
136 int ret = bch2_dirent_create(c, parent->bi_inum, &parent_hash_info,
137 mode_to_type(mode), &qname,
138 inum, NULL, BCH_HASH_SET_MUST_CREATE);
140 die("error creating file: %s", strerror(-ret));
/*
 * create_link() - add another name for an inode that already exists in the
 * new filesystem (used for hard links).
 *
 * Looks up inode @inum, writes it back with update_inode() and then creates
 * the directory entry @name in @parent with create_dirent().  A failed
 * lookup is fatal.
 *
 * NOTE(review): presumably the link count of the inode is adjusted between
 * the lookup and the write-back - confirm.
 */
146 static void create_link(struct bch_fs *c,
147 struct bch_inode_unpacked *parent,
148 const char *name, u64 inum, mode_t mode)
150 struct bch_inode_unpacked inode;
151 int ret = bch2_inode_find_by_inum(c, inum, &inode);
153 die("error looking up hardlink: %s", strerror(-ret));
156 update_inode(c, &inode);
158 create_dirent(c, parent, name, inum, mode);
/*
 * create_file() - create a new inode and link it into directory @parent.
 *
 * Initialises an unpacked inode from @uid, @gid, @mode, @rdev and @parent,
 * creates it with bch2_inode_create() (inode numbers are allocated starting
 * at BLOCKDEV_INODE_MAX, using c->unused_inode_hint) and finally adds the
 * directory entry with create_dirent().  Errors are fatal.
 *
 * Returns a struct bch_inode_unpacked by value; presumably the inode that
 * was just created - TODO confirm.
 */
161 static struct bch_inode_unpacked create_file(struct bch_fs *c,
162 struct bch_inode_unpacked *parent,
164 uid_t uid, gid_t gid,
165 mode_t mode, dev_t rdev)
167 struct bch_inode_unpacked new_inode;
170 bch2_inode_init(c, &new_inode, uid, gid, mode, rdev, parent);
172 ret = bch2_inode_create(c, &new_inode, BLOCKDEV_INODE_MAX, 0,
173 &c->unused_inode_hint);
175 die("error creating file: %s", strerror(-ret));
/* bi_inum has been assigned by bch2_inode_create() at this point */
177 create_dirent(c, parent, name, new_inode.bi_inum, mode);
/*
 * for_each_xattr_handler() - iterate over an array of xattr handler pointers.
 *
 * @handler is loaded from *@handlers and @handlers is post-incremented on
 * every step, so the @handlers argument itself is modified by the loop and
 * must be an lvalue.
 *
 * NOTE(review): presumably the array is terminated by a NULL entry - confirm.
 */
182 #define for_each_xattr_handler(handlers, handler) \
184 for ((handler) = *(handlers)++; \
186 (handler) = *(handlers)++)
/*
 * xattr_resolve_name() - find the xattr handler responsible for *@name.
 *
 * Walks bch2_xattr_handlers and matches *@name against the prefix of each
 * handler with strcmp_prefix().
 *
 * Returns a handler pointer on success.  The error returns are encoded with
 * ERR_PTR(): -EINVAL and -EOPNOTSUPP, so callers must check the result with
 * IS_ERR() before dereferencing it.
 *
 * NOTE(review): @name is passed by reference; presumably it is advanced past
 * the matched prefix on success - confirm.
 */
188 static const struct xattr_handler *xattr_resolve_name(char **name)
190 const struct xattr_handler **handlers = bch2_xattr_handlers;
191 const struct xattr_handler *handler;
193 for_each_xattr_handler(handlers, handler) {
196 n = strcmp_prefix(*name, xattr_prefix(handler));
/*
 * True when exactly one of these holds: the handler has no prefix member
 * set, or nothing is left of the name after the matched part.
 */
198 if (!handler->prefix ^ !*n) {
201 return ERR_PTR(-EINVAL);
207 return ERR_PTR(-EOPNOTSUPP);
/*
 * copy_times() - copy access, modification and change times into @dst.
 *
 * The three timestamps st_atim, st_mtim and st_ctim of @src are converted
 * with timespec_to_bch2_time() and stored in bi_atime, bi_mtime and bi_ctime.
 * Only the in-memory inode is changed; nothing is written to the btree here.
 */
210 static void copy_times(struct bch_fs *c, struct bch_inode_unpacked *dst,
213 dst->bi_atime = timespec_to_bch2_time(c, src->st_atim);
214 dst->bi_mtime = timespec_to_bch2_time(c, src->st_mtim);
215 dst->bi_ctime = timespec_to_bch2_time(c, src->st_ctim);
/*
 * copy_xattrs() - copy all extended attributes of path @src to inode @dst.
 *
 * The attribute names are listed with llistxattr() and each value is read
 * with lgetxattr(); both are the variants that operate on a symlink itself
 * rather than its target.  Every attribute is then stored with
 * bch2_xattr_set() inside bch2_trans_do().  All errors are fatal.
 *
 * The name list and each value live in on-stack buffers of XATTR_LIST_MAX
 * and XATTR_SIZE_MAX bytes.
 */
218 static void copy_xattrs(struct bch_fs *c, struct bch_inode_unpacked *dst,
221 struct bch_hash_info hash_info = bch2_hash_info_init(c, dst);
223 char attrs[XATTR_LIST_MAX];
224 ssize_t attrs_size = llistxattr(src, attrs, sizeof(attrs));
226 die("listxattr error: %m");
/* the list is a sequence of NUL-terminated names, attrs_size bytes in total */
230 attr < attrs + attrs_size;
/* computed up front: attr is handed to xattr_resolve_name() by reference below */
232 next = attr + strlen(attr) + 1;
234 char val[XATTR_SIZE_MAX];
235 ssize_t val_size = lgetxattr(src, attr, val, sizeof(val));
238 die("error getting xattr val: %m");
/*
 * NOTE(review): xattr_resolve_name() can return ERR_PTR(-EINVAL) or
 * ERR_PTR(-EOPNOTSUPP); verify that h is checked with IS_ERR() before
 * h->flags is read below.
 */
240 const struct xattr_handler *h = xattr_resolve_name(&attr);
242 int ret = bch2_trans_do(c, NULL, BTREE_INSERT_ATOMIC,
243 bch2_xattr_set(&trans, dst->bi_inum, &hash_info, attr,
244 val, val_size, h->flags, 0));
246 die("error creating xattr: %s", strerror(-ret));
/*
 * Size in bytes of the static bounce buffer below.  It is a named constant
 * because write_data() has a parameter that is also called buf: inside that
 * function sizeof(buf) is the size of a pointer, not of this array, so the
 * bio_vec table must be sized from the constant instead.
 */
#define WRITE_DATA_BUF	(1 << 20)

/* Page-aligned bounce buffer shared by copy_data() and copy_link(). */
250 static char buf[WRITE_DATA_BUF] __aligned(PAGE_SIZE);
/*
 * write_data() - write @len bytes from @buf into the new filesystem.
 *
 * The data is written at byte offset @dst_offset of inode @dst_inode through
 * a bch_write_op with a single replica.  Both @dst_offset and @len must be
 * multiples of the filesystem block size (enforced with BUG_ON()).  A disk
 * reservation of @len >> 9 sectors is taken first; failing to get it is
 * fatal.  On completion bi_sectors of @dst_inode is increased by the number
 * of sectors written; the caller is responsible for writing the inode back.
 *
 * @buf is expected to point at no more than WRITE_DATA_BUF bytes, which is
 * what the bio_vec table is dimensioned for.
 */
252 static void write_data(struct bch_fs *c,
253 struct bch_inode_unpacked *dst_inode,
254 u64 dst_offset, void *buf, size_t len)
257 struct bch_write_op op;
/* one bio_vec per page of the bounce buffer (was sizeof(buf), i.e. zero entries) */
258 struct bio_vec bv[WRITE_DATA_BUF / PAGE_SIZE];
262 BUG_ON(dst_offset & (block_bytes(c) - 1));
263 BUG_ON(len & (block_bytes(c) - 1));
265 closure_init_stack(&cl);
267 bio_init(&o.op.wbio.bio, o.bv, ARRAY_SIZE(o.bv));
268 o.op.wbio.bio.bi_iter.bi_size = len;
269 bch2_bio_map(&o.op.wbio.bio, buf);
271 bch2_write_op_init(&o.op, c, bch2_opts_to_inode_opts(c->opts));
272 o.op.write_point = writepoint_hashed(0);
273 o.op.nr_replicas = 1;
/* the extent position is expressed in 512-byte sectors */
274 o.op.pos = POS(dst_inode->bi_inum, dst_offset >> 9);
276 int ret = bch2_disk_reservation_get(c, &o.op.res, len >> 9,
277 c->opts.data_replicas, 0);
279 die("error reserving space in new filesystem: %s", strerror(-ret));
281 closure_call(&o.op.cl, bch2_write, NULL, &cl);
284 dst_inode->bi_sectors += len >> 9;
/*
 * copy_data() - copy the byte range [@start, @end) of @src_fd into @dst_inode.
 *
 * The range is processed in chunks of at most sizeof(buf) bytes (the static
 * bounce buffer of this file).  Each chunk is read with xpread(), zero
 * padded up to the next multiple of the filesystem block size and written
 * at the same offset with write_data().
 */
287 static void copy_data(struct bch_fs *c,
288 struct bch_inode_unpacked *dst_inode,
289 int src_fd, u64 start, u64 end)
291 while (start < end) {
292 unsigned len = min_t(u64, end - start, sizeof(buf));
/* number of bytes needed to round the chunk up to a whole block */
293 unsigned pad = round_up(len, block_bytes(c)) - len;
295 xpread(src_fd, buf, len, start);
296 memset(buf + len, 0, pad);
298 write_data(c, dst_inode, start, buf, len + pad);
/*
 * link_data() - make @dst reference data that is already on the device.
 *
 * Instead of copying, extent keys are inserted into BTREE_ID_EXTENTS that
 * map the range starting at @logical in inode @dst onto the range starting
 * at @physical on device 0.  @logical, @physical and @length must all be
 * multiples of the filesystem block size in bytes (BUG_ON() otherwise).
 *
 * The range is handled piecewise: a piece never extends past the end of the
 * bucket that contains its physical start.  For every piece the bucket is
 * marked dirty, a one-replica NOFAIL disk reservation is taken and released
 * around the btree insert, and bi_sectors of @dst grows by the piece size.
 * Errors are fatal.
 *
 * NOTE(review): the range check against the device size compares with a
 * sector count, so the byte arguments are presumably converted to sectors
 * after the alignment checks - confirm.
 */
303 static void link_data(struct bch_fs *c, struct bch_inode_unpacked *dst,
304 u64 logical, u64 physical, u64 length)
306 struct bch_dev *ca = c->devs[0];
308 BUG_ON(logical & (block_bytes(c) - 1));
309 BUG_ON(physical & (block_bytes(c) - 1));
310 BUG_ON(length & (block_bytes(c) - 1));
/* the linked range must lie entirely inside the device */
316 BUG_ON(physical + length > bucket_to_sector(ca, ca->mi.nbuckets));
319 struct bkey_i_extent *e;
321 u64 b = sector_to_bucket(ca, physical);
322 struct disk_reservation res;
/* limit the piece to what is left of the current bucket */
326 sectors = min(ca->mi.bucket_size -
327 (physical & (ca->mi.bucket_size - 1)),
330 e = bkey_extent_init(&k.k);
331 e->k.p.inode = dst->bi_inum;
/* the key position is the END of the extent */
332 e->k.p.offset = logical + sectors;
334 bch2_bkey_append_ptr(&e->k_i, (struct bch_extent_ptr) {
337 .gen = bucket(ca, b)->mark.gen,
340 bucket_set_dirty(ca, b);
342 ret = bch2_disk_reservation_get(c, &res, sectors, 1,
343 BCH_DISK_RESERVATION_NOFAIL);
345 die("error reserving space in new filesystem: %s",
348 bch2_mark_bkey_replicas(c, extent_i_to_s_c(e).s_c);
350 ret = bch2_btree_insert(c, BTREE_ID_EXTENTS, &e->k_i,
353 die("btree insert error %s", strerror(-ret));
355 bch2_disk_reservation_put(c, &res);
357 dst->bi_sectors += sectors;
/*
 * copy_link() - store the target of symlink @src as the data of inode @dst.
 *
 * The target is read with readlink() into the static bounce buffer and
 * written at offset 0 with write_data(), with the length rounded up to a
 * whole filesystem block.  A readlink() failure is fatal.
 *
 * NOTE(review): readlink() does not NUL-terminate and only the first ret
 * bytes of the buffer are fresh; verify whether the padding up to the block
 * boundary should be zeroed before the write, as copy_data() does.
 */
364 static void copy_link(struct bch_fs *c, struct bch_inode_unpacked *dst,
367 ssize_t ret = readlink(src, buf, sizeof(buf));
369 die("readlink error: %m");
371 write_data(c, dst, 0, buf, round_up(ret, block_bytes(c)));
/*
 * copy_file() - transfer the contents of regular file @src_fd into @dst.
 *
 * The file is walked twice with fiemap_for_each().  The first pass looks for
 * extents flagged FIEMAP_EXTENT_UNKNOWN.  The second pass handles every
 * extent in one of two ways:
 *
 *  - copied through copy_data() when the extent is flagged UNKNOWN, ENCODED,
 *    NOT_ALIGNED or DATA_INLINE, or when it starts below 1 MiB on the device;
 *  - otherwise linked in place: the physical range is recorded in @extents
 *    with range_add() and mapped into @dst with link_data().
 *
 * Extents whose logical offset, length or (for linked extents) physical
 * offset is not a multiple of the filesystem block size are fatal; the
 * message names @src_path.  Copies are clamped with min() against
 * @src_size - fe_logical, i.e. they do not run past the end of the file.
 */
374 static void copy_file(struct bch_fs *c, struct bch_inode_unpacked *dst,
375 int src_fd, u64 src_size,
376 char *src_path, ranges *extents)
378 struct fiemap_iter iter;
379 struct fiemap_extent e;
/* first pass: look for extents whose location is not known yet */
381 fiemap_for_each(src_fd, iter, e)
382 if (e.fe_flags & FIEMAP_EXTENT_UNKNOWN) {
/* second pass: copy or link every extent */
387 fiemap_for_each(src_fd, iter, e) {
388 if ((e.fe_logical & (block_bytes(c) - 1)) ||
389 (e.fe_length & (block_bytes(c) - 1)))
390 die("Unaligned extent in %s - can't handle", src_path);
392 if (e.fe_flags & (FIEMAP_EXTENT_UNKNOWN|
393 FIEMAP_EXTENT_ENCODED|
394 FIEMAP_EXTENT_NOT_ALIGNED|
395 FIEMAP_EXTENT_DATA_INLINE)) {
396 copy_data(c, dst, src_fd, e.fe_logical,
397 min(src_size - e.fe_logical,
403 * if the data is below 1 MB, copy it so it doesn't conflict
404 * with bcachefs's potentially larger superblock:
406 if (e.fe_physical < 1 << 20) {
407 copy_data(c, dst, src_fd, e.fe_logical,
408 min(src_size - e.fe_logical,
413 if ((e.fe_physical & (block_bytes(c) - 1)))
414 die("Unaligned extent in %s - can't handle", src_path);
/* remember the physical range so that it is treated as in use later on */
416 range_add(extents, e.fe_physical, e.fe_length);
417 link_data(c, dst, e.fe_logical, e.fe_physical, e.fe_length);
/*
 * State shared by the recursive directory copy (see copy_dir() and
 * copy_fs()).
 */
421 struct copy_fs_state {
/*
 * Indexed by the inode number of a regular file in the source filesystem;
 * holds the inode number that file received in the new filesystem, or 0
 * when it has not been copied yet.  Used to recreate hard links.
 */
425 GENRADIX(u64) hardlinks;
/*
 * copy_dir() - recursively copy the directory open at @src_fd into @dst.
 *
 * For every entry other than . and .. and other than the bcachefs metadata
 * file (recognised by its inode number, s->bcachefs_inum):
 *
 *  - the entry is stat()ed without following symlinks and must live on the
 *    device s->dev, otherwise the migration is aborted;
 *  - a regular file whose source inode number was already seen is recreated
 *    as a hard link with create_link();
 *  - otherwise a new inode is created with create_file(), times and xattrs
 *    are copied, and the content is copied depending on the file type:
 *    directories recurse, regular files go through copy_file(), symlinks
 *    through copy_link(); the remaining types carry no data;
 *  - finally the inode is written back with update_inode().
 *
 * @src_path is only used to build path names for messages and for the
 * recursion.  Entries are opened by their relative name; presumably that is
 * why the working directory is changed to the directory being copied (see
 * the chdir error message) - TODO confirm.
 */
429 static void copy_dir(struct copy_fs_state *s,
431 struct bch_inode_unpacked *dst,
432 int src_fd, const char *src_path)
434 DIR *dir = fdopendir(src_fd);
/* errno is cleared first so that end-of-directory and a readdir() error can be told apart */
437 while ((errno = 0), (d = readdir(dir))) {
438 struct bch_inode_unpacked inode;
442 die("chdir error: %m");
445 xfstatat(src_fd, d->d_name, AT_SYMLINK_NOFOLLOW);
447 if (!strcmp(d->d_name, ".") ||
448 !strcmp(d->d_name, "..") ||
449 stat.st_ino == s->bcachefs_inum)
452 char *child_path = mprintf("%s/%s", src_path, d->d_name);
/* the copy must not cross into another mounted filesystem */
454 if (stat.st_dev != s->dev)
455 die("%s does not have correct st_dev!", child_path);
/* hard links are only tracked for regular files */
457 u64 *dst_inum = S_ISREG(stat.st_mode)
458 ? genradix_ptr_alloc(&s->hardlinks, stat.st_ino, GFP_KERNEL)
/* a non-zero slot means this source inode has been copied already */
461 if (dst_inum && *dst_inum) {
462 create_link(c, dst, d->d_name, *dst_inum, S_IFREG);
466 inode = create_file(c, dst, d->d_name,
467 stat.st_uid, stat.st_gid,
468 stat.st_mode, stat.st_rdev);
471 *dst_inum = inode.bi_inum;
473 copy_times(c, &inode, &stat);
474 copy_xattrs(c, &inode, d->d_name);
478 switch (mode_to_type(stat.st_mode)) {
480 fd = xopen(d->d_name, O_RDONLY|O_NOATIME);
481 copy_dir(s, c, &inode, fd, child_path);
485 inode.bi_size = stat.st_size;
487 fd = xopen(d->d_name, O_RDONLY|O_NOATIME);
488 copy_file(c, &inode, fd, stat.st_size,
489 child_path, &s->extents);
493 inode.bi_size = stat.st_size;
495 copy_link(c, &inode, d->d_name);
502 /* nothing else to copy for these: */
/* persist size, times and sector count accumulated above */
508 update_inode(c, &inode);
514 die("readdir error: %m");
/*
 * reserve_new_fs_space() - reserve room for bcachefs inside the old
 * filesystem.
 *
 * Creates (or, depending on the condition ahead of the two open() calls,
 * reopens) the file @file_path with mode 0600, checks that it lives on
 * device @dev, stores its inode number in *@bcachefs_inum and allocates
 * @size bytes for it with fallocate().  The physical extents backing the
 * file are then collected with FIEMAP.
 *
 * The migration is aborted when the file cannot be created, is on another
 * device, cannot be allocated, has an extent flagged UNKNOWN, ENCODED,
 * NOT_ALIGNED or DATA_INLINE, or has an extent whose physical offset or
 * length is not a multiple of @block_size (in bytes).
 *
 * Returns a ranges value; presumably the sorted and merged list of physical
 * byte ranges of the file - TODO confirm.
 */
517 static ranges reserve_new_fs_space(const char *file_path, unsigned block_size,
518 u64 size, u64 *bcachefs_inum, dev_t dev,
522 ? open(file_path, O_RDWR|O_CREAT, 0600)
523 : open(file_path, O_RDWR|O_CREAT|O_EXCL, 0600);
525 die("Error creating %s for bcachefs metadata: %m",
528 struct stat statbuf = xfstat(fd);
530 if (statbuf.st_dev != dev)
531 die("bcachefs file has incorrect device");
/* copy_dir() uses this inode number to skip the file during the copy */
533 *bcachefs_inum = statbuf.st_ino;
535 if (fallocate(fd, 0, 0, size))
536 die("Error reserving space for bcachefs metadata: %m");
540 struct fiemap_iter iter;
541 struct fiemap_extent e;
542 ranges extents = { NULL };
544 fiemap_for_each(fd, iter, e) {
545 if (e.fe_flags & (FIEMAP_EXTENT_UNKNOWN|
546 FIEMAP_EXTENT_ENCODED|
547 FIEMAP_EXTENT_NOT_ALIGNED|
548 FIEMAP_EXTENT_DATA_INLINE))
549 die("Unable to continue: metadata file not fully mapped");
551 if ((e.fe_physical & (block_size - 1)) ||
552 (e.fe_length & (block_size - 1)))
553 die("Unable to continue: unaligned extents in metadata file");
555 range_add(&extents, e.fe_physical, e.fe_length);
559 ranges_sort_merge(&extents);
/*
 * reserve_old_fs_space() - pin the space of the old filesystem in the new one.
 *
 * Creates the regular file old_migrated_filesystem (uid 0, gid 0, mode 0400)
 * in @root_inode, gives it the size of device 0 in bytes and, after sorting
 * and merging the extent list, maps every hole of that list into the file
 * with link_data().  Logical and physical offsets are identical, so the file
 * mirrors the device layout.  The inode is written back at the end.
 *
 * NOTE(review): for_each_hole() presumably yields the byte ranges NOT
 * covered by the list - confirm against its definition.
 */
563 static void reserve_old_fs_space(struct bch_fs *c,
564 struct bch_inode_unpacked *root_inode,
567 struct bch_dev *ca = c->devs[0];
568 struct bch_inode_unpacked dst;
569 struct hole_iter iter;
572 dst = create_file(c, root_inode, "old_migrated_filesystem",
573 0, 0, S_IFREG|0400, 0);
574 dst.bi_size = bucket_to_sector(ca, ca->mi.nbuckets) << 9;
576 ranges_sort_merge(extents);
578 for_each_hole(iter, *extents, bucket_to_sector(ca, ca->mi.nbuckets) << 9, i)
579 link_data(c, &dst, i.start, i.start, i.end - i.start);
581 update_inode(c, &dst);
/*
 * copy_fs() - populate the new filesystem @c from the tree rooted at @src_fd.
 *
 * Steps visible here: look up the root inode of the new filesystem, copy
 * times and xattrs of the source root onto it, copy the whole tree with
 * copy_dir(), reserve the space still used by the old filesystem with
 * reserve_old_fs_space(), write the root inode back, release the temporary
 * state and finally call bch2_alloc_write().  Errors are fatal.
 *
 * @bcachefs_inum is the inode number of the bcachefs metadata file in the
 * source filesystem; it is handed to copy_dir() through the state struct so
 * that the file is skipped.
 */
584 static void copy_fs(struct bch_fs *c, int src_fd, const char *src_path,
585 u64 bcachefs_inum, ranges *extents)
589 struct bch_inode_unpacked root_inode;
590 int ret = bch2_inode_find_by_inum(c, BCACHEFS_ROOT_INO, &root_inode);
592 die("error looking up root directory: %s", strerror(-ret));
595 die("chdir error: %m");
597 struct stat stat = xfstat(src_fd);
598 copy_times(c, &root_inode, &stat);
/* the path is relative, so this relies on the current working directory */
599 copy_xattrs(c, &root_inode, ".");
601 struct copy_fs_state s = {
602 .bcachefs_inum = bcachefs_inum,
608 copy_dir(&s, c, &root_inode, src_fd, src_path);
610 reserve_old_fs_space(c, &root_inode, &s.extents);
612 update_inode(c, &root_inode);
614 darray_free(s.extents);
615 genradix_free(&s.hardlinks);
618 bch2_alloc_write(c, false, &wrote);
/*
 * find_superblock_space() - choose where the bcachefs superblock will live.
 *
 * Scans @extents (byte ranges) for a range that, once its start is rounded
 * up and its end rounded down to the bucket size in bytes, still holds at
 * least 128 KiB.  Starts below 256 KiB are never considered.  On a match
 * dev->sb_offset is set to the start in 512-byte sectors and dev->sb_end to
 * 256 sectors (128 KiB) after it.
 *
 * The migration is aborted when no suitable range is found.
 */
621 static void find_superblock_space(ranges extents, struct dev_opts *dev)
625 darray_foreach(i, extents) {
626 u64 start = round_up(max(256ULL << 10, i->start),
627 dev->bucket_size << 9);
628 u64 end = round_down(i->end,
629 dev->bucket_size << 9);
631 if (start + (128 << 10) <= end) {
632 dev->sb_offset = start >> 9;
633 dev->sb_end = dev->sb_offset + 256;
638 die("Couldn't find a valid location for superblock");
/*
 * migrate_usage() - print the help text of the migrate command to stdout.
 */
641 static void migrate_usage(void)
643 puts("bcachefs migrate - migrate an existing filesystem to bcachefs\n"
644 "Usage: bcachefs migrate [OPTION]...\n"
647 " -f fs Root of filesystem to migrate(s)\n"
648 " --encrypted Enable whole filesystem encryption (chacha20/poly1305)\n"
649 " --no_passphrase Don't encrypt master encryption key\n"
650 " -F Force, even if metadata file already exists\n"
651 " -h Display this help and exit\n"
652 "Report bugs to <linux-bcache@vger.kernel.org>");
/*
 * Long options of the migrate command, consumed by getopt_long() in
 * cmd_migrate().  Neither option takes an argument; getopt_long() returns
 * the character in the last column for them.
 */
655 static const struct option migrate_opts[] = {
656 { "encrypted", no_argument, NULL, 'e' },
657 { "no_passphrase", no_argument, NULL, 'p' },
/*
 * migrate_fs() - convert the filesystem mounted at @fs_path to bcachefs in
 * place.
 *
 * Outline of the visible steps:
 *  1. verify that @fs_path is a mount point and a directory;
 *  2. find and open the underlying block device;
 *  3. reserve one fifth of the device size in a file called bcachefs inside
 *     the old filesystem and pick a superblock location in that space;
 *  4. format a new bcachefs there, open it without starting it, mark the
 *     space outside the reserved file as unusable and start it;
 *  5. copy the old filesystem into the new one with copy_fs();
 *  6. reopen the result with nochanges set for a check run;
 *  7. print instructions for mounting and for finishing the migration.
 *
 * Every failure is fatal via die().
 */
661 static int migrate_fs(const char *fs_path,
662 struct bch_opt_strs fs_opt_strs,
663 struct bch_opts fs_opts,
664 struct format_opts format_opts,
667 if (!path_is_fs_root(fs_path))
668 die("%s is not a filesystem root", fs_path);
670 int fs_fd = xopen(fs_path, O_RDONLY|O_NOATIME);
671 struct stat stat = xfstat(fs_fd);
673 if (!S_ISDIR(stat.st_mode))
674 die("%s is not a directory", fs_path);
676 struct dev_opts dev = dev_opts_default();
/* st_dev of the mount point identifies the block device to convert */
678 dev.path = dev_t_to_path(stat.st_dev);
679 dev.fd = xopen(dev.path, O_RDWR);
681 opt_set(fs_opts, block_size, get_blocksize(dev.path, dev.fd));
683 char *file_path = mprintf("%s/bcachefs", fs_path);
684 printf("Creating new filesystem on %s in space reserved at %s\n",
685 dev.path, file_path);
687 bch2_pick_bucket_size(fs_opts, &dev);
/* fs_opts.block_size is in sectors, the helper expects bytes */
690 ranges extents = reserve_new_fs_space(file_path,
691 fs_opts.block_size << 9,
692 get_size(dev.path, dev.fd) / 5,
693 &bcachefs_inum, stat.st_dev, force);
695 find_superblock_space(extents, &dev);
697 struct bch_sb *sb = bch2_format(fs_opt_strs,
698 fs_opts,format_opts, &dev, 1);
699 u64 sb_offset = le64_to_cpu(sb->layout.sb_offset[0]);
701 if (format_opts.passphrase)
702 bch2_add_key(sb, format_opts.passphrase);
706 struct bch_opts opts = bch2_opts_empty();
707 struct bch_fs *c = NULL;
708 char *path[1] = { dev.path };
/* the superblock is not at the default location, so it is passed explicitly */
710 opt_set(opts, sb, sb_offset);
711 opt_set(opts, nostart, true);
712 opt_set(opts, noexcl, true);
714 c = bch2_fs_open(path, 1, opts);
716 die("Error opening new filesystem: %s", strerror(-PTR_ERR(c)));
/* must happen before the filesystem is started and begins allocating */
718 mark_unreserved_space(c, extents);
720 const char *err = bch2_fs_start(c);
722 die("Error starting new filesystem: %s", err);
724 copy_fs(c, fs_fd, fs_path, bcachefs_inum, &extents);
728 printf("Migrate complete, running fsck:\n");
729 opt_set(opts, nostart, false);
730 opt_set(opts, nochanges, true);
732 c = bch2_fs_open(path, 1, opts);
734 die("Error opening new filesystem: %s", strerror(-PTR_ERR(c)));
737 printf("fsck complete\n");
/* the file name below must match the one created by reserve_old_fs_space() */
739 printf("To mount the new filesystem, run\n"
740 " mount -t bcachefs -o sb=%llu %s dir\n"
742 "After verifying that the new filesystem is correct, to create a\n"
743 "superblock at the default offset and finish the migration run\n"
744 " bcachefs migrate-superblock -d %s -o %llu\n"
746 "The new filesystem will have a file at /old_migrated_filesystem\n"
747 "referencing all disk space that might be used by the existing\n"
748 "filesystem. That file can be deleted once the old filesystem is\n"
749 "no longer needed (and should be deleted prior to running\n"
750 "bcachefs migrate-superblock)\n",
751 sb_offset, dev.path, dev.path, sb_offset);
/*
 * cmd_migrate() - entry point of the migrate command.
 *
 * Filesystem options are first extracted from the command line with
 * bch2_cmdline_opts_get() and parsed; the remaining arguments are handled
 * with getopt_long(): the short options f (takes an argument), F and h plus
 * the long options listed in migrate_opts.  A passphrase is read twice from
 * the user when encryption is requested and no_passphrase was not given.
 *
 * Returns the result of migrate_fs(); a missing filesystem argument is fatal.
 */
755 int cmd_migrate(int argc, char *argv[])
757 struct format_opts format_opts = format_opts_default();
758 char *fs_path = NULL;
759 bool no_passphrase = false, force = false;
762 struct bch_opt_strs fs_opt_strs =
763 bch2_cmdline_opts_get(&argc, argv, OPT_FORMAT);
764 struct bch_opts fs_opts = bch2_parse_opts(fs_opt_strs);
766 while ((opt = getopt_long(argc, argv, "f:Fh",
767 migrate_opts, NULL)) != -1)
773 format_opts.encrypted = true;
776 no_passphrase = true;
787 die("Please specify a filesystem to migrate");
789 if (format_opts.encrypted && !no_passphrase)
790 format_opts.passphrase = read_passphrase_twice("Enter passphrase: ");
792 return migrate_fs(fs_path,
/*
 * migrate_superblock_usage() - print the help text of the migrate-superblock
 * command to stdout.
 */
798 static void migrate_superblock_usage(void)
800 puts("bcachefs migrate-superblock - create default superblock after migrating\n"
801 "Usage: bcachefs migrate-superblock [OPTION]...\n"
804 " -d device Device to create superblock for\n"
805 " -o offset Offset of existing superblock\n"
806 " -h Display this help and exit\n"
807 "Report bugs to <linux-bcache@vger.kernel.org>");
/*
 * cmd_migrate_superblock() - entry point of the migrate-superblock command.
 *
 * Parses the short options d, o (both take an argument) and h, reads the
 * existing superblock at the given offset from the device and inserts
 * BCH_SB_SECTOR as the first entry of the superblock layout, shifting the
 * existing entries up by one.  The updated superblock is then written with
 * bch2_super_write().
 *
 * Fatal conditions: invalid or missing offset, missing device, a layout that
 * has no free slot, or a layout that already lists BCH_SB_SECTOR.
 */
810 int cmd_migrate_superblock(int argc, char *argv[])
816 while ((opt = getopt(argc, argv, "d:o:h")) != -1)
/* the offset is parsed as a decimal number */
822 ret = kstrtou64(optarg, 10, &offset);
824 die("Invalid offset");
827 migrate_superblock_usage();
832 die("Please specify a device");
835 die("Please specify offset of existing superblock");
837 int fd = xopen(dev, O_RDWR);
838 struct bch_sb *sb = __bch2_super_read(fd, offset);
/* one more entry has to fit into the fixed-size offset table */
840 if (sb->layout.nr_superblocks >= ARRAY_SIZE(sb->layout.sb_offset))
841 die("Can't add superblock: no space left in superblock layout");
844 for (i = 0; i < sb->layout.nr_superblocks; i++)
845 if (le64_to_cpu(sb->layout.sb_offset[i]) == BCH_SB_SECTOR)
846 die("Superblock layout already has default superblock");
/* source and destination overlap, hence memmove() rather than memcpy() */
848 memmove(&sb->layout.sb_offset[1],
849 &sb->layout.sb_offset[0],
850 sb->layout.nr_superblocks * sizeof(u64));
851 sb->layout.nr_superblocks++;
/* layout offsets are stored little endian */
853 sb->layout.sb_offset[0] = cpu_to_le64(BCH_SB_SECTOR);
855 bch2_super_write(fd, sb);