9 #include <sys/sysmacros.h>
10 #include <sys/types.h>
14 #include <linux/fiemap.h>
16 #include <linux/stat.h>
18 #include <uuid/uuid.h>
22 #include "libbcachefs.h"
24 #include <linux/dcache.h>
25 #include <linux/generic-radix-tree.h>
26 #include <linux/xattr.h>
27 #include "libbcachefs/bcachefs.h"
28 #include "libbcachefs/alloc_background.h"
29 #include "libbcachefs/alloc_foreground.h"
30 #include "libbcachefs/btree_update.h"
31 #include "libbcachefs/buckets.h"
32 #include "libbcachefs/dirent.h"
33 #include "libbcachefs/fs-common.h"
34 #include "libbcachefs/inode.h"
35 #include "libbcachefs/io.h"
36 #include "libbcachefs/replicas.h"
37 #include "libbcachefs/str_hash.h"
38 #include "libbcachefs/super.h"
39 #include "libbcachefs/xattr.h"
41 /* XXX cut and pasted from fsck.c */
/*
 * QSTR(n) - brace initializer that sets a nested .len member to strlen(n)
 * and .name to n. It is used below to initialize struct qstr locals from
 * C strings. The argument n is expanded twice.
 */
42 #define QSTR(n) { { { .len = strlen(n) } }, .name = n }
/*
 * dev_t_to_path() - map a block device number to a /dev/<name> path.
 *
 * Reads the symlink /sys/dev/block/<major>:<minor>, locates the last '/'
 * in the link target and returns a string built by mprintf().
 * NOTE(review): presumably the caller owns the returned string; confirm
 * against mprintf().
 */
44 static char *dev_t_to_path(dev_t dev)
46 char link[PATH_MAX], *p;
49 char *sysfs_dev = mprintf("/sys/dev/block/%u:%u",
50 major(dev), minor(dev));
51 ret = readlink(sysfs_dev, link, sizeof(link));
/*
 * A result equal to sizeof(link) is rejected as well, which leaves room for
 * a terminating NUL. NOTE(review): readlink() does not NUL-terminate, and no
 * explicit termination of link is visible in this listing before strrchr()
 * is called; confirm it exists.
 */
54 if (ret < 0 || ret >= sizeof(link))
55 die("readlink error while looking up block device: %m");
59 p = strrchr(link, '/');
/*
 * NOTE(review): the condition guarding this die() is not visible here;
 * presumably it fires when strrchr() found no '/'.
 */
61 die("error looking up device name");
/*
 * NOTE(review): as visible, p still points at the '/' found above; confirm
 * it is advanced past the separator before being formatted.
 */
64 return mprintf("/dev/%s", p);
/*
 * path_is_fs_root() - check whether path matches a mount point listed in
 * /proc/self/mountinfo.
 *
 * Each line is split on single spaces; the first four fields are discarded
 * and the fifth is taken as the mount point. The comparison is an exact
 * strcmp(), so path must be spelled exactly as it appears in mountinfo.
 * NOTE(review): what happens on a match, and the cleanup of line and f, are
 * not visible in this listing.
 */
67 static bool path_is_fs_root(const char *path)
69 char *line = NULL, *p, *mount;
74 f = fopen("/proc/self/mountinfo", "r");
/* NOTE(review): presumably guarded by a check that fopen() failed. */
76 die("Error getting mount information");
78 while (getline(&line, &n, f) != -1) {
81 strsep(&p, " "); /* mount id */
82 strsep(&p, " "); /* parent id */
83 strsep(&p, " "); /* dev */
84 strsep(&p, " "); /* root */
85 mount = strsep(&p, " ");
/* mount is NULL when the line had fewer than five fields. */
88 if (mount && !strcmp(path, mount))
/*
 * mark_unreserved_space() - set bits in ca->buckets_nouse for buckets of
 * device 0 that fall in the holes between the given extents.
 *
 * Holes are iterated up to bucket_to_sector(ca, nbuckets) << 9, so the
 * ranges are presumably byte offsets with 512-byte sectors (TODO confirm).
 */
99 static void mark_unreserved_space(struct bch_fs *c, ranges extents)
101 struct bch_dev *ca = c->devs[0];
102 struct hole_iter iter;
105 for_each_hole(iter, extents, bucket_to_sector(ca, ca->mi.nbuckets) << 9, i) {
/* Empty hole. NOTE(review): the statement taken here is not visible. */
108 if (i.start == i.end)
/* First bucket touched by the hole. */
111 b = sector_to_bucket(ca, i.start >> 9);
113 set_bit(b, ca->buckets_nouse);
/*
 * Repeat until the start of bucket b reaches the end of the hole.
 * NOTE(review): the increment of b is not visible in this listing.
 */
115 } while (bucket_to_sector(ca, b) << 9 < i.end);
/*
 * update_inode() - pack an unpacked inode and insert it into the inodes
 * btree (BTREE_ID_INODES).
 *
 * The die() below reports strerror(-ret); NOTE(review): its guard is not
 * visible here, presumably a nonzero return from bch2_btree_insert().
 */
119 static void update_inode(struct bch_fs *c,
120 struct bch_inode_unpacked *inode)
122 struct bkey_inode_buf packed;
125 bch2_inode_pack(&packed, inode);
126 ret = bch2_btree_insert(c, BTREE_ID_INODES, &packed.inode.k_i,
129 die("error updating inode: %s", strerror(-ret));
/*
 * create_link() - add a directory entry called name in parent that refers
 * to the existing inode number inum, via bch2_link_trans().
 *
 * The mode parameter is not used on any line visible in this listing.
 * parent_u and inode are outputs of bch2_link_trans() that are not read on
 * any visible line.
 */
132 static void create_link(struct bch_fs *c,
133 struct bch_inode_unpacked *parent,
134 const char *name, u64 inum, mode_t mode)
136 struct qstr qstr = QSTR(name);
137 struct bch_inode_unpacked parent_u;
138 struct bch_inode_unpacked inode;
/* NOTE(review): trans is not declared here; presumably bch2_trans_do() supplies it. */
140 int ret = bch2_trans_do(c, NULL, NULL, 0,
141 bch2_link_trans(&trans, parent->bi_inum, inum,
142 &parent_u, &inode, &qstr));
/* NOTE(review): guard not visible; presumably taken when ret is nonzero. */
144 die("error creating hardlink: %s", strerror(-ret));
/*
 * create_file() - create a new inode under parent via bch2_create_trans()
 * with the given owner, mode and device number, and return an unpacked
 * inode by value.
 *
 * NOTE(review): the name parameter, some arguments of the call and the
 * return statement are not visible in this listing; presumably new_inode is
 * what gets returned.
 */
147 static struct bch_inode_unpacked create_file(struct bch_fs *c,
148 struct bch_inode_unpacked *parent,
150 uid_t uid, gid_t gid,
151 mode_t mode, dev_t rdev)
153 struct qstr qstr = QSTR(name);
154 struct bch_inode_unpacked new_inode;
/* parent is passed both by inode number and as the unpacked inode. */
156 int ret = bch2_trans_do(c, NULL, NULL, 0,
157 bch2_create_trans(&trans,
158 parent->bi_inum, parent,
160 uid, gid, mode, rdev, NULL, NULL));
/* NOTE(review): guard not visible; presumably taken when ret is nonzero. */
162 die("error creating file: %s", strerror(-ret));
/*
 * for_each_xattr_handler(handlers, handler) - for-loop header that walks an
 * array of handler pointers. Each step loads *handlers into handler and
 * post-increments handlers, so the handlers argument itself is modified.
 * NOTE(review): the loop condition is not visible in this listing;
 * presumably the walk stops at a NULL entry.
 */
167 #define for_each_xattr_handler(handlers, handler) \
169 for ((handler) = *(handlers)++; \
171 (handler) = *(handlers)++)
/*
 * xattr_resolve_name() - find the entry of bch2_xattr_handlers whose prefix
 * matches *name.
 *
 * name is passed by pointer, so the function is able to update the caller's
 * pointer. NOTE(review): the success path is not visible in this listing;
 * presumably it stores the remainder after the prefix in *name and returns
 * the handler. Two ERR_PTR results are visible: -EINVAL inside the loop and
 * -EOPNOTSUPP after it.
 */
173 static const struct xattr_handler *xattr_resolve_name(char **name)
175 const struct xattr_handler **handlers = bch2_xattr_handlers;
176 const struct xattr_handler *handler;
178 for_each_xattr_handler(handlers, handler) {
181 n = strcmp_prefix(*name, xattr_prefix(handler));
/*
 * True when exactly one of these holds: the handler has no prefix member
 * set, or the text n points at is empty.
 */
183 if (!handler->prefix ^ !*n) {
186 return ERR_PTR(-EINVAL);
192 return ERR_PTR(-EOPNOTSUPP);
/*
 * copy_times() - copy access, modification and change times from src into
 * the bcachefs inode dst, converting each with timespec_to_bch2_time().
 *
 * NOTE(review): the declaration of src is not visible in this listing; it
 * is dereferenced for st_atim, st_mtim and st_ctim, and callers pass the
 * address of a struct stat.
 */
195 static void copy_times(struct bch_fs *c, struct bch_inode_unpacked *dst,
198 dst->bi_atime = timespec_to_bch2_time(c, src->st_atim);
199 dst->bi_mtime = timespec_to_bch2_time(c, src->st_mtim);
200 dst->bi_ctime = timespec_to_bch2_time(c, src->st_ctim);
/*
 * copy_xattrs() - copy every extended attribute of the source path src onto
 * the bcachefs inode dst.
 *
 * The attribute names are fetched with llistxattr() into a stack buffer of
 * XATTR_LIST_MAX bytes; each value is fetched with lgetxattr() into a stack
 * buffer of XATTR_SIZE_MAX bytes and stored with bch2_xattr_set().
 * NOTE(review): the declaration of src is not visible in this listing.
 */
203 static void copy_xattrs(struct bch_fs *c, struct bch_inode_unpacked *dst,
206 struct bch_hash_info hash_info = bch2_hash_info_init(c, dst);
208 char attrs[XATTR_LIST_MAX];
209 ssize_t attrs_size = llistxattr(src, attrs, sizeof(attrs));
/* NOTE(review): guard not visible; presumably a negative attrs_size. */
211 die("listxattr error: %m");
/* The list is walked one NUL-terminated name at a time. */
215 attr < attrs + attrs_size;
217 next = attr + strlen(attr) + 1;
219 char val[XATTR_SIZE_MAX];
220 ssize_t val_size = lgetxattr(src, attr, val, sizeof(val));
223 die("error getting xattr val: %m");
/*
 * attr is passed by address, so xattr_resolve_name() is able to change it.
 * NOTE(review): h can be an ERR_PTR value, and no check is visible in this
 * listing before h->flags is read below; confirm one exists.
 */
225 const struct xattr_handler *h = xattr_resolve_name(&attr);
227 int ret = bch2_trans_do(c, NULL, NULL, 0,
228 bch2_xattr_set(&trans, dst->bi_inum, &hash_info, attr,
229 val, val_size, h->flags, 0));
231 die("error creating xattr: %s", strerror(-ret));
/*
 * File-scope scratch buffer of 1 << 20 bytes, aligned to PAGE_SIZE. It is
 * used by copy_data() and copy_link() as the staging area handed to
 * write_data(). Note that write_data() has a parameter that is also called
 * buf and shadows this object inside that function.
 */
235 static char buf[1 << 20] __aligned(PAGE_SIZE);
/*
 * write_data() - write len bytes from buf into the new filesystem at byte
 * offset dst_offset of inode dst_inode, and add len >> 9 to
 * dst_inode->bi_sectors.
 *
 * dst_offset and len must both be multiples of block_bytes(c); this is
 * enforced with BUG_ON(). The write is issued with bch2_write() through a
 * closure that lives on this stack frame.
 * NOTE(review): the declarations of cl and of the aggregate o that holds op
 * and bv, and the wait for the closure, are not visible in this listing.
 */
237 static void write_data(struct bch_fs *c,
238 struct bch_inode_unpacked *dst_inode,
239 u64 dst_offset, void *buf, size_t len)
242 struct bch_write_op op;
/*
 * One bio_vec per page of the largest buffer callers pass in, which is the
 * 1 << 20 byte file-scope staging buffer. The bound must not be written as
 * sizeof(buf): inside this function buf is the void * parameter, so that
 * expression is the size of a pointer and the array would get zero entries.
 */
243 struct bio_vec bv[(1 << 20) / PAGE_SIZE];
247 BUG_ON(dst_offset & (block_bytes(c) - 1));
248 BUG_ON(len & (block_bytes(c) - 1));
250 closure_init_stack(&cl);
252 bio_init(&o.op.wbio.bio, o.bv, ARRAY_SIZE(o.bv));
253 bch2_bio_map(&o.op.wbio.bio, buf, len);
255 bch2_write_op_init(&o.op, c, bch2_opts_to_inode_opts(c->opts));
256 o.op.write_point = writepoint_hashed(0);
257 o.op.nr_replicas = 1;
/* The write position is expressed in 512-byte sectors. */
258 o.op.pos = POS(dst_inode->bi_inum, dst_offset >> 9);
260 int ret = bch2_disk_reservation_get(c, &o.op.res, len >> 9,
261 c->opts.data_replicas, 0);
263 die("error reserving space in new filesystem: %s", strerror(-ret));
265 closure_call(&o.op.cl, bch2_write, NULL, &cl);
268 dst_inode->bi_sectors += len >> 9;
/*
 * copy_data() - copy the byte range [start, end) of src_fd into dst_inode
 * by reading it through the file-scope staging buffer.
 *
 * Each pass reads at most sizeof(buf) bytes, zero-fills up to the next
 * multiple of block_bytes(c) and hands the padded chunk to write_data() at
 * the same offset it was read from.
 * NOTE(review): the statement that advances start is not visible in this
 * listing.
 */
271 static void copy_data(struct bch_fs *c,
272 struct bch_inode_unpacked *dst_inode,
273 int src_fd, u64 start, u64 end)
275 while (start < end) {
276 unsigned len = min_t(u64, end - start, sizeof(buf));
277 unsigned pad = round_up(len, block_bytes(c)) - len;
279 xpread(src_fd, buf, len, start);
/* Zero the tail so the padded block does not carry stale bytes. */
280 memset(buf + len, 0, pad);
282 write_data(c, dst_inode, start, buf, len + pad);
/*
 * link_data() - make inode dst reference data that already sits on device 0
 * by inserting extent keys that point at it, instead of copying the data.
 *
 * logical, physical and length must be multiples of block_bytes(c).
 * NOTE(review): the alignment checks compare against a byte count while the
 * later lines hand physical to sector-based helpers; the conversion between
 * the two is not visible in this listing, presumably a shift by 9.
 * The range is processed in pieces that do not cross a bucket boundary, and
 * dst->bi_sectors grows by the size of each piece.
 */
287 static void link_data(struct bch_fs *c, struct bch_inode_unpacked *dst,
288 u64 logical, u64 physical, u64 length)
290 struct bch_dev *ca = c->devs[0];
292 BUG_ON(logical & (block_bytes(c) - 1));
293 BUG_ON(physical & (block_bytes(c) - 1));
294 BUG_ON(length & (block_bytes(c) - 1));
/* The range must end within the device. */
300 BUG_ON(physical + length > bucket_to_sector(ca, ca->mi.nbuckets));
303 struct bkey_i_extent *e;
305 u64 b = sector_to_bucket(ca, physical);
306 struct disk_reservation res;
/*
 * First operand: sectors left in the current bucket. The mask assumes
 * bucket_size is a power of two (TODO confirm). The second operand of
 * min() is not visible in this listing.
 */
310 sectors = min(ca->mi.bucket_size -
311 (physical & (ca->mi.bucket_size - 1)),
314 e = bkey_extent_init(&k.k);
315 e->k.p.inode = dst->bi_inum;
/* The key position is set to the end of the piece. */
316 e->k.p.offset = logical + sectors;
318 bch2_bkey_append_ptr(&e->k_i, (struct bch_extent_ptr) {
321 .gen = bucket(ca, b)->mark.gen,
324 ret = bch2_disk_reservation_get(c, &res, sectors, 1,
325 BCH_DISK_RESERVATION_NOFAIL);
327 die("error reserving space in new filesystem: %s",
330 bch2_mark_bkey_replicas(c, extent_i_to_s_c(e).s_c);
332 ret = bch2_btree_insert(c, BTREE_ID_EXTENTS, &e->k_i,
335 die("btree insert error %s", strerror(-ret));
337 bch2_disk_reservation_put(c, &res);
339 dst->bi_sectors += sectors;
/*
 * copy_link() - store the target of the symlink src as the data of inode
 * dst.
 *
 * The target is read into the file-scope staging buffer and written at
 * offset 0, with the length rounded up to a multiple of block_bytes(c).
 * NOTE(review): the declaration of src is not visible in this listing.
 * NOTE(review): no zeroing of the bytes between the readlink() result and
 * the rounded-up length is visible here, so the padding would hold whatever
 * the staging buffer contained before; confirm whether that is intended.
 */
346 static void copy_link(struct bch_fs *c, struct bch_inode_unpacked *dst,
349 ssize_t ret = readlink(src, buf, sizeof(buf));
/* NOTE(review): guard not visible; presumably a negative ret. */
351 die("readlink error: %m");
353 write_data(c, dst, 0, buf, round_up(ret, block_bytes(c)));
/*
 * copy_file() - transfer the contents of a regular file into inode dst,
 * walking its extents with fiemap_for_each().
 *
 * A first pass looks for extents flagged FIEMAP_EXTENT_UNKNOWN;
 * NOTE(review): the action taken for them is not visible in this listing.
 * In the second pass every extent must be aligned to block_bytes(c) in its
 * logical offset and length. Extents flagged unknown, encoded, not aligned
 * or inline, and extents whose physical offset is below 1 << 20, are copied
 * with copy_data(). All others are recorded in extents and referenced in
 * place with link_data(), which also requires an aligned physical offset.
 * NOTE(review): the statements that end the two copy branches are not
 * visible; presumably each one skips to the next extent.
 */
356 static void copy_file(struct bch_fs *c, struct bch_inode_unpacked *dst,
357 int src_fd, u64 src_size,
358 char *src_path, ranges *extents)
360 struct fiemap_iter iter;
361 struct fiemap_extent e;
363 fiemap_for_each(src_fd, iter, e)
364 if (e.fe_flags & FIEMAP_EXTENT_UNKNOWN) {
369 fiemap_for_each(src_fd, iter, e) {
370 if ((e.fe_logical & (block_bytes(c) - 1)) ||
371 (e.fe_length & (block_bytes(c) - 1)))
372 die("Unaligned extent in %s - can't handle", src_path);
374 if (e.fe_flags & (FIEMAP_EXTENT_UNKNOWN|
375 FIEMAP_EXTENT_ENCODED|
376 FIEMAP_EXTENT_NOT_ALIGNED|
377 FIEMAP_EXTENT_DATA_INLINE)) {
378 copy_data(c, dst, src_fd, e.fe_logical,
379 min(src_size - e.fe_logical,
385 * if the data is below 1 MB, copy it so it doesn't conflict
386 * with bcachefs's potentially larger superblock:
388 if (e.fe_physical < 1 << 20) {
389 copy_data(c, dst, src_fd, e.fe_logical,
390 min(src_size - e.fe_logical,
395 if ((e.fe_physical & (block_bytes(c) - 1)))
396 die("Unaligned extent in %s - can't handle", src_path);
/* Remember the physical range as in use, then reference it in place. */
398 range_add(extents, e.fe_physical, e.fe_length);
399 link_data(c, dst, e.fe_logical, e.fe_physical, e.fe_length);
/*
 * State shared by the recursive directory copy.
 * NOTE(review): only the hardlinks member is visible in this listing; other
 * code in this file also reads members named bcachefs_inum, dev and extents
 * through this type.
 */
403 struct copy_fs_state {
/* Indexed by source inode number; holds the inode number created for it. */
407 GENRADIX(u64) hardlinks;
/*
 * copy_dir() - recursively copy the entries of the directory open on src_fd
 * into the bcachefs directory inode dst.
 *
 * For every entry other than ".", ".." and the inode recorded in
 * s->bcachefs_inum, a file is created in the new filesystem, its times and
 * xattrs are copied, type-specific content is copied, and the inode is
 * written back with update_inode(). Entries must live on device s->dev.
 * Regular files already seen under the same source inode number become hard
 * links instead of new files.
 * NOTE(review): the c parameter, several declarations, the case labels of
 * the switch and most guards are not visible in this listing.
 */
411 static void copy_dir(struct copy_fs_state *s,
413 struct bch_inode_unpacked *dst,
414 int src_fd, const char *src_path)
416 DIR *dir = fdopendir(src_fd);
/* errno is cleared before each readdir() call. */
419 while ((errno = 0), (d = readdir(dir))) {
420 struct bch_inode_unpacked inode;
/*
 * NOTE(review): the call this reports on is not visible; presumably the
 * working directory is switched to src_fd, since entries are opened by
 * relative name further down.
 */
424 die("chdir error: %m");
427 xfstatat(src_fd, d->d_name, AT_SYMLINK_NOFOLLOW);
429 if (!strcmp(d->d_name, ".") ||
430 !strcmp(d->d_name, "..") ||
431 stat.st_ino == s->bcachefs_inum)
434 char *child_path = mprintf("%s/%s", src_path, d->d_name);
436 if (stat.st_dev != s->dev)
437 die("%s does not have correct st_dev!", child_path);
/* Hard link tracking applies to regular files only. */
439 u64 *dst_inum = S_ISREG(stat.st_mode)
440 ? genradix_ptr_alloc(&s->hardlinks, stat.st_ino, GFP_KERNEL)
/* A nonzero slot means this source inode was copied before. */
443 if (dst_inum && *dst_inum) {
444 create_link(c, dst, d->d_name, *dst_inum, S_IFREG);
448 inode = create_file(c, dst, d->d_name,
449 stat.st_uid, stat.st_gid,
450 stat.st_mode, stat.st_rdev);
453 *dst_inum = inode.bi_inum;
455 copy_times(c, &inode, &stat);
456 copy_xattrs(c, &inode, d->d_name);
460 switch (mode_to_type(stat.st_mode)) {
462 fd = xopen(d->d_name, O_RDONLY|O_NOATIME);
463 copy_dir(s, c, &inode, fd, child_path);
467 inode.bi_size = stat.st_size;
469 fd = xopen(d->d_name, O_RDONLY|O_NOATIME);
470 copy_file(c, &inode, fd, stat.st_size,
471 child_path, &s->extents);
475 inode.bi_size = stat.st_size;
477 copy_link(c, &inode, d->d_name);
484 /* nothing else to copy for these: */
490 update_inode(c, &inode);
/*
 * NOTE(review): guard not visible; presumably errno being set after
 * readdir() returned NULL.
 */
496 die("readdir error: %m");
/*
 * reserve_new_fs_space() - create and fallocate() a file of the given size
 * inside the existing filesystem and return the physical ranges it maps to.
 *
 * The inode number of the file is stored through bcachefs_inum. The file
 * must be on device dev. Every extent reported by fiemap must be free of
 * the unknown, encoded, not-aligned and inline flags and must be aligned to
 * block_size in physical offset and length, otherwise the program dies.
 * The collected ranges are sorted and merged.
 * NOTE(review): the last parameter, the declaration of fd, the return
 * statement and any close of fd are not visible in this listing.
 */
499 static ranges reserve_new_fs_space(const char *file_path, unsigned block_size,
500 u64 size, u64 *bcachefs_inum, dev_t dev,
/*
 * One branch allows an existing file, the other adds O_EXCL and so fails
 * when the file exists. NOTE(review): the selecting condition is not
 * visible; presumably it is the force flag passed by the caller.
 */
504 ? open(file_path, O_RDWR|O_CREAT, 0600)
505 : open(file_path, O_RDWR|O_CREAT|O_EXCL, 0600);
507 die("Error creating %s for bcachefs metadata: %m",
510 struct stat statbuf = xfstat(fd);
512 if (statbuf.st_dev != dev)
513 die("bcachefs file has incorrect device");
515 *bcachefs_inum = statbuf.st_ino;
517 if (fallocate(fd, 0, 0, size))
518 die("Error reserving space for bcachefs metadata: %m");
522 struct fiemap_iter iter;
523 struct fiemap_extent e;
524 ranges extents = { NULL };
526 fiemap_for_each(fd, iter, e) {
527 if (e.fe_flags & (FIEMAP_EXTENT_UNKNOWN|
528 FIEMAP_EXTENT_ENCODED|
529 FIEMAP_EXTENT_NOT_ALIGNED|
530 FIEMAP_EXTENT_DATA_INLINE))
531 die("Unable to continue: metadata file not fully mapped");
533 if ((e.fe_physical & (block_size - 1)) ||
534 (e.fe_length & (block_size - 1)))
535 die("Unable to continue: unaligned extents in metadata file");
537 range_add(&extents, e.fe_physical, e.fe_length);
541 ranges_sort_merge(&extents);
/*
 * reserve_old_fs_space() - create the file old_migrated_filesystem in the
 * root directory and make it reference every part of device 0 that is not
 * covered by the given extents.
 *
 * The file is a regular file with mode 0400 owned by uid 0 and gid 0; its
 * size is set to bucket_to_sector(ca, nbuckets) << 9. After sorting and
 * merging the extents, each hole is linked at a logical offset equal to its
 * physical offset.
 * NOTE(review): the declaration of the extents parameter is not visible in
 * this listing.
 */
545 static void reserve_old_fs_space(struct bch_fs *c,
546 struct bch_inode_unpacked *root_inode,
549 struct bch_dev *ca = c->devs[0];
550 struct bch_inode_unpacked dst;
551 struct hole_iter iter;
554 dst = create_file(c, root_inode, "old_migrated_filesystem",
555 0, 0, S_IFREG|0400, 0);
556 dst.bi_size = bucket_to_sector(ca, ca->mi.nbuckets) << 9;
558 ranges_sort_merge(extents);
560 for_each_hole(iter, *extents, bucket_to_sector(ca, ca->mi.nbuckets) << 9, i)
561 link_data(c, &dst, i.start, i.start, i.end - i.start);
563 update_inode(c, &dst);
/*
 * copy_fs() - populate the new filesystem from the directory tree open on
 * src_fd.
 *
 * Looks up the root inode (BCACHEFS_ROOT_INO), copies times and xattrs of
 * the source root onto it, copies the tree with copy_dir(), creates the
 * placeholder file for the old filesystem with reserve_old_fs_space(),
 * writes the root inode back, releases the per-copy state and finally calls
 * bch2_alloc_write().
 * NOTE(review): several initializers of s, the declaration of wrote and the
 * guards of the die() calls are not visible in this listing.
 */
566 static void copy_fs(struct bch_fs *c, int src_fd, const char *src_path,
567 u64 bcachefs_inum, ranges *extents)
571 struct bch_inode_unpacked root_inode;
572 int ret = bch2_inode_find_by_inum(c, BCACHEFS_ROOT_INO, &root_inode);
574 die("error looking up root directory: %s", strerror(-ret));
/*
 * NOTE(review): the call this reports on is not visible; presumably the
 * working directory is switched to src_fd, which the "." lookup below
 * depends on.
 */
577 die("chdir error: %m");
579 struct stat stat = xfstat(src_fd);
580 copy_times(c, &root_inode, &stat);
581 copy_xattrs(c, &root_inode, ".");
583 struct copy_fs_state s = {
584 .bcachefs_inum = bcachefs_inum,
590 copy_dir(&s, c, &root_inode, src_fd, src_path);
592 reserve_old_fs_space(c, &root_inode, &s.extents);
594 update_inode(c, &root_inode);
596 darray_free(s.extents);
597 genradix_free(&s.hardlinks);
600 bch2_alloc_write(c, false, &wrote);
/*
 * find_superblock_space() - choose a superblock location inside one of the
 * reserved extents and store it in dev->sb_offset and dev->sb_end.
 *
 * For each extent the start is raised to at least 256ULL << 10 and rounded
 * up to a multiple of dev->bucket_size << 9; the end is rounded down to the
 * same granularity. An extent qualifies when at least 128 << 10 remains
 * between them. sb_offset is start >> 9 and sb_end is 256 past it, so both
 * are presumably in 512-byte sectors (TODO confirm).
 * NOTE(review): the statement that leaves the function after a match is not
 * visible in this listing; the die() is presumably reached only when no
 * extent qualified.
 */
603 static void find_superblock_space(ranges extents, struct dev_opts *dev)
607 darray_foreach(i, extents) {
608 u64 start = round_up(max(256ULL << 10, i->start),
609 dev->bucket_size << 9);
610 u64 end = round_down(i->end,
611 dev->bucket_size << 9);
613 if (start + (128 << 10) <= end) {
614 dev->sb_offset = start >> 9;
615 dev->sb_end = dev->sb_offset + 256;
620 die("Couldn't find a valid location for superblock");
/*
 * migrate_usage() - print the help text of the migrate command to stdout
 * with puts().
 */
623 static void migrate_usage(void)
625 puts("bcachefs migrate - migrate an existing filesystem to bcachefs\n"
626 "Usage: bcachefs migrate [OPTION]...\n"
629 " -f fs Root of filesystem to migrate(s)\n"
630 " --encrypted Enable whole filesystem encryption (chacha20/poly1305)\n"
631 " --no_passphrase Don't encrypt master encryption key\n"
632 " -F Force, even if metadata file already exists\n"
633 " -h Display this help and exit\n"
634 "Report bugs to <linux-bcache@vger.kernel.org>");
/*
 * Long options accepted by cmd_migrate() through getopt_long(). Neither
 * takes an argument; they are reported as the option characters e and p.
 * NOTE(review): the terminating entry of the table is not visible in this
 * listing.
 */
637 static const struct option migrate_opts[] = {
638 { "encrypted", no_argument, NULL, 'e' },
639 { "no_passphrase", no_argument, NULL, 'p' },
/*
 * migrate_fs() - convert the filesystem mounted at fs_path to bcachefs in
 * place.
 *
 * Steps visible in this listing:
 *  - fs_path must be a mount point and a directory;
 *  - the backing block device is derived from st_dev and opened read-write;
 *  - a file named bcachefs is reserved under fs_path, sized at one fifth of
 *    the device, and a superblock location is picked inside it;
 *  - the device is formatted, the new filesystem is opened without being
 *    started, space outside the reserved extents is marked unusable, the
 *    filesystem is started and the old tree is copied in;
 *  - the filesystem is reopened with nochanges set for the fsck pass, and
 *    follow-up instructions are printed.
 * NOTE(review): the last parameter (presumably the force flag used below),
 * the guards of several die() calls, the shutdown calls and the return
 * statement are not visible in this listing.
 */
643 static int migrate_fs(const char *fs_path,
644 struct bch_opt_strs fs_opt_strs,
645 struct bch_opts fs_opts,
646 struct format_opts format_opts,
649 if (!path_is_fs_root(fs_path))
650 die("%s is not a filesystem root", fs_path);
652 int fs_fd = xopen(fs_path, O_RDONLY|O_NOATIME);
653 struct stat stat = xfstat(fs_fd);
655 if (!S_ISDIR(stat.st_mode))
656 die("%s is not a directory", fs_path);
658 struct dev_opts dev = dev_opts_default();
660 dev.path = dev_t_to_path(stat.st_dev);
661 dev.fd = xopen(dev.path, O_RDWR);
663 opt_set(fs_opts, block_size, get_blocksize(dev.path, dev.fd));
665 char *file_path = mprintf("%s/bcachefs", fs_path);
666 printf("Creating new filesystem on %s in space reserved at %s\n",
667 dev.path, file_path);
669 bch2_pick_bucket_size(fs_opts, &dev);
/* Reserve one fifth of the device for the new filesystem. */
672 ranges extents = reserve_new_fs_space(file_path,
673 fs_opts.block_size << 9,
674 get_size(dev.path, dev.fd) / 5,
675 &bcachefs_inum, stat.st_dev, force);
677 find_superblock_space(extents, &dev);
679 struct bch_sb *sb = bch2_format(fs_opt_strs,
680 fs_opts,format_opts, &dev, 1);
681 u64 sb_offset = le64_to_cpu(sb->layout.sb_offset[0]);
683 if (format_opts.passphrase)
684 bch2_add_key(sb, format_opts.passphrase);
688 struct bch_opts opts = bch2_opts_empty();
689 struct bch_fs *c = NULL;
690 char *path[1] = { dev.path };
/* Open at the chosen superblock offset, not started, non-exclusive. */
692 opt_set(opts, sb, sb_offset);
693 opt_set(opts, nostart, true);
694 opt_set(opts, noexcl, true);
696 c = bch2_fs_open(path, 1, opts);
698 die("Error opening new filesystem: %s", strerror(-PTR_ERR(c)));
700 mark_unreserved_space(c, extents);
702 int ret = bch2_fs_start(c);
704 die("Error starting new filesystem: %s", strerror(-ret));
706 copy_fs(c, fs_fd, fs_path, bcachefs_inum, &extents);
710 printf("Migrate complete, running fsck:\n");
711 opt_set(opts, nostart, false);
712 opt_set(opts, nochanges, true);
714 c = bch2_fs_open(path, 1, opts);
716 die("Error opening new filesystem: %s", strerror(-PTR_ERR(c)));
719 printf("fsck complete\n");
/*
 * The file name printed here must match the name that
 * reserve_old_fs_space() creates in the root directory.
 */
721 printf("To mount the new filesystem, run\n"
722 " mount -t bcachefs -o sb=%llu %s dir\n"
724 "After verifying that the new filesystem is correct, to create a\n"
725 "superblock at the default offset and finish the migration run\n"
726 " bcachefs migrate-superblock -d %s -o %llu\n"
728 "The new filesystem will have a file at /old_migrated_filesystem\n"
729 "referencing all disk space that might be used by the existing\n"
730 "filesystem. That file can be deleted once the old filesystem is\n"
731 "no longer needed (and should be deleted prior to running\n"
732 "bcachefs migrate-superblock)\n",
733 sb_offset, dev.path, dev.path, sb_offset);
/*
 * cmd_migrate() - entry point of the migrate command.
 *
 * Format options are first extracted from the command line with
 * bch2_cmdline_opts_get() and parsed; the remaining arguments go through
 * getopt_long() with the short options f (takes an argument), F and h plus
 * the long options in migrate_opts. When encryption is enabled and
 * no_passphrase is not set, a passphrase is read twice from the user. The
 * result of migrate_fs() is returned.
 * NOTE(review): the case labels of the option switch, the assignments to
 * fs_path and force, and the guard of the die() call are not visible in
 * this listing.
 */
737 int cmd_migrate(int argc, char *argv[])
739 struct format_opts format_opts = format_opts_default();
740 char *fs_path = NULL;
741 bool no_passphrase = false, force = false;
744 struct bch_opt_strs fs_opt_strs =
745 bch2_cmdline_opts_get(&argc, argv, OPT_FORMAT);
746 struct bch_opts fs_opts = bch2_parse_opts(fs_opt_strs);
748 while ((opt = getopt_long(argc, argv, "f:Fh",
749 migrate_opts, NULL)) != -1)
755 format_opts.encrypted = true;
758 no_passphrase = true;
/* NOTE(review): presumably reached when fs_path is still NULL. */
769 die("Please specify a filesystem to migrate");
771 if (format_opts.encrypted && !no_passphrase)
772 format_opts.passphrase = read_passphrase_twice("Enter passphrase: ");
774 return migrate_fs(fs_path,
/*
 * migrate_superblock_usage() - print the help text of the
 * migrate-superblock command to stdout with puts().
 */
780 static void migrate_superblock_usage(void)
782 puts("bcachefs migrate-superblock - create default superblock after migrating\n"
783 "Usage: bcachefs migrate-superblock [OPTION]...\n"
786 " -d device Device to create superblock for\n"
787 " -o offset Offset of existing superblock\n"
788 " -h Display this help and exit\n"
789 "Report bugs to <linux-bcache@vger.kernel.org>");
/*
 * cmd_migrate_superblock() - entry point of the migrate-superblock command.
 *
 * Reads the superblock found at the given offset of the device, makes sure
 * the layout has a free slot and does not already list BCH_SB_SECTOR, then
 * inserts BCH_SB_SECTOR as the first layout entry and writes the superblock
 * with bch2_super_write().
 * Options: d (device), o (offset, parsed as base 10), h (help).
 * NOTE(review): the declarations, the case labels of the option switch, the
 * guards of the first three die() calls and the return statement are not
 * visible in this listing.
 */
792 int cmd_migrate_superblock(int argc, char *argv[])
798 while ((opt = getopt(argc, argv, "d:o:h")) != -1)
804 ret = kstrtou64(optarg, 10, &offset);
806 die("Invalid offset");
809 migrate_superblock_usage();
814 die("Please specify a device");
817 die("Please specify offset of existing superblock");
819 int fd = xopen(dev, O_RDWR);
820 struct bch_sb *sb = __bch2_super_read(fd, offset);
/* One more entry must fit into the fixed-size offset table. */
822 if (sb->layout.nr_superblocks >= ARRAY_SIZE(sb->layout.sb_offset))
823 die("Can't add superblock: no space left in superblock layout");
826 for (i = 0; i < sb->layout.nr_superblocks; i++)
827 if (le64_to_cpu(sb->layout.sb_offset[i]) == BCH_SB_SECTOR)
828 die("Superblock layout already has default superblock");
/* Shift the existing offsets up by one slot; the regions overlap. */
830 memmove(&sb->layout.sb_offset[1],
831 &sb->layout.sb_offset[0],
832 sb->layout.nr_superblocks * sizeof(u64));
833 sb->layout.nr_superblocks++;
835 sb->layout.sb_offset[0] = cpu_to_le64(BCH_SB_SECTOR);
837 bch2_super_write(fd, sb);