1 #include </usr/include/dirent.h>
8 #include <sys/sysmacros.h>
12 #include <attr/xattr.h>
14 #include <linux/fiemap.h>
16 #include <linux/stat.h>
18 #include <uuid/uuid.h>
22 #include "libbcache.h"
23 #include "linux/bcache.h"
25 #include <linux/dcache.h>
26 #include <linux/generic-radix-tree.h>
27 #include <linux/xattr.h>
28 #include "btree_update.h"
/*
 * dev_t_to_path - map a block device number to a path under /dev.
 *
 * Resolves the /sys/dev/block/<major>:<minor> symlink and formats the result
 * as "/dev/%s" from a pointer derived from the last '/' in the link target.
 * Dies if readlink() fails or if the target fills the whole buffer.
 *
 * The return value comes from mprintf(); presumably the caller owns it --
 * TODO confirm.
 */
38 static char *dev_t_to_path(dev_t dev)
40 char link[PATH_MAX], *p;
43 char *sysfs_dev = mprintf("/sys/dev/block/%u:%u",
44 major(dev), minor(dev));
45 ret = readlink(sysfs_dev, link, sizeof(link));
/* a length equal to the buffer size means the target may have been truncated */
48 if (ret < 0 || ret >= sizeof(link))
49 die("readlink error while looking up block device: %s", strerror(errno));
/* find the last '/' in the link target */
53 p = strrchr(link, '/');
55 die("error looking up device name");
58 return mprintf("/dev/%s", p);
/*
 * path_is_fs_root - check whether @path is listed as a mount point.
 *
 * Scans /proc/self/mountinfo line by line, skips the first four
 * space-separated fields and compares the fifth with @path using strcmp(),
 * so @path must match the mountinfo spelling exactly.
 *
 * The return statements are not visible here; presumably true on a match.
 */
61 static bool path_is_fs_root(char *path)
63 char *line = NULL, *p, *mount;
68 f = fopen("/proc/self/mountinfo", "r");
70 die("Error getting mount information");
72 while (getline(&line, &n, f) != -1) {
75 strsep(&p, " "); /* mount id */
76 strsep(&p, " "); /* parent id */
77 strsep(&p, " "); /* dev */
78 strsep(&p, " "); /* root */
/* fifth field: the mount point */
79 mount = strsep(&p, " ");
82 if (mount && !strcmp(path, mount))
/*
 * mark_unreserved_space - flag buckets outside @extents as not usable.
 *
 * Iterates over the holes between the ranges in @extents, up to the end of
 * the first cache device, and sets the nouse bit in the bucket mark of
 * buckets starting at the one containing each hole's start.
 *
 * Ranges are compared as byte offsets: sector values are shifted by 9 to
 * convert between the two.
 */
93 static void mark_unreserved_space(struct cache_set *c, ranges extents)
95 struct cache *ca = c->cache[0];
96 struct hole_iter iter;
99 for_each_hole(iter, extents, bucket_to_sector(ca, ca->mi.nbuckets) << 9, i) {
100 struct bucket_mark new;
/* empty holes are special-cased (the action taken is not visible here) */
103 if (i.start == i.end)
/* first bucket touched by this hole */
106 b = sector_to_bucket(ca, i.start >> 9);
108 bucket_cmpxchg(&ca->buckets[b], new, new.nouse = 1);
110 } while (bucket_to_sector(ca, b) << 9 < i.end);
/*
 * update_inode - write @inode back to the inodes btree.
 *
 * Packs the unpacked inode and passes the resulting key to
 * bch_btree_update() on BTREE_ID_INODES. Dies with the error reported
 * through ret (a negative errno, as it is negated for strerror()).
 */
114 static void update_inode(struct cache_set *c,
115 struct bch_inode_unpacked *inode)
117 struct bkey_inode_buf packed;
120 bch_inode_pack(&packed, inode);
121 ret = bch_btree_update(c, BTREE_ID_INODES, &packed.inode.k_i, NULL);
123 die("error creating file: %s", strerror(-ret));
/*
 * create_dirent - add a directory entry @name -> @inum under @parent.
 *
 * The entry type is derived from @mode with mode_to_type(). The entry is
 * created with BCH_HASH_SET_MUST_CREATE; failure is fatal.
 */
126 static void create_dirent(struct cache_set *c,
127 struct bch_inode_unpacked *parent,
128 const char *name, u64 inum, mode_t mode)
130 struct bch_hash_info parent_hash_info = bch_hash_info_init(parent);
/* wrap the C string in the qstr that bch_dirent_create() expects */
131 struct qstr qname = { { { .len = strlen(name), } }, .name = name };
133 int ret = bch_dirent_create(c, parent->inum, &parent_hash_info,
134 mode_to_type(mode), &qname,
135 inum, NULL, BCH_HASH_SET_MUST_CREATE);
137 die("error creating file: %s", strerror(-ret));
/*
 * create_link - add another name for an inode that was already copied.
 *
 * Looks up inode @inum, writes it back with update_inode() and then creates
 * the directory entry @name under @parent.
 *
 * NOTE(review): the statement between the lookup and update_inode() is not
 * visible here; presumably it bumps the link count -- confirm.
 */
143 static void create_link(struct cache_set *c,
144 struct bch_inode_unpacked *parent,
145 const char *name, u64 inum, mode_t mode)
147 struct bch_inode_unpacked inode;
148 int ret = bch_inode_find_by_inum(c, inum, &inode);
150 die("error looking up hardlink: %s", strerror(-ret));
153 update_inode(c, &inode);
155 create_dirent(c, parent, name, inum, mode);
/*
 * create_file - allocate a new inode and link it into @parent.
 *
 * Initialises an inode from @uid, @gid, @mode and @rdev, creates it with
 * bch_inode_create() and adds a directory entry for it under @parent.
 * Any failure is fatal.
 *
 * Returns a struct bch_inode_unpacked by value (the return statement is not
 * visible here; presumably the new inode with its inum filled in).
 */
158 static struct bch_inode_unpacked create_file(struct cache_set *c,
159 struct bch_inode_unpacked *parent,
161 uid_t uid, gid_t gid,
162 mode_t mode, dev_t rdev)
164 struct bch_inode_unpacked new_inode;
165 struct bkey_inode_buf packed;
168 bch_inode_init(c, &new_inode, uid, gid, mode, rdev);
169 bch_inode_pack(&packed, &new_inode);
171 ret = bch_inode_create(c, &packed.inode.k_i, BLOCKDEV_INODE_MAX, 0,
172 &c->unused_inode_hint);
174 die("error creating file: %s", strerror(-ret));
/* the inode number chosen by bch_inode_create() is read back from the key */
176 new_inode.inum = packed.inode.k.p.inode;
177 create_dirent(c, parent, name, new_inode.inum, mode);
/*
 * for_each_xattr_handler - step through an array of xattr handler pointers.
 *
 * Assigns successive entries of @handlers to @handler. Note that @handlers
 * itself is advanced by the macro. The loop condition is not visible here;
 * presumably iteration stops at a NULL entry -- confirm.
 */
182 #define for_each_xattr_handler(handlers, handler) \
184 for ((handler) = *(handlers)++; \
186 (handler) = *(handlers)++)
/*
 * xattr_resolve_name - find the handler responsible for an xattr name.
 *
 * Walks bch_xattr_handlers and matches *name against each handler's prefix
 * with strcmp_prefix().
 *
 * Returns a handler pointer on success, or an ERR_PTR(): -EINVAL from within
 * the handler loop, -EOPNOTSUPP after the loop (no handler matched). Callers
 * must test the result with IS_ERR() before dereferencing it.
 *
 * NOTE(review): @name is passed by reference; presumably it is advanced past
 * the matched prefix on success -- the assignment is not visible here.
 */
188 static const struct xattr_handler *xattr_resolve_name(const char **name)
190 const struct xattr_handler **handlers = bch_xattr_handlers;
191 const struct xattr_handler *handler;
193 for_each_xattr_handler(handlers, handler) {
196 n = strcmp_prefix(*name, xattr_prefix(handler));
/* true when exactly one of "handler has no prefix" / "remainder is empty" holds */
198 if (!handler->prefix ^ !*n) {
201 return ERR_PTR(-EINVAL);
207 return ERR_PTR(-EOPNOTSUPP);
/*
 * copy_times - copy access, modification and change times into @dst.
 *
 * Each timestamp of the source stat structure is converted with
 * timespec_to_bch_time(). Only the in-memory inode is changed; nothing is
 * written to the filesystem here.
 */
210 static void copy_times(struct cache_set *c, struct bch_inode_unpacked *dst,
213 dst->i_atime = timespec_to_bch_time(c, src->st_atim);
214 dst->i_mtime = timespec_to_bch_time(c, src->st_mtim);
215 dst->i_ctime = timespec_to_bch_time(c, src->st_ctim);
218 static void copy_xattrs(struct cache_set *c, struct bch_inode_unpacked *dst,
221 struct bch_hash_info hash_info = bch_hash_info_init(dst);
222 ssize_t size = llistxattr(src, NULL, 0);
224 die("listxattr error: %s", strerror(errno));
229 char *buf = malloc(size);
230 size = llistxattr(src, buf, size);
232 die("listxattr error: %s", strerror(errno));
234 for (const char *next, *attr = buf;
237 next = attr + strlen(attr) + 1;
239 /* max possible xattr val: */
240 static char val[64 << 10];
241 ssize_t val_size = lgetxattr(src, attr, val, sizeof(val));
244 die("error getting xattr val: %s", strerror(errno));
246 const struct xattr_handler *h = xattr_resolve_name(&attr);
248 int ret = __bch_xattr_set(c, dst->inum, &hash_info, attr,
249 val, val_size, 0, h->flags, NULL);
251 die("error creating xattr: %s", strerror(-ret));
/*
 * write_data - write @len bytes from @buf into @dst_inode at @dst_offset.
 *
 * @dst_offset and @len are byte values and must both be multiples of the
 * filesystem block size (enforced with BUG_ON). Space is reserved for
 * len >> 9 sectors, the write is issued through bch_write(), and the inode's
 * in-memory sector count is increased by the same amount. The caller is
 * responsible for writing the inode back.
 *
 * Note that the @buf parameter shadows the file-scope scratch buffer of the
 * same name.
 */
257 static void write_data(struct cache_set *c,
258 struct bch_inode_unpacked *dst_inode,
259 u64 dst_offset, void *buf, size_t len)
261 struct disk_reservation res;
262 struct bch_write_op op;
263 struct bch_write_bio bio;
267 BUG_ON(dst_offset & (block_bytes(c) - 1));
268 BUG_ON(len & (block_bytes(c) - 1));
270 closure_init_stack(&cl);
/* describe the caller's buffer with a bio */
273 bio.bio.bi_max_vecs = 1;
274 bio.bio.bi_io_vec = &bv;
275 bio.bio.bi_iter.bi_size = len;
276 bch_bio_map(&bio.bio, buf);
278 int ret = bch_disk_reservation_get(c, &res, len >> 9, 0);
280 die("error reserving space in new filesystem: %s", strerror(-ret));
/* the key position is (inode number, offset in 512-byte sectors) */
282 bch_write_op_init(&op, c, &bio, res, c->write_points,
283 POS(dst_inode->inum, dst_offset >> 9), NULL, 0);
284 closure_call(&op.cl, bch_write, NULL, &cl);
287 dst_inode->i_sectors += len >> 9;
/* 1 MiB page-aligned scratch buffer used when copying file data and symlink targets */
290 static char buf[1 << 20] __aligned(PAGE_SIZE);
/*
 * copy_data - copy the byte range [@start, @end) of @src_fd into @dst_inode.
 *
 * Data is moved through the file-scope scratch buffer in chunks of at most
 * sizeof(buf) bytes: each chunk is read with xpread() and written at the same
 * offset with write_data(), which requires block-aligned offsets and lengths.
 */
292 static void copy_data(struct cache_set *c,
293 struct bch_inode_unpacked *dst_inode,
294 int src_fd, u64 start, u64 end)
296 while (start < end) {
/* never read more than the scratch buffer holds */
297 unsigned len = min_t(u64, end - start, sizeof(buf));
299 xpread(src_fd, buf, len, start);
300 write_data(c, dst_inode, start, buf, len);
/*
 * link_data - make @dst reference existing on-disk data without copying it.
 *
 * @logical, @physical and @length are passed in as byte values and must be
 * multiples of the filesystem block size. Extent keys are inserted into
 * BTREE_ID_EXTENTS so that the file range starting at @logical points at the
 * device range starting at @physical. The range is split so that no extent
 * crosses a bucket boundary, and the inode's in-memory sector count is
 * increased as extents are added. Any failure is fatal.
 *
 * After the alignment checks the three values are handled as 512-byte sector
 * numbers: they are compared with bucket_to_sector() results and combined
 * with ca->mi.bucket_size without further conversion.
 */
305 static void link_data(struct cache_set *c, struct bch_inode_unpacked *dst,
306 u64 logical, u64 physical, u64 length)
308 struct cache *ca = c->cache[0];
310 BUG_ON(logical & (block_bytes(c) - 1));
311 BUG_ON(physical & (block_bytes(c) - 1));
312 BUG_ON(length & (block_bytes(c) - 1));
/* the range to link must lie within the device */
318 BUG_ON(physical + length > bucket_to_sector(ca, ca->mi.nbuckets));
321 struct bkey_i_extent *e;
/*
 * physical is already a sector number at this point, so it is passed to
 * sector_to_bucket() as is; shifting it again would select the wrong bucket
 * and therefore the wrong generation for the pointer below.
 */
323 u64 b = sector_to_bucket(ca, physical);
324 struct disk_reservation res;
/* limit this extent to what is left of the current bucket */
328 sectors = min(ca->mi.bucket_size -
329 (physical & (ca->mi.bucket_size - 1)),
332 e = bkey_extent_init(&k.k);
333 e->k.p.inode = dst->inum;
/* extent keys are positioned at the end of the range they cover */
334 e->k.p.offset = logical + sectors;
336 extent_ptr_append(e, (struct bch_extent_ptr) {
339 .gen = ca->buckets[b].mark.gen,
342 ret = bch_disk_reservation_get(c, &res, sectors,
343 BCH_DISK_RESERVATION_NOFAIL);
345 die("error reserving space in new filesystem: %s",
348 ret = bch_btree_insert(c, BTREE_ID_EXTENTS, &e->k_i,
349 &res, NULL, NULL, 0);
351 die("btree insert error %s", strerror(-ret));
353 bch_disk_reservation_put(c, &res);
355 dst->i_sectors += sectors;
362 static void copy_link(struct cache_set *c, struct bch_inode_unpacked *dst,
365 ssize_t ret = readlink(src, buf, sizeof(buf));
367 die("readlink error: %s", strerror(errno));
369 write_data(c, dst, 0, buf, round_up(ret, block_bytes(c)));
/*
 * copy_file - bring the contents of regular file @src into inode @dst.
 *
 * The file's extents are enumerated twice with fiemap_for_each(). The first
 * pass looks for extents flagged FIEMAP_EXTENT_UNKNOWN (the reaction is not
 * visible here; presumably the file is synced so the mapping becomes known).
 *
 * In the second pass every extent must have a block-aligned logical offset
 * and length. Extents flagged unknown, encoded, not-aligned or inline take a
 * separate path over the block-rounded logical range (the call is not visible
 * here; presumably the data is copied). All other extents must also be
 * physically block-aligned; they are recorded in @extents and linked in
 * place with link_data().
 *
 * @src_path is used for error messages only.
 */
372 static void copy_file(struct cache_set *c, struct bch_inode_unpacked *dst,
373 int src, char *src_path, ranges *extents)
375 struct fiemap_iter iter;
376 struct fiemap_extent e;
378 fiemap_for_each(src, iter, e)
379 if (e.fe_flags & FIEMAP_EXTENT_UNKNOWN) {
384 fiemap_for_each(src, iter, e) {
385 if ((e.fe_logical & (block_bytes(c) - 1)) ||
386 (e.fe_length & (block_bytes(c) - 1)))
387 die("Unaligned extent in %s - can't handle", src_path);
/* extents whose on-disk bytes cannot be referenced directly */
389 if (e.fe_flags & (FIEMAP_EXTENT_UNKNOWN|
390 FIEMAP_EXTENT_ENCODED|
391 FIEMAP_EXTENT_NOT_ALIGNED|
392 FIEMAP_EXTENT_DATA_INLINE)) {
395 round_down(e.fe_logical, block_bytes(c)),
396 round_up(e.fe_logical + e.fe_length,
401 if ((e.fe_physical & (block_bytes(c) - 1)))
402 die("Unaligned extent in %s - can't handle", src_path);
/* remember the physical range as in use, then reference it from dst */
404 range_add(extents, e.fe_physical, e.fe_length);
405 link_data(c, dst, e.fe_logical, e.fe_physical, e.fe_length);
/*
 * State shared by the recursive directory copy.
 *
 * hardlinks is indexed by the source inode number (st_ino) and holds the
 * destination inode number already created for it, so later names for the
 * same source inode become links instead of new files.
 */
409 struct copy_fs_state {
413 GENRADIX(u64) hardlinks;
/*
 * copy_dir - recursively copy the directory open at @src_fd into @dst.
 *
 * For each entry returned by readdir(): "." and ".." are skipped, as is the
 * entry whose inode number equals s->bcachefs_inum. Every other entry must
 * be on device s->dev. Regular files whose source inode has already been
 * copied are added as links; otherwise a new inode is created, its times and
 * xattrs are copied, type-specific content is copied (recursing for
 * directories), and the inode is written with update_inode().
 *
 * @src_path is the path of this directory, used to build child paths for
 * messages and recursion. Entries are opened by name, so the working
 * directory matters (a chdir-style call precedes the lookups; only its error
 * message is visible here).
 *
 * NOTE(review): no check of the fdopendir() result is visible before
 * readdir() is called on it -- confirm that a NULL return is handled.
 */
417 static void copy_dir(struct copy_fs_state *s,
419 struct bch_inode_unpacked *dst,
420 int src_fd, const char *src_path)
422 DIR *dir = fdopendir(src_fd);
/* errno is cleared first so that a NULL from readdir() can be told apart from an error */
425 while ((errno = 0), (d = readdir(dir))) {
426 struct bch_inode_unpacked inode;
430 die("chdir error: %s", strerror(errno));
433 xfstatat(src_fd, d->d_name, AT_SYMLINK_NOFOLLOW);
435 if (!strcmp(d->d_name, ".") ||
436 !strcmp(d->d_name, "..") ||
437 stat.st_ino == s->bcachefs_inum)
440 char *child_path = mprintf("%s/%s", src_path, d->d_name);
442 if (stat.st_dev != s->dev)
443 die("%s does not have correct st_dev!", child_path);
/* only regular files are tracked for hard link detection */
445 u64 *dst_inum = S_ISREG(stat.st_mode)
446 ? genradix_ptr_alloc(&s->hardlinks, stat.st_ino, GFP_KERNEL)
/* a non-zero slot means this source inode was copied before */
449 if (dst_inum && *dst_inum) {
450 create_link(c, dst, d->d_name, *dst_inum, S_IFREG);
454 inode = create_file(c, dst, d->d_name,
455 stat.st_uid, stat.st_gid,
456 stat.st_mode, stat.st_rdev);
459 *dst_inum = inode.inum;
461 copy_times(c, &inode, &stat);
462 copy_xattrs(c, &inode, d->d_name);
466 switch (mode_to_type(stat.st_mode)) {
468 fd = xopen(d->d_name, O_RDONLY|O_NOATIME);
469 copy_dir(s, c, &inode, fd, child_path);
473 inode.i_size = stat.st_size;
475 fd = xopen(d->d_name, O_RDONLY|O_NOATIME);
476 copy_file(c, &inode, fd, child_path, &s->extents);
480 inode.i_size = stat.st_size;
482 copy_link(c, &inode, d->d_name);
489 /* nothing else to copy for these: */
/* persist times, size and sector count accumulated above */
495 update_inode(c, &inode);
501 die("readdir error: %s", strerror(errno));
/*
 * reserve_new_fs_space - allocate the file that will hold the new filesystem.
 *
 * Opens @file_path for read/write, creating it with mode 0600; one of the two
 * open() variants adds O_EXCL so that an existing file is an error. The file
 * must live on device @dev. Its inode number is stored in *@bcachefs_inum and
 * @size bytes are reserved with fallocate().
 *
 * The file's extents are then read with fiemap. Every extent must be fully
 * mapped (no unknown/encoded/not-aligned/inline flags) and have a physical
 * offset and length that are multiples of @block_size, which is therefore
 * expected to be a power of two.
 *
 * Returns the physical byte ranges of the file after ranges_sort_merge().
 * Any failure is fatal.
 */
504 static ranges reserve_new_fs_space(const char *file_path, unsigned block_size,
505 u64 size, u64 *bcachefs_inum, dev_t dev,
509 ? open(file_path, O_RDWR|O_CREAT, 0600)
510 : open(file_path, O_RDWR|O_CREAT|O_EXCL, 0600);
512 die("Error creating %s for bcachefs metadata: %s",
513 file_path, strerror(errno));
515 struct stat statbuf = xfstat(fd);
517 if (statbuf.st_dev != dev)
518 die("bcachefs file has incorrect device");
520 *bcachefs_inum = statbuf.st_ino;
522 if (fallocate(fd, 0, 0, size))
523 die("Error reserving space for bcachefs metadata: %s",
528 struct fiemap_iter iter;
529 struct fiemap_extent e;
530 ranges extents = { NULL };
532 fiemap_for_each(fd, iter, e) {
533 if (e.fe_flags & (FIEMAP_EXTENT_UNKNOWN|
534 FIEMAP_EXTENT_ENCODED|
535 FIEMAP_EXTENT_NOT_ALIGNED|
536 FIEMAP_EXTENT_DATA_INLINE))
537 die("Unable to continue: metadata file not fully mapped");
539 if ((e.fe_physical & (block_size - 1)) ||
540 (e.fe_length & (block_size - 1)))
541 die("Unable to continue: unaligned extents in metadata file");
543 range_add(&extents, e.fe_physical, e.fe_length);
547 ranges_sort_merge(&extents);
/*
 * reserve_old_fs_space - pin the space still used by the old filesystem.
 *
 * Creates the file "old_migrated_filesystem" (mode 0400, uid/gid 0) under
 * @root_inode with a size equal to the whole device. After sorting and
 * merging the extent list, every hole between the listed ranges is linked
 * into that file at an identical logical and physical offset, and the inode
 * is written back.
 */
551 static void reserve_old_fs_space(struct cache_set *c,
552 struct bch_inode_unpacked *root_inode,
555 struct cache *ca = c->cache[0];
556 struct bch_inode_unpacked dst;
557 struct hole_iter iter;
560 dst = create_file(c, root_inode, "old_migrated_filesystem",
561 0, 0, S_IFREG|0400, 0);
/* device size in bytes */
562 dst.i_size = bucket_to_sector(ca, ca->mi.nbuckets) << 9;
564 ranges_sort_merge(extents);
566 for_each_hole(iter, *extents, bucket_to_sector(ca, ca->mi.nbuckets) << 9, i)
567 link_data(c, &dst, i.start, i.start, i.end - i.start);
569 update_inode(c, &dst);
/*
 * copy_fs - copy the directory tree open at @src_fd into filesystem @c.
 *
 * Looks up the root inode of the new filesystem, copies the times and xattrs
 * of the source root onto it, copies the tree with copy_dir(), creates the
 * file that pins the old filesystem's space with reserve_old_fs_space(), and
 * writes the root inode back. The per-copy state (extent list and hard link
 * table) is freed before returning.
 *
 * @bcachefs_inum is the source inode number to skip during the copy.
 */
572 static void copy_fs(struct cache_set *c, int src_fd, const char *src_path,
573 u64 bcachefs_inum, ranges *extents)
577 struct bch_inode_unpacked root_inode;
578 int ret = bch_inode_find_by_inum(c, BCACHE_ROOT_INO, &root_inode);
580 die("error looking up root directory: %s", strerror(-ret));
583 die("chdir error: %s", strerror(errno));
585 struct stat stat = xfstat(src_fd);
586 copy_times(c, &root_inode, &stat);
/* "." is resolved relative to the current working directory */
587 copy_xattrs(c, &root_inode, ".");
589 struct copy_fs_state s = {
590 .bcachefs_inum = bcachefs_inum,
596 copy_dir(&s, c, &root_inode, src_fd, src_path);
598 reserve_old_fs_space(c, &root_inode, &s.extents);
600 update_inode(c, &root_inode);
602 darray_free(s.extents);
603 genradix_free(&s.hardlinks);
/*
 * find_superblock_space - choose where the new superblock goes.
 *
 * Scans @extents (byte ranges) for the first range with at least 128 KiB
 * available at or after byte offset 256 KiB. On success dev->sb_offset is
 * set to that position in 512-byte sectors and dev->sb_end to 256 sectors
 * (128 KiB) beyond it. Dies if no range qualifies.
 */
606 static void find_superblock_space(ranges extents, struct dev_opts *dev)
609 darray_foreach(i, extents) {
/* never place the superblock below 256 KiB */
610 u64 offset = max(256ULL << 10, i->start);
612 if (offset + (128 << 10) <= i->end) {
613 dev->sb_offset = offset >> 9;
614 dev->sb_end = dev->sb_offset + 256;
619 die("Couldn't find a valid location for superblock");
/* migrate_usage - print the help text of "bcache migrate" to stdout. */
622 static void migrate_usage(void)
624 puts("bcache migrate - migrate an existing filesystem to bcachefs\n"
625 "Usage: bcache migrate [OPTION]...\n"
628 " -f fs Root of filesystem to migrate(s)\n"
629 " --encrypted Enable whole filesystem encryption (chacha20/poly1305)\n"
630 " --no_passphrase Don't encrypt master encryption key\n"
631 " -F Force, even if metadata file already exists\n"
632 " -h Display this help and exit\n"
633 "Report bugs to <linux-bcache@vger.kernel.org>");
/*
 * Long options accepted by "bcache migrate". Neither takes an argument;
 * getopt_long() returns 'e' for --encrypted and 'p' for --no_passphrase.
 */
636 static const struct option migrate_opts[] = {
637 { "encrypted", no_argument, NULL, 'e' },
638 { "no_passphrase", no_argument, NULL, 'p' },
/*
 * cmd_migrate - entry point of "bcache migrate".
 *
 * Converts the filesystem mounted at the given root into a bcachefs
 * filesystem on the same block device, in place:
 *
 *  1. Parse options and check that the path is a mount point and a directory.
 *  2. Find the backing block device and pick a block size.
 *  3. Reserve space for the new filesystem in a file named "bcachefs" in the
 *     root of the old one, and find room for a superblock inside it.
 *  4. Optionally read (and confirm) an encryption passphrase.
 *  5. Format, open and start the new filesystem, with everything outside the
 *     reserved space marked as unusable.
 *  6. Copy the old tree into it and reopen it with opts.nochanges set for an
 *     fsck pass.
 *
 * Every failure is fatal via die().
 */
642 int cmd_migrate(int argc, char *argv[])
644 struct format_opts format_opts = format_opts_default();
645 char *fs_path = NULL;
647 bool no_passphrase = false, force = false;
650 while ((opt = getopt_long(argc, argv, "f:Fh",
651 migrate_opts, NULL)) != -1)
657 format_opts.encrypted = true;
660 no_passphrase = true;
671 die("Please specify a filesystem to migrate");
673 if (!path_is_fs_root(fs_path))
674 die("%s is not a filesystem root", fs_path);
676 int fs_fd = xopen(fs_path, O_RDONLY|O_NOATIME);
677 struct stat stat = xfstat(fs_fd);
679 if (!S_ISDIR(stat.st_mode))
680 die("%s is not a directory", fs_path);
682 struct dev_opts dev = { 0 };
/* the block device backing the filesystem being migrated */
684 dev.path = dev_t_to_path(stat.st_dev);
685 dev.fd = xopen(dev.path, O_RDWR);
/* use the smaller of the old filesystem's and the device's block size, in bytes */
687 block_size = min_t(unsigned, stat.st_blksize,
688 get_blocksize(dev.path, dev.fd) << 9);
690 BUG_ON(!is_power_of_2(block_size) || block_size < 512);
691 format_opts.block_size = block_size >> 9;
694 char *file_path = mprintf("%s/bcachefs", fs_path);
/* reserve a fifth of the device for the new filesystem */
696 ranges extents = reserve_new_fs_space(file_path,
697 block_size, get_size(dev.path, dev.fd) / 5,
698 &bcachefs_inum, stat.st_dev, force);
700 find_superblock_space(extents, &dev);
702 if (format_opts.encrypted && !no_passphrase) {
703 format_opts.passphrase = read_passphrase("Enter passphrase: ");
/* only ask for confirmation when a person is typing */
705 if (isatty(STDIN_FILENO)) {
707 read_passphrase("Enter same passphrase again: ");
709 if (strcmp(format_opts.passphrase, pass2)) {
/* wipe both copies before exiting */
710 memzero_explicit(format_opts.passphrase,
711 strlen(format_opts.passphrase));
712 memzero_explicit(pass2, strlen(pass2));
713 die("Passphrases do not match");
716 memzero_explicit(pass2, strlen(pass2));
721 struct bch_sb *sb = bcache_format(format_opts, &dev, 1);
722 u64 sb_offset = le64_to_cpu(sb->layout.sb_offset[0]);
724 if (format_opts.passphrase)
725 add_bcache_key(sb, format_opts.passphrase);
/* the file name printed below must match the one created by reserve_old_fs_space() */
729 printf("Creating new filesystem on %s in space reserved at %s\n"
731 " mount -t bcache -o sb=%llu %s dir\n"
733 "After verifying that the new filesystem is correct, to create a\n"
734 "superblock at the default offset and finish the migration run\n"
735 " bcache migrate_superblock -d %s -o %llu\n"
737 "The new filesystem will have a file at /old_migrated_filesystem\n"
738 "referencing all disk space that might be used by the existing\n"
739 "filesystem. That file can be deleted once the old filesystem is\n"
740 "no longer needed (and should be deleted prior to running\n"
741 "bcache migrate_superblock)\n",
742 dev.path, file_path, sb_offset, dev.path,
743 dev.path, sb_offset);
745 struct bch_opts opts = bch_opts_empty();
746 struct cache_set *c = NULL;
747 char *path[1] = { dev.path };
754 err = bch_fs_open(path, 1, opts, &c);
756 die("Error opening new filesystem: %s", err);
/* buckets outside the reserved file still belong to the old filesystem */
758 mark_unreserved_space(c, extents);
760 err = bch_fs_start(c);
762 die("Error starting new filesystem: %s", err);
764 copy_fs(c, fs_fd, fs_path, bcachefs_inum, &extents);
768 printf("Migrate complete, running fsck:\n");
/* reopen with nochanges set and fsck errors answered "no" */
769 opts.nostart = false;
770 opts.nochanges = true;
771 fsck_err_opt = FSCK_ERR_NO;
773 err = bch_fs_open(path, 1, opts, &c);
775 die("Error opening new filesystem: %s", err);
778 printf("fsck complete\n");
/* migrate_superblock_usage - print the help text of "bcache migrate_superblock" to stdout. */
782 static void migrate_superblock_usage(void)
784 puts("bcache migrate_superblock - create default superblock after migrating\n"
785 "Usage: bcache migrate_superblock [OPTION]...\n"
788 " -d device Device to create superblock for\n"
789 " -o offset Offset of existing superblock\n"
790 " -h Display this help and exit\n"
791 "Report bugs to <linux-bcache@vger.kernel.org>");
/*
 * cmd_migrate_superblock - entry point of "bcache migrate_superblock".
 *
 * Reads the superblock found at the given offset on the given device and
 * adds the default location (BCH_SB_SECTOR) to the front of its layout: the
 * existing offsets are shifted up by one slot and the count is incremented
 * before the superblock is written back.
 *
 * Dies if an option is missing or invalid, if the layout has no free slot,
 * or if the layout already lists the default location.
 */
794 int cmd_migrate_superblock(int argc, char *argv[])
800 while ((opt = getopt(argc, argv, "d:o:h")) != -1)
/* the offset is parsed as a decimal number */
806 ret = kstrtou64(optarg, 10, &offset);
808 die("Invalid offset");
811 migrate_superblock_usage();
816 die("Please specify a device");
819 die("Please specify offset of existing superblock");
821 int fd = xopen(dev, O_RDWR);
822 struct bch_sb *sb = __bcache_super_read(fd, offset);
/* one more entry must fit, otherwise the memmove below would overrun the array */
824 if (sb->layout.nr_superblocks >= ARRAY_SIZE(sb->layout.sb_offset))
825 die("Can't add superblock: no space left in superblock layout");
827 for (unsigned i = 0; i < sb->layout.nr_superblocks; i++)
828 if (le64_to_cpu(sb->layout.sb_offset[i]) == BCH_SB_SECTOR)
829 die("Superblock layout already has default superblock");
/* make room at index 0; source and destination overlap, hence memmove */
831 memmove(&sb->layout.sb_offset[1],
832 &sb->layout.sb_offset[0],
833 sb->layout.nr_superblocks * sizeof(u64));
834 sb->layout.nr_superblocks++;
836 sb->layout.sb_offset[0] = cpu_to_le64(BCH_SB_SECTOR);
838 bcache_super_write(fd, sb);