8 #include <sys/sysmacros.h>
12 #include <attr/xattr.h>
14 #include <linux/fiemap.h>
16 #include <linux/stat.h>
18 #include <uuid/uuid.h>
22 #include "libbcachefs.h"
24 #include <linux/dcache.h>
25 #include <linux/generic-radix-tree.h>
26 #include <linux/xattr.h>
27 #include "libbcachefs/bcachefs.h"
28 #include "libbcachefs/btree_update.h"
29 #include "libbcachefs/buckets.h"
30 #include "libbcachefs/dirent.h"
31 #include "libbcachefs/fs.h"
32 #include "libbcachefs/inode.h"
33 #include "libbcachefs/io.h"
34 #include "libbcachefs/str_hash.h"
35 #include "libbcachefs/super.h"
36 #include "libbcachefs/xattr.h"
/*
 * Build a /dev/... path string for a block device number.
 *
 * The device is looked up through its /sys/dev/block/MAJOR:MINOR symlink;
 * the link target is read with readlink() and searched for its last slash
 * with strrchr(). Failures are reported through die().
 *
 * NOTE(review): the result comes from mprintf(); presumably it is heap
 * allocated and owned by the caller - confirm.
 */
38 static char *dev_t_to_path(dev_t dev)
40 char link[PATH_MAX], *p;
/* sysfs names each block device node by its major:minor pair */
43 char *sysfs_dev = mprintf("/sys/dev/block/%u:%u",
44 major(dev), minor(dev));
45 ret = readlink(sysfs_dev, link, sizeof(link));
/* a result that fills the whole buffer is rejected together with errors */
48 if (ret < 0 || ret >= sizeof(link))
49 die("readlink error while looking up block device: %m");
/* find the last path separator in the link target */
53 p = strrchr(link, '/');
55 die("error looking up device name");
58 return mprintf("/dev/%s", p);
/*
 * Compare path against the mount points listed in /proc/self/mountinfo.
 *
 * Every line is split on single spaces with strsep(): the first four fields
 * are discarded and the fifth is compared to path with strcmp().
 *
 * NOTE(review): the return statements are not visible in this excerpt;
 * presumably true on a match and false otherwise - confirm.
 */
61 static bool path_is_fs_root(char *path)
63 char *line = NULL, *p, *mount;
68 f = fopen("/proc/self/mountinfo", "r");
70 die("Error getting mount information");
/* getline() manages the line buffer; line starts out NULL */
72 while (getline(&line, &n, f) != -1) {
75 strsep(&p, " "); /* mount id */
76 strsep(&p, " "); /* parent id */
77 strsep(&p, " "); /* dev */
78 strsep(&p, " "); /* root */
/* fifth field: the mount point */
79 mount = strsep(&p, " ");
/* plain string equality; no normalization of path is visible here */
82 if (mount && !strcmp(path, mount))
/*
 * Set the nouse bit in the bucket mark of buckets on device 0 that lie in
 * the holes between the given extents.
 *
 * Holes are enumerated with for_each_hole() up to
 * bucket_to_sector(ca, nbuckets) << 9.
 *
 * NOTE(review): hole bounds are shifted right by 9 before being turned into
 * a bucket number, so extents presumably hold byte offsets - confirm.
 */
93 static void mark_unreserved_space(struct bch_fs *c, ranges extents)
95 struct bch_dev *ca = c->devs[0];
96 struct hole_iter iter;
99 for_each_hole(iter, extents, bucket_to_sector(ca, ca->mi.nbuckets) << 9, i) {
100 struct bucket_mark new;
/* zero-length hole */
103 if (i.start == i.end)
/* first bucket touched by the hole */
106 b = sector_to_bucket(ca, i.start >> 9);
108 struct bucket *g = bucket(ca, b);
109 bucket_cmpxchg(g, new, new.nouse = 1);
/* keep going while the current bucket starts before the end of the hole */
111 } while (bucket_to_sector(ca, b) << 9 < i.end);
/*
 * Pack an unpacked inode with bch2_inode_pack() and insert the packed key
 * into the inodes btree (BTREE_ID_INODES).
 *
 * A failure is reported through die(); the message text says
 * error creating file even though this helper is also used for updates.
 */
115 static void update_inode(struct bch_fs *c,
116 struct bch_inode_unpacked *inode)
118 struct bkey_inode_buf packed;
121 bch2_inode_pack(&packed, inode);
122 ret = bch2_btree_insert(c, BTREE_ID_INODES, &packed.inode.k_i,
123 NULL, NULL, NULL, 0);
/* strerror(-ret): ret is treated as a negative errno value */
125 die("error creating file: %s", strerror(-ret));
/*
 * Create a directory entry called name inside parent that refers to inode
 * number inum.
 *
 * The dirent type is derived from mode with mode_to_type(), hashing uses the
 * hash info of the parent inode, and BCH_HASH_SET_MUST_CREATE is passed to
 * bch2_dirent_create(). A failure is reported through die().
 */
128 static void create_dirent(struct bch_fs *c,
129 struct bch_inode_unpacked *parent,
130 const char *name, u64 inum, mode_t mode)
132 struct bch_hash_info parent_hash_info = bch2_hash_info_init(c, parent);
/* wrap the C string in a qstr: only len and name are filled in */
133 struct qstr qname = { { { .len = strlen(name), } }, .name = name };
135 int ret = bch2_dirent_create(c, parent->bi_inum, &parent_hash_info,
136 mode_to_type(mode), &qname,
137 inum, NULL, BCH_HASH_SET_MUST_CREATE);
139 die("error creating file: %s", strerror(-ret));
/*
 * Add another name for an already existing inode: look the inode up by
 * number, write it back with update_inode(), then create a dirent for it in
 * parent.
 *
 * NOTE(review): whatever is changed on the inode between lookup and
 * write-back (presumably the link count) is not visible in this excerpt.
 */
145 static void create_link(struct bch_fs *c,
146 struct bch_inode_unpacked *parent,
147 const char *name, u64 inum, mode_t mode)
149 struct bch_inode_unpacked inode;
150 int ret = bch2_inode_find_by_inum(c, inum, &inode);
152 die("error looking up hardlink: %s", strerror(-ret));
155 update_inode(c, &inode);
157 create_dirent(c, parent, name, inum, mode);
/*
 * Create a new inode and link it into parent.
 *
 * The inode is initialised from uid, gid, mode and rdev with
 * bch2_inode_init(), allocated with bch2_inode_create(), and then given a
 * dirent through create_dirent() using the new inode number.
 *
 * NOTE(review): the return statement is not visible here; presumably the
 * new unpacked inode is returned by value - confirm.
 */
160 static struct bch_inode_unpacked create_file(struct bch_fs *c,
161 struct bch_inode_unpacked *parent,
163 uid_t uid, gid_t gid,
164 mode_t mode, dev_t rdev)
166 struct bch_inode_unpacked new_inode;
169 bch2_inode_init(c, &new_inode, uid, gid, mode, rdev, parent);
/* allocation is hinted by c->unused_inode_hint */
171 ret = bch2_inode_create(c, &new_inode, BLOCKDEV_INODE_MAX, 0,
172 &c->unused_inode_hint);
174 die("error creating file: %s", strerror(-ret));
176 create_dirent(c, parent, name, new_inode.bi_inum, mode);
/*
 * Loop header over an array of handler pointers: handler is assigned from
 * *handlers and handlers is post-incremented on every step, so the variable
 * passed as handlers is modified by the loop.
 *
 * NOTE(review): the loop condition line is not visible in this excerpt;
 * presumably iteration stops at a NULL entry - confirm.
 */
181 #define for_each_xattr_handler(handlers, handler) \
183 for ((handler) = *(handlers)++; \
185 (handler) = *(handlers)++)
/*
 * Search bch2_xattr_handlers for the handler that applies to *name.
 *
 * Each handler prefix is compared against *name with strcmp_prefix().
 * The visible error returns are ERR_PTR(-EINVAL) and ERR_PTR(-EOPNOTSUPP).
 *
 * NOTE(review): name is passed by reference, presumably so the matched
 * prefix can be stripped for the caller; the assignment is not visible
 * here - confirm.
 */
187 static const struct xattr_handler *xattr_resolve_name(const char **name)
189 const struct xattr_handler **handlers = bch2_xattr_handlers;
190 const struct xattr_handler *handler;
192 for_each_xattr_handler(handlers, handler) {
195 n = strcmp_prefix(*name, xattr_prefix(handler));
/* true when exactly one of these holds: no handler prefix, empty remainder */
197 if (!handler->prefix ^ !*n) {
200 return ERR_PTR(-EINVAL);
/* last return of the function; presumably reached when no handler matched */
206 return ERR_PTR(-EOPNOTSUPP);
/*
 * Copy the access, modification and change timestamps (st_atim, st_mtim,
 * st_ctim) from src into dst, converting each one with
 * timespec_to_bch2_time().
 */
209 static void copy_times(struct bch_fs *c, struct bch_inode_unpacked *dst,
212 dst->bi_atime = timespec_to_bch2_time(c, src->st_atim);
213 dst->bi_mtime = timespec_to_bch2_time(c, src->st_mtim);
214 dst->bi_ctime = timespec_to_bch2_time(c, src->st_ctim);
/*
 * Copy the extended attributes of the path src onto inode dst.
 *
 * Attribute names are listed with llistxattr(), each value is fetched with
 * lgetxattr(), and the pair is stored with __bch2_xattr_set() using the
 * flags of the handler returned by xattr_resolve_name().
 * Errors are reported through die().
 */
217 static void copy_xattrs(struct bch_fs *c, struct bch_inode_unpacked *dst,
220 struct bch_hash_info hash_info = bch2_hash_info_init(c, dst);
/* name list buffer lives on the stack, sized XATTR_LIST_MAX */
222 char attrs[XATTR_LIST_MAX];
223 ssize_t attrs_size = llistxattr(src, attrs, sizeof(attrs));
225 die("listxattr error: %m");
227 const char *next, *attr;
229 attr < attrs + attrs_size;
/* names are NUL terminated and packed back to back */
231 next = attr + strlen(attr) + 1;
233 char val[XATTR_SIZE_MAX];
234 ssize_t val_size = lgetxattr(src, attr, val, sizeof(val));
237 die("error getting xattr val: %m");
/*
 * attr is passed by address, so the resolver may change it.
 * NOTE(review): no IS_ERR() check on h is visible before h->flags is
 * read below, although xattr_resolve_name() can return ERR_PTR values.
 */
239 const struct xattr_handler *h = xattr_resolve_name(&attr);
241 int ret = __bch2_xattr_set(c, dst->bi_inum, &hash_info, attr,
242 val, val_size, 0, h->flags, NULL);
244 die("error creating xattr: %s", strerror(-ret));
/*
 * Write len bytes from buf into dst_inode at byte offset dst_offset through
 * a bch_write_op, then add len >> 9 to dst_inode->bi_sectors.
 *
 * Both dst_offset and len must be multiples of block_bytes(c); this is
 * enforced with BUG_ON().
 *
 * NOTE(review): the wait for write completion is not visible in this
 * excerpt - confirm it happens before the function returns.
 */
248 static void write_data(struct bch_fs *c,
249 struct bch_inode_unpacked *dst_inode,
250 u64 dst_offset, void *buf, size_t len)
252 struct bch_write_op op;
256 BUG_ON(dst_offset & (block_bytes(c) - 1));
257 BUG_ON(len & (block_bytes(c) - 1));
259 closure_init_stack(&cl);
/* the bio is set up with a vector count of 1 and then mapped onto buf */
261 bio_init(&op.wbio.bio, &bv, 1);
262 op.wbio.bio.bi_iter.bi_size = len;
263 bch2_bio_map(&op.wbio.bio, buf);
265 bch2_write_op_init(&op, c);
267 op.write_point = writepoint_hashed(0);
/* key position: inode number plus the offset shifted right by 9 */
268 op.pos = POS(dst_inode->bi_inum, dst_offset >> 9);
/* reserve len >> 9 with the configured data_replicas count */
270 int ret = bch2_disk_reservation_get(c, &op.res, len >> 9,
271 c->opts.data_replicas, 0);
273 die("error reserving space in new filesystem: %s", strerror(-ret));
275 closure_call(&op.cl, bch2_write, NULL, &cl);
278 dst_inode->bi_sectors += len >> 9;
/*
 * File-scope 1 MiB buffer (1 << 20 bytes) aligned to PAGE_SIZE.
 * It is referenced by copy_data() and copy_link() in this file.
 */
281 static char buf[1 << 20] __aligned(PAGE_SIZE);
/*
 * Copy file data from src_fd into dst_inode while start < end.
 *
 * Each round reads at most sizeof(buf) bytes at offset start with xpread()
 * and writes them at the same offset with write_data().
 *
 * NOTE(review): the statement advancing start is not visible in this
 * excerpt; presumably start grows by len each round - confirm.
 */
283 static void copy_data(struct bch_fs *c,
284 struct bch_inode_unpacked *dst_inode,
285 int src_fd, u64 start, u64 end)
287 while (start < end) {
/* chunk size: the remaining bytes, capped at the size of buf */
288 unsigned len = min_t(u64, end - start, sizeof(buf));
290 xpread(src_fd, buf, len, start);
291 write_data(c, dst_inode, start, buf, len);
/*
 * Make dst reference data already present on device 0 instead of copying
 * it: extent keys mapping logical to physical are built and inserted into
 * the extents btree, and dst->bi_sectors is increased accordingly.
 *
 * logical, physical and length must all be multiples of block_bytes(c).
 *
 * NOTE(review): the arguments are alignment-checked against block_bytes(c)
 * but later compared with sector counts; the unit conversion between those
 * steps is not visible in this excerpt - confirm.
 */
296 static void link_data(struct bch_fs *c, struct bch_inode_unpacked *dst,
297 u64 logical, u64 physical, u64 length)
299 struct bch_dev *ca = c->devs[0];
301 BUG_ON(logical & (block_bytes(c) - 1));
302 BUG_ON(physical & (block_bytes(c) - 1));
303 BUG_ON(length & (block_bytes(c) - 1));
/* the range must end within the device */
309 BUG_ON(physical + length > bucket_to_sector(ca, ca->mi.nbuckets));
312 struct bkey_i_extent *e;
314 u64 b = sector_to_bucket(ca, physical >> 9);
315 struct disk_reservation res;
/* first operand of min(): distance from physical to the end of its bucket */
319 sectors = min(ca->mi.bucket_size -
320 (physical & (ca->mi.bucket_size - 1)),
323 e = bkey_extent_init(&k.k);
324 e->k.p.inode = dst->bi_inum;
/* the key position is set to the end of the extent */
325 e->k.p.offset = logical + sectors;
327 extent_ptr_append(e, (struct bch_extent_ptr) {
/* pointer generation is taken from the current bucket mark */
330 .gen = bucket(ca, b)->mark.gen,
/* reservation for one replica, requested with the NOFAIL flag */
333 ret = bch2_disk_reservation_get(c, &res, sectors, 1,
334 BCH_DISK_RESERVATION_NOFAIL);
336 die("error reserving space in new filesystem: %s",
339 bch2_check_mark_super(c, BCH_DATA_USER,
340 bch2_bkey_devs(extent_i_to_s_c(e).s_c));
342 ret = bch2_btree_insert(c, BTREE_ID_EXTENTS, &e->k_i,
343 &res, NULL, NULL, 0);
345 die("btree insert error %s", strerror(-ret));
347 bch2_disk_reservation_put(c, &res);
349 dst->bi_sectors += sectors;
/*
 * Store the target of the symlink src as the data of inode dst.
 *
 * The target is read with readlink() into the file-scope buf and written at
 * offset 0 with write_data(), with the length rounded up to a multiple of
 * block_bytes(c).
 *
 * NOTE(review): the bytes of buf between ret and the rounded-up length are
 * not visibly cleared here, so they may hold earlier buffer contents.
 */
356 static void copy_link(struct bch_fs *c, struct bch_inode_unpacked *dst,
359 ssize_t ret = readlink(src, buf, sizeof(buf));
361 die("readlink error: %m");
363 write_data(c, dst, 0, buf, round_up(ret, block_bytes(c)));
/*
 * Transfer the contents of the regular file src (named src_path in error
 * messages) into dst, driven by its FIEMAP extent list.
 *
 * Two passes over the extents are made. The second pass rejects extents
 * whose logical offset or length is not block aligned, treats flagged
 * extents and extents below 1 MiB on the device separately, and for the
 * rest records the physical range in extents and calls link_data() so that
 * dst references the data in place.
 *
 * NOTE(review): the calls made for the two special cases are not visible in
 * this excerpt; presumably the data is copied with copy_data() - confirm.
 */
366 static void copy_file(struct bch_fs *c, struct bch_inode_unpacked *dst,
367 int src, char *src_path, ranges *extents)
369 struct fiemap_iter iter;
370 struct fiemap_extent e;
/* first pass: look for extents flagged FIEMAP_EXTENT_UNKNOWN */
372 fiemap_for_each(src, iter, e)
373 if (e.fe_flags & FIEMAP_EXTENT_UNKNOWN) {
/* second pass: handle every extent */
378 fiemap_for_each(src, iter, e) {
379 if ((e.fe_logical & (block_bytes(c) - 1)) ||
380 (e.fe_length & (block_bytes(c) - 1)))
381 die("Unaligned extent in %s - can't handle", src_path);
/* extents carrying any of these flags get the block-rounded range below */
383 if (e.fe_flags & (FIEMAP_EXTENT_UNKNOWN|
384 FIEMAP_EXTENT_ENCODED|
385 FIEMAP_EXTENT_NOT_ALIGNED|
386 FIEMAP_EXTENT_DATA_INLINE)) {
389 round_down(e.fe_logical, block_bytes(c)),
390 round_up(e.fe_logical + e.fe_length,
/* extents whose physical offset is below 1 MiB are also special-cased */
395 if (e.fe_physical < 1 << 20) {
398 round_down(e.fe_logical, block_bytes(c)),
399 round_up(e.fe_logical + e.fe_length,
404 if ((e.fe_physical & (block_bytes(c) - 1)))
405 die("Unaligned extent in %s - can't handle", src_path);
/* remember the physical range, then reference it from dst */
407 range_add(extents, e.fe_physical, e.fe_length);
408 link_data(c, dst, e.fe_logical, e.fe_physical, e.fe_length);
/*
 * State shared across the recursive directory copy.
 *
 * NOTE(review): only the hardlinks member is visible in this excerpt;
 * copy_dir() and copy_fs() also use members named bcachefs_inum, dev and
 * extents.
 */
412 struct copy_fs_state {
/* indexed by source st_ino in copy_dir(); holds the new inode number */
416 GENRADIX(u64) hardlinks;
/*
 * Recursively copy the directory open on src_fd (named src_path in
 * messages) into the directory inode dst.
 *
 * For each entry other than dot, dot-dot and the bcachefs metadata file, a
 * new inode is created (or a hard link, when the source inode number of a
 * regular file was seen before), times and xattrs are copied, type specific
 * contents are copied, and the inode is written with update_inode().
 * Entries on a different st_dev than s->dev abort the run through die().
 *
 * NOTE(review): case labels and several guards are not visible in this
 * excerpt; comments below describe only the visible statements.
 */
420 static void copy_dir(struct copy_fs_state *s,
422 struct bch_inode_unpacked *dst,
423 int src_fd, const char *src_path)
425 DIR *dir = fdopendir(src_fd);
/* errno is cleared before every readdir() call */
428 while ((errno = 0), (d = readdir(dir))) {
429 struct bch_inode_unpacked inode;
433 die("chdir error: %m");
/* AT_SYMLINK_NOFOLLOW: stat the entry itself rather than a link target */
436 xfstatat(src_fd, d->d_name, AT_SYMLINK_NOFOLLOW);
/* skip dot, dot-dot and the file whose inode is s->bcachefs_inum */
438 if (!strcmp(d->d_name, ".") ||
439 !strcmp(d->d_name, "..") ||
440 stat.st_ino == s->bcachefs_inum)
443 char *child_path = mprintf("%s/%s", src_path, d->d_name);
445 if (stat.st_dev != s->dev)
446 die("%s does not have correct st_dev!", child_path);
/* regular files get a hardlink slot keyed by the source inode number */
448 u64 *dst_inum = S_ISREG(stat.st_mode)
449 ? genradix_ptr_alloc(&s->hardlinks, stat.st_ino, GFP_KERNEL)
/* a non-zero slot means this source inode was already copied */
452 if (dst_inum && *dst_inum) {
453 create_link(c, dst, d->d_name, *dst_inum, S_IFREG);
457 inode = create_file(c, dst, d->d_name,
458 stat.st_uid, stat.st_gid,
459 stat.st_mode, stat.st_rdev);
/* record the new inode number in the hardlink slot */
462 *dst_inum = inode.bi_inum;
464 copy_times(c, &inode, &stat);
465 copy_xattrs(c, &inode, d->d_name);
469 switch (mode_to_type(stat.st_mode)) {
/* the entry is opened by its relative name and copied recursively */
471 fd = xopen(d->d_name, O_RDONLY|O_NOATIME);
472 copy_dir(s, c, &inode, fd, child_path);
476 inode.bi_size = stat.st_size;
478 fd = xopen(d->d_name, O_RDONLY|O_NOATIME);
479 copy_file(c, &inode, fd, child_path, &s->extents);
483 inode.bi_size = stat.st_size;
485 copy_link(c, &inode, d->d_name);
492 /* nothing else to copy for these: */
/* persist the size and sector counts accumulated above */
498 update_inode(c, &inode);
504 die("readdir error: %m");
/*
 * Create the file at file_path that reserves space for the new filesystem
 * and collect the physical extents backing it.
 *
 * The file must live on device dev; its inode number is stored in
 * *bcachefs_inum. size bytes are allocated with fallocate(), then every
 * FIEMAP extent is validated (no unknown, encoded, not-aligned or inline
 * flags, physical offset and length multiples of block_size) and added to
 * the extents list, which is finally sorted and merged.
 *
 * NOTE(review): the last parameter and the return statement are not visible
 * in this excerpt; presumably a force flag selects the open() variant and
 * the extents list is returned - confirm.
 */
507 static ranges reserve_new_fs_space(const char *file_path, unsigned block_size,
508 u64 size, u64 *bcachefs_inum, dev_t dev,
/* one variant tolerates an existing file, the other adds O_EXCL */
512 ? open(file_path, O_RDWR|O_CREAT, 0600)
513 : open(file_path, O_RDWR|O_CREAT|O_EXCL, 0600);
515 die("Error creating %s for bcachefs metadata: %m",
518 struct stat statbuf = xfstat(fd);
520 if (statbuf.st_dev != dev)
521 die("bcachefs file has incorrect device");
523 *bcachefs_inum = statbuf.st_ino;
/* mode 0, offset 0: allocate the first size bytes of the file */
525 if (fallocate(fd, 0, 0, size))
526 die("Error reserving space for bcachefs metadata: %m");
530 struct fiemap_iter iter;
531 struct fiemap_extent e;
532 ranges extents = { NULL };
534 fiemap_for_each(fd, iter, e) {
535 if (e.fe_flags & (FIEMAP_EXTENT_UNKNOWN|
536 FIEMAP_EXTENT_ENCODED|
537 FIEMAP_EXTENT_NOT_ALIGNED|
538 FIEMAP_EXTENT_DATA_INLINE))
539 die("Unable to continue: metadata file not fully mapped");
/* the mask test assumes block_size is a power of two */
541 if ((e.fe_physical & (block_size - 1)) ||
542 (e.fe_length & (block_size - 1)))
543 die("Unable to continue: unaligned extents in metadata file");
545 range_add(&extents, e.fe_physical, e.fe_length);
549 ranges_sort_merge(&extents);
/*
 * Create the file old_migrated_filesystem under root_inode (regular file,
 * mode 0400, uid and gid 0).
 *
 * Its size is set to bucket_to_sector(ca, nbuckets) << 9 for device 0.
 * After sorting and merging extents, every hole between them is linked
 * into the file with link_data() using the same value for the logical and
 * the physical offset, and the inode is then written with update_inode().
 */
553 static void reserve_old_fs_space(struct bch_fs *c,
554 struct bch_inode_unpacked *root_inode,
557 struct bch_dev *ca = c->devs[0];
558 struct bch_inode_unpacked dst;
559 struct hole_iter iter;
562 dst = create_file(c, root_inode, "old_migrated_filesystem",
563 0, 0, S_IFREG|0400, 0);
564 dst.bi_size = bucket_to_sector(ca, ca->mi.nbuckets) << 9;
566 ranges_sort_merge(extents);
/* identity mapping: file offset equals device offset */
568 for_each_hole(iter, *extents, bucket_to_sector(ca, ca->mi.nbuckets) << 9, i)
569 link_data(c, &dst, i.start, i.start, i.end - i.start);
571 update_inode(c, &dst);
/*
 * Copy the filesystem tree rooted at src_fd (named src_path in messages)
 * into the new filesystem c.
 *
 * The root inode (BCACHEFS_ROOT_INO) is looked up and receives the times
 * and xattrs of the source root, the tree is copied with copy_dir(),
 * reserve_old_fs_space() is called with the extents collected during the
 * copy, and the root inode is written back. The copy state is freed at the
 * end.
 */
574 static void copy_fs(struct bch_fs *c, int src_fd, const char *src_path,
575 u64 bcachefs_inum, ranges *extents)
579 struct bch_inode_unpacked root_inode;
580 int ret = bch2_inode_find_by_inum(c, BCACHEFS_ROOT_INO, &root_inode);
582 die("error looking up root directory: %s", strerror(-ret));
585 die("chdir error: %m");
587 struct stat stat = xfstat(src_fd);
588 copy_times(c, &root_inode, &stat);
/* xattrs are read from the relative path dot */
589 copy_xattrs(c, &root_inode, ".");
591 struct copy_fs_state s = {
592 .bcachefs_inum = bcachefs_inum,
598 copy_dir(&s, c, &root_inode, src_fd, src_path);
600 reserve_old_fs_space(c, &root_inode, &s.extents);
602 update_inode(c, &root_inode);
604 darray_free(s.extents);
605 genradix_free(&s.hardlinks);
/*
 * Choose a superblock location inside one of the reserved extents and
 * store it in dev->sb_offset and dev->sb_end.
 *
 * For each extent the start is raised to at least 256 KiB and rounded up
 * to a multiple of dev->bucket_size << 9, and the end is rounded down to
 * the same multiple. An extent qualifies when at least 128 KiB fit between
 * the two. die() is called with a message saying no location was found.
 */
608 static void find_superblock_space(ranges extents, struct dev_opts *dev)
612 darray_foreach(i, extents) {
613 u64 start = round_up(max(256ULL << 10, i->start),
614 dev->bucket_size << 9);
615 u64 end = round_down(i->end,
616 dev->bucket_size << 9);
/* 128 << 10 is 128 KiB */
618 if (start + (128 << 10) <= end) {
/* start >> 9 and the span of 256 are in 512-byte units (256 * 512 = 128 KiB) */
619 dev->sb_offset = start >> 9;
620 dev->sb_end = dev->sb_offset + 256;
625 die("Couldn't find a valid location for superblock");
/* Print the help text of the migrate subcommand to stdout with puts(). */
628 static void migrate_usage(void)
630 puts("bcachefs migrate - migrate an existing filesystem to bcachefs\n"
631 "Usage: bcachefs migrate [OPTION]...\n"
634 " -f fs Root of filesystem to migrate(s)\n"
635 " --encrypted Enable whole filesystem encryption (chacha20/poly1305)\n"
636 " --no_passphrase Don't encrypt master encryption key\n"
637 " -F Force, even if metadata file already exists\n"
638 " -h Display this help and exit\n"
639 "Report bugs to <linux-bcache@vger.kernel.org>");
/*
 * Long options accepted by cmd_migrate(). Neither takes an argument;
 * getopt_long() reports them with the values e and p respectively.
 */
642 static const struct option migrate_opts[] = {
643 { "encrypted", no_argument, NULL, 'e' },
644 { "no_passphrase", no_argument, NULL, 'p' },
/*
 * Entry point of the migrate subcommand.
 *
 * Visible sequence: parse options, check that the given path is a mount
 * point and a directory, open the backing block device, reserve space in a
 * file named bcachefs under the filesystem root, pick a superblock location
 * inside that space, format the new filesystem, open it without starting
 * it, mark unreserved space, start it, copy the old tree with copy_fs(),
 * and finally reopen the filesystem with the nochanges option for fsck.
 *
 * NOTE(review): the option switch and several guards are not visible in
 * this excerpt; comments below describe only the visible statements.
 */
648 int cmd_migrate(int argc, char *argv[])
650 struct format_opts format_opts = format_opts_default();
651 char *fs_path = NULL;
653 bool no_passphrase = false, force = false;
/* short options: f takes an argument, F and h do not */
656 while ((opt = getopt_long(argc, argv, "f:Fh",
657 migrate_opts, NULL)) != -1)
663 format_opts.encrypted = true;
666 no_passphrase = true;
/* NOTE(review): the next two messages contain misspellings of filesystem */
677 die("Please specify a filesytem to migrate");
679 if (!path_is_fs_root(fs_path))
680 die("%s is not a filysestem root", fs_path);
682 int fs_fd = xopen(fs_path, O_RDONLY|O_NOATIME);
683 struct stat stat = xfstat(fs_fd);
685 if (!S_ISDIR(stat.st_mode))
686 die("%s is not a directory", fs_path);
688 struct dev_opts dev = dev_opts_default();
/* the block device backing the source filesystem is opened read-write */
690 dev.path = dev_t_to_path(stat.st_dev);
691 dev.fd = xopen(dev.path, O_RDWR);
/* use the smaller of st_blksize and get_blocksize() << 9 */
693 block_size = min_t(unsigned, stat.st_blksize,
694 get_blocksize(dev.path, dev.fd) << 9);
696 BUG_ON(!is_power_of_2(block_size) || block_size < 512);
/* format_opts stores the block size shifted right by 9 */
697 format_opts.block_size = block_size >> 9;
700 char *file_path = mprintf("%s/bcachefs", fs_path);
702 bch2_pick_bucket_size(format_opts, &dev);
/* the reservation is one fifth of what get_size() reports for the device */
704 ranges extents = reserve_new_fs_space(file_path,
705 block_size, get_size(dev.path, dev.fd) / 5,
706 &bcachefs_inum, stat.st_dev, force);
708 find_superblock_space(extents, &dev);
710 if (format_opts.encrypted && !no_passphrase)
711 format_opts.passphrase = read_passphrase_twice("Enter passphrase: ");
713 struct bch_sb *sb = bch2_format(format_opts, &dev, 1);
714 u64 sb_offset = le64_to_cpu(sb->layout.sb_offset[0]);
716 if (format_opts.passphrase)
717 bch2_add_key(sb, format_opts.passphrase);
/*
 * NOTE(review): the text below names /old_migrated_filestem, while
 * reserve_old_fs_space() creates a file called old_migrated_filesystem.
 */
721 printf("Creating new filesystem on %s in space reserved at %s\n"
723 " mount -t bcachefs -o sb=%llu %s dir\n"
725 "After verifying that the new filesystem is correct, to create a\n"
726 "superblock at the default offset and finish the migration run\n"
727 " bcachefs migrate_superblock -d %s -o %llu\n"
729 "The new filesystem will have a file at /old_migrated_filestem\n"
730 "referencing all disk space that might be used by the existing\n"
731 "filesystem. That file can be deleted once the old filesystem is\n"
732 "no longer needed (and should be deleted prior to running\n"
733 "bcachefs migrate_superblock)\n",
734 dev.path, file_path, sb_offset, dev.path,
735 dev.path, sb_offset);
737 struct bch_opts opts = bch2_opts_empty();
738 struct bch_fs *c = NULL;
739 char *path[1] = { dev.path };
/* open at the custom superblock offset, with nostart and noexcl set */
741 opt_set(opts, sb, sb_offset);
742 opt_set(opts, nostart, true);
743 opt_set(opts, noexcl, true);
745 c = bch2_fs_open(path, 1, opts);
747 die("Error opening new filesystem: %s", strerror(-PTR_ERR(c)));
/* called after the open and before bch2_fs_start() */
749 mark_unreserved_space(c, extents);
751 const char *err = bch2_fs_start(c);
753 die("Error starting new filesystem: %s", err);
755 copy_fs(c, fs_fd, fs_path, bcachefs_inum, &extents);
759 printf("Migrate complete, running fsck:\n");
/* second open: nostart cleared and nochanges set */
760 opt_set(opts, nostart, false);
761 opt_set(opts, nochanges, true);
763 c = bch2_fs_open(path, 1, opts);
765 die("Error opening new filesystem: %s", strerror(-PTR_ERR(c)));
768 printf("fsck complete\n");
/* Print the help text of the migrate_superblock subcommand with puts(). */
772 static void migrate_superblock_usage(void)
774 puts("bcachefs migrate_superblock - create default superblock after migrating\n"
775 "Usage: bcachefs migrate_superblock [OPTION]...\n"
778 " -d device Device to create superblock for\n"
779 " -o offset Offset of existing superblock\n"
780 " -h Display this help and exit\n"
781 "Report bugs to <linux-bcache@vger.kernel.org>");
/*
 * Entry point of the migrate_superblock subcommand.
 *
 * Reads the superblock found at the given offset on the given device, adds
 * BCH_SB_SECTOR as the first entry of its layout (shifting the existing
 * offsets up by one slot), and writes the superblock back with
 * bch2_super_write().
 *
 * It calls die() when the layout has no free slot or already contains
 * BCH_SB_SECTOR.
 */
784 int cmd_migrate_superblock(int argc, char *argv[])
/* d and o take an argument, h does not */
790 while ((opt = getopt(argc, argv, "d:o:h")) != -1)
/* the offset is parsed as a base 10 number */
796 ret = kstrtou64(optarg, 10, &offset);
798 die("Invalid offset");
801 migrate_superblock_usage();
806 die("Please specify a device");
809 die("Please specify offset of existing superblock");
811 int fd = xopen(dev, O_RDWR);
812 struct bch_sb *sb = __bch2_super_read(fd, offset);
/* one more entry must fit into the sb_offset array */
814 if (sb->layout.nr_superblocks >= ARRAY_SIZE(sb->layout.sb_offset))
815 die("Can't add superblock: no space left in superblock layout");
818 for (i = 0; i < sb->layout.nr_superblocks; i++)
819 if (le64_to_cpu(sb->layout.sb_offset[i]) == BCH_SB_SECTOR)
820 die("Superblock layout already has default superblock");
/* shift the existing offsets up one slot; memmove() allows the overlap */
822 memmove(&sb->layout.sb_offset[1],
823 &sb->layout.sb_offset[0],
824 sb->layout.nr_superblocks * sizeof(u64));
825 sb->layout.nr_superblocks++;
/* slot 0 now holds the default location, stored little endian */
827 sb->layout.sb_offset[0] = cpu_to_le64(BCH_SB_SECTOR);
829 bch2_super_write(fd, sb);