8 #include <sys/sysmacros.h>
12 #include <attr/xattr.h>
14 #include <linux/fiemap.h>
16 #include <linux/stat.h>
18 #include <uuid/uuid.h>
22 #include "libbcachefs.h"
24 #include <linux/dcache.h>
25 #include <linux/generic-radix-tree.h>
26 #include <linux/xattr.h>
27 #include "libbcachefs/bcachefs.h"
28 #include "libbcachefs/btree_update.h"
29 #include "libbcachefs/buckets.h"
30 #include "libbcachefs/dirent.h"
31 #include "libbcachefs/fs.h"
32 #include "libbcachefs/inode.h"
33 #include "libbcachefs/io.h"
34 #include "libbcachefs/replicas.h"
35 #include "libbcachefs/str_hash.h"
36 #include "libbcachefs/super.h"
37 #include "libbcachefs/xattr.h"
/*
 * dev_t_to_path() - turn a block device number into a "/dev/<name>" string.
 *
 * Reads the /sys/dev/block/<major>:<minor> symlink and formats the result
 * with mprintf() from the position strrchr() finds at the last '/' of the
 * link target. Lookup failures terminate the program through die().
 */
39 static char *dev_t_to_path(dev_t dev)
41 char link[PATH_MAX], *p;
/* Path of the sysfs symlink for this major:minor pair. */
44 char *sysfs_dev = mprintf("/sys/dev/block/%u:%u",
45 major(dev), minor(dev));
46 ret = readlink(sysfs_dev, link, sizeof(link));
/* A result that fills the whole buffer is treated as an error as well. */
49 if (ret < 0 || ret >= sizeof(link))
50 die("readlink error while looking up block device: %m");
/*
 * NOTE(review): readlink() does not NUL-terminate its output; confirm
 * link[] is terminated before strrchr() runs on it.
 */
54 p = strrchr(link, '/');
56 die("error looking up device name");
59 return mprintf("/dev/%s", p);
/*
 * path_is_fs_root() - check whether path is listed as a mount point.
 *
 * Scans /proc/self/mountinfo one line at a time, discards the first four
 * space-separated fields and compares the fifth one against path with
 * strcmp(), so path has to match the mountinfo spelling exactly.
 */
62 static bool path_is_fs_root(const char *path)
64 char *line = NULL, *p, *mount;
69 f = fopen("/proc/self/mountinfo", "r");
71 die("Error getting mount information");
/* getline() (re)allocates line as needed and returns -1 at EOF or error. */
73 while (getline(&line, &n, f) != -1) {
/* Each strsep() call consumes one field; the trailing comments name it. */
76 strsep(&p, " "); /* mount id */
77 strsep(&p, " "); /* parent id */
78 strsep(&p, " "); /* dev */
79 strsep(&p, " "); /* root */
80 mount = strsep(&p, " ");
/* mount is NULL when the line ran out of fields before the fifth one. */
83 if (mount && !strcmp(path, mount))
/*
 * mark_unreserved_space() - flag buckets that lie outside the given extents.
 *
 * Walks every hole between the ranges in extents, bounded by the size of
 * device 0, and sets the nouse flag in the mark of the buckets the hole
 * touches.
 */
94 static void mark_unreserved_space(struct bch_fs *c, ranges extents)
96 struct bch_dev *ca = c->devs[0];
97 struct hole_iter iter;
/* The upper bound is the device's sector count shifted left by 9 (bytes). */
100 for_each_hole(iter, extents, bucket_to_sector(ca, ca->mi.nbuckets) << 9, i) {
101 struct bucket_mark new;
/* An empty hole has nothing to mark. */
104 if (i.start == i.end)
/* Hole offsets are shifted right by 9 before the bucket lookup. */
107 b = sector_to_bucket(ca, i.start >> 9);
109 struct bucket *g = bucket(ca, b);
110 bucket_cmpxchg(g, new, new.nouse = 1);
/* Loop while bucket b still starts before the end of the hole. */
112 } while (bucket_to_sector(ca, b) << 9 < i.end);
/*
 * update_inode() - pack inode and insert it into the inodes btree.
 *
 * The unpacked inode is serialised with bch2_inode_pack() and handed to
 * bch2_btree_insert() for BTREE_ID_INODES. Failure is reported through
 * die() using strerror(-ret).
 *
 * NOTE(review): the message says "creating" although callers in this file
 * also use this helper to write back inodes that already exist.
 */
116 static void update_inode(struct bch_fs *c,
117 struct bch_inode_unpacked *inode)
119 struct bkey_inode_buf packed;
122 bch2_inode_pack(&packed, inode);
123 ret = bch2_btree_insert(c, BTREE_ID_INODES, &packed.inode.k_i,
126 die("error creating file: %s", strerror(-ret));
/*
 * create_dirent() - add a directory entry called name to parent.
 *
 * The entry refers to inode number inum and its type is derived from mode
 * with mode_to_type(). BCH_HASH_SET_MUST_CREATE is passed to
 * bch2_dirent_create(); failure is reported through die().
 */
129 static void create_dirent(struct bch_fs *c,
130 struct bch_inode_unpacked *parent,
131 const char *name, u64 inum, mode_t mode)
/* Hash parameters are taken from the parent directory's inode. */
133 struct bch_hash_info parent_hash_info = bch2_hash_info_init(c, parent);
/* qstr wrapping name together with its strlen(). */
134 struct qstr qname = { { { .len = strlen(name), } }, .name = name };
136 int ret = bch2_dirent_create(c, parent->bi_inum, &parent_hash_info,
137 mode_to_type(mode), &qname,
138 inum, NULL, BCH_HASH_SET_MUST_CREATE);
140 die("error creating file: %s", strerror(-ret));
/*
 * create_link() - add another directory entry for an existing inode.
 *
 * Loads the inode numbered inum, writes it back with update_inode() and
 * then creates the entry name in parent via create_dirent().
 *
 * NOTE(review): presumably the inode's link count is adjusted between the
 * lookup and the write-back - confirm.
 */
146 static void create_link(struct bch_fs *c,
147 struct bch_inode_unpacked *parent,
148 const char *name, u64 inum, mode_t mode)
150 struct bch_inode_unpacked inode;
151 int ret = bch2_inode_find_by_inum(c, inum, &inode);
153 die("error looking up hardlink: %s", strerror(-ret));
156 update_inode(c, &inode);
158 create_dirent(c, parent, name, inum, mode);
/*
 * create_file() - create a new inode plus its directory entry in parent.
 *
 * Initialises the inode from uid, gid, mode and rdev (parent is passed to
 * bch2_inode_init() too), allocates it with bch2_inode_create() and links
 * it in through create_dirent(). Errors end the program through die().
 * The declared return type is the unpacked inode, by value.
 */
161 static struct bch_inode_unpacked create_file(struct bch_fs *c,
162 struct bch_inode_unpacked *parent,
164 uid_t uid, gid_t gid,
165 mode_t mode, dev_t rdev)
167 struct bch_inode_unpacked new_inode;
170 bch2_inode_init(c, &new_inode, uid, gid, mode, rdev, parent);
/* Inode allocation uses BLOCKDEV_INODE_MAX, 0 and c->unused_inode_hint. */
172 ret = bch2_inode_create(c, &new_inode, BLOCKDEV_INODE_MAX, 0,
173 &c->unused_inode_hint);
175 die("error creating file: %s", strerror(-ret));
177 create_dirent(c, parent, name, new_inode.bi_inum, mode);
/*
 * Loop over an array of xattr handler pointers: handler receives each
 * element in turn. The handlers argument itself is advanced by the
 * post-increments, so pass a scratch copy of the array pointer.
 * NOTE(review): the loop presumably stops at a NULL entry - confirm.
 */
182 #define for_each_xattr_handler(handlers, handler) \
184 for ((handler) = *(handlers)++; \
186 (handler) = *(handlers)++)
/*
 * xattr_resolve_name() - find the handler responsible for an xattr name.
 *
 * Tests *name against xattr_prefix() of each entry in bch2_xattr_handlers.
 * The error results visible here are ERR_PTR(-EINVAL) and
 * ERR_PTR(-EOPNOTSUPP).
 *
 * name is passed by reference. NOTE(review): presumably it is advanced past
 * the matched prefix on success - confirm.
 */
188 static const struct xattr_handler *xattr_resolve_name(const char **name)
/* Local copy of the table pointer; the iteration macro advances it. */
190 const struct xattr_handler **handlers = bch2_xattr_handlers;
191 const struct xattr_handler *handler;
193 for_each_xattr_handler(handlers, handler) {
196 n = strcmp_prefix(*name, xattr_prefix(handler));
/*
 * True when exactly one of these holds: handler->prefix is NULL, or the
 * string n points at is empty.
 */
198 if (!handler->prefix ^ !*n) {
201 return ERR_PTR(-EINVAL);
207 return ERR_PTR(-EOPNOTSUPP);
/*
 * copy_times() - copy access, modification and change times into dst.
 *
 * Each of st_atim, st_mtim and st_ctim from src is converted with
 * timespec_to_bch2_time() and stored in the matching bi_* field.
 */
210 static void copy_times(struct bch_fs *c, struct bch_inode_unpacked *dst,
213 dst->bi_atime = timespec_to_bch2_time(c, src->st_atim);
214 dst->bi_mtime = timespec_to_bch2_time(c, src->st_mtim);
215 dst->bi_ctime = timespec_to_bch2_time(c, src->st_ctim);
/*
 * copy_xattrs() - copy the extended attributes of src onto inode dst.
 *
 * The attribute names are listed with llistxattr() and each value is read
 * with lgetxattr(), i.e. the l* variants of both calls are used. Every
 * attribute is then stored with bch2_xattr_set() inside bch2_trans_do().
 * Errors end the program through die().
 */
218 static void copy_xattrs(struct bch_fs *c, struct bch_inode_unpacked *dst,
221 struct bch_hash_info hash_info = bch2_hash_info_init(c, dst);
/* Name list buffer sized for the XATTR_LIST_MAX limit, kept on the stack. */
223 char attrs[XATTR_LIST_MAX];
224 ssize_t attrs_size = llistxattr(src, attrs, sizeof(attrs));
226 die("listxattr error: %m");
/*
 * The list is a sequence of NUL-terminated names. next is derived from
 * attr before xattr_resolve_name() gets a chance to modify attr.
 */
228 const char *next, *attr;
230 attr < attrs + attrs_size;
232 next = attr + strlen(attr) + 1;
/* Value buffer sized for the XATTR_SIZE_MAX limit. */
234 char val[XATTR_SIZE_MAX];
235 ssize_t val_size = lgetxattr(src, attr, val, sizeof(val));
238 die("error getting xattr val: %m");
/*
 * NOTE(review): xattr_resolve_name() can return an ERR_PTR(); confirm h is
 * checked before h->flags is read below.
 */
240 const struct xattr_handler *h = xattr_resolve_name(&attr);
242 int ret = bch2_trans_do(c, NULL, BTREE_INSERT_ATOMIC,
243 bch2_xattr_set(&trans, dst->bi_inum, &hash_info, attr,
244 val, val_size, h->flags, 0));
246 die("error creating xattr: %s", strerror(-ret));
/* File-scope scratch buffer of 1 << 20 bytes (1 MiB), aligned to PAGE_SIZE. */
250 static char buf[1 << 20] __aligned(PAGE_SIZE);
/* Number of PAGE_SIZE pages that fit in buf. */
251 static const size_t buf_pages = sizeof(buf) / PAGE_SIZE;
/*
 * write_data() - write len bytes from buf into dst_inode at dst_offset.
 *
 * dst_offset and len must both be multiples of block_bytes(c) (BUG_ON
 * otherwise). The write op is positioned at sector dst_offset >> 9 of the
 * inode, uses one replica, and is started by handing bch2_write to
 * closure_call(). dst_inode->bi_sectors grows by len >> 9.
 *
 * The buf parameter shadows the file-scope buffer of the same name.
 */
253 static void write_data(struct bch_fs *c,
254 struct bch_inode_unpacked *dst_inode,
255 u64 dst_offset, void *buf, size_t len)
258 struct bch_write_op op;
/* One bio_vec per page of the largest buffer this file passes in. */
259 struct bio_vec bv[buf_pages];
263 BUG_ON(dst_offset & (block_bytes(c) - 1));
264 BUG_ON(len & (block_bytes(c) - 1));
266 closure_init_stack(&cl);
/* Build a bio over buf covering len bytes. */
268 bio_init(&o.op.wbio.bio, o.bv, buf_pages);
269 o.op.wbio.bio.bi_iter.bi_size = len;
270 bch2_bio_map(&o.op.wbio.bio, buf);
272 bch2_write_op_init(&o.op, c, bch2_opts_to_inode_opts(c->opts));
273 o.op.write_point = writepoint_hashed(0);
274 o.op.nr_replicas = 1;
275 o.op.pos = POS(dst_inode->bi_inum, dst_offset >> 9);
/*
 * NOTE(review): the reservation is taken for c->opts.data_replicas while
 * the op itself asks for nr_replicas = 1 - confirm this is intended.
 */
277 int ret = bch2_disk_reservation_get(c, &o.op.res, len >> 9,
278 c->opts.data_replicas, 0);
280 die("error reserving space in new filesystem: %s", strerror(-ret));
282 closure_call(&o.op.cl, bch2_write, NULL, &cl);
285 dst_inode->bi_sectors += len >> 9;
/*
 * copy_data() - copy the byte range [start, end) of src_fd into dst_inode.
 *
 * Data moves through the file-scope buf in pieces of at most sizeof(buf)
 * bytes. Each piece is zero-padded up to a multiple of block_bytes(c)
 * before it is handed to write_data() at offset start.
 */
288 static void copy_data(struct bch_fs *c,
289 struct bch_inode_unpacked *dst_inode,
290 int src_fd, u64 start, u64 end)
292 while (start < end) {
293 unsigned len = min_t(u64, end - start, sizeof(buf));
/* Bytes needed to reach the next block boundary. */
294 unsigned pad = round_up(len, block_bytes(c)) - len;
296 xpread(src_fd, buf, len, start);
297 memset(buf + len, 0, pad);
299 write_data(c, dst_inode, start, buf, len + pad);
/*
 * link_data() - make dst reference data that already sits on device 0.
 *
 * Inserts extent keys into BTREE_ID_EXTENTS that map logical to physical;
 * no data is read or written here. logical, physical and length must be
 * multiples of block_bytes(c). Each extent covers at most the remainder of
 * the bucket containing physical, and dst->bi_sectors grows by the size of
 * every extent inserted. Errors end the program through die().
 */
304 static void link_data(struct bch_fs *c, struct bch_inode_unpacked *dst,
305 u64 logical, u64 physical, u64 length)
307 struct bch_dev *ca = c->devs[0];
309 BUG_ON(logical & (block_bytes(c) - 1));
310 BUG_ON(physical & (block_bytes(c) - 1));
311 BUG_ON(length & (block_bytes(c) - 1));
/*
 * The range is checked against the device's sector count.
 * NOTE(review): presumably the byte arguments have been converted to
 * sectors by this point - confirm.
 */
317 BUG_ON(physical + length > bucket_to_sector(ca, ca->mi.nbuckets));
320 struct bkey_i_extent *e;
322 u64 b = sector_to_bucket(ca, physical);
323 struct disk_reservation res;
/*
 * Distance from physical to the end of its bucket; the mask form
 * presumably relies on bucket_size being a power of two.
 */
327 sectors = min(ca->mi.bucket_size -
328 (physical & (ca->mi.bucket_size - 1)),
/* The key position is the end of the extent: logical + sectors. */
331 e = bkey_extent_init(&k.k);
332 e->k.p.inode = dst->bi_inum;
333 e->k.p.offset = logical + sectors;
/* The pointer carries the current generation of bucket b. */
335 extent_ptr_append(e, (struct bch_extent_ptr) {
338 .gen = bucket(ca, b)->mark.gen,
341 set_bit(b, ca->buckets_dirty);
/* Reservation for a single replica, requested with the NOFAIL flag. */
343 ret = bch2_disk_reservation_get(c, &res, sectors, 1,
344 BCH_DISK_RESERVATION_NOFAIL);
346 die("error reserving space in new filesystem: %s",
349 bch2_mark_bkey_replicas(c, BCH_DATA_USER,
350 extent_i_to_s_c(e).s_c);
352 ret = bch2_btree_insert(c, BTREE_ID_EXTENTS, &e->k_i,
355 die("btree insert error %s", strerror(-ret));
/* The reservation is released after the insert. */
357 bch2_disk_reservation_put(c, &res);
359 dst->bi_sectors += sectors;
/*
 * copy_link() - store the target of symlink src as the data of dst.
 *
 * The target is read with readlink() into the file-scope buf and written at
 * offset 0 with the length rounded up to block_bytes(c).
 *
 * NOTE(review): the bytes between the readlink() result and the rounded-up
 * length are not visibly cleared, so they may hold leftovers from earlier
 * use of buf - confirm.
 */
366 static void copy_link(struct bch_fs *c, struct bch_inode_unpacked *dst,
369 ssize_t ret = readlink(src, buf, sizeof(buf));
371 die("readlink error: %m");
373 write_data(c, dst, 0, buf, round_up(ret, block_bytes(c)));
/*
 * copy_file() - transfer the contents of a regular file into dst.
 *
 * The file's extents are enumerated with fiemap twice. The first pass looks
 * for extents flagged FIEMAP_EXTENT_UNKNOWN. In the second pass an extent
 * is either copied through copy_data() or, when it can stay where it is,
 * recorded in extents and mapped in place with link_data().
 * Extents whose logical offset, length or (for linked extents) physical
 * offset is not a multiple of block_bytes(c) abort the program.
 */
376 static void copy_file(struct bch_fs *c, struct bch_inode_unpacked *dst,
377 int src_fd, u64 src_size,
378 char *src_path, ranges *extents)
380 struct fiemap_iter iter;
381 struct fiemap_extent e;
/* First pass: react to extents whose location is not known yet. */
383 fiemap_for_each(src_fd, iter, e)
384 if (e.fe_flags & FIEMAP_EXTENT_UNKNOWN) {
/* Second pass: copy or link every extent. */
389 fiemap_for_each(src_fd, iter, e) {
390 if ((e.fe_logical & (block_bytes(c) - 1)) ||
391 (e.fe_length & (block_bytes(c) - 1)))
392 die("Unaligned extent in %s - can't handle", src_path);
/*
 * Extents carrying any of these flags are copied via copy_data(); the
 * copy length is limited with src_size - e.fe_logical.
 */
394 if (e.fe_flags & (FIEMAP_EXTENT_UNKNOWN|
395 FIEMAP_EXTENT_ENCODED|
396 FIEMAP_EXTENT_NOT_ALIGNED|
397 FIEMAP_EXTENT_DATA_INLINE)) {
398 copy_data(c, dst, src_fd, e.fe_logical,
399 min(src_size - e.fe_logical,
405 * if the data is below 1 MB, copy it so it doesn't conflict
406 * with bcachefs's potentially larger superblock:
408 if (e.fe_physical < 1 << 20) {
409 copy_data(c, dst, src_fd, e.fe_logical,
410 min(src_size - e.fe_logical,
/* Linking in place additionally needs a block-aligned physical offset. */
415 if ((e.fe_physical & (block_bytes(c) - 1)))
416 die("Unaligned extent in %s - can't handle", src_path);
/* Record the physical range, then map it into dst without copying. */
418 range_add(extents, e.fe_physical, e.fe_length);
419 link_data(c, dst, e.fe_logical, e.fe_physical, e.fe_length);
/* State carried through the recursive directory copy done by copy_dir(). */
423 struct copy_fs_state {
/*
 * Indexed by source inode number (st_ino); copy_dir() stores the
 * destination inode number here so later hard links can reuse it.
 */
427 GENRADIX(u64) hardlinks;
/*
 * copy_dir() - recreate the contents of one source directory under dst.
 *
 * Reads the directory behind src_fd entry by entry. For each entry a new
 * inode is created with create_file(), or an extra link is made with
 * create_link() when the source inode was already copied. Times and xattrs
 * are carried over, the entry's contents are copied according to its type
 * and the inode is written back with update_inode().
 * src_path is only used to build paths for messages and recursion.
 */
431 static void copy_dir(struct copy_fs_state *s,
433 struct bch_inode_unpacked *dst,
434 int src_fd, const char *src_path)
436 DIR *dir = fdopendir(src_fd);
/* errno is cleared first so an error can be told apart from end-of-dir. */
439 while ((errno = 0), (d = readdir(dir))) {
440 struct bch_inode_unpacked inode;
444 die("chdir error: %m");
/* AT_SYMLINK_NOFOLLOW: stat the entry itself, not a symlink's target. */
447 xfstatat(src_fd, d->d_name, AT_SYMLINK_NOFOLLOW);
/* Skip ".", ".." and the inode recorded in s->bcachefs_inum. */
449 if (!strcmp(d->d_name, ".") ||
450 !strcmp(d->d_name, "..") ||
451 stat.st_ino == s->bcachefs_inum)
454 char *child_path = mprintf("%s/%s", src_path, d->d_name);
/* Entries on a different device than s->dev abort the migration. */
456 if (stat.st_dev != s->dev)
457 die("%s does not have correct st_dev!", child_path);
/* Hard-link tracking: a slot keyed by st_ino is allocated for regular files. */
459 u64 *dst_inum = S_ISREG(stat.st_mode)
460 ? genradix_ptr_alloc(&s->hardlinks, stat.st_ino, GFP_KERNEL)
/* A non-zero slot means this source inode already has a destination inode. */
463 if (dst_inum && *dst_inum) {
464 create_link(c, dst, d->d_name, *dst_inum, S_IFREG);
468 inode = create_file(c, dst, d->d_name,
469 stat.st_uid, stat.st_gid,
470 stat.st_mode, stat.st_rdev);
/* Remember the new inode number for later links to the same source inode. */
473 *dst_inum = inode.bi_inum;
475 copy_times(c, &inode, &stat);
476 copy_xattrs(c, &inode, d->d_name);
/* Type-specific content copy. */
480 switch (mode_to_type(stat.st_mode)) {
/* Open the entry and recurse into it. */
482 fd = xopen(d->d_name, O_RDONLY|O_NOATIME);
483 copy_dir(s, c, &inode, fd, child_path);
/* File contents go through copy_file(); bi_size comes from st_size. */
487 inode.bi_size = stat.st_size;
489 fd = xopen(d->d_name, O_RDONLY|O_NOATIME);
490 copy_file(c, &inode, fd, stat.st_size,
491 child_path, &s->extents);
/* Link target is stored through copy_link(); bi_size comes from st_size. */
495 inode.bi_size = stat.st_size;
497 copy_link(c, &inode, d->d_name);
504 /* nothing else to copy for these: */
/* Persist the fields filled in above. */
510 update_inode(c, &inode);
516 die("readdir error: %m");
/*
 * reserve_new_fs_space() - create and map the file that will hold bcachefs.
 *
 * Opens file_path with O_CREAT, with or without O_EXCL, and fallocate()s
 * size bytes for it. The file must live on device dev; its inode number is
 * stored in *bcachefs_inum. The file's fiemap extents are then collected:
 * every extent has to be free of the UNKNOWN/ENCODED/NOT_ALIGNED/
 * DATA_INLINE flags and have physical offset and length that are multiples
 * of block_size (the mask test presumably relies on block_size being a
 * power of two). The collected physical ranges are sorted and merged.
 * The declared return type is ranges. Errors end the program through die().
 */
519 static ranges reserve_new_fs_space(const char *file_path, unsigned block_size,
520 u64 size, u64 *bcachefs_inum, dev_t dev,
524 ? open(file_path, O_RDWR|O_CREAT, 0600)
525 : open(file_path, O_RDWR|O_CREAT|O_EXCL, 0600);
527 die("Error creating %s for bcachefs metadata: %m",
530 struct stat statbuf = xfstat(fd);
532 if (statbuf.st_dev != dev)
533 die("bcachefs file has incorrect device");
535 *bcachefs_inum = statbuf.st_ino;
/* Mode 0, offset 0: allocate the first size bytes of the file. */
537 if (fallocate(fd, 0, 0, size))
538 die("Error reserving space for bcachefs metadata: %m");
542 struct fiemap_iter iter;
543 struct fiemap_extent e;
544 ranges extents = { NULL };
546 fiemap_for_each(fd, iter, e) {
547 if (e.fe_flags & (FIEMAP_EXTENT_UNKNOWN|
548 FIEMAP_EXTENT_ENCODED|
549 FIEMAP_EXTENT_NOT_ALIGNED|
550 FIEMAP_EXTENT_DATA_INLINE))
551 die("Unable to continue: metadata file not fully mapped");
553 if ((e.fe_physical & (block_size - 1)) ||
554 (e.fe_length & (block_size - 1)))
555 die("Unable to continue: unaligned extents in metadata file");
557 range_add(&extents, e.fe_physical, e.fe_length);
561 ranges_sort_merge(&extents);
/*
 * reserve_old_fs_space() - pin the space not covered by extents in a file.
 *
 * Creates "old_migrated_filesystem" under root_inode (uid 0, gid 0, mode
 * S_IFREG|0400) with a size equal to device 0 in bytes. After sorting and
 * merging extents, every hole between them is mapped into that file by
 * link_data() at the same logical and physical offset.
 */
565 static void reserve_old_fs_space(struct bch_fs *c,
566 struct bch_inode_unpacked *root_inode,
569 struct bch_dev *ca = c->devs[0];
570 struct bch_inode_unpacked dst;
571 struct hole_iter iter;
574 dst = create_file(c, root_inode, "old_migrated_filesystem",
575 0, 0, S_IFREG|0400, 0);
/* Device sector count shifted left by 9 gives the size in bytes. */
576 dst.bi_size = bucket_to_sector(ca, ca->mi.nbuckets) << 9;
578 ranges_sort_merge(extents);
/* Identity mapping: logical offset == physical offset for each hole. */
580 for_each_hole(iter, *extents, bucket_to_sector(ca, ca->mi.nbuckets) << 9, i)
581 link_data(c, &dst, i.start, i.start, i.end - i.start);
583 update_inode(c, &dst);
/*
 * copy_fs() - copy the whole source tree into the new filesystem.
 *
 * Loads the root inode (BCACHEFS_ROOT_INO), copies times and xattrs of the
 * source root onto it, walks the tree with copy_dir(), reserves the
 * remaining space with reserve_old_fs_space() and finally writes the root
 * inode back. The per-copy state is released before returning.
 */
586 static void copy_fs(struct bch_fs *c, int src_fd, const char *src_path,
587 u64 bcachefs_inum, ranges *extents)
591 struct bch_inode_unpacked root_inode;
592 int ret = bch2_inode_find_by_inum(c, BCACHEFS_ROOT_INO, &root_inode);
594 die("error looking up root directory: %s", strerror(-ret));
597 die("chdir error: %m");
599 struct stat stat = xfstat(src_fd);
600 copy_times(c, &root_inode, &stat);
/* "." is the path handed to copy_xattrs() for the source root. */
601 copy_xattrs(c, &root_inode, ".");
603 struct copy_fs_state s = {
604 .bcachefs_inum = bcachefs_inum,
610 copy_dir(&s, c, &root_inode, src_fd, src_path);
612 reserve_old_fs_space(c, &root_inode, &s.extents);
614 update_inode(c, &root_inode);
616 darray_free(s.extents);
617 genradix_free(&s.hardlinks);
/*
 * find_superblock_space() - choose where the superblock goes.
 *
 * Examines the ranges in extents: the start is raised to at least 256 KiB
 * and rounded up to a multiple of dev->bucket_size << 9, the end is rounded
 * down to one. A range with at least 128 KiB between the two is used to
 * set dev->sb_offset (start >> 9) and dev->sb_end (sb_offset + 256).
 * The final die() reports that no suitable range was found.
 */
622 static void find_superblock_space(ranges extents, struct dev_opts *dev)
626 darray_foreach(i, extents) {
627 u64 start = round_up(max(256ULL << 10, i->start),
628 dev->bucket_size << 9);
629 u64 end = round_down(i->end,
630 dev->bucket_size << 9);
/* 128 << 10 bytes matches the 256 units of 512 bytes added to sb_end. */
632 if (start + (128 << 10) <= end) {
633 dev->sb_offset = start >> 9;
634 dev->sb_end = dev->sb_offset + 256;
639 die("Couldn't find a valid location for superblock");
/*
 * migrate_usage() - print the help text of "bcachefs migrate" with puts().
 *
 * NOTE(review): the -f line ends in "migrate(s)", which reads like a
 * leftover; confirm the intended wording.
 */
642 static void migrate_usage(void)
644 puts("bcachefs migrate - migrate an existing filesystem to bcachefs\n"
645 "Usage: bcachefs migrate [OPTION]...\n"
648 " -f fs Root of filesystem to migrate(s)\n"
649 " --encrypted Enable whole filesystem encryption (chacha20/poly1305)\n"
650 " --no_passphrase Don't encrypt master encryption key\n"
651 " -F Force, even if metadata file already exists\n"
652 " -h Display this help and exit\n"
653 "Report bugs to <linux-bcache@vger.kernel.org>");
/*
 * Long options accepted by "bcachefs migrate". Neither takes an argument;
 * the last column is the option code associated with each entry.
 */
656 static const struct option migrate_opts[] = {
657 { "encrypted", no_argument, NULL, 'e' },
658 { "no_passphrase", no_argument, NULL, 'p' },
/*
 * migrate_fs() - convert the filesystem mounted at fs_path to bcachefs.
 *
 * Steps visible here:
 *  - verify fs_path is a mount point and a directory, and open the block
 *    device behind it;
 *  - reserve space for the new filesystem in the file "<fs_path>/bcachefs"
 *    and pick a superblock location inside that space;
 *  - format, open the new filesystem without starting it, mark the space
 *    outside the reserved extents, start it and copy the tree over;
 *  - reopen the filesystem with nochanges set as an fsck pass and print
 *    follow-up instructions.
 * Failures end the program through die().
 */
662 static int migrate_fs(const char *fs_path,
663 struct format_opts format_opts,
/* NOTE(review): "filysestem" in the message below is a typo for "filesystem". */
666 if (!path_is_fs_root(fs_path))
667 die("%s is not a filysestem root", fs_path);
669 int fs_fd = xopen(fs_path, O_RDONLY|O_NOATIME);
670 struct stat stat = xfstat(fs_fd);
672 if (!S_ISDIR(stat.st_mode))
673 die("%s is not a directory", fs_path);
675 struct dev_opts dev = dev_opts_default();
/* The device is derived from st_dev of the filesystem root. */
677 dev.path = dev_t_to_path(stat.st_dev);
678 dev.fd = xopen(dev.path, O_RDWR);
/*
 * The get_blocksize() result is shifted left by 9 locally and shifted back
 * when stored in format_opts.block_size.
 */
680 unsigned block_size = get_blocksize(dev.path, dev.fd) << 9;
681 BUG_ON(!is_power_of_2(block_size) || block_size < 512);
682 format_opts.block_size = block_size >> 9;
684 char *file_path = mprintf("%s/bcachefs", fs_path);
685 printf("Creating new filesystem on %s in space reserved at %s\n",
686 dev.path, file_path);
688 bch2_pick_bucket_size(format_opts, &dev);
/* One fifth of get_size() for the device is requested for the file. */
691 ranges extents = reserve_new_fs_space(file_path,
692 format_opts.block_size << 9,
693 get_size(dev.path, dev.fd) / 5,
694 &bcachefs_inum, stat.st_dev, force);
696 find_superblock_space(extents, &dev);
/* Format a single device; the superblock offset is read back from the layout. */
698 struct bch_sb *sb = bch2_format(format_opts, &dev, 1);
699 u64 sb_offset = le64_to_cpu(sb->layout.sb_offset[0]);
701 if (format_opts.passphrase)
702 bch2_add_key(sb, format_opts.passphrase);
706 struct bch_opts opts = bch2_opts_empty();
707 struct bch_fs *c = NULL;
708 char *path[1] = { dev.path };
/* Open at the chosen superblock offset, without starting, non-exclusively. */
710 opt_set(opts, sb, sb_offset);
711 opt_set(opts, nostart, true);
712 opt_set(opts, noexcl, true);
714 c = bch2_fs_open(path, 1, opts);
716 die("Error opening new filesystem: %s", strerror(-PTR_ERR(c)));
/* Buckets outside the reserved extents are flagged before the fs starts. */
718 mark_unreserved_space(c, extents);
720 const char *err = bch2_fs_start(c);
722 die("Error starting new filesystem: %s", err);
724 copy_fs(c, fs_fd, fs_path, bcachefs_inum, &extents);
/* Second open: started (nostart = false) but with nochanges set. */
728 printf("Migrate complete, running fsck:\n");
729 opt_set(opts, nostart, false);
730 opt_set(opts, nochanges, true);
732 c = bch2_fs_open(path, 1, opts);
734 die("Error opening new filesystem: %s", strerror(-PTR_ERR(c)));
737 printf("fsck complete\n");
/*
 * NOTE(review): the text below names "/old_migrated_filestem", whereas
 * reserve_old_fs_space() creates "old_migrated_filesystem" - confirm and
 * align the message.
 */
739 printf("To mount the new filesystem, run\n"
740 " mount -t bcachefs -o sb=%llu %s dir\n"
742 "After verifying that the new filesystem is correct, to create a\n"
743 "superblock at the default offset and finish the migration run\n"
744 " bcachefs migrate-superblock -d %s -o %llu\n"
746 "The new filesystem will have a file at /old_migrated_filestem\n"
747 "referencing all disk space that might be used by the existing\n"
748 "filesystem. That file can be deleted once the old filesystem is\n"
749 "no longer needed (and should be deleted prior to running\n"
750 "bcachefs migrate-superblock)\n",
751 sb_offset, dev.path, dev.path, sb_offset);
/*
 * cmd_migrate() - entry point of "bcachefs migrate".
 *
 * Parses the command line with getopt_long() (short options "f:Fh" plus the
 * long options in migrate_opts), asks for a passphrase when encryption was
 * requested without no_passphrase, and hands over to migrate_fs(), whose
 * result is returned.
 */
755 int cmd_migrate(int argc, char *argv[])
757 struct format_opts format_opts = format_opts_default();
758 char *fs_path = NULL;
759 bool no_passphrase = false, force = false;
/* 'e' and 'p' are not in the short option string; they come from migrate_opts. */
762 while ((opt = getopt_long(argc, argv, "f:Fh",
763 migrate_opts, NULL)) != -1)
769 format_opts.encrypted = true;
772 no_passphrase = true;
/* NOTE(review): "filesytem" in the message below is a typo for "filesystem". */
783 die("Please specify a filesytem to migrate");
785 if (format_opts.encrypted && !no_passphrase)
786 format_opts.passphrase = read_passphrase_twice("Enter passphrase: ");
788 return migrate_fs(fs_path, format_opts, force);
/*
 * migrate_superblock_usage() - print the help text of
 * "bcachefs migrate-superblock" with puts().
 */
791 static void migrate_superblock_usage(void)
793 puts("bcachefs migrate-superblock - create default superblock after migrating\n"
794 "Usage: bcachefs migrate-superblock [OPTION]...\n"
797 " -d device Device to create superblock for\n"
798 " -o offset Offset of existing superblock\n"
799 " -h Display this help and exit\n"
800 "Report bugs to <linux-bcache@vger.kernel.org>");
/*
 * cmd_migrate_superblock() - entry point of "bcachefs migrate-superblock".
 *
 * Reads the superblock found at the given offset on the device, inserts
 * BCH_SB_SECTOR as the first entry of its layout and writes the superblock
 * out again. Both a device and an offset are required.
 */
803 int cmd_migrate_superblock(int argc, char *argv[])
809 while ((opt = getopt(argc, argv, "d:o:h")) != -1)
/* The offset argument is parsed as a base-10 unsigned 64-bit number. */
815 ret = kstrtou64(optarg, 10, &offset);
817 die("Invalid offset");
820 migrate_superblock_usage();
825 die("Please specify a device");
828 die("Please specify offset of existing superblock");
830 int fd = xopen(dev, O_RDWR);
831 struct bch_sb *sb = __bch2_super_read(fd, offset);
/* One free slot is needed in the layout's offset array. */
833 if (sb->layout.nr_superblocks >= ARRAY_SIZE(sb->layout.sb_offset))
834 die("Can't add superblock: no space left in superblock layout");
/* Refuse to add the default location twice. */
837 for (i = 0; i < sb->layout.nr_superblocks; i++)
838 if (le64_to_cpu(sb->layout.sb_offset[i]) == BCH_SB_SECTOR)
839 die("Superblock layout already has default superblock");
/* Shift the existing entries up by one; memmove() copes with the overlap. */
841 memmove(&sb->layout.sb_offset[1],
842 &sb->layout.sb_offset[0],
843 sb->layout.nr_superblocks * sizeof(u64));
844 sb->layout.nr_superblocks++;
/* The default location becomes entry 0, stored little-endian. */
846 sb->layout.sb_offset[0] = cpu_to_le64(BCH_SB_SECTOR);
848 bch2_super_write(fd, sb);