8 #include <sys/sysmacros.h>
12 #include <attr/xattr.h>
14 #include <linux/fiemap.h>
16 #include <linux/stat.h>
18 #include <uuid/uuid.h>
22 #include "libbcachefs.h"
24 #include <linux/dcache.h>
25 #include <linux/generic-radix-tree.h>
26 #include <linux/xattr.h>
27 #include "libbcachefs/bcachefs.h"
28 #include "libbcachefs/alloc_background.h"
29 #include "libbcachefs/alloc_foreground.h"
30 #include "libbcachefs/btree_update.h"
31 #include "libbcachefs/buckets.h"
32 #include "libbcachefs/dirent.h"
33 #include "libbcachefs/fs.h"
34 #include "libbcachefs/inode.h"
35 #include "libbcachefs/io.h"
36 #include "libbcachefs/replicas.h"
37 #include "libbcachefs/str_hash.h"
38 #include "libbcachefs/super.h"
39 #include "libbcachefs/xattr.h"
/*
 * Translate a block device number into a "/dev/<name>" path.
 *
 * The name is taken from the target of the /sys/dev/block/<major>:<minor>
 * symlink: the text located through the last '/' of the link target is
 * formatted into "/dev/%s".  The result is produced by mprintf().
 * NOTE(review): presumably a heap string owned by the caller - confirm
 * against mprintf().
 */
41 static char *dev_t_to_path(dev_t dev)
43 char link[PATH_MAX], *p;
46 char *sysfs_dev = mprintf("/sys/dev/block/%u:%u",
47 major(dev), minor(dev));
48 ret = readlink(sysfs_dev, link, sizeof(link));
/* a result that fills the whole buffer is rejected together with failures */
51 if (ret < 0 || ret >= sizeof(link))
52 die("readlink error while looking up block device: %m");
/* the device name is located via the last '/' of the link target */
56 p = strrchr(link, '/');
58 die("error looking up device name");
61 return mprintf("/dev/%s", p);
/*
 * Scan /proc/self/mountinfo for a mount point equal to @path.
 *
 * Every line is tokenised on single spaces with strsep(); the fifth field
 * is compared against @path with strcmp(), so @path has to match the listed
 * mount point text exactly.
 * NOTE(review): the return statements are not shown here - presumably true
 * when a match is found; confirm.
 */
64 static bool path_is_fs_root(const char *path)
66 char *line = NULL, *p, *mount;
71 f = fopen("/proc/self/mountinfo", "r");
73 die("Error getting mount information");
/* getline() grows @line as needed; -1 ends the scan */
75 while (getline(&line, &n, f) != -1) {
78 strsep(&p, " "); /* mount id */
79 strsep(&p, " "); /* parent id */
80 strsep(&p, " "); /* dev */
81 strsep(&p, " "); /* root */
/* fifth field: the value compared with @path below */
82 mount = strsep(&p, " ");
85 if (mount && !strcmp(path, mount))
/*
 * Flag the buckets of the first device (c->devs[0]) that lie in the gaps
 * between @extents by setting nouse in their bucket mark.
 *
 * for_each_hole() is given the device end in bytes
 * (bucket_to_sector(ca, nbuckets) << 9); hole offsets are shifted right by
 * 9 to obtain sectors before being converted to bucket numbers.
 */
96 static void mark_unreserved_space(struct bch_fs *c, ranges extents)
98 struct bch_dev *ca = c->devs[0];
99 struct hole_iter iter;
102 for_each_hole(iter, extents, bucket_to_sector(ca, ca->mi.nbuckets) << 9, i) {
103 struct bucket_mark new;
/* NOTE(review): zero-length holes are presumably skipped here - confirm */
106 if (i.start == i.end)
109 b = sector_to_bucket(ca, i.start >> 9);
111 struct bucket *g = bucket(ca, b);
/* set the nouse flag in this bucket's mark */
112 bucket_cmpxchg(g, new, new.nouse = 1);
/* loop while bucket b still begins before the end of the hole */
114 } while (bucket_to_sector(ca, b) << 9 < i.end);
/*
 * Write @inode to the inodes btree.
 *
 * The unpacked inode is packed with bch2_inode_pack() and the resulting key
 * is handed to bch2_btree_insert() for BTREE_ID_INODES.  A failure is
 * reported through die() using strerror(-ret), i.e. ret is treated as a
 * negative errno value.
 */
118 static void update_inode(struct bch_fs *c,
119 struct bch_inode_unpacked *inode)
121 struct bkey_inode_buf packed;
124 bch2_inode_pack(&packed, inode);
125 ret = bch2_btree_insert(c, BTREE_ID_INODES, &packed.inode.k_i,
128 die("error creating file: %s", strerror(-ret));
/*
 * Add a directory entry called @name to directory @parent that refers to
 * inode number @inum.
 *
 * The entry type is derived from @mode via mode_to_type(), and hashing uses
 * the hash info initialised from @parent.  BCH_HASH_SET_MUST_CREATE is
 * passed to bch2_dirent_create().
 * NOTE(review): presumably this makes an already existing name an error -
 * confirm against bch2_dirent_create().
 */
131 static void create_dirent(struct bch_fs *c,
132 struct bch_inode_unpacked *parent,
133 const char *name, u64 inum, mode_t mode)
135 struct bch_hash_info parent_hash_info = bch2_hash_info_init(c, parent);
/* wrap @name and its length in the qstr that bch2_dirent_create() takes */
136 struct qstr qname = { { { .len = strlen(name), } }, .name = name };
138 int ret = bch2_dirent_create(c, parent->bi_inum, &parent_hash_info,
139 mode_to_type(mode), &qname,
140 inum, NULL, BCH_HASH_SET_MUST_CREATE);
142 die("error creating file: %s", strerror(-ret));
/*
 * Create an additional name @name in @parent for the existing inode @inum.
 *
 * The inode is looked up with bch2_inode_find_by_inum(), written back with
 * update_inode() and then a directory entry is created for it.
 * NOTE(review): presumably the link count is adjusted between the lookup
 * and the write-back - that step is not visible here; confirm.
 */
148 static void create_link(struct bch_fs *c,
149 struct bch_inode_unpacked *parent,
150 const char *name, u64 inum, mode_t mode)
152 struct bch_inode_unpacked inode;
153 int ret = bch2_inode_find_by_inum(c, inum, &inode);
155 die("error looking up hardlink: %s", strerror(-ret));
158 update_inode(c, &inode);
160 create_dirent(c, parent, name, inum, mode);
/*
 * Create a new inode and link it into @parent under @name.
 *
 * The inode is initialised by bch2_inode_init() from @uid, @gid, @mode,
 * @rdev and @parent, created with bch2_inode_create() (which is passed
 * BLOCKDEV_INODE_MAX, 0 and &c->unused_inode_hint), and a directory entry
 * for its bi_inum is then added to @parent.  Declared to return a
 * struct bch_inode_unpacked by value.
 */
163 static struct bch_inode_unpacked create_file(struct bch_fs *c,
164 struct bch_inode_unpacked *parent,
166 uid_t uid, gid_t gid,
167 mode_t mode, dev_t rdev)
169 struct bch_inode_unpacked new_inode;
172 bch2_inode_init(c, &new_inode, uid, gid, mode, rdev, parent);
174 ret = bch2_inode_create(c, &new_inode, BLOCKDEV_INODE_MAX, 0,
175 &c->unused_inode_hint);
177 die("error creating file: %s", strerror(-ret));
179 create_dirent(c, parent, name, new_inode.bi_inum, mode);
/*
 * Iterate over an array of xattr handler pointers.
 *
 * @handler receives each element in turn.  @handlers is post-incremented on
 * every step, so the pointer passed in is advanced as a side effect.
 */
184 #define for_each_xattr_handler(handlers, handler) \
186 for ((handler) = *(handlers)++; \
188 (handler) = *(handlers)++)
/*
 * Find the handler in bch2_xattr_handlers that is responsible for *@name.
 *
 * Each handler's prefix (via xattr_prefix()) is matched against *@name with
 * strcmp_prefix().  Two failure results are visible: ERR_PTR(-EINVAL) and
 * ERR_PTR(-EOPNOTSUPP).
 * NOTE(review): @name is passed by reference, presumably so the matched
 * prefix can be stripped on success - that assignment is not visible here;
 * confirm.
 */
190 static const struct xattr_handler *xattr_resolve_name(const char **name)
192 const struct xattr_handler **handlers = bch2_xattr_handlers;
193 const struct xattr_handler *handler;
195 for_each_xattr_handler(handlers, handler) {
198 n = strcmp_prefix(*name, xattr_prefix(handler));
/* true when exactly one of "handler has no prefix" / "no text is left after the match" holds */
200 if (!handler->prefix ^ !*n) {
203 return ERR_PTR(-EINVAL);
209 return ERR_PTR(-EOPNOTSUPP);
/*
 * Copy the access, modification and change timestamps from @src
 * (st_atim, st_mtim, st_ctim) into @dst, converting each one with
 * timespec_to_bch2_time().
 */
212 static void copy_times(struct bch_fs *c, struct bch_inode_unpacked *dst,
215 dst->bi_atime = timespec_to_bch2_time(c, src->st_atim);
216 dst->bi_mtime = timespec_to_bch2_time(c, src->st_mtim);
217 dst->bi_ctime = timespec_to_bch2_time(c, src->st_ctim);
/*
 * Copy the extended attributes of the file named @src onto inode @dst.
 *
 * Attribute names are listed with llistxattr() into an XATTR_LIST_MAX byte
 * buffer and walked as consecutive NUL-terminated strings.  For each name
 * the value is read with lgetxattr() into an XATTR_SIZE_MAX byte buffer,
 * the handler is looked up with xattr_resolve_name(), and the attribute is
 * stored through bch2_xattr_set() inside bch2_trans_do().  Errors are
 * reported through die().
 */
220 static void copy_xattrs(struct bch_fs *c, struct bch_inode_unpacked *dst,
223 struct bch_hash_info hash_info = bch2_hash_info_init(c, dst);
225 char attrs[XATTR_LIST_MAX];
226 ssize_t attrs_size = llistxattr(src, attrs, sizeof(attrs));
228 die("listxattr error: %m");
230 const char *next, *attr;
232 attr < attrs + attrs_size;
/* the following name starts right after this one's terminating NUL */
234 next = attr + strlen(attr) + 1;
236 char val[XATTR_SIZE_MAX];
237 ssize_t val_size = lgetxattr(src, attr, val, sizeof(val));
240 die("error getting xattr val: %m");
/* attr is passed by address, so the callee is able to modify it */
242 const struct xattr_handler *h = xattr_resolve_name(&attr);
244 int ret = bch2_trans_do(c, NULL, BTREE_INSERT_ATOMIC,
245 bch2_xattr_set(&trans, dst->bi_inum, &hash_info, attr,
246 val, val_size, h->flags, 0));
248 die("error creating xattr: %s", strerror(-ret));
/*
 * Size in bytes of the staging buffer below.  It is a named constant because
 * write_data() has a parameter that is also called buf: inside that function
 * sizeof(buf) is the size of a pointer, not of the array, so the bio_vec
 * array there must be sized from this constant instead.
 */
#define MIGRATE_BUF_BYTES (1 << 20)

/* Page-aligned staging buffer used by copy_data() and copy_link(). */
252 static char buf[MIGRATE_BUF_BYTES] __aligned(PAGE_SIZE);

/*
 * Write @len bytes from @buf into inode @dst_inode at byte offset
 * @dst_offset.
 *
 * @dst_offset and @len must be multiples of the filesystem block size
 * (enforced with BUG_ON).  The data is mapped into an on-stack bio with
 * bch2_bio_map() and submitted through bch2_write() with a single replica,
 * after reserving len >> 9 sectors.  On return @dst_inode->bi_sectors has
 * been increased by len >> 9; the caller is responsible for writing the
 * inode back.
 */
254 static void write_data(struct bch_fs *c,
255 struct bch_inode_unpacked *dst_inode,
256 u64 dst_offset, void *buf, size_t len)
259 struct bch_write_op op;
/* one bio_vec per page of the largest buffer the callers hand in */
260 struct bio_vec bv[MIGRATE_BUF_BYTES / PAGE_SIZE];
264 BUG_ON(dst_offset & (block_bytes(c) - 1));
265 BUG_ON(len & (block_bytes(c) - 1));
267 closure_init_stack(&cl);
269 bio_init(&o.op.wbio.bio, o.bv, ARRAY_SIZE(o.bv));
270 o.op.wbio.bio.bi_iter.bi_size = len;
271 bch2_bio_map(&o.op.wbio.bio, buf);
273 bch2_write_op_init(&o.op, c, bch2_opts_to_inode_opts(c->opts));
274 o.op.write_point = writepoint_hashed(0);
275 o.op.nr_replicas = 1;
/* extent position: inode number plus offset in 512-byte sectors */
276 o.op.pos = POS(dst_inode->bi_inum, dst_offset >> 9);
278 int ret = bch2_disk_reservation_get(c, &o.op.res, len >> 9,
279 c->opts.data_replicas, 0);
281 die("error reserving space in new filesystem: %s", strerror(-ret));
283 closure_call(&o.op.cl, bch2_write, NULL, &cl);
286 dst_inode->bi_sectors += len >> 9;
/*
 * Copy the byte range [@start, @end) of @src_fd into @dst_inode through the
 * file-scope staging buffer.
 *
 * Each pass reads at most sizeof(buf) bytes with xpread(), zero-fills the
 * tail up to the next block_bytes(c) boundary and hands the padded length
 * to write_data() at the same offset it was read from.
 */
289 static void copy_data(struct bch_fs *c,
290 struct bch_inode_unpacked *dst_inode,
291 int src_fd, u64 start, u64 end)
293 while (start < end) {
294 unsigned len = min_t(u64, end - start, sizeof(buf));
/* bytes needed to round this chunk up to a whole block */
295 unsigned pad = round_up(len, block_bytes(c)) - len;
297 xpread(src_fd, buf, len, start);
298 memset(buf + len, 0, pad);
300 write_data(c, dst_inode, start, buf, len + pad);
/*
 * Create extents in @dst that point at data already present on the first
 * device (c->devs[0]) instead of copying it.
 *
 * @logical, @physical and @length must be multiples of the block size
 * (enforced with BUG_ON).  The range is handled in pieces that stop at a
 * bucket boundary: each piece becomes one extent key carrying a pointer
 * with the bucket's current generation, the bucket's bit is set in
 * ca->buckets_dirty, a reservation is taken with
 * BCH_DISK_RESERVATION_NOFAIL, and the key is inserted into
 * BTREE_ID_EXTENTS.  @dst->bi_sectors grows by the sectors linked.
 * NOTE(review): the bound check and the key offsets below operate on
 * sectors; the conversion from the block-aligned inputs is not visible
 * here - confirm.
 */
305 static void link_data(struct bch_fs *c, struct bch_inode_unpacked *dst,
306 u64 logical, u64 physical, u64 length)
308 struct bch_dev *ca = c->devs[0];
310 BUG_ON(logical & (block_bytes(c) - 1));
311 BUG_ON(physical & (block_bytes(c) - 1));
312 BUG_ON(length & (block_bytes(c) - 1));
/* the linked range has to end within the device */
318 BUG_ON(physical + length > bucket_to_sector(ca, ca->mi.nbuckets));
321 struct bkey_i_extent *e;
323 u64 b = sector_to_bucket(ca, physical);
324 struct disk_reservation res;
/* never go past the end of the bucket that contains @physical */
328 sectors = min(ca->mi.bucket_size -
329 (physical & (ca->mi.bucket_size - 1)),
332 e = bkey_extent_init(&k.k);
333 e->k.p.inode = dst->bi_inum;
/* the key position is the end of the extent */
334 e->k.p.offset = logical + sectors;
336 extent_ptr_append(e, (struct bch_extent_ptr) {
339 .gen = bucket(ca, b)->mark.gen,
342 set_bit(b, ca->buckets_dirty);
344 ret = bch2_disk_reservation_get(c, &res, sectors, 1,
345 BCH_DISK_RESERVATION_NOFAIL);
347 die("error reserving space in new filesystem: %s",
350 bch2_mark_bkey_replicas(c, BCH_DATA_USER,
351 extent_i_to_s_c(e).s_c);
353 ret = bch2_btree_insert(c, BTREE_ID_EXTENTS, &e->k_i,
356 die("btree insert error %s", strerror(-ret));
358 bch2_disk_reservation_put(c, &res);
360 dst->bi_sectors += sectors;
/*
 * Store the target of the symlink named @src as the data of inode @dst.
 *
 * The target is read with readlink() into the file-scope staging buffer and
 * written at offset 0 with its length rounded up to a whole block.
 * NOTE(review): the bytes between the link length and the block boundary
 * are whatever the buffer held before - confirm that readers bound the data
 * by the inode size.
 */
367 static void copy_link(struct bch_fs *c, struct bch_inode_unpacked *dst,
370 ssize_t ret = readlink(src, buf, sizeof(buf));
372 die("readlink error: %m");
374 write_data(c, dst, 0, buf, round_up(ret, block_bytes(c)));
/*
 * Bring the contents of the regular file open on @src_fd into inode @dst.
 *
 * The file's extents are walked twice with fiemap_for_each().  The first
 * walk looks for FIEMAP_EXTENT_UNKNOWN.  In the second walk every extent
 * must have a block-aligned logical offset and length; extents that are
 * unknown, encoded, not aligned or inline, and extents whose physical
 * offset is below 1 << 20, are copied with copy_data(); the remaining ones
 * must have a block-aligned physical offset and are recorded in @extents
 * and referenced in place with link_data().  @src_path is only used in
 * error messages.
 * NOTE(review): the action taken when the first walk finds an UNKNOWN
 * extent is not visible here - confirm.
 */
377 static void copy_file(struct bch_fs *c, struct bch_inode_unpacked *dst,
378 int src_fd, u64 src_size,
379 char *src_path, ranges *extents)
381 struct fiemap_iter iter;
382 struct fiemap_extent e;
384 fiemap_for_each(src_fd, iter, e)
385 if (e.fe_flags & FIEMAP_EXTENT_UNKNOWN) {
390 fiemap_for_each(src_fd, iter, e) {
391 if ((e.fe_logical & (block_bytes(c) - 1)) ||
392 (e.fe_length & (block_bytes(c) - 1)))
393 die("Unaligned extent in %s - can't handle", src_path);
395 if (e.fe_flags & (FIEMAP_EXTENT_UNKNOWN|
396 FIEMAP_EXTENT_ENCODED|
397 FIEMAP_EXTENT_NOT_ALIGNED|
398 FIEMAP_EXTENT_DATA_INLINE)) {
399 copy_data(c, dst, src_fd, e.fe_logical,
400 min(src_size - e.fe_logical,
406 * if the data is below 1 MB, copy it so it doesn't conflict
407 * with bcachefs's potentially larger superblock:
409 if (e.fe_physical < 1 << 20) {
410 copy_data(c, dst, src_fd, e.fe_logical,
411 min(src_size - e.fe_logical,
416 if ((e.fe_physical & (block_bytes(c) - 1)))
417 die("Unaligned extent in %s - can't handle", src_path);
419 range_add(extents, e.fe_physical, e.fe_length);
420 link_data(c, dst, e.fe_logical, e.fe_physical, e.fe_length);
/*
 * State shared by the recursive directory copy (see copy_dir() and
 * copy_fs()).
 */
424 struct copy_fs_state {
/* indexed by source st_ino in copy_dir(); holds the inode number created for it */
428 GENRADIX(u64) hardlinks;
/*
 * Recursively copy the directory open on @src_fd (called @src_path in
 * messages) into the directory inode @dst.
 *
 * For every entry other than ".", ".." and the file whose inode number is
 * s->bcachefs_inum: the entry must live on device s->dev; a regular file
 * whose source inode was already copied is added as another name with
 * create_link(); otherwise a new inode is created, times and xattrs are
 * copied, the contents are copied according to the file type (recursing
 * for directories, copy_file() for regular files, copy_link() for
 * symlinks), and the inode is written with update_inode().
 */
432 static void copy_dir(struct copy_fs_state *s,
434 struct bch_inode_unpacked *dst,
435 int src_fd, const char *src_path)
437 DIR *dir = fdopendir(src_fd);
/* errno is cleared before each readdir() call; see the die() at the end of this function */
440 while ((errno = 0), (d = readdir(dir))) {
441 struct bch_inode_unpacked inode;
445 die("chdir error: %m");
448 xfstatat(src_fd, d->d_name, AT_SYMLINK_NOFOLLOW);
450 if (!strcmp(d->d_name, ".") ||
451 !strcmp(d->d_name, "..") ||
452 stat.st_ino == s->bcachefs_inum)
455 char *child_path = mprintf("%s/%s", src_path, d->d_name);
457 if (stat.st_dev != s->dev)
458 die("%s does not have correct st_dev!", child_path);
/* only regular files get a slot in the hardlink table */
460 u64 *dst_inum = S_ISREG(stat.st_mode)
461 ? genradix_ptr_alloc(&s->hardlinks, stat.st_ino, GFP_KERNEL)
/* a non-zero slot means this source inode has been copied before */
464 if (dst_inum && *dst_inum) {
465 create_link(c, dst, d->d_name, *dst_inum, S_IFREG);
469 inode = create_file(c, dst, d->d_name,
470 stat.st_uid, stat.st_gid,
471 stat.st_mode, stat.st_rdev);
474 *dst_inum = inode.bi_inum;
476 copy_times(c, &inode, &stat);
477 copy_xattrs(c, &inode, d->d_name);
481 switch (mode_to_type(stat.st_mode)) {
483 fd = xopen(d->d_name, O_RDONLY|O_NOATIME);
484 copy_dir(s, c, &inode, fd, child_path);
488 inode.bi_size = stat.st_size;
490 fd = xopen(d->d_name, O_RDONLY|O_NOATIME);
491 copy_file(c, &inode, fd, stat.st_size,
492 child_path, &s->extents);
496 inode.bi_size = stat.st_size;
498 copy_link(c, &inode, d->d_name);
505 /* nothing else to copy for these: */
511 update_inode(c, &inode);
517 die("readdir error: %m");
/*
 * Create the file @file_path that will hold the new filesystem's metadata,
 * allocate @size bytes for it and return the physical ranges it occupies.
 *
 * The file must be on device @dev; its inode number is stored in
 * *@bcachefs_inum.  Space is allocated with fallocate() and the resulting
 * extents are read back with fiemap_for_each(): extents that are unknown,
 * encoded, not aligned or inline, or whose physical offset or length is not
 * a multiple of @block_size, abort via die().  The collected ranges are
 * sorted and merged with ranges_sort_merge().
 */
520 static ranges reserve_new_fs_space(const char *file_path, unsigned block_size,
521 u64 size, u64 *bcachefs_inum, dev_t dev,
/* the second variant adds O_EXCL, so it fails when the file already exists */
525 ? open(file_path, O_RDWR|O_CREAT, 0600)
526 : open(file_path, O_RDWR|O_CREAT|O_EXCL, 0600);
528 die("Error creating %s for bcachefs metadata: %m",
531 struct stat statbuf = xfstat(fd);
533 if (statbuf.st_dev != dev)
534 die("bcachefs file has incorrect device");
536 *bcachefs_inum = statbuf.st_ino;
538 if (fallocate(fd, 0, 0, size))
539 die("Error reserving space for bcachefs metadata: %m");
543 struct fiemap_iter iter;
544 struct fiemap_extent e;
545 ranges extents = { NULL };
547 fiemap_for_each(fd, iter, e) {
548 if (e.fe_flags & (FIEMAP_EXTENT_UNKNOWN|
549 FIEMAP_EXTENT_ENCODED|
550 FIEMAP_EXTENT_NOT_ALIGNED|
551 FIEMAP_EXTENT_DATA_INLINE))
552 die("Unable to continue: metadata file not fully mapped");
/* block_size - 1 is used as a mask, so @block_size is expected to be a power of two */
554 if ((e.fe_physical & (block_size - 1)) ||
555 (e.fe_length & (block_size - 1)))
556 die("Unable to continue: unaligned extents in metadata file");
558 range_add(&extents, e.fe_physical, e.fe_length);
562 ranges_sort_merge(&extents);
/*
 * Create the file "old_migrated_filesystem" in @root_inode and link into it
 * every part of the first device that is not covered by the ranges in
 * @extents.
 *
 * The file is created with uid/gid 0 and mode S_IFREG|0400, and its size is
 * set to the device size in bytes (bucket_to_sector(ca, nbuckets) << 9).
 * @extents is sorted and merged first; each hole reported by
 * for_each_hole() is then linked with identical logical and physical
 * offsets, and the inode is written with update_inode().
 */
566 static void reserve_old_fs_space(struct bch_fs *c,
567 struct bch_inode_unpacked *root_inode,
570 struct bch_dev *ca = c->devs[0];
571 struct bch_inode_unpacked dst;
572 struct hole_iter iter;
575 dst = create_file(c, root_inode, "old_migrated_filesystem",
576 0, 0, S_IFREG|0400, 0);
577 dst.bi_size = bucket_to_sector(ca, ca->mi.nbuckets) << 9;
579 ranges_sort_merge(extents);
/* logical offset == physical offset for every linked hole */
581 for_each_hole(iter, *extents, bucket_to_sector(ca, ca->mi.nbuckets) << 9, i)
582 link_data(c, &dst, i.start, i.start, i.end - i.start);
584 update_inode(c, &dst);
/*
 * Copy the directory tree open on @src_fd (named @src_path) into the root
 * directory of filesystem @c.
 *
 * The root inode (BCACHEFS_ROOT_INO) is looked up, given the source root's
 * times and xattrs, and filled by copy_dir().  Afterwards
 * reserve_old_fs_space() is called with the extents collected during the
 * copy, the root inode is written back, and the extent list and hardlink
 * table are freed.
 * NOTE(review): how @extents is placed into the copy state is not visible
 * here - confirm.
 */
587 static void copy_fs(struct bch_fs *c, int src_fd, const char *src_path,
588 u64 bcachefs_inum, ranges *extents)
592 struct bch_inode_unpacked root_inode;
593 int ret = bch2_inode_find_by_inum(c, BCACHEFS_ROOT_INO, &root_inode);
595 die("error looking up root directory: %s", strerror(-ret));
598 die("chdir error: %m");
600 struct stat stat = xfstat(src_fd);
601 copy_times(c, &root_inode, &stat);
/* "." is the path handed to the xattr calls for the source root */
602 copy_xattrs(c, &root_inode, ".");
604 struct copy_fs_state s = {
605 .bcachefs_inum = bcachefs_inum,
611 copy_dir(&s, c, &root_inode, src_fd, src_path);
613 reserve_old_fs_space(c, &root_inode, &s.extents);
615 update_inode(c, &root_inode);
617 darray_free(s.extents);
618 genradix_free(&s.hardlinks);
/*
 * Choose a superblock location for @dev inside one of the ranges in
 * @extents.
 *
 * A candidate start is the range start, but not below 256 << 10 bytes,
 * rounded up to a bucket boundary; the range end is rounded down to a
 * bucket boundary.  A range qualifies when at least 128 << 10 bytes remain
 * between the two.  dev->sb_offset is the start in 512-byte sectors and
 * dev->sb_end is 256 sectors after it.  die()s with "Couldn't find a valid
 * location for superblock" otherwise.
 */
623 static void find_superblock_space(ranges extents, struct dev_opts *dev)
627 darray_foreach(i, extents) {
628 u64 start = round_up(max(256ULL << 10, i->start),
629 dev->bucket_size << 9);
630 u64 end = round_down(i->end,
631 dev->bucket_size << 9);
633 if (start + (128 << 10) <= end) {
/* bytes to sectors */
634 dev->sb_offset = start >> 9;
635 dev->sb_end = dev->sb_offset + 256;
640 die("Couldn't find a valid location for superblock");
/* Print the help text of the "bcachefs migrate" command with puts(). */
643 static void migrate_usage(void)
645 puts("bcachefs migrate - migrate an existing filesystem to bcachefs\n"
646 "Usage: bcachefs migrate [OPTION]...\n"
649 " -f fs Root of filesystem to migrate(s)\n"
650 " --encrypted Enable whole filesystem encryption (chacha20/poly1305)\n"
651 " --no_passphrase Don't encrypt master encryption key\n"
652 " -F Force, even if metadata file already exists\n"
653 " -h Display this help and exit\n"
654 "Report bugs to <linux-bcache@vger.kernel.org>");
/*
 * Long options accepted by cmd_migrate(): --encrypted is reported by
 * getopt_long() as 'e' and --no_passphrase as 'p'; neither takes an
 * argument.
 */
657 static const struct option migrate_opts[] = {
658 { "encrypted", no_argument, NULL, 'e' },
659 { "no_passphrase", no_argument, NULL, 'p' },
/*
 * Convert the filesystem mounted at @fs_path to bcachefs in place.
 *
 * Steps visible in this function:
 *  - require @fs_path to be a mount point and a directory;
 *  - derive the backing block device from st_dev, open it, and use its
 *    block size for the new filesystem;
 *  - reserve one fifth of the device size in the file <fs_path>/bcachefs
 *    and choose a superblock location inside the reserved ranges;
 *  - format, open the new filesystem with nostart and noexcl at that
 *    superblock offset, mark the unreserved space, start the filesystem
 *    and copy the old tree with copy_fs();
 *  - reopen it with nostart cleared and nochanges set, then print the
 *    follow-up instructions.
 *
 * @format_opts is taken by value; its block_size is overwritten with the
 * device's block size.  Every failure ends the program through die().
 */
663 static int migrate_fs(const char *fs_path,
664 struct format_opts format_opts,
667 if (!path_is_fs_root(fs_path))
668 die("%s is not a filesystem root", fs_path);
670 int fs_fd = xopen(fs_path, O_RDONLY|O_NOATIME);
671 struct stat stat = xfstat(fs_fd);
673 if (!S_ISDIR(stat.st_mode))
674 die("%s is not a directory", fs_path);
676 struct dev_opts dev = dev_opts_default();
678 dev.path = dev_t_to_path(stat.st_dev);
679 dev.fd = xopen(dev.path, O_RDWR);
/* get_blocksize() result is shifted by 9 to obtain bytes */
681 unsigned block_size = get_blocksize(dev.path, dev.fd) << 9;
682 BUG_ON(!is_power_of_2(block_size) || block_size < 512);
683 format_opts.block_size = block_size >> 9;
685 char *file_path = mprintf("%s/bcachefs", fs_path);
686 printf("Creating new filesystem on %s in space reserved at %s\n",
687 dev.path, file_path);
689 bch2_pick_bucket_size(format_opts, &dev);
/* one fifth of the device is set aside for the new filesystem */
692 ranges extents = reserve_new_fs_space(file_path,
693 format_opts.block_size << 9,
694 get_size(dev.path, dev.fd) / 5,
695 &bcachefs_inum, stat.st_dev, force);
697 find_superblock_space(extents, &dev);
699 struct bch_sb *sb = bch2_format(format_opts, &dev, 1);
700 u64 sb_offset = le64_to_cpu(sb->layout.sb_offset[0]);
702 if (format_opts.passphrase)
703 bch2_add_key(sb, format_opts.passphrase);
707 struct bch_opts opts = bch2_opts_empty();
708 struct bch_fs *c = NULL;
709 char *path[1] = { dev.path };
711 opt_set(opts, sb, sb_offset);
712 opt_set(opts, nostart, true);
713 opt_set(opts, noexcl, true);
715 c = bch2_fs_open(path, 1, opts);
717 die("Error opening new filesystem: %s", strerror(-PTR_ERR(c)));
719 mark_unreserved_space(c, extents);
721 const char *err = bch2_fs_start(c);
723 die("Error starting new filesystem: %s", err);
725 copy_fs(c, fs_fd, fs_path, bcachefs_inum, &extents);
729 printf("Migrate complete, running fsck:\n");
730 opt_set(opts, nostart, false);
731 opt_set(opts, nochanges, true);
733 c = bch2_fs_open(path, 1, opts);
735 die("Error opening new filesystem: %s", strerror(-PTR_ERR(c)));
738 printf("fsck complete\n");
/* the file name printed below must match the one created by reserve_old_fs_space() */
740 printf("To mount the new filesystem, run\n"
741 " mount -t bcachefs -o sb=%llu %s dir\n"
743 "After verifying that the new filesystem is correct, to create a\n"
744 "superblock at the default offset and finish the migration run\n"
745 " bcachefs migrate-superblock -d %s -o %llu\n"
747 "The new filesystem will have a file at /old_migrated_filesystem\n"
748 "referencing all disk space that might be used by the existing\n"
749 "filesystem. That file can be deleted once the old filesystem is\n"
750 "no longer needed (and should be deleted prior to running\n"
751 "bcachefs migrate-superblock)\n",
752 sb_offset, dev.path, dev.path, sb_offset);
/*
 * Entry point of "bcachefs migrate".
 *
 * Parses the command line with getopt_long() (short options "f:Fh" plus the
 * long options in migrate_opts), requires a filesystem path, reads a
 * passphrase twice when encryption was requested without --no_passphrase,
 * and returns the result of migrate_fs().
 */
756 int cmd_migrate(int argc, char *argv[])
758 struct format_opts format_opts = format_opts_default();
759 char *fs_path = NULL;
760 bool no_passphrase = false, force = false;
763 while ((opt = getopt_long(argc, argv, "f:Fh",
764 migrate_opts, NULL)) != -1)
770 format_opts.encrypted = true;
773 no_passphrase = true;
784 die("Please specify a filesystem to migrate");
/* a passphrase is only asked for when the key is going to be encrypted */
786 if (format_opts.encrypted && !no_passphrase)
787 format_opts.passphrase = read_passphrase_twice("Enter passphrase: ");
789 return migrate_fs(fs_path, format_opts, force);
/* Print the help text of the "bcachefs migrate-superblock" command with puts(). */
792 static void migrate_superblock_usage(void)
794 puts("bcachefs migrate-superblock - create default superblock after migrating\n"
795 "Usage: bcachefs migrate-superblock [OPTION]...\n"
798 " -d device Device to create superblock for\n"
799 " -o offset Offset of existing superblock\n"
800 " -h Display this help and exit\n"
801 "Report bugs to <linux-bcache@vger.kernel.org>");
/*
 * Entry point of "bcachefs migrate-superblock".
 *
 * Parses -d, -o and -h with getopt(), reads the superblock found at the
 * given offset of the device, and adds BCH_SB_SECTOR as the first entry of
 * its layout before writing the superblock back.  It refuses to continue
 * when the layout has no free slot or already lists BCH_SB_SECTOR.
 */
804 int cmd_migrate_superblock(int argc, char *argv[])
810 while ((opt = getopt(argc, argv, "d:o:h")) != -1)
/* the offset argument is parsed as a base-10 unsigned 64-bit number */
816 ret = kstrtou64(optarg, 10, &offset);
818 die("Invalid offset");
821 migrate_superblock_usage();
826 die("Please specify a device");
829 die("Please specify offset of existing superblock");
831 int fd = xopen(dev, O_RDWR);
832 struct bch_sb *sb = __bch2_super_read(fd, offset);
834 if (sb->layout.nr_superblocks >= ARRAY_SIZE(sb->layout.sb_offset))
835 die("Can't add superblock: no space left in superblock layout");
838 for (i = 0; i < sb->layout.nr_superblocks; i++)
839 if (le64_to_cpu(sb->layout.sb_offset[i]) == BCH_SB_SECTOR)
840 die("Superblock layout already has default superblock");
/* shift the existing offsets up by one slot to free index 0 */
842 memmove(&sb->layout.sb_offset[1],
843 &sb->layout.sb_offset[0],
844 sb->layout.nr_superblocks * sizeof(u64));
845 sb->layout.nr_superblocks++;
847 sb->layout.sb_offset[0] = cpu_to_le64(BCH_SB_SECTOR);
849 bch2_super_write(fd, sb);