#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <getopt.h>
#include <limits.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/sysmacros.h>
#include <sys/types.h>
#include <unistd.h>
#include <attr/xattr.h>

#include <linux/fiemap.h>
#include <linux/stat.h>

#include <uuid/uuid.h>

#include "libbcachefs.h"

#include <linux/dcache.h>
#include <linux/generic-radix-tree.h>
#include <linux/xattr.h>
#include "libbcachefs/bcachefs.h"
#include "libbcachefs/btree_update.h"
#include "libbcachefs/buckets.h"
#include "libbcachefs/dirent.h"
#include "libbcachefs/fs.h"
#include "libbcachefs/inode.h"
#include "libbcachefs/io.h"
#include "libbcachefs/str_hash.h"
#include "libbcachefs/super.h"
#include "libbcachefs/xattr.h"
38 static char *dev_t_to_path(dev_t dev)
40 char link[PATH_MAX], *p;
43 char *sysfs_dev = mprintf("/sys/dev/block/%u:%u",
44 major(dev), minor(dev));
45 ret = readlink(sysfs_dev, link, sizeof(link));
48 if (ret < 0 || ret >= sizeof(link))
49 die("readlink error while looking up block device: %m");
53 p = strrchr(link, '/');
55 die("error looking up device name");
58 return mprintf("/dev/%s", p);
/*
 * Return true if @path is listed as a mount point in /proc/self/mountinfo.
 *
 * Each mountinfo line is split on single spaces; the fifth field (after
 * mount id, parent id, dev and root) is compared verbatim against @path.
 * Dies if mountinfo cannot be opened.
 *
 * NOTE(review): the comparison is a plain strcmp(), so @path must be spelled
 * exactly as the kernel reports it (no trailing slash, same escaping).
 */
static bool path_is_fs_root(const char *path)
{
	char *line = NULL, *p, *mount;
	size_t n = 0;
	FILE *f;
	bool ret = false;

	f = fopen("/proc/self/mountinfo", "r");
	if (!f)
		die("Error getting mount information");

	while (getline(&line, &n, f) != -1) {
		p = line;

		strsep(&p, " "); /* mount id */
		strsep(&p, " "); /* parent id */
		strsep(&p, " "); /* dev */
		strsep(&p, " "); /* root */
		mount = strsep(&p, " ");

		if (mount && !strcmp(path, mount)) {
			ret = true;
			break;
		}
	}

	fclose(f);
	free(line);
	return ret;
}
93 static void mark_unreserved_space(struct bch_fs *c, ranges extents)
95 struct bch_dev *ca = c->devs[0];
96 struct hole_iter iter;
99 for_each_hole(iter, extents, bucket_to_sector(ca, ca->mi.nbuckets) << 9, i) {
100 struct bucket_mark new;
103 if (i.start == i.end)
106 b = sector_to_bucket(ca, i.start >> 9);
108 struct bucket *g = bucket(ca, b);
109 bucket_cmpxchg(g, new, new.nouse = 1);
111 } while (bucket_to_sector(ca, b) << 9 < i.end);
115 static void update_inode(struct bch_fs *c,
116 struct bch_inode_unpacked *inode)
118 struct bkey_inode_buf packed;
121 bch2_inode_pack(&packed, inode);
122 ret = bch2_btree_insert(c, BTREE_ID_INODES, &packed.inode.k_i,
123 NULL, NULL, NULL, 0);
125 die("error creating file: %s", strerror(-ret));
128 static void create_dirent(struct bch_fs *c,
129 struct bch_inode_unpacked *parent,
130 const char *name, u64 inum, mode_t mode)
132 struct bch_hash_info parent_hash_info = bch2_hash_info_init(c, parent);
133 struct qstr qname = { { { .len = strlen(name), } }, .name = name };
135 int ret = bch2_dirent_create(c, parent->bi_inum, &parent_hash_info,
136 mode_to_type(mode), &qname,
137 inum, NULL, BCH_HASH_SET_MUST_CREATE);
139 die("error creating file: %s", strerror(-ret));
145 static void create_link(struct bch_fs *c,
146 struct bch_inode_unpacked *parent,
147 const char *name, u64 inum, mode_t mode)
149 struct bch_inode_unpacked inode;
150 int ret = bch2_inode_find_by_inum(c, inum, &inode);
152 die("error looking up hardlink: %s", strerror(-ret));
155 update_inode(c, &inode);
157 create_dirent(c, parent, name, inum, mode);
160 static struct bch_inode_unpacked create_file(struct bch_fs *c,
161 struct bch_inode_unpacked *parent,
163 uid_t uid, gid_t gid,
164 mode_t mode, dev_t rdev)
166 struct bch_inode_unpacked new_inode;
169 bch2_inode_init(c, &new_inode, uid, gid, mode, rdev, parent);
171 ret = bch2_inode_create(c, &new_inode, BLOCKDEV_INODE_MAX, 0,
172 &c->unused_inode_hint);
174 die("error creating file: %s", strerror(-ret));
176 create_dirent(c, parent, name, new_inode.bi_inum, mode);
/*
 * Iterate over a NULL-terminated array of handler pointers.
 *
 * @handlers is advanced as a side effect and must therefore be a modifiable
 * lvalue; a NULL @handlers yields no iterations.  The expansion begins with
 * an unbraced `if`, so do not follow the loop with an `else`.
 */
#define for_each_xattr_handler(handlers, handler)		\
	if (handlers)						\
		for ((handler) = *(handlers)++;			\
		     (handler) != NULL;				\
		     (handler) = *(handlers)++)
187 static const struct xattr_handler *xattr_resolve_name(const char **name)
189 const struct xattr_handler **handlers = bch2_xattr_handlers;
190 const struct xattr_handler *handler;
192 for_each_xattr_handler(handlers, handler) {
195 n = strcmp_prefix(*name, xattr_prefix(handler));
197 if (!handler->prefix ^ !*n) {
200 return ERR_PTR(-EINVAL);
206 return ERR_PTR(-EOPNOTSUPP);
209 static void copy_times(struct bch_fs *c, struct bch_inode_unpacked *dst,
212 dst->bi_atime = timespec_to_bch2_time(c, src->st_atim);
213 dst->bi_mtime = timespec_to_bch2_time(c, src->st_mtim);
214 dst->bi_ctime = timespec_to_bch2_time(c, src->st_ctim);
217 static void copy_xattrs(struct bch_fs *c, struct bch_inode_unpacked *dst,
220 struct bch_hash_info hash_info = bch2_hash_info_init(c, dst);
222 char attrs[XATTR_LIST_MAX];
223 ssize_t attrs_size = llistxattr(src, attrs, sizeof(attrs));
225 die("listxattr error: %m");
227 const char *next, *attr;
229 attr < attrs + attrs_size;
231 next = attr + strlen(attr) + 1;
233 char val[XATTR_SIZE_MAX];
234 ssize_t val_size = lgetxattr(src, attr, val, sizeof(val));
237 die("error getting xattr val: %m");
239 const struct xattr_handler *h = xattr_resolve_name(&attr);
241 int ret = __bch2_xattr_set(c, dst->bi_inum, &hash_info, attr,
242 val, val_size, 0, h->flags, NULL);
244 die("error creating xattr: %s", strerror(-ret));
248 static char buf[1 << 20] __aligned(PAGE_SIZE);
249 static const size_t buf_pages = sizeof(buf) / PAGE_SIZE;
251 static void write_data(struct bch_fs *c,
252 struct bch_inode_unpacked *dst_inode,
253 u64 dst_offset, void *buf, size_t len)
256 struct bch_write_op op;
257 struct bio_vec bv[buf_pages];
261 BUG_ON(dst_offset & (block_bytes(c) - 1));
262 BUG_ON(len & (block_bytes(c) - 1));
264 closure_init_stack(&cl);
266 bio_init(&o.op.wbio.bio, o.bv, buf_pages);
267 o.op.wbio.bio.bi_iter.bi_size = len;
268 bch2_bio_map(&o.op.wbio.bio, buf);
270 bch2_write_op_init(&o.op, c);
271 o.op.write_point = writepoint_hashed(0);
272 o.op.nr_replicas = 1;
273 o.op.pos = POS(dst_inode->bi_inum, dst_offset >> 9);
275 int ret = bch2_disk_reservation_get(c, &o.op.res, len >> 9,
276 c->opts.data_replicas, 0);
278 die("error reserving space in new filesystem: %s", strerror(-ret));
280 closure_call(&o.op.cl, bch2_write, NULL, &cl);
283 dst_inode->bi_sectors += len >> 9;
286 static void copy_data(struct bch_fs *c,
287 struct bch_inode_unpacked *dst_inode,
288 int src_fd, u64 start, u64 end)
290 while (start < end) {
291 unsigned len = min_t(u64, end - start, sizeof(buf));
292 unsigned pad = round_up(len, block_bytes(c)) - len;
294 xpread(src_fd, buf, len, start);
295 memset(buf + len, 0, pad);
297 write_data(c, dst_inode, start, buf, len + pad);
302 static void link_data(struct bch_fs *c, struct bch_inode_unpacked *dst,
303 u64 logical, u64 physical, u64 length)
305 struct bch_dev *ca = c->devs[0];
307 BUG_ON(logical & (block_bytes(c) - 1));
308 BUG_ON(physical & (block_bytes(c) - 1));
309 BUG_ON(length & (block_bytes(c) - 1));
315 BUG_ON(physical + length > bucket_to_sector(ca, ca->mi.nbuckets));
318 struct bkey_i_extent *e;
320 u64 b = sector_to_bucket(ca, physical);
321 struct disk_reservation res;
325 sectors = min(ca->mi.bucket_size -
326 (physical & (ca->mi.bucket_size - 1)),
329 e = bkey_extent_init(&k.k);
330 e->k.p.inode = dst->bi_inum;
331 e->k.p.offset = logical + sectors;
333 extent_ptr_append(e, (struct bch_extent_ptr) {
336 .gen = bucket(ca, b)->mark.gen,
339 set_bit(b, ca->buckets_dirty);
341 ret = bch2_disk_reservation_get(c, &res, sectors, 1,
342 BCH_DISK_RESERVATION_NOFAIL);
344 die("error reserving space in new filesystem: %s",
347 bch2_mark_bkey_replicas(c, BCH_DATA_USER,
348 extent_i_to_s_c(e).s_c);
350 ret = bch2_btree_insert(c, BTREE_ID_EXTENTS, &e->k_i,
351 &res, NULL, NULL, 0);
353 die("btree insert error %s", strerror(-ret));
355 bch2_disk_reservation_put(c, &res);
357 dst->bi_sectors += sectors;
364 static void copy_link(struct bch_fs *c, struct bch_inode_unpacked *dst,
367 ssize_t ret = readlink(src, buf, sizeof(buf));
369 die("readlink error: %m");
371 write_data(c, dst, 0, buf, round_up(ret, block_bytes(c)));
374 static void copy_file(struct bch_fs *c, struct bch_inode_unpacked *dst,
375 int src_fd, u64 src_size,
376 char *src_path, ranges *extents)
378 struct fiemap_iter iter;
379 struct fiemap_extent e;
381 fiemap_for_each(src_fd, iter, e)
382 if (e.fe_flags & FIEMAP_EXTENT_UNKNOWN) {
387 fiemap_for_each(src_fd, iter, e) {
388 if ((e.fe_logical & (block_bytes(c) - 1)) ||
389 (e.fe_length & (block_bytes(c) - 1)))
390 die("Unaligned extent in %s - can't handle", src_path);
392 if (e.fe_flags & (FIEMAP_EXTENT_UNKNOWN|
393 FIEMAP_EXTENT_ENCODED|
394 FIEMAP_EXTENT_NOT_ALIGNED|
395 FIEMAP_EXTENT_DATA_INLINE)) {
396 copy_data(c, dst, src_fd, e.fe_logical,
397 min(src_size - e.fe_logical,
403 * if the data is below 1 MB, copy it so it doesn't conflict
404 * with bcachefs's potentially larger superblock:
406 if (e.fe_physical < 1 << 20) {
407 copy_data(c, dst, src_fd, e.fe_logical,
408 min(src_size - e.fe_logical,
413 if ((e.fe_physical & (block_bytes(c) - 1)))
414 die("Unaligned extent in %s - can't handle", src_path);
416 range_add(extents, e.fe_physical, e.fe_length);
417 link_data(c, dst, e.fe_logical, e.fe_physical, e.fe_length);
421 struct copy_fs_state {
425 GENRADIX(u64) hardlinks;
429 static void copy_dir(struct copy_fs_state *s,
431 struct bch_inode_unpacked *dst,
432 int src_fd, const char *src_path)
434 DIR *dir = fdopendir(src_fd);
437 while ((errno = 0), (d = readdir(dir))) {
438 struct bch_inode_unpacked inode;
442 die("chdir error: %m");
445 xfstatat(src_fd, d->d_name, AT_SYMLINK_NOFOLLOW);
447 if (!strcmp(d->d_name, ".") ||
448 !strcmp(d->d_name, "..") ||
449 stat.st_ino == s->bcachefs_inum)
452 char *child_path = mprintf("%s/%s", src_path, d->d_name);
454 if (stat.st_dev != s->dev)
455 die("%s does not have correct st_dev!", child_path);
457 u64 *dst_inum = S_ISREG(stat.st_mode)
458 ? genradix_ptr_alloc(&s->hardlinks, stat.st_ino, GFP_KERNEL)
461 if (dst_inum && *dst_inum) {
462 create_link(c, dst, d->d_name, *dst_inum, S_IFREG);
466 inode = create_file(c, dst, d->d_name,
467 stat.st_uid, stat.st_gid,
468 stat.st_mode, stat.st_rdev);
471 *dst_inum = inode.bi_inum;
473 copy_times(c, &inode, &stat);
474 copy_xattrs(c, &inode, d->d_name);
478 switch (mode_to_type(stat.st_mode)) {
480 fd = xopen(d->d_name, O_RDONLY|O_NOATIME);
481 copy_dir(s, c, &inode, fd, child_path);
485 inode.bi_size = stat.st_size;
487 fd = xopen(d->d_name, O_RDONLY|O_NOATIME);
488 copy_file(c, &inode, fd, stat.st_size,
489 child_path, &s->extents);
493 inode.bi_size = stat.st_size;
495 copy_link(c, &inode, d->d_name);
502 /* nothing else to copy for these: */
508 update_inode(c, &inode);
514 die("readdir error: %m");
517 static ranges reserve_new_fs_space(const char *file_path, unsigned block_size,
518 u64 size, u64 *bcachefs_inum, dev_t dev,
522 ? open(file_path, O_RDWR|O_CREAT, 0600)
523 : open(file_path, O_RDWR|O_CREAT|O_EXCL, 0600);
525 die("Error creating %s for bcachefs metadata: %m",
528 struct stat statbuf = xfstat(fd);
530 if (statbuf.st_dev != dev)
531 die("bcachefs file has incorrect device");
533 *bcachefs_inum = statbuf.st_ino;
535 if (fallocate(fd, 0, 0, size))
536 die("Error reserving space for bcachefs metadata: %m");
540 struct fiemap_iter iter;
541 struct fiemap_extent e;
542 ranges extents = { NULL };
544 fiemap_for_each(fd, iter, e) {
545 if (e.fe_flags & (FIEMAP_EXTENT_UNKNOWN|
546 FIEMAP_EXTENT_ENCODED|
547 FIEMAP_EXTENT_NOT_ALIGNED|
548 FIEMAP_EXTENT_DATA_INLINE))
549 die("Unable to continue: metadata file not fully mapped");
551 if ((e.fe_physical & (block_size - 1)) ||
552 (e.fe_length & (block_size - 1)))
553 die("Unable to continue: unaligned extents in metadata file");
555 range_add(&extents, e.fe_physical, e.fe_length);
559 ranges_sort_merge(&extents);
563 static void reserve_old_fs_space(struct bch_fs *c,
564 struct bch_inode_unpacked *root_inode,
567 struct bch_dev *ca = c->devs[0];
568 struct bch_inode_unpacked dst;
569 struct hole_iter iter;
572 dst = create_file(c, root_inode, "old_migrated_filesystem",
573 0, 0, S_IFREG|0400, 0);
574 dst.bi_size = bucket_to_sector(ca, ca->mi.nbuckets) << 9;
576 ranges_sort_merge(extents);
578 for_each_hole(iter, *extents, bucket_to_sector(ca, ca->mi.nbuckets) << 9, i)
579 link_data(c, &dst, i.start, i.start, i.end - i.start);
581 update_inode(c, &dst);
584 static void copy_fs(struct bch_fs *c, int src_fd, const char *src_path,
585 u64 bcachefs_inum, ranges *extents)
589 struct bch_inode_unpacked root_inode;
590 int ret = bch2_inode_find_by_inum(c, BCACHEFS_ROOT_INO, &root_inode);
592 die("error looking up root directory: %s", strerror(-ret));
595 die("chdir error: %m");
597 struct stat stat = xfstat(src_fd);
598 copy_times(c, &root_inode, &stat);
599 copy_xattrs(c, &root_inode, ".");
601 struct copy_fs_state s = {
602 .bcachefs_inum = bcachefs_inum,
608 copy_dir(&s, c, &root_inode, src_fd, src_path);
610 reserve_old_fs_space(c, &root_inode, &s.extents);
612 update_inode(c, &root_inode);
614 darray_free(s.extents);
615 genradix_free(&s.hardlinks);
620 static void find_superblock_space(ranges extents, struct dev_opts *dev)
624 darray_foreach(i, extents) {
625 u64 start = round_up(max(256ULL << 10, i->start),
626 dev->bucket_size << 9);
627 u64 end = round_down(i->end,
628 dev->bucket_size << 9);
630 if (start + (128 << 10) <= end) {
631 dev->sb_offset = start >> 9;
632 dev->sb_end = dev->sb_offset + 256;
637 die("Couldn't find a valid location for superblock");
/* Print the help text for "bcachefs migrate" to stdout. */
static void migrate_usage(void)
{
	puts("bcachefs migrate - migrate an existing filesystem to bcachefs\n"
	     "Usage: bcachefs migrate [OPTION]...\n"
	     "\n"
	     "Options:\n"
	     "  -f fs                  Root of filesystem to migrate\n"
	     "      --encrypted        Enable whole filesystem encryption (chacha20/poly1305)\n"
	     "      --no_passphrase    Don't encrypt master encryption key\n"
	     "  -F                     Force, even if metadata file already exists\n"
	     "  -h                     Display this help and exit\n"
	     "Report bugs to <linux-bcache@vger.kernel.org>");
}
/*
 * Long options accepted by "bcachefs migrate"; each maps to the short code
 * handled in cmd_migrate().  getopt_long() requires the all-zero terminator.
 */
static const struct option migrate_opts[] = {
	{ "encrypted",		no_argument, NULL, 'e' },
	{ "no_passphrase",	no_argument, NULL, 'p' },
	{ NULL }
};
660 static int migrate_fs(const char *fs_path,
661 struct format_opts format_opts,
664 if (!path_is_fs_root(fs_path))
665 die("%s is not a filysestem root", fs_path);
667 int fs_fd = xopen(fs_path, O_RDONLY|O_NOATIME);
668 struct stat stat = xfstat(fs_fd);
670 if (!S_ISDIR(stat.st_mode))
671 die("%s is not a directory", fs_path);
673 struct dev_opts dev = dev_opts_default();
675 dev.path = dev_t_to_path(stat.st_dev);
676 dev.fd = xopen(dev.path, O_RDWR);
678 unsigned block_size = get_blocksize(dev.path, dev.fd) << 9;
679 BUG_ON(!is_power_of_2(block_size) || block_size < 512);
680 format_opts.block_size = block_size >> 9;
682 char *file_path = mprintf("%s/bcachefs", fs_path);
683 printf("Creating new filesystem on %s in space reserved at %s\n",
684 dev.path, file_path);
686 bch2_pick_bucket_size(format_opts, &dev);
689 ranges extents = reserve_new_fs_space(file_path,
690 format_opts.block_size << 9,
691 get_size(dev.path, dev.fd) / 5,
692 &bcachefs_inum, stat.st_dev, force);
694 find_superblock_space(extents, &dev);
696 struct bch_sb *sb = bch2_format(format_opts, &dev, 1);
697 u64 sb_offset = le64_to_cpu(sb->layout.sb_offset[0]);
699 if (format_opts.passphrase)
700 bch2_add_key(sb, format_opts.passphrase);
704 struct bch_opts opts = bch2_opts_empty();
705 struct bch_fs *c = NULL;
706 char *path[1] = { dev.path };
708 opt_set(opts, sb, sb_offset);
709 opt_set(opts, nostart, true);
710 opt_set(opts, noexcl, true);
712 c = bch2_fs_open(path, 1, opts);
714 die("Error opening new filesystem: %s", strerror(-PTR_ERR(c)));
716 mark_unreserved_space(c, extents);
718 const char *err = bch2_fs_start(c);
720 die("Error starting new filesystem: %s", err);
722 copy_fs(c, fs_fd, fs_path, bcachefs_inum, &extents);
726 printf("Migrate complete, running fsck:\n");
727 opt_set(opts, nostart, false);
728 opt_set(opts, nochanges, true);
730 c = bch2_fs_open(path, 1, opts);
732 die("Error opening new filesystem: %s", strerror(-PTR_ERR(c)));
735 printf("fsck complete\n");
737 printf("To mount the new filesystem, run\n"
738 " mount -t bcachefs -o sb=%llu %s dir\n"
740 "After verifying that the new filesystem is correct, to create a\n"
741 "superblock at the default offset and finish the migration run\n"
742 " bcachefs migrate-superblock -d %s -o %llu\n"
744 "The new filesystem will have a file at /old_migrated_filestem\n"
745 "referencing all disk space that might be used by the existing\n"
746 "filesystem. That file can be deleted once the old filesystem is\n"
747 "no longer needed (and should be deleted prior to running\n"
748 "bcachefs migrate-superblock)\n",
749 sb_offset, dev.path, dev.path, sb_offset);
753 int cmd_migrate(int argc, char *argv[])
755 struct format_opts format_opts = format_opts_default();
756 char *fs_path = NULL;
757 bool no_passphrase = false, force = false;
760 while ((opt = getopt_long(argc, argv, "f:Fh",
761 migrate_opts, NULL)) != -1)
767 format_opts.encrypted = true;
770 no_passphrase = true;
781 die("Please specify a filesytem to migrate");
783 if (format_opts.encrypted && !no_passphrase)
784 format_opts.passphrase = read_passphrase_twice("Enter passphrase: ");
786 return migrate_fs(fs_path, format_opts, force);
/* Print the help text for "bcachefs migrate-superblock" to stdout. */
static void migrate_superblock_usage(void)
{
	puts("bcachefs migrate-superblock - create default superblock after migrating\n"
	     "Usage: bcachefs migrate-superblock [OPTION]...\n"
	     "\n"
	     "Options:\n"
	     "  -d device     Device to create superblock for\n"
	     "  -o offset     Offset of existing superblock\n"
	     "  -h            Display this help and exit\n"
	     "Report bugs to <linux-bcache@vger.kernel.org>");
}
801 int cmd_migrate_superblock(int argc, char *argv[])
807 while ((opt = getopt(argc, argv, "d:o:h")) != -1)
813 ret = kstrtou64(optarg, 10, &offset);
815 die("Invalid offset");
818 migrate_superblock_usage();
823 die("Please specify a device");
826 die("Please specify offset of existing superblock");
828 int fd = xopen(dev, O_RDWR);
829 struct bch_sb *sb = __bch2_super_read(fd, offset);
831 if (sb->layout.nr_superblocks >= ARRAY_SIZE(sb->layout.sb_offset))
832 die("Can't add superblock: no space left in superblock layout");
835 for (i = 0; i < sb->layout.nr_superblocks; i++)
836 if (le64_to_cpu(sb->layout.sb_offset[i]) == BCH_SB_SECTOR)
837 die("Superblock layout already has default superblock");
839 memmove(&sb->layout.sb_offset[1],
840 &sb->layout.sb_offset[0],
841 sb->layout.nr_superblocks * sizeof(u64));
842 sb->layout.nr_superblocks++;
844 sb->layout.sb_offset[0] = cpu_to_le64(BCH_SB_SECTOR);
846 bch2_super_write(fd, sb);