]> git.sesse.net Git - bcachefs-tools-debian/blobdiff - c_src/cmd_migrate.c
move Rust sources to top level, C sources into c_src
[bcachefs-tools-debian] / c_src / cmd_migrate.c
diff --git a/c_src/cmd_migrate.c b/c_src/cmd_migrate.c
new file mode 100644 (file)
index 0000000..ea32e4e
--- /dev/null
@@ -0,0 +1,867 @@
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <string.h>
+#include <sys/xattr.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/sysmacros.h>
+#include <sys/types.h>
+#include <sys/vfs.h>
+#include <unistd.h>
+
+#include <linux/fiemap.h>
+#include <linux/fs.h>
+#include <linux/stat.h>
+
+#include <uuid/uuid.h>
+
+#include "cmds.h"
+#include "crypto.h"
+#include "libbcachefs.h"
+
+#include <linux/dcache.h>
+#include <linux/generic-radix-tree.h>
+#include <linux/xattr.h>
+#include "libbcachefs/bcachefs.h"
+#include "libbcachefs/alloc_background.h"
+#include "libbcachefs/alloc_foreground.h"
+#include "libbcachefs/btree_update.h"
+#include "libbcachefs/buckets.h"
+#include "libbcachefs/dirent.h"
+#include "libbcachefs/errcode.h"
+#include "libbcachefs/fs-common.h"
+#include "libbcachefs/inode.h"
+#include "libbcachefs/io_write.h"
+#include "libbcachefs/replicas.h"
+#include "libbcachefs/str_hash.h"
+#include "libbcachefs/super.h"
+#include "libbcachefs/xattr.h"
+
+/* XXX cut and pasted from fsck.c */
+#define QSTR(n) { { { .len = strlen(n) } }, .name = n }
+
+static char *dev_t_to_path(dev_t dev)
+{
+       char link[PATH_MAX], *p;
+       int ret;
+
+       char *sysfs_dev = mprintf("/sys/dev/block/%u:%u",
+                                 major(dev), minor(dev));
+       ret = readlink(sysfs_dev, link, sizeof(link));
+       free(sysfs_dev);
+
+       if (ret < 0 || ret >= sizeof(link))
+               die("readlink error while looking up block device: %m");
+
+       link[ret] = '\0';
+
+       p = strrchr(link, '/');
+       if (!p)
+               die("error looking up device name");
+       p++;
+
+       return mprintf("/dev/%s", p);
+}
+
+static bool path_is_fs_root(const char *path)
+{
+       char *line = NULL, *p, *mount;
+       size_t n = 0;
+       FILE *f;
+       bool ret = true;
+
+       f = fopen("/proc/self/mountinfo", "r");
+       if (!f)
+               die("Error getting mount information");
+
+       while (getline(&line, &n, f) != -1) {
+               p = line;
+
+               strsep(&p, " "); /* mount id */
+               strsep(&p, " "); /* parent id */
+               strsep(&p, " "); /* dev */
+               strsep(&p, " "); /* root */
+               mount = strsep(&p, " ");
+               strsep(&p, " ");
+
+               if (mount && !strcmp(path, mount))
+                       goto found;
+       }
+
+       ret = false;
+found:
+       fclose(f);
+       free(line);
+       return ret;
+}
+
+static void mark_unreserved_space(struct bch_fs *c, ranges extents)
+{
+       struct bch_dev *ca = c->devs[0];
+       struct hole_iter iter;
+       struct range i;
+
+       for_each_hole(iter, extents, bucket_to_sector(ca, ca->mi.nbuckets) << 9, i) {
+               u64 b;
+
+               if (i.start == i.end)
+                       return;
+
+               b = sector_to_bucket(ca, i.start >> 9);
+               do {
+                       set_bit(b, ca->buckets_nouse);
+                       b++;
+               } while (bucket_to_sector(ca, b) << 9 < i.end);
+       }
+}
+
+static void update_inode(struct bch_fs *c,
+                        struct bch_inode_unpacked *inode)
+{
+       struct bkey_inode_buf packed;
+       int ret;
+
+       bch2_inode_pack(&packed, inode);
+       packed.inode.k.p.snapshot = U32_MAX;
+       ret = bch2_btree_insert(c, BTREE_ID_inodes, &packed.inode.k_i,
+                               NULL, 0);
+       if (ret)
+               die("error updating inode: %s", bch2_err_str(ret));
+}
+
+static void create_link(struct bch_fs *c,
+                       struct bch_inode_unpacked *parent,
+                       const char *name, u64 inum, mode_t mode)
+{
+       struct qstr qstr = QSTR(name);
+       struct bch_inode_unpacked parent_u;
+       struct bch_inode_unpacked inode;
+
+       int ret = bch2_trans_do(c, NULL, NULL, 0,
+               bch2_link_trans(trans,
+                               (subvol_inum) { 1, parent->bi_inum }, &parent_u,
+                               (subvol_inum) { 1, inum }, &inode, &qstr));
+       if (ret)
+               die("error creating hardlink: %s", bch2_err_str(ret));
+}
+
+static struct bch_inode_unpacked create_file(struct bch_fs *c,
+                                            struct bch_inode_unpacked *parent,
+                                            const char *name,
+                                            uid_t uid, gid_t gid,
+                                            mode_t mode, dev_t rdev)
+{
+       struct qstr qstr = QSTR(name);
+       struct bch_inode_unpacked new_inode;
+
+       bch2_inode_init_early(c, &new_inode);
+
+       int ret = bch2_trans_do(c, NULL, NULL, 0,
+               bch2_create_trans(trans,
+                                 (subvol_inum) { 1, parent->bi_inum }, parent,
+                                 &new_inode, &qstr,
+                                 uid, gid, mode, rdev, NULL, NULL,
+                                 (subvol_inum) {}, 0));
+       if (ret)
+               die("error creating %s: %s", name, bch2_err_str(ret));
+
+       return new_inode;
+}
+
+#define for_each_xattr_handler(handlers, handler)              \
+       if (handlers)                                           \
+               for ((handler) = *(handlers)++;                 \
+                       (handler) != NULL;                      \
+                       (handler) = *(handlers)++)
+
+static const struct xattr_handler *xattr_resolve_name(char **name)
+{
+       const struct xattr_handler **handlers = bch2_xattr_handlers;
+       const struct xattr_handler *handler;
+
+       for_each_xattr_handler(handlers, handler) {
+               char *n;
+
+               n = strcmp_prefix(*name, xattr_prefix(handler));
+               if (n) {
+                       if (!handler->prefix ^ !*n) {
+                               if (*n)
+                                       continue;
+                               return ERR_PTR(-EINVAL);
+                       }
+                       *name = n;
+                       return handler;
+               }
+       }
+       return ERR_PTR(-EOPNOTSUPP);
+}
+
+static void copy_times(struct bch_fs *c, struct bch_inode_unpacked *dst,
+                      struct stat *src)
+{
+       dst->bi_atime = timespec_to_bch2_time(c, src->st_atim);
+       dst->bi_mtime = timespec_to_bch2_time(c, src->st_mtim);
+       dst->bi_ctime = timespec_to_bch2_time(c, src->st_ctim);
+}
+
+static void copy_xattrs(struct bch_fs *c, struct bch_inode_unpacked *dst,
+                       char *src)
+{
+       struct bch_hash_info hash_info = bch2_hash_info_init(c, dst);
+
+       char attrs[XATTR_LIST_MAX];
+       ssize_t attrs_size = llistxattr(src, attrs, sizeof(attrs));
+       if (attrs_size < 0)
+               die("listxattr error: %m");
+
+       char *next, *attr;
+       for (attr = attrs;
+            attr < attrs + attrs_size;
+            attr = next) {
+               next = attr + strlen(attr) + 1;
+
+               char val[XATTR_SIZE_MAX];
+               ssize_t val_size = lgetxattr(src, attr, val, sizeof(val));
+
+               if (val_size < 0)
+                       die("error getting xattr val: %m");
+
+               const struct xattr_handler *h = xattr_resolve_name(&attr);
+               struct bch_inode_unpacked inode_u;
+
+               int ret = bch2_trans_do(c, NULL, NULL, 0,
+                               bch2_xattr_set(trans,
+                                              (subvol_inum) { 1, dst->bi_inum },
+                                              &inode_u, &hash_info, attr,
+                                              val, val_size, h->flags, 0));
+               if (ret < 0)
+                       die("error creating xattr: %s", bch2_err_str(ret));
+       }
+}
+
+#define WRITE_DATA_BUF (1 << 20)
+
+static char buf[WRITE_DATA_BUF] __aligned(PAGE_SIZE);
+
+static void write_data(struct bch_fs *c,
+                      struct bch_inode_unpacked *dst_inode,
+                      u64 dst_offset, void *buf, size_t len)
+{
+       struct bch_write_op op;
+       struct bio_vec bv[WRITE_DATA_BUF / PAGE_SIZE];
+
+       BUG_ON(dst_offset       & (block_bytes(c) - 1));
+       BUG_ON(len              & (block_bytes(c) - 1));
+       BUG_ON(len > WRITE_DATA_BUF);
+
+       bio_init(&op.wbio.bio, NULL, bv, ARRAY_SIZE(bv), 0);
+       bch2_bio_map(&op.wbio.bio, buf, len);
+
+       bch2_write_op_init(&op, c, bch2_opts_to_inode_opts(c->opts));
+       op.write_point  = writepoint_hashed(0);
+       op.nr_replicas  = 1;
+       op.subvol       = 1;
+       op.pos          = SPOS(dst_inode->bi_inum, dst_offset >> 9, U32_MAX);
+       op.flags |= BCH_WRITE_SYNC;
+
+       int ret = bch2_disk_reservation_get(c, &op.res, len >> 9,
+                                           c->opts.data_replicas, 0);
+       if (ret)
+               die("error reserving space in new filesystem: %s", bch2_err_str(ret));
+
+       closure_call(&op.cl, bch2_write, NULL, NULL);
+
+       BUG_ON(!(op.flags & BCH_WRITE_DONE));
+       dst_inode->bi_sectors += len >> 9;
+
+       if (op.error)
+               die("write error: %s", bch2_err_str(op.error));
+}
+
+static void copy_data(struct bch_fs *c,
+                     struct bch_inode_unpacked *dst_inode,
+                     int src_fd, u64 start, u64 end)
+{
+       while (start < end) {
+               unsigned len = min_t(u64, end - start, sizeof(buf));
+               unsigned pad = round_up(len, block_bytes(c)) - len;
+
+               xpread(src_fd, buf, len, start);
+               memset(buf + len, 0, pad);
+
+               write_data(c, dst_inode, start, buf, len + pad);
+               start += len;
+       }
+}
+
+static void link_data(struct bch_fs *c, struct bch_inode_unpacked *dst,
+                     u64 logical, u64 physical, u64 length)
+{
+       struct bch_dev *ca = c->devs[0];
+
+       BUG_ON(logical  & (block_bytes(c) - 1));
+       BUG_ON(physical & (block_bytes(c) - 1));
+       BUG_ON(length   & (block_bytes(c) - 1));
+
+       logical         >>= 9;
+       physical        >>= 9;
+       length          >>= 9;
+
+       BUG_ON(physical + length > bucket_to_sector(ca, ca->mi.nbuckets));
+
+       while (length) {
+               struct bkey_i_extent *e;
+               BKEY_PADDED_ONSTACK(k, BKEY_EXTENT_VAL_U64s_MAX) k;
+               u64 b = sector_to_bucket(ca, physical);
+               struct disk_reservation res;
+               unsigned sectors;
+               int ret;
+
+               sectors = min(ca->mi.bucket_size -
+                             (physical & (ca->mi.bucket_size - 1)),
+                             length);
+
+               e = bkey_extent_init(&k.k);
+               e->k.p.inode    = dst->bi_inum;
+               e->k.p.offset   = logical + sectors;
+               e->k.p.snapshot = U32_MAX;
+               e->k.size       = sectors;
+               bch2_bkey_append_ptr(&e->k_i, (struct bch_extent_ptr) {
+                                       .offset = physical,
+                                       .dev = 0,
+                                       .gen = *bucket_gen(ca, b),
+                                 });
+
+               ret = bch2_disk_reservation_get(c, &res, sectors, 1,
+                                               BCH_DISK_RESERVATION_NOFAIL);
+               if (ret)
+                       die("error reserving space in new filesystem: %s",
+                           bch2_err_str(ret));
+
+               ret = bch2_btree_insert(c, BTREE_ID_extents, &e->k_i, &res, 0);
+               if (ret)
+                       die("btree insert error %s", bch2_err_str(ret));
+
+               bch2_disk_reservation_put(c, &res);
+
+               dst->bi_sectors += sectors;
+               logical         += sectors;
+               physical        += sectors;
+               length          -= sectors;
+       }
+}
+
+static void copy_link(struct bch_fs *c, struct bch_inode_unpacked *dst,
+                     char *src)
+{
+       ssize_t ret = readlink(src, buf, sizeof(buf));
+       if (ret < 0)
+               die("readlink error: %m");
+
+       write_data(c, dst, 0, buf, round_up(ret, block_bytes(c)));
+}
+
+static void copy_file(struct bch_fs *c, struct bch_inode_unpacked *dst,
+                     int src_fd, u64 src_size,
+                     char *src_path, ranges *extents)
+{
+       struct fiemap_iter iter;
+       struct fiemap_extent e;
+
+       fiemap_for_each(src_fd, iter, e)
+               if (e.fe_flags & FIEMAP_EXTENT_UNKNOWN) {
+                       fsync(src_fd);
+                       break;
+               }
+       fiemap_iter_exit(&iter);
+
+       fiemap_for_each(src_fd, iter, e) {
+               u64 src_max = roundup(src_size, block_bytes(c));
+
+               e.fe_length = min(e.fe_length, src_max - e.fe_logical);
+
+               if ((e.fe_logical       & (block_bytes(c) - 1)) ||
+                   (e.fe_length        & (block_bytes(c) - 1)))
+                       die("Unaligned extent in %s - can't handle", src_path);
+
+               if (e.fe_flags & (FIEMAP_EXTENT_UNKNOWN|
+                                 FIEMAP_EXTENT_ENCODED|
+                                 FIEMAP_EXTENT_NOT_ALIGNED|
+                                 FIEMAP_EXTENT_DATA_INLINE)) {
+                       copy_data(c, dst, src_fd, e.fe_logical,
+                                 min(src_size - e.fe_logical,
+                                     e.fe_length));
+                       continue;
+               }
+
+               /*
+                * if the data is below 1 MB, copy it so it doesn't conflict
+                * with bcachefs's potentially larger superblock:
+                */
+               if (e.fe_physical < 1 << 20) {
+                       copy_data(c, dst, src_fd, e.fe_logical,
+                                 min(src_size - e.fe_logical,
+                                     e.fe_length));
+                       continue;
+               }
+
+               if ((e.fe_physical      & (block_bytes(c) - 1)))
+                       die("Unaligned extent in %s - can't handle", src_path);
+
+               range_add(extents, e.fe_physical, e.fe_length);
+               link_data(c, dst, e.fe_logical, e.fe_physical, e.fe_length);
+       }
+       fiemap_iter_exit(&iter);
+}
+
+struct copy_fs_state {
+       u64                     bcachefs_inum;
+       dev_t                   dev;
+
+       GENRADIX(u64)           hardlinks;
+       ranges                  extents;
+};
+
+static void copy_dir(struct copy_fs_state *s,
+                    struct bch_fs *c,
+                    struct bch_inode_unpacked *dst,
+                    int src_fd, const char *src_path)
+{
+       DIR *dir = fdopendir(src_fd);
+       struct dirent *d;
+
+       while ((errno = 0), (d = readdir(dir))) {
+               struct bch_inode_unpacked inode;
+               int fd;
+
+               if (fchdir(src_fd))
+                       die("chdir error: %m");
+
+               struct stat stat =
+                       xfstatat(src_fd, d->d_name, AT_SYMLINK_NOFOLLOW);
+
+               if (!strcmp(d->d_name, ".") ||
+                   !strcmp(d->d_name, "..") ||
+                   !strcmp(d->d_name, "lost+found") ||
+                   stat.st_ino == s->bcachefs_inum)
+                       continue;
+
+               char *child_path = mprintf("%s/%s", src_path, d->d_name);
+
+               if (stat.st_dev != s->dev)
+                       die("%s does not have correct st_dev!", child_path);
+
+               u64 *dst_inum = S_ISREG(stat.st_mode)
+                       ? genradix_ptr_alloc(&s->hardlinks, stat.st_ino, GFP_KERNEL)
+                       : NULL;
+
+               if (dst_inum && *dst_inum) {
+                       create_link(c, dst, d->d_name, *dst_inum, S_IFREG);
+                       goto next;
+               }
+
+               inode = create_file(c, dst, d->d_name,
+                                   stat.st_uid, stat.st_gid,
+                                   stat.st_mode, stat.st_rdev);
+
+               if (dst_inum)
+                       *dst_inum = inode.bi_inum;
+
+               copy_times(c, &inode, &stat);
+               copy_xattrs(c, &inode, d->d_name);
+
+               /* copy xattrs */
+
+               switch (mode_to_type(stat.st_mode)) {
+               case DT_DIR:
+                       fd = xopen(d->d_name, O_RDONLY|O_NOATIME);
+                       copy_dir(s, c, &inode, fd, child_path);
+                       close(fd);
+                       break;
+               case DT_REG:
+                       inode.bi_size = stat.st_size;
+
+                       fd = xopen(d->d_name, O_RDONLY|O_NOATIME);
+                       copy_file(c, &inode, fd, stat.st_size,
+                                 child_path, &s->extents);
+                       close(fd);
+                       break;
+               case DT_LNK:
+                       inode.bi_size = stat.st_size;
+
+                       copy_link(c, &inode, d->d_name);
+                       break;
+               case DT_FIFO:
+               case DT_CHR:
+               case DT_BLK:
+               case DT_SOCK:
+               case DT_WHT:
+                       /* nothing else to copy for these: */
+                       break;
+               default:
+                       BUG();
+               }
+
+               update_inode(c, &inode);
+next:
+               free(child_path);
+       }
+
+       if (errno)
+               die("readdir error: %m");
+       closedir(dir);
+}
+
+static ranges reserve_new_fs_space(const char *file_path, unsigned block_size,
+                                  u64 size, u64 *bcachefs_inum, dev_t dev,
+                                  bool force)
+{
+       int fd = force
+               ? open(file_path, O_RDWR|O_CREAT, 0600)
+               : open(file_path, O_RDWR|O_CREAT|O_EXCL, 0600);
+       if (fd < 0)
+               die("Error creating %s for bcachefs metadata: %m",
+                   file_path);
+
+       struct stat statbuf = xfstat(fd);
+
+       if (statbuf.st_dev != dev)
+               die("bcachefs file has incorrect device");
+
+       *bcachefs_inum = statbuf.st_ino;
+
+       if (fallocate(fd, 0, 0, size))
+               die("Error reserving space for bcachefs metadata: %m");
+
+       fsync(fd);
+
+       struct fiemap_iter iter;
+       struct fiemap_extent e;
+       ranges extents = { 0 };
+
+       fiemap_for_each(fd, iter, e) {
+               if (e.fe_flags & (FIEMAP_EXTENT_UNKNOWN|
+                                 FIEMAP_EXTENT_ENCODED|
+                                 FIEMAP_EXTENT_NOT_ALIGNED|
+                                 FIEMAP_EXTENT_DATA_INLINE))
+                       die("Unable to continue: metadata file not fully mapped");
+
+               if ((e.fe_physical      & (block_size - 1)) ||
+                   (e.fe_length        & (block_size - 1)))
+                       die("Unable to continue: unaligned extents in metadata file");
+
+               range_add(&extents, e.fe_physical, e.fe_length);
+       }
+       fiemap_iter_exit(&iter);
+       close(fd);
+
+       ranges_sort_merge(&extents);
+       return extents;
+}
+
+static void reserve_old_fs_space(struct bch_fs *c,
+                                struct bch_inode_unpacked *root_inode,
+                                ranges *extents)
+{
+       struct bch_dev *ca = c->devs[0];
+       struct bch_inode_unpacked dst;
+       struct hole_iter iter;
+       struct range i;
+
+       dst = create_file(c, root_inode, "old_migrated_filesystem",
+                         0, 0, S_IFREG|0400, 0);
+       dst.bi_size = bucket_to_sector(ca, ca->mi.nbuckets) << 9;
+
+       ranges_sort_merge(extents);
+
+       for_each_hole(iter, *extents, bucket_to_sector(ca, ca->mi.nbuckets) << 9, i)
+               link_data(c, &dst, i.start, i.start, i.end - i.start);
+
+       update_inode(c, &dst);
+}
+
+static void copy_fs(struct bch_fs *c, int src_fd, const char *src_path,
+                   u64 bcachefs_inum, ranges *extents)
+{
+       syncfs(src_fd);
+
+       struct bch_inode_unpacked root_inode;
+       int ret = bch2_inode_find_by_inum(c, (subvol_inum) { 1, BCACHEFS_ROOT_INO },
+                                         &root_inode);
+       if (ret)
+               die("error looking up root directory: %s", bch2_err_str(ret));
+
+       if (fchdir(src_fd))
+               die("chdir error: %m");
+
+       struct stat stat = xfstat(src_fd);
+       copy_times(c, &root_inode, &stat);
+       copy_xattrs(c, &root_inode, ".");
+
+       struct copy_fs_state s = {
+               .bcachefs_inum  = bcachefs_inum,
+               .dev            = stat.st_dev,
+               .extents        = *extents,
+       };
+
+       /* now, copy: */
+       copy_dir(&s, c, &root_inode, src_fd, src_path);
+
+       reserve_old_fs_space(c, &root_inode, &s.extents);
+
+       update_inode(c, &root_inode);
+
+       darray_exit(&s.extents);
+       genradix_free(&s.hardlinks);
+}
+
+static void find_superblock_space(ranges extents,
+                                 struct format_opts opts,
+                                 struct dev_opts *dev)
+{
+       darray_for_each(extents, i) {
+               u64 start = round_up(max(256ULL << 10, i->start),
+                                    dev->bucket_size << 9);
+               u64 end = round_down(i->end,
+                                    dev->bucket_size << 9);
+
+               /* Need space for two superblocks: */
+               if (start + (opts.superblock_size << 9) * 2 <= end) {
+                       dev->sb_offset  = start >> 9;
+                       dev->sb_end     = dev->sb_offset + opts.superblock_size * 2;
+                       return;
+               }
+       }
+
+       die("Couldn't find a valid location for superblock");
+}
+
+static void migrate_usage(void)
+{
+       puts("bcachefs migrate - migrate an existing filesystem to bcachefs\n"
+            "Usage: bcachefs migrate [OPTION]...\n"
+            "\n"
+            "Options:\n"
+            "  -f fs                  Root of filesystem to migrate(s)\n"
+            "      --encrypted        Enable whole filesystem encryption (chacha20/poly1305)\n"
+            "      --no_passphrase    Don't encrypt master encryption key\n"
+            "  -F                     Force, even if metadata file already exists\n"
+            "  -h                     Display this help and exit\n"
+            "Report bugs to <linux-bcachefs@vger.kernel.org>");
+}
+
+static const struct option migrate_opts[] = {
+       { "encrypted",          no_argument, NULL, 'e' },
+       { "no_passphrase",      no_argument, NULL, 'p' },
+       { NULL }
+};
+
+static int migrate_fs(const char               *fs_path,
+                     struct bch_opt_strs       fs_opt_strs,
+                     struct bch_opts           fs_opts,
+                     struct format_opts        format_opts,
+                     bool force)
+{
+       if (!path_is_fs_root(fs_path))
+               die("%s is not a filysestem root", fs_path);
+
+       int fs_fd = xopen(fs_path, O_RDONLY|O_NOATIME);
+       struct stat stat = xfstat(fs_fd);
+
+       if (!S_ISDIR(stat.st_mode))
+               die("%s is not a directory", fs_path);
+
+       struct dev_opts dev = dev_opts_default();
+
+       dev.path = dev_t_to_path(stat.st_dev);
+       dev.bdev = blkdev_get_by_path(dev.path, BLK_OPEN_READ|BLK_OPEN_WRITE, &dev, NULL);
+
+       opt_set(fs_opts, block_size, get_blocksize(dev.bdev->bd_fd));
+
+       char *file_path = mprintf("%s/bcachefs", fs_path);
+       printf("Creating new filesystem on %s in space reserved at %s\n",
+              dev.path, file_path);
+
+       dev.size        = get_size(dev.bdev->bd_fd);
+       dev.bucket_size = bch2_pick_bucket_size(fs_opts, &dev);
+       dev.nbuckets    = dev.size / dev.bucket_size;
+
+       bch2_check_bucket_size(fs_opts, &dev);
+
+       u64 bcachefs_inum;
+       ranges extents = reserve_new_fs_space(file_path,
+                               fs_opts.block_size >> 9,
+                               get_size(dev.bdev->bd_fd) / 5,
+                               &bcachefs_inum, stat.st_dev, force);
+
+       find_superblock_space(extents, format_opts, &dev);
+
+       struct bch_sb *sb = bch2_format(fs_opt_strs,
+                                       fs_opts, format_opts, &dev, 1);
+       u64 sb_offset = le64_to_cpu(sb->layout.sb_offset[0]);
+
+       if (format_opts.passphrase)
+               bch2_add_key(sb, "user", "user", format_opts.passphrase);
+
+       free(sb);
+
+       struct bch_opts opts = bch2_opts_empty();
+       struct bch_fs *c = NULL;
+       char *path[1] = { dev.path };
+
+       opt_set(opts, sb,       sb_offset);
+       opt_set(opts, nostart,  true);
+       opt_set(opts, noexcl,   true);
+       opt_set(opts, buckets_nouse, true);
+
+       c = bch2_fs_open(path, 1, opts);
+       if (IS_ERR(c))
+               die("Error opening new filesystem: %s", bch2_err_str(PTR_ERR(c)));
+
+       mark_unreserved_space(c, extents);
+
+       int ret = bch2_fs_start(c);
+       if (ret)
+               die("Error starting new filesystem: %s", bch2_err_str(ret));
+
+       copy_fs(c, fs_fd, fs_path, bcachefs_inum, &extents);
+
+       bch2_fs_stop(c);
+
+       printf("Migrate complete, running fsck:\n");
+       opt_set(opts, nostart,  false);
+       opt_set(opts, nochanges, true);
+       opt_set(opts, read_only, true);
+
+       c = bch2_fs_open(path, 1, opts);
+       if (IS_ERR(c))
+               die("Error opening new filesystem: %s", bch2_err_str(PTR_ERR(c)));
+
+       bch2_fs_stop(c);
+       printf("fsck complete\n");
+
+       printf("To mount the new filesystem, run\n"
+              "  mount -t bcachefs -o sb=%llu %s dir\n"
+              "\n"
+              "After verifying that the new filesystem is correct, to create a\n"
+              "superblock at the default offset and finish the migration run\n"
+              "  bcachefs migrate-superblock -d %s -o %llu\n"
+              "\n"
+              "The new filesystem will have a file at /old_migrated_filestem\n"
+              "referencing all disk space that might be used by the existing\n"
+              "filesystem. That file can be deleted once the old filesystem is\n"
+              "no longer needed (and should be deleted prior to running\n"
+              "bcachefs migrate-superblock)\n",
+              sb_offset, dev.path, dev.path, sb_offset);
+       return 0;
+}
+
+int cmd_migrate(int argc, char *argv[])
+{
+       struct format_opts format_opts = format_opts_default();
+       char *fs_path = NULL;
+       bool no_passphrase = false, force = false;
+       int opt;
+
+       struct bch_opt_strs fs_opt_strs =
+               bch2_cmdline_opts_get(&argc, argv, OPT_FORMAT);
+       struct bch_opts fs_opts = bch2_parse_opts(fs_opt_strs);
+
+       while ((opt = getopt_long(argc, argv, "f:Fh",
+                                 migrate_opts, NULL)) != -1)
+               switch (opt) {
+               case 'f':
+                       fs_path = optarg;
+                       break;
+               case 'e':
+                       format_opts.encrypted = true;
+                       break;
+               case 'p':
+                       no_passphrase = true;
+                       break;
+               case 'F':
+                       force = true;
+                       break;
+               case 'h':
+                       migrate_usage();
+                       exit(EXIT_SUCCESS);
+               }
+
+       if (!fs_path)
+               die("Please specify a filesystem to migrate");
+
+       if (format_opts.encrypted && !no_passphrase)
+               format_opts.passphrase = read_passphrase_twice("Enter passphrase: ");
+
+       int ret = migrate_fs(fs_path,
+                            fs_opt_strs,
+                            fs_opts,
+                            format_opts, force);
+       bch2_opt_strs_free(&fs_opt_strs);
+       return ret;
+}
+
+static void migrate_superblock_usage(void)
+{
+       puts("bcachefs migrate-superblock - create default superblock after migrating\n"
+            "Usage: bcachefs migrate-superblock [OPTION]...\n"
+            "\n"
+            "Options:\n"
+            "  -d device     Device to create superblock for\n"
+            "  -o offset     Offset of existing superblock\n"
+            "  -h            Display this help and exit\n"
+            "Report bugs to <linux-bcachefs@vger.kernel.org>");
+}
+
+int cmd_migrate_superblock(int argc, char *argv[])
+{
+       char *dev = NULL;
+       u64 offset = 0;
+       int opt, ret;
+
+       while ((opt = getopt(argc, argv, "d:o:h")) != -1)
+               switch (opt) {
+                       case 'd':
+                               dev = optarg;
+                               break;
+                       case 'o':
+                               ret = kstrtou64(optarg, 10, &offset);
+                               if (ret)
+                                       die("Invalid offset");
+                               break;
+                       case 'h':
+                               migrate_superblock_usage();
+                               exit(EXIT_SUCCESS);
+               }
+
+       if (!dev)
+               die("Please specify a device");
+
+       if (!offset)
+               die("Please specify offset of existing superblock");
+
+       int fd = xopen(dev, O_RDWR);
+       struct bch_sb *sb = __bch2_super_read(fd, offset);
+
+       if (sb->layout.nr_superblocks >= ARRAY_SIZE(sb->layout.sb_offset))
+               die("Can't add superblock: no space left in superblock layout");
+
+       unsigned i;
+       for (i = 0; i < sb->layout.nr_superblocks; i++)
+               if (le64_to_cpu(sb->layout.sb_offset[i]) == BCH_SB_SECTOR)
+                       die("Superblock layout already has default superblock");
+
+       memmove(&sb->layout.sb_offset[1],
+               &sb->layout.sb_offset[0],
+               sb->layout.nr_superblocks * sizeof(u64));
+       sb->layout.nr_superblocks++;
+
+       sb->layout.sb_offset[0] = cpu_to_le64(BCH_SB_SECTOR);
+
+       bch2_super_write(fd, sb);
+       close(fd);
+
+       return 0;
+}