git.sesse.net Git - bcachefs-tools-debian/commitdiff
Encryption support
author Kent Overstreet <kent.overstreet@gmail.com>
Wed, 17 Aug 2016 21:23:03 +0000 (13:23 -0800)
committer Kent Overstreet <kent.overstreet@gmail.com>
Wed, 17 Aug 2016 21:23:03 +0000 (13:23 -0800)
12 files changed:
Makefile
bcache-format.c
bcache-key.c [new file with mode: 0644]
bcache-key.h [new file with mode: 0644]
bcache-ondisk.h
bcache.c
crypto.c [new file with mode: 0644]
crypto.h [new file with mode: 0644]
libbcache.c [new file with mode: 0644]
libbcache.h [new file with mode: 0644]
util.c
util.h

index 248ff2032f05d44d9c94955feae8ab17f555613e..aef4e9e3404519060532222f2803b94b3a5ddad9 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -29,9 +29,9 @@ util.o: CFLAGS += `pkg-config --cflags blkid uuid`
 bcache.o: CFLAGS += `pkg-config --cflags libnih`
 
 bcache-objs = bcache.o bcache-assemble.o bcache-device.o bcache-format.o\
-       bcache-fs.o bcache-run.o
+       bcache-fs.o bcache-run.o bcache-key.o libbcache.o crypto.o
 
-bcache: LDLIBS += `pkg-config --libs uuid blkid libnih`
+bcache: LDLIBS += `pkg-config --libs uuid blkid libnih` -lscrypt -lsodium -lkeyutils
 bcache: $(bcache-objs) util.o libccan.a
 
 bcache-test: LDLIBS += `pkg-config --libs openssl`
index 665a309c55a5eaedc8bcbc4d30ba3e6f731d3c1b..e80ebed639ad4078f7a80a478ff2f03e7a21166a 100644 (file)
--- a/bcache-format.c
+++ b/bcache-format.c
 #include <nih/command.h>
 #include <nih/option.h>
 
-#include "ccan/ilog/ilog.h"
 #include "ccan/darray/darray.h"
 
 #include "bcache.h"
+#include "libbcache.h"
 #include "bcache-format.h"
-
-struct cache_opts {
-       int             fd;
-       const char      *dev;
-       unsigned        bucket_size;
-       unsigned        tier;
-       unsigned        replacement_policy;
-       unsigned        replication_set;
-       u64             size; /* 512 byte sectors */
-
-       u64             first_bucket;
-       u64             nbuckets;
-};
-
-struct backingdev_opts {
-       int             fd;
-       const char      *dev;
-       const char      *label;
-};
-
-static darray(struct cache_opts) cache_devices;
-static darray(struct backingdev_opts) backing_devices;
-
-static char *label = NULL;
+#include "crypto.h"
 
 /* All in units of 512 byte sectors */
-static unsigned block_size, bucket_size, btree_node_size;
-static u64 filesystem_size;
-static unsigned tier, replacement_policy;
 
-static uuid_le set_uuid, user_uuid;
+static darray(struct dev_opts) cache_devices;
+
+static unsigned block_size, btree_node_size;
 static unsigned meta_csum_type = BCH_CSUM_CRC32C;
 static unsigned data_csum_type = BCH_CSUM_CRC32C;
 static unsigned compression_type = BCH_COMPRESSION_NONE;
-
-static unsigned replication_set, meta_replicas = 1, data_replicas = 1;
+static int encrypted;
+static unsigned meta_replicas = 1, data_replicas = 1;
 static unsigned on_error_action;
-static int discard;
-static unsigned version = 1;
+static char *label = NULL;
+static uuid_le uuid;
 
-static u64 data_offset = BDEV_DATA_START_DEFAULT;
-static unsigned cache_mode = CACHE_MODE_WRITEBACK;
+/* Device specific options: */
+static u64 filesystem_size;
+static unsigned bucket_size;
+static unsigned tier;
+static unsigned replacement_policy;
+static int discard;
 
 static int set_cache(NihOption *option, const char *arg)
 {
-       darray_append(cache_devices, (struct cache_opts) {
+       darray_append(cache_devices, (struct dev_opts) {
                .fd                     = dev_open(arg),
                .dev                    = strdup(arg),
+               .size                   = filesystem_size,
                .bucket_size            = bucket_size,
                .tier                   = tier,
                .replacement_policy     = replacement_policy,
-               .replication_set        = replication_set,
-               .size                   = filesystem_size,
-       });
-       return 0;
-}
-
-static int set_bdev(NihOption *option, const char *arg)
-{
-       darray_append(backing_devices, (struct backingdev_opts) {
-               .fd                     = dev_open(arg),
-               .dev                    = strdup(arg),
-               .label                  = label ? strdup(label) : NULL,
+               .discard                = discard,
        });
        return 0;
 }
 
-static int set_cache_set_uuid(NihOption *option, const char *arg)
+static int set_uuid(NihOption *option, const char *arg)
 {
-       if (uuid_parse(arg, user_uuid.b))
+       if (uuid_parse(arg, uuid.b))
                die("Bad uuid");
        return 0;
 }
@@ -158,13 +128,6 @@ static int set_tier(NihOption *option, const char *arg)
        return 0;
 }
 
-static int set_replication_set(NihOption *option, const char *arg)
-{
-       replication_set = strtoul_or_die(arg, CACHE_REPLICATION_SET_MAX,
-                                        "replication set");
-       return 0;
-}
-
 static int set_meta_replicas(NihOption *option, const char *arg)
 {
        meta_replicas = strtoul_or_die(arg, CACHE_SET_META_REPLICAS_WANT_MAX,
@@ -179,359 +142,97 @@ static int set_data_replicas(NihOption *option, const char *arg)
        return 0;
 }
 
-static int set_cache_mode(NihOption *option, const char *arg)
-{
-       cache_mode = read_string_list_or_die(arg, bdev_cache_mode,
-                                            "cache mode");
-       return 0;
-}
-
-static int set_version(NihOption *option, const char *arg)
-{
-       version = strtoul_or_die(arg, 2, "version");
-       return 0;
-}
-
 NihOption opts_format[] = {
 //     { int shortoption, char *longoption, char *help, NihOptionGroup, char *argname, void *value, NihOptionSetter}
 
        { 'C',  "cache",                N_("Format a cache device"),
                NULL, "dev",    NULL,   set_cache },
-       { 'B',  "bdev",                 N_("Format a backing device"),
-               NULL, "dev",    NULL,   set_bdev },
 
-       { 'l',  "label",                N_("label"),
-               NULL, "label",  &label, NULL},
-       { 0,    "cset_uuid",            N_("UUID for the cache set"),
-               NULL, "uuid",   NULL,   set_cache_set_uuid },
-
-       { 'w',  "block",                N_("block size (hard sector size of SSD, often 2k"),
+       { 'w',  "block",                N_("block size"),
                NULL, "size",   NULL,   set_block_size },
-       { 'b',  "bucket",               N_("bucket size"),
-               NULL, "size",   NULL,   set_bucket_sizes },
        { 'n',  "btree_node",           N_("Btree node size, default 256k"),
                NULL, "size",   NULL,   set_btree_node_size },
-       { 0,    "fs_size",              N_("Size of filesystem on device" ),
-               NULL, "size",   NULL,   set_filesystem_size },
-
-       { 'p',  "cache_replacement_policy", NULL,
-               NULL, "(lru|fifo|random)", NULL, set_replacement_policy },
 
        { 0,    "metadata_csum_type",   N_("Checksum type"),
                NULL, "(none|crc32c|crc64)", &meta_csum_type, set_csum_type },
-
        { 0,    "data_csum_type",       N_("Checksum type"),
                NULL, "(none|crc32c|crc64)", &data_csum_type, set_csum_type },
-
        { 0,    "compression_type",     N_("Compression type"),
                NULL, "(none|gzip)", NULL, set_compression_type },
-
-       { 0,    "error_action",         N_("Action to take on filesystem error"),
-               NULL, "(continue|readonly|panic)", NULL, set_on_error_action },
-
-       { 0,    "discard",              N_("Enable discards"),
-               NULL, NULL,             &discard,       NULL },
-
-       { 't',  "tier",                 N_("tier of subsequent devices"),
-               NULL, "#",      NULL,   set_tier },
-
-       { 0,    "replication_set",      N_("replication set of subsequent devices"),
-               NULL, "#",      NULL,   set_replication_set },
+       { 0,    "encrypted",            N_("enable encryption"),
+               NULL, NULL,             &encrypted,     NULL },
 
        { 0,    "meta_replicas",        N_("number of metadata replicas"),
                NULL, "#",      NULL,   set_meta_replicas },
-
        { 0,    "data_replicas",        N_("number of data replicas"),
                NULL, "#",      NULL,   set_data_replicas },
 
-       { 0,    "cache_mode",           N_("Cache mode (for backing devices)"),
-               NULL, "(writethrough|writeback|writearound", NULL, set_cache_mode },
+       { 0,    "error_action",         N_("Action to take on filesystem error"),
+               NULL, "(continue|readonly|panic)", NULL, set_on_error_action },
 
-       { 'o',  "data_offset",          N_("data offset in sectors"),
-               NULL, "offset", &data_offset, NULL},
+       { 'l',  "label",                N_("label"),
+               NULL, "label",  &label, NULL},
+       { 0,    "uuid",                 N_("filesystem UUID"),
+               NULL, "uuid",   NULL,   set_uuid },
 
-       { 'v',  "version",              N_("superblock version"),
-               NULL, "#",      NULL,   set_version},
+       /* Device specific options: */
+       { 0,    "fs_size",              N_("Size of filesystem on device" ),
+               NULL, "size",   NULL,   set_filesystem_size },
+       { 'b',  "bucket",               N_("bucket size"),
+               NULL, "size",   NULL,   set_bucket_sizes },
+       { 't',  "tier",                 N_("tier of subsequent devices"),
+               NULL, "#",      NULL,   set_tier },
+       { 'p',  "cache_replacement_policy", NULL,
+               NULL, "(lru|fifo|random)", NULL, set_replacement_policy },
+       { 0,    "discard",              N_("Enable discards"),
+               NULL, NULL,             &discard,       NULL },
 
        NIH_OPTION_LAST
 };
 
-void __do_write_sb(int fd, void *sb, size_t bytes)
-{
-       char zeroes[SB_SECTOR << 9] = {0};
-
-       /* Zero start of disk */
-       if (pwrite(fd, zeroes, SB_SECTOR << 9, 0) != SB_SECTOR << 9) {
-               perror("write error trying to zero start of disk\n");
-               exit(EXIT_FAILURE);
-       }
-       /* Write superblock */
-       if (pwrite(fd, sb, bytes, SB_SECTOR << 9) != bytes) {
-               perror("write error trying to write superblock\n");
-               exit(EXIT_FAILURE);
-       }
-
-       fsync(fd);
-       close(fd);
-}
-
-#define do_write_sb(_fd, _sb)                  \
-       __do_write_sb(_fd, _sb, ((void *) __bset_bkey_last(_sb)) - (void *) _sb);
-
-void write_backingdev_sb(int fd, unsigned block_size, unsigned mode,
-                        u64 data_offset, const char *label,
-                        uuid_le set_uuid)
-{
-       char uuid_str[40];
-       struct backingdev_sb sb;
-
-       memset(&sb, 0, sizeof(struct cache_sb));
-
-       sb.offset       = SB_SECTOR;
-       sb.version      = BCACHE_SB_VERSION_BDEV;
-       sb.magic        = BCACHE_MAGIC;
-       uuid_generate(sb.disk_uuid.b);
-       sb.set_uuid     = set_uuid;
-       sb.block_size   = block_size;
-
-       uuid_unparse(sb.disk_uuid.b, uuid_str);
-       if (label)
-               memcpy(sb.label, label, SB_LABEL_SIZE);
-
-       SET_BDEV_CACHE_MODE(&sb, mode);
-
-       if (data_offset != BDEV_DATA_START_DEFAULT) {
-               sb.version = BCACHE_SB_VERSION_BDEV_WITH_OFFSET;
-               sb.data_offset = data_offset;
-       }
-
-       sb.csum = csum_set(&sb, BCH_CSUM_CRC64);
-
-       printf("UUID:                   %s\n"
-              "version:                %u\n"
-              "block_size:             %u\n"
-              "data_offset:            %llu\n",
-              uuid_str, (unsigned) sb.version,
-              sb.block_size, data_offset);
-
-       do_write_sb(fd, &sb);
-}
-
-static void format_v0(void)
-{
-       struct cache_opts *i;
-
-       set_uuid = user_uuid;
-
-       darray_foreach(i, cache_devices)
-               bucket_size = min(bucket_size, i->bucket_size);
-
-       struct cache_sb_v0 *sb = calloc(1, sizeof(*sb));
-
-       sb->offset              = SB_SECTOR;
-       sb->version             = BCACHE_SB_VERSION_CDEV_WITH_UUID;
-       sb->magic               = BCACHE_MAGIC;
-       sb->block_size          = block_size;
-       sb->bucket_size         = bucket_size;
-       sb->set_uuid            = set_uuid;
-       sb->nr_in_set           = darray_size(cache_devices);
-
-       if (label)
-               memcpy(sb->label, label, sizeof(sb->label));
-
-       darray_foreach(i, cache_devices) {
-               char uuid_str[40], set_uuid_str[40];
-
-               uuid_generate(sb->uuid.b);
-               sb->nbuckets            = i->nbuckets;
-               sb->first_bucket        = i->first_bucket;
-               sb->nr_this_dev         = i - cache_devices.item;
-               sb->csum                = csum_set(sb, BCH_CSUM_CRC64);
-
-               uuid_unparse(sb->uuid.b, uuid_str);
-               uuid_unparse(sb->set_uuid.b, set_uuid_str);
-               printf("UUID:                   %s\n"
-                      "Set UUID:               %s\n"
-                      "version:                %u\n"
-                      "nbuckets:               %llu\n"
-                      "block_size:             %u\n"
-                      "bucket_size:            %u\n"
-                      "nr_in_set:              %u\n"
-                      "nr_this_dev:            %u\n"
-                      "first_bucket:           %u\n",
-                      uuid_str, set_uuid_str,
-                      (unsigned) sb->version,
-                      sb->nbuckets,
-                      sb->block_size,
-                      sb->bucket_size,
-                      sb->nr_in_set,
-                      sb->nr_this_dev,
-                      sb->first_bucket);
-
-               do_write_sb(i->fd, sb);
-       }
-}
-
-static void format_v1(void)
-{
-       struct cache_sb *sb;
-       struct cache_opts *i;
-
-       sb = calloc(1, sizeof(*sb) + sizeof(struct cache_member) *
-                   darray_size(cache_devices));
-
-       sb->offset      = __cpu_to_le64(SB_SECTOR);
-       sb->version     = __cpu_to_le64(BCACHE_SB_VERSION_CDEV_V3);
-       sb->magic       = BCACHE_MAGIC;
-       sb->block_size  = __cpu_to_le16(block_size);
-       sb->set_uuid    = set_uuid;
-       sb->user_uuid   = user_uuid;
-       sb->nr_in_set   = darray_size(cache_devices);
-
-       if (label)
-               memcpy(sb->label, label, sizeof(sb->label));
-
-       /*
-        * don't have a userspace crc32c implementation handy, just always use
-        * crc64
-        */
-       SET_CACHE_SB_CSUM_TYPE(sb,              BCH_CSUM_CRC64);
-       SET_CACHE_META_PREFERRED_CSUM_TYPE(sb,  meta_csum_type);
-       SET_CACHE_DATA_PREFERRED_CSUM_TYPE(sb,  data_csum_type);
-       SET_CACHE_COMPRESSION_TYPE(sb,          compression_type);
-
-       SET_CACHE_BTREE_NODE_SIZE(sb,           btree_node_size);
-       SET_CACHE_SET_META_REPLICAS_WANT(sb,    meta_replicas);
-       SET_CACHE_SET_META_REPLICAS_HAVE(sb,    meta_replicas);
-       SET_CACHE_SET_DATA_REPLICAS_WANT(sb,    data_replicas);
-       SET_CACHE_SET_DATA_REPLICAS_HAVE(sb,    data_replicas);
-       SET_CACHE_ERROR_ACTION(sb,              on_error_action);
-
-       darray_foreach(i, cache_devices) {
-               struct cache_member *m = sb->members +
-                       (i - cache_devices.item);
-
-               uuid_generate(m->uuid.b);
-               m->nbuckets     = __cpu_to_le64(i->nbuckets);
-               m->first_bucket = __cpu_to_le16(i->first_bucket);
-               m->bucket_size  = __cpu_to_le16(i->bucket_size);
-
-               if (__le64_to_cpu(m->nbuckets < 1 << 7))
-                       die("Not enough buckets: %llu, need %u",
-                           __le64_to_cpu(m->nbuckets), 1 << 7);
-
-               SET_CACHE_TIER(m,               i->tier);
-               SET_CACHE_REPLICATION_SET(m,    i->replication_set);
-               SET_CACHE_REPLACEMENT(m,        i->replacement_policy);
-               SET_CACHE_DISCARD(m,            discard);
-       }
-
-       sb->u64s = __cpu_to_le16(bch_journal_buckets_offset(sb));
-
-       darray_foreach(i, cache_devices) {
-               char uuid_str[40], set_uuid_str[40];
-               struct cache_member *m = sb->members +
-                       (i - cache_devices.item);
-
-               sb->disk_uuid   = m->uuid;
-               sb->nr_this_dev = i - cache_devices.item;
-               sb->csum        = __cpu_to_le64(__csum_set(sb, __le16_to_cpu(sb->u64s),
-                                                          CACHE_SB_CSUM_TYPE(sb)));
-
-               uuid_unparse(sb->disk_uuid.b, uuid_str);
-               uuid_unparse(sb->user_uuid.b, set_uuid_str);
-               printf("UUID:                   %s\n"
-                      "Set UUID:               %s\n"
-                      "version:                %u\n"
-                      "nbuckets:               %llu\n"
-                      "block_size:             %u\n"
-                      "bucket_size:            %u\n"
-                      "nr_in_set:              %u\n"
-                      "nr_this_dev:            %u\n"
-                      "first_bucket:           %u\n",
-                      uuid_str, set_uuid_str,
-                      (unsigned) sb->version,
-                      __le64_to_cpu(m->nbuckets),
-                      __le16_to_cpu(sb->block_size),
-                      __le16_to_cpu(m->bucket_size),
-                      sb->nr_in_set,
-                      sb->nr_this_dev,
-                      __le16_to_cpu(m->first_bucket));
-
-               do_write_sb(i->fd, sb);
-       }
-}
-
 int cmd_format(NihCommand *command, char * const *args)
 {
-       struct cache_opts *i;
-       struct backingdev_opts *ib;
+       char *passphrase = NULL;
 
-       if (!darray_size(cache_devices) &&
-           !darray_size(backing_devices))
+       if (!darray_size(cache_devices))
                die("Please supply a device");
 
-       if (uuid_is_null(user_uuid.b))
-               uuid_generate(user_uuid.b);
-
-       uuid_generate(set_uuid.b);
-
-       if (!block_size) {
-               darray_foreach(i, cache_devices)
-                       block_size = max(block_size,
-                                        get_blocksize(i->dev, i->fd));
-
-               darray_foreach(ib, backing_devices)
-                       block_size = max(block_size,
-                                        get_blocksize(ib->dev, ib->fd));
-       }
+       if (uuid_is_null(uuid.b))
+               uuid_generate(uuid.b);
 
-       darray_foreach(i, cache_devices) {
-               if (!i->size)
-                       i->size = get_size(i->dev, i->fd);
+       if (encrypted) {
+               char *pass2;
 
-               if (!i->bucket_size) {
-                       u64 bytes = i->size << 9;
+               passphrase = read_passphrase("Enter passphrase: ");
+               pass2 = read_passphrase("Enter same passphrase again: ");
 
-                       if (bytes < 1 << 20) /* 1M device - 256 4k buckets*/
-                               i->bucket_size = rounddown_pow_of_two(bytes >> 17);
-                       else
-                               /* Max 1M bucket at around 256G */
-                               i->bucket_size = 8 << min((ilog2(bytes >> 20) / 2), 9U);
+               if (strcmp(passphrase, pass2)) {
+                       memzero_explicit(passphrase, strlen(passphrase));
+                       memzero_explicit(pass2, strlen(pass2));
+                       die("Passphrases do not match");
                }
 
-               if (i->bucket_size < block_size)
-                       die("Bucket size cannot be smaller than block size");
-
-               i->nbuckets     = i->size / i->bucket_size;
-               i->first_bucket = (23 / i->bucket_size) + 3;
-
-               if (i->nbuckets < 1 << 7)
-                       die("Not enough buckets: %llu, need %u",
-                           i->nbuckets, 1 << 7);
+               memzero_explicit(pass2, strlen(pass2));
+               free(pass2);
        }
 
-       if (!btree_node_size) {
-               /* 256k default btree node size */
-               btree_node_size = 512;
-
-               darray_foreach(i, cache_devices)
-                       btree_node_size = min(btree_node_size, i->bucket_size);
+       bcache_format(cache_devices.item, darray_size(cache_devices),
+                     block_size,
+                     btree_node_size,
+                     meta_csum_type,
+                     data_csum_type,
+                     compression_type,
+                     passphrase,
+                     meta_replicas,
+                     data_replicas,
+                     on_error_action,
+                     label,
+                     uuid);
+
+       if (passphrase) {
+               memzero_explicit(passphrase, strlen(passphrase));
+               free(passphrase);
        }
 
-       switch (version) {
-       case 0:
-               format_v0();
-               break;
-       case 1:
-               format_v1();
-               break;
-       }
-
-       darray_foreach(ib, backing_devices)
-               write_backingdev_sb(ib->fd, block_size, cache_mode,
-                                   data_offset, ib->label,
-                                   set_uuid);
-
        return 0;
 }
diff --git a/bcache-key.c b/bcache-key.c
new file mode 100644 (file)
index 0000000..53dbe37
--- /dev/null
@@ -0,0 +1,58 @@
+#include <errno.h>
+#include <unistd.h>
+#include <keyutils.h>
+#include <uuid/uuid.h>
+#include <nih/command.h>
+#include <nih/option.h>
+
+#include "bcache.h"
+#include "libbcache.h"
+#include "crypto.h"
+
+NihOption opts_unlock[] = {
+       NIH_OPTION_LAST
+};
+
+int cmd_unlock(NihCommand *command, char * const *args)
+{
+       struct bcache_disk_key disk_key;
+       struct bcache_key key;
+       struct cache_sb sb;
+       char *passphrase;
+       char uuid[40];
+       char description[60];
+
+       if (!args[0] || args[1])
+               die("please supply a single device");
+
+       bcache_super_read(args[0], &sb);
+
+       if (!CACHE_SET_ENCRYPTION_KEY(&sb))
+               die("filesystem is not encrypted");
+
+       memcpy(&disk_key, sb.encryption_key, sizeof(disk_key));
+
+       if (!memcmp(&disk_key, bch_key_header, sizeof(bch_key_header)))
+               die("filesystem does not have encryption key");
+
+       passphrase = read_passphrase("Enter passphrase: ");
+
+       derive_passphrase(&key, passphrase);
+       disk_key_encrypt(&disk_key, &key);
+
+       if (memcmp(&disk_key, bch_key_header, sizeof(bch_key_header)))
+               die("incorrect passphrase");
+
+       uuid_unparse_lower(sb.user_uuid.b, uuid);
+       sprintf(description, "bcache:%s", uuid);
+
+       if (add_key("logon", description, &key, sizeof(key),
+                   KEY_SPEC_USER_KEYRING) < 0)
+               die("add_key error: %s", strerror(errno));
+
+       memzero_explicit(&disk_key, sizeof(disk_key));
+       memzero_explicit(&key, sizeof(key));
+       memzero_explicit(passphrase, strlen(passphrase));
+       free(passphrase);
+       return 0;
+}
diff --git a/bcache-key.h b/bcache-key.h
new file mode 100644 (file)
index 0000000..0a4df25
--- /dev/null
@@ -0,0 +1,7 @@
+#ifndef _BCACHE_KEY_H
+#define _BCACHE_KEY_H
+
+extern NihOption opts_unlock[];
+int cmd_unlock(NihCommand *, char * const *);
+
+#endif /* _BCACHE_KEY_H */
index 8981bbd3134a402bb220034cb501a944204be52e..4ceb10bb2113bcf3c24bbc4ecbb7932a032af472 100644 (file)
--- a/bcache-ondisk.h
+++ b/bcache-ondisk.h
@@ -77,7 +77,7 @@ struct bpos {
 #else
 #error edit for your odd byteorder.
 #endif
-} __attribute__((packed)) __attribute__((aligned(4)));
+} __attribute__((packed, aligned(4)));
 
 #define KEY_INODE_MAX                  ((__u64)~0ULL)
 #define KEY_OFFSET_MAX                 ((__u64)~0ULL)
@@ -102,6 +102,16 @@ struct bch_val {
        __u64           __nothing[0];
 };
 
+struct bversion {
+#if defined(__LITTLE_ENDIAN)
+       __u64           low;
+       __u32           high;
+#elif defined(__BIG_ENDIAN)
+       __u32           high;
+       __u64           low;
+#endif
+} __attribute__((packed, aligned(4)));
+
 struct bkey {
        __u64           _data[0];
 
@@ -117,17 +127,17 @@ struct bkey {
 #if defined(__LITTLE_ENDIAN)
        __u8            pad[1];
 
-       __u32           version;
+       struct bversion version;
        __u32           size;           /* extent size, in sectors */
        struct bpos     p;
 #elif defined(__BIG_ENDIAN)
        struct bpos     p;
        __u32           size;           /* extent size, in sectors */
-       __u32           version;
+       struct bversion version;
 
        __u8            pad[1];
 #endif
-} __attribute__((packed)) __attribute__((aligned(8)));
+} __attribute__((packed, aligned(8)));
 
 struct bkey_packed {
        __u64           _data[0];
@@ -149,7 +159,7 @@ struct bkey_packed {
         * to the same size as struct bkey should hopefully be safest.
         */
        __u8            pad[sizeof(struct bkey) - 3];
-} __attribute__((packed)) __attribute__((aligned(8)));
+} __attribute__((packed, aligned(8)));
 
 #define BKEY_U64s                      (sizeof(struct bkey) / sizeof(__u64))
 #define KEY_PACKED_BITS_START          24
@@ -164,7 +174,8 @@ enum bch_bkey_fields {
        BKEY_FIELD_OFFSET,
        BKEY_FIELD_SNAPSHOT,
        BKEY_FIELD_SIZE,
-       BKEY_FIELD_VERSION,
+       BKEY_FIELD_VERSION_HIGH,
+       BKEY_FIELD_VERSION_LOW,
        BKEY_NR_FIELDS,
 };
 
@@ -180,7 +191,8 @@ enum bch_bkey_fields {
                bkey_format_field(OFFSET,       p.offset),              \
                bkey_format_field(SNAPSHOT,     p.snapshot),            \
                bkey_format_field(SIZE,         size),                  \
-               bkey_format_field(VERSION,      version),               \
+               bkey_format_field(VERSION_HIGH, version.high),          \
+               bkey_format_field(VERSION_LOW,  version.low),           \
        },                                                              \
 })
 
@@ -358,39 +370,46 @@ struct bch_extent_crc32 {
                                uncompressed_size:8,
                                csum_type:4,
                                compression_type:4;
+       __u32                   csum;
 #elif defined (__BIG_ENDIAN_BITFIELD)
-       __u32                   csum_type:4,
-                               compression_type:4,
+       __u32                   csum;
+       __u32                   compression_type:4,
+                               csum_type:4,
                                uncompressed_size:8,
                                compressed_size:8,
                                offset:7,
                                type:1;
 #endif
-       __u32                   csum;
-} __attribute__((packed)) __attribute__((aligned(8)));
+} __attribute__((packed, aligned(8)));
 
 #define CRC32_EXTENT_SIZE_MAX  (1U << 7)
 
+/* 64k */
+#define BCH_COMPRESSED_EXTENT_MAX 128
+
 struct bch_extent_crc64 {
 #if defined(__LITTLE_ENDIAN_BITFIELD)
        __u64                   type:3,
-                               compressed_size:18,
-                               uncompressed_size:18,
-                               offset:17,
+                               compressed_size:10,
+                               uncompressed_size:10,
+                               offset:10,
+                               nonce:23,
                                csum_type:4,
                                compression_type:4;
 #elif defined (__BIG_ENDIAN_BITFIELD)
-       __u64                   csum_type:4,
-                               compression_type:4,
-                               offset:17,
-                               uncompressed_size:18,
-                               compressed_size:18,
+       __u64                   compression_type:4,
+                               csum_type:4,
+                               nonce:23,
+                               offset:10,
+                               uncompressed_size:10,
+                               compressed_size:10,
                                type:3;
 #endif
        __u64                   csum;
-} __attribute__((packed)) __attribute__((aligned(8)));
+} __attribute__((packed, aligned(8)));
 
-#define CRC64_EXTENT_SIZE_MAX  (1U << 17)
+#define CRC64_EXTENT_SIZE_MAX  (1U << 10) /* inclusive */
+#define CRC64_NONCE_MAX                (1U << 23) /* exclusive */
 
 /*
  * @reservation - pointer hasn't been written to, just reserved
@@ -411,10 +430,17 @@ struct bch_extent_ptr {
                                erasure_coded:1,
                                type:2;
 #endif
-} __attribute__((packed)) __attribute__((aligned(8)));
+} __attribute__((packed, aligned(8)));
 
 union bch_extent_entry {
-       __u8                            type;
+#if defined(__LITTLE_ENDIAN__) ||  BITS_PER_LONG == 64
+       unsigned long                   type;
+#elif BITS_PER_LONG == 32
+       struct {
+               unsigned long           pad;
+               unsigned long           type;
+       };
+#endif
        struct bch_extent_crc32         crc32;
        struct bch_extent_crc64         crc64;
        struct bch_extent_ptr           ptr;
@@ -441,9 +467,29 @@ struct bch_extent {
 
        union bch_extent_entry  start[0];
        __u64                   _data[0];
-} __attribute__((packed)) __attribute__((aligned(8)));
+} __attribute__((packed, aligned(8)));
 BKEY_VAL_TYPE(extent,          BCH_EXTENT);
 
+/* Maximum size (in u64s) a single pointer could be: */
+#define BKEY_EXTENT_PTR_U64s_MAX\
+       ((sizeof(struct bch_extent_crc64) +                     \
+         sizeof(struct bch_extent_ptr)) / sizeof(u64))
+
+/* Maximum possible size of an entire extent value: */
+#if 0
+/* There's a hack in the keylist code that needs to be fixed.. */
+#define BKEY_EXTENT_VAL_U64s_MAX                               \
+       (BKEY_EXTENT_PTR_U64s_MAX * BCH_REPLICAS_MAX)
+#else
+#define BKEY_EXTENT_VAL_U64s_MAX       8
+#endif
+
+/* * Maximum possible size of an entire extent, key + value: */
+#define BKEY_EXTENT_U64s_MAX   (BKEY_U64s + BKEY_EXTENT_VAL_U64s_MAX)
+
+#define BKEY_BTREE_PTR_VAL_U64s_MAX    BCH_REPLICAS_MAX
+#define BKEY_BTREE_PTR_U64s_MAX                (BKEY_U64s + BCH_REPLICAS_MAX)
+
 /* Inodes */
 
 #define BLOCKDEV_INODE_MAX     4096
@@ -453,18 +499,8 @@ BKEY_VAL_TYPE(extent,              BCH_EXTENT);
 enum bch_inode_types {
        BCH_INODE_FS            = 128,
        BCH_INODE_BLOCKDEV      = 129,
-       BCH_INODE_CACHED_DEV    = 130,
 };
 
-enum {
-       BCH_FS_PRIVATE_START            = 16,
-       __BCH_INODE_I_SIZE_DIRTY        = 16,
-};
-
-#define BCH_FL_USER_FLAGS      ((1U << BCH_FS_PRIVATE_START) - 1)
-
-#define BCH_INODE_I_SIZE_DIRTY (1 << __BCH_INODE_I_SIZE_DIRTY)
-
 struct bch_inode {
        struct bch_val          v;
 
@@ -478,24 +514,64 @@ struct bch_inode {
        __le64                  i_mtime;
 
        __le64                  i_size;
+       __le64                  i_sectors;
 
        __le32                  i_uid;
        __le32                  i_gid;
        __le32                  i_nlink;
 
        __le32                  i_dev;
+
+       __le64                  i_hash_seed;
 } __attribute__((packed));
 BKEY_VAL_TYPE(inode,           BCH_INODE_FS);
 
+enum {
+       /*
+        * User flags (get/settable with FS_IOC_*FLAGS, correspond to FS_*_FL
+        * flags)
+        */
+       __BCH_INODE_SYNC        = 0,
+       __BCH_INODE_IMMUTABLE   = 1,
+       __BCH_INODE_APPEND      = 2,
+       __BCH_INODE_NODUMP      = 3,
+       __BCH_INODE_NOATIME     = 4,
+
+       __BCH_INODE_I_SIZE_DIRTY= 5,
+       __BCH_INODE_I_SECTORS_DIRTY= 6,
+
+       /* not implemented yet: */
+       __BCH_INODE_HAS_XATTRS  = 7, /* has xattrs in xattr btree */
+};
+
+LE32_BITMASK(INODE_STR_HASH_TYPE, struct bch_inode, i_flags, 28, 32);
+
+#define BCH_INODE_SYNC         (1 << __BCH_INODE_SYNC)
+#define BCH_INODE_IMMUTABLE    (1 << __BCH_INODE_IMMUTABLE)
+#define BCH_INODE_APPEND       (1 << __BCH_INODE_APPEND)
+#define BCH_INODE_NODUMP       (1 << __BCH_INODE_NODUMP)
+#define BCH_INODE_NOATIME      (1 << __BCH_INODE_NOATIME)
+#define BCH_INODE_I_SIZE_DIRTY (1 << __BCH_INODE_I_SIZE_DIRTY)
+#define BCH_INODE_I_SECTORS_DIRTY (1 << __BCH_INODE_I_SECTORS_DIRTY)
+#define BCH_INODE_HAS_XATTRS   (1 << __BCH_INODE_HAS_XATTRS)
+
 struct bch_inode_blockdev {
        struct bch_val          v;
-       struct bch_inode        i_inode;
+
+       __le64                  i_size;
+       __le64                  i_flags;
+
+       /* Seconds: */
+       __le64                  i_ctime;
+       __le64                  i_mtime;
 
        uuid_le                 i_uuid;
        __u8                    i_label[32];
-} __attribute__((packed));
+} __attribute__((packed, aligned(8)));
 BKEY_VAL_TYPE(inode_blockdev,  BCH_INODE_BLOCKDEV);
 
+/* Thin provisioned volume, or cache for another block device? */
+LE64_BITMASK(CACHED_DEV,       struct bch_inode_blockdev, i_flags, 0,  1)
 /* Dirents */
 
 /*
@@ -644,7 +720,9 @@ struct cache_sb {
         * to change:
         */
        uuid_le                 user_uuid;
-       __le64                  pad1[6];
+
+       __le64                  flags2;
+       __le64                  encryption_key[5];
 
        /* Number of cache_member entries: */
        __u8                    nr_in_set;
@@ -671,9 +749,11 @@ struct cache_sb {
        };
 };
 
-LE64_BITMASK(CACHE_SYNC,               struct cache_sb, flags, 0, 1);
+/* XXX: rename CACHE_SET -> BCH_FS or something? */
+
+LE64_BITMASK(CACHE_SET_SYNC,           struct cache_sb, flags, 0, 1);
 
-LE64_BITMASK(CACHE_ERROR_ACTION,       struct cache_sb, flags, 1, 4);
+LE64_BITMASK(CACHE_SET_ERROR_ACTION,   struct cache_sb, flags, 1, 4);
 #define BCH_ON_ERROR_CONTINUE          0U
 #define BCH_ON_ERROR_RO                        1U
 #define BCH_ON_ERROR_PANIC             2U
@@ -686,35 +766,144 @@ LE64_BITMASK(CACHE_SET_DATA_REPLICAS_WANT,struct cache_sb, flags, 8, 12);
 
 LE64_BITMASK(CACHE_SB_CSUM_TYPE,       struct cache_sb, flags, 12, 16);
 
-LE64_BITMASK(CACHE_META_PREFERRED_CSUM_TYPE,struct cache_sb, flags, 16, 20);
+LE64_BITMASK(CACHE_SET_META_CSUM_TYPE,struct cache_sb, flags, 16, 20);
 #define BCH_CSUM_NONE                  0U
 #define BCH_CSUM_CRC32C                        1U
 #define BCH_CSUM_CRC64                 2U
-#define BCH_CSUM_NR                    3U
+#define BCH_CSUM_CHACHA20_POLY1305     3U
+#define BCH_CSUM_NR                    4U
+
+/*
+ * Tells whether checksum type @type is one of the encrypting types.
+ * BCH_CSUM_CHACHA20_POLY1305 is the only value that yields 1; everything
+ * else yields 0.
+ */
+static inline _Bool bch_csum_type_is_encryption(unsigned type)
+{
+       return type == BCH_CSUM_CHACHA20_POLY1305;
+}
 
-LE64_BITMASK(CACHE_BTREE_NODE_SIZE,    struct cache_sb, flags, 20, 36);
+LE64_BITMASK(CACHE_SET_BTREE_NODE_SIZE,        struct cache_sb, flags, 20, 36);
 
 LE64_BITMASK(CACHE_SET_META_REPLICAS_HAVE,struct cache_sb, flags, 36, 40);
 LE64_BITMASK(CACHE_SET_DATA_REPLICAS_HAVE,struct cache_sb, flags, 40, 44);
 
-LE64_BITMASK(CACHE_SET_DIRENT_CSUM_TYPE,struct cache_sb, flags, 44, 48);
-enum {
-       BCH_DIRENT_CSUM_CRC32C          = 0,
-       BCH_DIRENT_CSUM_CRC64           = 1,
-       BCH_DIRENT_CSUM_SIPHASH         = 2,
-       BCH_DIRENT_CSUM_SHA1            = 3,
+LE64_BITMASK(CACHE_SET_STR_HASH_TYPE,struct cache_sb, flags, 44, 48);
+enum bch_str_hash_type {
+       BCH_STR_HASH_CRC32C             = 0,
+       BCH_STR_HASH_CRC64              = 1,
+       BCH_STR_HASH_SIPHASH            = 2,
+       BCH_STR_HASH_SHA1               = 3,
 };
 
-LE64_BITMASK(CACHE_DATA_PREFERRED_CSUM_TYPE, struct cache_sb, flags, 48, 52);
+#define BCH_STR_HASH_NR                        4
+
+LE64_BITMASK(CACHE_SET_DATA_CSUM_TYPE, struct cache_sb, flags, 48, 52);
 
-LE64_BITMASK(CACHE_COMPRESSION_TYPE,   struct cache_sb, flags, 52, 56);
+LE64_BITMASK(CACHE_SET_COMPRESSION_TYPE, struct cache_sb, flags, 52, 56);
 enum {
        BCH_COMPRESSION_NONE            = 0,
-       BCH_COMPRESSION_LZO1X           = 1,
+       BCH_COMPRESSION_LZ4             = 1,
        BCH_COMPRESSION_GZIP            = 2,
-       BCH_COMPRESSION_XZ              = 3,
 };
 
+#define BCH_COMPRESSION_NR             3U
+
+/* Limit inode numbers to 32 bits: */
+LE64_BITMASK(CACHE_INODE_32BIT,                struct cache_sb, flags, 56, 57);
+
+LE64_BITMASK(CACHE_SET_GC_RESERVE,     struct cache_sb, flags, 57, 63);
+
+LE64_BITMASK(CACHE_SET_ROOT_RESERVE,   struct cache_sb, flags2, 0,  6);
+
+/*
+ * If nonzero, encryption is enabled; overrides DATA/META_CSUM_TYPE. Also
+ * indicates the encryption algorithm in use, if/when we get more than one.
+ */
+LE64_BITMASK(CACHE_SET_ENCRYPTION_TYPE,        struct cache_sb, flags2, 6,  10);
+
+/*
+ * If nonzero, we have an encryption key in the superblock, which is the key
+ * used to encrypt all other data/metadata. The key will normally be encrypted
+ * with the key userspace provides, but if encryption has been turned off we'll
+ * just store the master key unencrypted in the superblock so we can access the
+ * previously encrypted data.
+ */
+LE64_BITMASK(CACHE_SET_ENCRYPTION_KEY, struct cache_sb, flags2, 10, 11);
+
+/* options: */
+
+/**
+ * CACHE_SET_OPT(name, choices, min, max, sb_option, sysfs_writeable)
+ *
+ * @name - name of mount option, sysfs attribute, and struct cache_set_opts
+ *     member
+ *
+ * @choices - array of strings that the user can select from - option is by
+ *     array index
+ *
+ *     Booleans are special cased; if @choices is bch_bool_opt the mount
+ *     options name and noname will work as expected.
+ *
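+ * @min, @max - bounds on the option's value; going by the entries below
+ *     (e.g. 0, 2 for booleans), @max appears to be exclusive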
+ *
+ * @sb_option - name of corresponding superblock option
+ *
+ * @sysfs_writeable - if true, option will be modifiable at runtime via sysfs
+ */
+
+#define CACHE_SET_SB_OPTS()                                    \
+       CACHE_SET_OPT(errors,                                   \
+                     bch_error_actions,                        \
+                     0, BCH_NR_ERROR_ACTIONS,                  \
+                     CACHE_SET_ERROR_ACTION,                   \
+                     true)                                     \
+       CACHE_SET_OPT(metadata_replicas,                        \
+                     bch_uint_opt,                             \
+                     0, BCH_REPLICAS_MAX,                      \
+                     CACHE_SET_META_REPLICAS_WANT,             \
+                     false)                                    \
+       CACHE_SET_OPT(data_replicas,                            \
+                     bch_uint_opt,                             \
+                     0, BCH_REPLICAS_MAX,                      \
+                     CACHE_SET_DATA_REPLICAS_WANT,             \
+                     false)                                    \
+       CACHE_SET_OPT(metadata_checksum,                        \
+                     bch_csum_types,                           \
+                     0, BCH_CSUM_NR,                           \
+                     CACHE_SET_META_CSUM_TYPE,                 \
+                     true)                                     \
+       CACHE_SET_OPT(data_checksum,                            \
+                     bch_csum_types,                           \
+                     0, BCH_CSUM_NR,                           \
+                     CACHE_SET_DATA_CSUM_TYPE,                 \
+                     true)                                     \
+       CACHE_SET_OPT(compression,                              \
+                     bch_compression_types,                    \
+                     0, BCH_COMPRESSION_NR,                    \
+                     CACHE_SET_COMPRESSION_TYPE,               \
+                     true)                                     \
+       CACHE_SET_OPT(str_hash,                                 \
+                     bch_str_hash_types,                       \
+                     0, BCH_STR_HASH_NR,                       \
+                     CACHE_SET_STR_HASH_TYPE,                  \
+                     true)                                     \
+       CACHE_SET_OPT(inodes_32bit,                             \
+                     bch_bool_opt, 0, 2,                       \
+                     CACHE_INODE_32BIT,                        \
+                     true)                                     \
+       CACHE_SET_OPT(gc_reserve_percent,                       \
+                     bch_uint_opt,                             \
+                     5, 21,                                    \
+                     CACHE_SET_GC_RESERVE,                     \
+                     false)                                    \
+       CACHE_SET_OPT(root_reserve_percent,                     \
+                     bch_uint_opt,                             \
+                     0, 21,                                    \
+                     CACHE_SET_ROOT_RESERVE,                   \
+                     false)
+
 /* backing device specific stuff: */
 
 struct backingdev_sb {
@@ -828,18 +1017,13 @@ static inline __u64 bset_magic(struct cache_sb *sb)
        return __le64_to_cpu(sb->set_magic) ^ BSET_MAGIC;
 }
 
-/*
- * Journal
- *
- * On disk format for a journal entry:
- * seq is monotonically increasing; every journal entry has its own unique
- * sequence number.
- *
- * last_seq is the oldest journal entry that still has keys the btree hasn't
- * flushed to disk yet.
- *
- * version is for on disk format changes.
- */
+/* 128 bits, sufficient for cryptographic MACs: */
+struct bch_csum {
+       __le64                  lo;
+       __le64                  hi;
+};
+
+/* Journal */
 
 #define BCACHE_JSET_VERSION_UUIDv1     1
 #define BCACHE_JSET_VERSION_UUID       1       /* Always latest UUID format */
@@ -860,12 +1044,11 @@ struct jset_entry {
 
 #define JSET_KEYS_U64s (sizeof(struct jset_entry) / sizeof(__u64))
 
-
-LE32_BITMASK(JKEYS_TYPE,       struct jset_entry, flags, 0, 8);
+LE32_BITMASK(JOURNAL_ENTRY_TYPE,       struct jset_entry, flags, 0, 8);
 enum {
-       JKEYS_BTREE_KEYS                = 0,
-       JKEYS_BTREE_ROOT                = 1,
-       JKEYS_PRIO_PTRS                 = 2,
+       JOURNAL_ENTRY_BTREE_KEYS        = 0,
+       JOURNAL_ENTRY_BTREE_ROOT        = 1,
+       JOURNAL_ENTRY_PRIO_PTRS         = 2,
 
        /*
         * Journal sequence numbers can be blacklisted: bsets record the max
@@ -877,11 +1060,22 @@ enum {
         * and then record that we skipped it so that the next time we crash and
         * recover we don't think there was a missing journal entry.
         */
-       JKEYS_JOURNAL_SEQ_BLACKLISTED   = 3,
+       JOURNAL_ENTRY_JOURNAL_SEQ_BLACKLISTED = 3,
 };
 
+/*
+ * On disk format for a journal entry:
+ * seq is monotonically increasing; every journal entry has its own unique
+ * sequence number.
+ *
+ * last_seq is the oldest journal entry that still has keys the btree hasn't
+ * flushed to disk yet.
+ *
+ * version is for on disk format changes.
+ */
 struct jset {
-       __le64                  csum;
+       struct bch_csum         csum;
+
        __le64                  magic;
        __le32                  version;
        __le32                  flags;
@@ -901,11 +1095,15 @@ struct jset {
 };
 
 LE32_BITMASK(JSET_CSUM_TYPE,   struct jset, flags, 0, 4);
+LE32_BITMASK(JSET_BIG_ENDIAN,  struct jset, flags, 4, 5);
+
+#define BCH_JOURNAL_BUCKETS_MIN                20
 
 /* Bucket prios/gens */
 
 struct prio_set {
-       __le64                  csum;
+       struct bch_csum         csum;
+
        __le64                  magic;
        __le32                  version;
        __le32                  flags;
@@ -985,7 +1183,7 @@ LE32_BITMASK(BSET_BTREE_LEVEL,     struct bset, flags, 4, 8);
 LE32_BITMASK(BSET_BIG_ENDIAN,  struct bset, flags, 8, 9);
 
 struct btree_node {
-       __le64                  csum;
+       struct bch_csum         csum;
        __le64                  magic;
 
        /* Closed interval: */
@@ -997,10 +1195,22 @@ struct btree_node {
 } __attribute__((packed));
 
 struct btree_node_entry {
-       __le64                  csum;
+       struct bch_csum         csum;
+
        struct bset             keys;
 } __attribute__((packed));
 
+/* Crypto: */
+
+struct nonce {
+       __le32 d[4];
+};
+
+#define BCACHE_MASTER_KEY_HEADER       "bch**key"
+#define BCACHE_MASTER_KEY_NONCE                ((struct nonce)                 \
+       {{ __cpu_to_le32(1), __cpu_to_le32(2),                          \
+          __cpu_to_le32(3), __cpu_to_le32(4) }})
+
 /* OBSOLETE */
 
 #define BITMASK(name, type, field, offset, end)                                \
index 511db7ba11290540ea5a998ef17ac66fc246c7a2..bf975ae89bbcc5e04de9b762d42c6a58ecdd0b9f 100644 (file)
--- a/bcache.c
+++ b/bcache.c
@@ -31,6 +31,7 @@
 #include "bcache-format.h"
 #include "bcache-fs.h"
 #include "bcache-run.h"
+#include "bcache-key.h"
 
 #define PACKAGE_NAME "bcache"
 #define PACKAGE_VERSION "1.0"
@@ -130,6 +131,11 @@ static NihCommand commands[] = {
        CMD(device_remove, N_("<volume> <devices>"),
            "Removes a device from its volume"),
 
+       /* Crypto */
+
+       CMD(unlock, N_("<device>"),
+           "Unlock an encrypted filesystem"),
+
 #if 0
        CMD(modify, N_("<options>"),
            "Modifies attributes related to the volume",
diff --git a/crypto.c b/crypto.c
new file mode 100644 (file)
index 0000000..3f68835
--- /dev/null
+++ b/crypto.c
@@ -0,0 +1,130 @@
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <termios.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <linux/random.h>
+#include <libscrypt.h>
+#include <sodium/crypto_stream_chacha20.h>
+
+#include "crypto.h"
+
+/*
+ * Print @prompt on stderr and read one line from stdin with terminal echo
+ * turned off.
+ *
+ * Returns the buffer allocated by getline(); the caller should free() it.
+ * The line's trailing newline, if any, is left in the buffer. Failures are
+ * reported through die(), which is not expected to return.
+ */
+char *read_passphrase(const char *prompt)
+{
+       struct termios old, new;
+       char *buf = NULL;
+       size_t buflen = 0;
+       ssize_t ret;
+       int saved_errno;
+
+       fprintf(stderr, "%s", prompt);
+       fflush(stderr);
+
+       if (tcgetattr(fileno(stdin), &old))
+               die("error getting terminal attrs");
+
+       new = old;
+       new.c_lflag &= ~ECHO;
+       if (tcsetattr(fileno(stdin), TCSAFLUSH, &new))
+               die("error setting terminal attrs");
+
+       ret = getline(&buf, &buflen, stdin);
+       saved_errno = errno;
+
+       /*
+        * Put the terminal back before looking at the result, so that a
+        * failed read does not leave the user's terminal with echo disabled.
+        */
+       tcsetattr(fileno(stdin), TCSAFLUSH, &old);
+       fprintf(stderr, "\n");
+
+       if (ret <= 0) {
+               /* keep getline()'s errno visible in case die() reports it */
+               errno = saved_errno;
+               die("error reading passphrase");
+       }
+
+       return buf;
+}
+
+/*
+ * Run scrypt over @passphrase with a fixed salt and store the result in
+ * @key. A nonzero return from libscrypt_scrypt() is reported through die().
+ */
+void derive_passphrase(struct bcache_key *key, const char *passphrase)
+{
+       /* sizeof() counts the terminating NUL, so the salt is 7 bytes long */
+       const unsigned char scrypt_salt[] = "bcache";
+       const size_t passphrase_len = strlen(passphrase);
+       int err;
+
+       err = libscrypt_scrypt((void *) passphrase, passphrase_len,
+                              scrypt_salt, sizeof(scrypt_salt),
+                              SCRYPT_N, SCRYPT_r, SCRYPT_p,
+                              (void *) key, sizeof(*key));
+       if (err != 0)
+               die("scrypt error: %i", err);
+}
+
+/*
+ * XOR @disk_key in place with the ChaCha20 keystream produced from @key and
+ * bch_master_key_nonce. A nonzero return from
+ * crypto_stream_chacha20_xor() is reported through die().
+ */
+void disk_key_encrypt(struct bcache_disk_key *disk_key,
+                     struct bcache_key *key)
+{
+       void *buf = disk_key;
+       int err = crypto_stream_chacha20_xor(buf, buf, sizeof(*disk_key),
+                                            (void *) &bch_master_key_nonce,
+                                            (void *) key);
+
+       if (err != 0)
+               die("chacha20 error: %i", err);
+}
+
+/*
+ * Initialize @disk_key: copy bch_key_header into the header field and fill
+ * the key field with bytes read from /dev/random. A notice is printed once
+ * if the read has been going on for at least three seconds. Failures are
+ * reported through die().
+ */
+void disk_key_init(struct bcache_disk_key *disk_key)
+{
+       ssize_t ret;
+
+       memcpy(&disk_key->header, bch_key_header, sizeof(bch_key_header));
+#if 0
+       ret = getrandom(disk_key->key, sizeof(disk_key->key), GRND_RANDOM);
+       if (ret != sizeof(disk_key->key))
+               die("error getting random bytes for key");
+#else
+       int fd = open("/dev/random", O_RDONLY|O_NONBLOCK);
+       if (fd < 0)
+               die("error opening /dev/random");
+
+       size_t n = 0;
+       struct timespec start;
+       bool printed = false;
+
+       clock_gettime(CLOCK_MONOTONIC, &start);
+
+       while (n < sizeof(disk_key->key)) {
+               /* wake up at least once a second so the notice can be shown */
+               struct timeval timeout = { 1, 0 };
+               fd_set set;
+
+               FD_ZERO(&set);
+               FD_SET(fd, &set);
+
+               if (select(fd + 1, &set, NULL, NULL, &timeout) < 0)
+                       die("select error");
+
+               /* the fd is non-blocking: EAGAIN just means "nothing yet" */
+               ret = read(fd,
+                          (void *) disk_key->key + n,
+                          sizeof(disk_key->key) - n);
+               if (ret == -1 && errno != EINTR && errno != EAGAIN)
+                       die("error reading from /dev/random");
+               if (ret > 0)
+                       n += ret;
+
+               struct timespec now;
+               clock_gettime(CLOCK_MONOTONIC, &now);
+
+               /* elapsed = now - start, then normalize a negative tv_nsec */
+               now.tv_sec      -= start.tv_sec;
+               now.tv_nsec     -= start.tv_nsec;
+
+               while (now.tv_nsec < 0) {
+                       long nsec_per_sec = 1000 * 1000 * 1000;
+                       long sec = now.tv_nsec / nsec_per_sec - 1;
+                       now.tv_nsec     -= sec * nsec_per_sec;
+                       now.tv_sec      += sec;
+               }
+
+               if (!printed && now.tv_sec >= 3) {
+                       printf("Reading from /dev/random is taking a long time...\n");
+                       printed = true;
+               }
+       }
+       close(fd);
+#endif
+}
diff --git a/crypto.h b/crypto.h
new file mode 100644 (file)
index 0000000..335d63d
--- /dev/null
+++ b/crypto.h
@@ -0,0 +1,23 @@
+#ifndef _CRYPTO_H
+#define _CRYPTO_H
+
+#include "util.h"
+
+struct bcache_key { /* key passed to disk_key_encrypt() */
+       u64     key[4]; /* written by derive_passphrase() (scrypt output) */
+};
+
+struct bcache_disk_key { /* copied into cache_sb.encryption_key by bcache_format() */
+       u64     header; /* set to bch_key_header by disk_key_init() */
+       u64     key[4]; /* filled from /dev/random by disk_key_init() */
+};
+
+static const char bch_key_header[8]            = BCACHE_MASTER_KEY_HEADER;
+static const struct nonce bch_master_key_nonce = BCACHE_MASTER_KEY_NONCE;
+
+char *read_passphrase(const char *);
+void derive_passphrase(struct bcache_key *, const char *);
+void disk_key_encrypt(struct bcache_disk_key *, struct bcache_key *);
+void disk_key_init(struct bcache_disk_key *);
+
+#endif /* _CRYPTO_H */
diff --git a/libbcache.c b/libbcache.c
new file mode 100644 (file)
index 0000000..d57f26a
--- /dev/null
@@ -0,0 +1,206 @@
+#include <errno.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <uuid/uuid.h>
+
+#include "ccan/ilog/ilog.h"
+
+#include "bcache-ondisk.h"
+#include "libbcache.h"
+#include "crypto.h"
+
+/*
+ * Zero the first SB_SECTOR sectors of @fd, write @bytes bytes from @sb
+ * starting at sector SB_SECTOR, then fsync and close @fd.
+ *
+ * Any failure prints a message with perror() and exits with EXIT_FAILURE.
+ */
+void __do_write_sb(int fd, void *sb, size_t bytes)
+{
+       char zeroes[SB_SECTOR << 9] = {0};
+
+       /* Zero start of disk */
+       if (pwrite(fd, zeroes, sizeof(zeroes), 0) != (ssize_t) sizeof(zeroes)) {
+               /* no trailing newline: perror() appends ": <errno text>\n" */
+               perror("write error trying to zero start of disk");
+               exit(EXIT_FAILURE);
+       }
+       /* Write superblock */
+       if (pwrite(fd, sb, bytes, SB_SECTOR << 9) != (ssize_t) bytes) {
+               perror("write error trying to write superblock");
+               exit(EXIT_FAILURE);
+       }
+
+       /* a failed fsync means the superblock may never have hit the disk */
+       if (fsync(fd)) {
+               perror("error syncing superblock to disk");
+               exit(EXIT_FAILURE);
+       }
+       close(fd);
+}
+
+/*
+ * Write superblock @_sb to @_fd via __do_write_sb(); the length written is
+ * the distance from @_sb to __bset_bkey_last(@_sb). Expands to a plain
+ * expression, so the caller supplies the terminating semicolon.
+ */
+#define do_write_sb(_fd, _sb)                                          \
+       __do_write_sb((_fd), (_sb),                                     \
+                     ((void *) __bset_bkey_last(_sb)) - (void *) (_sb))
+
+/*
+ * Format the @nr_devs devices described by @devs as members of a new cache
+ * set.
+ *
+ * @block_size, @btree_node_size and each device's size and bucket_size are
+ * computed here when they are passed in as 0. If @passphrase is non-NULL, a
+ * new random key is generated, encrypted with the key derived from
+ * @passphrase, and stored in the superblock. One superblock is written to
+ * each device; do_write_sb() ends up closing that device's fd.
+ *
+ * Errors are reported through die().
+ */
+void bcache_format(struct dev_opts *devs, size_t nr_devs,
+                  unsigned block_size,
+                  unsigned btree_node_size,
+                  unsigned meta_csum_type,
+                  unsigned data_csum_type,
+                  unsigned compression_type,
+                  const char *passphrase,
+                  unsigned meta_replicas,
+                  unsigned data_replicas,
+                  unsigned on_error_action,
+                  char *label,
+                  uuid_le uuid)
+{
+       struct cache_sb *sb;
+       struct dev_opts *i;
+
+       /* calculate block size: */
+       if (!block_size)
+               for (i = devs; i < devs + nr_devs; i++)
+                       block_size = max(block_size,
+                                        get_blocksize(i->dev, i->fd));
+
+       /* calculate bucket sizes: */
+       for (i = devs; i < devs + nr_devs; i++) {
+               if (!i->size)
+                       i->size = get_size(i->dev, i->fd);
+
+               if (!i->bucket_size) {
+                       u64 bytes = i->size << 9;
+
+                       if (bytes < 1 << 20) /* 1M device - 256 4k buckets */
+                               i->bucket_size = rounddown_pow_of_two(bytes >> 17);
+                       else
+                               /* Max 1M bucket at around 256G */
+                               i->bucket_size = 8 << min((ilog2(bytes >> 20) / 2), 9U);
+               }
+
+               if (i->bucket_size < block_size)
+                       die("Bucket size cannot be smaller than block size");
+
+               i->nbuckets     = i->size / i->bucket_size;
+               i->first_bucket = (23 / i->bucket_size) + 3;
+
+               if (i->nbuckets < 1 << 7)
+                       die("Not enough buckets: %llu, need %u",
+                           i->nbuckets, 1U << 7);
+       }
+
+       /* calculate btree node size: */
+       if (!btree_node_size) {
+               /* 256k default btree node size */
+               btree_node_size = 512;
+
+               for (i = devs; i < devs + nr_devs; i++)
+                       btree_node_size = min(btree_node_size, i->bucket_size);
+       }
+
+       /* superblock header followed by one cache_member per device */
+       sb = calloc(1, sizeof(*sb) + sizeof(struct cache_member) * nr_devs);
+       if (!sb)
+               die("insufficient memory");
+
+       sb->offset      = __cpu_to_le64(SB_SECTOR);
+       sb->version     = __cpu_to_le64(BCACHE_SB_VERSION_CDEV_V3);
+       sb->magic       = BCACHE_MAGIC;
+       sb->block_size  = __cpu_to_le16(block_size);
+       sb->user_uuid   = uuid;
+       sb->nr_in_set   = nr_devs;
+
+       uuid_generate(sb->set_uuid.b);
+
+       if (label)
+               strncpy((char *) sb->label, label, sizeof(sb->label));
+
+       /*
+        * don't have a userspace crc32c implementation handy, just always use
+        * crc64
+        */
+       SET_CACHE_SB_CSUM_TYPE(sb,              BCH_CSUM_CRC64);
+       SET_CACHE_SET_META_CSUM_TYPE(sb,        meta_csum_type);
+       SET_CACHE_SET_DATA_CSUM_TYPE(sb,        data_csum_type);
+       SET_CACHE_SET_COMPRESSION_TYPE(sb,      compression_type);
+
+       SET_CACHE_SET_BTREE_NODE_SIZE(sb,       btree_node_size);
+       SET_CACHE_SET_META_REPLICAS_WANT(sb,    meta_replicas);
+       SET_CACHE_SET_META_REPLICAS_HAVE(sb,    meta_replicas);
+       SET_CACHE_SET_DATA_REPLICAS_WANT(sb,    data_replicas);
+       SET_CACHE_SET_DATA_REPLICAS_HAVE(sb,    data_replicas);
+       SET_CACHE_SET_ERROR_ACTION(sb,          on_error_action);
+
+       if (passphrase) {
+               struct bcache_key key;
+               struct bcache_disk_key disk_key;
+
+               derive_passphrase(&key, passphrase);
+               disk_key_init(&disk_key);
+               disk_key_encrypt(&disk_key, &key);
+
+               memcpy(sb->encryption_key, &disk_key, sizeof(disk_key));
+               SET_CACHE_SET_ENCRYPTION_TYPE(sb, 1);
+               SET_CACHE_SET_ENCRYPTION_KEY(sb, 1);
+
+               /* don't leave key material lying around on the stack */
+               memzero_explicit(&disk_key, sizeof(disk_key));
+               memzero_explicit(&key, sizeof(key));
+       }
+
+       for (i = devs; i < devs + nr_devs; i++) {
+               struct cache_member *m = sb->members + (i - devs);
+
+               uuid_generate(m->uuid.b);
+               m->nbuckets     = __cpu_to_le64(i->nbuckets);
+               m->first_bucket = __cpu_to_le16(i->first_bucket);
+               m->bucket_size  = __cpu_to_le16(i->bucket_size);
+
+               SET_CACHE_TIER(m,               i->tier);
+               SET_CACHE_REPLACEMENT(m,        i->replacement_policy);
+               SET_CACHE_DISCARD(m,            i->discard);
+       }
+
+       sb->u64s = __cpu_to_le16(bch_journal_buckets_offset(sb));
+
+       /*
+        * The same in-memory superblock is reused for every device; only the
+        * per-device fields and the checksum are updated before each write.
+        */
+       for (i = devs; i < devs + nr_devs; i++) {
+               struct cache_member *m = sb->members + (i - devs);
+               char uuid_str[40], set_uuid_str[40];
+
+               sb->disk_uuid   = m->uuid;
+               sb->nr_this_dev = i - devs;
+               sb->csum        = __cpu_to_le64(__csum_set(sb, __le16_to_cpu(sb->u64s),
+                                                          CACHE_SB_CSUM_TYPE(sb)));
+
+               uuid_unparse(sb->disk_uuid.b, uuid_str);
+               uuid_unparse(sb->user_uuid.b, set_uuid_str);
+               printf("UUID:                   %s\n"
+                      "Set UUID:               %s\n"
+                      "version:                %u\n"
+                      "nbuckets:               %llu\n"
+                      "block_size:             %u\n"
+                      "bucket_size:            %u\n"
+                      "nr_in_set:              %u\n"
+                      "nr_this_dev:            %u\n"
+                      "first_bucket:           %u\n",
+                      uuid_str, set_uuid_str,
+                      (unsigned) __le64_to_cpu(sb->version),
+                      __le64_to_cpu(m->nbuckets),
+                      __le16_to_cpu(sb->block_size),
+                      __le16_to_cpu(m->bucket_size),
+                      sb->nr_in_set,
+                      sb->nr_this_dev,
+                      __le16_to_cpu(m->first_bucket));
+
+               do_write_sb(i->fd, sb);
+       }
+
+       free(sb);
+}
+
+/*
+ * Read the superblock of the device at @path into @sb.
+ *
+ * Exactly sizeof(*sb) bytes are read, starting at sector SB_SECTOR, and the
+ * magic field is compared against BCACHE_MAGIC. Failures are reported
+ * through die().
+ */
+void bcache_super_read(const char *path, struct cache_sb *sb)
+{
+       int fd = open(path, O_RDONLY);
+       if (fd < 0)
+               die("couldn't open %s", path);
+
+       if (pread(fd, sb, sizeof(*sb), SB_SECTOR << 9) != sizeof(*sb))
+               die("error reading superblock");
+
+       /* the descriptor is only needed for the read above */
+       close(fd);
+
+       if (memcmp(&sb->magic, &BCACHE_MAGIC, sizeof(sb->magic)))
+               die("not a bcache superblock");
+}
diff --git a/libbcache.h b/libbcache.h
new file mode 100644 (file)
index 0000000..c8d668e
--- /dev/null
@@ -0,0 +1,35 @@
+#ifndef _LIBBCACHE_H
+#define _LIBBCACHE_H
+
+#include "util.h"
+#include "stdbool.h"
+
+struct dev_opts { /* one device handed to bcache_format() */
+       int             fd; /* device fd; closed once bcache_format() has written the superblock */
+       const char      *dev; /* passed to get_blocksize()/get_size() together with fd */
+       u64             size; /* 512 byte sectors; 0 = ask get_size() */
+       unsigned        bucket_size; /* 0 = let bcache_format() pick one from the size */
+       unsigned        tier; /* stored with SET_CACHE_TIER() */
+       unsigned        replacement_policy; /* stored with SET_CACHE_REPLACEMENT() */
+       bool            discard; /* stored with SET_CACHE_DISCARD() */
+
+       u64             first_bucket; /* computed by bcache_format() */
+       u64             nbuckets; /* computed by bcache_format(): size / bucket_size */
+};
+
+void bcache_format(struct dev_opts *devs, size_t nr_devs,
+                  unsigned block_size,
+                  unsigned btree_node_size,
+                  unsigned meta_csum_type,
+                  unsigned data_csum_type,
+                  unsigned compression_type,
+                  const char *passphrase,
+                  unsigned meta_replicas,
+                  unsigned data_replicas,
+                  unsigned on_error_action,
+                  char *label,
+                  uuid_le uuid);
+
+void bcache_super_read(const char *, struct cache_sb *);
+
+#endif /* _LIBBCACHE_H */
diff --git a/util.c b/util.c
index 86b11208618fb8b68c10b752c3b4c4850e4b4e06..8369d6beb16ab23e77cb853ed25129ccaaf965a4 100644 (file)
--- a/util.c
+++ b/util.c
@@ -487,3 +487,9 @@ struct bcache_handle bcache_fs_open(const char *path)
 
        return ret;
 }
+
+/*
+ * Zero @len bytes starting at @buf. memset() is reached through a volatile
+ * function pointer, presumably so the compiler cannot prove which function
+ * is being called and drop the store as dead.
+ */
+void memzero_explicit(void *buf, size_t len)
+{
+       void *(* volatile wipe)(void *, int, size_t) = memset;
+
+       wipe(buf, 0, len);
+}
diff --git a/util.h b/util.h
index 2af277a1415c4d3434fcf03b60a9215ec6b64482..101b5c8657a10954c6a1aeec75cd2699013c84f1 100644 (file)
--- a/util.h
+++ b/util.h
@@ -74,7 +74,7 @@ u64 bch_checksum(unsigned, const void *, size_t);
 
 #define __csum_set(i, u64s, type)                                      \
 ({                                                                     \
-       const void *start = ((const void *) (i)) + sizeof(u64);         \
+       const void *start = ((const void *) (i)) + sizeof(i->csum);     \
        const void *end = __bkey_idx(i, u64s);                          \
                                                                        \
        bch_checksum(type, start, end - start);                         \
@@ -93,4 +93,6 @@ struct bcache_handle {
 
 struct bcache_handle bcache_fs_open(const char *);
 
+void memzero_explicit(void *, size_t);
+
 #endif /* _UTIL_H */