bcache.o: CFLAGS += `pkg-config --cflags libnih`
bcache-objs = bcache.o bcache-assemble.o bcache-device.o bcache-format.o\
- bcache-fs.o bcache-run.o
+ bcache-fs.o bcache-run.o bcache-key.o libbcache.o crypto.o
-bcache: LDLIBS += `pkg-config --libs uuid blkid libnih`
+bcache: LDLIBS += `pkg-config --libs uuid blkid libnih` -lscrypt -lsodium -lkeyutils
bcache: $(bcache-objs) util.o libccan.a
bcache-test: LDLIBS += `pkg-config --libs openssl`
#include <nih/command.h>
#include <nih/option.h>
-#include "ccan/ilog/ilog.h"
#include "ccan/darray/darray.h"
#include "bcache.h"
+#include "libbcache.h"
#include "bcache-format.h"
-
-struct cache_opts {
- int fd;
- const char *dev;
- unsigned bucket_size;
- unsigned tier;
- unsigned replacement_policy;
- unsigned replication_set;
- u64 size; /* 512 byte sectors */
-
- u64 first_bucket;
- u64 nbuckets;
-};
-
-struct backingdev_opts {
- int fd;
- const char *dev;
- const char *label;
-};
-
-static darray(struct cache_opts) cache_devices;
-static darray(struct backingdev_opts) backing_devices;
-
-static char *label = NULL;
+#include "crypto.h"
/* All in units of 512 byte sectors */
-static unsigned block_size, bucket_size, btree_node_size;
-static u64 filesystem_size;
-static unsigned tier, replacement_policy;
-static uuid_le set_uuid, user_uuid;
+static darray(struct dev_opts) cache_devices;
+
+static unsigned block_size, btree_node_size;
static unsigned meta_csum_type = BCH_CSUM_CRC32C;
static unsigned data_csum_type = BCH_CSUM_CRC32C;
static unsigned compression_type = BCH_COMPRESSION_NONE;
-
-static unsigned replication_set, meta_replicas = 1, data_replicas = 1;
+static int encrypted;
+static unsigned meta_replicas = 1, data_replicas = 1;
static unsigned on_error_action;
-static int discard;
-static unsigned version = 1;
+static char *label = NULL;
+static uuid_le uuid;
-static u64 data_offset = BDEV_DATA_START_DEFAULT;
-static unsigned cache_mode = CACHE_MODE_WRITEBACK;
+/* Device specific options: */
+static u64 filesystem_size;
+static unsigned bucket_size;
+static unsigned tier;
+static unsigned replacement_policy;
+static int discard;
+/*
+ * Option setter for -C/--cache. Opens @arg and appends a struct dev_opts to
+ * cache_devices, copying in the device-specific option globals
+ * (filesystem_size, bucket_size, tier, replacement_policy, discard) as they
+ * stand at the moment this setter runs. Always returns 0.
+ *
+ * NOTE(review): the results of dev_open() and strdup() are stored unchecked
+ * here; whether dev_open() dies on failure is not visible from this hunk.
+ */
static int set_cache(NihOption *option, const char *arg)
{
- darray_append(cache_devices, (struct cache_opts) {
+ darray_append(cache_devices, (struct dev_opts) {
.fd = dev_open(arg),
.dev = strdup(arg),
+ .size = filesystem_size,
.bucket_size = bucket_size,
.tier = tier,
.replacement_policy = replacement_policy,
- .replication_set = replication_set,
- .size = filesystem_size,
- });
- return 0;
-}
-
-static int set_bdev(NihOption *option, const char *arg)
-{
- darray_append(backing_devices, (struct backingdev_opts) {
- .fd = dev_open(arg),
- .dev = strdup(arg),
- .label = label ? strdup(label) : NULL,
+ .discard = discard,
});
return 0;
}
+/*
+ * Option setter for --uuid: parses @arg into the file-scope `uuid` and calls
+ * die("Bad uuid") when uuid_parse() reports failure. Returns 0 otherwise.
+ */
-static int set_cache_set_uuid(NihOption *option, const char *arg)
+static int set_uuid(NihOption *option, const char *arg)
{
- if (uuid_parse(arg, user_uuid.b))
+ if (uuid_parse(arg, uuid.b))
die("Bad uuid");
return 0;
}
return 0;
}
-static int set_replication_set(NihOption *option, const char *arg)
-{
- replication_set = strtoul_or_die(arg, CACHE_REPLICATION_SET_MAX,
- "replication set");
- return 0;
-}
-
static int set_meta_replicas(NihOption *option, const char *arg)
{
meta_replicas = strtoul_or_die(arg, CACHE_SET_META_REPLICAS_WANT_MAX,
return 0;
}
-static int set_cache_mode(NihOption *option, const char *arg)
-{
- cache_mode = read_string_list_or_die(arg, bdev_cache_mode,
- "cache mode");
- return 0;
-}
-
-static int set_version(NihOption *option, const char *arg)
-{
- version = strtoul_or_die(arg, 2, "version");
- return 0;
-}
-
+/*
+ * Command line options for the format command.
+ *
+ * Entries that name a setter (last field) are handled by that function;
+ * entries with a NULL setter (encrypted, label, discard) only supply a
+ * pointer to the variable in the value field.
+ *
+ * The options grouped under "Device specific options" feed the globals that
+ * set_cache() copies into each device entry when -C/--cache is processed.
+ * NOTE(review): this presumably means they must be given before the device
+ * they should apply to - confirm against libnih's parsing order.
+ */
NihOption opts_format[] = {
// { int shortoption, char *longoption, char *help, NihOptionGroup, char *argname, void *value, NihOptionSetter}
{ 'C', "cache", N_("Format a cache device"),
NULL, "dev", NULL, set_cache },
- { 'B', "bdev", N_("Format a backing device"),
- NULL, "dev", NULL, set_bdev },
- { 'l', "label", N_("label"),
- NULL, "label", &label, NULL},
- { 0, "cset_uuid", N_("UUID for the cache set"),
- NULL, "uuid", NULL, set_cache_set_uuid },
-
- { 'w', "block", N_("block size (hard sector size of SSD, often 2k"),
+ { 'w', "block", N_("block size"),
NULL, "size", NULL, set_block_size },
- { 'b', "bucket", N_("bucket size"),
- NULL, "size", NULL, set_bucket_sizes },
{ 'n', "btree_node", N_("Btree node size, default 256k"),
NULL, "size", NULL, set_btree_node_size },
- { 0, "fs_size", N_("Size of filesystem on device" ),
- NULL, "size", NULL, set_filesystem_size },
-
- { 'p', "cache_replacement_policy", NULL,
- NULL, "(lru|fifo|random)", NULL, set_replacement_policy },
{ 0, "metadata_csum_type", N_("Checksum type"),
NULL, "(none|crc32c|crc64)", &meta_csum_type, set_csum_type },
-
{ 0, "data_csum_type", N_("Checksum type"),
NULL, "(none|crc32c|crc64)", &data_csum_type, set_csum_type },
-
{ 0, "compression_type", N_("Compression type"),
NULL, "(none|gzip)", NULL, set_compression_type },
-
- { 0, "error_action", N_("Action to take on filesystem error"),
- NULL, "(continue|readonly|panic)", NULL, set_on_error_action },
-
- { 0, "discard", N_("Enable discards"),
- NULL, NULL, &discard, NULL },
-
- { 't', "tier", N_("tier of subsequent devices"),
- NULL, "#", NULL, set_tier },
-
- { 0, "replication_set", N_("replication set of subsequent devices"),
- NULL, "#", NULL, set_replication_set },
+ { 0, "encrypted", N_("enable encryption"),
+ NULL, NULL, &encrypted, NULL },
{ 0, "meta_replicas", N_("number of metadata replicas"),
NULL, "#", NULL, set_meta_replicas },
-
{ 0, "data_replicas", N_("number of data replicas"),
NULL, "#", NULL, set_data_replicas },
- { 0, "cache_mode", N_("Cache mode (for backing devices)"),
- NULL, "(writethrough|writeback|writearound", NULL, set_cache_mode },
+ { 0, "error_action", N_("Action to take on filesystem error"),
+ NULL, "(continue|readonly|panic)", NULL, set_on_error_action },
- { 'o', "data_offset", N_("data offset in sectors"),
- NULL, "offset", &data_offset, NULL},
+ { 'l', "label", N_("label"),
+ NULL, "label", &label, NULL},
+ { 0, "uuid", N_("filesystem UUID"),
+ NULL, "uuid", NULL, set_uuid },
- { 'v', "version", N_("superblock version"),
- NULL, "#", NULL, set_version},
+ /* Device specific options: */
+ { 0, "fs_size", N_("Size of filesystem on device" ),
+ NULL, "size", NULL, set_filesystem_size },
+ { 'b', "bucket", N_("bucket size"),
+ NULL, "size", NULL, set_bucket_sizes },
+ { 't', "tier", N_("tier of subsequent devices"),
+ NULL, "#", NULL, set_tier },
+ { 'p', "cache_replacement_policy", NULL,
+ NULL, "(lru|fifo|random)", NULL, set_replacement_policy },
+ { 0, "discard", N_("Enable discards"),
+ NULL, NULL, &discard, NULL },
NIH_OPTION_LAST
};
-void __do_write_sb(int fd, void *sb, size_t bytes)
-{
- char zeroes[SB_SECTOR << 9] = {0};
-
- /* Zero start of disk */
- if (pwrite(fd, zeroes, SB_SECTOR << 9, 0) != SB_SECTOR << 9) {
- perror("write error trying to zero start of disk\n");
- exit(EXIT_FAILURE);
- }
- /* Write superblock */
- if (pwrite(fd, sb, bytes, SB_SECTOR << 9) != bytes) {
- perror("write error trying to write superblock\n");
- exit(EXIT_FAILURE);
- }
-
- fsync(fd);
- close(fd);
-}
-
-#define do_write_sb(_fd, _sb) \
- __do_write_sb(_fd, _sb, ((void *) __bset_bkey_last(_sb)) - (void *) _sb);
-
-void write_backingdev_sb(int fd, unsigned block_size, unsigned mode,
- u64 data_offset, const char *label,
- uuid_le set_uuid)
-{
- char uuid_str[40];
- struct backingdev_sb sb;
-
- memset(&sb, 0, sizeof(struct cache_sb));
-
- sb.offset = SB_SECTOR;
- sb.version = BCACHE_SB_VERSION_BDEV;
- sb.magic = BCACHE_MAGIC;
- uuid_generate(sb.disk_uuid.b);
- sb.set_uuid = set_uuid;
- sb.block_size = block_size;
-
- uuid_unparse(sb.disk_uuid.b, uuid_str);
- if (label)
- memcpy(sb.label, label, SB_LABEL_SIZE);
-
- SET_BDEV_CACHE_MODE(&sb, mode);
-
- if (data_offset != BDEV_DATA_START_DEFAULT) {
- sb.version = BCACHE_SB_VERSION_BDEV_WITH_OFFSET;
- sb.data_offset = data_offset;
- }
-
- sb.csum = csum_set(&sb, BCH_CSUM_CRC64);
-
- printf("UUID: %s\n"
- "version: %u\n"
- "block_size: %u\n"
- "data_offset: %llu\n",
- uuid_str, (unsigned) sb.version,
- sb.block_size, data_offset);
-
- do_write_sb(fd, &sb);
-}
-
-static void format_v0(void)
-{
- struct cache_opts *i;
-
- set_uuid = user_uuid;
-
- darray_foreach(i, cache_devices)
- bucket_size = min(bucket_size, i->bucket_size);
-
- struct cache_sb_v0 *sb = calloc(1, sizeof(*sb));
-
- sb->offset = SB_SECTOR;
- sb->version = BCACHE_SB_VERSION_CDEV_WITH_UUID;
- sb->magic = BCACHE_MAGIC;
- sb->block_size = block_size;
- sb->bucket_size = bucket_size;
- sb->set_uuid = set_uuid;
- sb->nr_in_set = darray_size(cache_devices);
-
- if (label)
- memcpy(sb->label, label, sizeof(sb->label));
-
- darray_foreach(i, cache_devices) {
- char uuid_str[40], set_uuid_str[40];
-
- uuid_generate(sb->uuid.b);
- sb->nbuckets = i->nbuckets;
- sb->first_bucket = i->first_bucket;
- sb->nr_this_dev = i - cache_devices.item;
- sb->csum = csum_set(sb, BCH_CSUM_CRC64);
-
- uuid_unparse(sb->uuid.b, uuid_str);
- uuid_unparse(sb->set_uuid.b, set_uuid_str);
- printf("UUID: %s\n"
- "Set UUID: %s\n"
- "version: %u\n"
- "nbuckets: %llu\n"
- "block_size: %u\n"
- "bucket_size: %u\n"
- "nr_in_set: %u\n"
- "nr_this_dev: %u\n"
- "first_bucket: %u\n",
- uuid_str, set_uuid_str,
- (unsigned) sb->version,
- sb->nbuckets,
- sb->block_size,
- sb->bucket_size,
- sb->nr_in_set,
- sb->nr_this_dev,
- sb->first_bucket);
-
- do_write_sb(i->fd, sb);
- }
-}
-
-static void format_v1(void)
-{
- struct cache_sb *sb;
- struct cache_opts *i;
-
- sb = calloc(1, sizeof(*sb) + sizeof(struct cache_member) *
- darray_size(cache_devices));
-
- sb->offset = __cpu_to_le64(SB_SECTOR);
- sb->version = __cpu_to_le64(BCACHE_SB_VERSION_CDEV_V3);
- sb->magic = BCACHE_MAGIC;
- sb->block_size = __cpu_to_le16(block_size);
- sb->set_uuid = set_uuid;
- sb->user_uuid = user_uuid;
- sb->nr_in_set = darray_size(cache_devices);
-
- if (label)
- memcpy(sb->label, label, sizeof(sb->label));
-
- /*
- * don't have a userspace crc32c implementation handy, just always use
- * crc64
- */
- SET_CACHE_SB_CSUM_TYPE(sb, BCH_CSUM_CRC64);
- SET_CACHE_META_PREFERRED_CSUM_TYPE(sb, meta_csum_type);
- SET_CACHE_DATA_PREFERRED_CSUM_TYPE(sb, data_csum_type);
- SET_CACHE_COMPRESSION_TYPE(sb, compression_type);
-
- SET_CACHE_BTREE_NODE_SIZE(sb, btree_node_size);
- SET_CACHE_SET_META_REPLICAS_WANT(sb, meta_replicas);
- SET_CACHE_SET_META_REPLICAS_HAVE(sb, meta_replicas);
- SET_CACHE_SET_DATA_REPLICAS_WANT(sb, data_replicas);
- SET_CACHE_SET_DATA_REPLICAS_HAVE(sb, data_replicas);
- SET_CACHE_ERROR_ACTION(sb, on_error_action);
-
- darray_foreach(i, cache_devices) {
- struct cache_member *m = sb->members +
- (i - cache_devices.item);
-
- uuid_generate(m->uuid.b);
- m->nbuckets = __cpu_to_le64(i->nbuckets);
- m->first_bucket = __cpu_to_le16(i->first_bucket);
- m->bucket_size = __cpu_to_le16(i->bucket_size);
-
- if (__le64_to_cpu(m->nbuckets < 1 << 7))
- die("Not enough buckets: %llu, need %u",
- __le64_to_cpu(m->nbuckets), 1 << 7);
-
- SET_CACHE_TIER(m, i->tier);
- SET_CACHE_REPLICATION_SET(m, i->replication_set);
- SET_CACHE_REPLACEMENT(m, i->replacement_policy);
- SET_CACHE_DISCARD(m, discard);
- }
-
- sb->u64s = __cpu_to_le16(bch_journal_buckets_offset(sb));
-
- darray_foreach(i, cache_devices) {
- char uuid_str[40], set_uuid_str[40];
- struct cache_member *m = sb->members +
- (i - cache_devices.item);
-
- sb->disk_uuid = m->uuid;
- sb->nr_this_dev = i - cache_devices.item;
- sb->csum = __cpu_to_le64(__csum_set(sb, __le16_to_cpu(sb->u64s),
- CACHE_SB_CSUM_TYPE(sb)));
-
- uuid_unparse(sb->disk_uuid.b, uuid_str);
- uuid_unparse(sb->user_uuid.b, set_uuid_str);
- printf("UUID: %s\n"
- "Set UUID: %s\n"
- "version: %u\n"
- "nbuckets: %llu\n"
- "block_size: %u\n"
- "bucket_size: %u\n"
- "nr_in_set: %u\n"
- "nr_this_dev: %u\n"
- "first_bucket: %u\n",
- uuid_str, set_uuid_str,
- (unsigned) sb->version,
- __le64_to_cpu(m->nbuckets),
- __le16_to_cpu(sb->block_size),
- __le16_to_cpu(m->bucket_size),
- sb->nr_in_set,
- sb->nr_this_dev,
- __le16_to_cpu(m->first_bucket));
-
- do_write_sb(i->fd, sb);
- }
-}
-
+/*
+ * Entry point for the format command.
+ *
+ * Dies unless at least one cache device was supplied, generates a random
+ * filesystem UUID when none was given, and - when --encrypted was passed -
+ * prompts for the passphrase twice and dies if the two entries differ.
+ * Everything is then handed to bcache_format(); passphrase stays NULL when
+ * encryption was not requested. Both passphrase buffers are scrubbed with
+ * memzero_explicit() before being freed. Returns 0.
+ */
int cmd_format(NihCommand *command, char * const *args)
{
- struct cache_opts *i;
- struct backingdev_opts *ib;
+ char *passphrase = NULL;
- if (!darray_size(cache_devices) &&
- !darray_size(backing_devices))
+ if (!darray_size(cache_devices))
die("Please supply a device");
- if (uuid_is_null(user_uuid.b))
- uuid_generate(user_uuid.b);
-
- uuid_generate(set_uuid.b);
-
- if (!block_size) {
- darray_foreach(i, cache_devices)
- block_size = max(block_size,
- get_blocksize(i->dev, i->fd));
-
- darray_foreach(ib, backing_devices)
- block_size = max(block_size,
- get_blocksize(ib->dev, ib->fd));
- }
+ if (uuid_is_null(uuid.b))
+ uuid_generate(uuid.b);
- darray_foreach(i, cache_devices) {
- if (!i->size)
- i->size = get_size(i->dev, i->fd);
+ if (encrypted) {
+ char *pass2;
- if (!i->bucket_size) {
- u64 bytes = i->size << 9;
+ passphrase = read_passphrase("Enter passphrase: ");
+ pass2 = read_passphrase("Enter same passphrase again: ");
- if (bytes < 1 << 20) /* 1M device - 256 4k buckets*/
- i->bucket_size = rounddown_pow_of_two(bytes >> 17);
- else
- /* Max 1M bucket at around 256G */
- i->bucket_size = 8 << min((ilog2(bytes >> 20) / 2), 9U);
+ if (strcmp(passphrase, pass2)) {
+ memzero_explicit(passphrase, strlen(passphrase));
+ memzero_explicit(pass2, strlen(pass2));
+ die("Passphrases do not match");
}
- if (i->bucket_size < block_size)
- die("Bucket size cannot be smaller than block size");
-
- i->nbuckets = i->size / i->bucket_size;
- i->first_bucket = (23 / i->bucket_size) + 3;
-
- if (i->nbuckets < 1 << 7)
- die("Not enough buckets: %llu, need %u",
- i->nbuckets, 1 << 7);
+ memzero_explicit(pass2, strlen(pass2));
+ free(pass2);
}
- if (!btree_node_size) {
- /* 256k default btree node size */
- btree_node_size = 512;
-
- darray_foreach(i, cache_devices)
- btree_node_size = min(btree_node_size, i->bucket_size);
+ bcache_format(cache_devices.item, darray_size(cache_devices),
+ block_size,
+ btree_node_size,
+ meta_csum_type,
+ data_csum_type,
+ compression_type,
+ passphrase,
+ meta_replicas,
+ data_replicas,
+ on_error_action,
+ label,
+ uuid);
+
+ if (passphrase) {
+ memzero_explicit(passphrase, strlen(passphrase));
+ free(passphrase);
}
- switch (version) {
- case 0:
- format_v0();
- break;
- case 1:
- format_v1();
- break;
- }
-
- darray_foreach(ib, backing_devices)
- write_backingdev_sb(ib->fd, block_size, cache_mode,
- data_offset, ib->label,
- set_uuid);
-
return 0;
}
--- /dev/null
+#include <errno.h>
+#include <unistd.h>
+#include <keyutils.h>
+#include <uuid/uuid.h>
+#include <nih/command.h>
+#include <nih/option.h>
+
+#include "bcache.h"
+#include "libbcache.h"
+#include "crypto.h"
+
+NihOption opts_unlock[] = {
+ NIH_OPTION_LAST
+};
+
+/*
+ * Entry point for the unlock command: takes exactly one device argument,
+ * reads its superblock, asks for the passphrase and, if it decrypts the
+ * stored key correctly, adds the derived key to the user keyring as a
+ * "logon" key described as "bcache:<filesystem uuid>".
+ *
+ * Dies (via die()) when the argument count is wrong, the superblock has no
+ * encryption key flag set, the stored key already starts with the key
+ * header, the passphrase is wrong, or add_key() fails. Returns 0 on success.
+ *
+ * All secret material (passphrase, derived key, decrypted disk key) is
+ * scrubbed before every exit taken after it has been produced.
+ */
+int cmd_unlock(NihCommand *command, char * const *args)
+{
+	struct bcache_disk_key disk_key;
+	struct bcache_key key;
+	struct cache_sb sb;
+	char *passphrase;
+	char uuid[40];
+	char description[60];
+
+	if (!args[0] || args[1])
+		die("please supply a single device");
+
+	bcache_super_read(args[0], &sb);
+
+	if (!CACHE_SET_ENCRYPTION_KEY(&sb))
+		die("filesystem is not encrypted");
+
+	memcpy(&disk_key, sb.encryption_key, sizeof(disk_key));
+
+	/*
+	 * The header is already readable without decrypting anything -
+	 * presumably the key is stored in the clear, so there is nothing to
+	 * unlock.
+	 */
+	if (!memcmp(&disk_key, bch_key_header, sizeof(bch_key_header)))
+		die("filesystem does not have encryption key");
+
+	passphrase = read_passphrase("Enter passphrase: ");
+	derive_passphrase(&key, passphrase);
+
+	/*
+	 * The passphrase is not needed past this point; scrub it right away
+	 * so that the error exits below do not leave it behind.
+	 */
+	memzero_explicit(passphrase, strlen(passphrase));
+	free(passphrase);
+
+	/* XOR stream cipher: the same call decrypts the stored key in place */
+	disk_key_encrypt(&disk_key, &key);
+
+	if (memcmp(&disk_key, bch_key_header, sizeof(bch_key_header))) {
+		memzero_explicit(&disk_key, sizeof(disk_key));
+		memzero_explicit(&key, sizeof(key));
+		die("incorrect passphrase");
+	}
+
+	uuid_unparse_lower(sb.user_uuid.b, uuid);
+	snprintf(description, sizeof(description), "bcache:%s", uuid);
+
+	if (add_key("logon", description, &key, sizeof(key),
+		    KEY_SPEC_USER_KEYRING) < 0) {
+		/* Save errno before anything else gets a chance to change it */
+		int err = errno;
+
+		memzero_explicit(&disk_key, sizeof(disk_key));
+		memzero_explicit(&key, sizeof(key));
+		die("add_key error: %s", strerror(err));
+	}
+
+	memzero_explicit(&disk_key, sizeof(disk_key));
+	memzero_explicit(&key, sizeof(key));
+	return 0;
+}
--- /dev/null
+#ifndef _BCACHE_KEY_H
+#define _BCACHE_KEY_H
+
+extern NihOption opts_unlock[];
+int cmd_unlock(NihCommand *, char * const *);
+
+#endif /* _BCACHE_KEY_H */
#else
#error edit for your odd byteorder.
#endif
-} __attribute__((packed)) __attribute__((aligned(4)));
+} __attribute__((packed, aligned(4)));
#define KEY_INODE_MAX ((__u64)~0ULL)
#define KEY_OFFSET_MAX ((__u64)~0ULL)
__u64 __nothing[0];
};
+/*
+ * Key version, split into a 64 bit low part and a 32 bit high part; takes
+ * the place of the plain __u32 version field in struct bkey. The two members
+ * are declared in opposite order depending on byte order, and the struct is
+ * packed with 4 byte alignment.
+ *
+ * NOTE(review): if neither __LITTLE_ENDIAN nor __BIG_ENDIAN is defined the
+ * struct ends up with no members and no #error is raised here.
+ */
+struct bversion {
+#if defined(__LITTLE_ENDIAN)
+ __u64 low;
+ __u32 high;
+#elif defined(__BIG_ENDIAN)
+ __u32 high;
+ __u64 low;
+#endif
+} __attribute__((packed, aligned(4)));
+
struct bkey {
__u64 _data[0];
#if defined(__LITTLE_ENDIAN)
__u8 pad[1];
- __u32 version;
+ struct bversion version;
__u32 size; /* extent size, in sectors */
struct bpos p;
#elif defined(__BIG_ENDIAN)
struct bpos p;
__u32 size; /* extent size, in sectors */
- __u32 version;
+ struct bversion version;
__u8 pad[1];
#endif
-} __attribute__((packed)) __attribute__((aligned(8)));
+} __attribute__((packed, aligned(8)));
struct bkey_packed {
__u64 _data[0];
* to the same size as struct bkey should hopefully be safest.
*/
__u8 pad[sizeof(struct bkey) - 3];
-} __attribute__((packed)) __attribute__((aligned(8)));
+} __attribute__((packed, aligned(8)));
#define BKEY_U64s (sizeof(struct bkey) / sizeof(__u64))
#define KEY_PACKED_BITS_START 24
BKEY_FIELD_OFFSET,
BKEY_FIELD_SNAPSHOT,
BKEY_FIELD_SIZE,
- BKEY_FIELD_VERSION,
+ BKEY_FIELD_VERSION_HIGH,
+ BKEY_FIELD_VERSION_LOW,
BKEY_NR_FIELDS,
};
bkey_format_field(OFFSET, p.offset), \
bkey_format_field(SNAPSHOT, p.snapshot), \
bkey_format_field(SIZE, size), \
- bkey_format_field(VERSION, version), \
+ bkey_format_field(VERSION_HIGH, version.high), \
+ bkey_format_field(VERSION_LOW, version.low), \
}, \
})
uncompressed_size:8,
csum_type:4,
compression_type:4;
+ __u32 csum;
#elif defined (__BIG_ENDIAN_BITFIELD)
- __u32 csum_type:4,
- compression_type:4,
+ __u32 csum;
+ __u32 compression_type:4,
+ csum_type:4,
uncompressed_size:8,
compressed_size:8,
offset:7,
type:1;
#endif
- __u32 csum;
-} __attribute__((packed)) __attribute__((aligned(8)));
+} __attribute__((packed, aligned(8)));
#define CRC32_EXTENT_SIZE_MAX (1U << 7)
+/* 64k */
+#define BCH_COMPRESSED_EXTENT_MAX 128
+
struct bch_extent_crc64 {
#if defined(__LITTLE_ENDIAN_BITFIELD)
__u64 type:3,
- compressed_size:18,
- uncompressed_size:18,
- offset:17,
+ compressed_size:10,
+ uncompressed_size:10,
+ offset:10,
+ nonce:23,
csum_type:4,
compression_type:4;
#elif defined (__BIG_ENDIAN_BITFIELD)
- __u64 csum_type:4,
- compression_type:4,
- offset:17,
- uncompressed_size:18,
- compressed_size:18,
+ __u64 compression_type:4,
+ csum_type:4,
+ nonce:23,
+ offset:10,
+ uncompressed_size:10,
+ compressed_size:10,
type:3;
#endif
__u64 csum;
-} __attribute__((packed)) __attribute__((aligned(8)));
+} __attribute__((packed, aligned(8)));
-#define CRC64_EXTENT_SIZE_MAX (1U << 17)
+#define CRC64_EXTENT_SIZE_MAX (1U << 10) /* inclusive */
+#define CRC64_NONCE_MAX (1U << 23) /* exclusive */
/*
* @reservation - pointer hasn't been written to, just reserved
erasure_coded:1,
type:2;
#endif
-} __attribute__((packed)) __attribute__((aligned(8)));
+} __attribute__((packed, aligned(8)));
union bch_extent_entry {
- __u8 type;
+#if defined(__LITTLE_ENDIAN__) || BITS_PER_LONG == 64
+ unsigned long type;
+#elif BITS_PER_LONG == 32
+ struct {
+ unsigned long pad;
+ unsigned long type;
+ };
+#endif
struct bch_extent_crc32 crc32;
struct bch_extent_crc64 crc64;
struct bch_extent_ptr ptr;
union bch_extent_entry start[0];
__u64 _data[0];
-} __attribute__((packed)) __attribute__((aligned(8)));
+} __attribute__((packed, aligned(8)));
BKEY_VAL_TYPE(extent, BCH_EXTENT);
+/* Maximum size (in u64s) a single pointer could be: */
+#define BKEY_EXTENT_PTR_U64s_MAX\
+ ((sizeof(struct bch_extent_crc64) + \
+ sizeof(struct bch_extent_ptr)) / sizeof(u64))
+
+/* Maximum possible size of an entire extent value: */
+#if 0
+/* There's a hack in the keylist code that needs to be fixed.. */
+#define BKEY_EXTENT_VAL_U64s_MAX \
+ (BKEY_EXTENT_PTR_U64s_MAX * BCH_REPLICAS_MAX)
+#else
+#define BKEY_EXTENT_VAL_U64s_MAX 8
+#endif
+
+/* Maximum possible size of an entire extent, key + value: */
+#define BKEY_EXTENT_U64s_MAX (BKEY_U64s + BKEY_EXTENT_VAL_U64s_MAX)
+
+#define BKEY_BTREE_PTR_VAL_U64s_MAX BCH_REPLICAS_MAX
+#define BKEY_BTREE_PTR_U64s_MAX (BKEY_U64s + BCH_REPLICAS_MAX)
+
/* Inodes */
#define BLOCKDEV_INODE_MAX 4096
enum bch_inode_types {
BCH_INODE_FS = 128,
BCH_INODE_BLOCKDEV = 129,
- BCH_INODE_CACHED_DEV = 130,
};
-enum {
- BCH_FS_PRIVATE_START = 16,
- __BCH_INODE_I_SIZE_DIRTY = 16,
-};
-
-#define BCH_FL_USER_FLAGS ((1U << BCH_FS_PRIVATE_START) - 1)
-
-#define BCH_INODE_I_SIZE_DIRTY (1 << __BCH_INODE_I_SIZE_DIRTY)
-
struct bch_inode {
struct bch_val v;
__le64 i_mtime;
__le64 i_size;
+ __le64 i_sectors;
__le32 i_uid;
__le32 i_gid;
__le32 i_nlink;
__le32 i_dev;
+
+ __le64 i_hash_seed;
} __attribute__((packed));
BKEY_VAL_TYPE(inode, BCH_INODE_FS);
+enum {
+ /*
+ * User flags (get/settable with FS_IOC_*FLAGS, correspond to FS_*_FL
+ * flags)
+ */
+ __BCH_INODE_SYNC = 0,
+ __BCH_INODE_IMMUTABLE = 1,
+ __BCH_INODE_APPEND = 2,
+ __BCH_INODE_NODUMP = 3,
+ __BCH_INODE_NOATIME = 4,
+
+ __BCH_INODE_I_SIZE_DIRTY= 5,
+ __BCH_INODE_I_SECTORS_DIRTY= 6,
+
+ /* not implemented yet: */
+ __BCH_INODE_HAS_XATTRS = 7, /* has xattrs in xattr btree */
+};
+
+LE32_BITMASK(INODE_STR_HASH_TYPE, struct bch_inode, i_flags, 28, 32);
+
+#define BCH_INODE_SYNC (1 << __BCH_INODE_SYNC)
+#define BCH_INODE_IMMUTABLE (1 << __BCH_INODE_IMMUTABLE)
+#define BCH_INODE_APPEND (1 << __BCH_INODE_APPEND)
+#define BCH_INODE_NODUMP (1 << __BCH_INODE_NODUMP)
+#define BCH_INODE_NOATIME (1 << __BCH_INODE_NOATIME)
+#define BCH_INODE_I_SIZE_DIRTY (1 << __BCH_INODE_I_SIZE_DIRTY)
+#define BCH_INODE_I_SECTORS_DIRTY (1 << __BCH_INODE_I_SECTORS_DIRTY)
+#define BCH_INODE_HAS_XATTRS (1 << __BCH_INODE_HAS_XATTRS)
+
struct bch_inode_blockdev {
struct bch_val v;
- struct bch_inode i_inode;
+
+ __le64 i_size;
+ __le64 i_flags;
+
+ /* Seconds: */
+ __le64 i_ctime;
+ __le64 i_mtime;
uuid_le i_uuid;
__u8 i_label[32];
-} __attribute__((packed));
+} __attribute__((packed, aligned(8)));
BKEY_VAL_TYPE(inode_blockdev, BCH_INODE_BLOCKDEV);
+/* Thin provisioned volume, or cache for another block device? */
+LE64_BITMASK(CACHED_DEV, struct bch_inode_blockdev, i_flags, 0, 1)
/* Dirents */
/*
* to change:
*/
uuid_le user_uuid;
- __le64 pad1[6];
+
+ __le64 flags2;
+ __le64 encryption_key[5];
/* Number of cache_member entries: */
__u8 nr_in_set;
};
};
-LE64_BITMASK(CACHE_SYNC, struct cache_sb, flags, 0, 1);
+/* XXX: rename CACHE_SET -> BCH_FS or something? */
+
+LE64_BITMASK(CACHE_SET_SYNC, struct cache_sb, flags, 0, 1);
-LE64_BITMASK(CACHE_ERROR_ACTION, struct cache_sb, flags, 1, 4);
+LE64_BITMASK(CACHE_SET_ERROR_ACTION, struct cache_sb, flags, 1, 4);
#define BCH_ON_ERROR_CONTINUE 0U
#define BCH_ON_ERROR_RO 1U
#define BCH_ON_ERROR_PANIC 2U
LE64_BITMASK(CACHE_SB_CSUM_TYPE, struct cache_sb, flags, 12, 16);
-LE64_BITMASK(CACHE_META_PREFERRED_CSUM_TYPE,struct cache_sb, flags, 16, 20);
+LE64_BITMASK(CACHE_SET_META_CSUM_TYPE,struct cache_sb, flags, 16, 20);
#define BCH_CSUM_NONE 0U
#define BCH_CSUM_CRC32C 1U
#define BCH_CSUM_CRC64 2U
-#define BCH_CSUM_NR 3U
+#define BCH_CSUM_CHACHA20_POLY1305 3U
+#define BCH_CSUM_NR 4U
+
+/*
+ * Reports whether checksum type @type is one that also encrypts; at present
+ * BCH_CSUM_CHACHA20_POLY1305 is the only such type.
+ */
+static inline _Bool bch_csum_type_is_encryption(unsigned type)
+{
+	return type == BCH_CSUM_CHACHA20_POLY1305;
+}
-LE64_BITMASK(CACHE_BTREE_NODE_SIZE, struct cache_sb, flags, 20, 36);
+LE64_BITMASK(CACHE_SET_BTREE_NODE_SIZE, struct cache_sb, flags, 20, 36);
LE64_BITMASK(CACHE_SET_META_REPLICAS_HAVE,struct cache_sb, flags, 36, 40);
LE64_BITMASK(CACHE_SET_DATA_REPLICAS_HAVE,struct cache_sb, flags, 40, 44);
-LE64_BITMASK(CACHE_SET_DIRENT_CSUM_TYPE,struct cache_sb, flags, 44, 48);
-enum {
- BCH_DIRENT_CSUM_CRC32C = 0,
- BCH_DIRENT_CSUM_CRC64 = 1,
- BCH_DIRENT_CSUM_SIPHASH = 2,
- BCH_DIRENT_CSUM_SHA1 = 3,
+LE64_BITMASK(CACHE_SET_STR_HASH_TYPE,struct cache_sb, flags, 44, 48);
+enum bch_str_hash_type {
+ BCH_STR_HASH_CRC32C = 0,
+ BCH_STR_HASH_CRC64 = 1,
+ BCH_STR_HASH_SIPHASH = 2,
+ BCH_STR_HASH_SHA1 = 3,
};
-LE64_BITMASK(CACHE_DATA_PREFERRED_CSUM_TYPE, struct cache_sb, flags, 48, 52);
+#define BCH_STR_HASH_NR 4
+
+LE64_BITMASK(CACHE_SET_DATA_CSUM_TYPE, struct cache_sb, flags, 48, 52);
-LE64_BITMASK(CACHE_COMPRESSION_TYPE, struct cache_sb, flags, 52, 56);
+LE64_BITMASK(CACHE_SET_COMPRESSION_TYPE, struct cache_sb, flags, 52, 56);
enum {
BCH_COMPRESSION_NONE = 0,
- BCH_COMPRESSION_LZO1X = 1,
+ BCH_COMPRESSION_LZ4 = 1,
BCH_COMPRESSION_GZIP = 2,
- BCH_COMPRESSION_XZ = 3,
};
+#define BCH_COMPRESSION_NR 3U
+
+/* Limit inode numbers to 32 bits: */
+LE64_BITMASK(CACHE_INODE_32BIT, struct cache_sb, flags, 56, 57);
+
+LE64_BITMASK(CACHE_SET_GC_RESERVE, struct cache_sb, flags, 57, 63);
+
+LE64_BITMASK(CACHE_SET_ROOT_RESERVE, struct cache_sb, flags2, 0, 6);
+
+/*
+ * If nonzero, encryption is enabled; overrides DATA/META_CSUM_TYPE. Also
+ * indicates the encryption algorithm in use, if/when we get more than one.
+ */
+LE64_BITMASK(CACHE_SET_ENCRYPTION_TYPE, struct cache_sb, flags2, 6, 10);
+
+/*
+ * If nonzero, we have an encryption key in the superblock, which is the key
+ * used to encrypt all other data/metadata. The key will normally be encrypted
+ * with the key userspace provides, but if encryption has been turned off we'll
+ * just store the master key unencrypted in the superblock so we can access the
+ * previously encrypted data.
+ */
+LE64_BITMASK(CACHE_SET_ENCRYPTION_KEY, struct cache_sb, flags2, 10, 11);
+
+/* options: */
+
+/**
+ * CACHE_SET_OPT(name, choices, min, max, sb_option, sysfs_writeable)
+ *
+ * @name - name of mount option, sysfs attribute, and struct cache_set_opts
+ * member
+ *
+ * @choices - array of strings that the user can select from - option is by
+ * array index
+ *
+ * Booleans are special cased; if @choices is bch_bool_opt the mount
+ * options name and noname will work as expected.
+ *
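+ * @min, @max - bounds on the option's value; judging by the entries below
+ *	(e.g. 0, BCH_CSUM_NR and 0, 2 for booleans) @min is inclusive and
+ *	@max is exclusive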
+ *
+ * @sb_option - name of corresponding superblock option
+ *
+ * @sysfs_writeable - if true, option will be modifiable at runtime via sysfs
+ */
+
+#define CACHE_SET_SB_OPTS() \
+ CACHE_SET_OPT(errors, \
+ bch_error_actions, \
+ 0, BCH_NR_ERROR_ACTIONS, \
+ CACHE_SET_ERROR_ACTION, \
+ true) \
+ CACHE_SET_OPT(metadata_replicas, \
+ bch_uint_opt, \
+ 0, BCH_REPLICAS_MAX, \
+ CACHE_SET_META_REPLICAS_WANT, \
+ false) \
+ CACHE_SET_OPT(data_replicas, \
+ bch_uint_opt, \
+ 0, BCH_REPLICAS_MAX, \
+ CACHE_SET_DATA_REPLICAS_WANT, \
+ false) \
+ CACHE_SET_OPT(metadata_checksum, \
+ bch_csum_types, \
+ 0, BCH_CSUM_NR, \
+ CACHE_SET_META_CSUM_TYPE, \
+ true) \
+ CACHE_SET_OPT(data_checksum, \
+ bch_csum_types, \
+ 0, BCH_CSUM_NR, \
+ CACHE_SET_DATA_CSUM_TYPE, \
+ true) \
+ CACHE_SET_OPT(compression, \
+ bch_compression_types, \
+ 0, BCH_COMPRESSION_NR, \
+ CACHE_SET_COMPRESSION_TYPE, \
+ true) \
+ CACHE_SET_OPT(str_hash, \
+ bch_str_hash_types, \
+ 0, BCH_STR_HASH_NR, \
+ CACHE_SET_STR_HASH_TYPE, \
+ true) \
+ CACHE_SET_OPT(inodes_32bit, \
+ bch_bool_opt, 0, 2, \
+ CACHE_INODE_32BIT, \
+ true) \
+ CACHE_SET_OPT(gc_reserve_percent, \
+ bch_uint_opt, \
+ 5, 21, \
+ CACHE_SET_GC_RESERVE, \
+ false) \
+ CACHE_SET_OPT(root_reserve_percent, \
+ bch_uint_opt, \
+ 0, 21, \
+ CACHE_SET_ROOT_RESERVE, \
+ false)
+
/* backing device specific stuff: */
struct backingdev_sb {
return __le64_to_cpu(sb->set_magic) ^ BSET_MAGIC;
}
-/*
- * Journal
- *
- * On disk format for a journal entry:
- * seq is monotonically increasing; every journal entry has its own unique
- * sequence number.
- *
- * last_seq is the oldest journal entry that still has keys the btree hasn't
- * flushed to disk yet.
- *
- * version is for on disk format changes.
- */
+/* 128 bits, sufficient for cryptographic MACs: */
+struct bch_csum {
+ __le64 lo;
+ __le64 hi;
+};
+
+/* Journal */
#define BCACHE_JSET_VERSION_UUIDv1 1
#define BCACHE_JSET_VERSION_UUID 1 /* Always latest UUID format */
#define JSET_KEYS_U64s (sizeof(struct jset_entry) / sizeof(__u64))
-
-LE32_BITMASK(JKEYS_TYPE, struct jset_entry, flags, 0, 8);
+LE32_BITMASK(JOURNAL_ENTRY_TYPE, struct jset_entry, flags, 0, 8);
enum {
- JKEYS_BTREE_KEYS = 0,
- JKEYS_BTREE_ROOT = 1,
- JKEYS_PRIO_PTRS = 2,
+ JOURNAL_ENTRY_BTREE_KEYS = 0,
+ JOURNAL_ENTRY_BTREE_ROOT = 1,
+ JOURNAL_ENTRY_PRIO_PTRS = 2,
/*
* Journal sequence numbers can be blacklisted: bsets record the max
* and then record that we skipped it so that the next time we crash and
* recover we don't think there was a missing journal entry.
*/
- JKEYS_JOURNAL_SEQ_BLACKLISTED = 3,
+ JOURNAL_ENTRY_JOURNAL_SEQ_BLACKLISTED = 3,
};
+/*
+ * On disk format for a journal entry:
+ * seq is monotonically increasing; every journal entry has its own unique
+ * sequence number.
+ *
+ * last_seq is the oldest journal entry that still has keys the btree hasn't
+ * flushed to disk yet.
+ *
+ * version is for on disk format changes.
+ */
struct jset {
- __le64 csum;
+ struct bch_csum csum;
+
__le64 magic;
__le32 version;
__le32 flags;
};
LE32_BITMASK(JSET_CSUM_TYPE, struct jset, flags, 0, 4);
+LE32_BITMASK(JSET_BIG_ENDIAN, struct jset, flags, 4, 5);
+
+#define BCH_JOURNAL_BUCKETS_MIN 20
/* Bucket prios/gens */
struct prio_set {
- __le64 csum;
+ struct bch_csum csum;
+
__le64 magic;
__le32 version;
__le32 flags;
LE32_BITMASK(BSET_BIG_ENDIAN, struct bset, flags, 8, 9);
struct btree_node {
- __le64 csum;
+ struct bch_csum csum;
__le64 magic;
/* Closed interval: */
} __attribute__((packed));
struct btree_node_entry {
- __le64 csum;
+ struct bch_csum csum;
+
struct bset keys;
} __attribute__((packed));
+/* Crypto: */
+
+struct nonce {
+ __le32 d[4];
+};
+
+#define BCACHE_MASTER_KEY_HEADER "bch**key"
+#define BCACHE_MASTER_KEY_NONCE ((struct nonce) \
+ {{ __cpu_to_le32(1), __cpu_to_le32(2), \
+ __cpu_to_le32(3), __cpu_to_le32(4) }})
+
/* OBSOLETE */
#define BITMASK(name, type, field, offset, end) \
#include "bcache-format.h"
#include "bcache-fs.h"
#include "bcache-run.h"
+#include "bcache-key.h"
#define PACKAGE_NAME "bcache"
#define PACKAGE_VERSION "1.0"
CMD(device_remove, N_("<volume> <devices>"),
"Removes a device from its volume"),
+ /* Crypto */
+
+ CMD(unlock, N_("<device>"),
+ "Unlock an encrypted filesystem"),
+
#if 0
CMD(modify, N_("<options>"),
"Modifies attributes related to the volume",
--- /dev/null
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <termios.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <linux/random.h>
+#include <libscrypt.h>
+#include <sodium/crypto_stream_chacha20.h>
+
+#include "crypto.h"
+
+/*
+ * Print @prompt on stderr and read one line from stdin with terminal echo
+ * switched off.
+ *
+ * Returns the buffer allocated by getline(); the caller owns it and should
+ * free() it.  The line is returned exactly as getline() produced it, so it
+ * still ends with the newline when the input had one.
+ *
+ * Calls die() if the terminal attributes cannot be read or changed, or if no
+ * input could be read.
+ */
+char *read_passphrase(const char *prompt)
+{
+	struct termios old, new;
+	char *buf = NULL;
+	size_t buflen = 0;
+	ssize_t ret;
+
+	fprintf(stderr, "%s", prompt);
+	fflush(stderr);
+
+	if (tcgetattr(fileno(stdin), &old))
+		die("error getting terminal attrs");
+
+	new = old;
+	new.c_lflag &= ~ECHO;
+	if (tcsetattr(fileno(stdin), TCSAFLUSH, &new))
+		die("error setting terminal attrs");
+
+	ret = getline(&buf, &buflen, stdin);
+
+	/*
+	 * Put the terminal back before looking at the result, so that a read
+	 * failure does not leave the user's terminal with echo disabled.
+	 */
+	tcsetattr(fileno(stdin), TCSAFLUSH, &old);
+	fprintf(stderr, "\n");
+
+	if (ret <= 0)
+		die("error reading passphrase");
+
+	return buf;
+}
+
+/*
+ * Fill @key with sizeof(*key) bytes derived from @passphrase using scrypt
+ * with the SCRYPT_N/SCRYPT_r/SCRYPT_p parameters and a fixed salt.
+ *
+ * Calls die() if libscrypt reports an error.
+ */
+void derive_passphrase(struct bcache_key *key, const char *passphrase)
+{
+	/* sizeof(salt) below counts the terminating NUL as part of the salt. */
+	const unsigned char salt[] = "bcache";
+	const size_t passphrase_len = strlen(passphrase);
+	int err = libscrypt_scrypt((void *) passphrase, passphrase_len,
+				   salt, sizeof(salt),
+				   SCRYPT_N, SCRYPT_r, SCRYPT_p,
+				   (void *) key, sizeof(*key));
+
+	if (err != 0)
+		die("scrypt error: %i", err);
+}
+
+/*
+ * XOR the whole of @disk_key (header and key words) in place with the
+ * ChaCha20 keystream generated from @key and the fixed master key nonce.
+ *
+ * Calls die() if libsodium reports an error.
+ */
+void disk_key_encrypt(struct bcache_disk_key *disk_key,
+		      struct bcache_key *key)
+{
+	void *buf = disk_key;
+	int err = crypto_stream_chacha20_xor(buf, buf, sizeof(*disk_key),
+					     (void *) &bch_master_key_nonce,
+					     (void *) key);
+
+	if (err != 0)
+		die("chacha20 error: %i", err);
+}
+
+/*
+ * Initialise a fresh, unencrypted disk key: copy the master key header
+ * marker into @disk_key->header and fill @disk_key->key with bytes read from
+ * /dev/random.
+ *
+ * /dev/random is opened non-blocking and polled with a one second select()
+ * timeout; if the key has not been filled after about three seconds a notice
+ * is printed once on stdout.
+ *
+ * Calls die() if /dev/random cannot be opened or read.
+ */
+void disk_key_init(struct bcache_disk_key *disk_key)
+{
+	ssize_t ret;
+
+	memcpy(&disk_key->header, bch_key_header, sizeof(bch_key_header));
+#if 0
+	ret = getrandom(disk_key->key, sizeof(disk_key->key), GRND_RANDOM);
+	if (ret != sizeof(disk_key->key))
+		die("error getting random bytes for key");
+#else
+	int fd = open("/dev/random", O_RDONLY|O_NONBLOCK);
+	if (fd < 0)
+		die("error opening /dev/random");
+
+	size_t n = 0;
+	struct timespec start;
+	bool printed = false;
+
+	clock_gettime(CLOCK_MONOTONIC, &start);
+
+	while (n < sizeof(disk_key->key)) {
+		struct timeval timeout = { 1, 0 };
+		fd_set set;
+
+		FD_ZERO(&set);
+		FD_SET(fd, &set);
+
+		/* A signal interrupting the wait is not an error; just retry. */
+		if (select(fd + 1, &set, NULL, NULL, &timeout) < 0 &&
+		    errno != EINTR)
+			die("select error");
+
+		/* Non-blocking: EAGAIN simply means no entropy available yet. */
+		ret = read(fd,
+			   (char *) disk_key->key + n,
+			   sizeof(disk_key->key) - n);
+		if (ret == -1 && errno != EINTR && errno != EAGAIN)
+			die("error reading from /dev/random");
+		if (ret > 0)
+			n += ret;
+
+		struct timespec now;
+		clock_gettime(CLOCK_MONOTONIC, &now);
+
+		/* Whole seconds elapsed since start, borrowing from tv_nsec. */
+		time_t elapsed = now.tv_sec - start.tv_sec;
+		if (now.tv_nsec < start.tv_nsec)
+			elapsed--;
+
+		if (!printed && elapsed >= 3) {
+			printf("Reading from /dev/random is taking a long time...\n");
+			printed = true;
+		}
+	}
+	close(fd);
+#endif
+}
--- /dev/null
+#ifndef _CRYPTO_H
+#define _CRYPTO_H
+
+#include "util.h"
+
+/*
+ * Key derived from the user's passphrase: derive_passphrase() fills the whole
+ * struct with scrypt output, and disk_key_encrypt() uses it as the ChaCha20
+ * key.
+ */
+struct bcache_key {
+ u64 key[4];
+};
+
+/*
+ * Key as stored in the superblock.  disk_key_init() sets header to the
+ * master key header marker and fills key with random bytes; the whole struct
+ * is then encrypted in place by disk_key_encrypt().
+ */
+struct bcache_disk_key {
+ u64 header;
+ u64 key[4];
+};
+
+/* Header marker bytes; exactly 8 chars, so the array holds no terminating NUL. */
+static const char bch_key_header[8] = BCACHE_MASTER_KEY_HEADER;
+/* Fixed nonce passed to ChaCha20 when encrypting the disk key. */
+static const struct nonce bch_master_key_nonce = BCACHE_MASTER_KEY_NONCE;
+
+char *read_passphrase(const char *);
+void derive_passphrase(struct bcache_key *, const char *);
+void disk_key_encrypt(struct bcache_disk_key *, struct bcache_key *);
+void disk_key_init(struct bcache_disk_key *);
+
+#endif /* _CRYPTO_H */
--- /dev/null
+#include <errno.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <uuid/uuid.h>
+
+#include "ccan/ilog/ilog.h"
+
+#include "bcache-ondisk.h"
+#include "libbcache.h"
+#include "crypto.h"
+
+/*
+ * Write a superblock image to a device and close the descriptor.
+ *
+ * The first SB_SECTOR sectors of @fd are zeroed, then @bytes bytes from @sb
+ * are written at sector SB_SECTOR and flushed with fsync().  @fd is closed
+ * before returning, so the caller must not use it afterwards.
+ *
+ * Any write or sync failure is reported with perror() and the process exits
+ * with EXIT_FAILURE.
+ */
+void __do_write_sb(int fd, void *sb, size_t bytes)
+{
+	char zeroes[SB_SECTOR << 9] = {0};
+
+	/* Zero start of disk */
+	if (pwrite(fd, zeroes, sizeof(zeroes), 0) != (ssize_t) sizeof(zeroes)) {
+		perror("write error trying to zero start of disk");
+		exit(EXIT_FAILURE);
+	}
+	/* Write superblock */
+	if (pwrite(fd, sb, bytes, SB_SECTOR << 9) != (ssize_t) bytes) {
+		perror("write error trying to write superblock");
+		exit(EXIT_FAILURE);
+	}
+
+	/* The superblock is only useful once it has actually reached the disk. */
+	if (fsync(fd)) {
+		perror("error syncing superblock to disk");
+		exit(EXIT_FAILURE);
+	}
+	close(fd);
+}
+
+/*
+ * Write superblock @_sb to @_fd.  The length written runs from the start of
+ * @_sb up to __bset_bkey_last(@_sb).  Expands to a single expression with no
+ * trailing semicolon, so it behaves like an ordinary function call.
+ */
+#define do_write_sb(_fd, _sb)						\
+	__do_write_sb(_fd, _sb,						\
+		      ((void *) __bset_bkey_last(_sb)) - (void *) (_sb))
+
+/*
+ * Format @nr_devs devices as members of one new cache set.
+ *
+ * @devs:		per-device options; size, bucket_size, nbuckets and
+ *			first_bucket are filled in here when left at zero or
+ *			computed.  Each device's fd is closed once its
+ *			superblock has been written.
+ * @block_size:		in 512 byte sectors; 0 means use the largest block
+ *			size reported for any of the devices
+ * @btree_node_size:	in 512 byte sectors; 0 means 512 sectors, capped at
+ *			the smallest bucket size
+ * @passphrase:		if non-NULL, a random disk key is generated, encrypted
+ *			with the key derived from the passphrase and stored in
+ *			the superblock
+ * @label:		optional label copied into the superblock
+ * @uuid:		stored as the superblock's user_uuid
+ *
+ * The remaining arguments are stored in the superblock unchanged.
+ *
+ * One superblock is built and shared by all devices; only disk_uuid,
+ * nr_this_dev and the checksum differ per device.  A summary is printed on
+ * stdout for every device.  Invalid geometry or allocation failure ends in
+ * die().
+ */
+void bcache_format(struct dev_opts *devs, size_t nr_devs,
+		   unsigned block_size,
+		   unsigned btree_node_size,
+		   unsigned meta_csum_type,
+		   unsigned data_csum_type,
+		   unsigned compression_type,
+		   const char *passphrase,
+		   unsigned meta_replicas,
+		   unsigned data_replicas,
+		   unsigned on_error_action,
+		   char *label,
+		   uuid_le uuid)
+{
+	struct cache_sb *sb;
+	struct dev_opts *i;
+
+	/* calculate block size: */
+	if (!block_size)
+		for (i = devs; i < devs + nr_devs; i++)
+			block_size = max(block_size,
+					 get_blocksize(i->dev, i->fd));
+
+	/* calculate bucket sizes: */
+	for (i = devs; i < devs + nr_devs; i++) {
+		if (!i->size)
+			i->size = get_size(i->dev, i->fd);
+
+		if (!i->bucket_size) {
+			u64 bytes = i->size << 9;
+
+			if (bytes < 1 << 20) /* 1M device - 256 4k buckets*/
+				i->bucket_size = rounddown_pow_of_two(bytes >> 17);
+			else
+				/* Max 1M bucket at around 256G */
+				i->bucket_size = 8 << min((ilog2(bytes >> 20) / 2), 9U);
+		}
+
+		if (i->bucket_size < block_size)
+			die("Bucket size cannot be smaller than block size");
+
+		i->nbuckets = i->size / i->bucket_size;
+		i->first_bucket = (23 / i->bucket_size) + 3;
+
+		if (i->nbuckets < 1 << 7)
+			die("Not enough buckets: %llu, need %u",
+			    i->nbuckets, 1 << 7);
+	}
+
+	/* calculate btree node size: */
+	if (!btree_node_size) {
+		/* 256k default btree node size */
+		btree_node_size = 512;
+
+		for (i = devs; i < devs + nr_devs; i++)
+			btree_node_size = min(btree_node_size, i->bucket_size);
+	}
+
+	/* Superblock followed by one cache_member slot per device. */
+	sb = calloc(1, sizeof(*sb) + sizeof(struct cache_member) * nr_devs);
+	if (!sb)
+		die("insufficient memory");
+
+	sb->offset	= __cpu_to_le64(SB_SECTOR);
+	sb->version	= __cpu_to_le64(BCACHE_SB_VERSION_CDEV_V3);
+	sb->magic	= BCACHE_MAGIC;
+	sb->block_size	= __cpu_to_le16(block_size);
+	sb->user_uuid	= uuid;
+	sb->nr_in_set	= nr_devs;
+
+	uuid_generate(sb->set_uuid.b);
+
+	/* Fixed-size field in a zeroed buffer; a full-length label is not NUL terminated. */
+	if (label)
+		strncpy((char *) sb->label, label, sizeof(sb->label));
+
+	/*
+	 * don't have a userspace crc32c implementation handy, just always use
+	 * crc64
+	 */
+	SET_CACHE_SB_CSUM_TYPE(sb,		BCH_CSUM_CRC64);
+	SET_CACHE_SET_META_CSUM_TYPE(sb,	meta_csum_type);
+	SET_CACHE_SET_DATA_CSUM_TYPE(sb,	data_csum_type);
+	SET_CACHE_SET_COMPRESSION_TYPE(sb,	compression_type);
+
+	SET_CACHE_SET_BTREE_NODE_SIZE(sb,	btree_node_size);
+	SET_CACHE_SET_META_REPLICAS_WANT(sb,	meta_replicas);
+	SET_CACHE_SET_META_REPLICAS_HAVE(sb,	meta_replicas);
+	SET_CACHE_SET_DATA_REPLICAS_WANT(sb,	data_replicas);
+	SET_CACHE_SET_DATA_REPLICAS_HAVE(sb,	data_replicas);
+	SET_CACHE_SET_ERROR_ACTION(sb,		on_error_action);
+
+	if (passphrase) {
+		struct bcache_key key;
+		struct bcache_disk_key disk_key;
+
+		derive_passphrase(&key, passphrase);
+		disk_key_init(&disk_key);
+		disk_key_encrypt(&disk_key, &key);
+
+		memcpy(sb->encryption_key, &disk_key, sizeof(disk_key));
+		SET_CACHE_SET_ENCRYPTION_TYPE(sb, 1);
+		SET_CACHE_SET_ENCRYPTION_KEY(sb, 1);
+
+		/* Don't leave key material lying around on the stack. */
+		memzero_explicit(&disk_key, sizeof(disk_key));
+		memzero_explicit(&key, sizeof(key));
+	}
+
+	for (i = devs; i < devs + nr_devs; i++) {
+		struct cache_member *m = sb->members + (i - devs);
+
+		uuid_generate(m->uuid.b);
+		m->nbuckets	= __cpu_to_le64(i->nbuckets);
+		m->first_bucket	= __cpu_to_le16(i->first_bucket);
+		m->bucket_size	= __cpu_to_le16(i->bucket_size);
+
+		SET_CACHE_TIER(m,		i->tier);
+		SET_CACHE_REPLACEMENT(m,	i->replacement_policy);
+		SET_CACHE_DISCARD(m,		i->discard);
+	}
+
+	sb->u64s = __cpu_to_le16(bch_journal_buckets_offset(sb));
+
+	/* Specialise the shared superblock for each device and write it out. */
+	for (i = devs; i < devs + nr_devs; i++) {
+		struct cache_member *m = sb->members + (i - devs);
+		char uuid_str[40], set_uuid_str[40];
+
+		sb->disk_uuid	= m->uuid;
+		sb->nr_this_dev	= i - devs;
+		sb->csum	= __cpu_to_le64(__csum_set(sb, __le16_to_cpu(sb->u64s),
+							   CACHE_SB_CSUM_TYPE(sb)));
+
+		uuid_unparse(sb->disk_uuid.b, uuid_str);
+		uuid_unparse(sb->user_uuid.b, set_uuid_str);
+		printf("UUID:			%s\n"
+		       "Set UUID:		%s\n"
+		       "version:		%u\n"
+		       "nbuckets:		%llu\n"
+		       "block_size:		%u\n"
+		       "bucket_size:		%u\n"
+		       "nr_in_set:		%u\n"
+		       "nr_this_dev:		%u\n"
+		       "first_bucket:		%u\n",
+		       uuid_str, set_uuid_str,
+		       (unsigned) __le64_to_cpu(sb->version),
+		       __le64_to_cpu(m->nbuckets),
+		       __le16_to_cpu(sb->block_size),
+		       __le16_to_cpu(m->bucket_size),
+		       sb->nr_in_set,
+		       sb->nr_this_dev,
+		       __le16_to_cpu(m->first_bucket));
+
+		/* Also closes i->fd. */
+		do_write_sb(i->fd, sb);
+	}
+
+	free(sb);
+}
+
+/*
+ * Read the fixed-size part of the superblock of the device at @path into
+ * @sb.  Only sizeof(*sb) bytes are read, starting at sector SB_SECTOR.
+ *
+ * Calls die() if the device cannot be opened, the read comes up short, or
+ * the magic does not match BCACHE_MAGIC.
+ */
+void bcache_super_read(const char *path, struct cache_sb *sb)
+{
+	int fd = open(path, O_RDONLY);
+	if (fd < 0)
+		die("couldn't open %s", path);
+
+	ssize_t nread = pread(fd, sb, sizeof(*sb), SB_SECTOR << 9);
+
+	/* Done with the device either way; don't leak the descriptor. */
+	close(fd);
+
+	if (nread != (ssize_t) sizeof(*sb))
+		die("error reading superblock");
+
+	if (memcmp(&sb->magic, &BCACHE_MAGIC, sizeof(sb->magic)))
+		die("not a bcache superblock");
+}
--- /dev/null
+#ifndef _LIBBCACHE_H
+#define _LIBBCACHE_H
+
+#include "util.h"
+#include "stdbool.h"
+
+/*
+ * Per-device options for bcache_format().  Fields left at zero are filled in
+ * by bcache_format() as noted below.
+ */
+struct dev_opts {
+ /* open descriptor for the device; bcache_format() closes it after writing */
+ int fd;
+ /* device path, passed to get_blocksize()/get_size() */
+ const char *dev;
+ /* 0 means use the value returned by get_size() */
+ u64 size; /* 512 byte sectors */
+ /* in 512 byte sectors; 0 means pick a size based on the device size */
+ unsigned bucket_size;
+ unsigned tier;
+ unsigned replacement_policy;
+ bool discard;
+
+ /* computed by bcache_format() */
+ u64 first_bucket;
+ u64 nbuckets;
+};
+
+void bcache_format(struct dev_opts *devs, size_t nr_devs,
+ unsigned block_size,
+ unsigned btree_node_size,
+ unsigned meta_csum_type,
+ unsigned data_csum_type,
+ unsigned compression_type,
+ const char *passphrase,
+ unsigned meta_replicas,
+ unsigned data_replicas,
+ unsigned on_error_action,
+ char *label,
+ uuid_le uuid);
+
+void bcache_super_read(const char *, struct cache_sb *);
+
+#endif /* _LIBBCACHE_H */
return ret;
}
+
+/*
+ * Zero @len bytes at @buf.  memset() is called through a volatile function
+ * pointer so the compiler cannot tell what is being called and drop the
+ * store as dead; intended for wiping key material.
+ */
+void memzero_explicit(void *buf, size_t len)
+{
+	typedef void *(*memset_fn)(void *, int, size_t);
+	memset_fn volatile wipe = memset;
+
+	wipe(buf, 0, len);
+}
#define __csum_set(i, u64s, type) \
({ \
- const void *start = ((const void *) (i)) + sizeof(u64); \
+ const void *start = ((const void *) (i)) + sizeof(i->csum); \
const void *end = __bkey_idx(i, u64s); \
\
bch_checksum(type, start, end - start); \
struct bcache_handle bcache_fs_open(const char *);
+void memzero_explicit(void *, size_t);
+
#endif /* _UTIL_H */