2 * Authors: Kent Overstreet <kmo@daterainc.com>
3 * Gabriel de Perthuis <g2p.code@gmail.com>
4 * Jacob Malevich <jam@datera.io>
11 #include <nih/logging.h>
19 #include <sys/ioctl.h>
30 #include <sys/types.h>
32 #include <uuid/uuid.h>
34 #include <nih/command.h>
35 #include <nih/option.h>
38 #include "bcacheadm.h"
/*
 * Per-device settings for one cache device named with -C/--cache.
 * set_cache() fills one slot of cache_devices[] per device; the array holds
 * at most MAX_DEVS entries.  Some members are declared on lines that are not
 * part of this excerpt.
 */
40 static struct cache_opts {
/* Value of the file-level replacement_policy when the device was added. */
45 unsigned replacement_policy;
/* Value of the file-level replication_set when the device was added. */
46 unsigned replication_set;
/* Optional size override; zero makes bcacheadm_format() ask the device. */
47 uint64_t filesystem_size;
48 } cache_devices[MAX_DEVS];
/*
 * Per-device settings for one backing device named with -B/--bdev.
 * The member declarations are not part of this excerpt; the rest of the file
 * accesses fd, dev and label on these entries.  The array holds at most
 * MAX_DEVS entries.
 */
50 static struct backingdev_opts {
54 } backing_devices[MAX_DEVS];
/* Number of populated entries in backing_devices[] and cache_devices[]. */
56 static size_t nr_backing_devices = 0, nr_cache_devices = 0;
/* Target of the -l/--label option; NULL until that option has been seen. */
58 static char *label = NULL;
60 /* All in units of 512 byte sectors */
/* Zero unless -w/--block was given; bcacheadm_format() raises it to the
 * largest block size reported for any device. */
61 static unsigned block_size;
/* Default bucket size: 2048 sectors, i.e. 1 MiB. */
62 static unsigned bucket_size = 2048;
/* Zero means "not specified"; bcacheadm_format() then chooses a value. */
63 static unsigned btree_node_size;
/* Zero means "not specified"; see the filesystem_size member of cache_opts. */
64 static uint64_t filesystem_size;
/* Current tier / replacement policy, set by their option setters. */
65 static unsigned tier, replacement_policy;
/* UUID given with --cset_uuid; left null when the option is absent, in which
 * case bcacheadm_format() generates one. */
67 static uuid_t cache_set_uuid;
/* Stored in the superblock as the preferred checksum type. */
68 static unsigned csum_type = BCH_CSUM_CRC32C;
/* Current replication set, and the wanted metadata/data replica counts
 * (both default to 1). */
69 static unsigned replication_set, meta_replicas = 1, data_replicas = 1;
/* Index chosen by --on-error; written with SET_CACHE_ERROR_ACTION(). */
70 static unsigned on_error_action;
/* Passed through to write_backingdev_sb() for every backing device. */
73 static uint64_t data_offset = BDEV_DATA_START_DEFAULT;
/* Passed through to write_backingdev_sb() for every backing device. */
74 static unsigned cache_mode = CACHE_MODE_WRITEBACK;
/*
 * Option setter for -C/--cache.
 *
 * Claims the next slot of cache_devices[] (post-incrementing
 * nr_cache_devices) and copies the current values of the file-level option
 * variables into it, so each device keeps the settings that were in effect
 * when this setter ran.  Part of the initializer is on lines not shown in
 * this excerpt.
 *
 * NOTE(review): no comparison of nr_cache_devices against MAX_DEVS is visible
 * here -- confirm one exists, otherwise this can write past the array.
 */
76 static int set_cache(NihOption *option, const char *arg)
78 cache_devices[nr_cache_devices++] = (struct cache_opts) {
80 .bucket_size = bucket_size,
82 .replacement_policy = replacement_policy,
83 .replication_set = replication_set,
84 .filesystem_size = filesystem_size,
/*
 * Option setter for -B/--bdev.
 *
 * Claims the next slot of backing_devices[] (post-incrementing
 * nr_backing_devices) and records the label that is current when this setter
 * runs.  Part of the initializer is on lines not shown in this excerpt.
 *
 * `label` is NULL until -l/--label has been seen, and passing NULL to
 * strdup() is undefined behaviour, so the label is only duplicated when it is
 * set; otherwise the entry's label is NULL.
 *
 * NOTE(review): consumers of the label member (write_backingdev_sb() is the
 * one visible in this file) must accept NULL -- confirm.
 * NOTE(review): no comparison of nr_backing_devices against MAX_DEVS is
 * visible here -- confirm one exists.
 */
89 static int set_bdev(NihOption *option, const char *arg)
91 backing_devices[nr_backing_devices++] = (struct backingdev_opts) {
93 .label = label ? strdup(label) : NULL,
/*
 * Option setter for --cset_uuid: parses arg into cache_set_uuid.
 *
 * uuid_parse() returns non-zero when arg is not a valid UUID string, so the
 * branch below is the failure path; its body is not part of this excerpt.
 */
98 static int set_cache_set_uuid(NihOption *option, const char *arg)
100 if (uuid_parse(arg, cache_set_uuid))
/*
 * Option setter for -w/--block: stores hatoi_validate(arg, "block size") in
 * block_size.  hatoi_validate() is defined elsewhere; presumably it parses a
 * human-readable size and returns it in 512-byte sectors -- confirm.
 */
105 static int set_block_size(NihOption *option, const char *arg)
107 block_size = hatoi_validate(arg, "block size");
/*
 * Option setter for -b/--bucket: stores hatoi_validate(arg, "bucket size")
 * in bucket_size.  The new value is picked up by cache devices added after
 * this point, because set_cache() copies bucket_size when it runs.
 */
111 static int set_bucket_sizes(NihOption *option, const char *arg)
113 bucket_size = hatoi_validate(arg, "bucket size");
/*
 * Option setter for -n/--btree-node: stores
 * hatoi_validate(arg, "btree node size") in btree_node_size.  A non-zero
 * value suppresses the default chosen in bcacheadm_format().
 */
117 static int set_btree_node_size(NihOption *option, const char *arg)
119 btree_node_size = hatoi_validate(arg, "btree node size");
/*
 * Option setter for --fs-size: stores hatoi(arg) divided by 512 (>> 9) in
 * filesystem_size.  Presumably hatoi() returns bytes, making the result a
 * count of 512-byte sectors -- confirm against hatoi()'s definition.
 */
123 static int set_filesystem_size(NihOption *option, const char *arg)
125 filesystem_size = hatoi(arg) >> 9;
/*
 * Option setter for -p/--cache_replacement_policy: stores the result of
 * looking arg up in replacement_policies[] via read_string_list_or_die().
 * Judging by its name the helper aborts on an unknown value -- confirm.
 */
129 static int set_replacement_policy(NihOption *option, const char *arg)
131 replacement_policy = read_string_list_or_die(arg, replacement_policies,
132 "replacement policy");
/*
 * Option setter for --csum-type: stores the result of looking arg up in
 * csum_types[] via read_string_list_or_die() in csum_type.
 */
136 static int set_csum_type(NihOption *option, const char *arg)
138 csum_type = read_string_list_or_die(arg, csum_types, "checksum type");
/*
 * Option setter for --on-error: stores the result of looking arg up in
 * error_actions[] via read_string_list_or_die() in on_error_action.  The
 * remainder of the call is on a line not shown in this excerpt.
 */
142 static int set_on_error_action(NihOption *option, const char *arg)
144 on_error_action = read_string_list_or_die(arg, error_actions,
/*
 * Option setter for -t/--tier: stores strtoul_or_die(arg, CACHE_TIERS,
 * "tier") in tier.  Presumably the second argument is an upper bound on the
 * accepted value -- confirm against strtoul_or_die()'s definition.
 */
149 static int set_tier(NihOption *option, const char *arg)
151 tier = strtoul_or_die(arg, CACHE_TIERS, "tier");
/*
 * Option setter for --replication_set: stores the value parsed by
 * strtoul_or_die() (called with CACHE_REPLICATION_SET_MAX) in
 * replication_set.  The remainder of the call is on a line not shown in this
 * excerpt.
 */
155 static int set_replication_set(NihOption *option, const char *arg)
157 replication_set = strtoul_or_die(arg, CACHE_REPLICATION_SET_MAX,
/*
 * Option setter for --meta-replicas: stores the value parsed by
 * strtoul_or_die() (called with CACHE_SET_META_REPLICAS_WANT_MAX) in
 * meta_replicas.  The remainder of the call is on a line not shown in this
 * excerpt.
 */
162 static int set_meta_replicas(NihOption *option, const char *arg)
164 meta_replicas = strtoul_or_die(arg, CACHE_SET_META_REPLICAS_WANT_MAX,
/*
 * Option setter for --data-replicas: stores the value parsed by
 * strtoul_or_die() (called with CACHE_SET_DATA_REPLICAS_WANT_MAX) in
 * data_replicas.  The remainder of the call is on a line not shown in this
 * excerpt.
 */
169 static int set_data_replicas(NihOption *option, const char *arg)
171 data_replicas = strtoul_or_die(arg, CACHE_SET_DATA_REPLICAS_WANT_MAX,
/*
 * Option setter for --cache_mode: stores the result of looking arg up in
 * bdev_cache_mode[] via read_string_list_or_die() in cache_mode.  The
 * remainder of the call is on a line not shown in this excerpt.
 */
176 static int set_cache_mode(NihOption *option, const char *arg)
178 cache_mode = read_string_list_or_die(arg, bdev_cache_mode,
/*
 * Option table for the format command.  Each entry follows the field order
 * given in the comment on the first line of the table.  Entries with a
 * setter function call it with the option's argument; entries without one
 * rely on libnih's default handling of the `value` pointer.
 *
 * Two user-visible strings had unbalanced parentheses (the --block help text
 * and the --cache_mode argument name); both are closed here.
 */
183 NihOption bcacheadm_format_options[] = {
184 // { int shortoption, char *longoption, char *help, NihOptionGroup, char *argname, void *value, NihOptionSetter}
186 { 'C', "cache", N_("Format a cache device"),
187 NULL, "dev", NULL, set_cache },
188 { 'B', "bdev", N_("Format a backing device"),
189 NULL, "dev", NULL, set_bdev },
/* No setter: the argument is stored through &label by libnih itself. */
191 { 'l', "label", N_("label"),
192 NULL, "label", &label, NULL},
193 { 0, "cset_uuid", N_("UUID for the cache set"),
194 NULL, "uuid", NULL, set_cache_set_uuid },
196 { 'w', "block", N_("block size (hard sector size of SSD, often 2k)"),
197 NULL, "size", NULL, set_block_size },
198 { 'b', "bucket", N_("bucket size"),
199 NULL, "size", NULL, set_bucket_sizes },
200 { 'n', "btree-node", N_("Btree node size, default 256k"),
201 NULL, "size", NULL, set_btree_node_size },
202 { 0, "fs-size", N_("Size of filesystem on device" ),
203 NULL, "size", NULL, set_filesystem_size },
205 { 'p', "cache_replacement_policy", NULL,
206 NULL, "(lru|fifo|random)", NULL, set_replacement_policy },
208 { 0, "csum-type", N_("Checksum type"),
209 NULL, "(none|crc32c|crc64)", NULL, set_csum_type },
211 { 0, "on-error", N_("Action to take on filesystem error"),
212 NULL, "(continue|readonly|panic)", NULL, set_on_error_action },
/* Flag option (no argument name); `discard` is declared on a line not shown
 * in this excerpt. */
214 { 0, "discard", N_("Enable discards"),
215 NULL, NULL, &discard, NULL },
217 { 't', "tier", N_("tier of subsequent devices"),
218 NULL, "#", NULL, set_tier },
220 { 0, "replication_set", N_("replication set of subsequent devices"),
221 NULL, "#", NULL, set_replication_set },
223 { 0, "meta-replicas", N_("number of metadata replicas"),
224 NULL, "#", NULL, set_meta_replicas },
226 { 0, "data-replicas", N_("number of data replicas"),
227 NULL, "#", NULL, set_data_replicas },
229 { 0, "cache_mode", N_("Cache mode (for backing devices)"),
230 NULL, "(writethrough|writeback|writearound)", NULL, set_cache_mode },
/* NOTE(review): this entry takes an argument but has no setter, so libnih's
 * default handling writes through &data_offset.  data_offset is a uint64_t,
 * not a char * -- confirm what libnih stores there; a dedicated numeric
 * setter may be needed. */
232 { 'o', "data_offset", N_("data offset in sectors"),
233 NULL, "offset", &data_offset, NULL},
/*
 * Entry point of the format command.
 *
 * Using the state collected by the option setters, it opens every cache and
 * backing device, settles on a common block size, builds one cache-set
 * superblock in memory, writes a copy of it to each cache device, and then
 * writes a superblock to each backing device.
 *
 * command, args: not referenced in the lines visible in this excerpt.
 * The return statement is not part of this excerpt.
 */
238 int bcacheadm_format(NihCommand *command, char *const *args)
240 struct cache_sb *cache_set_sb;
/* Nothing to do unless at least one -C or -B device was given. */
242 if (!nr_cache_devices && !nr_backing_devices)
243 die("Please supply a device");
/* Open every cache device, then every backing device, keeping the value
 * returned by dev_open() in the entry's fd member. */
245 for (struct cache_opts *i = cache_devices;
246 i < cache_devices + nr_cache_devices;
248 i->fd = dev_open(i->dev);
250 for (struct backingdev_opts *i = backing_devices;
251 i < backing_devices + nr_backing_devices;
253 i->fd = dev_open(i->dev);
/* Raise block_size to the largest value get_blocksize() reports for any
 * device; a value given with --block therefore acts as a lower bound. */
256 for (struct cache_opts *i = cache_devices;
257 i < cache_devices + nr_cache_devices;
259 block_size = max(block_size, get_blocksize(i->dev, i->fd));
261 for (struct backingdev_opts *i = backing_devices;
262 i < backing_devices + nr_backing_devices;
264 block_size = max(block_size, get_blocksize(i->dev, i->fd));
/* No --btree-node given: start from 512 sectors (the 256k default named in
 * the option's help text).  The loop that follows lowers btree_node_size to
 * the smallest per-device bucket size. */
267 if (!btree_node_size) {
268 btree_node_size = 512;
270 for (struct cache_opts *i = cache_devices;
271 i < cache_devices + nr_cache_devices;
273 btree_node_size = min(btree_node_size, i->bucket_size);
/* Zero-filled superblock with room for one struct cache_member per cache
 * device appended to it.
 * NOTE(review): no NULL check of the calloc() result is visible in this
 * excerpt -- confirm. */
276 cache_set_sb = calloc(1, sizeof(*cache_set_sb) +
277 sizeof(struct cache_member) * nr_cache_devices);
279 cache_set_sb->offset = SB_SECTOR;
280 cache_set_sb->version = BCACHE_SB_VERSION_CDEV_V3;
281 cache_set_sb->magic = BCACHE_MAGIC;
282 cache_set_sb->block_size = block_size;
/* set_uuid is always freshly generated.  user_uuid is generated only when
 * no --cset_uuid was supplied (cache_set_uuid is still null); the memcpy
 * below copies the supplied value, presumably as the else branch (the line
 * between the two statements is not shown in this excerpt). */
283 uuid_generate(cache_set_sb->set_uuid.b);
285 if (uuid_is_null(cache_set_uuid))
286 uuid_generate(cache_set_sb->user_uuid.b);
288 memcpy(cache_set_sb->user_uuid.b, cache_set_uuid,
289 sizeof(cache_set_sb->user_uuid));
/* NOTE(review): this copies sizeof(cache_set_sb->label) bytes from label
 * regardless of the string's length, so a shorter label is read past its
 * terminator.  label may also be NULL; any guard is on a line not shown in
 * this excerpt -- confirm. */
292 memcpy(cache_set_sb->label, label, sizeof(cache_set_sb->label));
295 * don't have a userspace crc32c implementation handy, just always use
298 SET_CACHE_SB_CSUM_TYPE(cache_set_sb, BCH_CSUM_CRC64);
299 SET_CACHE_PREFERRED_CSUM_TYPE(cache_set_sb, csum_type);
/* Record the cache-set-wide options.  The *_HAVE replica counts are set to
 * the same values as the *_WANT counts. */
301 SET_CACHE_BTREE_NODE_SIZE(cache_set_sb, btree_node_size);
302 SET_CACHE_SET_META_REPLICAS_WANT(cache_set_sb, meta_replicas);
303 SET_CACHE_SET_META_REPLICAS_HAVE(cache_set_sb, meta_replicas);
304 SET_CACHE_SET_DATA_REPLICAS_WANT(cache_set_sb, data_replicas);
305 SET_CACHE_SET_DATA_REPLICAS_HAVE(cache_set_sb, data_replicas);
306 SET_CACHE_ERROR_ACTION(cache_set_sb, on_error_action);
/* Fill in one member slot per cache device. */
308 for (struct cache_opts *i = cache_devices;
309 i < cache_devices + nr_cache_devices;
311 if (i->bucket_size < block_size)
312 die("Bucket size cannot be smaller than block size");
/* Claim the next member slot and bump nr_in_set. */
314 struct cache_member *m = cache_set_sb->members + cache_set_sb->nr_in_set++;
316 uuid_generate(m->uuid.b);
/* Bucket count: the --fs-size value when non-zero (GNU `?:` extension),
 * otherwise whatever getblocks() returns for the device, divided by the
 * bucket size. */
317 m->nbuckets = (i->filesystem_size ?:
318 getblocks(i->fd)) / i->bucket_size;
/* NOTE(review): the constants 23 and 3 are not explained in the visible
 * code -- confirm what they stand for. */
319 m->first_bucket = (23 / i->bucket_size) + 3;
320 m->bucket_size = i->bucket_size;
/* `<<` binds tighter than `<`: this requires at least 1 << 7 (128) buckets. */
322 if (m->nbuckets < 1 << 7)
323 die("Not enough buckets: %llu, need %u",
324 m->nbuckets, 1 << 7);
/* Per-device settings captured when the device was added, plus the
 * discard flag. */
326 SET_CACHE_TIER(m, i->tier);
327 SET_CACHE_REPLICATION_SET(m, i->replication_set);
328 SET_CACHE_REPLACEMENT(m, i->replacement_policy);
329 SET_CACHE_DISCARD(m, discard);
332 cache_set_sb->u64s = bch_journal_buckets_offset(cache_set_sb);
/* Write the shared superblock once per cache device.  In the visible lines
 * only disk_uuid, nr_this_dev and the checksum change between copies. */
334 for (unsigned i = 0; i < cache_set_sb->nr_in_set; i++) {
335 char uuid_str[40], set_uuid_str[40];
336 struct cache_member *m = cache_set_sb->members + i;
338 cache_set_sb->disk_uuid = m->uuid;
339 cache_set_sb->nr_this_dev = i;
/* Recompute the checksum after updating the per-device fields above. */
340 cache_set_sb->csum = csum_set(cache_set_sb,
341 CACHE_SB_CSUM_TYPE(cache_set_sb));
343 uuid_unparse(cache_set_sb->disk_uuid.b, uuid_str);
344 uuid_unparse(cache_set_sb->user_uuid.b, set_uuid_str);
353 "first_bucket: %u\n",
354 uuid_str, set_uuid_str,
355 (unsigned) cache_set_sb->version,
357 cache_set_sb->block_size,
359 cache_set_sb->nr_in_set,
360 cache_set_sb->nr_this_dev,
363 do_write_sb(cache_devices[i].fd, cache_set_sb);
/* Finally write a superblock to each backing device, passing the cache
 * set's user_uuid and set_uuid along with the common options. */
366 for (struct backingdev_opts *i = backing_devices;
367 i < backing_devices + nr_backing_devices;
369 write_backingdev_sb(i->fd, block_size, cache_mode,
370 data_offset, i->label,
371 cache_set_sb->user_uuid,
372 cache_set_sb->set_uuid);