git.sesse.net Git - bcachefs-tools-debian/blob - bcache-format.c
Don't install udev hook - it's confusing with bcachefs
[bcachefs-tools-debian] / bcache-format.c
1 /*
2  * Authors: Kent Overstreet <kent.overstreet@gmail.com>
3  *          Gabriel de Perthuis <g2p.code@gmail.com>
4  *          Jacob Malevich <jam@datera.io>
5  *
6  * GPLv2
7  */
8
9 #include <errno.h>
10 #include <stdbool.h>
11 #include <stdint.h>
12 #include <stdio.h>
13 #include <stdlib.h>
14 #include <string.h>
15 #include <sys/types.h>
16 #include <unistd.h>
17
18 #include <uuid/uuid.h>
19
20 #include <nih/command.h>
21 #include <nih/option.h>
22
23 #include "ccan/ilog/ilog.h"
24 #include "ccan/darray/darray.h"
25
26 #include "bcache.h"
27 #include "bcache-format.h"
28
29 struct cache_opts {
30         int             fd;
31         const char      *dev;
32         unsigned        bucket_size;
33         unsigned        tier;
34         unsigned        replacement_policy;
35         unsigned        replication_set;
36         u64             size; /* 512 byte sectors */
37
38         u64             first_bucket;
39         u64             nbuckets;
40 };
41
/*
 * Settings for one backing device given on the command line; filled in by
 * set_bdev().
 */
struct backingdev_opts {
	int		fd;	/* from dev_open() on the device path */
	const char	*dev;	/* strdup()ed copy of the device path */
	const char	*label;	/* copy of the global label at parse time, or NULL */
};
47
48 static darray(struct cache_opts) cache_devices;
49 static darray(struct backingdev_opts) backing_devices;
50
51 static char *label = NULL;
52
53 /* All in units of 512 byte sectors */
54 static unsigned block_size, bucket_size, btree_node_size;
55 static u64 filesystem_size;
56 static unsigned tier, replacement_policy;
57
58 static uuid_le set_uuid, user_uuid;
59 static unsigned meta_csum_type = BCH_CSUM_CRC32C;
60 static unsigned data_csum_type = BCH_CSUM_CRC32C;
61 static unsigned compression_type = BCH_COMPRESSION_NONE;
62
63 static unsigned replication_set, meta_replicas = 1, data_replicas = 1;
64 static unsigned on_error_action;
65 static int discard;
66 static unsigned version = 1;
67
68 static u64 data_offset = BDEV_DATA_START_DEFAULT;
69 static unsigned cache_mode = CACHE_MODE_WRITEBACK;
70
71 static int set_cache(NihOption *option, const char *arg)
72 {
73         darray_append(cache_devices, (struct cache_opts) {
74                 .fd                     = dev_open(arg),
75                 .dev                    = strdup(arg),
76                 .bucket_size            = bucket_size,
77                 .tier                   = tier,
78                 .replacement_policy     = replacement_policy,
79                 .replication_set        = replication_set,
80                 .size                   = filesystem_size,
81         });
82         return 0;
83 }
84
85 static int set_bdev(NihOption *option, const char *arg)
86 {
87         darray_append(backing_devices, (struct backingdev_opts) {
88                 .fd                     = dev_open(arg),
89                 .dev                    = strdup(arg),
90                 .label                  = label ? strdup(label) : NULL,
91         });
92         return 0;
93 }
94
95 static int set_cache_set_uuid(NihOption *option, const char *arg)
96 {
97         if (uuid_parse(arg, user_uuid.b))
98                 die("Bad uuid");
99         return 0;
100 }
101
102 static int set_block_size(NihOption *option, const char *arg)
103 {
104         block_size = hatoi_validate(arg, "block size");
105         return 0;
106 }
107
108 static int set_bucket_sizes(NihOption *option, const char *arg)
109 {
110         bucket_size = hatoi_validate(arg, "bucket size");
111         return 0;
112 }
113
114 static int set_btree_node_size(NihOption *option, const char *arg)
115 {
116         btree_node_size = hatoi_validate(arg, "btree node size");
117         return 0;
118 }
119
120 static int set_filesystem_size(NihOption *option, const char *arg)
121 {
122         filesystem_size = hatoi(arg) >> 9;
123         return 0;
124 }
125
126 static int set_replacement_policy(NihOption *option, const char *arg)
127 {
128         replacement_policy = read_string_list_or_die(arg, replacement_policies,
129                                                      "replacement policy");
130         return 0;
131 }
132
133 static int set_csum_type(NihOption *option, const char *arg)
134 {
135         unsigned *csum_type = option->value;
136
137         *csum_type = read_string_list_or_die(arg, csum_types, "checksum type");
138         return 0;
139 }
140
141 static int set_compression_type(NihOption *option, const char *arg)
142 {
143         compression_type = read_string_list_or_die(arg, compression_types,
144                                                    "compression type");
145         return 0;
146 }
147
148 static int set_on_error_action(NihOption *option, const char *arg)
149 {
150         on_error_action = read_string_list_or_die(arg, error_actions,
151                                                   "error action");
152         return 0;
153 }
154
155 static int set_tier(NihOption *option, const char *arg)
156 {
157         tier = strtoul_or_die(arg, CACHE_TIERS, "tier");
158         return 0;
159 }
160
161 static int set_replication_set(NihOption *option, const char *arg)
162 {
163         replication_set = strtoul_or_die(arg, CACHE_REPLICATION_SET_MAX,
164                                          "replication set");
165         return 0;
166 }
167
168 static int set_meta_replicas(NihOption *option, const char *arg)
169 {
170         meta_replicas = strtoul_or_die(arg, CACHE_SET_META_REPLICAS_WANT_MAX,
171                                        "meta_replicas");
172         return 0;
173 }
174
175 static int set_data_replicas(NihOption *option, const char *arg)
176 {
177         data_replicas = strtoul_or_die(arg, CACHE_SET_DATA_REPLICAS_WANT_MAX,
178                                        "data_replicas");
179         return 0;
180 }
181
182 static int set_cache_mode(NihOption *option, const char *arg)
183 {
184         cache_mode = read_string_list_or_die(arg, bdev_cache_mode,
185                                              "cache mode");
186         return 0;
187 }
188
189 static int set_version(NihOption *option, const char *arg)
190 {
191         version = strtoul_or_die(arg, 2, "version");
192         return 0;
193 }
194
195 NihOption opts_format[] = {
196 //      { int shortoption, char *longoption, char *help, NihOptionGroup, char *argname, void *value, NihOptionSetter}
197
198         { 'C',  "cache",                N_("Format a cache device"),
199                 NULL, "dev",    NULL,   set_cache },
200         { 'B',  "bdev",                 N_("Format a backing device"),
201                 NULL, "dev",    NULL,   set_bdev },
202
203         { 'l',  "label",                N_("label"),
204                 NULL, "label",  &label, NULL},
205         { 0,    "cset_uuid",            N_("UUID for the cache set"),
206                 NULL, "uuid",   NULL,   set_cache_set_uuid },
207
208         { 'w',  "block",                N_("block size (hard sector size of SSD, often 2k"),
209                 NULL, "size",   NULL,   set_block_size },
210         { 'b',  "bucket",               N_("bucket size"),
211                 NULL, "size",   NULL,   set_bucket_sizes },
212         { 'n',  "btree_node",           N_("Btree node size, default 256k"),
213                 NULL, "size",   NULL,   set_btree_node_size },
214         { 0,    "fs_size",              N_("Size of filesystem on device" ),
215                 NULL, "size",   NULL,   set_filesystem_size },
216
217         { 'p',  "cache_replacement_policy", NULL,
218                 NULL, "(lru|fifo|random)", NULL, set_replacement_policy },
219
220         { 0,    "metadata_csum_type",   N_("Checksum type"),
221                 NULL, "(none|crc32c|crc64)", &meta_csum_type, set_csum_type },
222
223         { 0,    "data_csum_type",       N_("Checksum type"),
224                 NULL, "(none|crc32c|crc64)", &data_csum_type, set_csum_type },
225
226         { 0,    "compression_type",     N_("Compression type"),
227                 NULL, "(none|gzip)", NULL, set_compression_type },
228
229         { 0,    "error_action",         N_("Action to take on filesystem error"),
230                 NULL, "(continue|readonly|panic)", NULL, set_on_error_action },
231
232         { 0,    "discard",              N_("Enable discards"),
233                 NULL, NULL,             &discard,       NULL },
234
235         { 't',  "tier",                 N_("tier of subsequent devices"),
236                 NULL, "#",      NULL,   set_tier },
237
238         { 0,    "replication_set",      N_("replication set of subsequent devices"),
239                 NULL, "#",      NULL,   set_replication_set },
240
241         { 0,    "meta_replicas",        N_("number of metadata replicas"),
242                 NULL, "#",      NULL,   set_meta_replicas },
243
244         { 0,    "data_replicas",        N_("number of data replicas"),
245                 NULL, "#",      NULL,   set_data_replicas },
246
247         { 0,    "cache_mode",           N_("Cache mode (for backing devices)"),
248                 NULL, "(writethrough|writeback|writearound", NULL, set_cache_mode },
249
250         { 'o',  "data_offset",          N_("data offset in sectors"),
251                 NULL, "offset", &data_offset, NULL},
252
253         { 'v',  "version",              N_("superblock version"),
254                 NULL, "#",      NULL,   set_version},
255
256         NIH_OPTION_LAST
257 };
258
259 void __do_write_sb(int fd, void *sb, size_t bytes)
260 {
261         char zeroes[SB_SECTOR << 9] = {0};
262
263         /* Zero start of disk */
264         if (pwrite(fd, zeroes, SB_SECTOR << 9, 0) != SB_SECTOR << 9) {
265                 perror("write error trying to zero start of disk\n");
266                 exit(EXIT_FAILURE);
267         }
268         /* Write superblock */
269         if (pwrite(fd, sb, bytes, SB_SECTOR << 9) != bytes) {
270                 perror("write error trying to write superblock\n");
271                 exit(EXIT_FAILURE);
272         }
273
274         fsync(fd);
275         close(fd);
276 }
277
/*
 * Write superblock @_sb to @_fd via __do_write_sb(), sizing the write as
 * the distance from the start of @_sb to __bset_bkey_last(@_sb).
 *
 * @_sb is evaluated more than once. The macro expands to a single
 * expression without a trailing semicolon, so it behaves like an ordinary
 * function call statement.
 */
#define do_write_sb(_fd, _sb)						\
	__do_write_sb((_fd), (_sb),					\
		      (size_t) ((char *) __bset_bkey_last(_sb) -	\
				(char *) (_sb)))
280
281 void write_backingdev_sb(int fd, unsigned block_size, unsigned mode,
282                          u64 data_offset, const char *label,
283                          uuid_le set_uuid)
284 {
285         char uuid_str[40];
286         struct cache_sb sb;
287
288         memset(&sb, 0, sizeof(struct cache_sb));
289
290         sb.offset       = SB_SECTOR;
291         sb.version      = BCACHE_SB_VERSION_BDEV;
292         sb.magic        = BCACHE_MAGIC;
293         uuid_generate(sb.disk_uuid.b);
294         sb.set_uuid     = set_uuid;
295         sb.block_size   = block_size;
296
297         uuid_unparse(sb.disk_uuid.b, uuid_str);
298         if (label)
299                 memcpy(sb.label, label, SB_LABEL_SIZE);
300
301         SET_BDEV_CACHE_MODE(&sb, mode);
302
303         if (data_offset != BDEV_DATA_START_DEFAULT) {
304                 sb.version = BCACHE_SB_VERSION_BDEV_WITH_OFFSET;
305                 sb.bdev_data_offset = data_offset;
306         }
307
308         sb.csum = csum_set(&sb, BCH_CSUM_CRC64);
309
310         printf("UUID:                   %s\n"
311                "version:                %u\n"
312                "block_size:             %u\n"
313                "data_offset:            %llu\n",
314                uuid_str, (unsigned) sb.version,
315                sb.block_size, data_offset);
316
317         do_write_sb(fd, &sb);
318 }
319
320 static void format_v0(void)
321 {
322         struct cache_opts *i;
323
324         set_uuid = user_uuid;
325
326         darray_foreach(i, cache_devices)
327                 bucket_size = min(bucket_size, i->bucket_size);
328
329         struct cache_sb_v0 *sb = calloc(1, sizeof(*sb));
330
331         sb->offset              = SB_SECTOR;
332         sb->version             = BCACHE_SB_VERSION_CDEV_WITH_UUID;
333         sb->magic               = BCACHE_MAGIC;
334         sb->block_size  = block_size;
335         sb->bucket_size = bucket_size;
336         sb->set_uuid            = set_uuid;
337         sb->nr_in_set           = darray_size(cache_devices);
338
339         if (label)
340                 memcpy(sb->label, label, sizeof(sb->label));
341
342         darray_foreach(i, cache_devices) {
343                 char uuid_str[40], set_uuid_str[40];
344
345                 uuid_generate(sb->uuid.b);
346                 sb->nbuckets            = i->nbuckets;
347                 sb->first_bucket        = i->first_bucket;
348                 sb->nr_this_dev         = i - cache_devices.item;
349                 sb->csum                = csum_set(sb, BCH_CSUM_CRC64);
350
351                 uuid_unparse(sb->uuid.b, uuid_str);
352                 uuid_unparse(sb->set_uuid.b, set_uuid_str);
353                 printf("UUID:                   %s\n"
354                        "Set UUID:               %s\n"
355                        "version:                %u\n"
356                        "nbuckets:               %llu\n"
357                        "block_size:             %u\n"
358                        "bucket_size:            %u\n"
359                        "nr_in_set:              %u\n"
360                        "nr_this_dev:            %u\n"
361                        "first_bucket:           %u\n",
362                        uuid_str, set_uuid_str,
363                        (unsigned) sb->version,
364                        sb->nbuckets,
365                        sb->block_size,
366                        sb->bucket_size,
367                        sb->nr_in_set,
368                        sb->nr_this_dev,
369                        sb->first_bucket);
370
371                 do_write_sb(i->fd, sb);
372         }
373 }
374
375 static void format_v1(void)
376 {
377         struct cache_sb *sb;
378         struct cache_opts *i;
379
380         sb = calloc(1, sizeof(*sb) + sizeof(struct cache_member) *
381                     darray_size(cache_devices));
382
383         sb->offset      = SB_SECTOR;
384         sb->version     = BCACHE_SB_VERSION_CDEV_V3;
385         sb->magic       = BCACHE_MAGIC;
386         sb->block_size  = block_size;
387         sb->set_uuid    = set_uuid;
388         sb->user_uuid   = user_uuid;
389         sb->nr_in_set   = darray_size(cache_devices);
390
391         if (label)
392                 memcpy(sb->label, label, sizeof(sb->label));
393
394         /*
395          * don't have a userspace crc32c implementation handy, just always use
396          * crc64
397          */
398         SET_CACHE_SB_CSUM_TYPE(sb,              BCH_CSUM_CRC64);
399         SET_CACHE_META_PREFERRED_CSUM_TYPE(sb,  meta_csum_type);
400         SET_CACHE_DATA_PREFERRED_CSUM_TYPE(sb,  data_csum_type);
401         SET_CACHE_COMPRESSION_TYPE(sb,          compression_type);
402
403         SET_CACHE_BTREE_NODE_SIZE(sb,           btree_node_size);
404         SET_CACHE_SET_META_REPLICAS_WANT(sb,    meta_replicas);
405         SET_CACHE_SET_META_REPLICAS_HAVE(sb,    meta_replicas);
406         SET_CACHE_SET_DATA_REPLICAS_WANT(sb,    data_replicas);
407         SET_CACHE_SET_DATA_REPLICAS_HAVE(sb,    data_replicas);
408         SET_CACHE_ERROR_ACTION(sb,              on_error_action);
409
410         darray_foreach(i, cache_devices) {
411                 struct cache_member *m = sb->members +
412                         (i - cache_devices.item);
413
414                 uuid_generate(m->uuid.b);
415                 m->nbuckets     = i->nbuckets;
416                 m->first_bucket = i->first_bucket;
417                 m->bucket_size  = i->bucket_size;
418
419                 if (m->nbuckets < 1 << 7)
420                         die("Not enough buckets: %llu, need %u",
421                             m->nbuckets, 1 << 7);
422
423                 SET_CACHE_TIER(m,               i->tier);
424                 SET_CACHE_REPLICATION_SET(m,    i->replication_set);
425                 SET_CACHE_REPLACEMENT(m,        i->replacement_policy);
426                 SET_CACHE_DISCARD(m,            discard);
427         }
428
429         sb->u64s = bch_journal_buckets_offset(sb);
430
431         darray_foreach(i, cache_devices) {
432                 char uuid_str[40], set_uuid_str[40];
433                 struct cache_member *m = sb->members +
434                         (i - cache_devices.item);
435
436                 sb->disk_uuid   = m->uuid;
437                 sb->nr_this_dev = i - cache_devices.item;
438                 sb->csum        = csum_set(sb, CACHE_SB_CSUM_TYPE(sb));
439
440                 uuid_unparse(sb->disk_uuid.b, uuid_str);
441                 uuid_unparse(sb->user_uuid.b, set_uuid_str);
442                 printf("UUID:                   %s\n"
443                        "Set UUID:               %s\n"
444                        "version:                %u\n"
445                        "nbuckets:               %llu\n"
446                        "block_size:             %u\n"
447                        "bucket_size:            %u\n"
448                        "nr_in_set:              %u\n"
449                        "nr_this_dev:            %u\n"
450                        "first_bucket:           %u\n",
451                        uuid_str, set_uuid_str,
452                        (unsigned) sb->version,
453                        m->nbuckets,
454                        sb->block_size,
455                        m->bucket_size,
456                        sb->nr_in_set,
457                        sb->nr_this_dev,
458                        m->first_bucket);
459
460                 do_write_sb(i->fd, sb);
461         }
462 }
463
464 int cmd_format(NihCommand *command, char * const *args)
465 {
466         struct cache_opts *i;
467         struct backingdev_opts *ib;
468
469         if (!darray_size(cache_devices) &&
470             !darray_size(backing_devices))
471                 die("Please supply a device");
472
473         if (uuid_is_null(user_uuid.b))
474                 uuid_generate(user_uuid.b);
475
476         uuid_generate(set_uuid.b);
477
478         if (!block_size) {
479                 darray_foreach(i, cache_devices)
480                         block_size = max(block_size,
481                                          get_blocksize(i->dev, i->fd));
482
483                 darray_foreach(ib, backing_devices)
484                         block_size = max(block_size,
485                                          get_blocksize(ib->dev, ib->fd));
486         }
487
488         darray_foreach(i, cache_devices) {
489                 if (!i->size)
490                         i->size = get_size(i->dev, i->fd);
491
492                 if (!i->bucket_size) {
493                         u64 bytes = i->size << 9;
494
495                         if (bytes < 1 << 20) /* 1M device - 256 4k buckets*/
496                                 i->bucket_size = rounddown_pow_of_two(bytes >> 17);
497                         else
498                                 /* Max 1M bucket at around 256G */
499                                 i->bucket_size = 8 << min((ilog2(bytes >> 20) / 2), 9U);
500                 }
501
502                 if (i->bucket_size < block_size)
503                         die("Bucket size cannot be smaller than block size");
504
505                 i->nbuckets     = i->size / i->bucket_size;
506                 i->first_bucket = (23 / i->bucket_size) + 3;
507
508                 if (i->nbuckets < 1 << 7)
509                         die("Not enough buckets: %llu, need %u",
510                             i->nbuckets, 1 << 7);
511         }
512
513         if (!btree_node_size) {
514                 /* 256k default btree node size */
515                 btree_node_size = 512;
516
517                 darray_foreach(i, cache_devices)
518                         btree_node_size = min(btree_node_size, i->bucket_size);
519         }
520
521         switch (version) {
522         case 0:
523                 format_v0();
524                 break;
525         case 1:
526                 format_v1();
527                 break;
528         }
529
530         darray_foreach(ib, backing_devices)
531                 write_backingdev_sb(ib->fd, block_size, cache_mode,
532                                     data_offset, ib->label,
533                                     set_uuid);
534
535         return 0;
536 }