]> git.sesse.net Git - bcachefs-tools-debian/blob - bcacheadm-format.c
b06271e4d5ea571a58b356caa5002ff5a0cd0661
[bcachefs-tools-debian] / bcacheadm-format.c
1 /*
2  * Authors: Kent Overstreet <kmo@daterainc.com>
3  *          Gabriel de Perthuis <g2p.code@gmail.com>
4  *          Jacob Malevich <jam@datera.io>
5  *
6  * GPLv2
7  */
8
9 #if 0
10 #include <nih/main.h>
11 #include <nih/logging.h>
12 #include <ctype.h>
13 #include <errno.h>
14 #include <inttypes.h>
15 #include <limits.h>
16 #include <fcntl.h>
17 #include <unistd.h>
18 #include <blkid.h>
19 #include <sys/ioctl.h>
20 #include <sys/stat.h>
21 #include <dirent.h>
22 #endif
23
24 #include <errno.h>
25 #include <stdbool.h>
26 #include <stdint.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <sys/types.h>
31 #include <unistd.h>
32
33 #include <uuid/uuid.h>
34
35 #include <nih/command.h>
36 #include <nih/option.h>
37
38 #include "bcache.h"
39 #include "bcacheadm-format.h"
40
/*
 * Per-device settings for every cache device named with -C/--cache.
 *
 * set_cache() fills in one entry per device from the option values in effect
 * at that point on the command line; first_bucket and nbuckets are computed
 * later by cmd_format().
 */
static struct cache_opts {
        int             fd;                     /* result of dev_open() on the device path */
        const char      *dev;                   /* strdup()ed copy of the device path */
        unsigned        bucket_size;            /* in 512 byte sectors; 0 makes cmd_format() pick one */
        unsigned        tier;
        unsigned        replacement_policy;
        unsigned        replication_set;
        u64             filesystem_size;        /* in 512 byte sectors; 0 makes cmd_format() use getblocks() */

        u64             first_bucket;
        u64             nbuckets;
} cache_devices[MAX_DEVS];
53
/*
 * Per-device settings for every backing device named with -B/--bdev,
 * filled in by set_bdev().
 */
static struct backingdev_opts {
        int             fd;             /* result of dev_open() on the device path */
        const char      *dev;           /* strdup()ed copy of the device path */
        const char      *label;         /* copy of the label in effect when the device was named, or NULL */
} backing_devices[MAX_DEVS];
59
/* Number of entries in use in backing_devices[] and cache_devices[] */
static size_t nr_backing_devices = 0, nr_cache_devices = 0;

/* Target of the -l/--label option; NULL when no label was given */
static char *label = NULL;

/* All in units of 512 byte sectors */
static unsigned block_size, bucket_size, btree_node_size;
static u64 filesystem_size;
/* Snapshotted into each cache device by set_cache() */
static unsigned tier, replacement_policy;

/* set_uuid is generated in cmd_format(); user_uuid may come from --cset_uuid */
static uuid_le set_uuid, user_uuid;
static unsigned meta_csum_type = BCH_CSUM_CRC32C;
static unsigned data_csum_type = BCH_CSUM_CRC32C;
static unsigned compression_type = BCH_COMPRESSION_NONE;

static unsigned replication_set, meta_replicas = 1, data_replicas = 1;
static unsigned on_error_action;
/* Target of the --discard flag option */
static int discard;
/* Selects format_v0() (0) or format_v1() (1) in cmd_format() */
static unsigned version = 1;

/* Backing device settings passed to write_backingdev_sb() */
static u64 data_offset = BDEV_DATA_START_DEFAULT;
static unsigned cache_mode = CACHE_MODE_WRITEBACK;
81
82 static int set_cache(NihOption *option, const char *arg)
83 {
84         cache_devices[nr_cache_devices++] = (struct cache_opts) {
85                 .fd                     = dev_open(arg),
86                 .dev                    = strdup(arg),
87                 .bucket_size            = bucket_size,
88                 .tier                   = tier,
89                 .replacement_policy     = replacement_policy,
90                 .replication_set        = replication_set,
91                 .filesystem_size        = filesystem_size,
92         };
93         return 0;
94 }
95
96 static int set_bdev(NihOption *option, const char *arg)
97 {
98         backing_devices[nr_backing_devices++] = (struct backingdev_opts) {
99                 .fd                     = dev_open(arg),
100                 .dev                    = strdup(arg),
101                 .label                  = label ? strdup(label) : NULL,
102         };
103         return 0;
104 }
105
106 static int set_cache_set_uuid(NihOption *option, const char *arg)
107 {
108         if (uuid_parse(arg, user_uuid.b))
109                 die("Bad uuid");
110         return 0;
111 }
112
113 static int set_block_size(NihOption *option, const char *arg)
114 {
115         block_size = hatoi_validate(arg, "block size");
116         return 0;
117 }
118
119 static int set_bucket_sizes(NihOption *option, const char *arg)
120 {
121         bucket_size = hatoi_validate(arg, "bucket size");
122         return 0;
123 }
124
125 static int set_btree_node_size(NihOption *option, const char *arg)
126 {
127         btree_node_size = hatoi_validate(arg, "btree node size");
128         return 0;
129 }
130
131 static int set_filesystem_size(NihOption *option, const char *arg)
132 {
133         filesystem_size = hatoi(arg) >> 9;
134         return 0;
135 }
136
137 static int set_replacement_policy(NihOption *option, const char *arg)
138 {
139         replacement_policy = read_string_list_or_die(arg, replacement_policies,
140                                                      "replacement policy");
141         return 0;
142 }
143
144 static int set_csum_type(NihOption *option, const char *arg)
145 {
146         unsigned *csum_type = option->value;
147
148         *csum_type = read_string_list_or_die(arg, csum_types, "checksum type");
149         return 0;
150 }
151
152 static int set_compression_type(NihOption *option, const char *arg)
153 {
154         compression_type = read_string_list_or_die(arg, compression_types,
155                                                    "compression type");
156         return 0;
157 }
158
159 static int set_on_error_action(NihOption *option, const char *arg)
160 {
161         on_error_action = read_string_list_or_die(arg, error_actions,
162                                                   "error action");
163         return 0;
164 }
165
166 static int set_tier(NihOption *option, const char *arg)
167 {
168         tier = strtoul_or_die(arg, CACHE_TIERS, "tier");
169         return 0;
170 }
171
172 static int set_replication_set(NihOption *option, const char *arg)
173 {
174         replication_set = strtoul_or_die(arg, CACHE_REPLICATION_SET_MAX,
175                                          "replication set");
176         return 0;
177 }
178
179 static int set_meta_replicas(NihOption *option, const char *arg)
180 {
181         meta_replicas = strtoul_or_die(arg, CACHE_SET_META_REPLICAS_WANT_MAX,
182                                        "meta_replicas");
183         return 0;
184 }
185
186 static int set_data_replicas(NihOption *option, const char *arg)
187 {
188         data_replicas = strtoul_or_die(arg, CACHE_SET_DATA_REPLICAS_WANT_MAX,
189                                        "data_replicas");
190         return 0;
191 }
192
193 static int set_cache_mode(NihOption *option, const char *arg)
194 {
195         cache_mode = read_string_list_or_die(arg, bdev_cache_mode,
196                                              "cache mode");
197         return 0;
198 }
199
200 static int set_version(NihOption *option, const char *arg)
201 {
202         version = strtoul_or_die(arg, 2, "version");
203         return 0;
204 }
205
206 NihOption opts_format[] = {
207 //      { int shortoption, char *longoption, char *help, NihOptionGroup, char *argname, void *value, NihOptionSetter}
208
209         { 'C',  "cache",                N_("Format a cache device"),
210                 NULL, "dev",    NULL,   set_cache },
211         { 'B',  "bdev",                 N_("Format a backing device"),
212                 NULL, "dev",    NULL,   set_bdev },
213
214         { 'l',  "label",                N_("label"),
215                 NULL, "label",  &label, NULL},
216         { 0,    "cset_uuid",            N_("UUID for the cache set"),
217                 NULL, "uuid",   NULL,   set_cache_set_uuid },
218
219         { 'w',  "block",                N_("block size (hard sector size of SSD, often 2k"),
220                 NULL, "size",   NULL,   set_block_size },
221         { 'b',  "bucket",               N_("bucket size"),
222                 NULL, "size",   NULL,   set_bucket_sizes },
223         { 'n',  "btree_node",           N_("Btree node size, default 256k"),
224                 NULL, "size",   NULL,   set_btree_node_size },
225         { 0,    "fs_size",              N_("Size of filesystem on device" ),
226                 NULL, "size",   NULL,   set_filesystem_size },
227
228         { 'p',  "cache_replacement_policy", NULL,
229                 NULL, "(lru|fifo|random)", NULL, set_replacement_policy },
230
231         { 0,    "metadata_csum_type",   N_("Checksum type"),
232                 NULL, "(none|crc32c|crc64)", &meta_csum_type, set_csum_type },
233
234         { 0,    "data_csum_type",       N_("Checksum type"),
235                 NULL, "(none|crc32c|crc64)", &data_csum_type, set_csum_type },
236
237         { 0,    "compression_type",     N_("Compression type"),
238                 NULL, "(none|gzip)", NULL, set_compression_type },
239
240         { 0,    "error_action",         N_("Action to take on filesystem error"),
241                 NULL, "(continue|readonly|panic)", NULL, set_on_error_action },
242
243         { 0,    "discard",              N_("Enable discards"),
244                 NULL, NULL,             &discard,       NULL },
245
246         { 't',  "tier",                 N_("tier of subsequent devices"),
247                 NULL, "#",      NULL,   set_tier },
248
249         { 0,    "replication_set",      N_("replication set of subsequent devices"),
250                 NULL, "#",      NULL,   set_replication_set },
251
252         { 0,    "meta_replicas",        N_("number of metadata replicas"),
253                 NULL, "#",      NULL,   set_meta_replicas },
254
255         { 0,    "data_replicas",        N_("number of data replicas"),
256                 NULL, "#",      NULL,   set_data_replicas },
257
258         { 0,    "cache_mode",           N_("Cache mode (for backing devices)"),
259                 NULL, "(writethrough|writeback|writearound", NULL, set_cache_mode },
260
261         { 'o',  "data_offset",          N_("data offset in sectors"),
262                 NULL, "offset", &data_offset, NULL},
263
264         { 'v',  "version",              N_("superblock version"),
265                 NULL, "#",      NULL,   set_version},
266
267         NIH_OPTION_LAST
268 };
269
/*
 * Round @n down to a power of two by clearing low set bits until only the
 * highest one remains. Returns @n unchanged when it is already a power of
 * two, and 0 for 0.
 */
static unsigned rounddown_pow_of_two(unsigned n)
{
        /* n & (n - 1) is n with its lowest set bit removed */
        while (n & (n - 1))
                n &= n - 1;

        return n;
}
281
282 static unsigned ilog2(u64 n)
283 {
284         unsigned ret = 0;
285
286         while (n) {
287                 ret++;
288                 n >>= 1;
289         }
290
291         return ret;
292 }
293
294 void __do_write_sb(int fd, void *sb, size_t bytes)
295 {
296         char zeroes[SB_START] = {0};
297
298         /* Zero start of disk */
299         if (pwrite(fd, zeroes, SB_START, 0) != SB_START) {
300                 perror("write error trying to zero start of disk\n");
301                 exit(EXIT_FAILURE);
302         }
303         /* Write superblock */
304         if (pwrite(fd, sb, bytes, SB_START) != bytes) {
305                 perror("write error trying to write superblock\n");
306                 exit(EXIT_FAILURE);
307         }
308
309         fsync(fd);
310         close(fd);
311 }
312
/*
 * Write superblock @_sb to @_fd; the length is the distance from the start
 * of @_sb to __bset_bkey_last(@_sb). @_sb is evaluated more than once, so
 * it must not have side effects. Use as a statement: do_write_sb(fd, sb);
 */
#define do_write_sb(_fd, _sb)                                           \
        __do_write_sb((_fd), (_sb),                                     \
                      (char *) __bset_bkey_last(_sb) - (char *) (_sb))
315
316 void write_backingdev_sb(int fd, unsigned block_size, unsigned mode,
317                          u64 data_offset, const char *label,
318                          uuid_le set_uuid)
319 {
320         char uuid_str[40];
321         struct cache_sb sb;
322
323         memset(&sb, 0, sizeof(struct cache_sb));
324
325         sb.offset       = SB_SECTOR;
326         sb.version      = BCACHE_SB_VERSION_BDEV;
327         sb.magic        = BCACHE_MAGIC;
328         uuid_generate(sb.disk_uuid.b);
329         sb.set_uuid     = set_uuid;
330         sb.block_size   = block_size;
331
332         uuid_unparse(sb.disk_uuid.b, uuid_str);
333         if (label)
334                 memcpy(sb.label, label, SB_LABEL_SIZE);
335
336         SET_BDEV_CACHE_MODE(&sb, mode);
337
338         if (data_offset != BDEV_DATA_START_DEFAULT) {
339                 sb.version = BCACHE_SB_VERSION_BDEV_WITH_OFFSET;
340                 sb.bdev_data_offset = data_offset;
341         }
342
343         sb.csum = csum_set(&sb, BCH_CSUM_CRC64);
344
345         printf("UUID:                   %s\n"
346                "version:                %u\n"
347                "block_size:             %u\n"
348                "data_offset:            %llu\n",
349                uuid_str, (unsigned) sb.version,
350                sb.block_size, data_offset);
351
352         do_write_sb(fd, &sb);
353 }
354
355 static void format_v0(void)
356 {
357         set_uuid = user_uuid;
358
359         for (struct cache_opts *i = cache_devices;
360              i < cache_devices + nr_cache_devices;
361              i++)
362                 bucket_size = min(bucket_size, i->bucket_size);
363
364         struct cache_sb_v0 *sb = calloc(1, sizeof(*sb));
365
366         sb->offset              = SB_SECTOR;
367         sb->version             = BCACHE_SB_VERSION_CDEV_WITH_UUID;
368         sb->magic               = BCACHE_MAGIC;
369         sb->block_size  = block_size;
370         sb->bucket_size = bucket_size;
371         sb->set_uuid            = set_uuid;
372         sb->nr_in_set           = nr_cache_devices;
373
374         if (label)
375                 memcpy(sb->label, label, sizeof(sb->label));
376
377         for (struct cache_opts *i = cache_devices;
378              i < cache_devices + nr_cache_devices;
379              i++) {
380                 char uuid_str[40], set_uuid_str[40];
381
382                 uuid_generate(sb->uuid.b);
383                 sb->nbuckets            = i->nbuckets;
384                 sb->first_bucket        = i->first_bucket;
385                 sb->nr_this_dev         = i - cache_devices;
386                 sb->csum                = csum_set(sb, BCH_CSUM_CRC64);
387
388                 uuid_unparse(sb->uuid.b, uuid_str);
389                 uuid_unparse(sb->set_uuid.b, set_uuid_str);
390                 printf("UUID:                   %s\n"
391                        "Set UUID:               %s\n"
392                        "version:                %u\n"
393                        "nbuckets:               %llu\n"
394                        "block_size:             %u\n"
395                        "bucket_size:            %u\n"
396                        "nr_in_set:              %u\n"
397                        "nr_this_dev:            %u\n"
398                        "first_bucket:           %u\n",
399                        uuid_str, set_uuid_str,
400                        (unsigned) sb->version,
401                        sb->nbuckets,
402                        sb->block_size,
403                        sb->bucket_size,
404                        sb->nr_in_set,
405                        sb->nr_this_dev,
406                        sb->first_bucket);
407
408                 do_write_sb(i->fd, sb);
409         }
410 }
411
412 static void format_v1(void)
413 {
414         struct cache_sb *sb;
415
416         sb = calloc(1, sizeof(*sb) + sizeof(struct cache_member) *
417                     nr_cache_devices);
418
419         sb->offset              = SB_SECTOR;
420         sb->version             = BCACHE_SB_VERSION_CDEV_V3;
421         sb->magic               = BCACHE_MAGIC;
422         sb->block_size  = block_size;
423         sb->set_uuid            = set_uuid;
424         sb->user_uuid           = user_uuid;
425
426         if (label)
427                 memcpy(sb->label, label, sizeof(sb->label));
428
429         /*
430          * don't have a userspace crc32c implementation handy, just always use
431          * crc64
432          */
433         SET_CACHE_SB_CSUM_TYPE(sb,              BCH_CSUM_CRC64);
434         SET_CACHE_META_PREFERRED_CSUM_TYPE(sb,  meta_csum_type);
435         SET_CACHE_DATA_PREFERRED_CSUM_TYPE(sb,  data_csum_type);
436         SET_CACHE_COMPRESSION_TYPE(sb,          compression_type);
437
438         SET_CACHE_BTREE_NODE_SIZE(sb,           btree_node_size);
439         SET_CACHE_SET_META_REPLICAS_WANT(sb,    meta_replicas);
440         SET_CACHE_SET_META_REPLICAS_HAVE(sb,    meta_replicas);
441         SET_CACHE_SET_DATA_REPLICAS_WANT(sb,    data_replicas);
442         SET_CACHE_SET_DATA_REPLICAS_HAVE(sb,    data_replicas);
443         SET_CACHE_ERROR_ACTION(sb,              on_error_action);
444
445         for (struct cache_opts *i = cache_devices;
446              i < cache_devices + nr_cache_devices;
447              i++) {
448                 struct cache_member *m = sb->members + sb->nr_in_set++;
449
450                 uuid_generate(m->uuid.b);
451                 m->nbuckets     = i->nbuckets;
452                 m->first_bucket = i->first_bucket;
453                 m->bucket_size  = i->bucket_size;
454
455                 if (m->nbuckets < 1 << 7)
456                         die("Not enough buckets: %llu, need %u",
457                             m->nbuckets, 1 << 7);
458
459                 SET_CACHE_TIER(m,               i->tier);
460                 SET_CACHE_REPLICATION_SET(m,    i->replication_set);
461                 SET_CACHE_REPLACEMENT(m,        i->replacement_policy);
462                 SET_CACHE_DISCARD(m,            discard);
463         }
464
465         sb->u64s = bch_journal_buckets_offset(sb);
466
467         for (unsigned i = 0; i < sb->nr_in_set; i++) {
468                 char uuid_str[40], set_uuid_str[40];
469                 struct cache_member *m = sb->members + i;
470
471                 sb->disk_uuid           = m->uuid;
472                 sb->nr_this_dev = i;
473                 sb->csum                = csum_set(sb,
474                                                 CACHE_SB_CSUM_TYPE(sb));
475
476                 uuid_unparse(sb->disk_uuid.b, uuid_str);
477                 uuid_unparse(sb->user_uuid.b, set_uuid_str);
478                 printf("UUID:                   %s\n"
479                        "Set UUID:               %s\n"
480                        "version:                %u\n"
481                        "nbuckets:               %llu\n"
482                        "block_size:             %u\n"
483                        "bucket_size:            %u\n"
484                        "nr_in_set:              %u\n"
485                        "nr_this_dev:            %u\n"
486                        "first_bucket:           %u\n",
487                        uuid_str, set_uuid_str,
488                        (unsigned) sb->version,
489                        m->nbuckets,
490                        sb->block_size,
491                        m->bucket_size,
492                        sb->nr_in_set,
493                        sb->nr_this_dev,
494                        m->first_bucket);
495
496                 do_write_sb(cache_devices[i].fd, sb);
497         }
498 }
499
500 int cmd_format(NihCommand *command, char *const *args)
501 {
502         if (!nr_cache_devices && !nr_backing_devices)
503                 die("Please supply a device");
504
505         if (uuid_is_null(user_uuid.b))
506                 uuid_generate(user_uuid.b);
507
508         uuid_generate(set_uuid.b);
509
510         if (!block_size) {
511                 for (struct cache_opts *i = cache_devices;
512                      i < cache_devices + nr_cache_devices;
513                      i++)
514                         block_size = max(block_size, get_blocksize(i->dev, i->fd));
515
516                 for (struct backingdev_opts *i = backing_devices;
517                      i < backing_devices + nr_backing_devices;
518                      i++)
519                         block_size = max(block_size, get_blocksize(i->dev, i->fd));
520         }
521
522         for (struct cache_opts *i = cache_devices;
523              i < cache_devices + nr_cache_devices;
524              i++) {
525                 if (!i->bucket_size) {
526                         u64 size = (i->filesystem_size ?:
527                                     getblocks(i->fd)) << 9;
528
529                         if (size < 1 << 20) /* 1M device - 256 4k buckets*/
530                                 i->bucket_size = rounddown_pow_of_two(size >> 17);
531                         else
532                                 /* Max 1M bucket at around 256G */
533                                 i->bucket_size = 8 << min((ilog2(size >> 20) / 2), 9U);
534                 }
535
536                 if (i->bucket_size < block_size)
537                         die("Bucket size cannot be smaller than block size");
538
539                 i->nbuckets     = (i->filesystem_size ?:
540                                    getblocks(i->fd)) / i->bucket_size;
541                 i->first_bucket = (23 / i->bucket_size) + 3;
542
543                 if (i->nbuckets < 1 << 7)
544                         die("Not enough buckets: %llu, need %u",
545                             i->nbuckets, 1 << 7);
546         }
547
548         if (!btree_node_size) {
549                 /* 256k default btree node size */
550                 btree_node_size = 512;
551
552                 for (struct cache_opts *i = cache_devices;
553                      i < cache_devices + nr_cache_devices;
554                      i++)
555                         btree_node_size = min(btree_node_size, i->bucket_size);
556         }
557
558         switch (version) {
559         case 0:
560                 format_v0();
561                 break;
562         case 1:
563                 format_v1();
564                 break;
565         }
566
567         for (struct backingdev_opts *i = backing_devices;
568              i < backing_devices + nr_backing_devices;
569              i++)
570                 write_backingdev_sb(i->fd, block_size, cache_mode,
571                                     data_offset, i->label,
572                                     set_uuid);
573
574         return 0;
575 }