2 * Author: Kent Overstreet <kmo@daterainc.com>
7 #define _FILE_OFFSET_BITS 64
8 #define __USE_FILE_OFFSET64
9 #define _XOPEN_SOURCE 600
23 #include <sys/ioctl.h>
24 #include <sys/types.h>
27 #include <uuid/uuid.h>
/*
 * max(x, y) - yield the larger of x and y.
 *
 * Written as a GNU statement expression: each argument is copied into a
 * typeof() temporary first, so x and y are each evaluated exactly once even
 * if they have side effects.  The (void)-cast comparison of the temporaries'
 * addresses has no runtime effect; presumably it is there so the compiler
 * warns about comparing distinct pointer types when x and y differ in type.
 * When the two values compare equal, the second one (_max2) is the result.
 */
31 #define max(x, y) ({ \
32 typeof(x) _max1 = (x); \
33 typeof(y) _max2 = (y); \
34 (void) (&_max1 == &_max2); \
35 _max1 > _max2 ? _max1 : _max2; })
/*
 * getblocks() - size of whatever is open on fd.
 *
 * ret is first set to st_size / 512 from fstat(), i.e. the size in 512-byte
 * units; when fd refers to a block device the BLKGETSIZE ioctl then stores
 * the device size into ret instead.
 *
 * NOTE(review): the declarations of statbuf and ret, the error exits and the
 * return statement are on lines not shown here; ret is presumably the value
 * returned - confirm.
 */
37 uint64_t getblocks(int fd)
41 if (fstat(fd, &statbuf)) {
/*
 * NOTE(review): perror() appends ": <error text>" after its argument, so the
 * "\n" embedded in this prefix splits the diagnostic across two lines.
 */
42 perror("stat error\n");
/* Default: regular-file size expressed in 512-byte units. */
45 ret = statbuf.st_size / 512;
46 if (S_ISBLK(statbuf.st_mode))
/*
 * NOTE(review): BLKGETSIZE writes an unsigned long through the pointer; the
 * declared type of ret is not visible here - confirm the widths agree on
 * targets where unsigned long is 32 bits.
 */
47 if (ioctl(fd, BLKGETSIZE, &ret)) {
48 perror("ioctl error");
/*
 * hatoi() - convert the string s to a uint64_t.
 *
 * The visible step parses a base-10 integer with strtoll(), which leaves e
 * pointing at the first character it did not consume.
 * NOTE(review): how the text at e is interpreted, and how the signed
 * long long result is turned into the unsigned return value, happens on
 * lines not shown here.
 */
54 uint64_t hatoi(const char *s)
57 long long i = strtoll(s, &e, 10);
/*
 * hatoi_validate() - parse s with hatoi() and sanity-check the result.
 *
 * msg names the quantity being parsed and is used as the prefix of the three
 * diagnostics written to stderr: "must be a power of two", "too large" and
 * "too small".
 *
 * NOTE(review): the conditions guarding each diagnostic, and what happens
 * after one is printed, are on lines not shown here.  The parsed value v is
 * a uint64_t while the function returns unsigned, so a narrowing conversion
 * occurs somewhere before the return - presumably the "too large" check is
 * what makes that safe; confirm.
 */
75 unsigned hatoi_validate(const char *s, const char *msg)
77 uint64_t v = hatoi(s);
80 fprintf(stderr, "%s must be a power of two\n", msg);
87 fprintf(stderr, "%s too large\n", msg);
92 fprintf(stderr, "%s too small\n", msg);
/*
 * skip_spaces() - step over leading whitespace in str.
 *
 * Loops for as long as the current character satisfies isspace().
 * NOTE(review): the loop body and the return statement are on lines not
 * shown here; presumably str is advanced and the resulting position is
 * returned (with const dropped, given the char * return type) - confirm.
 * NOTE(review): *str is passed to isspace() as a plain char; <ctype.h>
 * functions expect an unsigned char value or EOF.
 */
99 char *skip_spaces(const char *str)
101 while (isspace(*str))
117 while (end >= s && isspace(*end))
/*
 * read_string_list() - look a string up in a table of names.
 *
 * buf:  the text to look up; it is duplicated with strdup() into d so the
 *       function has a private copy to work on.
 * list: array of candidate names; the scan stops at the first null entry,
 *       so the array must be NULL-terminated.
 *
 * Each entry is compared against s with strcmp(), i.e. an exact,
 * case-sensitive match.
 *
 * NOTE(review): how s is derived from d, what is returned on a match or a
 * miss, and whether d is freed are on lines not shown here.  Presumably the
 * matching index is returned, or a negative value when nothing matches -
 * confirm.  The caller in main() stores the ssize_t result in an unsigned
 * variable, so a negative result would become a large positive number there.
 */
124 ssize_t read_string_list(const char *buf, const char * const list[])
127 char *s, *d = strdup(buf);
133 for (i = 0; list[i]; i++)
134 if (!strcmp(list[i], s))
/*
 * Help text for make-bcache, built from adjacent string literals (one
 * option per line).
 *
 * NOTE(review): the data offset option is advertised below as
 * "--data-offset", but the long-option table in main() spells the entry
 * "data_offset" (underscore).  As written, the hyphenated form shown to the
 * user would not match that table entry - confirm which spelling is
 * intended and make the two agree.
 */
148 "Usage: make-bcache [options] device\n"
149 " -C, --cache Format a cache device\n"
150 " -B, --bdev Format a backing device\n"
151 " -b, --bucket bucket size\n"
152 " -w, --block block size (hard sector size of SSD, often 2k)\n"
153 " -o, --data-offset data offset in sectors\n"
154 " --cset-uuid UUID for the cache set\n"
156 " --writeback enable writeback\n"
157 " --discard enable discards\n"
158 " --wipe-bcache destroy existing bcache data if present\n"
159 " --tier set tier of subsequent cache devices\n"
160 " --cache_replacement_policy=(lru|fifo|random)\n"
161 " -l, --label label\n"
162 " -h, --help display this help and exit\n");
/*
 * Table of cache replacement policy names.  main() passes it as the list
 * argument of read_string_list() when handling the replacement-policy
 * option; that function stops scanning at the first null entry.
 * NOTE(review): the entries themselves are on lines not shown here - confirm
 * the table ends with a NULL sentinel.
 */
166 const char * const cache_replacement_policies[] = {
/*
 * write_sb() - build a bcache superblock and write it to one device.
 *
 * dev:                      path of the device to format
 * block_size, bucket_size:  copied into sb.block_size / sb.bucket_size
 * writeback:                selects CACHE_MODE_WRITEBACK over
 *                           CACHE_MODE_WRITETHROUGH for a backing device
 * discard, cache_replacement_policy, tier:
 *                           stored via SET_CACHE_DISCARD,
 *                           SET_CACHE_REPLACEMENT and SET_CACHE_TIER
 * wipe_bcache:              allow overwriting an existing bcache superblock
 * data_offset:              stored in sb.data_offset when it differs from
 *                           BDEV_DATA_START_DEFAULT
 * set_uuid:                 copied into sb.set_uuid
 * bdev:                     selects the backing-device superblock version
 *                           (see the ?: below) - TODO confirm, the condition
 *                           line is not shown
 * nr_in_set, nr_this_dev:   copied into the superblock fields of the same
 *                           name
 * (a further parameter, label, is used below; its declaration is on a line
 * not shown here)
 *
 * Visible sequence: open the device read-write and exclusively, read what is
 * currently at SB_START, complain if it already carries the bcache magic
 * (unless wipe_bcache) or if libblkid detects some other signature, fill in
 * a zeroed struct cache_sb, checksum it, zero the first SB_START bytes of
 * the device and finally write the superblock at offset SB_START.
 *
 * NOTE(review): what follows each diagnostic (presumably exit()) and whether
 * fd and the blkid probe are released are on lines not shown here.
 */
173 static void write_sb(char *dev, unsigned block_size, unsigned bucket_size,
174 bool writeback, bool discard, bool wipe_bcache,
175 unsigned cache_replacement_policy, uint64_t data_offset,
176 uuid_t set_uuid, unsigned tier, bool bdev,
177 uint16_t nr_in_set, uint16_t nr_this_dev,
/* zeroes is an all-zero buffer of SB_START bytes, used to blank the start of the device. */
181 char uuid_str[40], set_uuid_str[40], zeroes[SB_START] = {0};
185 if ((fd = open(dev, O_RDWR|O_EXCL)) == -1) {
186 fprintf(stderr, "Can't open dev %s: %s\n", dev, strerror(errno));
/* Read whatever currently occupies the superblock location. */
190 if (pread(fd, &sb, sizeof(sb), SB_START) != sizeof(sb))
/* An existing bcache magic is only acceptable when --wipe-bcache was given. */
193 if (!memcmp(sb.magic, bcache_magic, 16) && !wipe_bcache) {
194 fprintf(stderr, "Already a bcache device on %s, "
195 "overwrite with --wipe-bcache\n", dev);
/* Use libblkid to look for any other signature on the device. */
199 if (!(pr = blkid_new_probe()))
201 if (blkid_probe_set_device(pr, fd, 0, 0))
203 /* enable ptable probing; superblock probing is enabled by default */
204 if (blkid_probe_enable_partitions(pr, true))
/* blkid_do_probe() returning 0 means a signature was detected. */
206 if (!blkid_do_probe(pr)) {
207 /* XXX wipefs doesn't know how to remove partition tables */
208 fprintf(stderr, "Device %s already has a non-bcache superblock, "
209 "remove it using wipefs and wipefs -a\n", dev);
/* Start from an all-zero superblock and fill in the fields one by one. */
213 memset(&sb, 0, sizeof(struct cache_sb));
215 sb.offset = SB_SECTOR;
217 ? BCACHE_SB_VERSION_BDEV
218 : BCACHE_SB_VERSION_CDEV;
220 memcpy(sb.magic, bcache_magic, 16);
/* Every device gets a freshly generated UUID of its own; set_uuid is supplied by the caller. */
221 uuid_generate(sb.uuid);
222 memcpy(sb.set_uuid, set_uuid, sizeof(sb.set_uuid));
224 sb.bucket_size = bucket_size;
225 sb.block_size = block_size;
/* Text forms of both UUIDs, used by the summary output further down. */
227 uuid_unparse(sb.uuid, uuid_str);
228 uuid_unparse(sb.set_uuid, set_uuid_str);
/*
 * NOTE(review): this copies a fixed SB_LABEL_SIZE bytes from label whatever
 * the length of the string it points to; if label can be a shorter string
 * (main() passes its label variable straight through) the copy reads past
 * its end - confirm how the caller sizes it.
 */
230 memcpy(sb.label, label, SB_LABEL_SIZE);
233 if (SB_IS_BDEV(&sb)) {
235 &sb, writeback ? CACHE_MODE_WRITEBACK : CACHE_MODE_WRITETHROUGH);
/* A non-default data offset switches the superblock to the WITH_OFFSET version. */
237 if (data_offset != BDEV_DATA_START_DEFAULT) {
238 sb.version = BCACHE_SB_VERSION_BDEV_WITH_OFFSET;
239 sb.data_offset = data_offset;
246 "data_offset: %ju\n",
247 uuid_str, set_uuid_str,
248 (unsigned) sb.version,
/* Number of buckets = device size from getblocks() divided by the bucket size. */
252 sb.nbuckets = getblocks(fd) / sb.bucket_size;
253 sb.nr_in_set = nr_in_set;
254 sb.nr_this_dev = nr_this_dev;
/* Integer division: for any bucket_size above 23 this evaluates to 0 + 1 = 1. */
255 sb.first_bucket = (23 / sb.bucket_size) + 1;
/* << binds tighter than <, so this tests nbuckets < 128. */
257 if (sb.nbuckets < 1 << 7) {
258 fprintf(stderr, "Not enough buckets: %ju, need %u\n",
259 sb.nbuckets, 1 << 7);
263 SET_CACHE_DISCARD(&sb, discard);
264 SET_CACHE_REPLACEMENT(&sb, cache_replacement_policy);
265 SET_CACHE_TIER(&sb, tier);
275 "first_bucket: %u\n",
276 uuid_str, set_uuid_str,
277 (unsigned) sb.version,
/* The checksum is computed last, once every other field has been set. */
286 sb.csum = csum_set(&sb);
288 /* Zero start of disk */
289 if (pwrite(fd, zeroes, SB_START, 0) != SB_START) {
/*
 * NOTE(review): perror() appends ": <error text>" after its argument, so the
 * "\n" embedded in this prefix splits the diagnostic across two lines (same
 * for the second perror() below).
 */
290 perror("write error\n");
293 /* Write superblock */
294 if (pwrite(fd, &sb, sizeof(sb), SB_START) != sizeof(sb)) {
295 perror("write error\n");
/*
 * get_blocksize() - block size to use for path, in units of 512 bytes.
 *
 * For a block device this is the logical block size reported by the
 * BLKSSZGET ioctl divided by 512; for anything else it is st_blksize from
 * stat() divided by 512.  Failures of stat(), open() and the ioctl are
 * reported on stderr.
 *
 * NOTE(review): what follows each diagnostic, and whether fd is closed
 * before returning, are on lines not shown here.
 */
303 static unsigned get_blocksize(const char *path)
307 if (stat(path, &statbuf)) {
308 fprintf(stderr, "Error statting %s: %s\n",
309 path, strerror(errno));
313 if (S_ISBLK(statbuf.st_mode)) {
315 * BLKALIGNOFF: alignment_offset
316 * BLKPBSZGET: physical_block_size
317 * BLKSSZGET: logical_block_size
318 * BLKIOMIN: minimum_io_size
319 * BLKIOOPT: optimal_io_size
321 * It may be tempting to use physical_block_size,
322 * or even minimum_io_size.
323 * But to be as transparent as possible,
324 * we want to use logical_block_size.
326 unsigned int logical_block_size;
/* Read-only access is enough to query the device. */
327 int fd = open(path, O_RDONLY);
/* "%m" is a glibc printf extension that expands to strerror(errno). */
330 fprintf(stderr, "open(%s) failed: %m\n", path);
333 if (ioctl(fd, BLKSSZGET, &logical_block_size)) {
334 fprintf(stderr, "ioctl(%s, BLKSSZGET) failed: %m\n", path);
/* Convert bytes to 512-byte units. */
338 return logical_block_size / 512;
341 /* else: not a block device.
342 * Why would we even want to write a bcache super block there? */
344 return statbuf.st_blksize / 512;
/*
 * main() - parse the command line, collect the cache and backing devices
 * named on it, then call write_sb() once per device.
 *
 * Visible flow: defaults are set up (bucket_size 1024, block_size 0 meaning
 * "not specified", a freshly generated set UUID), options are read with
 * getopt_long(), each device argument is appended to cache_devices[] or
 * backing_devices[], and finally every cache device and then every backing
 * device is formatted with the same block size, bucket size and set UUID.
 *
 * NOTE(review): the option switch, several declarations (c, set_uuid, label,
 * sb) and the error exits are on lines not shown here.
 */
347 int main(int argc, char **argv)
350 unsigned i, ncache_devices = 0, nbacking_devices = 0;
351 unsigned long tier = 0;
/*
 * Variable-length arrays sized by argc, which is an upper bound on how many
 * device arguments the command line can contain.
 */
352 unsigned cache_device_tier[argc];
353 char *cache_devices[argc];
354 char *backing_devices[argc];
356 unsigned block_size = 0, bucket_size = 1024;
357 int writeback = 0, discard = 0, wipe_bcache = 0;
358 unsigned cache_replacement_policy = 0;
359 uint64_t data_offset = BDEV_DATA_START_DEFAULT;
/* Default cache-set UUID; replaced if one is parsed from the command line below. */
363 uuid_generate(set_uuid);
/*
 * Long options.  Entries whose third field is a non-NULL int pointer
 * (writeback, wipe-bcache, discard) make getopt_long() store the fourth
 * field (1) into that int and return 0; the other entries make it return
 * the listed character.  The second field is 1 when the option takes an
 * argument.
 */
365 struct option opts[] = {
366 { "cache", 0, NULL, 'C' },
367 { "bdev", 0, NULL, 'B' },
368 { "bucket", 1, NULL, 'b' },
369 { "block", 1, NULL, 'w' },
370 { "writeback", 0, &writeback, 1 },
371 { "wipe-bcache", 0, &wipe_bcache, 1 },
372 { "discard", 0, &discard, 1 },
373 { "cache_replacement_policy", 1, NULL, 'p' },
374 { "data_offset", 1, NULL, 'o' },
375 { "cset-uuid", 1, NULL, 'u' },
376 { "tier", 1, NULL, 't' },
377 { "label", 1, NULL, 'l' },
378 { "help", 0, NULL, 'h' },
379 { NULL, 0, NULL, 0 },
382 while ((c = getopt_long(argc, argv,
393 bucket_size = hatoi_validate(optarg, "bucket size");
396 block_size = hatoi_validate(optarg, "block size");
/*
 * NOTE(review): sb is not declared on any visible line of main(); confirm
 * where this parsed UUID ends up - write_sb() generates its own sb.uuid.
 */
400 if (uuid_parse(optarg, sb.uuid)) {
401 fprintf(stderr, "Bad uuid\n");
/*
 * NOTE(review): read_string_list() returns ssize_t but the result is stored
 * in an unsigned; a negative "not found" value would not stay negative here.
 */
407 cache_replacement_policy = read_string_list(optarg,
408 cache_replacement_policies);
/*
 * NOTE(review): atoll() reports no errors, and data_offset is unsigned, so a
 * negative argument converts to a very large value that passes the minimum
 * check below.
 */
411 data_offset = atoll(optarg);
412 if (data_offset < BDEV_DATA_START_DEFAULT) {
413 fprintf(stderr, "Bad data offset; minimum %d sectors\n",
414 BDEV_DATA_START_DEFAULT);
419 if (uuid_parse(optarg, set_uuid)) {
420 fprintf(stderr, "Bad uuid\n");
/*
 * NOTE(review): with a NULL end pointer, non-numeric input is
 * indistinguishable from "0" and is accepted as tier 0.
 */
428 tier = strtoul(optarg, NULL, 10);
429 if (tier >= CACHE_TIERS) {
430 fprintf(stderr, "Invalid tier %lu\n", tier);
439 fprintf(stderr, "Please specify -C or -B\n");
444 backing_devices[nbacking_devices++] = optarg;
/* Each cache device remembers the tier value in effect when it was named. */
446 cache_device_tier[ncache_devices] = tier;
447 cache_devices[ncache_devices++] = optarg;
453 if (!ncache_devices && !nbacking_devices) {
454 fprintf(stderr, "Please supply a device\n");
/*
 * NOTE(review): this comparison runs before the detection loops below raise
 * block_size; when no block size was given it is still 0 here, so the check
 * cannot fail.  Confirm a detected block size is also validated against
 * bucket_size.
 */
458 if (bucket_size < block_size) {
459 fprintf(stderr, "Bucket size cannot be smaller than block size\n");
/*
 * Raise block_size to the largest value get_blocksize() reports for any
 * device.  NOTE(review): a guarding condition may sit on a line not shown.
 */
464 for (i = 0; i < ncache_devices; i++)
465 block_size = max(block_size,
466 get_blocksize(cache_devices[i]));
468 for (i = 0; i < nbacking_devices; i++)
469 block_size = max(block_size,
470 get_blocksize(backing_devices[i]));
/* Cache devices: bdev = false, with the tier recorded for each and i as nr_this_dev. */
473 for (i = 0; i < ncache_devices; i++)
474 write_sb(cache_devices[i], block_size, bucket_size,
475 writeback, discard, wipe_bcache,
476 cache_replacement_policy, data_offset,
477 set_uuid, cache_device_tier[i], false,
478 ncache_devices, i, label);
/* Backing devices: bdev = true, tier fixed at 0. */
480 for (i = 0; i < nbacking_devices; i++)
481 write_sb(backing_devices[i], block_size, bucket_size,
482 writeback, discard, wipe_bcache,
483 cache_replacement_policy, data_offset,
484 set_uuid, 0, true, nbacking_devices, i, label);