X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=bcache.c;h=a69a46fb0ad6dacd37fd141d6568b311aaf0d2c5;hb=ff6acc10b405cb06878d15b584cfa82a78ff95f0;hp=e591e91c37ed764b01d7f008dc9e8b2254978bf9;hpb=a19eab52397a1d2637a928c51920708f3dcb824b;p=bcachefs-tools-debian diff --git a/bcache.c b/bcache.c index e591e91..a69a46f 100644 --- a/bcache.c +++ b/bcache.c @@ -1,9 +1,227 @@ #define _GNU_SOURCE +#include +#include #include #include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include +#include +#include + +#include "bcache.h" + +#define __KERNEL__ +#include +#undef __KERNEL__ + +const char * const cache_state[] = { + "active", + "ro", + "failed", + "spare", + NULL +}; + +const char * const replacement_policies[] = { + "lru", + "fifo", + "random", + NULL +}; + +const char * const csum_types[] = { + "none", + "crc32c", + "crc64", + NULL +}; + +const char * const bdev_cache_mode[] = { + "writethrough", + "writeback", + "writearound", + "none", + NULL +}; + +const char * const bdev_state[] = { + "detached", + "clean", + "dirty", + "inconsistent", + NULL +}; + +char *skip_spaces(const char *str) +{ + while (isspace(*str)) + ++str; + return (char *)str; +} + +char *strim(char *s) +{ + size_t size; + char *end; + + s = skip_spaces(s); + size = strlen(s); + if (!size) + return s; + + end = s + size - 1; + while (end >= s && isspace(*end)) + end--; + *(end + 1) = '\0'; + + return s; +} + +ssize_t read_string_list(const char *buf, const char * const list[]) +{ + size_t i; + char *s, *d = strdup(buf); + if (!d) + return -ENOMEM; + + s = strim(d); + + for (i = 0; list[i]; i++) + if (!strcmp(list[i], s)) + break; + + free(d); + + if (!list[i]) + return -EINVAL; + + return i; +} + +ssize_t read_string_list_or_die(const char *opt, const char * const list[], + const char *msg) +{ + ssize_t v = read_string_list(opt, list); + if (v < 0) { + fprintf(stderr, "Bad %s %s\n", msg, opt); + exit(EXIT_FAILURE); + + } + + return v; +} + +void print_string_list(const char * const list[], size_t selected) +{ + size_t i; + + for (i = 0; list[i]; i++) { + if (i) + putchar(' '); + printf(i == selected ? "[%s] ": "%s", list[i]); + } +} + +/* + * This is the CRC-32C table + * Generated with: + * width = 32 bits + * poly = 0x1EDC6F41 + * reflect input bytes = true + * reflect output bytes = true + */ + +static const u32 crc32c_table[256] = { + 0x00000000L, 0xF26B8303L, 0xE13B70F7L, 0x1350F3F4L, + 0xC79A971FL, 0x35F1141CL, 0x26A1E7E8L, 0xD4CA64EBL, + 0x8AD958CFL, 0x78B2DBCCL, 0x6BE22838L, 0x9989AB3BL, + 0x4D43CFD0L, 0xBF284CD3L, 0xAC78BF27L, 0x5E133C24L, + 0x105EC76FL, 0xE235446CL, 0xF165B798L, 0x030E349BL, + 0xD7C45070L, 0x25AFD373L, 0x36FF2087L, 0xC494A384L, + 0x9A879FA0L, 0x68EC1CA3L, 0x7BBCEF57L, 0x89D76C54L, + 0x5D1D08BFL, 0xAF768BBCL, 0xBC267848L, 0x4E4DFB4BL, + 0x20BD8EDEL, 0xD2D60DDDL, 0xC186FE29L, 0x33ED7D2AL, + 0xE72719C1L, 0x154C9AC2L, 0x061C6936L, 0xF477EA35L, + 0xAA64D611L, 0x580F5512L, 0x4B5FA6E6L, 0xB93425E5L, + 0x6DFE410EL, 0x9F95C20DL, 0x8CC531F9L, 0x7EAEB2FAL, + 0x30E349B1L, 0xC288CAB2L, 0xD1D83946L, 0x23B3BA45L, + 0xF779DEAEL, 0x05125DADL, 0x1642AE59L, 0xE4292D5AL, + 0xBA3A117EL, 0x4851927DL, 0x5B016189L, 0xA96AE28AL, + 0x7DA08661L, 0x8FCB0562L, 0x9C9BF696L, 0x6EF07595L, + 0x417B1DBCL, 0xB3109EBFL, 0xA0406D4BL, 0x522BEE48L, + 0x86E18AA3L, 0x748A09A0L, 0x67DAFA54L, 0x95B17957L, + 0xCBA24573L, 0x39C9C670L, 0x2A993584L, 0xD8F2B687L, + 0x0C38D26CL, 0xFE53516FL, 0xED03A29BL, 0x1F682198L, + 0x5125DAD3L, 0xA34E59D0L, 0xB01EAA24L, 0x42752927L, + 0x96BF4DCCL, 0x64D4CECFL, 0x77843D3BL, 0x85EFBE38L, + 0xDBFC821CL, 0x2997011FL, 0x3AC7F2EBL, 0xC8AC71E8L, + 0x1C661503L, 0xEE0D9600L, 0xFD5D65F4L, 0x0F36E6F7L, + 0x61C69362L, 0x93AD1061L, 0x80FDE395L, 0x72966096L, + 0xA65C047DL, 0x5437877EL, 0x4767748AL, 0xB50CF789L, + 0xEB1FCBADL, 0x197448AEL, 0x0A24BB5AL, 0xF84F3859L, + 0x2C855CB2L, 0xDEEEDFB1L, 0xCDBE2C45L, 0x3FD5AF46L, + 0x7198540DL, 0x83F3D70EL, 0x90A324FAL, 0x62C8A7F9L, + 0xB602C312L, 0x44694011L, 0x5739B3E5L, 0xA55230E6L, + 0xFB410CC2L, 0x092A8FC1L, 0x1A7A7C35L, 0xE811FF36L, + 0x3CDB9BDDL, 0xCEB018DEL, 0xDDE0EB2AL, 0x2F8B6829L, + 0x82F63B78L, 0x709DB87BL, 0x63CD4B8FL, 0x91A6C88CL, + 0x456CAC67L, 0xB7072F64L, 0xA457DC90L, 0x563C5F93L, + 0x082F63B7L, 0xFA44E0B4L, 0xE9141340L, 0x1B7F9043L, + 0xCFB5F4A8L, 0x3DDE77ABL, 0x2E8E845FL, 0xDCE5075CL, + 0x92A8FC17L, 0x60C37F14L, 0x73938CE0L, 0x81F80FE3L, + 0x55326B08L, 0xA759E80BL, 0xB4091BFFL, 0x466298FCL, + 0x1871A4D8L, 0xEA1A27DBL, 0xF94AD42FL, 0x0B21572CL, + 0xDFEB33C7L, 0x2D80B0C4L, 0x3ED04330L, 0xCCBBC033L, + 0xA24BB5A6L, 0x502036A5L, 0x4370C551L, 0xB11B4652L, + 0x65D122B9L, 0x97BAA1BAL, 0x84EA524EL, 0x7681D14DL, + 0x2892ED69L, 0xDAF96E6AL, 0xC9A99D9EL, 0x3BC21E9DL, + 0xEF087A76L, 0x1D63F975L, 0x0E330A81L, 0xFC588982L, + 0xB21572C9L, 0x407EF1CAL, 0x532E023EL, 0xA145813DL, + 0x758FE5D6L, 0x87E466D5L, 0x94B49521L, 0x66DF1622L, + 0x38CC2A06L, 0xCAA7A905L, 0xD9F75AF1L, 0x2B9CD9F2L, + 0xFF56BD19L, 0x0D3D3E1AL, 0x1E6DCDEEL, 0xEC064EEDL, + 0xC38D26C4L, 0x31E6A5C7L, 0x22B65633L, 0xD0DDD530L, + 0x0417B1DBL, 0xF67C32D8L, 0xE52CC12CL, 0x1747422FL, + 0x49547E0BL, 0xBB3FFD08L, 0xA86F0EFCL, 0x5A048DFFL, + 0x8ECEE914L, 0x7CA56A17L, 0x6FF599E3L, 0x9D9E1AE0L, + 0xD3D3E1ABL, 0x21B862A8L, 0x32E8915CL, 0xC083125FL, + 0x144976B4L, 0xE622F5B7L, 0xF5720643L, 0x07198540L, + 0x590AB964L, 0xAB613A67L, 0xB831C993L, 0x4A5A4A90L, + 0x9E902E7BL, 0x6CFBAD78L, 0x7FAB5E8CL, 0x8DC0DD8FL, + 0xE330A81AL, 0x115B2B19L, 0x020BD8EDL, 0xF0605BEEL, + 0x24AA3F05L, 0xD6C1BC06L, 0xC5914FF2L, 0x37FACCF1L, + 0x69E9F0D5L, 0x9B8273D6L, 0x88D28022L, 0x7AB90321L, + 0xAE7367CAL, 0x5C18E4C9L, 0x4F48173DL, 0xBD23943EL, + 0xF36E6F75L, 0x0105EC76L, 0x12551F82L, 0xE03E9C81L, + 0x34F4F86AL, 0xC69F7B69L, 0xD5CF889DL, 0x27A40B9EL, + 0x79B737BAL, 0x8BDCB4B9L, 0x988C474DL, 0x6AE7C44EL, + 0xBE2DA0A5L, 0x4C4623A6L, 0x5F16D052L, 0xAD7D5351L +}; + +/* + * Steps through buffer one byte at at time, calculates reflected + * crc using table. + */ + +static u32 crc32c(u32 crc, unsigned char const *data, size_t length) +{ + while (length--) + crc = + crc32c_table[(crc ^ *data++) & 0xFFL] ^ (crc >> 8); + return crc; +} /* * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group (Any @@ -115,9 +333,8 @@ static const uint64_t crc_table[256] = { 0x9AFCE626CE85B507ULL }; -inline uint64_t crc64(const void *_data, size_t len) +static uint64_t bch_crc64_update(uint64_t crc, const void *_data, size_t len) { - uint64_t crc = 0xffffffffffffffff; const unsigned char *data = _data; while (len--) { @@ -125,5 +342,1096 @@ inline uint64_t crc64(const void *_data, size_t len) crc = crc_table[i] ^ (crc << 8); } - return crc ^ 0xffffffffffffffff; + return crc; +} + +static uint64_t bch_checksum_update(unsigned type, uint64_t crc, const void *data, size_t len) +{ + switch (type) { + case BCH_CSUM_NONE: + return 0; + case BCH_CSUM_CRC32C: + return crc32c(crc, data, len); + case BCH_CSUM_CRC64: + return bch_crc64_update(crc, data, len); + default: + fprintf(stderr, "Unknown checksum type %u\n", type); + exit(EXIT_FAILURE); + } +} + +uint64_t bch_checksum(unsigned type, const void *data, size_t len) +{ + uint64_t crc = 0xffffffffffffffffULL; + + crc = bch_checksum_update(type, crc, data, len); + + return crc ^ 0xffffffffffffffffULL; +} + +uint64_t getblocks(int fd) +{ + uint64_t ret; + struct stat statbuf; + if (fstat(fd, &statbuf)) { + perror("getblocks: stat error\n"); + exit(EXIT_FAILURE); + } + ret = statbuf.st_size / 512; + if (S_ISBLK(statbuf.st_mode)) + if (ioctl(fd, BLKGETSIZE, &ret)) { + perror("ioctl error getting blksize"); + exit(EXIT_FAILURE); + } + return ret; +} + +uint64_t hatoi(const char *s) +{ + char *e; + long long i = strtoll(s, &e, 10); + switch (*e) { + case 't': + case 'T': + i *= 1024; + case 'g': + case 'G': + i *= 1024; + case 'm': + case 'M': + i *= 1024; + case 'k': + case 'K': + i *= 1024; + } + return i; +} + +unsigned hatoi_validate(const char *s, const char *msg) +{ + uint64_t v = hatoi(s); + + if (v & (v - 1)) { + fprintf(stderr, "%s must be a power of two\n", msg); + exit(EXIT_FAILURE); + } + + v /= 512; + + if (v > USHRT_MAX) { + fprintf(stderr, "%s too large\n", msg); + exit(EXIT_FAILURE); + } + + if (!v) { + fprintf(stderr, "%s too small\n", msg); + exit(EXIT_FAILURE); + } + + return v; +} + +static void do_write_sb(int fd, struct cache_sb *sb) +{ + char zeroes[SB_START] = {0}; + size_t bytes = ((void *) bset_bkey_last(sb)) - (void *) sb; + + /* Zero start of disk */ + if (pwrite(fd, zeroes, SB_START, 0) != SB_START) { + perror("write error trying to zero start of disk\n"); + exit(EXIT_FAILURE); + } + /* Write superblock */ + if (pwrite(fd, sb, bytes, SB_START) != bytes) { + perror("write error trying to write superblock\n"); + exit(EXIT_FAILURE); + } + + fsync(fd); + close(fd); +} + +void write_backingdev_sb(int fd, unsigned block_size, unsigned *bucket_sizes, + bool writeback, uint64_t data_offset, + const char *label, + uuid_le set_uuid) +{ + char uuid_str[40], set_uuid_str[40]; + struct cache_sb sb; + + memset(&sb, 0, sizeof(struct cache_sb)); + + sb.offset = SB_SECTOR; + sb.version = BCACHE_SB_VERSION_BDEV; + sb.magic = BCACHE_MAGIC; + uuid_generate(sb.uuid.b); + sb.set_uuid = set_uuid; + sb.bucket_size = bucket_sizes[0]; + sb.block_size = block_size; + + uuid_unparse(sb.uuid.b, uuid_str); + uuid_unparse(sb.set_uuid.b, set_uuid_str); + if (label) + memcpy(sb.label, label, SB_LABEL_SIZE); + + SET_BDEV_CACHE_MODE(&sb, writeback + ? CACHE_MODE_WRITEBACK + : CACHE_MODE_WRITETHROUGH); + + if (data_offset != BDEV_DATA_START_DEFAULT) { + sb.version = BCACHE_SB_VERSION_BDEV_WITH_OFFSET; + sb.data_offset = data_offset; + } + + sb.csum = csum_set(&sb, BCH_CSUM_CRC64); + + printf("UUID: %s\n" + "Set UUID: %s\n" + "version: %u\n" + "block_size: %u\n" + "data_offset: %ju\n", + uuid_str, set_uuid_str, + (unsigned) sb.version, + sb.block_size, + data_offset); + + do_write_sb(fd, &sb); +} + +int dev_open(const char *dev, bool wipe_bcache) +{ + struct cache_sb sb; + blkid_probe pr; + int fd; + char err[MAX_PATH]; + + if ((fd = open(dev, O_RDWR|O_EXCL)) == -1) { + sprintf(err, "Can't open dev %s: %s\n", dev, strerror(errno)); + goto err; + } + + if (pread(fd, &sb, sizeof(sb), SB_START) != sizeof(sb)) { + sprintf(err, "Failed to read superblock"); + goto err; + } + + if (!memcmp(&sb.magic, &BCACHE_MAGIC, 16) && !wipe_bcache) { + sprintf(err, "Already a bcache device on %s, " + "overwrite with --wipe-bcache\n", dev); + goto err; + } + + if (!(pr = blkid_new_probe())) { + sprintf(err, "Failed to create a new probe"); + goto err; + } + if (blkid_probe_set_device(pr, fd, 0, 0)) { + sprintf(err, "failed to set probe to device"); + goto err; + } + /* enable ptable probing; superblock probing is enabled by default */ + if (blkid_probe_enable_partitions(pr, true)) { + sprintf(err, "Failed to enable partitions on probe"); + goto err; + } + if (!blkid_do_probe(pr)) { + /* XXX wipefs doesn't know how to remove partition tables */ + sprintf(err, "Device %s already has a non-bcache superblock, " + "remove it using wipefs and wipefs -a\n", dev); + goto err; + } + + return fd; + + err: + fprintf(stderr, "dev_open failed with: %s", err); + exit(EXIT_FAILURE); +} + +static unsigned min_bucket_size(int num_bucket_sizes, unsigned *bucket_sizes) +{ + int i; + unsigned min = bucket_sizes[0]; + + for (i = 0; i < num_bucket_sizes; i++) + min = bucket_sizes[i] < min ? bucket_sizes[i] : min; + + return min; +} + +static unsigned node_size(unsigned bucket_size) { + + if (bucket_size <= 256) + return bucket_size; + else if (bucket_size <= 512) + return bucket_size / 2; + else + return bucket_size / 4; +} + +void write_cache_sbs(int *fds, struct cache_sb *sb, + unsigned block_size, unsigned *bucket_sizes, + int num_bucket_sizes) +{ + char uuid_str[40], set_uuid_str[40]; + size_t i; + unsigned min_size = min_bucket_size(num_bucket_sizes, bucket_sizes); + + sb->offset = SB_SECTOR; + sb->version = BCACHE_SB_VERSION_CDEV_V3; + sb->magic = BCACHE_MAGIC; + sb->block_size = block_size; + sb->keys = bch_journal_buckets_offset(sb); + + /* + * don't have a userspace crc32c implementation handy, just always use + * crc64 + */ + SET_CACHE_SB_CSUM_TYPE(sb, BCH_CSUM_CRC64); + + for (i = 0; i < sb->nr_in_set; i++) { + struct cache_member *m = sb->members + i; + + if (num_bucket_sizes <= 1) + sb->bucket_size = bucket_sizes[0]; + else + sb->bucket_size = bucket_sizes[i]; + SET_CACHE_BTREE_NODE_SIZE(sb, node_size(min_size)); + + sb->uuid = m->uuid; + sb->nbuckets = getblocks(fds[i]) / sb->bucket_size; + sb->nr_this_dev = i; + sb->first_bucket = (23 / sb->bucket_size) + 1; + + if (sb->nbuckets < 1 << 7) { + fprintf(stderr, "Not enough buckets: %llu, need %u\n", + sb->nbuckets, 1 << 7); + exit(EXIT_FAILURE); + } + + sb->csum = csum_set(sb, CACHE_SB_CSUM_TYPE(sb)); + + uuid_unparse(sb->uuid.b, uuid_str); + uuid_unparse(sb->set_uuid.b, set_uuid_str); + printf("UUID: %s\n" + "Set UUID: %s\n" + "version: %u\n" + "nbuckets: %llu\n" + "block_size: %u\n" + "bucket_size: %u\n" + "nr_in_set: %u\n" + "nr_this_dev: %u\n" + "first_bucket: %u\n", + uuid_str, set_uuid_str, + (unsigned) sb->version, + sb->nbuckets, + sb->block_size, + sb->bucket_size, + sb->nr_in_set, + sb->nr_this_dev, + sb->first_bucket); + + do_write_sb(fds[i], sb); + } +} + +void next_cache_device(struct cache_sb *sb, + unsigned replication_set, + int tier, + unsigned replacement_policy, + bool discard) +{ + struct cache_member *m = sb->members + sb->nr_in_set; + + SET_CACHE_REPLICATION_SET(m, replication_set); + SET_CACHE_TIER(m, tier); + SET_CACHE_REPLACEMENT(m, replacement_policy); + SET_CACHE_DISCARD(m, discard); + uuid_generate(m->uuid.b); + + sb->nr_in_set++; +} + +unsigned get_blocksize(const char *path) +{ + struct stat statbuf; + + if (stat(path, &statbuf)) { + fprintf(stderr, "Error statting %s: %s\n", + path, strerror(errno)); + exit(EXIT_FAILURE); + } + + if (S_ISBLK(statbuf.st_mode)) { + /* check IO limits: + * BLKALIGNOFF: alignment_offset + * BLKPBSZGET: physical_block_size + * BLKSSZGET: logical_block_size + * BLKIOMIN: minimum_io_size + * BLKIOOPT: optimal_io_size + * + * It may be tempting to use physical_block_size, + * or even minimum_io_size. + * But to be as transparent as possible, + * we want to use logical_block_size. + */ + unsigned int logical_block_size; + int fd = open(path, O_RDONLY); + + if (fd < 0) { + fprintf(stderr, "open(%s) failed: %m\n", path); + exit(EXIT_FAILURE); + } + if (ioctl(fd, BLKSSZGET, &logical_block_size)) { + fprintf(stderr, "ioctl(%s, BLKSSZGET) failed: %m\n", path); + exit(EXIT_FAILURE); + } + close(fd); + return logical_block_size / 512; + + } + /* else: not a block device. + * Why would we even want to write a bcache super block there? */ + + return statbuf.st_blksize / 512; +} + +long strtoul_or_die(const char *p, size_t max, const char *msg) +{ + errno = 0; + long v = strtol(p, NULL, 10); + if (errno || v < 0 || v >= max) { + fprintf(stderr, "Invalid %s %zi\n", msg, v); + exit(EXIT_FAILURE); + } + + return v; +} + +static void print_encode(char *in) +{ + char *pos; + for (pos = in; *pos; pos++) + if (isalnum(*pos) || strchr(".-_", *pos)) + putchar(*pos); + else + printf("%%%x", *pos); +} + +static void show_uuid_only(struct cache_sb *sb, char *dev_uuid) { + uuid_unparse(sb->uuid.b, dev_uuid); +} + +static void show_super_common(struct cache_sb *sb, bool force_csum) +{ + char uuid[40]; + char label[SB_LABEL_SIZE + 1]; + uint64_t expected_csum; + + printf("sb.magic\t\t"); + if (!memcmp(&sb->magic, &BCACHE_MAGIC, sizeof(sb->magic))) { + printf("ok\n"); + } else { + printf("bad magic\n"); + fprintf(stderr, "Invalid superblock (bad magic)\n"); + exit(2); + } + + printf("sb.first_sector\t\t%ju", (uint64_t) sb->offset); + if (sb->offset == SB_SECTOR) { + printf(" [match]\n"); + } else { + printf(" [expected %ds]\n", SB_SECTOR); + fprintf(stderr, "Invalid superblock (bad sector)\n"); + exit(2); + } + + printf("sb.csum\t\t\t%ju", (uint64_t) sb->csum); + expected_csum = csum_set(sb, + sb->version < BCACHE_SB_VERSION_CDEV_V3 + ? BCH_CSUM_CRC64 + : CACHE_SB_CSUM_TYPE(sb)); + if (sb->csum == expected_csum) { + printf(" [match]\n"); + } else { + printf(" [expected %" PRIX64 "]\n", expected_csum); + if (force_csum) { + fprintf(stderr, "Corrupt superblock (bad csum)\n"); + exit(2); + } + } + + printf("sb.version\t\t%ju", (uint64_t) sb->version); + switch (sb->version) { + // These are handled the same by the kernel + case BCACHE_SB_VERSION_CDEV: + case BCACHE_SB_VERSION_CDEV_WITH_UUID: + printf(" [cache device]\n"); + break; + + // The second adds data offset support + case BCACHE_SB_VERSION_BDEV: + case BCACHE_SB_VERSION_BDEV_WITH_OFFSET: + printf(" [backing device]\n"); + break; + + default: + printf(" [unknown]\n"); + // exit code? + exit(EXIT_SUCCESS); + } + + putchar('\n'); + + strncpy(label, (char *) sb->label, SB_LABEL_SIZE); + label[SB_LABEL_SIZE] = '\0'; + printf("dev.label\t\t"); + if (*label) + print_encode(label); + else + printf("(empty)"); + putchar('\n'); + + uuid_unparse(sb->uuid.b, uuid); + printf("dev.uuid\t\t%s\n", uuid); + + uuid_unparse(sb->set_uuid.b, uuid); + printf("cset.uuid\t\t%s\n", uuid); +} + +void show_super_backingdev(struct cache_sb *sb, bool force_csum) +{ + uint64_t first_sector; + + show_super_common(sb, force_csum); + + if (sb->version == BCACHE_SB_VERSION_BDEV) { + first_sector = BDEV_DATA_START_DEFAULT; + } else { + if (sb->keys == 1 || sb->d[0]) { + fprintf(stderr, + "Possible experimental format detected, bailing\n"); + exit(3); + } + first_sector = sb->data_offset; + } + + printf("dev.data.first_sector\t%ju\n" + "dev.data.cache_mode\t%s" + "dev.data.cache_state\t%s\n", + first_sector, + bdev_cache_mode[BDEV_CACHE_MODE(sb)], + bdev_state[BDEV_STATE(sb)]); +} + +static void show_cache_member(struct cache_sb *sb, unsigned i) +{ + struct cache_member *m = ((struct cache_member *) sb->d) + i; + + printf("cache.state\t%s\n", cache_state[CACHE_STATE(m)]); + + printf("cache.tier\t%llu\n", CACHE_TIER(m)); + + printf("cache.replication_set\t%llu\n", CACHE_REPLICATION_SET(m)); + printf("cache.cur_meta_replicas\t%llu\n", REPLICATION_SET_CUR_META_REPLICAS(m)); + printf("cache.cur_data_replicas\t%llu\n", REPLICATION_SET_CUR_DATA_REPLICAS(m)); + + printf("cache.has_metadata\t%llu\n", CACHE_HAS_METADATA(m)); + printf("cache.has_data\t%llu\n", CACHE_HAS_DATA(m)); + + printf("cache.replacement\t%s\n", replacement_policies[CACHE_REPLACEMENT(m)]); + printf("cache.discard\t%llu\n", CACHE_DISCARD(m)); +} + +void show_super_cache(struct cache_sb *sb, bool force_csum) +{ + show_super_common(sb, force_csum); + + printf("dev.sectors_per_block\t%u\n" + "dev.sectors_per_bucket\t%u\n", + sb->block_size, + sb->bucket_size); + + // total_sectors includes the superblock; + printf("dev.cache.first_sector\t%u\n" + "dev.cache.cache_sectors\t%llu\n" + "dev.cache.total_sectors\t%llu\n" + "dev.cache.ordered\t%s\n" + "dev.cache.pos\t\t%u\n" + "dev.cache.setsize\t\t%u\n", + sb->bucket_size * sb->first_bucket, + sb->bucket_size * (sb->nbuckets - sb->first_bucket), + sb->bucket_size * sb->nbuckets, + CACHE_SYNC(sb) ? "yes" : "no", + sb->nr_this_dev, + sb->nr_in_set); + + show_cache_member(sb, sb->nr_this_dev); +} + +static int __sysfs_attr_type(char *attr, const char **attr_arr) { + int i, j; + for(i = 0; attr_arr[i] != NULL; i++) + if(!strcmp(attr, attr_arr[i])) + return 1; + return 0; +} + +enum sysfs_attr sysfs_attr_type(char *attr) { + int ret; + if(__sysfs_attr_type(attr, set_attrs)) + return SET_ATTR; + if(__sysfs_attr_type(attr, cache_attrs)) + return CACHE_ATTR; + if(__sysfs_attr_type(attr, internal_attrs)) + return INTERNAL_ATTR; + + printf("No attribute called %s, try --list to see options\n", attr); + + return -1; +} + +static void __sysfs_attr_list(const char **attr_arr) { + int i, j; + for (i = 0; attr_arr[i] != NULL; i++) + printf("%s\n", attr_arr[i]); +} + +void sysfs_attr_list() { + __sysfs_attr_list(set_attrs); + __sysfs_attr_list(cache_attrs); + __sysfs_attr_list(internal_attrs); +} + +struct cache_sb *query_dev(char *dev, bool force_csum, + bool print_sb, bool uuid_only, char *dev_uuid) +{ + size_t bytes = 4096; + struct cache_sb *sb = aligned_alloc(bytes, bytes); + + int fd = open(dev, O_RDONLY|O_DIRECT); + if (fd < 0) { + printf("Can't open dev %s: %s\n", dev, strerror(errno)); + return NULL; + } + + while (true) { + int ret = pread(fd, sb, bytes, SB_START); + if (ret < 0) { + fprintf(stderr, "Couldn't read superblock: %s\n", + strerror(errno)); + close(fd); + free(sb); + return NULL; + } else if (bytes > sizeof(sb) + sb->keys * sizeof(u64)) { + /* We read the whole superblock */ + break; + } + + /* + * otherwise double the size of our dest + * and read again + */ + free(sb); + bytes *= 2; + sb = aligned_alloc(4096, bytes); + } + + close(fd); + + if(uuid_only) { + show_uuid_only(sb, dev_uuid); + return sb; + } + + if(print_sb) { + if (!SB_IS_BDEV(sb)) + show_super_cache(sb, force_csum); + else + show_super_backingdev(sb, force_csum); + } + + return sb; +} + +static char *dev_name(const char *ugly_path) { + char buf[32]; + int i, end = strlen(ugly_path); + + //Chop off "/bcache", then look for the next '/' from the end + for (i = end - 8; ; i--) + if(ugly_path[i] == '/') + break; + + strcpy(buf, ugly_path + i); + buf[end - i - 7] = 0; + + // Is the dev guaranteed to be in /dev? + // This is needed for finding the superblock with a query-dev + return strdup(buf); +} + +static void list_cacheset_devs(char *cset_dir, char *cset_name, bool parse_dev_name) { + int i = 0; + DIR *cachedir, *dir; + struct stat cache_stat; + char entry[MAX_PATH]; + struct dirent *ent; + snprintf(entry, MAX_PATH, "%s/%s", cset_dir, cset_name); + + if((dir = opendir(entry)) != NULL) { + while((ent = readdir(dir)) != NULL) { + char buf[MAX_PATH]; + int len; + char *tmp; + + /* + * We are looking for all cache# directories + * do a strlen < 9 to skip over other entries + * that also start with "cache" + */ + if(strncmp(ent->d_name, "cache", 5) || + !(strlen(ent->d_name) < 9)) + continue; + + snprintf(entry, MAX_PATH, "%s/%s/%s", + cset_dir, + cset_name, + ent->d_name); + + if((cachedir = opendir(entry)) == NULL) + continue; + + if(stat(entry, &cache_stat)) + continue; + + if((len = readlink(entry, buf, sizeof(buf) - 1)) != + -1) { + buf[len] = '\0'; + if(parse_dev_name) { + tmp = dev_name(buf); + printf("/dev%s\n", tmp); + free(tmp); + } else { + printf("\t%s\n", buf); + } + } + } + } +} + +char *find_matching_uuid(char *stats_dir, char *subdir, const char *stats_dev_uuid) { + /* Do a query-dev --uuid only to get the uuid + * repeat on each dev until we find a matching one + * append that cache# to subdir and return + */ + + int i = 0; + DIR *cachedir; + struct stat cache_stat; + char intbuf[4]; + char entry[MAX_PATH]; + char *err = NULL; + + snprintf(entry, MAX_PATH, "%s%s", stats_dir, subdir); + snprintf(intbuf, 4, "%d", i); + strcat(entry, intbuf); + + while(true) { + char buf[MAX_PATH]; + int len; + + if((cachedir = opendir(entry)) == NULL) + break; + + if(stat(entry, &cache_stat)) + break; + + if((len = readlink(entry, buf, sizeof(buf) - 1)) != -1) { + char dev_uuid[40]; + buf[len] = '\0'; + int i, end = strlen(buf); + char tmp[32], devname[32]; + struct cache_sb *sb; + + /* Chop off "/bcache", then look for the + * next '/' from the end + */ + for (i = end - 8; ; i--) + if(buf[i] == '/') + break; + + strcpy(tmp, buf + i); + tmp[end - i - 7] = 0; + strcpy(devname, "/dev"); + strcat(devname, tmp); + + err = "Unable to open superblock"; + sb = query_dev(devname, false, false, true, dev_uuid); + if(!sb) + return err; + else + free(sb); + + if(!strcmp(stats_dev_uuid, dev_uuid)) { + strcat(subdir, intbuf); + return NULL; + } + } + + /* remove i from end and append i++ */ + entry[strlen(entry)-strlen(intbuf)] = 0; + i++; + snprintf(intbuf, 4, "%d", i); + strcat(entry, intbuf); + } + + + err = "dev uuid doesn't exist in cache_set"; + return err; +} + +char *list_cachesets(char *cset_dir, bool list_devs) +{ + struct dirent *ent; + DIR *dir; + char *err = NULL; + + dir = opendir(cset_dir); + if (!dir) { + err = "Failed to open cacheset dir"; + goto err; + } + + while ((ent = readdir(dir)) != NULL) { + struct stat statbuf; + char entry[MAX_PATH]; + + if (!strcmp(ent->d_name, ".") || !strcmp(ent->d_name, "..")) + continue; + + snprintf(entry, MAX_PATH, "%s/%s", cset_dir, ent->d_name); + if(stat(entry, &statbuf) == -1) { + err = "Failed to stat cacheset subdir"; + goto err; + } + + if (S_ISDIR(statbuf.st_mode)) { + printf("%s\n", ent->d_name); + + if(list_devs) { + list_cacheset_devs(cset_dir, ent->d_name, true); + } + } + } + +err: + closedir(dir); + return err; +} + +char *register_bcache(char *const *devs) +{ + int ret, bcachefd; + char *err = NULL; + + bcachefd = open("/dev/bcache", O_RDWR); + if (bcachefd < 0) { + err = "Can't open bcache device"; + goto err; + } + + ret = ioctl(bcachefd, BCH_IOCTL_REGISTER, devs); + if (ret < 0) { + char tmp[64]; + snprintf(tmp, 64, "ioctl register error: %s\n", + strerror(ret)); + err = strdup(tmp); + goto err; + } + +err: + if (bcachefd) + close(bcachefd); + return err; +} + +char *unregister_bcache(char *const *devs) +{ + int ret, bcachefd; + char *err = NULL; + + bcachefd = open("/dev/bcache", O_RDWR); + if (bcachefd < 0) { + err = "Can't open bcache device"; + goto err; + } + + ret = ioctl(bcachefd, BCH_IOCTL_UNREGISTER, devs); + if (ret < 0) { + char tmp[64]; + snprintf(tmp, 64, "ioctl unregister error: %s\n", + strerror(ret)); + err = strdup(tmp); + goto err; + } + +err: + close(bcachefd); + return err; +} + +char *add_devices(char *const *devs, char *uuid) +{ + int ret, bcachefd; + char *err = NULL; + + bcachefd = open("/dev/bcache", O_RDWR); + if (bcachefd < 0) { + err = "Can't open bcache device"; + goto err; + } + + struct bch_ioctl_add_disks ia; + ia.devs = devs; + ia.uuid = uuid; + + ret = ioctl(bcachefd, BCH_IOCTL_ADD_DISKS, &ia); + if (ret < 0) { + char tmp[128]; + snprintf(tmp, 128, "ioctl add disk error: %s\n", + strerror(ret)); + err = strdup(tmp); + } + +err: + close(bcachefd); + return err; +} + +char *remove_device(const char *dev, bool force) +{ + int ret, bcachefd; + char *err = NULL; + + bcachefd = open("/dev/bcache", O_RDWR); + if (bcachefd < 0) { + err = "Can't open bcache device"; + goto err; + } + + struct bch_ioctl_rm_disk ir; + ir.dev = dev; + ir.force = force ? 1 : 0; + + ret = ioctl(bcachefd, BCH_IOCTL_RM_DISK, &ir); + if (ret < 0) { + char tmp[128]; + snprintf(tmp, 128, "ioctl add disk error: %s\n", + strerror(ret)); + err = strdup(tmp); + } + +err: + close(bcachefd); + return err; +} + +char *probe(char *dev, int udev) +{ + struct cache_sb sb; + char uuid[40]; + blkid_probe pr; + char *err = NULL; + + int fd = open(dev, O_RDONLY); + if (fd == -1) { + err = "Got file descriptor -1 trying to open dev"; + goto err; + } + + if (!(pr = blkid_new_probe())) { + err = "Failed trying to get a blkid for new probe"; + goto err; + } + + if (blkid_probe_set_device(pr, fd, 0, 0)) { + err = "Failed blkid probe set device"; + goto err; + } + + /* probe partitions too */ + if (blkid_probe_enable_partitions(pr, true)) { + err = "Enable probe partitions"; + goto err; + } + + /* bail if anything was found + * probe-bcache isn't needed once blkid recognizes bcache */ + if (!blkid_do_probe(pr)) { + err = "blkid recognizes bcache"; + goto err; + } + + if (pread(fd, &sb, sizeof(sb), SB_START) != sizeof(sb)) { + err = "Failed to read superblock"; + goto err; + } + + if (memcmp(&sb.magic, &BCACHE_MAGIC, sizeof(sb.magic))) { + err = "Bcache magic incorrect"; + goto err; + } + + uuid_unparse(sb.uuid.b, uuid); + + if (udev) + printf("ID_FS_UUID=%s\n" + "ID_FS_UUID_ENC=%s\n" + "ID_FS_TYPE=bcache\n", + uuid, uuid); + else + printf("%s: UUID=\"\" TYPE=\"bcache\"\n", uuid); + + return 0; + +err: + return err; +} + +char *read_stat_dir(DIR *dir, char *stats_dir, char *stat_name, char *ret) +{ + struct stat statbuf; + char entry[MAX_PATH]; + char *err = NULL; + + snprintf(entry, MAX_PATH, "%s/%s", stats_dir, stat_name); + if(stat(entry, &statbuf) == -1) { + char tmp[MAX_PATH]; + snprintf(tmp, MAX_PATH, "Failed to stat %s\n", entry); + err = strdup(tmp); + goto err; + } + + if (S_ISREG(statbuf.st_mode)) { + char buf[MAX_PATH]; + FILE *fp = NULL; + + fp = fopen(entry, "r"); + if(!fp) { + /* If we can't open the file, this is probably because + * of permissions, just move to the next file */ + return NULL; + } + + while(fgets(ret, MAX_PATH, fp)); + fclose(fp); + } +err: + return err; +} + +char *bcache_get_capacity(const char *cset_dir, const char *capacity_uuid, + bool show_devs) +{ + char *err = NULL; + char bucket_size_path[MAX_PATH]; + char nbuckets_path[MAX_PATH]; + char avail_buckets_path[MAX_PATH]; + char cache_path[MAX_PATH]; + + double bucket_sizes[MAX_DEVS]; + double nbuckets[MAX_DEVS]; + double avail_buckets[MAX_DEVS]; + char *dev_names[MAX_DEVS]; + int dev_count = 0, i; + char intbuf[4]; + double total_cap = 0, total_free = 0; + int precision = 2; + + snprintf(intbuf, 4, "%d", i); + snprintf(bucket_size_path, MAX_PATH, "%s/%s/%s/%s", cset_dir, + capacity_uuid, "cache0", "bucket_size_bytes"); + snprintf(nbuckets_path, MAX_PATH, "%s/%s/%s/%s", cset_dir, + capacity_uuid, "cache0", "nbuckets"); + snprintf(avail_buckets_path, MAX_PATH, "%s/%s/%s/%s", cset_dir, + capacity_uuid, "cache0", "available_buckets"); + snprintf(cache_path, MAX_PATH, "%s/%s/%s", cset_dir, capacity_uuid, + "cache0"); + + while(true) { + char buf[MAX_PATH]; + int len; + DIR *cache_dir; + + if((cache_dir = opendir(cache_path)) == NULL) + break; + + err = read_stat_dir(cache_dir, cache_path, + "bucket_size_bytes", buf); + if (err) + goto err; + else + bucket_sizes[dev_count] = atof(buf); + + err = read_stat_dir(cache_dir, cache_path, + "nbuckets", buf); + if (err) + goto err; + else + nbuckets[dev_count] = atof(buf); + + err = read_stat_dir(cache_dir, cache_path, + "available_buckets", buf); + if (err) + goto err; + else + avail_buckets[dev_count] = atof(buf); + + if((len = readlink(cache_path, buf, sizeof(buf) - 1)) != -1) { + buf[len] = '\0'; + dev_names[dev_count] = dev_name(buf); + } + + /* remove i/stat and append i++/stat */ + bucket_size_path[strlen(cache_path) - strlen(intbuf)] = 0; + nbuckets_path[strlen(cache_path) - strlen(intbuf)] = 0; + avail_buckets_path[strlen(cache_path) - strlen(intbuf)] = 0; + cache_path[strlen(cache_path) - strlen(intbuf)] = 0; + + dev_count++; + + snprintf(intbuf, 4, "%d", dev_count); + strcat(cache_path, intbuf); + strcat(bucket_size_path, intbuf); + strcat(nbuckets_path, intbuf); + strcat(avail_buckets_path, intbuf); + } + + printf("%-15s%-25s%-25s\n", "Device Name", "Capacity (512 Blocks)", "Free (512 Blocks)"); + + if (show_devs) { + for (i = 0; i < dev_count; i++) { + printf("%s%-11s%-25.*f%-25.*f\n", "/dev", dev_names[i], + precision, + (bucket_sizes[i] * nbuckets[i]) / 512, + precision, + (bucket_sizes[i] * avail_buckets[i]) / 512); + } + } + + for (i = 0; i < dev_count; i++) { + total_cap += (bucket_sizes[i] * nbuckets[i]) / 512; + total_free += (bucket_sizes[i] * avail_buckets[i]) / 512; + + } + + printf("%-15s%-25.*f%-25.*f\n", "Total", precision, total_cap, + precision, total_free); + +err: + for (i = 0; i < dev_count; i++) + if (dev_names[i]) + free(dev_names[i]); + return err; }