]> git.sesse.net Git - bcachefs-tools-debian/blob - libbcache.c
cc294bd4ac39fd434db95dabe33e7fb3eb838ea2
[bcachefs-tools-debian] / libbcache.c
1 #include <errno.h>
2 #include <fcntl.h>
3 #include <stdbool.h>
4 #include <stdint.h>
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <string.h>
8 #include <sys/stat.h>
9 #include <sys/types.h>
10 #include <time.h>
11 #include <unistd.h>
12
13 #include <uuid/uuid.h>
14
15 #include "linux/bcache.h"
16 #include "libbcache.h"
17 #include "checksum.h"
18 #include "crypto.h"
19 #include "opts.h"
20 #include "super-io.h"
21
22 #define NSEC_PER_SEC    1000000000L
23
24 #define BCH_MIN_NR_NBUCKETS     (1 << 10)
25
26 /* first bucket should start 1 mb in, in sectors: */
27 #define FIRST_BUCKET_OFFSET     (1 << 11)
28
29 /* minimum size filesystem we can create, given a bucket size: */
30 static u64 min_size(unsigned bucket_size)
31 {
32         return (DIV_ROUND_UP(FIRST_BUCKET_OFFSET, bucket_size) +
33                 BCH_MIN_NR_NBUCKETS) * bucket_size;
34 }
35
36 static void init_layout(struct bch_sb_layout *l)
37 {
38         memset(l, 0, sizeof(*l));
39
40         l->magic                = BCACHE_MAGIC;
41         l->layout_type          = 0;
42         l->nr_superblocks       = 2;
43         l->sb_max_size_bits     = 7;
44         l->sb_offset[0]         = cpu_to_le64(BCH_SB_SECTOR);
45         l->sb_offset[1]         = cpu_to_le64(BCH_SB_SECTOR +
46                                               (1 << l->sb_max_size_bits));
47 }
48
49 void bcache_format(struct dev_opts *devs, size_t nr_devs,
50                    unsigned block_size,
51                    unsigned btree_node_size,
52                    unsigned meta_csum_type,
53                    unsigned data_csum_type,
54                    unsigned compression_type,
55                    const char *passphrase,
56                    unsigned meta_replicas,
57                    unsigned data_replicas,
58                    unsigned on_error_action,
59                    unsigned max_journal_entry_size,
60                    char *label,
61                    uuid_le uuid)
62 {
63         struct bch_sb *sb;
64         struct dev_opts *i;
65         struct bch_sb_field_members *mi;
66         unsigned u64s, j;
67
68         /* calculate block size: */
69         if (!block_size)
70                 for (i = devs; i < devs + nr_devs; i++)
71                         block_size = max(block_size,
72                                          get_blocksize(i->path, i->fd));
73
74         /* calculate bucket sizes: */
75         for (i = devs; i < devs + nr_devs; i++) {
76                 if (!i->size)
77                         i->size = get_size(i->path, i->fd) >> 9;
78
79                 if (!i->bucket_size) {
80                         if (i->size < min_size(block_size))
81                                 die("cannot format %s, too small (%llu sectors, min %llu)",
82                                     i->path, i->size, min_size(block_size));
83
84                         /* Want a bucket size of at least 128k, if possible: */
85                         i->bucket_size = max(block_size, 256U);
86
87                         if (i->size >= min_size(i->bucket_size)) {
88                                 unsigned scale = max(1,
89                                         ilog2(i->size / min_size(i->bucket_size)) / 4);
90
91                                 scale = rounddown_pow_of_two(scale);
92
93                                 /* max bucket size 1 mb */
94                                 i->bucket_size = min(i->bucket_size * scale, 1U << 11);
95                         } else {
96                                 do {
97                                         i->bucket_size /= 2;
98                                 } while (i->size < min_size(i->bucket_size));
99                         }
100                 }
101
102                 /* first bucket: 1 mb in */
103                 i->first_bucket = DIV_ROUND_UP(FIRST_BUCKET_OFFSET, i->bucket_size);
104                 i->nbuckets     = i->size / i->bucket_size;
105
106                 if (i->bucket_size < block_size)
107                         die("Bucket size cannot be smaller than block size");
108
109                 if (i->nbuckets - i->first_bucket < BCH_MIN_NR_NBUCKETS)
110                         die("Not enough buckets: %llu, need %u (bucket size %u)",
111                             i->nbuckets - i->first_bucket, BCH_MIN_NR_NBUCKETS,
112                             i->bucket_size);
113         }
114
115         /* calculate btree node size: */
116         if (!btree_node_size) {
117                 /* 256k default btree node size */
118                 btree_node_size = 512;
119
120                 for (i = devs; i < devs + nr_devs; i++)
121                         btree_node_size = min(btree_node_size, i->bucket_size);
122         }
123
124         if (!max_journal_entry_size) {
125                 /* 2 MB default: */
126                 max_journal_entry_size = 4096;
127         }
128
129         max_journal_entry_size = roundup_pow_of_two(max_journal_entry_size);
130
131         sb = calloc(1, sizeof(*sb) +
132                     sizeof(struct bch_sb_field_members) +
133                     sizeof(struct bch_member) * nr_devs +
134                     sizeof(struct bch_sb_field_crypt));
135
136         sb->version     = cpu_to_le64(BCACHE_SB_VERSION_CDEV_V4);
137         sb->magic       = BCACHE_MAGIC;
138         sb->block_size  = cpu_to_le16(block_size);
139         sb->user_uuid   = uuid;
140         sb->nr_devices  = nr_devs;
141
142         init_layout(&sb->layout);
143
144         uuid_generate(sb->uuid.b);
145
146         if (label)
147                 strncpy((char *) sb->label, label, sizeof(sb->label));
148
149         /*
150          * don't have a userspace crc32c implementation handy, just always use
151          * crc64
152          */
153         SET_BCH_SB_CSUM_TYPE(sb,                BCH_CSUM_CRC64);
154         SET_BCH_SB_META_CSUM_TYPE(sb,           meta_csum_type);
155         SET_BCH_SB_DATA_CSUM_TYPE(sb,           data_csum_type);
156         SET_BCH_SB_COMPRESSION_TYPE(sb,         compression_type);
157
158         SET_BCH_SB_BTREE_NODE_SIZE(sb,          btree_node_size);
159         SET_BCH_SB_GC_RESERVE(sb,               8);
160         SET_BCH_SB_META_REPLICAS_WANT(sb,       meta_replicas);
161         SET_BCH_SB_META_REPLICAS_HAVE(sb,       meta_replicas);
162         SET_BCH_SB_DATA_REPLICAS_WANT(sb,       data_replicas);
163         SET_BCH_SB_DATA_REPLICAS_HAVE(sb,       data_replicas);
164         SET_BCH_SB_ERROR_ACTION(sb,             on_error_action);
165         SET_BCH_SB_STR_HASH_TYPE(sb,            BCH_STR_HASH_SIPHASH);
166         SET_BCH_SB_JOURNAL_ENTRY_SIZE(sb,       ilog2(max_journal_entry_size));
167
168         struct timespec now;
169         if (clock_gettime(CLOCK_REALTIME, &now))
170                 die("error getting current time: %s", strerror(errno));
171
172         sb->time_base_lo        = cpu_to_le64(now.tv_sec * NSEC_PER_SEC + now.tv_nsec);
173         sb->time_precision      = cpu_to_le32(1);
174
175         if (passphrase) {
176                 struct bch_sb_field_crypt *crypt = vstruct_end(sb);
177
178                 u64s = sizeof(struct bch_sb_field_crypt) / sizeof(u64);
179
180                 le32_add_cpu(&sb->u64s, u64s);
181                 crypt->field.u64s = cpu_to_le32(u64s);
182                 crypt->field.type = BCH_SB_FIELD_crypt;
183
184                 bch_sb_crypt_init(sb, crypt, passphrase);
185                 SET_BCH_SB_ENCRYPTION_TYPE(sb, 1);
186         }
187
188         mi = vstruct_end(sb);
189         u64s = (sizeof(struct bch_sb_field_members) +
190                 sizeof(struct bch_member) * nr_devs) / sizeof(u64);
191
192         le32_add_cpu(&sb->u64s, u64s);
193         mi->field.u64s = cpu_to_le32(u64s);
194         mi->field.type = BCH_SB_FIELD_members;
195
196         for (i = devs; i < devs + nr_devs; i++) {
197                 struct bch_member *m = mi->members + (i - devs);
198
199                 uuid_generate(m->uuid.b);
200                 m->nbuckets     = cpu_to_le64(i->nbuckets);
201                 m->first_bucket = cpu_to_le16(i->first_bucket);
202                 m->bucket_size  = cpu_to_le16(i->bucket_size);
203
204                 SET_BCH_MEMBER_TIER(m,          i->tier);
205                 SET_BCH_MEMBER_REPLACEMENT(m,   CACHE_REPLACEMENT_LRU);
206                 SET_BCH_MEMBER_DISCARD(m,       i->discard);
207         }
208
209         for (i = devs; i < devs + nr_devs; i++) {
210                 sb->dev_idx = i - devs;
211
212                 static const char zeroes[BCH_SB_SECTOR << 9];
213                 struct nonce nonce = { 0 };
214
215                 /* Zero start of disk */
216                 xpwrite(i->fd, zeroes, BCH_SB_SECTOR << 9, 0);
217
218                 xpwrite(i->fd, &sb->layout, sizeof(sb->layout),
219                         BCH_SB_LAYOUT_SECTOR << 9);
220
221                 for (j = 0; j < sb->layout.nr_superblocks; j++) {
222                         sb->offset = sb->layout.sb_offset[j];
223
224                         sb->csum = csum_vstruct(NULL, BCH_SB_CSUM_TYPE(sb),
225                                                    nonce, sb);
226                         xpwrite(i->fd, sb, vstruct_bytes(sb),
227                                 le64_to_cpu(sb->offset) << 9);
228                 }
229
230                 fsync(i->fd);
231                 close(i->fd);
232         }
233
234         bcache_super_print(sb, HUMAN_READABLE);
235
236         free(sb);
237 }
238
239 struct bch_sb *bcache_super_read(const char *path)
240 {
241         struct bch_sb sb, *ret;
242
243         int fd = open(path, O_RDONLY);
244         if (fd < 0)
245                 die("couldn't open %s", path);
246
247         xpread(fd, &sb, sizeof(sb), BCH_SB_SECTOR << 9);
248
249         if (memcmp(&sb.magic, &BCACHE_MAGIC, sizeof(sb.magic)))
250                 die("not a bcache superblock");
251
252         size_t bytes = vstruct_bytes(&sb);
253
254         ret = malloc(bytes);
255
256         xpread(fd, ret, bytes, BCH_SB_SECTOR << 9);
257
258         return ret;
259 }
260
261 void bcache_super_print(struct bch_sb *sb, int units)
262 {
263         struct bch_sb_field_members *mi;
264         char user_uuid_str[40], internal_uuid_str[40], member_uuid_str[40];
265         char label[BCH_SB_LABEL_SIZE + 1];
266         unsigned i;
267
268         memset(label, 0, sizeof(label));
269         memcpy(label, sb->label, sizeof(sb->label));
270         uuid_unparse(sb->user_uuid.b, user_uuid_str);
271         uuid_unparse(sb->uuid.b, internal_uuid_str);
272
273         printf("External UUID:                  %s\n"
274                "Internal UUID:                  %s\n"
275                "Label:                          %s\n"
276                "Version:                        %llu\n"
277                "Block_size:                     %s\n"
278                "Btree node size:                %s\n"
279                "Max journal entry size:         %s\n"
280                "Error action:                   %s\n"
281                "Clean:                          %llu\n"
282
283                "Metadata replicas:              have %llu, want %llu\n"
284                "Data replicas:                  have %llu, want %llu\n"
285
286                "Metadata checksum type:         %s\n"
287                "Data checksum type:             %s\n"
288                "Compression type:               %s\n"
289
290                "String hash type:               %s\n"
291                "32 bit inodes:                  %llu\n"
292                "GC reserve percentage:          %llu%%\n"
293                "Root reserve percentage:        %llu%%\n"
294
295                "Devices:                        %u\n",
296                user_uuid_str,
297                internal_uuid_str,
298                label,
299                le64_to_cpu(sb->version),
300                pr_units(le16_to_cpu(sb->block_size), units),
301                pr_units(BCH_SB_BTREE_NODE_SIZE(sb), units),
302                pr_units(1U << BCH_SB_JOURNAL_ENTRY_SIZE(sb), units),
303
304                BCH_SB_ERROR_ACTION(sb) < BCH_NR_ERROR_ACTIONS
305                ? bch_error_actions[BCH_SB_ERROR_ACTION(sb)]
306                : "unknown",
307
308                BCH_SB_CLEAN(sb),
309
310                BCH_SB_META_REPLICAS_HAVE(sb),
311                BCH_SB_META_REPLICAS_WANT(sb),
312                BCH_SB_DATA_REPLICAS_HAVE(sb),
313                BCH_SB_DATA_REPLICAS_WANT(sb),
314
315                BCH_SB_META_CSUM_TYPE(sb) < BCH_CSUM_NR
316                ? bch_csum_types[BCH_SB_META_CSUM_TYPE(sb)]
317                : "unknown",
318
319                BCH_SB_DATA_CSUM_TYPE(sb) < BCH_CSUM_NR
320                ? bch_csum_types[BCH_SB_DATA_CSUM_TYPE(sb)]
321                : "unknown",
322
323                BCH_SB_COMPRESSION_TYPE(sb) < BCH_COMPRESSION_NR
324                ? bch_compression_types[BCH_SB_COMPRESSION_TYPE(sb)]
325                : "unknown",
326
327                BCH_SB_STR_HASH_TYPE(sb) < BCH_STR_HASH_NR
328                ? bch_str_hash_types[BCH_SB_STR_HASH_TYPE(sb)]
329                : "unknown",
330
331                BCH_SB_INODE_32BIT(sb),
332                BCH_SB_GC_RESERVE(sb),
333                BCH_SB_ROOT_RESERVE(sb),
334
335                sb->nr_devices);
336
337         mi = bch_sb_get_members(sb);
338         if (!mi) {
339                 printf("Member info section missing\n");
340                 return;
341         }
342
343         for (i = 0; i < sb->nr_devices; i++) {
344                 struct bch_member *m = mi->members + i;
345                 time_t last_mount = le64_to_cpu(m->last_mount);
346
347                 uuid_unparse(m->uuid.b, member_uuid_str);
348
349                 printf("\n"
350                        "Device %u:\n"
351                        "  UUID:                         %s\n"
352                        "  Size:                         %s\n"
353                        "  Bucket size:                  %s\n"
354                        "  First bucket:                 %u\n"
355                        "  Buckets:                      %llu\n"
356                        "  Last mount:                   %s\n"
357                        "  State:                        %s\n"
358                        "  Tier:                         %llu\n"
359                        "  Has metadata:                 %llu\n"
360                        "  Has data:                     %llu\n"
361                        "  Replacement policy:           %s\n"
362                        "  Discard:                      %llu\n",
363                        i, member_uuid_str,
364                        pr_units(le16_to_cpu(m->bucket_size) *
365                                 le64_to_cpu(m->nbuckets), units),
366                        pr_units(le16_to_cpu(m->bucket_size), units),
367                        le16_to_cpu(m->first_bucket),
368                        le64_to_cpu(m->nbuckets),
369                        last_mount ? ctime(&last_mount) : "(never)",
370
371                        BCH_MEMBER_STATE(m) < BCH_MEMBER_STATE_NR
372                        ? bch_cache_state[BCH_MEMBER_STATE(m)]
373                        : "unknown",
374
375                        BCH_MEMBER_TIER(m),
376                        BCH_MEMBER_HAS_METADATA(m),
377                        BCH_MEMBER_HAS_DATA(m),
378
379                        BCH_MEMBER_REPLACEMENT(m) < CACHE_REPLACEMENT_NR
380                        ? bch_cache_replacement_policies[BCH_MEMBER_REPLACEMENT(m)]
381                        : "unknown",
382
383                        BCH_MEMBER_DISCARD(m));
384         }
385 }