]> git.sesse.net Git - bcachefs-tools-debian/blob - libbcachefs.c
Update for new superblock options; makefile improvements
[bcachefs-tools-debian] / libbcachefs.c
1 #include <errno.h>
2 #include <fcntl.h>
3 #include <stdbool.h>
4 #include <stdint.h>
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <string.h>
8 #include <sys/stat.h>
9 #include <sys/types.h>
10 #include <time.h>
11 #include <unistd.h>
12
13 #include <uuid/uuid.h>
14
15 #include "bcachefs_format.h"
16 #include "checksum.h"
17 #include "crypto.h"
18 #include "libbcachefs.h"
19 #include "opts.h"
20 #include "super-io.h"
21
22 #define NSEC_PER_SEC    1000000000L
23
24 #define BCH_MIN_NR_NBUCKETS     (1 << 10)
25
26 /* minimum size filesystem we can create, given a bucket size: */
27 static u64 min_size(unsigned bucket_size)
28 {
29         return BCH_MIN_NR_NBUCKETS * bucket_size;
30 }
31
32 static void init_layout(struct bch_sb_layout *l, unsigned block_size,
33                         u64 start, u64 end)
34 {
35         unsigned sb_size;
36         u64 backup; /* offset of 2nd sb */
37
38         memset(l, 0, sizeof(*l));
39
40         if (start != BCH_SB_SECTOR)
41                 start = round_up(start, block_size);
42         end = round_down(end, block_size);
43
44         if (start >= end)
45                 die("insufficient space for superblocks");
46
47         /*
48          * Create two superblocks in the allowed range: reserve a maximum of 64k
49          */
50         sb_size = min_t(u64, 128, end - start / 2);
51
52         backup = start + sb_size;
53         backup = round_up(backup, block_size);
54
55         backup = min(backup, end);
56
57         sb_size = min(end - backup, backup- start);
58         sb_size = rounddown_pow_of_two(sb_size);
59
60         if (sb_size < 8)
61                 die("insufficient space for superblocks");
62
63         l->magic                = BCACHE_MAGIC;
64         l->layout_type          = 0;
65         l->nr_superblocks       = 2;
66         l->sb_max_size_bits     = ilog2(sb_size);
67         l->sb_offset[0]         = cpu_to_le64(start);
68         l->sb_offset[1]         = cpu_to_le64(backup);
69 }
70
71 void bch2_pick_bucket_size(struct format_opts opts, struct dev_opts *dev)
72 {
73         if (!dev->sb_offset) {
74                 dev->sb_offset  = BCH_SB_SECTOR;
75                 dev->sb_end     = BCH_SB_SECTOR + 256;
76         }
77
78         if (!dev->size)
79                 dev->size = get_size(dev->path, dev->fd) >> 9;
80
81         if (!dev->bucket_size) {
82                 if (dev->size < min_size(opts.block_size))
83                         die("cannot format %s, too small (%llu sectors, min %llu)",
84                             dev->path, dev->size, min_size(opts.block_size));
85
86                 /* Bucket size must be >= block size: */
87                 dev->bucket_size = opts.block_size;
88
89                 /* Bucket size must be >= btree node size: */
90                 dev->bucket_size = max(dev->bucket_size, opts.btree_node_size);
91
92                 /* Want a bucket size of at least 128k, if possible: */
93                 dev->bucket_size = max(dev->bucket_size, 256U);
94
95                 if (dev->size >= min_size(dev->bucket_size)) {
96                         unsigned scale = max(1,
97                                              ilog2(dev->size / min_size(dev->bucket_size)) / 4);
98
99                         scale = rounddown_pow_of_two(scale);
100
101                         /* max bucket size 1 mb */
102                         dev->bucket_size = min(dev->bucket_size * scale, 1U << 11);
103                 } else {
104                         do {
105                                 dev->bucket_size /= 2;
106                         } while (dev->size < min_size(dev->bucket_size));
107                 }
108         }
109
110         dev->nbuckets   = dev->size / dev->bucket_size;
111
112         if (dev->bucket_size < opts.block_size)
113                 die("Bucket size cannot be smaller than block size");
114
115         if (dev->bucket_size < opts.btree_node_size)
116                 die("Bucket size cannot be smaller than btree node size");
117
118         if (dev->nbuckets < BCH_MIN_NR_NBUCKETS)
119                 die("Not enough buckets: %llu, need %u (bucket size %u)",
120                     dev->nbuckets, BCH_MIN_NR_NBUCKETS, dev->bucket_size);
121
122 }
123
124 struct bch_sb *bch2_format(struct format_opts opts,
125                            struct dev_opts *devs, size_t nr_devs)
126 {
127         struct bch_sb *sb;
128         struct dev_opts *i;
129         struct bch_sb_field_members *mi;
130         unsigned u64s;
131
132         /* calculate block size: */
133         if (!opts.block_size)
134                 for (i = devs; i < devs + nr_devs; i++)
135                         opts.block_size = max(opts.block_size,
136                                               get_blocksize(i->path, i->fd));
137
138         /* calculate bucket sizes: */
139         for (i = devs; i < devs + nr_devs; i++)
140                 bch2_pick_bucket_size(opts, i);
141
142         /* calculate btree node size: */
143         if (!opts.btree_node_size) {
144                 /* 256k default btree node size */
145                 opts.btree_node_size = 512;
146
147                 for (i = devs; i < devs + nr_devs; i++)
148                         opts.btree_node_size =
149                                 min(opts.btree_node_size, i->bucket_size);
150         }
151
152         if (!is_power_of_2(opts.block_size))
153                 die("block size must be power of 2");
154
155         if (!is_power_of_2(opts.btree_node_size))
156                 die("btree node size must be power of 2");
157
158         if (uuid_is_null(opts.uuid.b))
159                 uuid_generate(opts.uuid.b);
160
161         sb = calloc(1, sizeof(*sb) +
162                     sizeof(struct bch_sb_field_members) +
163                     sizeof(struct bch_member) * nr_devs +
164                     sizeof(struct bch_sb_field_crypt));
165
166         sb->version     = cpu_to_le64(BCH_SB_VERSION_MAX);
167         sb->magic       = BCACHE_MAGIC;
168         sb->block_size  = cpu_to_le16(opts.block_size);
169         sb->user_uuid   = opts.uuid;
170         sb->nr_devices  = nr_devs;
171
172         uuid_generate(sb->uuid.b);
173
174         if (opts.label)
175                 strncpy((char *) sb->label, opts.label, sizeof(sb->label));
176
177         SET_BCH_SB_CSUM_TYPE(sb,                opts.meta_csum_type);
178         SET_BCH_SB_META_CSUM_TYPE(sb,           opts.meta_csum_type);
179         SET_BCH_SB_DATA_CSUM_TYPE(sb,           opts.data_csum_type);
180         SET_BCH_SB_COMPRESSION_TYPE(sb,         opts.compression_type);
181
182         SET_BCH_SB_BTREE_NODE_SIZE(sb,          opts.btree_node_size);
183         SET_BCH_SB_GC_RESERVE(sb,               8);
184         SET_BCH_SB_META_REPLICAS_WANT(sb,       opts.meta_replicas);
185         SET_BCH_SB_META_REPLICAS_REQ(sb,        opts.meta_replicas_required);
186         SET_BCH_SB_DATA_REPLICAS_WANT(sb,       opts.data_replicas);
187         SET_BCH_SB_DATA_REPLICAS_REQ(sb,        opts.data_replicas_required);
188         SET_BCH_SB_ERROR_ACTION(sb,             opts.on_error_action);
189         SET_BCH_SB_STR_HASH_TYPE(sb,            BCH_STR_HASH_SIPHASH);
190         SET_BCH_SB_ENCODED_EXTENT_MAX_BITS(sb,  ilog2(opts.encoded_extent_max));
191
192         struct timespec now;
193         if (clock_gettime(CLOCK_REALTIME, &now))
194                 die("error getting current time: %m");
195
196         sb->time_base_lo        = cpu_to_le64(now.tv_sec * NSEC_PER_SEC + now.tv_nsec);
197         sb->time_precision      = cpu_to_le32(1);
198
199         if (opts.encrypted) {
200                 struct bch_sb_field_crypt *crypt = vstruct_end(sb);
201
202                 u64s = sizeof(struct bch_sb_field_crypt) / sizeof(u64);
203
204                 le32_add_cpu(&sb->u64s, u64s);
205                 crypt->field.u64s = cpu_to_le32(u64s);
206                 crypt->field.type = BCH_SB_FIELD_crypt;
207
208                 bch_sb_crypt_init(sb, crypt, opts.passphrase);
209                 SET_BCH_SB_ENCRYPTION_TYPE(sb, 1);
210         }
211
212         mi = vstruct_end(sb);
213         u64s = (sizeof(struct bch_sb_field_members) +
214                 sizeof(struct bch_member) * nr_devs) / sizeof(u64);
215
216         le32_add_cpu(&sb->u64s, u64s);
217         mi->field.u64s = cpu_to_le32(u64s);
218         mi->field.type = BCH_SB_FIELD_members;
219
220         for (i = devs; i < devs + nr_devs; i++) {
221                 struct bch_member *m = mi->members + (i - devs);
222
223                 uuid_generate(m->uuid.b);
224                 m->nbuckets     = cpu_to_le64(i->nbuckets);
225                 m->first_bucket = 0;
226                 m->bucket_size  = cpu_to_le16(i->bucket_size);
227
228                 SET_BCH_MEMBER_TIER(m,          i->tier);
229                 SET_BCH_MEMBER_REPLACEMENT(m,   CACHE_REPLACEMENT_LRU);
230                 SET_BCH_MEMBER_DISCARD(m,       i->discard);
231                 SET_BCH_MEMBER_DATA_ALLOWED(m,  i->data_allowed);
232         }
233
234         for (i = devs; i < devs + nr_devs; i++) {
235                 sb->dev_idx = i - devs;
236
237                 init_layout(&sb->layout, opts.block_size,
238                             i->sb_offset, i->sb_end);
239
240                 if (i->sb_offset == BCH_SB_SECTOR) {
241                         /* Zero start of disk */
242                         static const char zeroes[BCH_SB_SECTOR << 9];
243
244                         xpwrite(i->fd, zeroes, BCH_SB_SECTOR << 9, 0);
245                 }
246
247                 bch2_super_write(i->fd, sb);
248                 close(i->fd);
249         }
250
251         return sb;
252 }
253
254 void bch2_super_write(int fd, struct bch_sb *sb)
255 {
256         struct nonce nonce = { 0 };
257
258         unsigned i;
259         for (i = 0; i < sb->layout.nr_superblocks; i++) {
260                 sb->offset = sb->layout.sb_offset[i];
261
262                 if (sb->offset == BCH_SB_SECTOR) {
263                         /* Write backup layout */
264                         xpwrite(fd, &sb->layout, sizeof(sb->layout),
265                                 BCH_SB_LAYOUT_SECTOR << 9);
266                 }
267
268                 sb->csum = csum_vstruct(NULL, BCH_SB_CSUM_TYPE(sb), nonce, sb);
269                 xpwrite(fd, sb, vstruct_bytes(sb),
270                         le64_to_cpu(sb->offset) << 9);
271         }
272
273         fsync(fd);
274 }
275
276 struct bch_sb *__bch2_super_read(int fd, u64 sector)
277 {
278         struct bch_sb sb, *ret;
279
280         xpread(fd, &sb, sizeof(sb), sector << 9);
281
282         if (memcmp(&sb.magic, &BCACHE_MAGIC, sizeof(sb.magic)))
283                 die("not a bcachefs superblock");
284
285         size_t bytes = vstruct_bytes(&sb);
286
287         ret = malloc(bytes);
288
289         xpread(fd, ret, bytes, sector << 9);
290
291         return ret;
292 }
293
294 struct bch_sb *bch2_super_read(const char *path)
295 {
296         int fd = xopen(path, O_RDONLY);
297         struct bch_sb *sb = __bch2_super_read(fd, BCH_SB_SECTOR);
298         close(fd);
299         return sb;
300 }
301
302 static unsigned get_dev_has_data(struct bch_sb *sb, unsigned dev)
303 {
304         struct bch_sb_field_replicas *replicas;
305         struct bch_replicas_entry *r;
306         unsigned i, data_has = 0;
307
308         replicas = bch2_sb_get_replicas(sb);
309
310         if (replicas)
311                 for_each_replicas_entry(replicas, r)
312                         for (i = 0; i < r->nr; i++)
313                                 if (r->devs[i] == dev)
314                                         data_has |= 1 << r->data_type;
315
316         return data_has;
317 }
318
319 void bch2_super_print(struct bch_sb *sb, int units)
320 {
321         struct bch_sb_field_members *mi;
322         char user_uuid_str[40], internal_uuid_str[40];
323         char label[BCH_SB_LABEL_SIZE + 1];
324         unsigned i;
325
326         memset(label, 0, sizeof(label));
327         memcpy(label, sb->label, sizeof(sb->label));
328         uuid_unparse(sb->user_uuid.b, user_uuid_str);
329         uuid_unparse(sb->uuid.b, internal_uuid_str);
330
331         printf("External UUID:                  %s\n"
332                "Internal UUID:                  %s\n"
333                "Label:                          %s\n"
334                "Version:                        %llu\n"
335                "Block_size:                     %s\n"
336                "Btree node size:                %s\n"
337                "Error action:                   %s\n"
338                "Clean:                          %llu\n"
339
340                "Metadata replicas:              have %llu, want %llu\n"
341                "Data replicas:                  have %llu, want %llu\n"
342
343                "Metadata checksum type:         %s\n"
344                "Data checksum type:             %s\n"
345                "Compression type:               %s\n"
346
347                "String hash type:               %s\n"
348                "32 bit inodes:                  %llu\n"
349                "GC reserve percentage:          %llu%%\n"
350                "Root reserve percentage:        %llu%%\n"
351
352                "Devices:                        %u\n",
353                user_uuid_str,
354                internal_uuid_str,
355                label,
356                le64_to_cpu(sb->version),
357                pr_units(le16_to_cpu(sb->block_size), units),
358                pr_units(BCH_SB_BTREE_NODE_SIZE(sb), units),
359
360                BCH_SB_ERROR_ACTION(sb) < BCH_NR_ERROR_ACTIONS
361                ? bch2_error_actions[BCH_SB_ERROR_ACTION(sb)]
362                : "unknown",
363
364                BCH_SB_CLEAN(sb),
365
366                0LLU, //BCH_SB_META_REPLICAS_HAVE(sb),
367                BCH_SB_META_REPLICAS_WANT(sb),
368                0LLU, //BCH_SB_DATA_REPLICAS_HAVE(sb),
369                BCH_SB_DATA_REPLICAS_WANT(sb),
370
371                BCH_SB_META_CSUM_TYPE(sb) < BCH_CSUM_NR
372                ? bch2_csum_types[BCH_SB_META_CSUM_TYPE(sb)]
373                : "unknown",
374
375                BCH_SB_DATA_CSUM_TYPE(sb) < BCH_CSUM_NR
376                ? bch2_csum_types[BCH_SB_DATA_CSUM_TYPE(sb)]
377                : "unknown",
378
379                BCH_SB_COMPRESSION_TYPE(sb) < BCH_COMPRESSION_NR
380                ? bch2_compression_types[BCH_SB_COMPRESSION_TYPE(sb)]
381                : "unknown",
382
383                BCH_SB_STR_HASH_TYPE(sb) < BCH_STR_HASH_NR
384                ? bch2_str_hash_types[BCH_SB_STR_HASH_TYPE(sb)]
385                : "unknown",
386
387                BCH_SB_INODE_32BIT(sb),
388                BCH_SB_GC_RESERVE(sb),
389                BCH_SB_ROOT_RESERVE(sb),
390
391                sb->nr_devices);
392
393         mi = bch2_sb_get_members(sb);
394         if (!mi) {
395                 printf("Member info section missing\n");
396                 return;
397         }
398
399         for (i = 0; i < sb->nr_devices; i++) {
400                 struct bch_member *m = mi->members + i;
401                 time_t last_mount = le64_to_cpu(m->last_mount);
402                 char member_uuid_str[40];
403                 char data_allowed_str[100];
404                 char data_has_str[100];
405
406                 uuid_unparse(m->uuid.b, member_uuid_str);
407                 bch2_scnprint_flag_list(data_allowed_str,
408                                         sizeof(data_allowed_str),
409                                         bch2_data_types,
410                                         BCH_MEMBER_DATA_ALLOWED(m));
411                 if (!data_allowed_str[0])
412                         strcpy(data_allowed_str, "(none)");
413
414                 bch2_scnprint_flag_list(data_has_str,
415                                         sizeof(data_has_str),
416                                         bch2_data_types,
417                                         get_dev_has_data(sb, i));
418                 if (!data_has_str[0])
419                         strcpy(data_has_str, "(none)");
420
421                 printf("\n"
422                        "Device %u:\n"
423                        "  UUID:                         %s\n"
424                        "  Size:                         %s\n"
425                        "  Bucket size:                  %s\n"
426                        "  First bucket:                 %u\n"
427                        "  Buckets:                      %llu\n"
428                        "  Last mount:                   %s\n"
429                        "  State:                        %s\n"
430                        "  Tier:                         %llu\n"
431                        "  Data allowed:                 %s\n"
432
433                        "  Has data:                     %s\n"
434
435                        "  Replacement policy:           %s\n"
436                        "  Discard:                      %llu\n",
437                        i, member_uuid_str,
438                        pr_units(le16_to_cpu(m->bucket_size) *
439                                 le64_to_cpu(m->nbuckets), units),
440                        pr_units(le16_to_cpu(m->bucket_size), units),
441                        le16_to_cpu(m->first_bucket),
442                        le64_to_cpu(m->nbuckets),
443                        last_mount ? ctime(&last_mount) : "(never)",
444
445                        BCH_MEMBER_STATE(m) < BCH_MEMBER_STATE_NR
446                        ? bch2_dev_state[BCH_MEMBER_STATE(m)]
447                        : "unknown",
448
449                        BCH_MEMBER_TIER(m),
450                        data_allowed_str,
451                        data_has_str,
452
453                        BCH_MEMBER_REPLACEMENT(m) < CACHE_REPLACEMENT_NR
454                        ? bch2_cache_replacement_policies[BCH_MEMBER_REPLACEMENT(m)]
455                        : "unknown",
456
457                        BCH_MEMBER_DISCARD(m));
458         }
459 }