]> git.sesse.net Git - bcachefs-tools-debian/blob - libbcachefs.c
9a07bbcd8dd165a91effdb9abdf87554bf907221
[bcachefs-tools-debian] / libbcachefs.c
1 #include <errno.h>
2 #include <fcntl.h>
3 #include <stdbool.h>
4 #include <stdint.h>
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <string.h>
8 #include <sys/stat.h>
9 #include <sys/types.h>
10 #include <time.h>
11 #include <unistd.h>
12
13 #include <uuid/uuid.h>
14
15 #include "bcachefs_format.h"
16 #include "checksum.h"
17 #include "crypto.h"
18 #include "libbcachefs.h"
19 #include "opts.h"
20 #include "super-io.h"
21
22 #define NSEC_PER_SEC    1000000000L
23
24 #define BCH_MIN_NR_NBUCKETS     (1 << 10)
25
26 /* minimum size filesystem we can create, given a bucket size: */
27 static u64 min_size(unsigned bucket_size)
28 {
29         return BCH_MIN_NR_NBUCKETS * bucket_size;
30 }
31
32 static void init_layout(struct bch_sb_layout *l, unsigned block_size,
33                         u64 start, u64 end)
34 {
35         unsigned sb_size;
36         u64 backup; /* offset of 2nd sb */
37
38         memset(l, 0, sizeof(*l));
39
40         if (start != BCH_SB_SECTOR)
41                 start = round_up(start, block_size);
42         end = round_down(end, block_size);
43
44         if (start >= end)
45                 die("insufficient space for superblocks");
46
47         /*
48          * Create two superblocks in the allowed range: reserve a maximum of 64k
49          */
50         sb_size = min_t(u64, 128, end - start / 2);
51
52         backup = start + sb_size;
53         backup = round_up(backup, block_size);
54
55         backup = min(backup, end);
56
57         sb_size = min(end - backup, backup- start);
58         sb_size = rounddown_pow_of_two(sb_size);
59
60         if (sb_size < 8)
61                 die("insufficient space for superblocks");
62
63         l->magic                = BCACHE_MAGIC;
64         l->layout_type          = 0;
65         l->nr_superblocks       = 2;
66         l->sb_max_size_bits     = ilog2(sb_size);
67         l->sb_offset[0]         = cpu_to_le64(start);
68         l->sb_offset[1]         = cpu_to_le64(backup);
69 }
70
71 void bch2_pick_bucket_size(struct format_opts opts, struct dev_opts *dev)
72 {
73         if (!dev->sb_offset) {
74                 dev->sb_offset  = BCH_SB_SECTOR;
75                 dev->sb_end     = BCH_SB_SECTOR + 256;
76         }
77
78         if (!dev->size)
79                 dev->size = get_size(dev->path, dev->fd) >> 9;
80
81         if (!dev->bucket_size) {
82                 if (dev->size < min_size(opts.block_size))
83                         die("cannot format %s, too small (%llu sectors, min %llu)",
84                             dev->path, dev->size, min_size(opts.block_size));
85
86                 /* Bucket size must be >= block size: */
87                 dev->bucket_size = opts.block_size;
88
89                 /* Bucket size must be >= btree node size: */
90                 dev->bucket_size = max(dev->bucket_size, opts.btree_node_size);
91
92                 /* Want a bucket size of at least 128k, if possible: */
93                 dev->bucket_size = max(dev->bucket_size, 256U);
94
95                 if (dev->size >= min_size(dev->bucket_size)) {
96                         unsigned scale = max(1,
97                                              ilog2(dev->size / min_size(dev->bucket_size)) / 4);
98
99                         scale = rounddown_pow_of_two(scale);
100
101                         /* max bucket size 1 mb */
102                         dev->bucket_size = min(dev->bucket_size * scale, 1U << 11);
103                 } else {
104                         do {
105                                 dev->bucket_size /= 2;
106                         } while (dev->size < min_size(dev->bucket_size));
107                 }
108         }
109
110         dev->nbuckets   = dev->size / dev->bucket_size;
111
112         if (dev->bucket_size < opts.block_size)
113                 die("Bucket size cannot be smaller than block size");
114
115         if (dev->bucket_size < opts.btree_node_size)
116                 die("Bucket size cannot be smaller than btree node size");
117
118         if (dev->nbuckets < BCH_MIN_NR_NBUCKETS)
119                 die("Not enough buckets: %llu, need %u (bucket size %u)",
120                     dev->nbuckets, BCH_MIN_NR_NBUCKETS, dev->bucket_size);
121
122 }
123
124 struct bch_sb *bch2_format(struct format_opts opts,
125                            struct dev_opts *devs, size_t nr_devs)
126 {
127         struct bch_sb *sb;
128         struct dev_opts *i;
129         struct bch_sb_field_members *mi;
130         unsigned u64s;
131
132         /* calculate block size: */
133         if (!opts.block_size)
134                 for (i = devs; i < devs + nr_devs; i++)
135                         opts.block_size = max(opts.block_size,
136                                               get_blocksize(i->path, i->fd));
137
138         /* calculate bucket sizes: */
139         for (i = devs; i < devs + nr_devs; i++)
140                 bch2_pick_bucket_size(opts, i);
141
142         /* calculate btree node size: */
143         if (!opts.btree_node_size) {
144                 /* 256k default btree node size */
145                 opts.btree_node_size = 512;
146
147                 for (i = devs; i < devs + nr_devs; i++)
148                         opts.btree_node_size =
149                                 min(opts.btree_node_size, i->bucket_size);
150         }
151
152         if (uuid_is_null(opts.uuid.b))
153                 uuid_generate(opts.uuid.b);
154
155         sb = calloc(1, sizeof(*sb) +
156                     sizeof(struct bch_sb_field_members) +
157                     sizeof(struct bch_member) * nr_devs +
158                     sizeof(struct bch_sb_field_crypt));
159
160         sb->version     = cpu_to_le64(BCACHE_SB_VERSION_CDEV_V4);
161         sb->magic       = BCACHE_MAGIC;
162         sb->block_size  = cpu_to_le16(opts.block_size);
163         sb->user_uuid   = opts.uuid;
164         sb->nr_devices  = nr_devs;
165
166         uuid_generate(sb->uuid.b);
167
168         if (opts.label)
169                 strncpy((char *) sb->label, opts.label, sizeof(sb->label));
170
171         SET_BCH_SB_CSUM_TYPE(sb,                opts.meta_csum_type);
172         SET_BCH_SB_META_CSUM_TYPE(sb,           opts.meta_csum_type);
173         SET_BCH_SB_DATA_CSUM_TYPE(sb,           opts.data_csum_type);
174         SET_BCH_SB_COMPRESSION_TYPE(sb,         opts.compression_type);
175
176         SET_BCH_SB_BTREE_NODE_SIZE(sb,          opts.btree_node_size);
177         SET_BCH_SB_GC_RESERVE(sb,               8);
178         SET_BCH_SB_META_REPLICAS_WANT(sb,       opts.meta_replicas);
179         SET_BCH_SB_META_REPLICAS_REQ(sb,        opts.meta_replicas_required);
180         SET_BCH_SB_DATA_REPLICAS_WANT(sb,       opts.data_replicas);
181         SET_BCH_SB_DATA_REPLICAS_REQ(sb,        opts.data_replicas_required);
182         SET_BCH_SB_ERROR_ACTION(sb,             opts.on_error_action);
183         SET_BCH_SB_STR_HASH_TYPE(sb,            BCH_STR_HASH_SIPHASH);
184
185         struct timespec now;
186         if (clock_gettime(CLOCK_REALTIME, &now))
187                 die("error getting current time: %m");
188
189         sb->time_base_lo        = cpu_to_le64(now.tv_sec * NSEC_PER_SEC + now.tv_nsec);
190         sb->time_precision      = cpu_to_le32(1);
191
192         if (opts.encrypted) {
193                 struct bch_sb_field_crypt *crypt = vstruct_end(sb);
194
195                 u64s = sizeof(struct bch_sb_field_crypt) / sizeof(u64);
196
197                 le32_add_cpu(&sb->u64s, u64s);
198                 crypt->field.u64s = cpu_to_le32(u64s);
199                 crypt->field.type = BCH_SB_FIELD_crypt;
200
201                 bch_sb_crypt_init(sb, crypt, opts.passphrase);
202                 SET_BCH_SB_ENCRYPTION_TYPE(sb, 1);
203         }
204
205         mi = vstruct_end(sb);
206         u64s = (sizeof(struct bch_sb_field_members) +
207                 sizeof(struct bch_member) * nr_devs) / sizeof(u64);
208
209         le32_add_cpu(&sb->u64s, u64s);
210         mi->field.u64s = cpu_to_le32(u64s);
211         mi->field.type = BCH_SB_FIELD_members;
212
213         for (i = devs; i < devs + nr_devs; i++) {
214                 struct bch_member *m = mi->members + (i - devs);
215
216                 uuid_generate(m->uuid.b);
217                 m->nbuckets     = cpu_to_le64(i->nbuckets);
218                 m->first_bucket = 0;
219                 m->bucket_size  = cpu_to_le16(i->bucket_size);
220
221                 SET_BCH_MEMBER_TIER(m,          i->tier);
222                 SET_BCH_MEMBER_REPLACEMENT(m,   CACHE_REPLACEMENT_LRU);
223                 SET_BCH_MEMBER_DISCARD(m,       i->discard);
224         }
225
226         for (i = devs; i < devs + nr_devs; i++) {
227                 sb->dev_idx = i - devs;
228
229                 init_layout(&sb->layout, opts.block_size,
230                             i->sb_offset, i->sb_end);
231
232                 if (i->sb_offset == BCH_SB_SECTOR) {
233                         /* Zero start of disk */
234                         static const char zeroes[BCH_SB_SECTOR << 9];
235
236                         xpwrite(i->fd, zeroes, BCH_SB_SECTOR << 9, 0);
237                 }
238
239                 bch2_super_write(i->fd, sb);
240                 close(i->fd);
241         }
242
243         return sb;
244 }
245
246 void bch2_super_write(int fd, struct bch_sb *sb)
247 {
248         struct nonce nonce = { 0 };
249
250         unsigned i;
251         for (i = 0; i < sb->layout.nr_superblocks; i++) {
252                 sb->offset = sb->layout.sb_offset[i];
253
254                 if (sb->offset == BCH_SB_SECTOR) {
255                         /* Write backup layout */
256                         xpwrite(fd, &sb->layout, sizeof(sb->layout),
257                                 BCH_SB_LAYOUT_SECTOR << 9);
258                 }
259
260                 sb->csum = csum_vstruct(NULL, BCH_SB_CSUM_TYPE(sb), nonce, sb);
261                 xpwrite(fd, sb, vstruct_bytes(sb),
262                         le64_to_cpu(sb->offset) << 9);
263         }
264
265         fsync(fd);
266 }
267
268 struct bch_sb *__bch2_super_read(int fd, u64 sector)
269 {
270         struct bch_sb sb, *ret;
271
272         xpread(fd, &sb, sizeof(sb), sector << 9);
273
274         if (memcmp(&sb.magic, &BCACHE_MAGIC, sizeof(sb.magic)))
275                 die("not a bcachefs superblock");
276
277         size_t bytes = vstruct_bytes(&sb);
278
279         ret = malloc(bytes);
280
281         xpread(fd, ret, bytes, sector << 9);
282
283         return ret;
284 }
285
286 struct bch_sb *bch2_super_read(const char *path)
287 {
288         int fd = xopen(path, O_RDONLY);
289         struct bch_sb *sb = __bch2_super_read(fd, BCH_SB_SECTOR);
290         close(fd);
291         return sb;
292 }
293
294 void bch2_super_print(struct bch_sb *sb, int units)
295 {
296         struct bch_sb_field_members *mi;
297         char user_uuid_str[40], internal_uuid_str[40], member_uuid_str[40];
298         char label[BCH_SB_LABEL_SIZE + 1];
299         unsigned i;
300
301         memset(label, 0, sizeof(label));
302         memcpy(label, sb->label, sizeof(sb->label));
303         uuid_unparse(sb->user_uuid.b, user_uuid_str);
304         uuid_unparse(sb->uuid.b, internal_uuid_str);
305
306         printf("External UUID:                  %s\n"
307                "Internal UUID:                  %s\n"
308                "Label:                          %s\n"
309                "Version:                        %llu\n"
310                "Block_size:                     %s\n"
311                "Btree node size:                %s\n"
312                "Error action:                   %s\n"
313                "Clean:                          %llu\n"
314
315                "Metadata replicas:              have %llu, want %llu\n"
316                "Data replicas:                  have %llu, want %llu\n"
317
318                "Metadata checksum type:         %s\n"
319                "Data checksum type:             %s\n"
320                "Compression type:               %s\n"
321
322                "String hash type:               %s\n"
323                "32 bit inodes:                  %llu\n"
324                "GC reserve percentage:          %llu%%\n"
325                "Root reserve percentage:        %llu%%\n"
326
327                "Devices:                        %u\n",
328                user_uuid_str,
329                internal_uuid_str,
330                label,
331                le64_to_cpu(sb->version),
332                pr_units(le16_to_cpu(sb->block_size), units),
333                pr_units(BCH_SB_BTREE_NODE_SIZE(sb), units),
334
335                BCH_SB_ERROR_ACTION(sb) < BCH_NR_ERROR_ACTIONS
336                ? bch2_error_actions[BCH_SB_ERROR_ACTION(sb)]
337                : "unknown",
338
339                BCH_SB_CLEAN(sb),
340
341                0LLU, //BCH_SB_META_REPLICAS_HAVE(sb),
342                BCH_SB_META_REPLICAS_WANT(sb),
343                0LLU, //BCH_SB_DATA_REPLICAS_HAVE(sb),
344                BCH_SB_DATA_REPLICAS_WANT(sb),
345
346                BCH_SB_META_CSUM_TYPE(sb) < BCH_CSUM_NR
347                ? bch2_csum_types[BCH_SB_META_CSUM_TYPE(sb)]
348                : "unknown",
349
350                BCH_SB_DATA_CSUM_TYPE(sb) < BCH_CSUM_NR
351                ? bch2_csum_types[BCH_SB_DATA_CSUM_TYPE(sb)]
352                : "unknown",
353
354                BCH_SB_COMPRESSION_TYPE(sb) < BCH_COMPRESSION_NR
355                ? bch2_compression_types[BCH_SB_COMPRESSION_TYPE(sb)]
356                : "unknown",
357
358                BCH_SB_STR_HASH_TYPE(sb) < BCH_STR_HASH_NR
359                ? bch2_str_hash_types[BCH_SB_STR_HASH_TYPE(sb)]
360                : "unknown",
361
362                BCH_SB_INODE_32BIT(sb),
363                BCH_SB_GC_RESERVE(sb),
364                BCH_SB_ROOT_RESERVE(sb),
365
366                sb->nr_devices);
367
368         mi = bch2_sb_get_members(sb);
369         if (!mi) {
370                 printf("Member info section missing\n");
371                 return;
372         }
373
374         for (i = 0; i < sb->nr_devices; i++) {
375                 struct bch_member *m = mi->members + i;
376                 time_t last_mount = le64_to_cpu(m->last_mount);
377
378                 uuid_unparse(m->uuid.b, member_uuid_str);
379
380                 printf("\n"
381                        "Device %u:\n"
382                        "  UUID:                         %s\n"
383                        "  Size:                         %s\n"
384                        "  Bucket size:                  %s\n"
385                        "  First bucket:                 %u\n"
386                        "  Buckets:                      %llu\n"
387                        "  Last mount:                   %s\n"
388                        "  State:                        %s\n"
389                        "  Tier:                         %llu\n"
390                        "  Has metadata:                 %llu\n"
391                        "  Has data:                     %llu\n"
392                        "  Replacement policy:           %s\n"
393                        "  Discard:                      %llu\n",
394                        i, member_uuid_str,
395                        pr_units(le16_to_cpu(m->bucket_size) *
396                                 le64_to_cpu(m->nbuckets), units),
397                        pr_units(le16_to_cpu(m->bucket_size), units),
398                        le16_to_cpu(m->first_bucket),
399                        le64_to_cpu(m->nbuckets),
400                        last_mount ? ctime(&last_mount) : "(never)",
401
402                        BCH_MEMBER_STATE(m) < BCH_MEMBER_STATE_NR
403                        ? bch2_dev_state[BCH_MEMBER_STATE(m)]
404                        : "unknown",
405
406                        BCH_MEMBER_TIER(m),
407                        0LLU, //BCH_MEMBER_HAS_METADATA(m),
408                        0LLU, //BCH_MEMBER_HAS_DATA(m),
409
410                        BCH_MEMBER_REPLACEMENT(m) < CACHE_REPLACEMENT_NR
411                        ? bch2_cache_replacement_policies[BCH_MEMBER_REPLACEMENT(m)]
412                        : "unknown",
413
414                        BCH_MEMBER_DISCARD(m));
415         }
416 }