]> git.sesse.net Git - bcachefs-tools-debian/blob - libbcache.c
f17bd4ad4d677a373bbc15468847f4c542a0c4c1
[bcachefs-tools-debian] / libbcache.c
1 #include <errno.h>
2 #include <fcntl.h>
3 #include <stdbool.h>
4 #include <stdint.h>
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <string.h>
8 #include <sys/stat.h>
9 #include <sys/types.h>
10 #include <unistd.h>
11
12 #include <uuid/uuid.h>
13
14 #include "ccan/ilog/ilog.h"
15
16 #include "bcache-ondisk.h"
17 #include "libbcache.h"
18 #include "crypto.h"
19
20 #define BCH_MIN_NR_NBUCKETS     (1 << 10)
21
22 /* first bucket should start 1 mb in, in sectors: */
23 #define FIRST_BUCKET_OFFSET     (1 << 11)
24
25 void __do_write_sb(int fd, void *sb, size_t bytes)
26 {
27         char zeroes[SB_SECTOR << 9] = {0};
28
29         /* Zero start of disk */
30         if (pwrite(fd, zeroes, SB_SECTOR << 9, 0) != SB_SECTOR << 9) {
31                 perror("write error trying to zero start of disk\n");
32                 exit(EXIT_FAILURE);
33         }
34         /* Write superblock */
35         if (pwrite(fd, sb, bytes, SB_SECTOR << 9) != bytes) {
36                 perror("write error trying to write superblock\n");
37                 exit(EXIT_FAILURE);
38         }
39
40         fsync(fd);
41         close(fd);
42 }
43
/*
 * Write superblock @_sb to @_fd via __do_write_sb(). The length written is
 * the distance in bytes from the start of @_sb to the address yielded by
 * __bset_bkey_last(@_sb). Note that @_sb is evaluated twice.
 *
 * Expands to a single expression with no trailing semicolon, so it is safe
 * to use as the body of an unbraced if/else.
 */
#define do_write_sb(_fd, _sb)                                           \
        __do_write_sb((_fd), (_sb),                                     \
                      (size_t) ((char *) __bset_bkey_last(_sb) -        \
                                (char *) (_sb)))
46
47 /* minimum size filesystem we can create, given a bucket size: */
48 static u64 min_size(unsigned bucket_size)
49 {
50         return (DIV_ROUND_UP(FIRST_BUCKET_OFFSET, bucket_size) +
51                 BCH_MIN_NR_NBUCKETS) * bucket_size;
52 }
53
54 void bcache_format(struct dev_opts *devs, size_t nr_devs,
55                    unsigned block_size,
56                    unsigned btree_node_size,
57                    unsigned meta_csum_type,
58                    unsigned data_csum_type,
59                    unsigned compression_type,
60                    const char *passphrase,
61                    unsigned meta_replicas,
62                    unsigned data_replicas,
63                    unsigned on_error_action,
64                    char *label,
65                    uuid_le uuid)
66 {
67         struct cache_sb *sb;
68         struct dev_opts *i;
69
70         /* calculate block size: */
71         if (!block_size)
72                 for (i = devs; i < devs + nr_devs; i++)
73                         block_size = max(block_size,
74                                          get_blocksize(i->path, i->fd));
75
76         /* calculate bucket sizes: */
77         for (i = devs; i < devs + nr_devs; i++) {
78                 if (!i->size)
79                         i->size = get_size(i->path, i->fd);
80
81                 if (!i->bucket_size) {
82                         if (i->size < min_size(block_size))
83                                 die("cannot format %s, too small (%llu sectors, min %llu)",
84                                     i->path, i->size, min_size(block_size));
85
86                         /* Want a bucket size of at least 128k, if possible: */
87                         i->bucket_size = max(block_size, 256U);
88
89                         if (i->size >= min_size(i->bucket_size)) {
90                                 unsigned scale = max(1U,
91                                         ilog2(i->size / min_size(i->bucket_size)) / 4);
92
93                                 scale = rounddown_pow_of_two(scale);
94
95                                 /* max bucket size 1 mb */
96                                 i->bucket_size = min(i->bucket_size * scale, 1U << 11);
97                         } else {
98                                 do {
99                                         i->bucket_size /= 2;
100                                 } while (i->size < min_size(i->bucket_size));
101                         }
102                 }
103
104                 /* first bucket: 1 mb in */
105                 i->first_bucket = DIV_ROUND_UP(FIRST_BUCKET_OFFSET, i->bucket_size);
106                 i->nbuckets     = i->size / i->bucket_size;
107
108                 if (i->bucket_size < block_size)
109                         die("Bucket size cannot be smaller than block size");
110
111                 if (i->nbuckets - i->first_bucket < BCH_MIN_NR_NBUCKETS)
112                         die("Not enough buckets: %llu, need %u (bucket size %u)",
113                             i->nbuckets - i->first_bucket, BCH_MIN_NR_NBUCKETS,
114                             i->bucket_size);
115         }
116
117         /* calculate btree node size: */
118         if (!btree_node_size) {
119                 /* 256k default btree node size */
120                 btree_node_size = 512;
121
122                 for (i = devs; i < devs + nr_devs; i++)
123                         btree_node_size = min(btree_node_size, i->bucket_size);
124         }
125
126         sb = calloc(1, sizeof(*sb) + sizeof(struct cache_member) * nr_devs);
127
128         sb->offset      = __cpu_to_le64(SB_SECTOR);
129         sb->version     = __cpu_to_le64(BCACHE_SB_VERSION_CDEV_V3);
130         sb->magic       = BCACHE_MAGIC;
131         sb->block_size  = __cpu_to_le16(block_size);
132         sb->user_uuid   = uuid;
133         sb->nr_in_set   = nr_devs;
134
135         uuid_generate(sb->set_uuid.b);
136
137         if (label)
138                 strncpy((char *) sb->label, label, sizeof(sb->label));
139
140         /*
141          * don't have a userspace crc32c implementation handy, just always use
142          * crc64
143          */
144         SET_CACHE_SB_CSUM_TYPE(sb,              BCH_CSUM_CRC64);
145         SET_CACHE_SET_META_CSUM_TYPE(sb,        meta_csum_type);
146         SET_CACHE_SET_DATA_CSUM_TYPE(sb,        data_csum_type);
147         SET_CACHE_SET_COMPRESSION_TYPE(sb,      compression_type);
148
149         SET_CACHE_SET_BTREE_NODE_SIZE(sb,       btree_node_size);
150         SET_CACHE_SET_META_REPLICAS_WANT(sb,    meta_replicas);
151         SET_CACHE_SET_META_REPLICAS_HAVE(sb,    meta_replicas);
152         SET_CACHE_SET_DATA_REPLICAS_WANT(sb,    data_replicas);
153         SET_CACHE_SET_DATA_REPLICAS_HAVE(sb,    data_replicas);
154         SET_CACHE_SET_ERROR_ACTION(sb,          on_error_action);
155         SET_CACHE_SET_STR_HASH_TYPE(sb,         BCH_STR_HASH_SIPHASH);
156
157         if (passphrase) {
158                 struct bcache_key key;
159                 struct bcache_disk_key disk_key;
160
161                 derive_passphrase(&key, passphrase);
162                 disk_key_init(&disk_key);
163                 disk_key_encrypt(sb, &disk_key, &key);
164
165                 memcpy(sb->encryption_key, &disk_key, sizeof(disk_key));
166                 SET_CACHE_SET_ENCRYPTION_TYPE(sb, 1);
167                 SET_CACHE_SET_ENCRYPTION_KEY(sb, 1);
168
169                 memzero_explicit(&disk_key, sizeof(disk_key));
170                 memzero_explicit(&key, sizeof(key));
171         }
172
173         for (i = devs; i < devs + nr_devs; i++) {
174                 struct cache_member *m = sb->members + (i - devs);
175
176                 uuid_generate(m->uuid.b);
177                 m->nbuckets     = __cpu_to_le64(i->nbuckets);
178                 m->first_bucket = __cpu_to_le16(i->first_bucket);
179                 m->bucket_size  = __cpu_to_le16(i->bucket_size);
180
181                 SET_CACHE_TIER(m,               i->tier);
182                 SET_CACHE_REPLACEMENT(m,        CACHE_REPLACEMENT_LRU);
183                 SET_CACHE_DISCARD(m,            i->discard);
184         }
185
186         sb->u64s = __cpu_to_le16(bch_journal_buckets_offset(sb));
187
188         for (i = devs; i < devs + nr_devs; i++) {
189                 struct cache_member *m = sb->members + (i - devs);
190                 char uuid_str[40], set_uuid_str[40];
191
192                 sb->disk_uuid   = m->uuid;
193                 sb->nr_this_dev = i - devs;
194                 sb->csum        = __cpu_to_le64(__csum_set(sb, __le16_to_cpu(sb->u64s),
195                                                            CACHE_SB_CSUM_TYPE(sb)));
196
197                 uuid_unparse(sb->disk_uuid.b, uuid_str);
198                 uuid_unparse(sb->user_uuid.b, set_uuid_str);
199                 printf("UUID:                   %s\n"
200                        "Set UUID:               %s\n"
201                        "version:                %u\n"
202                        "nbuckets:               %llu\n"
203                        "block_size:             %u\n"
204                        "bucket_size:            %u\n"
205                        "nr_in_set:              %u\n"
206                        "nr_this_dev:            %u\n"
207                        "first_bucket:           %u\n",
208                        uuid_str, set_uuid_str,
209                        (unsigned) sb->version,
210                        __le64_to_cpu(m->nbuckets),
211                        __le16_to_cpu(sb->block_size),
212                        __le16_to_cpu(m->bucket_size),
213                        sb->nr_in_set,
214                        sb->nr_this_dev,
215                        __le16_to_cpu(m->first_bucket));
216
217                 do_write_sb(i->fd, sb);
218         }
219
220         free(sb);
221 }
222
223 void bcache_super_read(const char *path, struct cache_sb *sb)
224 {
225         int fd = open(path, O_RDONLY);
226         if (fd < 0)
227                 die("couldn't open %s", path);
228
229         if (pread(fd, sb, sizeof(*sb), SB_SECTOR << 9) != sizeof(*sb))
230                 die("error reading superblock");
231
232         if (memcmp(&sb->magic, &BCACHE_MAGIC, sizeof(sb->magic)))
233                 die("not a bcache superblock");
234 }