#include "bcache.h"
#include "blockdev.h"
#include "checksum.h"
#include "error.h"
#include "io.h"
#include "journal.h"
#include "super-io.h"
#include "super.h"
#include "vstructs.h"

#include <linux/backing-dev.h>

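/*
 * The superblock layout is read from a fixed sector (BCH_SB_LAYOUT_SECTOR),
 * so its size must never change:
 */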
static inline void __bch_sb_layout_size_assert(void)
{
        BUILD_BUG_ON(sizeof(struct bch_sb_layout) != 512);
}

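/* Return the optional superblock field of the given type, if present: */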
struct bch_sb_field *bch_sb_field_get(struct bch_sb *sb,
                                      enum bch_sb_field_types type)
{
        struct bch_sb_field *f;

        /* XXX: need locking around superblock to access optional fields */

        vstruct_for_each(sb, f)
                if (le32_to_cpu(f->type) == type)
                        return f;
        return NULL;
}

void bch_free_super(struct bcache_superblock *sb)
{
        if (sb->bio)
                bio_put(sb->bio);
        if (!IS_ERR_OR_NULL(sb->bdev))
                blkdev_put(sb->bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);

        free_pages((unsigned long) sb->sb, sb->page_order);
        memset(sb, 0, sizeof(*sb));
}

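/*
 * Reallocate the in-memory superblock (and the bio used to read/write it) to
 * hold at least 1 << @order pages:
 */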
static int __bch_super_realloc(struct bcache_superblock *sb, unsigned order)
{
        struct bch_sb *new_sb;
        struct bio *bio;

        if (sb->page_order >= order && sb->sb)
                return 0;

        if (dynamic_fault("bcache:add:super_realloc"))
                return -ENOMEM;

        bio = bio_kmalloc(GFP_KERNEL, 1 << order);
        if (!bio)
                return -ENOMEM;

        if (sb->bio)
                bio_put(sb->bio);
        sb->bio = bio;

        new_sb = (void *) __get_free_pages(GFP_KERNEL, order);
        if (!new_sb)
                return -ENOMEM;

        if (sb->sb)
                memcpy(new_sb, sb->sb, PAGE_SIZE << sb->page_order);

        free_pages((unsigned long) sb->sb, sb->page_order);
        sb->sb = new_sb;

        sb->page_order = order;

        return 0;
}

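/*
 * Grow a device's superblock to hold @u64s worth of fields, bounded by the
 * maximum superblock size in the layout:
 */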
int bch_dev_sb_realloc(struct bcache_superblock *sb, unsigned u64s)
{
        u64 new_bytes = __vstruct_bytes(struct bch_sb, u64s);
        u64 max_bytes = 512 << sb->sb->layout.sb_max_size_bits;

        if (new_bytes > max_bytes) {
                char buf[BDEVNAME_SIZE];

                pr_err("%s: superblock too big: want %llu but have %llu",
                       bdevname(sb->bdev, buf), new_bytes, max_bytes);
                return -ENOSPC;
        }

        return __bch_super_realloc(sb, get_order(new_bytes));
}

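/* Grow the filesystem's in-memory copy of the superblock: */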
static int bch_fs_sb_realloc(struct cache_set *c, unsigned u64s)
{
        u64 bytes = __vstruct_bytes(struct bch_sb, u64s);
        struct bch_sb *sb;
        unsigned order = get_order(bytes);

        if (c->disk_sb && order <= c->disk_sb_order)
                return 0;

        sb = (void *) __get_free_pages(GFP_KERNEL|__GFP_ZERO, order);
        if (!sb)
                return -ENOMEM;

        if (c->disk_sb)
                memcpy(sb, c->disk_sb, PAGE_SIZE << c->disk_sb_order);

        free_pages((unsigned long) c->disk_sb, c->disk_sb_order);

        c->disk_sb = sb;
        c->disk_sb_order = order;
        return 0;
}

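/*
 * Resize field @f within @sb to @u64s, moving any following fields; if @f is
 * NULL, append a new zeroed field at the end:
 */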
static struct bch_sb_field *__bch_sb_field_resize(struct bch_sb *sb,
                                                  struct bch_sb_field *f,
                                                  unsigned u64s)
{
        unsigned old_u64s = f ? le32_to_cpu(f->u64s) : 0;

        if (!f) {
                f = vstruct_last(sb);
                memset(f, 0, sizeof(u64) * u64s);
                f->u64s = cpu_to_le32(u64s);
                f->type = 0;
        } else {
                void *src, *dst;

                src = vstruct_end(f);
                f->u64s = cpu_to_le32(u64s);
                dst = vstruct_end(f);

                memmove(dst, src, vstruct_end(sb) - src);

                if (dst > src)
                        memset(src, 0, dst - src);
        }

        le32_add_cpu(&sb->u64s, u64s - old_u64s);

        return f;
}

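/*
 * Resize a superblock field, keeping the filesystem superblock and every
 * member device's superblock in sync:
 */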
struct bch_sb_field *bch_fs_sb_field_resize(struct cache_set *c,
                                            struct bch_sb_field *f,
                                            unsigned u64s)
{
        ssize_t old_u64s = f ? le32_to_cpu(f->u64s) : 0;
        ssize_t d = -old_u64s + u64s;
        struct cache *ca;
        unsigned i;

        lockdep_assert_held(&c->sb_lock);

        if (bch_fs_sb_realloc(c, le32_to_cpu(c->disk_sb->u64s) + d))
                return NULL;

        for_each_cache(ca, c, i) {
                struct bcache_superblock *sb = &ca->disk_sb;

                if (bch_dev_sb_realloc(sb, le32_to_cpu(sb->sb->u64s) + d)) {
                        percpu_ref_put(&ca->ref);
                        return NULL;
                }
        }

        return __bch_sb_field_resize(c->disk_sb, f, u64s);
}

struct bch_sb_field *bch_dev_sb_field_resize(struct bcache_superblock *sb,
                                             struct bch_sb_field *f,
                                             unsigned u64s)
{
        ssize_t old_u64s = f ? le32_to_cpu(f->u64s) : 0;
        ssize_t d = -old_u64s + u64s;

        if (bch_dev_sb_realloc(sb, le32_to_cpu(sb->sb->u64s) + d))
                return NULL;

        return __bch_sb_field_resize(sb->sb, f, u64s);
}

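/* Sanity check the superblock layout read from disk: */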
static const char *validate_sb_layout(struct bch_sb_layout *layout)
{
        u64 offset, prev_offset, max_sectors;
        unsigned i;

        if (uuid_le_cmp(layout->magic, BCACHE_MAGIC))
                return "Not a bcache superblock layout";

        if (layout->layout_type != 0)
                return "Invalid superblock layout type";

        if (!layout->nr_superblocks)
                return "Invalid superblock layout: no superblocks";

        if (layout->nr_superblocks > ARRAY_SIZE(layout->sb_offset))
                return "Invalid superblock layout: too many superblocks";

        max_sectors = 1 << layout->sb_max_size_bits;

        prev_offset = le64_to_cpu(layout->sb_offset[0]);

        if (prev_offset != BCH_SB_SECTOR)
                return "Invalid superblock layout: doesn't have default superblock location";

        for (i = 1; i < layout->nr_superblocks; i++) {
                offset = le64_to_cpu(layout->sb_offset[i]);

                if (offset < prev_offset + max_sectors)
                        return "Invalid superblock layout: superblocks overlap";
                prev_offset = offset;
        }

        return NULL;
}

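/*
 * Validate a single device's superblock, including member info and journal
 * buckets; returns an error string, or NULL on success:
 */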
const char *bch_validate_cache_super(struct bcache_superblock *disk_sb)
{
        struct bch_sb *sb = disk_sb->sb;
        struct bch_sb_field *f;
        struct bch_sb_field_members *sb_mi;
        struct bch_sb_field_journal *journal;
        struct cache_member_cpu mi;
        const char *err;
        u16 block_size;
        unsigned i;

        switch (le64_to_cpu(sb->version)) {
        case BCACHE_SB_VERSION_CDEV_V4:
                break;
        default:
                return "Unsupported superblock version";
        }

        if (BCH_SB_INITIALIZED(sb) &&
            le64_to_cpu(sb->version) != BCACHE_SB_VERSION_CDEV_V4)
                return "Unsupported superblock version";

        block_size = le16_to_cpu(sb->block_size);

        if (!is_power_of_2(block_size) ||
            block_size > PAGE_SECTORS)
                return "Bad block size";

        if (bch_is_zero(sb->user_uuid.b, sizeof(uuid_le)))
                return "Bad user UUID";

        if (bch_is_zero(sb->uuid.b, sizeof(uuid_le)))
                return "Bad internal UUID";

        if (!sb->nr_devices ||
            sb->nr_devices <= sb->dev_idx ||
            sb->nr_devices > BCH_SB_MEMBERS_MAX)
                return "Bad cache device number in set";

        if (!BCH_SB_META_REPLICAS_WANT(sb) ||
            BCH_SB_META_REPLICAS_WANT(sb) >= BCH_REPLICAS_MAX)
                return "Invalid number of metadata replicas";

        if (!BCH_SB_META_REPLICAS_HAVE(sb) ||
            BCH_SB_META_REPLICAS_HAVE(sb) >
            BCH_SB_META_REPLICAS_WANT(sb))
                return "Invalid number of metadata replicas";

        if (!BCH_SB_DATA_REPLICAS_WANT(sb) ||
            BCH_SB_DATA_REPLICAS_WANT(sb) >= BCH_REPLICAS_MAX)
                return "Invalid number of data replicas";

        if (!BCH_SB_DATA_REPLICAS_HAVE(sb) ||
            BCH_SB_DATA_REPLICAS_HAVE(sb) >
            BCH_SB_DATA_REPLICAS_WANT(sb))
                return "Invalid number of data replicas";

        if (!BCH_SB_BTREE_NODE_SIZE(sb))
                return "Btree node size not set";

        if (!is_power_of_2(BCH_SB_BTREE_NODE_SIZE(sb)))
                return "Btree node size not a power of two";

        if (BCH_SB_BTREE_NODE_SIZE(sb) > BTREE_NODE_SIZE_MAX)
                return "Btree node size too large";

        if (BCH_SB_GC_RESERVE(sb) < 5)
                return "gc reserve percentage too small";

        if (1U << BCH_SB_JOURNAL_ENTRY_SIZE(sb) < block_size)
                return "max journal entry size too small";

        /* 4 MB max: */
        if (512U << BCH_SB_JOURNAL_ENTRY_SIZE(sb) > JOURNAL_ENTRY_SIZE_MAX)
                return "max journal entry size too big";

        if (!sb->time_precision ||
            le32_to_cpu(sb->time_precision) > NSEC_PER_SEC)
                return "invalid time precision";

        /* validate layout */
        err = validate_sb_layout(&sb->layout);
        if (err)
                return err;

        vstruct_for_each(sb, f) {
                if (!f->u64s)
                        return "Invalid superblock: invalid optional field";

                if (vstruct_next(f) > vstruct_last(sb))
                        return "Invalid superblock: invalid optional field";

                if (le32_to_cpu(f->type) >= BCH_SB_FIELD_NR)
                        return "Invalid superblock: unknown optional field type";
        }

        /* Validate member info: */
        sb_mi = bch_sb_get_members(sb);
        if (!sb_mi)
                return "Invalid superblock: member info area missing";

        if ((void *) (sb_mi->members + sb->nr_devices) >
            vstruct_end(&sb_mi->field))
                return "Invalid superblock: bad member info";

        mi = cache_mi_to_cpu_mi(sb_mi->members + sb->dev_idx);

        for (i = 0; i < sb->layout.nr_superblocks; i++) {
                u64 offset = le64_to_cpu(sb->layout.sb_offset[i]);
                u64 max_size = 1 << sb->layout.sb_max_size_bits;

                if (offset + max_size > mi.first_bucket * mi.bucket_size)
                        return "Invalid superblock: first bucket comes before end of super";
        }

        if (mi.nbuckets > LONG_MAX)
                return "Too many buckets";

        if (mi.nbuckets - mi.first_bucket < 1 << 10)
                return "Not enough buckets";

        if (!is_power_of_2(mi.bucket_size) ||
            mi.bucket_size < PAGE_SECTORS ||
            mi.bucket_size < block_size)
                return "Bad bucket size";

        if (get_capacity(disk_sb->bdev->bd_disk) <
            mi.bucket_size * mi.nbuckets)
                return "Invalid superblock: device too small";

        /* Validate journal buckets: */
        journal = bch_sb_get_journal(sb);
        if (journal) {
                for (i = 0; i < bch_nr_journal_buckets(journal); i++) {
                        u64 b = le64_to_cpu(journal->buckets[i]);

                        if (b < mi.first_bucket || b >= mi.nbuckets)
                                return "bad journal bucket";
                }
        }

        return NULL;
}

/* device open: */

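/* Returns true if @bdev is already open as a cache device: */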
static bool bch_is_open_cache(struct block_device *bdev)
{
        struct cache_set *c;
        struct cache *ca;
        unsigned i;

        rcu_read_lock();
        list_for_each_entry(c, &bch_fs_list, list)
                for_each_cache_rcu(ca, c, i)
                        if (ca->disk_sb.bdev == bdev) {
                                rcu_read_unlock();
                                return true;
                        }
        rcu_read_unlock();
        return false;
}

static bool bch_is_open(struct block_device *bdev)
{
        lockdep_assert_held(&bch_register_lock);

        return bch_is_open_cache(bdev) || bch_is_open_backing_dev(bdev);
}

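/*
 * Open a block device by path - read only if the nochanges option is set -
 * returning an error string on failure:
 */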
static const char *bch_blkdev_open(const char *path, void *holder,
                                   struct bch_opts opts,
                                   struct block_device **ret)
{
        struct block_device *bdev;
        fmode_t mode = opts.nochanges > 0
                ? FMODE_READ
                : FMODE_READ|FMODE_WRITE|FMODE_EXCL;
        const char *err;

        *ret = NULL;
        bdev = blkdev_get_by_path(path, mode, holder);

        if (bdev == ERR_PTR(-EBUSY)) {
                bdev = lookup_bdev(path);
                if (IS_ERR(bdev))
                        return "device busy";

                err = bch_is_open(bdev)
                        ? "device already registered"
                        : "device busy";

                bdput(bdev);
                return err;
        }

        if (IS_ERR(bdev))
                return "failed to open device";

        bdev_get_queue(bdev)->backing_dev_info.capabilities |= BDI_CAP_STABLE_WRITES;

        *ret = bdev;
        return NULL;
}

/* Update cached mi: */
int bch_fs_mi_update(struct cache_set *c, struct bch_member *mi,
                     unsigned nr_devices)
{
        struct cache_member_rcu *new, *old;
        struct cache *ca;
        unsigned i;

        lockdep_assert_held(&c->sb_lock);

        new = kzalloc(sizeof(struct cache_member_rcu) +
                      sizeof(struct cache_member_cpu) * nr_devices,
                      GFP_KERNEL);
        if (!new)
                return -ENOMEM;

        new->nr_devices = nr_devices;

        for (i = 0; i < nr_devices; i++)
                new->m[i] = cache_mi_to_cpu_mi(&mi[i]);

        rcu_read_lock();
        for_each_cache(ca, c, i)
                ca->mi = new->m[i];
        rcu_read_unlock();

        old = rcu_dereference_protected(c->members,
                                lockdep_is_held(&c->sb_lock));

        rcu_assign_pointer(c->members, new);
        if (old)
                kfree_rcu(old, rcu);

        return 0;
}

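/* Update the cached copies of commonly used superblock fields: */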
static void bch_sb_update(struct cache_set *c)
{
        struct bch_sb *src = c->disk_sb;

        lockdep_assert_held(&c->sb_lock);

        c->sb.uuid              = src->uuid;
        c->sb.user_uuid         = src->user_uuid;
        c->sb.block_size        = le16_to_cpu(src->block_size);
        c->sb.btree_node_size   = BCH_SB_BTREE_NODE_SIZE(src);
        c->sb.nr_devices        = src->nr_devices;
        c->sb.clean             = BCH_SB_CLEAN(src);
        c->sb.meta_replicas_have= BCH_SB_META_REPLICAS_HAVE(src);
        c->sb.data_replicas_have= BCH_SB_DATA_REPLICAS_HAVE(src);
        c->sb.str_hash_type     = BCH_SB_STR_HASH_TYPE(src);
        c->sb.encryption_type   = BCH_SB_ENCRYPTION_TYPE(src);
        c->sb.time_base_lo      = le64_to_cpu(src->time_base_lo);
        c->sb.time_base_hi      = le32_to_cpu(src->time_base_hi);
        c->sb.time_precision    = le32_to_cpu(src->time_precision);
}

/* doesn't copy the journal field: */
static void __copy_super(struct bch_sb *dst, struct bch_sb *src)
{
        struct bch_sb_field *src_f, *dst_f;

        dst->version            = src->version;
        dst->seq                = src->seq;
        dst->uuid               = src->uuid;
        dst->user_uuid          = src->user_uuid;
        memcpy(dst->label,      src->label, sizeof(dst->label));

        dst->block_size         = src->block_size;
        dst->nr_devices         = src->nr_devices;

        dst->time_base_lo       = src->time_base_lo;
        dst->time_base_hi       = src->time_base_hi;
        dst->time_precision     = src->time_precision;

        memcpy(dst->flags,      src->flags,     sizeof(dst->flags));
        memcpy(dst->features,   src->features,  sizeof(dst->features));
        memcpy(dst->compat,     src->compat,    sizeof(dst->compat));

        vstruct_for_each(src, src_f) {
                if (src_f->type == BCH_SB_FIELD_journal)
                        continue;

                dst_f = bch_sb_field_get(dst, src_f->type);
                dst_f = __bch_sb_field_resize(dst, dst_f,
                                le32_to_cpu(src_f->u64s));

                memcpy(dst_f, src_f, vstruct_bytes(src_f));
        }
}

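/*
 * Initialize the filesystem superblock from a device superblock; the
 * device's journal field is not copied:
 */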
int bch_sb_to_cache_set(struct cache_set *c, struct bch_sb *src)
{
        struct bch_sb_field_members *members =
                bch_sb_get_members(src);
        struct bch_sb_field_journal *journal_buckets =
                bch_sb_get_journal(src);
        unsigned journal_u64s = journal_buckets
                ? le32_to_cpu(journal_buckets->field.u64s)
                : 0;

        lockdep_assert_held(&c->sb_lock);

        if (bch_fs_sb_realloc(c, le32_to_cpu(src->u64s) - journal_u64s))
                return -ENOMEM;

        if (bch_fs_mi_update(c, members->members, src->nr_devices))
                return -ENOMEM;

        __copy_super(c->disk_sb, src);
        bch_sb_update(c);

        return 0;
}

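/*
 * Copy the filesystem superblock out to a member device, preserving that
 * device's journal field:
 */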
int bch_sb_from_cache_set(struct cache_set *c, struct cache *ca)
{
        struct bch_sb *src = c->disk_sb, *dst = ca->disk_sb.sb;
        struct bch_sb_field_journal *journal_buckets =
                bch_sb_get_journal(dst);
        unsigned journal_u64s = journal_buckets
                ? le32_to_cpu(journal_buckets->field.u64s)
                : 0;
        unsigned u64s = le32_to_cpu(src->u64s) + journal_u64s;
        int ret;

        ret = bch_dev_sb_realloc(&ca->disk_sb, u64s);
        if (ret)
                return ret;

        __copy_super(dst, src);

        return 0;
}

/* read superblock: */

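/*
 * Read one superblock at @offset, growing the buffer and rereading if it
 * doesn't fit, then verify the checksum:
 */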
static const char *read_one_super(struct bcache_superblock *sb, u64 offset)
{
        struct bch_csum csum;
        size_t bytes;
        unsigned order;
reread:
        bio_reset(sb->bio);
        sb->bio->bi_bdev = sb->bdev;
        sb->bio->bi_iter.bi_sector = offset;
        sb->bio->bi_iter.bi_size = PAGE_SIZE << sb->page_order;
        bio_set_op_attrs(sb->bio, REQ_OP_READ, REQ_SYNC|REQ_META);
        bch_bio_map(sb->bio, sb->sb);

        if (submit_bio_wait(sb->bio))
                return "IO error";

        if (uuid_le_cmp(sb->sb->magic, BCACHE_MAGIC))
                return "Not a bcache superblock";

        if (le64_to_cpu(sb->sb->version) != BCACHE_SB_VERSION_CDEV_V4)
                return "Unsupported superblock version";

        bytes = vstruct_bytes(sb->sb);

        if (bytes > 512 << sb->sb->layout.sb_max_size_bits)
                return "Bad superblock: too big";

        order = get_order(bytes);
        if (order > sb->page_order) {
                if (__bch_super_realloc(sb, order))
                        return "cannot allocate memory";
                goto reread;
        }

        if (BCH_SB_CSUM_TYPE(sb->sb) >= BCH_CSUM_NR)
                return "unknown csum type";

        /* XXX: verify MACs */
        csum = csum_vstruct(NULL, BCH_SB_CSUM_TYPE(sb->sb),
                            (struct nonce) { 0 }, sb->sb);

        if (bch_crc_cmp(csum, sb->sb->csum))
                return "bad checksum reading superblock";

        return NULL;
}

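/*
 * Read a device's superblock, falling back to the backup locations in the
 * layout if the default superblock can't be read:
 */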
const char *bch_read_super(struct bcache_superblock *sb,
                           struct bch_opts opts,
                           const char *path)
{
        struct bch_sb_layout layout;
        const char *err;
        unsigned i;

        lockdep_assert_held(&bch_register_lock);

        memset(sb, 0, sizeof(*sb));

        err = bch_blkdev_open(path, sb, opts, &sb->bdev);
        if (err)
                return err;

        err = "cannot allocate memory";
        if (__bch_super_realloc(sb, 0))
                goto err;

        err = "dynamic fault";
        if (bch_fs_init_fault("read_super"))
                goto err;

        err = read_one_super(sb, BCH_SB_SECTOR);
        if (!err)
                goto got_super;

        pr_err("error reading default super: %s", err);

        /*
         * Error reading primary superblock - read location of backup
         * superblocks:
         */
        bio_reset(sb->bio);
        sb->bio->bi_bdev = sb->bdev;
        sb->bio->bi_iter.bi_sector = BCH_SB_LAYOUT_SECTOR;
        sb->bio->bi_iter.bi_size = sizeof(struct bch_sb_layout);
        bio_set_op_attrs(sb->bio, REQ_OP_READ, REQ_SYNC|REQ_META);
        /*
         * use sb buffer to read layout, since sb buffer is page aligned but
         * layout won't be:
         */
        bch_bio_map(sb->bio, sb->sb);

        err = "IO error";
        if (submit_bio_wait(sb->bio))
                goto err;

        memcpy(&layout, sb->sb, sizeof(layout));
        err = validate_sb_layout(&layout);
        if (err)
                goto err;

        for (i = 0; i < layout.nr_superblocks; i++) {
                u64 offset = le64_to_cpu(layout.sb_offset[i]);

                if (offset == BCH_SB_SECTOR)
                        continue;

                err = read_one_super(sb, offset);
                if (!err)
                        goto got_super;
        }
        goto err;
got_super:
        pr_debug("read sb version %llu, flags %llu, seq %llu, journal size %u",
                 le64_to_cpu(sb->sb->version),
                 le64_to_cpu(sb->sb->flags),
                 le64_to_cpu(sb->sb->seq),
                 le16_to_cpu(sb->sb->u64s));

        err = "Superblock block size smaller than device block size";
        if (le16_to_cpu(sb->sb->block_size) << 9 <
            bdev_logical_block_size(sb->bdev))
                goto err;

        return NULL;
err:
        bch_free_super(sb);
        return err;
}

/* write superblock: */

static void write_super_endio(struct bio *bio)
{
        struct cache *ca = bio->bi_private;

        /* XXX: return errors directly */

        bch_dev_fatal_io_err_on(bio->bi_error, ca, "superblock write");

        bch_account_io_completion(ca);

        closure_put(&ca->set->sb_write);
        percpu_ref_put(&ca->ref);
}

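/*
 * Write out the @idx'th copy of @ca's superblock; returns false once @idx is
 * past the end of the layout:
 */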
static bool write_one_super(struct cache_set *c, struct cache *ca, unsigned idx)
{
        struct bch_sb *sb = ca->disk_sb.sb;
        struct bio *bio = ca->disk_sb.bio;

        if (idx >= sb->layout.nr_superblocks)
                return false;

        sb->offset = sb->layout.sb_offset[idx];

        SET_BCH_SB_CSUM_TYPE(sb, c->opts.metadata_checksum);
        sb->csum = csum_vstruct(c, BCH_SB_CSUM_TYPE(sb),
                                (struct nonce) { 0 }, sb);

        bio_reset(bio);
        bio->bi_bdev            = ca->disk_sb.bdev;
        bio->bi_iter.bi_sector  = le64_to_cpu(sb->offset);
        bio->bi_iter.bi_size    =
                roundup(vstruct_bytes(sb),
                        bdev_logical_block_size(ca->disk_sb.bdev));
        bio->bi_end_io          = write_super_endio;
        bio->bi_private         = ca;
        bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_SYNC|REQ_META);
        bch_bio_map(bio, sb);

        percpu_ref_get(&ca->ref);
        closure_bio_submit_punt(bio, &c->sb_write, c);

        return true;
}

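/*
 * Write all superblock copies on all member devices: one copy per device per
 * round, waiting for each round of writes to complete:
 */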
void bch_write_super(struct cache_set *c)
{
        struct bch_sb_field_members *members =
                bch_sb_get_members(c->disk_sb);
        struct closure *cl = &c->sb_write;
        struct cache *ca;
        unsigned i, super_idx = 0;
        bool wrote;

        lockdep_assert_held(&c->sb_lock);

        closure_init_stack(cl);

        le64_add_cpu(&c->disk_sb->seq, 1);

        for_each_cache(ca, c, i)
                bch_sb_from_cache_set(c, ca);

        do {
                wrote = false;
                for_each_cache(ca, c, i)
                        if (write_one_super(c, ca, super_idx))
                                wrote = true;

                closure_sync(cl);
                super_idx++;
        } while (wrote);

        /* Make new options visible after they're persistent: */
        bch_fs_mi_update(c, members->members, c->sb.nr_devices);
        bch_sb_update(c);
}

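/*
 * Mark the devices that @k's non-cached pointers live on as having data (or
 * metadata) in the superblock, and write it out:
 */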
void bch_check_mark_super_slowpath(struct cache_set *c, const struct bkey_i *k,
                                   bool meta)
{
        struct bch_member *mi;
        struct bkey_s_c_extent e = bkey_i_to_s_c_extent(k);
        const struct bch_extent_ptr *ptr;

        mutex_lock(&c->sb_lock);

        /* recheck, might have raced */
        if (bch_check_super_marked(c, k, meta)) {
                mutex_unlock(&c->sb_lock);
                return;
        }

        mi = bch_sb_get_members(c->disk_sb)->members;

        extent_for_each_ptr(e, ptr)
                if (!ptr->cached)
                        (meta
                         ? SET_BCH_MEMBER_HAS_METADATA
                         : SET_BCH_MEMBER_HAS_DATA)(mi + ptr->dev, true);

        bch_write_super(c);
        mutex_unlock(&c->sb_lock);
}