#include "bcachefs.h"
#include "journal.h"
#include "replicas.h"
#include "super-io.h"

static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *,
					    struct bch_replicas_cpu *);

/* Replicas tracking - in memory: */

static inline int u8_cmp(u8 l, u8 r)
{
	return (l > r) - (l < r);
}

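/*
 * Device lists within a replicas entry are kept sorted (see
 * replicas_entry_sort() below), so that entries can be compared and
 * deduplicated with a plain memcmp():
 */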
static void verify_replicas_entry_sorted(struct bch_replicas_entry *e)
{
#ifdef CONFIG_BCACHEFS_DEBUG
	unsigned i;

	for (i = 0; i + 1 < e->nr_devs; i++)
		BUG_ON(e->devs[i] >= e->devs[i + 1]);
#endif
}

static void replicas_entry_sort(struct bch_replicas_entry *e)
{
	bubble_sort(e->devs, e->nr_devs, u8_cmp);
}

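/*
 * The in-memory replicas table is an array of fixed-size entries kept in
 * eytzinger (cache friendly breadth-first search) order, so that
 * eytzinger0_find() can binary search it using memcmp() as the comparison
 * function:
 */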
static void bch2_cpu_replicas_sort(struct bch_replicas_cpu *r)
{
	eytzinger0_sort(r->entries, r->nr, r->entry_size, memcmp, NULL);
}

void bch2_replicas_entry_to_text(struct printbuf *out,
				 struct bch_replicas_entry *e)
{
	unsigned i;

	pr_buf(out, "%s: %u/%u [",
	       bch2_data_types[e->data_type],
	       e->nr_required,
	       e->nr_devs);

	for (i = 0; i < e->nr_devs; i++)
		pr_buf(out, i ? " %u" : "%u", e->devs[i]);
	pr_buf(out, "]");
}

void bch2_cpu_replicas_to_text(struct printbuf *out,
			       struct bch_replicas_cpu *r)
{
	struct bch_replicas_entry *e;
	bool first = true;

	for_each_cpu_replicas_entry(r, e) {
		if (!first)
			pr_buf(out, " ");
		first = false;

		bch2_replicas_entry_to_text(out, e);
	}
}

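/*
 * Build a replicas entry from an extent: cached pointers don't count towards
 * durability and are skipped; if the extent has any erasure coded pointers,
 * nr_devs is left at 0 here - replication for those is accounted via the
 * stripe key instead (see stripe_to_replicas() below):
 */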
static void extent_to_replicas(struct bkey_s_c k,
			       struct bch_replicas_entry *r)
{
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
	const union bch_extent_entry *entry;
	struct extent_ptr_decoded p;

	r->nr_required	= 1;

	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
		if (p.ptr.cached)
			continue;

		if (p.ec_nr) {
			r->nr_devs = 0;
			break;
		}

		r->devs[r->nr_devs++] = p.ptr.dev;
	}
}

static void stripe_to_replicas(struct bkey_s_c k,
			       struct bch_replicas_entry *r)
{
	struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);
	const struct bch_extent_ptr *ptr;

	r->nr_required	= s.v->nr_blocks - s.v->nr_redundant;

	for (ptr = s.v->ptrs;
	     ptr < s.v->ptrs + s.v->nr_blocks;
	     ptr++)
		r->devs[r->nr_devs++] = ptr->dev;
}

static void bkey_to_replicas(struct bch_replicas_entry *e,
			     struct bkey_s_c k)
{
	e->nr_devs = 0;

	switch (k.k->type) {
	case KEY_TYPE_btree_ptr:
		e->data_type = BCH_DATA_BTREE;
		extent_to_replicas(k, e);
		break;
	case KEY_TYPE_extent:
		e->data_type = BCH_DATA_USER;
		extent_to_replicas(k, e);
		break;
	case KEY_TYPE_stripe:
		e->data_type = BCH_DATA_USER;
		stripe_to_replicas(k, e);
		break;
	}

	replicas_entry_sort(e);
}

void bch2_devlist_to_replicas(struct bch_replicas_entry *e,
			      enum bch_data_type data_type,
			      struct bch_devs_list devs)
{
	unsigned i;

	BUG_ON(!data_type ||
	       data_type == BCH_DATA_SB ||
	       data_type >= BCH_DATA_NR);

	e->data_type	= data_type;
	e->nr_devs	= 0;
	e->nr_required	= 1;

	for (i = 0; i < devs.nr; i++)
		e->devs[e->nr_devs++] = devs.devs[i];

	replicas_entry_sort(e);
}

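/*
 * Returns a newly allocated copy of @old with @new_entry appended, re-sorted
 * into eytzinger order; on allocation failure the returned table's .entries
 * is NULL, which callers must check:
 */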
static struct bch_replicas_cpu
cpu_replicas_add_entry(struct bch_replicas_cpu *old,
		       struct bch_replicas_entry *new_entry)
{
	unsigned i;
	struct bch_replicas_cpu new = {
		.nr		= old->nr + 1,
		.entry_size	= max_t(unsigned, old->entry_size,
					replicas_entry_bytes(new_entry)),
	};

	BUG_ON(!new_entry->data_type);
	verify_replicas_entry_sorted(new_entry);

	new.entries = kcalloc(new.nr, new.entry_size, GFP_NOIO);
	if (!new.entries)
		return new;

	for (i = 0; i < old->nr; i++)
		memcpy(cpu_replicas_entry(&new, i),
		       cpu_replicas_entry(old, i),
		       old->entry_size);

	memcpy(cpu_replicas_entry(&new, old->nr),
	       new_entry,
	       replicas_entry_bytes(new_entry));

	bch2_cpu_replicas_sort(&new);
	return new;
}

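/*
 * Look up @search in the eytzinger ordered table @r: returns the entry's
 * index, or -1 if not present. Table entries are zero padded out to
 * r->entry_size, so comparing replicas_entry_bytes(search) bytes is
 * sufficient:
 */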
static inline int __replicas_entry_idx(struct bch_replicas_cpu *r,
				       struct bch_replicas_entry *search)
{
	int idx, entry_size = replicas_entry_bytes(search);

	if (unlikely(entry_size > r->entry_size))
		return -1;

	verify_replicas_entry_sorted(search);

#define entry_cmp(_l, _r, size) memcmp(_l, _r, entry_size)
	idx = eytzinger0_find(r->entries, r->nr, r->entry_size,
			      entry_cmp, search);
#undef entry_cmp

	return idx < r->nr ? idx : -1;
}

int bch2_replicas_entry_idx(struct bch_fs *c,
			    struct bch_replicas_entry *search)
{
	replicas_entry_sort(search);

	return __replicas_entry_idx(&c->replicas, search);
}

static bool __replicas_has_entry(struct bch_replicas_cpu *r,
				 struct bch_replicas_entry *search)
{
	return __replicas_entry_idx(r, search) >= 0;
}

bool bch2_replicas_marked(struct bch_fs *c,
			  struct bch_replicas_entry *search,
			  bool check_gc_replicas)
{
	bool marked;

	if (!search->nr_devs)
		return true;

	verify_replicas_entry_sorted(search);

	percpu_down_read_preempt_disable(&c->mark_lock);
	marked = __replicas_has_entry(&c->replicas, search) &&
		(!check_gc_replicas ||
		 likely(!c->replicas_gc.entries) ||
		 __replicas_has_entry(&c->replicas_gc, search));
	percpu_up_read_preempt_enable(&c->mark_lock);

	return marked;
}

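/*
 * Copy the accumulated percpu usage counters from @src_p (laid out according
 * to @src_r) into @dst_p, remapping each replicas entry's counter to that
 * entry's index in the new table @dst_r:
 */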
static void __replicas_table_update(struct bch_fs_usage __percpu *dst_p,
				    struct bch_replicas_cpu *dst_r,
				    struct bch_fs_usage __percpu *src_p,
				    struct bch_replicas_cpu *src_r)
{
	unsigned src_nr = sizeof(struct bch_fs_usage) / sizeof(u64) + src_r->nr;
	struct bch_fs_usage *dst, *src = (void *)
		bch2_acc_percpu_u64s((void *) src_p, src_nr);
	int src_idx, dst_idx;

	preempt_disable();
	dst = this_cpu_ptr(dst_p);
	preempt_enable();

	*dst = *src;

	for (src_idx = 0; src_idx < src_r->nr; src_idx++) {
		if (!src->data[src_idx])
			continue;

		dst_idx = __replicas_entry_idx(dst_r,
				cpu_replicas_entry(src_r, src_idx));
		BUG_ON(dst_idx < 0);

		dst->data[dst_idx] = src->data[src_idx];
	}
}

/*
 * Resize filesystem accounting:
 */
static int replicas_table_update(struct bch_fs *c,
				 struct bch_replicas_cpu *new_r)
{
	struct bch_fs_usage __percpu *new_usage[3] = { NULL, NULL, NULL };
	unsigned bytes = sizeof(struct bch_fs_usage) +
		sizeof(u64) * new_r->nr;
	unsigned i;
	int ret = -ENOMEM;

	for (i = 0; i < 3; i++) {
		if (i < 2 && !c->usage[i])
			continue;

		new_usage[i] = __alloc_percpu_gfp(bytes, sizeof(u64),
						  GFP_NOIO);
		if (!new_usage[i])
			goto err;
	}

	for (i = 0; i < 2; i++) {
		if (!c->usage[i])
			continue;

		__replicas_table_update(new_usage[i],	new_r,
					c->usage[i],	&c->replicas);

		swap(c->usage[i], new_usage[i]);
	}

	swap(c->usage_scratch, new_usage[2]);

	swap(c->replicas, *new_r);
	ret = 0;
err:
	for (i = 0; i < 3; i++)
		free_percpu(new_usage[i]);
	return ret;
}

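/*
 * Size, in u64s, of the journal reservation needed to persist filesystem
 * usage: one jset_entry_usage each for nr_inodes and key_version, one per
 * persistent_reserved slot, and one jset_entry_data_usage per replicas
 * entry:
 */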
static unsigned reserve_journal_replicas(struct bch_fs *c,
					 struct bch_replicas_cpu *r)
{
	struct bch_replicas_entry *e;
	unsigned journal_res_u64s = 0;

	/* nr_inodes: */
	journal_res_u64s +=
		DIV_ROUND_UP(sizeof(struct jset_entry_usage), sizeof(u64));

	/* key_version: */
	journal_res_u64s +=
		DIV_ROUND_UP(sizeof(struct jset_entry_usage), sizeof(u64));

	/* persistent_reserved: */
	journal_res_u64s +=
		DIV_ROUND_UP(sizeof(struct jset_entry_usage), sizeof(u64)) *
		BCH_REPLICAS_MAX;

	for_each_cpu_replicas_entry(r, e)
		journal_res_u64s +=
			DIV_ROUND_UP(sizeof(struct jset_entry_data_usage) +
				     e->nr_devs, sizeof(u64));
	return journal_res_u64s;
}

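/*
 * Slowpath for marking a replicas entry that isn't in the table yet:
 * allocate the new table(s), write the updated superblock first, and only
 * then update the in-memory tables - so there's never data on disk described
 * by a replicas entry the superblock doesn't know about:
 */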
noinline
static int bch2_mark_replicas_slowpath(struct bch_fs *c,
				struct bch_replicas_entry *new_entry)
{
	struct bch_replicas_cpu new_r, new_gc;
	int ret = -ENOMEM;

	memset(&new_r, 0, sizeof(new_r));
	memset(&new_gc, 0, sizeof(new_gc));

	mutex_lock(&c->sb_lock);

	if (c->replicas_gc.entries &&
	    !__replicas_has_entry(&c->replicas_gc, new_entry)) {
		new_gc = cpu_replicas_add_entry(&c->replicas_gc, new_entry);
		if (!new_gc.entries)
			goto err;
	}

	if (!__replicas_has_entry(&c->replicas, new_entry)) {
		new_r = cpu_replicas_add_entry(&c->replicas, new_entry);
		if (!new_r.entries)
			goto err;

		ret = bch2_cpu_replicas_to_sb_replicas(c, &new_r);
		if (ret)
			goto err;

		bch2_journal_entry_res_resize(&c->journal,
				&c->replicas_journal_res,
				reserve_journal_replicas(c, &new_r));
	}

	if (!new_r.entries &&
	    !new_gc.entries)
		goto out;

	/* allocations done, now commit: */

	if (new_r.entries)
		bch2_write_super(c);

	/* don't update in memory replicas until changes are persistent */
	percpu_down_write(&c->mark_lock);
	ret = new_r.entries
		? replicas_table_update(c, &new_r)
		: 0;
	if (new_gc.entries)
		swap(new_gc, c->replicas_gc);
	percpu_up_write(&c->mark_lock);
	goto err;
out:
	ret = 0;
err:
	mutex_unlock(&c->sb_lock);

	kfree(new_r.entries);
	kfree(new_gc.entries);

	return ret;
}

int bch2_mark_replicas(struct bch_fs *c,
		       struct bch_replicas_entry *r)
{
	return likely(bch2_replicas_marked(c, r, true))
		? 0
		: bch2_mark_replicas_slowpath(c, r);
}

bool bch2_bkey_replicas_marked(struct bch_fs *c,
			       struct bkey_s_c k,
			       bool check_gc_replicas)
{
	struct bch_replicas_padded search;
	struct bch_devs_list cached = bch2_bkey_cached_devs(k);
	unsigned i;

	for (i = 0; i < cached.nr; i++) {
		bch2_replicas_entry_cached(&search.e, cached.devs[i]);

		if (!bch2_replicas_marked(c, &search.e, check_gc_replicas))
			return false;
	}

	bkey_to_replicas(&search.e, k);

	return bch2_replicas_marked(c, &search.e, check_gc_replicas);
}

int bch2_mark_bkey_replicas(struct bch_fs *c, struct bkey_s_c k)
{
	struct bch_replicas_padded search;
	struct bch_devs_list cached = bch2_bkey_cached_devs(k);
	unsigned i;
	int ret;

	for (i = 0; i < cached.nr; i++) {
		bch2_replicas_entry_cached(&search.e, cached.devs[i]);

		ret = bch2_mark_replicas(c, &search.e);
		if (ret)
			return ret;
	}

	bkey_to_replicas(&search.e, k);

	return bch2_mark_replicas(c, &search.e);
}

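/*
 * Replicas GC: bch2_replicas_gc_start() seeds replicas_gc with the entries
 * whose data types are *not* being garbage collected; entries being GC'd are
 * re-added to replicas_gc as they're marked, and bch2_replicas_gc_end()
 * (after re-adding anything that still has sectors accounted to it) swaps in
 * the pruned table as the new live table:
 */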
int bch2_replicas_gc_end(struct bch_fs *c, int ret)
{
	unsigned i;

	lockdep_assert_held(&c->replicas_gc_lock);

	mutex_lock(&c->sb_lock);

	if (ret)
		goto err;

	/*
	 * this is kind of crappy; the replicas gc mechanism needs to be ripped
	 * out
	 */

	for (i = 0; i < c->replicas.nr; i++) {
		struct bch_replicas_entry *e =
			cpu_replicas_entry(&c->replicas, i);
		struct bch_replicas_cpu n;
		u64 v;

		if (__replicas_has_entry(&c->replicas_gc, e))
			continue;

		v = percpu_u64_get(&c->usage[0]->data[i]);
		if (!v)
			continue;

		n = cpu_replicas_add_entry(&c->replicas_gc, e);
		if (!n.entries) {
			ret = -ENOMEM;
			goto err;
		}

		percpu_down_write(&c->mark_lock);
		swap(n, c->replicas_gc);
		percpu_up_write(&c->mark_lock);

		kfree(n.entries);
	}

	if (bch2_cpu_replicas_to_sb_replicas(c, &c->replicas_gc)) {
		ret = -ENOSPC;
		goto err;
	}

	bch2_write_super(c);

	/* don't update in memory replicas until changes are persistent */
err:
	percpu_down_write(&c->mark_lock);
	if (!ret)
		ret = replicas_table_update(c, &c->replicas_gc);

	kfree(c->replicas_gc.entries);
	c->replicas_gc.entries = NULL;
	percpu_up_write(&c->mark_lock);

	mutex_unlock(&c->sb_lock);
	return ret;
}

int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask)
{
	struct bch_replicas_entry *e;
	unsigned i = 0;

	lockdep_assert_held(&c->replicas_gc_lock);

	mutex_lock(&c->sb_lock);
	BUG_ON(c->replicas_gc.entries);

	c->replicas_gc.nr		= 0;
	c->replicas_gc.entry_size	= 0;

	for_each_cpu_replicas_entry(&c->replicas, e)
		if (!((1 << e->data_type) & typemask)) {
			c->replicas_gc.nr++;
			c->replicas_gc.entry_size =
				max_t(unsigned, c->replicas_gc.entry_size,
				      replicas_entry_bytes(e));
		}

	c->replicas_gc.entries = kcalloc(c->replicas_gc.nr,
					 c->replicas_gc.entry_size,
					 GFP_NOIO);
	if (!c->replicas_gc.entries) {
		mutex_unlock(&c->sb_lock);
		return -ENOMEM;
	}

	for_each_cpu_replicas_entry(&c->replicas, e)
		if (!((1 << e->data_type) & typemask))
			memcpy(cpu_replicas_entry(&c->replicas_gc, i++),
			       e, c->replicas_gc.entry_size);

	bch2_cpu_replicas_sort(&c->replicas_gc);
	mutex_unlock(&c->sb_lock);

	return 0;
}

int bch2_replicas_set_usage(struct bch_fs *c,
			    struct bch_replicas_entry *r,
			    u64 sectors)
{
	int ret, idx = bch2_replicas_entry_idx(c, r);

	if (idx < 0) {
		struct bch_replicas_cpu n;

		n = cpu_replicas_add_entry(&c->replicas, r);
		if (!n.entries)
			return -ENOMEM;

		ret = replicas_table_update(c, &n);
		kfree(n.entries);
		if (ret)
			return ret;

		idx = bch2_replicas_entry_idx(c, r);
		BUG_ON(idx < 0);
	}

	percpu_u64_set(&c->usage[0]->data[idx], sectors);

	return 0;
}

/* Replicas tracking - superblock: */

static int
__bch2_sb_replicas_to_cpu_replicas(struct bch_sb_field_replicas *sb_r,
				   struct bch_replicas_cpu *cpu_r)
{
	struct bch_replicas_entry *e, *dst;
	unsigned nr = 0, entry_size = 0, idx = 0;

	for_each_replicas_entry(sb_r, e) {
		entry_size = max_t(unsigned, entry_size,
				   replicas_entry_bytes(e));
		nr++;
	}

	cpu_r->entries = kcalloc(nr, entry_size, GFP_NOIO);
	if (!cpu_r->entries)
		return -ENOMEM;

	cpu_r->nr		= nr;
	cpu_r->entry_size	= entry_size;

	for_each_replicas_entry(sb_r, e) {
		dst = cpu_replicas_entry(cpu_r, idx++);
		memcpy(dst, e, replicas_entry_bytes(e));
		replicas_entry_sort(dst);
	}

	return 0;
}

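/*
 * v0 replicas entries predate nr_required; when converting to the current
 * in-memory format, widen each entry to make room for it and default it
 * to 1:
 */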
static int
__bch2_sb_replicas_v0_to_cpu_replicas(struct bch_sb_field_replicas_v0 *sb_r,
				      struct bch_replicas_cpu *cpu_r)
{
	struct bch_replicas_entry_v0 *e;
	unsigned nr = 0, entry_size = 0, idx = 0;

	for_each_replicas_entry(sb_r, e) {
		entry_size = max_t(unsigned, entry_size,
				   replicas_entry_bytes(e));
		nr++;
	}

	entry_size += sizeof(struct bch_replicas_entry) -
		sizeof(struct bch_replicas_entry_v0);

	cpu_r->entries = kcalloc(nr, entry_size, GFP_NOIO);
	if (!cpu_r->entries)
		return -ENOMEM;

	cpu_r->nr		= nr;
	cpu_r->entry_size	= entry_size;

	for_each_replicas_entry(sb_r, e) {
		struct bch_replicas_entry *dst =
			cpu_replicas_entry(cpu_r, idx++);

		dst->data_type	= e->data_type;
		dst->nr_devs	= e->nr_devs;
		dst->nr_required = 1;
		memcpy(dst->devs, e->devs, e->nr_devs);
		replicas_entry_sort(dst);
	}

	return 0;
}

int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *c)
{
	struct bch_sb_field_replicas *sb_v1;
	struct bch_sb_field_replicas_v0 *sb_v0;
	struct bch_replicas_cpu new_r = { 0, 0, NULL };
	int ret = 0;

	if ((sb_v1 = bch2_sb_get_replicas(c->disk_sb.sb)))
		ret = __bch2_sb_replicas_to_cpu_replicas(sb_v1, &new_r);
	else if ((sb_v0 = bch2_sb_get_replicas_v0(c->disk_sb.sb)))
		ret = __bch2_sb_replicas_v0_to_cpu_replicas(sb_v0, &new_r);

	if (ret)
		return ret;

	bch2_cpu_replicas_sort(&new_r);

	percpu_down_write(&c->mark_lock);
	ret = replicas_table_update(c, &new_r);
	percpu_up_write(&c->mark_lock);

	kfree(new_r.entries);

	return ret;
}

static int bch2_cpu_replicas_to_sb_replicas_v0(struct bch_fs *c,
					       struct bch_replicas_cpu *r)
{
	struct bch_sb_field_replicas_v0 *sb_r;
	struct bch_replicas_entry_v0 *dst;
	struct bch_replicas_entry *src;
	size_t bytes;

	bytes = sizeof(struct bch_sb_field_replicas);

	for_each_cpu_replicas_entry(r, src)
		bytes += replicas_entry_bytes(src) - 1;

	sb_r = bch2_sb_resize_replicas_v0(&c->disk_sb,
			DIV_ROUND_UP(bytes, sizeof(u64)));
	if (!sb_r)
		return -ENOSPC;

	bch2_sb_field_delete(&c->disk_sb, BCH_SB_FIELD_replicas);
	sb_r = bch2_sb_get_replicas_v0(c->disk_sb.sb);

	memset(&sb_r->entries, 0,
	       vstruct_end(&sb_r->field) -
	       (void *) &sb_r->entries);

	dst = sb_r->entries;
	for_each_cpu_replicas_entry(r, src) {
		dst->data_type	= src->data_type;
		dst->nr_devs	= src->nr_devs;
		memcpy(dst->devs, src->devs, src->nr_devs);

		dst = replicas_entry_next(dst);

		BUG_ON((void *) dst > vstruct_end(&sb_r->field));
	}

	return 0;
}

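/*
 * Write the in-memory table to the superblock: when no entry has
 * nr_required != 1, the older, smaller v0 field format suffices, so that's
 * what gets written:
 */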
static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *c,
					    struct bch_replicas_cpu *r)
{
	struct bch_sb_field_replicas *sb_r;
	struct bch_replicas_entry *dst, *src;
	bool need_v1 = false;
	size_t bytes;

	bytes = sizeof(struct bch_sb_field_replicas);

	for_each_cpu_replicas_entry(r, src) {
		bytes += replicas_entry_bytes(src);
		if (src->nr_required != 1)
			need_v1 = true;
	}

	if (!need_v1)
		return bch2_cpu_replicas_to_sb_replicas_v0(c, r);

	sb_r = bch2_sb_resize_replicas(&c->disk_sb,
			DIV_ROUND_UP(bytes, sizeof(u64)));
	if (!sb_r)
		return -ENOSPC;

	bch2_sb_field_delete(&c->disk_sb, BCH_SB_FIELD_replicas_v0);
	sb_r = bch2_sb_get_replicas(c->disk_sb.sb);

	memset(&sb_r->entries, 0,
	       vstruct_end(&sb_r->field) -
	       (void *) &sb_r->entries);

	dst = sb_r->entries;
	for_each_cpu_replicas_entry(r, src) {
		memcpy(dst, src, replicas_entry_bytes(src));

		dst = replicas_entry_next(dst);

		BUG_ON((void *) dst > vstruct_end(&sb_r->field));
	}

	return 0;
}

static const char *check_dup_replicas_entries(struct bch_replicas_cpu *cpu_r)
{
	unsigned i;

	sort_cmp_size(cpu_r->entries,
		      cpu_r->nr,
		      cpu_r->entry_size,
		      memcmp, NULL);

	for (i = 0; i + 1 < cpu_r->nr; i++) {
		struct bch_replicas_entry *l =
			cpu_replicas_entry(cpu_r, i);
		struct bch_replicas_entry *r =
			cpu_replicas_entry(cpu_r, i + 1);

		BUG_ON(memcmp(l, r, cpu_r->entry_size) > 0);

		if (!memcmp(l, r, cpu_r->entry_size))
			return "duplicate replicas entry";
	}

	return NULL;
}

static const char *bch2_sb_validate_replicas(struct bch_sb *sb, struct bch_sb_field *f)
{
	struct bch_sb_field_replicas *sb_r = field_to_type(f, replicas);
	struct bch_sb_field_members *mi = bch2_sb_get_members(sb);
	struct bch_replicas_cpu cpu_r = { .entries = NULL };
	struct bch_replicas_entry *e;
	const char *err;
	unsigned i;

	for_each_replicas_entry(sb_r, e) {
		err = "invalid replicas entry: invalid data type";
		if (e->data_type >= BCH_DATA_NR)
			goto err;

		err = "invalid replicas entry: no devices";
		if (!e->nr_devs)
			goto err;

		err = "invalid replicas entry: bad nr_required";
		if (!e->nr_required ||
		    (e->nr_required > 1 &&
		     e->nr_required >= e->nr_devs))
			goto err;

		err = "invalid replicas entry: invalid device";
		for (i = 0; i < e->nr_devs; i++)
			if (!bch2_dev_exists(sb, mi, e->devs[i]))
				goto err;
	}

	err = "cannot allocate memory";
	if (__bch2_sb_replicas_to_cpu_replicas(sb_r, &cpu_r))
		goto err;

	err = check_dup_replicas_entries(&cpu_r);
err:
	kfree(cpu_r.entries);
	return err;
}

static void bch2_sb_replicas_to_text(struct printbuf *out,
				     struct bch_sb *sb,
				     struct bch_sb_field *f)
{
	struct bch_sb_field_replicas *r = field_to_type(f, replicas);
	struct bch_replicas_entry *e;
	bool first = true;

	for_each_replicas_entry(r, e) {
		if (!first)
			pr_buf(out, " ");
		first = false;

		bch2_replicas_entry_to_text(out, e);
	}
}

const struct bch_sb_field_ops bch_sb_field_ops_replicas = {
	.validate	= bch2_sb_validate_replicas,
	.to_text	= bch2_sb_replicas_to_text,
};

static const char *bch2_sb_validate_replicas_v0(struct bch_sb *sb, struct bch_sb_field *f)
{
	struct bch_sb_field_replicas_v0 *sb_r = field_to_type(f, replicas_v0);
	struct bch_sb_field_members *mi = bch2_sb_get_members(sb);
	struct bch_replicas_cpu cpu_r = { .entries = NULL };
	struct bch_replicas_entry_v0 *e;
	const char *err;
	unsigned i;

	for_each_replicas_entry_v0(sb_r, e) {
		err = "invalid replicas entry: invalid data type";
		if (e->data_type >= BCH_DATA_NR)
			goto err;

		err = "invalid replicas entry: no devices";
		if (!e->nr_devs)
			goto err;

		err = "invalid replicas entry: invalid device";
		for (i = 0; i < e->nr_devs; i++)
			if (!bch2_dev_exists(sb, mi, e->devs[i]))
				goto err;
	}

	err = "cannot allocate memory";
	if (__bch2_sb_replicas_v0_to_cpu_replicas(sb_r, &cpu_r))
		goto err;

	err = check_dup_replicas_entries(&cpu_r);
err:
	kfree(cpu_r.entries);
	return err;
}

const struct bch_sb_field_ops bch_sb_field_ops_replicas_v0 = {
	.validate	= bch2_sb_validate_replicas_v0,
};

/* Query replicas: */

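/*
 * For each data type, redundancy is the minimum over all replicas entries of
 * (number of online devices - nr_required), i.e. how many more devices could
 * go offline before data of that type becomes unavailable; nr_offline is the
 * worst case number of offline devices in any one entry:
 */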
struct replicas_status __bch2_replicas_status(struct bch_fs *c,
					      struct bch_devs_mask online_devs)
{
	struct bch_sb_field_members *mi;
	struct bch_replicas_entry *e;
	unsigned i, nr_online, nr_offline;
	struct replicas_status ret;

	memset(&ret, 0, sizeof(ret));

	for (i = 0; i < ARRAY_SIZE(ret.replicas); i++)
		ret.replicas[i].redundancy = INT_MAX;

	mi = bch2_sb_get_members(c->disk_sb.sb);

	percpu_down_read_preempt_disable(&c->mark_lock);

	for_each_cpu_replicas_entry(&c->replicas, e) {
		if (e->data_type >= ARRAY_SIZE(ret.replicas))
			panic("e %p data_type %u\n", e, e->data_type);

		nr_online = nr_offline = 0;

		for (i = 0; i < e->nr_devs; i++) {
			BUG_ON(!bch2_dev_exists(c->disk_sb.sb, mi,
						e->devs[i]));

			if (test_bit(e->devs[i], online_devs.d))
				nr_online++;
			else
				nr_offline++;
		}

		ret.replicas[e->data_type].redundancy =
			min(ret.replicas[e->data_type].redundancy,
			    (int) nr_online - (int) e->nr_required);

		ret.replicas[e->data_type].nr_offline =
			max(ret.replicas[e->data_type].nr_offline,
			    nr_offline);
	}

	percpu_up_read_preempt_enable(&c->mark_lock);

	for (i = 0; i < ARRAY_SIZE(ret.replicas); i++)
		if (ret.replicas[i].redundancy == INT_MAX)
			ret.replicas[i].redundancy = 0;

	return ret;
}

struct replicas_status bch2_replicas_status(struct bch_fs *c)
{
	return __bch2_replicas_status(c, bch2_online_devs(c));
}

static bool have_enough_devs(struct replicas_status s,
			     enum bch_data_type type,
			     bool force_if_degraded,
			     bool force_if_lost)
{
	return (!s.replicas[type].nr_offline || force_if_degraded) &&
		(s.replicas[type].redundancy >= 0 || force_if_lost);
}

bool bch2_have_enough_devs(struct replicas_status s, unsigned flags)
{
	return (have_enough_devs(s, BCH_DATA_JOURNAL,
				 flags & BCH_FORCE_IF_METADATA_DEGRADED,
				 flags & BCH_FORCE_IF_METADATA_LOST) &&
		have_enough_devs(s, BCH_DATA_BTREE,
				 flags & BCH_FORCE_IF_METADATA_DEGRADED,
				 flags & BCH_FORCE_IF_METADATA_LOST) &&
		have_enough_devs(s, BCH_DATA_USER,
				 flags & BCH_FORCE_IF_DATA_DEGRADED,
				 flags & BCH_FORCE_IF_DATA_LOST));
}

int bch2_replicas_online(struct bch_fs *c, bool meta)
{
	struct replicas_status s = bch2_replicas_status(c);

	return (meta
		? min(s.replicas[BCH_DATA_JOURNAL].redundancy,
		      s.replicas[BCH_DATA_BTREE].redundancy)
		: s.replicas[BCH_DATA_USER].redundancy) + 1;
}

unsigned bch2_dev_has_data(struct bch_fs *c, struct bch_dev *ca)
{
	struct bch_replicas_entry *e;
	unsigned i, ret = 0;

	percpu_down_read_preempt_disable(&c->mark_lock);

	for_each_cpu_replicas_entry(&c->replicas, e)
		for (i = 0; i < e->nr_devs; i++)
			if (e->devs[i] == ca->dev_idx)
				ret |= 1 << e->data_type;

	percpu_up_read_preempt_enable(&c->mark_lock);

	return ret;
}

int bch2_fs_replicas_init(struct bch_fs *c)
{
	c->journal.entry_u64s_reserved +=
		reserve_journal_replicas(c, &c->replicas);
	return 0;
}