]> git.sesse.net Git - bcachefs-tools-debian/blob - libbcachefs/replicas.c
Update bcachefs sources to 2cb70a82bc bcachefs: delete some debug code
[bcachefs-tools-debian] / libbcachefs / replicas.c
1
2 #include "bcachefs.h"
3 #include "replicas.h"
4 #include "super-io.h"
5
6 static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *,
7                                             struct bch_replicas_cpu *);
8
9 /* Replicas tracking - in memory: */
10
/*
 * Walk @_i over every entry stored in the in-memory replicas table @_r.
 *
 * Entries are (_r)->entry_size bytes apart, so the cursor is advanced with
 * byte arithmetic on void pointers rather than with a typed increment.
 */
#define for_each_cpu_replicas_entry(_r, _i)				\
	for ((_i) = (_r)->entries;					\
	     (void *) (_i) <						\
	     (void *) (_r)->entries + (_r)->nr * (_r)->entry_size;	\
	     (_i) = (void *) (_i) + (_r)->entry_size)
15
16 static inline struct bch_replicas_cpu_entry *
17 cpu_replicas_entry(struct bch_replicas_cpu *r, unsigned i)
18 {
19         return (void *) r->entries + r->entry_size * i;
20 }
21
22 static void bch2_cpu_replicas_sort(struct bch_replicas_cpu *r)
23 {
24         eytzinger0_sort(r->entries, r->nr, r->entry_size, memcmp, NULL);
25 }
26
27 static inline bool replicas_test_dev(struct bch_replicas_cpu_entry *e,
28                                      unsigned dev)
29 {
30         return (e->devs[dev >> 3] & (1 << (dev & 7))) != 0;
31 }
32
33 static inline void replicas_set_dev(struct bch_replicas_cpu_entry *e,
34                                     unsigned dev)
35 {
36         e->devs[dev >> 3] |= 1 << (dev & 7);
37 }
38
39 static inline unsigned replicas_dev_slots(struct bch_replicas_cpu *r)
40 {
41         return (r->entry_size -
42                 offsetof(struct bch_replicas_cpu_entry, devs)) * 8;
43 }
44
45 int bch2_cpu_replicas_to_text(struct bch_replicas_cpu *r,
46                               char *buf, size_t size)
47 {
48         char *out = buf, *end = out + size;
49         struct bch_replicas_cpu_entry *e;
50         bool first = true;
51         unsigned i;
52
53         for_each_cpu_replicas_entry(r, e) {
54                 bool first_e = true;
55
56                 if (!first)
57                         out += scnprintf(out, end - out, " ");
58                 first = false;
59
60                 out += scnprintf(out, end - out, "%u: [", e->data_type);
61
62                 for (i = 0; i < replicas_dev_slots(r); i++)
63                         if (replicas_test_dev(e, i)) {
64                                 if (!first_e)
65                                         out += scnprintf(out, end - out, " ");
66                                 first_e = false;
67                                 out += scnprintf(out, end - out, "%u", i);
68                         }
69                 out += scnprintf(out, end - out, "]");
70         }
71
72         return out - buf;
73 }
74
75 static inline unsigned bkey_to_replicas(struct bkey_s_c_extent e,
76                                         enum bch_data_type data_type,
77                                         struct bch_replicas_cpu_entry *r,
78                                         unsigned *max_dev)
79 {
80         const struct bch_extent_ptr *ptr;
81         unsigned nr = 0;
82
83         BUG_ON(!data_type ||
84                data_type == BCH_DATA_SB ||
85                data_type >= BCH_DATA_NR);
86
87         memset(r, 0, sizeof(*r));
88         r->data_type = data_type;
89
90         *max_dev = 0;
91
92         extent_for_each_ptr(e, ptr)
93                 if (!ptr->cached) {
94                         *max_dev = max_t(unsigned, *max_dev, ptr->dev);
95                         replicas_set_dev(r, ptr->dev);
96                         nr++;
97                 }
98         return nr;
99 }
100
101 static inline void devlist_to_replicas(struct bch_devs_list devs,
102                                        enum bch_data_type data_type,
103                                        struct bch_replicas_cpu_entry *r,
104                                        unsigned *max_dev)
105 {
106         unsigned i;
107
108         BUG_ON(!data_type ||
109                data_type == BCH_DATA_SB ||
110                data_type >= BCH_DATA_NR);
111
112         memset(r, 0, sizeof(*r));
113         r->data_type = data_type;
114
115         *max_dev = 0;
116
117         for (i = 0; i < devs.nr; i++) {
118                 *max_dev = max_t(unsigned, *max_dev, devs.devs[i]);
119                 replicas_set_dev(r, devs.devs[i]);
120         }
121 }
122
123 static struct bch_replicas_cpu *
124 cpu_replicas_add_entry(struct bch_replicas_cpu *old,
125                        struct bch_replicas_cpu_entry new_entry,
126                        unsigned max_dev)
127 {
128         struct bch_replicas_cpu *new;
129         unsigned i, nr, entry_size;
130
131         entry_size = offsetof(struct bch_replicas_cpu_entry, devs) +
132                 DIV_ROUND_UP(max_dev + 1, 8);
133         entry_size = max(entry_size, old->entry_size);
134         nr = old->nr + 1;
135
136         new = kzalloc(sizeof(struct bch_replicas_cpu) +
137                       nr * entry_size, GFP_NOIO);
138         if (!new)
139                 return NULL;
140
141         new->nr         = nr;
142         new->entry_size = entry_size;
143
144         for (i = 0; i < old->nr; i++)
145                 memcpy(cpu_replicas_entry(new, i),
146                        cpu_replicas_entry(old, i),
147                        min(new->entry_size, old->entry_size));
148
149         memcpy(cpu_replicas_entry(new, old->nr),
150                &new_entry,
151                new->entry_size);
152
153         bch2_cpu_replicas_sort(new);
154         return new;
155 }
156
157 static bool replicas_has_entry(struct bch_replicas_cpu *r,
158                                 struct bch_replicas_cpu_entry search,
159                                 unsigned max_dev)
160 {
161         return max_dev < replicas_dev_slots(r) &&
162                 eytzinger0_find(r->entries, r->nr,
163                                 r->entry_size,
164                                 memcmp, &search) < r->nr;
165 }
166
167 noinline
168 static int bch2_mark_replicas_slowpath(struct bch_fs *c,
169                                 struct bch_replicas_cpu_entry new_entry,
170                                 unsigned max_dev)
171 {
172         struct bch_replicas_cpu *old_gc, *new_gc = NULL, *old_r, *new_r = NULL;
173         int ret = -ENOMEM;
174
175         mutex_lock(&c->sb_lock);
176
177         old_gc = rcu_dereference_protected(c->replicas_gc,
178                                            lockdep_is_held(&c->sb_lock));
179         if (old_gc && !replicas_has_entry(old_gc, new_entry, max_dev)) {
180                 new_gc = cpu_replicas_add_entry(old_gc, new_entry, max_dev);
181                 if (!new_gc)
182                         goto err;
183         }
184
185         old_r = rcu_dereference_protected(c->replicas,
186                                           lockdep_is_held(&c->sb_lock));
187         if (!replicas_has_entry(old_r, new_entry, max_dev)) {
188                 new_r = cpu_replicas_add_entry(old_r, new_entry, max_dev);
189                 if (!new_r)
190                         goto err;
191
192                 ret = bch2_cpu_replicas_to_sb_replicas(c, new_r);
193                 if (ret)
194                         goto err;
195         }
196
197         /* allocations done, now commit: */
198
199         if (new_r)
200                 bch2_write_super(c);
201
202         /* don't update in memory replicas until changes are persistent */
203
204         if (new_gc) {
205                 rcu_assign_pointer(c->replicas_gc, new_gc);
206                 kfree_rcu(old_gc, rcu);
207         }
208
209         if (new_r) {
210                 rcu_assign_pointer(c->replicas, new_r);
211                 kfree_rcu(old_r, rcu);
212         }
213
214         mutex_unlock(&c->sb_lock);
215         return 0;
216 err:
217         mutex_unlock(&c->sb_lock);
218         kfree(new_gc);
219         kfree(new_r);
220         return ret;
221 }
222
223 int bch2_mark_replicas(struct bch_fs *c,
224                        enum bch_data_type data_type,
225                        struct bch_devs_list devs)
226 {
227         struct bch_replicas_cpu_entry search;
228         struct bch_replicas_cpu *r, *gc_r;
229         unsigned max_dev;
230         bool marked;
231
232         if (!devs.nr)
233                 return 0;
234
235         BUG_ON(devs.nr >= BCH_REPLICAS_MAX);
236
237         devlist_to_replicas(devs, data_type, &search, &max_dev);
238
239         rcu_read_lock();
240         r = rcu_dereference(c->replicas);
241         gc_r = rcu_dereference(c->replicas_gc);
242         marked = replicas_has_entry(r, search, max_dev) &&
243                 (!likely(gc_r) || replicas_has_entry(gc_r, search, max_dev));
244         rcu_read_unlock();
245
246         return likely(marked) ? 0
247                 : bch2_mark_replicas_slowpath(c, search, max_dev);
248 }
249
250 int bch2_mark_bkey_replicas(struct bch_fs *c,
251                             enum bch_data_type data_type,
252                             struct bkey_s_c k)
253 {
254         struct bch_devs_list cached = bch2_bkey_cached_devs(k);
255         unsigned i;
256         int ret;
257
258         for (i = 0; i < cached.nr; i++)
259                 if ((ret = bch2_mark_replicas(c, BCH_DATA_CACHED,
260                                               bch2_dev_list_single(cached.devs[i]))))
261                         return ret;
262
263         return bch2_mark_replicas(c, data_type, bch2_bkey_dirty_devs(k));
264 }
265
266 int bch2_replicas_gc_end(struct bch_fs *c, int ret)
267 {
268         struct bch_replicas_cpu *new_r, *old_r;
269
270         lockdep_assert_held(&c->replicas_gc_lock);
271
272         mutex_lock(&c->sb_lock);
273
274         new_r = rcu_dereference_protected(c->replicas_gc,
275                                           lockdep_is_held(&c->sb_lock));
276         rcu_assign_pointer(c->replicas_gc, NULL);
277
278         if (ret)
279                 goto err;
280
281         if (bch2_cpu_replicas_to_sb_replicas(c, new_r)) {
282                 ret = -ENOSPC;
283                 goto err;
284         }
285
286         bch2_write_super(c);
287
288         /* don't update in memory replicas until changes are persistent */
289
290         old_r = rcu_dereference_protected(c->replicas,
291                                           lockdep_is_held(&c->sb_lock));
292
293         rcu_assign_pointer(c->replicas, new_r);
294         kfree_rcu(old_r, rcu);
295 out:
296         mutex_unlock(&c->sb_lock);
297         return ret;
298 err:
299         kfree_rcu(new_r, rcu);
300         goto out;
301 }
302
303 int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask)
304 {
305         struct bch_replicas_cpu *dst, *src;
306         struct bch_replicas_cpu_entry *e;
307
308         lockdep_assert_held(&c->replicas_gc_lock);
309
310         mutex_lock(&c->sb_lock);
311         BUG_ON(c->replicas_gc);
312
313         src = rcu_dereference_protected(c->replicas,
314                                         lockdep_is_held(&c->sb_lock));
315
316         dst = kzalloc(sizeof(struct bch_replicas_cpu) +
317                       src->nr * src->entry_size, GFP_NOIO);
318         if (!dst) {
319                 mutex_unlock(&c->sb_lock);
320                 return -ENOMEM;
321         }
322
323         dst->nr         = 0;
324         dst->entry_size = src->entry_size;
325
326         for_each_cpu_replicas_entry(src, e)
327                 if (!((1 << e->data_type) & typemask))
328                         memcpy(cpu_replicas_entry(dst, dst->nr++),
329                                e, dst->entry_size);
330
331         bch2_cpu_replicas_sort(dst);
332
333         rcu_assign_pointer(c->replicas_gc, dst);
334         mutex_unlock(&c->sb_lock);
335
336         return 0;
337 }
338
339 /* Replicas tracking - superblock: */
340
341 static void bch2_sb_replicas_nr_entries(struct bch_sb_field_replicas *r,
342                                         unsigned *nr,
343                                         unsigned *bytes,
344                                         unsigned *max_dev)
345 {
346         struct bch_replicas_entry *i;
347         unsigned j;
348
349         *nr     = 0;
350         *bytes  = sizeof(*r);
351         *max_dev = 0;
352
353         if (!r)
354                 return;
355
356         for_each_replicas_entry(r, i) {
357                 for (j = 0; j < i->nr; j++)
358                         *max_dev = max_t(unsigned, *max_dev, i->devs[j]);
359                 (*nr)++;
360         }
361
362         *bytes = (void *) i - (void *) r;
363 }
364
365 static struct bch_replicas_cpu *
366 __bch2_sb_replicas_to_cpu_replicas(struct bch_sb_field_replicas *sb_r)
367 {
368         struct bch_replicas_cpu *cpu_r;
369         unsigned i, nr, bytes, max_dev, entry_size;
370
371         bch2_sb_replicas_nr_entries(sb_r, &nr, &bytes, &max_dev);
372
373         entry_size = offsetof(struct bch_replicas_cpu_entry, devs) +
374                 DIV_ROUND_UP(max_dev + 1, 8);
375
376         cpu_r = kzalloc(sizeof(struct bch_replicas_cpu) +
377                         nr * entry_size, GFP_NOIO);
378         if (!cpu_r)
379                 return NULL;
380
381         cpu_r->nr               = nr;
382         cpu_r->entry_size       = entry_size;
383
384         if (nr) {
385                 struct bch_replicas_cpu_entry *dst =
386                         cpu_replicas_entry(cpu_r, 0);
387                 struct bch_replicas_entry *src = sb_r->entries;
388
389                 while (dst < cpu_replicas_entry(cpu_r, nr)) {
390                         dst->data_type = src->data_type;
391                         for (i = 0; i < src->nr; i++)
392                                 replicas_set_dev(dst, src->devs[i]);
393
394                         src     = replicas_entry_next(src);
395                         dst     = (void *) dst + entry_size;
396                 }
397         }
398
399         bch2_cpu_replicas_sort(cpu_r);
400         return cpu_r;
401 }
402
403 int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *c)
404 {
405         struct bch_sb_field_replicas *sb_r;
406         struct bch_replicas_cpu *cpu_r, *old_r;
407
408         sb_r    = bch2_sb_get_replicas(c->disk_sb.sb);
409         cpu_r   = __bch2_sb_replicas_to_cpu_replicas(sb_r);
410         if (!cpu_r)
411                 return -ENOMEM;
412
413         old_r = rcu_dereference_check(c->replicas, lockdep_is_held(&c->sb_lock));
414         rcu_assign_pointer(c->replicas, cpu_r);
415         if (old_r)
416                 kfree_rcu(old_r, rcu);
417
418         return 0;
419 }
420
421 static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *c,
422                                             struct bch_replicas_cpu *r)
423 {
424         struct bch_sb_field_replicas *sb_r;
425         struct bch_replicas_entry *sb_e;
426         struct bch_replicas_cpu_entry *e;
427         size_t i, bytes;
428
429         bytes = sizeof(struct bch_sb_field_replicas);
430
431         for_each_cpu_replicas_entry(r, e) {
432                 bytes += sizeof(struct bch_replicas_entry);
433                 for (i = 0; i < r->entry_size - 1; i++)
434                         bytes += hweight8(e->devs[i]);
435         }
436
437         sb_r = bch2_sb_resize_replicas(&c->disk_sb,
438                         DIV_ROUND_UP(sizeof(*sb_r) + bytes, sizeof(u64)));
439         if (!sb_r)
440                 return -ENOSPC;
441
442         memset(&sb_r->entries, 0,
443                vstruct_end(&sb_r->field) -
444                (void *) &sb_r->entries);
445
446         sb_e = sb_r->entries;
447         for_each_cpu_replicas_entry(r, e) {
448                 sb_e->data_type = e->data_type;
449
450                 for (i = 0; i < replicas_dev_slots(r); i++)
451                         if (replicas_test_dev(e, i))
452                                 sb_e->devs[sb_e->nr++] = i;
453
454                 sb_e = replicas_entry_next(sb_e);
455
456                 BUG_ON((void *) sb_e > vstruct_end(&sb_r->field));
457         }
458
459         return 0;
460 }
461
462 static const char *bch2_sb_validate_replicas(struct bch_sb *sb, struct bch_sb_field *f)
463 {
464         struct bch_sb_field_replicas *sb_r = field_to_type(f, replicas);
465         struct bch_sb_field_members *mi = bch2_sb_get_members(sb);
466         struct bch_replicas_cpu *cpu_r = NULL;
467         struct bch_replicas_entry *e;
468         const char *err;
469         unsigned i;
470
471         for_each_replicas_entry(sb_r, e) {
472                 err = "invalid replicas entry: invalid data type";
473                 if (e->data_type >= BCH_DATA_NR)
474                         goto err;
475
476                 err = "invalid replicas entry: no devices";
477                 if (!e->nr)
478                         goto err;
479
480                 err = "invalid replicas entry: too many devices";
481                 if (e->nr >= BCH_REPLICAS_MAX)
482                         goto err;
483
484                 err = "invalid replicas entry: invalid device";
485                 for (i = 0; i < e->nr; i++)
486                         if (!bch2_dev_exists(sb, mi, e->devs[i]))
487                                 goto err;
488         }
489
490         err = "cannot allocate memory";
491         cpu_r = __bch2_sb_replicas_to_cpu_replicas(sb_r);
492         if (!cpu_r)
493                 goto err;
494
495         sort_cmp_size(cpu_r->entries,
496                       cpu_r->nr,
497                       cpu_r->entry_size,
498                       memcmp, NULL);
499
500         for (i = 0; i + 1 < cpu_r->nr; i++) {
501                 struct bch_replicas_cpu_entry *l =
502                         cpu_replicas_entry(cpu_r, i);
503                 struct bch_replicas_cpu_entry *r =
504                         cpu_replicas_entry(cpu_r, i + 1);
505
506                 BUG_ON(memcmp(l, r, cpu_r->entry_size) > 0);
507
508                 err = "duplicate replicas entry";
509                 if (!memcmp(l, r, cpu_r->entry_size))
510                         goto err;
511         }
512
513         err = NULL;
514 err:
515         kfree(cpu_r);
516         return err;
517 }
518
519 const struct bch_sb_field_ops bch_sb_field_ops_replicas = {
520         .validate       = bch2_sb_validate_replicas,
521 };
522
523 int bch2_sb_replicas_to_text(struct bch_sb_field_replicas *r, char *buf, size_t size)
524 {
525         char *out = buf, *end = out + size;
526         struct bch_replicas_entry *e;
527         bool first = true;
528         unsigned i;
529
530         if (!r) {
531                 out += scnprintf(out, end - out, "(no replicas section found)");
532                 return out - buf;
533         }
534
535         for_each_replicas_entry(r, e) {
536                 if (!first)
537                         out += scnprintf(out, end - out, " ");
538                 first = false;
539
540                 out += scnprintf(out, end - out, "%u: [", e->data_type);
541
542                 for (i = 0; i < e->nr; i++)
543                         out += scnprintf(out, end - out,
544                                          i ? " %u" : "%u", e->devs[i]);
545                 out += scnprintf(out, end - out, "]");
546         }
547
548         return out - buf;
549 }
550
551 /* Query replicas: */
552
553 bool bch2_replicas_marked(struct bch_fs *c,
554                           enum bch_data_type data_type,
555                           struct bch_devs_list devs)
556 {
557         struct bch_replicas_cpu_entry search;
558         unsigned max_dev;
559         bool ret;
560
561         if (!devs.nr)
562                 return true;
563
564         devlist_to_replicas(devs, data_type, &search, &max_dev);
565
566         rcu_read_lock();
567         ret = replicas_has_entry(rcu_dereference(c->replicas),
568                                  search, max_dev);
569         rcu_read_unlock();
570
571         return ret;
572 }
573
574 bool bch2_bkey_replicas_marked(struct bch_fs *c,
575                                enum bch_data_type data_type,
576                                struct bkey_s_c k)
577 {
578         struct bch_devs_list cached = bch2_bkey_cached_devs(k);
579         unsigned i;
580
581         for (i = 0; i < cached.nr; i++)
582                 if (!bch2_replicas_marked(c, BCH_DATA_CACHED,
583                                           bch2_dev_list_single(cached.devs[i])))
584                         return false;
585
586         return bch2_replicas_marked(c, data_type, bch2_bkey_dirty_devs(k));
587 }
588
589 struct replicas_status __bch2_replicas_status(struct bch_fs *c,
590                                               struct bch_devs_mask online_devs)
591 {
592         struct bch_sb_field_members *mi;
593         struct bch_replicas_cpu_entry *e;
594         struct bch_replicas_cpu *r;
595         unsigned i, dev, dev_slots, nr_online, nr_offline;
596         struct replicas_status ret;
597
598         memset(&ret, 0, sizeof(ret));
599
600         for (i = 0; i < ARRAY_SIZE(ret.replicas); i++)
601                 ret.replicas[i].nr_online = UINT_MAX;
602
603         mi = bch2_sb_get_members(c->disk_sb.sb);
604         rcu_read_lock();
605
606         r = rcu_dereference(c->replicas);
607         dev_slots = replicas_dev_slots(r);
608
609         for_each_cpu_replicas_entry(r, e) {
610                 if (e->data_type >= ARRAY_SIZE(ret.replicas))
611                         panic("e %p data_type %u\n", e, e->data_type);
612
613                 nr_online = nr_offline = 0;
614
615                 for (dev = 0; dev < dev_slots; dev++) {
616                         if (!replicas_test_dev(e, dev))
617                                 continue;
618
619                         BUG_ON(!bch2_dev_exists(c->disk_sb.sb, mi, dev));
620
621                         if (test_bit(dev, online_devs.d))
622                                 nr_online++;
623                         else
624                                 nr_offline++;
625                 }
626
627                 ret.replicas[e->data_type].nr_online =
628                         min(ret.replicas[e->data_type].nr_online,
629                             nr_online);
630
631                 ret.replicas[e->data_type].nr_offline =
632                         max(ret.replicas[e->data_type].nr_offline,
633                             nr_offline);
634         }
635
636         rcu_read_unlock();
637
638         return ret;
639 }
640
641 struct replicas_status bch2_replicas_status(struct bch_fs *c)
642 {
643         return __bch2_replicas_status(c, bch2_online_devs(c));
644 }
645
646 static bool have_enough_devs(struct replicas_status s,
647                              enum bch_data_type type,
648                              bool force_if_degraded,
649                              bool force_if_lost)
650 {
651         return (!s.replicas[type].nr_offline || force_if_degraded) &&
652                 (s.replicas[type].nr_online || force_if_lost);
653 }
654
655 bool bch2_have_enough_devs(struct replicas_status s, unsigned flags)
656 {
657         return (have_enough_devs(s, BCH_DATA_JOURNAL,
658                                  flags & BCH_FORCE_IF_METADATA_DEGRADED,
659                                  flags & BCH_FORCE_IF_METADATA_LOST) &&
660                 have_enough_devs(s, BCH_DATA_BTREE,
661                                  flags & BCH_FORCE_IF_METADATA_DEGRADED,
662                                  flags & BCH_FORCE_IF_METADATA_LOST) &&
663                 have_enough_devs(s, BCH_DATA_USER,
664                                  flags & BCH_FORCE_IF_DATA_DEGRADED,
665                                  flags & BCH_FORCE_IF_DATA_LOST));
666 }
667
668 unsigned bch2_replicas_online(struct bch_fs *c, bool meta)
669 {
670         struct replicas_status s = bch2_replicas_status(c);
671
672         return meta
673                 ? min(s.replicas[BCH_DATA_JOURNAL].nr_online,
674                       s.replicas[BCH_DATA_BTREE].nr_online)
675                 : s.replicas[BCH_DATA_USER].nr_online;
676 }
677
678 unsigned bch2_dev_has_data(struct bch_fs *c, struct bch_dev *ca)
679 {
680         struct bch_replicas_cpu_entry *e;
681         struct bch_replicas_cpu *r;
682         unsigned ret = 0;
683
684         rcu_read_lock();
685         r = rcu_dereference(c->replicas);
686
687         if (ca->dev_idx >= replicas_dev_slots(r))
688                 goto out;
689
690         for_each_cpu_replicas_entry(r, e)
691                 if (replicas_test_dev(e, ca->dev_idx))
692                         ret |= 1 << e->data_type;
693 out:
694         rcu_read_unlock();
695
696         return ret;
697 }