]> git.sesse.net Git - bcachefs-tools-debian/blob - libbcachefs/replicas.c
Update bcachefs sources to 9abf628c70 bcachefs: Fix a spurious error in fsck
[bcachefs-tools-debian] / libbcachefs / replicas.c
1
2 #include "bcachefs.h"
3 #include "replicas.h"
4 #include "super-io.h"
5
6 static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *,
7                                             struct bch_replicas_cpu *);
8
9 /* Replicas tracking - in memory: */
10
/*
 * Iterate @_i over every entry of in-memory replicas table @_r; entries
 * are laid out back to back, each (_r)->entry_size bytes long.
 */
#define for_each_cpu_replicas_entry(_r, _i)                             \
	for (_i = (_r)->entries;                                        \
	     (void *) (_i) < (void *) (_r)->entries + (_r)->nr * (_r)->entry_size;\
	     _i = (void *) (_i) + (_r)->entry_size)
15
16 static inline struct bch_replicas_cpu_entry *
17 cpu_replicas_entry(struct bch_replicas_cpu *r, unsigned i)
18 {
19         return (void *) r->entries + r->entry_size * i;
20 }
21
/* Sort entries into eytzinger order, as expected by replicas_has_entry(). */
static void bch2_cpu_replicas_sort(struct bch_replicas_cpu *r)
{
	eytzinger0_sort(r->entries, r->nr, r->entry_size, memcmp, NULL);
}
26
27 static inline bool replicas_test_dev(struct bch_replicas_cpu_entry *e,
28                                      unsigned dev)
29 {
30         return (e->devs[dev >> 3] & (1 << (dev & 7))) != 0;
31 }
32
33 static inline void replicas_set_dev(struct bch_replicas_cpu_entry *e,
34                                     unsigned dev)
35 {
36         e->devs[dev >> 3] |= 1 << (dev & 7);
37 }
38
39 static inline unsigned replicas_dev_slots(struct bch_replicas_cpu *r)
40 {
41         return (r->entry_size -
42                 offsetof(struct bch_replicas_cpu_entry, devs)) * 8;
43 }
44
45 int bch2_cpu_replicas_to_text(struct bch_replicas_cpu *r,
46                               char *buf, size_t size)
47 {
48         char *out = buf, *end = out + size;
49         struct bch_replicas_cpu_entry *e;
50         bool first = true;
51         unsigned i;
52
53         for_each_cpu_replicas_entry(r, e) {
54                 bool first_e = true;
55
56                 if (!first)
57                         out += scnprintf(out, end - out, " ");
58                 first = false;
59
60                 out += scnprintf(out, end - out, "%u: [", e->data_type);
61
62                 for (i = 0; i < replicas_dev_slots(r); i++)
63                         if (replicas_test_dev(e, i)) {
64                                 if (!first_e)
65                                         out += scnprintf(out, end - out, " ");
66                                 first_e = false;
67                                 out += scnprintf(out, end - out, "%u", i);
68                         }
69                 out += scnprintf(out, end - out, "]");
70         }
71
72         return out - buf;
73 }
74
75 static inline unsigned bkey_to_replicas(struct bkey_s_c_extent e,
76                                         enum bch_data_type data_type,
77                                         struct bch_replicas_cpu_entry *r,
78                                         unsigned *max_dev)
79 {
80         const struct bch_extent_ptr *ptr;
81         unsigned nr = 0;
82
83         BUG_ON(!data_type ||
84                data_type == BCH_DATA_SB ||
85                data_type >= BCH_DATA_NR);
86
87         memset(r, 0, sizeof(*r));
88         r->data_type = data_type;
89
90         *max_dev = 0;
91
92         extent_for_each_ptr(e, ptr)
93                 if (!ptr->cached) {
94                         *max_dev = max_t(unsigned, *max_dev, ptr->dev);
95                         replicas_set_dev(r, ptr->dev);
96                         nr++;
97                 }
98         return nr;
99 }
100
101 static inline void devlist_to_replicas(struct bch_devs_list devs,
102                                        enum bch_data_type data_type,
103                                        struct bch_replicas_cpu_entry *r,
104                                        unsigned *max_dev)
105 {
106         unsigned i;
107
108         BUG_ON(!data_type ||
109                data_type == BCH_DATA_SB ||
110                data_type >= BCH_DATA_NR);
111
112         memset(r, 0, sizeof(*r));
113         r->data_type = data_type;
114
115         *max_dev = 0;
116
117         for (i = 0; i < devs.nr; i++) {
118                 *max_dev = max_t(unsigned, *max_dev, devs.devs[i]);
119                 replicas_set_dev(r, devs.devs[i]);
120         }
121 }
122
123 static struct bch_replicas_cpu *
124 cpu_replicas_add_entry(struct bch_replicas_cpu *old,
125                        struct bch_replicas_cpu_entry new_entry,
126                        unsigned max_dev)
127 {
128         struct bch_replicas_cpu *new;
129         unsigned i, nr, entry_size;
130
131         entry_size = offsetof(struct bch_replicas_cpu_entry, devs) +
132                 DIV_ROUND_UP(max_dev + 1, 8);
133         entry_size = max(entry_size, old->entry_size);
134         nr = old->nr + 1;
135
136         new = kzalloc(sizeof(struct bch_replicas_cpu) +
137                       nr * entry_size, GFP_NOIO);
138         if (!new)
139                 return NULL;
140
141         new->nr         = nr;
142         new->entry_size = entry_size;
143
144         for (i = 0; i < old->nr; i++)
145                 memcpy(cpu_replicas_entry(new, i),
146                        cpu_replicas_entry(old, i),
147                        min(new->entry_size, old->entry_size));
148
149         memcpy(cpu_replicas_entry(new, old->nr),
150                &new_entry,
151                new->entry_size);
152
153         bch2_cpu_replicas_sort(new);
154         return new;
155 }
156
157 static bool replicas_has_entry(struct bch_replicas_cpu *r,
158                                 struct bch_replicas_cpu_entry search,
159                                 unsigned max_dev)
160 {
161         return max_dev < replicas_dev_slots(r) &&
162                 eytzinger0_find(r->entries, r->nr,
163                                 r->entry_size,
164                                 memcmp, &search) < r->nr;
165 }
166
167 noinline
168 static int bch2_mark_replicas_slowpath(struct bch_fs *c,
169                                 struct bch_replicas_cpu_entry new_entry,
170                                 unsigned max_dev)
171 {
172         struct bch_replicas_cpu *old_gc, *new_gc = NULL, *old_r, *new_r = NULL;
173         int ret = -ENOMEM;
174
175         mutex_lock(&c->sb_lock);
176
177         old_gc = rcu_dereference_protected(c->replicas_gc,
178                                            lockdep_is_held(&c->sb_lock));
179         if (old_gc && !replicas_has_entry(old_gc, new_entry, max_dev)) {
180                 new_gc = cpu_replicas_add_entry(old_gc, new_entry, max_dev);
181                 if (!new_gc)
182                         goto err;
183         }
184
185         old_r = rcu_dereference_protected(c->replicas,
186                                           lockdep_is_held(&c->sb_lock));
187         if (!replicas_has_entry(old_r, new_entry, max_dev)) {
188                 new_r = cpu_replicas_add_entry(old_r, new_entry, max_dev);
189                 if (!new_r)
190                         goto err;
191
192                 ret = bch2_cpu_replicas_to_sb_replicas(c, new_r);
193                 if (ret)
194                         goto err;
195         }
196
197         /* allocations done, now commit: */
198
199         if (new_r)
200                 bch2_write_super(c);
201
202         /* don't update in memory replicas until changes are persistent */
203
204         if (new_gc) {
205                 rcu_assign_pointer(c->replicas_gc, new_gc);
206                 kfree_rcu(old_gc, rcu);
207         }
208
209         if (new_r) {
210                 rcu_assign_pointer(c->replicas, new_r);
211                 kfree_rcu(old_r, rcu);
212         }
213
214         mutex_unlock(&c->sb_lock);
215         return 0;
216 err:
217         mutex_unlock(&c->sb_lock);
218         if (new_gc)
219                 kfree(new_gc);
220         if (new_r)
221                 kfree(new_r);
222         return ret;
223 }
224
225 int bch2_mark_replicas(struct bch_fs *c,
226                        enum bch_data_type data_type,
227                        struct bch_devs_list devs)
228 {
229         struct bch_replicas_cpu_entry search;
230         struct bch_replicas_cpu *r, *gc_r;
231         unsigned max_dev;
232         bool marked;
233
234         if (!devs.nr)
235                 return 0;
236
237         BUG_ON(devs.nr >= BCH_REPLICAS_MAX);
238
239         devlist_to_replicas(devs, data_type, &search, &max_dev);
240
241         rcu_read_lock();
242         r = rcu_dereference(c->replicas);
243         gc_r = rcu_dereference(c->replicas_gc);
244         marked = replicas_has_entry(r, search, max_dev) &&
245                 (!likely(gc_r) || replicas_has_entry(gc_r, search, max_dev));
246         rcu_read_unlock();
247
248         return likely(marked) ? 0
249                 : bch2_mark_replicas_slowpath(c, search, max_dev);
250 }
251
252 int bch2_mark_bkey_replicas(struct bch_fs *c,
253                             enum bch_data_type data_type,
254                             struct bkey_s_c k)
255 {
256         struct bch_devs_list cached = bch2_bkey_cached_devs(k);
257         unsigned i;
258         int ret;
259
260         for (i = 0; i < cached.nr; i++)
261                 if ((ret = bch2_mark_replicas(c, BCH_DATA_CACHED,
262                                               bch2_dev_list_single(cached.devs[i]))))
263                         return ret;
264
265         return bch2_mark_replicas(c, data_type, bch2_bkey_dirty_devs(k));
266 }
267
/*
 * Finish a replicas GC pass started by bch2_replicas_gc_start().
 *
 * If @err is nonzero the GC table is discarded (and 0 is still
 * returned).  Otherwise the pruned table is serialized into the
 * superblock, published as the live table, and the superblock written.
 * Returns -ENOSPC if the superblock field can't be resized.
 *
 * Caller must hold replicas_gc_lock; sb_lock is taken internally.
 */
int bch2_replicas_gc_end(struct bch_fs *c, int err)
{
	struct bch_replicas_cpu *new_r, *old_r;
	int ret = 0;

	lockdep_assert_held(&c->replicas_gc_lock);

	mutex_lock(&c->sb_lock);

	new_r = rcu_dereference_protected(c->replicas_gc,
					  lockdep_is_held(&c->sb_lock));

	if (err) {
		/* GC aborted: throw away the partially built table */
		rcu_assign_pointer(c->replicas_gc, NULL);
		kfree_rcu(new_r, rcu);
		goto err;
	}

	if (bch2_cpu_replicas_to_sb_replicas(c, new_r)) {
		ret = -ENOSPC;
		goto err;
	}

	old_r = rcu_dereference_protected(c->replicas,
					  lockdep_is_held(&c->sb_lock));

	/* swap in the pruned table and retire the old one: */
	rcu_assign_pointer(c->replicas, new_r);
	rcu_assign_pointer(c->replicas_gc, NULL);
	kfree_rcu(old_r, rcu);

	bch2_write_super(c);
err:
	mutex_unlock(&c->sb_lock);
	return ret;
}
303
/*
 * Begin a replicas GC pass: c->replicas_gc is initialized to a copy of
 * the live table containing only the entries whose data type is NOT in
 * @typemask; entries for masked types get re-added as they are
 * re-marked while GC runs, and the result is committed by
 * bch2_replicas_gc_end().
 *
 * Caller must hold replicas_gc_lock; sb_lock is taken internally.
 * Returns 0 or -ENOMEM.
 */
int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask)
{
	struct bch_replicas_cpu *dst, *src;
	struct bch_replicas_cpu_entry *e;

	lockdep_assert_held(&c->replicas_gc_lock);

	mutex_lock(&c->sb_lock);
	BUG_ON(c->replicas_gc);

	src = rcu_dereference_protected(c->replicas,
					lockdep_is_held(&c->sb_lock));

	/* worst case allocation: every entry is kept */
	dst = kzalloc(sizeof(struct bch_replicas_cpu) +
		      src->nr * src->entry_size, GFP_NOIO);
	if (!dst) {
		mutex_unlock(&c->sb_lock);
		return -ENOMEM;
	}

	dst->nr		= 0;
	dst->entry_size = src->entry_size;

	/* copy only the entries whose data type is not being GC'd: */
	for_each_cpu_replicas_entry(src, e)
		if (!((1 << e->data_type) & typemask))
			memcpy(cpu_replicas_entry(dst, dst->nr++),
			       e, dst->entry_size);

	bch2_cpu_replicas_sort(dst);

	rcu_assign_pointer(c->replicas_gc, dst);
	mutex_unlock(&c->sb_lock);

	return 0;
}
339
340 /* Replicas tracking - superblock: */
341
342 static void bch2_sb_replicas_nr_entries(struct bch_sb_field_replicas *r,
343                                         unsigned *nr,
344                                         unsigned *bytes,
345                                         unsigned *max_dev)
346 {
347         struct bch_replicas_entry *i;
348         unsigned j;
349
350         *nr     = 0;
351         *bytes  = sizeof(*r);
352         *max_dev = 0;
353
354         if (!r)
355                 return;
356
357         for_each_replicas_entry(r, i) {
358                 for (j = 0; j < i->nr; j++)
359                         *max_dev = max_t(unsigned, *max_dev, i->devs[j]);
360                 (*nr)++;
361         }
362
363         *bytes = (void *) i - (void *) r;
364 }
365
/*
 * Convert the superblock replicas section (device index lists) into the
 * in-memory representation (per-entry device bitmaps).  @sb_r may be
 * NULL, yielding an empty table.  Returns a sorted, caller-owned table,
 * or NULL on allocation failure.
 */
static struct bch_replicas_cpu *
__bch2_sb_replicas_to_cpu_replicas(struct bch_sb_field_replicas *sb_r)
{
	struct bch_replicas_cpu *cpu_r;
	unsigned i, nr, bytes, max_dev, entry_size;

	bch2_sb_replicas_nr_entries(sb_r, &nr, &bytes, &max_dev);

	/* bitmap must be wide enough for the highest device index: */
	entry_size = offsetof(struct bch_replicas_cpu_entry, devs) +
		DIV_ROUND_UP(max_dev + 1, 8);

	cpu_r = kzalloc(sizeof(struct bch_replicas_cpu) +
			nr * entry_size, GFP_NOIO);
	if (!cpu_r)
		return NULL;

	cpu_r->nr		= nr;
	cpu_r->entry_size	= entry_size;

	if (nr) {
		struct bch_replicas_cpu_entry *dst =
			cpu_replicas_entry(cpu_r, 0);
		struct bch_replicas_entry *src = sb_r->entries;

		/* walk both representations in lockstep: */
		while (dst < cpu_replicas_entry(cpu_r, nr)) {
			dst->data_type = src->data_type;
			for (i = 0; i < src->nr; i++)
				replicas_set_dev(dst, src->devs[i]);

			src	= replicas_entry_next(src);
			dst	= (void *) dst + entry_size;
		}
	}

	bch2_cpu_replicas_sort(cpu_r);
	return cpu_r;
}
403
/*
 * Rebuild the in-memory replicas table from the superblock and publish
 * it on c->replicas via RCU; the old table (if any) is freed after a
 * grace period.  Returns 0 or -ENOMEM.
 */
int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *c)
{
	struct bch_sb_field_replicas *sb_r;
	struct bch_replicas_cpu *cpu_r, *old_r;

	sb_r	= bch2_sb_get_replicas(c->disk_sb.sb);
	cpu_r	= __bch2_sb_replicas_to_cpu_replicas(sb_r);
	if (!cpu_r)
		return -ENOMEM;

	old_r = rcu_dereference_check(c->replicas, lockdep_is_held(&c->sb_lock));
	rcu_assign_pointer(c->replicas, cpu_r);
	if (old_r)
		kfree_rcu(old_r, rcu);

	return 0;
}
421
/*
 * Serialize in-memory replicas table @r into the superblock replicas
 * section.  Returns 0 on success, -ENOSPC if the superblock field
 * can't be resized.
 */
static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *c,
					    struct bch_replicas_cpu *r)
{
	struct bch_sb_field_replicas *sb_r;
	struct bch_replicas_entry *sb_e;
	struct bch_replicas_cpu_entry *e;
	size_t i, bytes;

	/* compute serialized size: one device byte per bit set */
	bytes = sizeof(struct bch_sb_field_replicas);

	for_each_cpu_replicas_entry(r, e) {
		bytes += sizeof(struct bch_replicas_entry);
		/*
		 * NOTE(review): entry_size - 1 assumes devs[] starts at
		 * byte offset 1 of bch_replicas_cpu_entry (i.e. data_type
		 * is a single byte) — confirm against the struct layout.
		 */
		for (i = 0; i < r->entry_size - 1; i++)
			bytes += hweight8(e->devs[i]);
	}

	sb_r = bch2_sb_resize_replicas(&c->disk_sb,
			DIV_ROUND_UP(sizeof(*sb_r) + bytes, sizeof(u64)));
	if (!sb_r)
		return -ENOSPC;

	/* zero the whole field so any unused tail is deterministic: */
	memset(&sb_r->entries, 0,
	       vstruct_end(&sb_r->field) -
	       (void *) &sb_r->entries);

	sb_e = sb_r->entries;
	for_each_cpu_replicas_entry(r, e) {
		sb_e->data_type = e->data_type;

		/* expand the device bitmap back into a list of indexes: */
		for (i = 0; i < replicas_dev_slots(r); i++)
			if (replicas_test_dev(e, i))
				sb_e->devs[sb_e->nr++] = i;

		sb_e = replicas_entry_next(sb_e);

		BUG_ON((void *) sb_e > vstruct_end(&sb_r->field));
	}

	return 0;
}
462
/*
 * Superblock validation for the replicas section: sanity check each
 * entry (known data type, 1..BCH_REPLICAS_MAX-1 devices, all device
 * indexes exist), then build the in-memory table and reject duplicate
 * entries.  Returns NULL if valid, otherwise a static error string.
 */
static const char *bch2_sb_validate_replicas(struct bch_sb *sb, struct bch_sb_field *f)
{
	struct bch_sb_field_replicas *sb_r = field_to_type(f, replicas);
	struct bch_sb_field_members *mi = bch2_sb_get_members(sb);
	struct bch_replicas_cpu *cpu_r = NULL;
	struct bch_replicas_entry *e;
	const char *err;
	unsigned i;

	for_each_replicas_entry(sb_r, e) {
		err = "invalid replicas entry: invalid data type";
		if (e->data_type >= BCH_DATA_NR)
			goto err;

		err = "invalid replicas entry: no devices";
		if (!e->nr)
			goto err;

		err = "invalid replicas entry: too many devices";
		if (e->nr >= BCH_REPLICAS_MAX)
			goto err;

		err = "invalid replicas entry: invalid device";
		for (i = 0; i < e->nr; i++)
			if (!bch2_dev_exists(sb, mi, e->devs[i]))
				goto err;
	}

	err = "cannot allocate memory";
	cpu_r = __bch2_sb_replicas_to_cpu_replicas(sb_r);
	if (!cpu_r)
		goto err;

	/* re-sort so that duplicate entries end up adjacent: */
	sort_cmp_size(cpu_r->entries,
		      cpu_r->nr,
		      cpu_r->entry_size,
		      memcmp, NULL);

	for (i = 0; i + 1 < cpu_r->nr; i++) {
		struct bch_replicas_cpu_entry *l =
			cpu_replicas_entry(cpu_r, i);
		struct bch_replicas_cpu_entry *r =
			cpu_replicas_entry(cpu_r, i + 1);

		BUG_ON(memcmp(l, r, cpu_r->entry_size) > 0);

		err = "duplicate replicas entry";
		if (!memcmp(l, r, cpu_r->entry_size))
			goto err;
	}

	err = NULL;
err:
	/* kfree(NULL) is a no-op, so this handles the early-error paths: */
	kfree(cpu_r);
	return err;
}
519
/* Field ops hooking replicas-section validation into superblock checks: */
const struct bch_sb_field_ops bch_sb_field_ops_replicas = {
	.validate	= bch2_sb_validate_replicas,
};
523
524 int bch2_sb_replicas_to_text(struct bch_sb_field_replicas *r, char *buf, size_t size)
525 {
526         char *out = buf, *end = out + size;
527         struct bch_replicas_entry *e;
528         bool first = true;
529         unsigned i;
530
531         if (!r) {
532                 out += scnprintf(out, end - out, "(no replicas section found)");
533                 return out - buf;
534         }
535
536         for_each_replicas_entry(r, e) {
537                 if (!first)
538                         out += scnprintf(out, end - out, " ");
539                 first = false;
540
541                 out += scnprintf(out, end - out, "%u: [", e->data_type);
542
543                 for (i = 0; i < e->nr; i++)
544                         out += scnprintf(out, end - out,
545                                          i ? " %u" : "%u", e->devs[i]);
546                 out += scnprintf(out, end - out, "]");
547         }
548
549         return out - buf;
550 }
551
552 /* Query replicas: */
553
554 bool bch2_replicas_marked(struct bch_fs *c,
555                           enum bch_data_type data_type,
556                           struct bch_devs_list devs)
557 {
558         struct bch_replicas_cpu_entry search;
559         unsigned max_dev;
560         bool ret;
561
562         if (!devs.nr)
563                 return true;
564
565         devlist_to_replicas(devs, data_type, &search, &max_dev);
566
567         rcu_read_lock();
568         ret = replicas_has_entry(rcu_dereference(c->replicas),
569                                  search, max_dev);
570         rcu_read_unlock();
571
572         return ret;
573 }
574
575 bool bch2_bkey_replicas_marked(struct bch_fs *c,
576                                enum bch_data_type data_type,
577                                struct bkey_s_c k)
578 {
579         struct bch_devs_list cached = bch2_bkey_cached_devs(k);
580         unsigned i;
581
582         for (i = 0; i < cached.nr; i++)
583                 if (!bch2_replicas_marked(c, BCH_DATA_CACHED,
584                                           bch2_dev_list_single(cached.devs[i])))
585                         return false;
586
587         return bch2_replicas_marked(c, data_type, bch2_bkey_dirty_devs(k));
588 }
589
/*
 * For each data type, report the minimum number of online and the
 * maximum number of offline devices across all replicas entries of
 * that type, given the online-device bitmap @online_devs.  Data types
 * with no replicas entries are left at nr_online == UINT_MAX,
 * nr_offline == 0.
 */
struct replicas_status __bch2_replicas_status(struct bch_fs *c,
					      struct bch_devs_mask online_devs)
{
	struct bch_sb_field_members *mi;
	struct bch_replicas_cpu_entry *e;
	struct bch_replicas_cpu *r;
	unsigned i, dev, dev_slots, nr_online, nr_offline;
	struct replicas_status ret;

	memset(&ret, 0, sizeof(ret));

	/* start at UINT_MAX so min() below works for the first entry: */
	for (i = 0; i < ARRAY_SIZE(ret.replicas); i++)
		ret.replicas[i].nr_online = UINT_MAX;

	mi = bch2_sb_get_members(c->disk_sb.sb);
	rcu_read_lock();

	r = rcu_dereference(c->replicas);
	dev_slots = replicas_dev_slots(r);

	for_each_cpu_replicas_entry(r, e) {
		if (e->data_type >= ARRAY_SIZE(ret.replicas))
			panic("e %p data_type %u\n", e, e->data_type);

		nr_online = nr_offline = 0;

		/* count online/offline devices referenced by this entry: */
		for (dev = 0; dev < dev_slots; dev++) {
			if (!replicas_test_dev(e, dev))
				continue;

			BUG_ON(!bch2_dev_exists(c->disk_sb.sb, mi, dev));

			if (test_bit(dev, online_devs.d))
				nr_online++;
			else
				nr_offline++;
		}

		ret.replicas[e->data_type].nr_online =
			min(ret.replicas[e->data_type].nr_online,
			    nr_online);

		ret.replicas[e->data_type].nr_offline =
			max(ret.replicas[e->data_type].nr_offline,
			    nr_offline);
	}

	rcu_read_unlock();

	return ret;
}
641
/* Replicas status against the set of devices currently online: */
struct replicas_status bch2_replicas_status(struct bch_fs *c)
{
	return __bch2_replicas_status(c, bch2_online_devs(c));
}
646
647 static bool have_enough_devs(struct replicas_status s,
648                              enum bch_data_type type,
649                              bool force_if_degraded,
650                              bool force_if_lost)
651 {
652         return (!s.replicas[type].nr_offline || force_if_degraded) &&
653                 (s.replicas[type].nr_online || force_if_lost);
654 }
655
656 bool bch2_have_enough_devs(struct replicas_status s, unsigned flags)
657 {
658         return (have_enough_devs(s, BCH_DATA_JOURNAL,
659                                  flags & BCH_FORCE_IF_METADATA_DEGRADED,
660                                  flags & BCH_FORCE_IF_METADATA_LOST) &&
661                 have_enough_devs(s, BCH_DATA_BTREE,
662                                  flags & BCH_FORCE_IF_METADATA_DEGRADED,
663                                  flags & BCH_FORCE_IF_METADATA_LOST) &&
664                 have_enough_devs(s, BCH_DATA_USER,
665                                  flags & BCH_FORCE_IF_DATA_DEGRADED,
666                                  flags & BCH_FORCE_IF_DATA_LOST));
667 }
668
669 unsigned bch2_replicas_online(struct bch_fs *c, bool meta)
670 {
671         struct replicas_status s = bch2_replicas_status(c);
672
673         return meta
674                 ? min(s.replicas[BCH_DATA_JOURNAL].nr_online,
675                       s.replicas[BCH_DATA_BTREE].nr_online)
676                 : s.replicas[BCH_DATA_USER].nr_online;
677 }
678
679 unsigned bch2_dev_has_data(struct bch_fs *c, struct bch_dev *ca)
680 {
681         struct bch_replicas_cpu_entry *e;
682         struct bch_replicas_cpu *r;
683         unsigned ret = 0;
684
685         rcu_read_lock();
686         r = rcu_dereference(c->replicas);
687
688         if (ca->dev_idx >= replicas_dev_slots(r))
689                 goto out;
690
691         for_each_cpu_replicas_entry(r, e)
692                 if (replicas_test_dev(e, ca->dev_idx))
693                         ret |= 1 << e->data_type;
694 out:
695         rcu_read_unlock();
696
697         return ret;
698 }