1 #include "bcachefs.h"
2 #include "bkey_sort.h"
3 #include "bset.h"
4 #include "extents.h"
5
6 /* too many iterators, need to clean this up */
7
8 /* btree_node_iter_large: */
9
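/*
 * btree_node_iter_large iterates over an arbitrary number of sorted key
 * ranges within one btree node: data[] holds one (k, end) pair of node
 * offsets per range, maintained as a heap so that data[0] always points at
 * the next key in sort order.  It's used below to merge all the bsets of a
 * node read from disk.
 *
 * A sketch of how a caller might drive it (the bset iteration is
 * illustrative, not verbatim - the real caller is the btree node read path
 * in btree_io.c):
 *
 *	struct btree_node_iter_large iter;
 *
 *	memset(&iter, 0, sizeof(iter));
 *	for each bset i in the node
 *		bch2_btree_node_iter_large_push(&iter, b,
 *					i->start, vstruct_last(i));
 *
 *	nr = bch2_key_sort_fix_overlapping(dst, b, &iter);
 */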
#define btree_node_iter_cmp_heap(h, _l, _r) btree_node_iter_cmp(b, _l, _r)

static inline bool
bch2_btree_node_iter_large_end(struct btree_node_iter_large *iter)
{
	return !iter->used;
}

static inline struct bkey_packed *
bch2_btree_node_iter_large_peek_all(struct btree_node_iter_large *iter,
				    struct btree *b)
{
	return bch2_btree_node_iter_large_end(iter)
		? NULL
		: __btree_node_offset_to_key(b, iter->data->k);
}

static void
bch2_btree_node_iter_large_advance(struct btree_node_iter_large *iter,
				   struct btree *b)
{
	iter->data->k += __btree_node_offset_to_key(b, iter->data->k)->u64s;

	EBUG_ON(!iter->used);
	EBUG_ON(iter->data->k > iter->data->end);

	if (iter->data->k == iter->data->end)
		heap_del(iter, 0, btree_node_iter_cmp_heap, NULL);
	else
		heap_sift_down(iter, 0, btree_node_iter_cmp_heap, NULL);
}

static inline struct bkey_packed *
bch2_btree_node_iter_large_next_all(struct btree_node_iter_large *iter,
				    struct btree *b)
{
	struct bkey_packed *ret = bch2_btree_node_iter_large_peek_all(iter, b);

	if (ret)
		bch2_btree_node_iter_large_advance(iter, b);

	return ret;
}

void bch2_btree_node_iter_large_push(struct btree_node_iter_large *iter,
				     struct btree *b,
				     const struct bkey_packed *k,
				     const struct bkey_packed *end)
{
	if (k != end) {
		struct btree_node_iter_set n =
			((struct btree_node_iter_set) {
				 __btree_node_key_to_offset(b, k),
				 __btree_node_key_to_offset(b, end)
			 });

		__heap_add(iter, n, btree_node_iter_cmp_heap, NULL);
	}
}

static void sort_key_next(struct btree_node_iter_large *iter,
			  struct btree *b,
			  struct btree_node_iter_set *i)
{
	i->k += __btree_node_offset_to_key(b, i->k)->u64s;

	if (i->k == i->end)
		*i = iter->data[--iter->used];
}

/* regular sort_iters */

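/*
 * sort_iter is the simpler, array-based equivalent: data[] holds one
 * (k, end) range per source bset and is kept sorted by the insertion sifts
 * below, so data[0].k is always the next key in sort order.
 *
 * A sketch of typical usage - sort_iter_init()/sort_iter_add() live in the
 * headers, not in this file, so the names here follow the rest of the tree
 * rather than anything defined below:
 *
 *	struct sort_iter sort_iter;
 *	struct bset_tree *t;
 *
 *	sort_iter_init(&sort_iter, b);
 *	for_each_bset(b, t)
 *		sort_iter_add(&sort_iter,
 *			      btree_bkey_first(b, t),
 *			      btree_bkey_last(b, t));
 *
 *	u64s = bch2_sort_keys(out, &sort_iter, false);
 */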
typedef int (*sort_cmp_fn)(struct btree *,
			   struct bkey_packed *,
			   struct bkey_packed *);

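/*
 * A single insertion-sort pass: move data[from] towards the back of the
 * array until it's in sorted position.  data[] is small (one entry per
 * bset), so this is cheap.
 */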
static inline void __sort_iter_sift(struct sort_iter *iter,
				    unsigned from,
				    sort_cmp_fn cmp)
{
	unsigned i;

	for (i = from;
	     i + 1 < iter->used &&
	     cmp(iter->b, iter->data[i].k, iter->data[i + 1].k) > 0;
	     i++)
		swap(iter->data[i], iter->data[i + 1]);
}

static inline void sort_iter_sift(struct sort_iter *iter, sort_cmp_fn cmp)
{
	__sort_iter_sift(iter, 0, cmp);
}

static inline void sort_iter_sort(struct sort_iter *iter, sort_cmp_fn cmp)
{
	unsigned i = iter->used;

	while (i--)
		__sort_iter_sift(iter, i, cmp);
}

static inline struct bkey_packed *sort_iter_peek(struct sort_iter *iter)
{
	return iter->used ? iter->data->k : NULL;
}

static inline void sort_iter_advance(struct sort_iter *iter, sort_cmp_fn cmp)
{
	iter->data->k = bkey_next(iter->data->k);

	BUG_ON(iter->data->k > iter->data->end);

	if (iter->data->k == iter->data->end)
		array_remove_item(iter->data, iter->used, 0);
	else
		sort_iter_sift(iter, cmp);
}

static inline struct bkey_packed *sort_iter_next(struct sort_iter *iter,
						 sort_cmp_fn cmp)
{
	struct bkey_packed *ret = sort_iter_peek(iter);

	if (ret)
		sort_iter_advance(iter, cmp);

	return ret;
}

/*
 * Returns <0, 0 or >0, like bkey_cmp_packed(), but with ties broken by
 * position within the node: if two keys compare equal, the key at the
 * lower offset - the older one, since bsets are appended in order - sorts
 * first.
 *
 * Necessary for bch2_key_sort_fix_overlapping() - if there are multiple
 * keys that compare equal in different sets, we have to process them
 * oldest to newest, so that should_drop_next_key() can drop the ones that
 * have been overwritten.
 */
#define key_sort_cmp(h, l, r)						\
({									\
	bkey_cmp_packed(b,						\
			__btree_node_offset_to_key(b, (l).k),		\
			__btree_node_offset_to_key(b, (r).k))		\
									\
	?: (l).k - (r).k;						\
})

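/*
 * Returns true if the key at the top of the heap should be dropped: either
 * it's a whiteout, or it compares equal to the next key in sort order - in
 * which case it's the older of the two (see key_sort_cmp()) and has been
 * overwritten.
 */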
static inline bool should_drop_next_key(struct btree_node_iter_large *iter,
					struct btree *b)
{
	struct btree_node_iter_set *l = iter->data, *r = iter->data + 1;
	struct bkey_packed *k = __btree_node_offset_to_key(b, l->k);

	if (bkey_whiteout(k))
		return true;

	if (iter->used < 2)
		return false;

	if (iter->used > 2 &&
	    key_sort_cmp(iter, r[0], r[1]) >= 0)
		r++;

	/*
	 * key_sort_cmp() ensures that when keys compare equal the older key
	 * comes first; so if l->k compares equal to r->k then l->k is older
	 * and should be dropped.
	 */
	return !bkey_cmp_packed(b,
				__btree_node_offset_to_key(b, l->k),
				__btree_node_offset_to_key(b, r->k));
}

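/*
 * Merge the sorted ranges pushed onto @iter into @dst, dropping whiteouts
 * and keys that have been overwritten by a newer bset; returns accounting
 * for the keys that were kept.
 */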
struct btree_nr_keys bch2_key_sort_fix_overlapping(struct bset *dst,
					struct btree *b,
					struct btree_node_iter_large *iter)
{
	struct bkey_packed *out = dst->start;
	struct btree_nr_keys nr;

	memset(&nr, 0, sizeof(nr));

	heap_resort(iter, key_sort_cmp, NULL);

	while (!bch2_btree_node_iter_large_end(iter)) {
		if (!should_drop_next_key(iter, b)) {
			struct bkey_packed *k =
				__btree_node_offset_to_key(b, iter->data->k);

			bkey_copy(out, k);
			btree_keys_account_key_add(&nr, 0, out);
			out = bkey_next(out);
		}

		sort_key_next(iter, b, iter->data);
		heap_sift_down(iter, 0, key_sort_cmp, NULL);
	}

	dst->u64s = cpu_to_le16((u64 *) out - dst->_data);
	return nr;
}

/*
 * If extents compare equal by start position, compare by pointer order,
 * reversed - the key at the higher offset (the newer one) sorts first.
 *
 * Necessary for bch2_extent_sort_fix_overlapping() - when multiple keys
 * have the same start position, the newest one has to win, so it must come
 * off the heap first.
 */
#define extent_sort_cmp(h, l, r)					\
({									\
	struct bkey _ul = bkey_unpack_key(b,				\
				__btree_node_offset_to_key(b, (l).k));	\
	struct bkey _ur = bkey_unpack_key(b,				\
				__btree_node_offset_to_key(b, (r).k));	\
									\
	bkey_cmp(bkey_start_pos(&_ul),					\
		 bkey_start_pos(&_ur)) ?: (r).k - (l).k;		\
})

static inline void extent_sort_sift(struct btree_node_iter_large *iter,
				    struct btree *b, size_t i)
{
	heap_sift_down(iter, i, extent_sort_cmp, NULL);
}

static inline void extent_sort_next(struct btree_node_iter_large *iter,
				    struct btree *b,
				    struct btree_node_iter_set *i)
{
	sort_key_next(iter, b, i);
	heap_sift_down(iter, i - iter->data, extent_sort_cmp, NULL);
}

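/*
 * Append @k to the output: unpack it and try to merge it with the
 * previously appended key; if that fails, pack the previous key back in
 * place (in the btree node's format) and start a new one.  @*prev is kept
 * unpacked so the next key can be merged into it.
 */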
static void extent_sort_append(struct bch_fs *c,
			       struct btree *b,
			       struct btree_nr_keys *nr,
			       struct bkey_packed *start,
			       struct bkey_packed **prev,
			       struct bkey_packed *k)
{
	struct bkey_format *f = &b->format;
	BKEY_PADDED(k) tmp;

	if (bkey_whiteout(k))
		return;

	bch2_bkey_unpack(b, &tmp.k, k);

	if (*prev &&
	    bch2_bkey_merge(c, (void *) *prev, &tmp.k))
		return;

	if (*prev) {
		bch2_bkey_pack(*prev, (void *) *prev, f);

		btree_keys_account_key_add(nr, 0, *prev);
		*prev = bkey_next(*prev);
	} else {
		*prev = start;
	}

	bkey_copy(*prev, &tmp.k);
}

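/*
 * Merge the sorted ranges pushed onto @iter into @dst, resolving overlaps
 * between extents from different bsets: the newer of two overlapping
 * extents always wins, and the older one is trimmed, split or dropped to
 * make room.  E.g., with l older than r:
 *
 *	l: |--------------------|
 *	r:        |------|
 *
 * l is split: the part before r is emitted, and the part after r is kept
 * and resorted into the heap.
 */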
struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c,
					struct bset *dst,
					struct btree *b,
					struct btree_node_iter_large *iter)
{
	struct bkey_format *f = &b->format;
	struct btree_node_iter_set *_l = iter->data, *_r;
	struct bkey_packed *prev = NULL, *out, *lk, *rk;
	struct bkey l_unpacked, r_unpacked;
	struct bkey_s l, r;
	struct btree_nr_keys nr;

	memset(&nr, 0, sizeof(nr));

	heap_resort(iter, extent_sort_cmp, NULL);

	while (!bch2_btree_node_iter_large_end(iter)) {
		lk = __btree_node_offset_to_key(b, _l->k);

		if (iter->used == 1) {
			extent_sort_append(c, b, &nr, dst->start, &prev, lk);
			extent_sort_next(iter, b, _l);
			continue;
		}

		_r = iter->data + 1;
		if (iter->used > 2 &&
		    extent_sort_cmp(iter, _r[0], _r[1]) >= 0)
			_r++;

		rk = __btree_node_offset_to_key(b, _r->k);

		l = __bkey_disassemble(b, lk, &l_unpacked);
		r = __bkey_disassemble(b, rk, &r_unpacked);

		/* If current key and next key don't overlap, just append */
		if (bkey_cmp(l.k->p, bkey_start_pos(r.k)) <= 0) {
			extent_sort_append(c, b, &nr, dst->start, &prev, lk);
			extent_sort_next(iter, b, _l);
			continue;
		}

		/* Skip 0 size keys */
		if (!r.k->size) {
			extent_sort_next(iter, b, _r);
			continue;
		}

		/*
		 * overlap: keep the newer key and trim the older key so they
		 * don't overlap. comparing pointers tells us which one is
		 * newer, since the bsets are appended one after the other.
		 */

		/* can't happen because of comparison func */
		BUG_ON(_l->k < _r->k &&
		       !bkey_cmp(bkey_start_pos(l.k), bkey_start_pos(r.k)));

		if (_l->k > _r->k) {
			/* l wins, trim r */
			if (bkey_cmp(l.k->p, r.k->p) >= 0) {
				sort_key_next(iter, b, _r);
			} else {
				__bch2_cut_front(l.k->p, r);
				extent_save(b, rk, r.k);
			}

			extent_sort_sift(iter, b, _r - iter->data);
		} else if (bkey_cmp(l.k->p, r.k->p) > 0) {
			BKEY_PADDED(k) tmp;

			/*
			 * r wins, but it overlaps in the middle of l - split l:
			 */
			bkey_reassemble(&tmp.k, l.s_c);
			bch2_cut_back(bkey_start_pos(r.k), &tmp.k.k);

			__bch2_cut_front(r.k->p, l);
			extent_save(b, lk, l.k);

			extent_sort_sift(iter, b, 0);

			extent_sort_append(c, b, &nr, dst->start, &prev,
					   bkey_to_packed(&tmp.k));
		} else {
			bch2_cut_back(bkey_start_pos(r.k), l.k);
			extent_save(b, lk, l.k);
		}
	}

	if (prev) {
		bch2_bkey_pack(prev, (void *) prev, f);
		btree_keys_account_key_add(&nr, 0, prev);
		out = bkey_next(prev);
	} else {
		out = dst->start;
	}

	dst->u64s = cpu_to_le16((u64 *) out - dst->_data);
	return nr;
}

/*
 * Sort + repack in a new format: keys that fit are transformed into the
 * new format; keys that can't be packed are stored unpacked:
 */
struct btree_nr_keys
bch2_sort_repack(struct bset *dst, struct btree *src,
		 struct btree_node_iter *src_iter,
		 struct bkey_format *out_f,
		 bool filter_whiteouts)
{
	struct bkey_format *in_f = &src->format;
	struct bkey_packed *in, *out = vstruct_last(dst);
	struct btree_nr_keys nr;

	memset(&nr, 0, sizeof(nr));

	while ((in = bch2_btree_node_iter_next_all(src_iter, src))) {
		if (filter_whiteouts && bkey_whiteout(in))
			continue;

		if (bch2_bkey_transform(out_f, out, bkey_packed(in)
				       ? in_f : &bch2_bkey_format_current, in))
			out->format = KEY_FORMAT_LOCAL_BTREE;
		else
			bch2_bkey_unpack(src, (void *) out, in);

		btree_keys_account_key_add(&nr, 0, out);
		out = bkey_next(out);
	}

	dst->u64s = cpu_to_le16((u64 *) out - dst->_data);
	return nr;
}

/*
 * Sort, repack, and merge: like bch2_sort_repack(), but also normalizes
 * and filters whiteouts (when @filter_whiteouts is set) and merges
 * adjacent mergeable keys:
 */
struct btree_nr_keys
bch2_sort_repack_merge(struct bch_fs *c,
		       struct bset *dst, struct btree *src,
		       struct btree_node_iter *iter,
		       struct bkey_format *out_f,
		       bool filter_whiteouts)
{
	struct bkey_packed *k, *prev = NULL, *out;
	struct btree_nr_keys nr;
	BKEY_PADDED(k) tmp;

	memset(&nr, 0, sizeof(nr));

	while ((k = bch2_btree_node_iter_next_all(iter, src))) {
		if (filter_whiteouts && bkey_whiteout(k))
			continue;

		/*
		 * The filter might modify pointers, so we have to unpack the
		 * key and values to &tmp.k:
		 */
		bch2_bkey_unpack(src, &tmp.k, k);

		if (filter_whiteouts &&
		    bch2_bkey_normalize(c, bkey_i_to_s(&tmp.k)))
			continue;

		/* prev is always unpacked, for key merging: */

		if (prev &&
		    bch2_bkey_merge(c, (void *) prev, &tmp.k) ==
		    BCH_MERGE_MERGE)
			continue;

		/*
		 * the current key becomes the new prev: advance prev, then
		 * copy the current key - but first pack prev (in place):
		 */
		if (prev) {
			bch2_bkey_pack(prev, (void *) prev, out_f);

			btree_keys_account_key_add(&nr, 0, prev);
			prev = bkey_next(prev);
		} else {
			prev = vstruct_last(dst);
		}

		bkey_copy(prev, &tmp.k);
	}

	if (prev) {
		bch2_bkey_pack(prev, (void *) prev, out_f);
		btree_keys_account_key_add(&nr, 0, prev);
		out = bkey_next(prev);
	} else {
		out = vstruct_last(dst);
	}

	dst->u64s = cpu_to_le16((u64 *) out - dst->_data);
	return nr;
}

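/*
 * When keys compare equal, whiteouts sort first; among equal whiteouts,
 * those with needs_whiteout set sort last.  bch2_sort_keys() below relies
 * on this ordering to merge and drop redundant whiteouts.
 */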
static inline int sort_keys_cmp(struct btree *b,
				struct bkey_packed *l,
				struct bkey_packed *r)
{
	return bkey_cmp_packed(b, l, r) ?:
		(int) bkey_whiteout(r) - (int) bkey_whiteout(l) ?:
		(int) l->needs_whiteout - (int) r->needs_whiteout;
}

unsigned bch2_sort_keys(struct bkey_packed *dst,
			struct sort_iter *iter,
			bool filter_whiteouts)
{
	const struct bkey_format *f = &iter->b->format;
	struct bkey_packed *in, *next, *out = dst;

	sort_iter_sort(iter, sort_keys_cmp);

	while ((in = sort_iter_next(iter, sort_keys_cmp))) {
		if (bkey_whiteout(in) &&
		    (filter_whiteouts || !in->needs_whiteout))
			continue;

		if (bkey_whiteout(in) &&
		    (next = sort_iter_peek(iter)) &&
		    !bkey_cmp_packed(iter->b, in, next)) {
			BUG_ON(in->needs_whiteout &&
			       next->needs_whiteout);
			/*
			 * XXX racy, called with read lock from write path
			 *
			 * leads to spurious BUG_ON() in bkey_unpack_key() in
			 * debug mode
			 */
			next->needs_whiteout |= in->needs_whiteout;
			continue;
		}

		if (bkey_whiteout(in)) {
			memcpy_u64s(out, in, bkeyp_key_u64s(f, in));
			set_bkeyp_val_u64s(f, out, 0);
		} else {
			bkey_copy(out, in);
		}
		out = bkey_next(out);
	}

	return (u64 *) out - (u64 *) dst;
}

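/*
 * For extents, keys that compare equal are ordered so that deleted keys
 * sort last - bch2_sort_extents() drops them as it goes:
 */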
static inline int sort_extents_cmp(struct btree *b,
				   struct bkey_packed *l,
				   struct bkey_packed *r)
{
	return bkey_cmp_packed(b, l, r) ?:
		(int) bkey_deleted(l) - (int) bkey_deleted(r);
}

unsigned bch2_sort_extents(struct bkey_packed *dst,
			   struct sort_iter *iter,
			   bool filter_whiteouts)
{
	struct bkey_packed *in, *out = dst;

	sort_iter_sort(iter, sort_extents_cmp);

	while ((in = sort_iter_next(iter, sort_extents_cmp))) {
		if (bkey_deleted(in))
			continue;

		if (bkey_whiteout(in) &&
		    (filter_whiteouts || !in->needs_whiteout))
			continue;

		bkey_copy(out, in);
		out = bkey_next(out);
	}

	return (u64 *) out - (u64 *) dst;
}

static inline int sort_key_whiteouts_cmp(struct btree *b,
					 struct bkey_packed *l,
					 struct bkey_packed *r)
{
	return bkey_cmp_packed(b, l, r);
}

unsigned bch2_sort_key_whiteouts(struct bkey_packed *dst,
				 struct sort_iter *iter)
{
	struct bkey_packed *in, *out = dst;

	sort_iter_sort(iter, sort_key_whiteouts_cmp);

	while ((in = sort_iter_next(iter, sort_key_whiteouts_cmp))) {
		bkey_copy(out, in);
		out = bkey_next(out);
	}

	return (u64 *) out - (u64 *) dst;
}

static inline int sort_extent_whiteouts_cmp(struct btree *b,
					    struct bkey_packed *l,
					    struct bkey_packed *r)
{
	struct bkey ul = bkey_unpack_key(b, l);
	struct bkey ur = bkey_unpack_key(b, r);

	return bkey_cmp(bkey_start_pos(&ul), bkey_start_pos(&ur));
}

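/*
 * Extent whiteouts (zero size, KEY_TYPE_discard keys) only mark ranges as
 * deleted, so adjacent and overlapping whiteouts can be merged: the
 * previous whiteout is extended to cover the next one, as far as the
 * packed format's size/offset fields allow, and the next one is trimmed at
 * the front to remove whatever overlap remains.
 */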
unsigned bch2_sort_extent_whiteouts(struct bkey_packed *dst,
				    struct sort_iter *iter)
{
	const struct bkey_format *f = &iter->b->format;
	struct bkey_packed *in, *out = dst;
	struct bkey_i l, r;
	bool prev = false, l_packed = false;
	u64 max_packed_size	= bkey_field_max(f, BKEY_FIELD_SIZE);
	u64 max_packed_offset	= bkey_field_max(f, BKEY_FIELD_OFFSET);
	u64 new_size;

	max_packed_size = min_t(u64, max_packed_size, KEY_SIZE_MAX);

	sort_iter_sort(iter, sort_extent_whiteouts_cmp);

	while ((in = sort_iter_next(iter, sort_extent_whiteouts_cmp))) {
		if (bkey_deleted(in))
			continue;

		EBUG_ON(bkeyp_val_u64s(f, in));
		EBUG_ON(in->type != KEY_TYPE_discard);

		r.k = bkey_unpack_key(iter->b, in);

		if (prev &&
		    bkey_cmp(l.k.p, bkey_start_pos(&r.k)) >= 0) {
			if (bkey_cmp(l.k.p, r.k.p) >= 0)
				continue;

			new_size = l_packed
				? min(max_packed_size, max_packed_offset -
				      bkey_start_offset(&l.k))
				: KEY_SIZE_MAX;

			new_size = min(new_size, r.k.p.offset -
				       bkey_start_offset(&l.k));

			BUG_ON(new_size < l.k.size);

			bch2_key_resize(&l.k, new_size);

			if (bkey_cmp(l.k.p, r.k.p) >= 0)
				continue;

			bch2_cut_front(l.k.p, &r);
		}

		if (prev) {
			if (!bch2_bkey_pack(out, &l, f)) {
				BUG_ON(l_packed);
				bkey_copy(out, &l);
			}
			out = bkey_next(out);
		}

		l = r;
		prev = true;
		l_packed = bkey_packed(in);
	}

	if (prev) {
		if (!bch2_bkey_pack(out, &l, f)) {
			BUG_ON(l_packed);
			bkey_copy(out, &l);
		}
		out = bkey_next(out);
	}

	return (u64 *) out - (u64 *) dst;
}