libbcachefs/bkey_sort.c
1 // SPDX-License-Identifier: GPL-2.0
2 #include "bcachefs.h"
3 #include "bkey_sort.h"
4 #include "bset.h"
5 #include "extents.h"
6
7 /* too many iterators, need to clean this up */
8
9 /* btree_node_iter_large: */
10
11 #define btree_node_iter_cmp_heap(h, _l, _r) btree_node_iter_cmp(b, _l, _r)
12
13 static inline bool
14 bch2_btree_node_iter_large_end(struct btree_node_iter_large *iter)
15 {
16         return !iter->used;
17 }
18
19 static inline struct bkey_packed *
20 bch2_btree_node_iter_large_peek_all(struct btree_node_iter_large *iter,
21                                     struct btree *b)
22 {
23         return bch2_btree_node_iter_large_end(iter)
24                 ? NULL
25                 : __btree_node_offset_to_key(b, iter->data->k);
26 }
27
28 static void
29 bch2_btree_node_iter_large_advance(struct btree_node_iter_large *iter,
30                                    struct btree *b)
31 {
32         iter->data->k += __btree_node_offset_to_key(b, iter->data->k)->u64s;
33
34         EBUG_ON(!iter->used);
35         EBUG_ON(iter->data->k > iter->data->end);
36
37         if (iter->data->k == iter->data->end)
38                 heap_del(iter, 0, btree_node_iter_cmp_heap, NULL);
39         else
40                 heap_sift_down(iter, 0, btree_node_iter_cmp_heap, NULL);
41 }
42
43 static inline struct bkey_packed *
44 bch2_btree_node_iter_large_next_all(struct btree_node_iter_large *iter,
45                                     struct btree *b)
46 {
47         struct bkey_packed *ret = bch2_btree_node_iter_large_peek_all(iter, b);
48
49         if (ret)
50                 bch2_btree_node_iter_large_advance(iter, b);
51
52         return ret;
53 }
54
55 void bch2_btree_node_iter_large_push(struct btree_node_iter_large *iter,
56                                      struct btree *b,
57                                      const struct bkey_packed *k,
58                                      const struct bkey_packed *end)
59 {
60         if (k != end) {
61                 struct btree_node_iter_set n =
62                         ((struct btree_node_iter_set) {
63                                  __btree_node_key_to_offset(b, k),
64                                  __btree_node_key_to_offset(b, end)
65                          });
66
67                 __heap_add(iter, n, btree_node_iter_cmp_heap, NULL);
68         }
69 }
70
71 static void sort_key_next(struct btree_node_iter_large *iter,
72                           struct btree *b,
73                           struct btree_node_iter_set *i)
74 {
75         i->k += __btree_node_offset_to_key(b, i->k)->u64s;
76
77         if (i->k == i->end)
78                 *i = iter->data[--iter->used];
79 }
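
/*
 * The helpers above implement a k-way merge: iter->data[] is a binary
 * min-heap of (k, end) cursors into the node's sorted bsets, so repeatedly
 * taking the heap top and sifting down yields every key in ascending order.
 * Below is a minimal, self-contained sketch of the same idea over plain int
 * arrays; merge_cursor, merge_heap and friends are invented names and this
 * is illustrative only, not used by the code in this file.
 */
struct merge_cursor {
        const int               *k;     /* next element of this sorted run */
        const int               *end;   /* one past the last element */
};

struct merge_heap {
        unsigned                used;
        struct merge_cursor     data[8];
};

static int merge_cursor_cmp(const struct merge_cursor *l,
                            const struct merge_cursor *r)
{
        return *l->k < *r->k ? -1 : *l->k > *r->k;
}

static void merge_sift_down(struct merge_heap *h, unsigned i)
{
        struct merge_cursor tmp;
        unsigned c;

        while ((c = i * 2 + 1) < h->used) {
                if (c + 1 < h->used &&
                    merge_cursor_cmp(&h->data[c + 1], &h->data[c]) < 0)
                        c++;

                if (merge_cursor_cmp(&h->data[c], &h->data[i]) >= 0)
                        break;

                tmp             = h->data[i];
                h->data[i]      = h->data[c];
                h->data[c]      = tmp;
                i               = c;
        }
}

/* Pop the globally smallest element, advancing the run it came from: */
static bool merge_next(struct merge_heap *h, int *out)
{
        if (!h->used)
                return false;

        *out = *h->data[0].k++;

        if (h->data[0].k == h->data[0].end)
                h->data[0] = h->data[--h->used];

        merge_sift_down(h, 0);
        return true;
}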
80
81 /* regular sort_iters */
82
83 typedef int (*sort_cmp_fn)(struct btree *,
84                            struct bkey_packed *,
85                            struct bkey_packed *);
86
87 static inline void __sort_iter_sift(struct sort_iter *iter,
88                                     unsigned from,
89                                     sort_cmp_fn cmp)
90 {
91         unsigned i;
92
93         for (i = from;
94              i + 1 < iter->used &&
95              cmp(iter->b, iter->data[i].k, iter->data[i + 1].k) > 0;
96              i++)
97                 swap(iter->data[i], iter->data[i + 1]);
98 }
99
100 static inline void sort_iter_sift(struct sort_iter *iter, sort_cmp_fn cmp)
101 {
102
103         __sort_iter_sift(iter, 0, cmp);
104 }
105
106 static inline void sort_iter_sort(struct sort_iter *iter, sort_cmp_fn cmp)
107 {
108         unsigned i = iter->used;
109
110         while (i--)
111                 __sort_iter_sift(iter, i, cmp);
112 }
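
/*
 * A sort_iter only ever holds a handful of runs, so instead of a heap it
 * keeps data[] sorted by each run's current head: __sort_iter_sift() bubbles
 * entry `from` forward while it compares greater than its successor, and
 * sort_iter_sort() establishes the initial order by sifting every entry,
 * back to front. A small sketch of the sift on int cursors; sift_run and
 * runs_sift() are invented names, illustrative only.
 */
struct sift_run {
        const int *k, *end;
};

/* Restore ordering of runs[from..used) after runs[from].k moved forward: */
static void runs_sift(struct sift_run *runs, unsigned used, unsigned from)
{
        struct sift_run tmp;
        unsigned i;

        for (i = from;
             i + 1 < used && *runs[i].k > *runs[i + 1].k;
             i++) {
                tmp             = runs[i];
                runs[i]         = runs[i + 1];
                runs[i + 1]     = tmp;
        }
}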
113
114 static inline struct bkey_packed *sort_iter_peek(struct sort_iter *iter)
115 {
116         return iter->used ? iter->data->k : NULL;
117 }
118
119 static inline void sort_iter_advance(struct sort_iter *iter, sort_cmp_fn cmp)
120 {
121         iter->data->k = bkey_next(iter->data->k);
122
123         BUG_ON(iter->data->k > iter->data->end);
124
125         if (iter->data->k == iter->data->end)
126                 array_remove_item(iter->data, iter->used, 0);
127         else
128                 sort_iter_sift(iter, cmp);
129 }
130
131 static inline struct bkey_packed *sort_iter_next(struct sort_iter *iter,
132                                                  sort_cmp_fn cmp)
133 {
134         struct bkey_packed *ret = sort_iter_peek(iter);
135
136         if (ret)
137                 sort_iter_advance(iter, cmp);
138
139         return ret;
140 }
141
142 /*
143  * Returns a positive value if l > r; if l == r, returns a positive value
144  * if l is newer than r (i.e. at a higher offset within the node).
145  *
146  * Necessary for bch2_key_sort_fix_overlapping() - when keys compare equal in
147  * different sets, the oldest sorts first so that all but the newest are dropped.
148  */
149 #define key_sort_cmp(h, l, r)                                           \
150 ({                                                                      \
151         bkey_cmp_packed(b,                                              \
152                         __btree_node_offset_to_key(b, (l).k),           \
153                         __btree_node_offset_to_key(b, (r).k))           \
154                                                                         \
155         ?: (l).k - (r).k;                                               \
156 })
157
158 static inline bool should_drop_next_key(struct btree_node_iter_large *iter,
159                                         struct btree *b)
160 {
161         struct btree_node_iter_set *l = iter->data, *r = iter->data + 1;
162         struct bkey_packed *k = __btree_node_offset_to_key(b, l->k);
163
164         if (bkey_whiteout(k))
165                 return true;
166
167         if (iter->used < 2)
168                 return false;
169
170         if (iter->used > 2 &&
171             key_sort_cmp(iter, r[0], r[1]) >= 0)
172                 r++;
173
174         /*
175          * key_sort_cmp() ensures that when keys compare equal the older key
176          * comes first; so if l->k compares equal to r->k then l->k is older and
177          * should be dropped.
178          */
179         return !bkey_cmp_packed(b,
180                                 __btree_node_offset_to_key(b, l->k),
181                                 __btree_node_offset_to_key(b, r->k));
182 }
183
184 struct btree_nr_keys bch2_key_sort_fix_overlapping(struct bset *dst,
185                                         struct btree *b,
186                                         struct btree_node_iter_large *iter)
187 {
188         struct bkey_packed *out = dst->start;
189         struct btree_nr_keys nr;
190
191         memset(&nr, 0, sizeof(nr));
192
193         heap_resort(iter, key_sort_cmp, NULL);
194
195         while (!bch2_btree_node_iter_large_end(iter)) {
196                 if (!should_drop_next_key(iter, b)) {
197                         struct bkey_packed *k =
198                                 __btree_node_offset_to_key(b, iter->data->k);
199
200                         bkey_copy(out, k);
201                         btree_keys_account_key_add(&nr, 0, out);
202                         out = bkey_next(out);
203                 }
204
205                 sort_key_next(iter, b, iter->data);
206                 heap_sift_down(iter, 0, key_sort_cmp, NULL);
207         }
208
209         dst->u64s = cpu_to_le16((u64 *) out - dst->_data);
210         return nr;
211 }
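
/*
 * To see why the ordering from key_sort_cmp() makes the loop above correct,
 * here is the drop-older-duplicates pass written out over a single,
 * already-merged array: duplicates are adjacent and ordered oldest first, so
 * an entry is dropped exactly when the entry after it carries the same key.
 * Whiteout dropping (the bkey_whiteout() check in should_drop_next_key()) is
 * omitted; versioned_key and drop_stale_versions() are invented names,
 * illustrative only.
 */
struct versioned_key {
        int             key;
        unsigned        age;    /* higher = from a newer bset */
};

/* in[] is sorted by (key, age): keep only the newest version of each key. */
static unsigned drop_stale_versions(const struct versioned_key *in, unsigned n,
                                    struct versioned_key *out)
{
        unsigned i, ret = 0;

        for (i = 0; i < n; i++)
                if (i + 1 == n || in[i + 1].key != in[i].key)
                        out[ret++] = in[i];

        return ret;
}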
212
213 /*
214  * If start positions compare equal, the newer (higher offset) key sorts first:
215  *
216  * Necessary for bch2_extent_sort_fix_overlapping() - equal keys in
217  * different sets must be processed newest to oldest.
218  */
219 #define extent_sort_cmp(h, l, r)                                        \
220 ({                                                                      \
221         struct bkey _ul = bkey_unpack_key(b,                            \
222                                 __btree_node_offset_to_key(b, (l).k));  \
223         struct bkey _ur = bkey_unpack_key(b,                            \
224                                 __btree_node_offset_to_key(b, (r).k));  \
225                                                                         \
226         bkey_cmp(bkey_start_pos(&_ul),                                  \
227                  bkey_start_pos(&_ur)) ?: (r).k - (l).k;                \
228 })
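
/*
 * Note the tie-break runs the other way round from key_sort_cmp(): there the
 * oldest of several equal keys sorts first, here the newest (highest offset)
 * extent with a given start position sorts first, so the overlap-fixing loop
 * below sees the newest extent before the older ones it may have to trim.
 * A standalone sketch of this comparator; ext_cursor and its fields are
 * invented, illustrative only.
 */
struct ext_cursor {
        u64             start;  /* bkey_start_pos() equivalent */
        unsigned        offset; /* offset within the node: higher = newer */
};

static int ext_cursor_cmp(const struct ext_cursor *l,
                          const struct ext_cursor *r)
{
        if (l->start != r->start)
                return l->start < r->start ? -1 : 1;

        /* equal starts: the newer cursor compares smaller, so it sorts first */
        return l->offset < r->offset ? 1 : l->offset > r->offset ? -1 : 0;
}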
229
230 static inline void extent_sort_sift(struct btree_node_iter_large *iter,
231                                     struct btree *b, size_t i)
232 {
233         heap_sift_down(iter, i, extent_sort_cmp, NULL);
234 }
235
236 static inline void extent_sort_next(struct btree_node_iter_large *iter,
237                                     struct btree *b,
238                                     struct btree_node_iter_set *i)
239 {
240         sort_key_next(iter, b, i);
241         heap_sift_down(iter, i - iter->data, extent_sort_cmp, NULL);
242 }
243
244 static void extent_sort_advance_prev(struct bkey_format *f,
245                                      struct btree_nr_keys *nr,
246                                      struct bkey_packed *start,
247                                      struct bkey_packed **prev)
248 {
249         if (*prev) {
250                 bch2_bkey_pack(*prev, (void *) *prev, f);
251
252                 btree_keys_account_key_add(nr, 0, *prev);
253                 *prev = bkey_next(*prev);
254         } else {
255                 *prev = start;
256         }
257 }
258
259 static void extent_sort_append(struct bch_fs *c,
260                                struct bkey_format *f,
261                                struct btree_nr_keys *nr,
262                                struct bkey_packed *start,
263                                struct bkey_packed **prev,
264                                struct bkey_s k)
265 {
266         if (bkey_whiteout(k.k))
267                 return;
268
269         /*
270          * prev is always unpacked, for key merging - until right before we
271          * advance it:
272          */
273
274         if (*prev &&
275             bch2_bkey_merge(c, bkey_i_to_s((void *) *prev), k) ==
276             BCH_MERGE_MERGE)
277                 return;
278
279         extent_sort_advance_prev(f, nr, start, prev);
280
281         bkey_reassemble((void *) *prev, k.s_c);
282 }
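
/*
 * extent_sort_append() stages the most recently emitted key, unpacked, in
 * *prev so that the next key can still be merged into it with
 * bch2_bkey_merge(); only when a key cannot be merged is the staged key
 * packed and counted (extent_sort_advance_prev()) and the new key staged in
 * its place. A sketch of the same stage/try-merge/flush pattern on
 * [start, end) ranges, assuming input arrives sorted by start; range_out,
 * range_append() and range_flush() are invented names, illustrative only.
 * A caller appends its ranges one by one and calls range_flush() at the end,
 * mirroring the extent_sort_advance_prev() call after the main loop in
 * bch2_extent_sort_fix_overlapping() below.
 */
struct range_out {
        unsigned        staged_start, staged_end;
        bool            have_staged;
        unsigned        (*dst)[2];      /* output array of {start, end} pairs */
        unsigned        nr;
};

static void range_flush(struct range_out *o)
{
        if (o->have_staged) {
                o->dst[o->nr][0]        = o->staged_start;
                o->dst[o->nr][1]        = o->staged_end;
                o->nr++;
                o->have_staged          = false;
        }
}

static void range_append(struct range_out *o, unsigned start, unsigned end)
{
        if (o->have_staged && start <= o->staged_end) {
                /* mergeable: extend the staged range instead of emitting */
                if (end > o->staged_end)
                        o->staged_end = end;
                return;
        }

        range_flush(o);

        o->staged_start = start;
        o->staged_end   = end;
        o->have_staged  = true;
}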
283
284 struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c,
285                                         struct bset *dst,
286                                         struct btree *b,
287                                         struct btree_node_iter_large *iter)
288 {
289         struct bkey_format *f = &b->format;
290         struct btree_node_iter_set *_l = iter->data, *_r;
291         struct bkey_packed *prev = NULL, *lk, *rk;
292         struct bkey l_unpacked, r_unpacked;
293         struct bkey_s l, r;
294         struct btree_nr_keys nr;
295
296         memset(&nr, 0, sizeof(nr));
297
298         heap_resort(iter, extent_sort_cmp, NULL);
299
300         while (!bch2_btree_node_iter_large_end(iter)) {
301                 lk = __btree_node_offset_to_key(b, _l->k);
302                 l = __bkey_disassemble(b, lk, &l_unpacked);
303
304                 if (iter->used == 1) {
305                         extent_sort_append(c, f, &nr, dst->start, &prev, l);
306                         extent_sort_next(iter, b, _l);
307                         continue;
308                 }
309
310                 _r = iter->data + 1;
311                 if (iter->used > 2 &&
312                     extent_sort_cmp(iter, _r[0], _r[1]) >= 0)
313                         _r++;
314
315                 rk = __btree_node_offset_to_key(b, _r->k);
316                 r = __bkey_disassemble(b, rk, &r_unpacked);
317
318                 /* If current key and next key don't overlap, just append */
319                 if (bkey_cmp(l.k->p, bkey_start_pos(r.k)) <= 0) {
320                         extent_sort_append(c, f, &nr, dst->start, &prev, l);
321                         extent_sort_next(iter, b, _l);
322                         continue;
323                 }
324
325                 /* Skip 0 size keys */
326                 if (!r.k->size) {
327                         extent_sort_next(iter, b, _r);
328                         continue;
329                 }
330
331                 /*
332                  * overlap: keep the newer key and trim the older key so they
333                  * don't overlap. comparing pointers tells us which one is
334                  * newer, since the bsets are appended one after the other.
335                  */
336
337                 /* can't happen because of comparison func */
338                 BUG_ON(_l->k < _r->k &&
339                        !bkey_cmp(bkey_start_pos(l.k), bkey_start_pos(r.k)));
340
341                 if (_l->k > _r->k) {
342                         /* l wins, trim r */
343                         if (bkey_cmp(l.k->p, r.k->p) >= 0) {
344                                 sort_key_next(iter, b, _r);
345                         } else {
346                                 __bch2_cut_front(l.k->p, r);
347                                 extent_save(b, rk, r.k);
348                         }
349
350                         extent_sort_sift(iter, b, _r - iter->data);
351                 } else if (bkey_cmp(l.k->p, r.k->p) > 0) {
352                         BKEY_PADDED(k) tmp;
353
354                         /*
355                          * r wins, but it overlaps in the middle of l - split l:
356                          */
357                         bkey_reassemble(&tmp.k, l.s_c);
358                         bch2_cut_back(bkey_start_pos(r.k), &tmp.k.k);
359
360                         __bch2_cut_front(r.k->p, l);
361                         extent_save(b, lk, l.k);
362
363                         extent_sort_sift(iter, b, 0);
364
365                         extent_sort_append(c, f, &nr, dst->start,
366                                            &prev, bkey_i_to_s(&tmp.k));
367                 } else {
368                         bch2_cut_back(bkey_start_pos(r.k), l.k);
369                         extent_save(b, lk, l.k);
370                 }
371         }
372
373         extent_sort_advance_prev(f, &nr, dst->start, &prev);
374
375         dst->u64s = cpu_to_le16((u64 *) prev - dst->_data);
376         return nr;
377 }
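
/*
 * The overlap handling above always keeps the newer key's data intact and
 * adjusts the older key around it: the older key is dropped when fully
 * covered, cut at the front or back when partially covered, and split in two
 * when the newer key lands strictly inside it. A self-contained sketch of
 * just that case analysis on [start, end) intervals; it assumes the caller
 * already knows the two extents overlap (as the loop above checks before
 * getting here), and the in-place cut/repack bookkeeping of the real code is
 * omitted. sketch_extent and trim_older_extent() are invented names,
 * illustrative only.
 */
struct sketch_extent {
        u64     start, end;     /* [start, end) */
};

/*
 * Trim `older` against an overlapping `newer`; surviving pieces of `older`
 * are written to out[]. Returns 0 (dropped), 1 (trimmed) or 2 (split).
 */
static unsigned trim_older_extent(struct sketch_extent older,
                                  struct sketch_extent newer,
                                  struct sketch_extent *out)
{
        unsigned nr = 0;

        if (newer.start <= older.start && newer.end >= older.end)
                return 0;                               /* fully covered */

        if (older.start < newer.start)                  /* piece before newer */
                out[nr++] = (struct sketch_extent) { older.start, newer.start };

        if (older.end > newer.end)                      /* piece after newer */
                out[nr++] = (struct sketch_extent) { newer.end, older.end };

        return nr;
}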
378
379 /* Sort + repack in a new format: */
380 struct btree_nr_keys
381 bch2_sort_repack(struct bset *dst, struct btree *src,
382                  struct btree_node_iter *src_iter,
383                  struct bkey_format *out_f,
384                  bool filter_whiteouts)
385 {
386         struct bkey_format *in_f = &src->format;
387         struct bkey_packed *in, *out = vstruct_last(dst);
388         struct btree_nr_keys nr;
389
390         memset(&nr, 0, sizeof(nr));
391
392         while ((in = bch2_btree_node_iter_next_all(src_iter, src))) {
393                 if (filter_whiteouts && bkey_whiteout(in))
394                         continue;
395
396                 if (bch2_bkey_transform(out_f, out, bkey_packed(in)
397                                        ? in_f : &bch2_bkey_format_current, in))
398                         out->format = KEY_FORMAT_LOCAL_BTREE;
399                 else
400                         bch2_bkey_unpack(src, (void *) out, in);
401
402                 btree_keys_account_key_add(&nr, 0, out);
403                 out = bkey_next(out);
404         }
405
406         dst->u64s = cpu_to_le16((u64 *) out - dst->_data);
407         return nr;
408 }
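
/*
 * The repack loop above first tries bch2_bkey_transform() - a field-by-field
 * repack into the new format - and stores the key unpacked when that fails,
 * typically because some field value cannot be represented in the new
 * format's field width. A sketch of the fits-in-field test behind such a
 * decision; field_fmt and its offset/bits layout are an invented
 * simplification of struct bkey_format, illustrative only.
 */
struct field_fmt {
        u64             field_offset;   /* values are stored biased by this */
        unsigned        bits;           /* width of the packed field */
};

static bool value_fits_field(struct field_fmt f, u64 v)
{
        if (v < f.field_offset)
                return false;

        v -= f.field_offset;

        if (!f.bits)
                return !v;

        return f.bits >= 64 || !(v >> f.bits);
}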
409
410 /* Sort, repack, and merge: */
411 struct btree_nr_keys
412 bch2_sort_repack_merge(struct bch_fs *c,
413                        struct bset *dst, struct btree *src,
414                        struct btree_node_iter *iter,
415                        struct bkey_format *out_f,
416                        bool filter_whiteouts)
417 {
418         struct bkey_packed *prev = NULL, *k_packed;
419         struct bkey_s k;
420         struct btree_nr_keys nr;
421         struct bkey unpacked;
422
423         memset(&nr, 0, sizeof(nr));
424
425         while ((k_packed = bch2_btree_node_iter_next_all(iter, src))) {
426                 if (filter_whiteouts && bkey_whiteout(k_packed))
427                         continue;
428
429                 k = __bkey_disassemble(src, k_packed, &unpacked);
430
431                 if (filter_whiteouts &&
432                     bch2_bkey_normalize(c, k))
433                         continue;
434
435                 extent_sort_append(c, out_f, &nr, vstruct_last(dst), &prev, k);
436         }
437
438         extent_sort_advance_prev(out_f, &nr, vstruct_last(dst), &prev);
439
440         dst->u64s = cpu_to_le16((u64 *) prev - dst->_data);
441         return nr;
442 }
443
444 static inline int sort_keys_cmp(struct btree *b,
445                                 struct bkey_packed *l,
446                                 struct bkey_packed *r)
447 {
448         return bkey_cmp_packed(b, l, r) ?:
449                 (int) bkey_whiteout(r) - (int) bkey_whiteout(l) ?:
450                 (int) l->needs_whiteout - (int) r->needs_whiteout;
451 }
452
453 unsigned bch2_sort_keys(struct bkey_packed *dst,
454                         struct sort_iter *iter,
455                         bool filter_whiteouts)
456 {
457         const struct bkey_format *f = &iter->b->format;
458         struct bkey_packed *in, *next, *out = dst;
459
460         sort_iter_sort(iter, sort_keys_cmp);
461
462         while ((in = sort_iter_next(iter, sort_keys_cmp))) {
463                 if (bkey_whiteout(in) &&
464                     (filter_whiteouts || !in->needs_whiteout))
465                         continue;
466
467                 if (bkey_whiteout(in) &&
468                     (next = sort_iter_peek(iter)) &&
469                     !bkey_cmp_packed(iter->b, in, next)) {
470                         BUG_ON(in->needs_whiteout &&
471                                next->needs_whiteout);
472                         /*
473                          * XXX racy, called with read lock from write path
474                          *
475                          * leads to spurious BUG_ON() in bkey_unpack_key() in
476                          * debug mode
477                          */
478                         next->needs_whiteout |= in->needs_whiteout;
479                         continue;
480                 }
481
482                 if (bkey_whiteout(in)) {
483                         memcpy_u64s(out, in, bkeyp_key_u64s(f, in));
484                         set_bkeyp_val_u64s(f, out, 0);
485                 } else {
486                         bkey_copy(out, in);
487                 }
488                 out = bkey_next(out);
489         }
490
491         return (u64 *) out - (u64 *) dst;
492 }
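
/*
 * Because sort_keys_cmp() sorts whiteouts before live keys at the same
 * position, a whiteout that is immediately followed by another key at the
 * same position is redundant - only its needs_whiteout flag has to survive.
 * The same squashing pass written over a flat, already-sorted array; struct
 * skey and squash_whiteouts() are invented names, the filter_whiteouts case
 * is omitted, illustrative only.
 */
struct skey {
        int     pos;
        bool    whiteout;
        bool    needs_whiteout;
};

static unsigned squash_whiteouts(struct skey *in, unsigned n, struct skey *out)
{
        unsigned i, ret = 0;

        for (i = 0; i < n; i++) {
                if (in[i].whiteout && !in[i].needs_whiteout)
                        continue;

                if (in[i].whiteout &&
                    i + 1 < n &&
                    in[i + 1].pos == in[i].pos) {
                        /* superseded: hand needs_whiteout to the next key */
                        in[i + 1].needs_whiteout |= in[i].needs_whiteout;
                        continue;
                }

                out[ret++] = in[i];
        }

        return ret;
}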
493
494 static inline int sort_extents_cmp(struct btree *b,
495                                    struct bkey_packed *l,
496                                    struct bkey_packed *r)
497 {
498         return bkey_cmp_packed(b, l, r) ?:
499                 (int) bkey_deleted(l) - (int) bkey_deleted(r);
500 }
501
502 unsigned bch2_sort_extents(struct bkey_packed *dst,
503                            struct sort_iter *iter,
504                            bool filter_whiteouts)
505 {
506         struct bkey_packed *in, *out = dst;
507
508         sort_iter_sort(iter, sort_extents_cmp);
509
510         while ((in = sort_iter_next(iter, sort_extents_cmp))) {
511                 if (bkey_deleted(in))
512                         continue;
513
514                 if (bkey_whiteout(in) &&
515                     (filter_whiteouts || !in->needs_whiteout))
516                         continue;
517
518                 bkey_copy(out, in);
519                 out = bkey_next(out);
520         }
521
522         return (u64 *) out - (u64 *) dst;
523 }
524
525 static inline int sort_key_whiteouts_cmp(struct btree *b,
526                                          struct bkey_packed *l,
527                                          struct bkey_packed *r)
528 {
529         return bkey_cmp_packed(b, l, r);
530 }
531
532 unsigned bch2_sort_key_whiteouts(struct bkey_packed *dst,
533                                  struct sort_iter *iter)
534 {
535         struct bkey_packed *in, *out = dst;
536
537         sort_iter_sort(iter, sort_key_whiteouts_cmp);
538
539         while ((in = sort_iter_next(iter, sort_key_whiteouts_cmp))) {
540                 bkey_copy(out, in);
541                 out = bkey_next(out);
542         }
543
544         return (u64 *) out - (u64 *) dst;
545 }
546
547 static inline int sort_extent_whiteouts_cmp(struct btree *b,
548                                             struct bkey_packed *l,
549                                             struct bkey_packed *r)
550 {
551         struct bkey ul = bkey_unpack_key(b, l);
552         struct bkey ur = bkey_unpack_key(b, r);
553
554         return bkey_cmp(bkey_start_pos(&ul), bkey_start_pos(&ur));
555 }
556
557 unsigned bch2_sort_extent_whiteouts(struct bkey_packed *dst,
558                                     struct sort_iter *iter)
559 {
560         const struct bkey_format *f = &iter->b->format;
561         struct bkey_packed *in, *out = dst;
562         struct bkey_i l, r;
563         bool prev = false, l_packed = false;
564         u64 max_packed_size     = bkey_field_max(f, BKEY_FIELD_SIZE);
565         u64 max_packed_offset   = bkey_field_max(f, BKEY_FIELD_OFFSET);
566         u64 new_size;
567
568         max_packed_size = min_t(u64, max_packed_size, KEY_SIZE_MAX);
569
570         sort_iter_sort(iter, sort_extent_whiteouts_cmp);
571
572         while ((in = sort_iter_next(iter, sort_extent_whiteouts_cmp))) {
573                 if (bkey_deleted(in))
574                         continue;
575
576                 EBUG_ON(bkeyp_val_u64s(f, in));
577                 EBUG_ON(in->type != KEY_TYPE_discard);
578
579                 r.k = bkey_unpack_key(iter->b, in);
580
581                 if (prev &&
582                     bkey_cmp(l.k.p, bkey_start_pos(&r.k)) >= 0) {
583                         if (bkey_cmp(l.k.p, r.k.p) >= 0)
584                                 continue;
585
586                         new_size = l_packed
587                                 ? min(max_packed_size, max_packed_offset -
588                                       bkey_start_offset(&l.k))
589                                 : KEY_SIZE_MAX;
590
591                         new_size = min(new_size, r.k.p.offset -
592                                        bkey_start_offset(&l.k));
593
594                         BUG_ON(new_size < l.k.size);
595
596                         bch2_key_resize(&l.k, new_size);
597
598                         if (bkey_cmp(l.k.p, r.k.p) >= 0)
599                                 continue;
600
601                         bch2_cut_front(l.k.p, &r);
602                 }
603
604                 if (prev) {
605                         if (!bch2_bkey_pack(out, &l, f)) {
606                                 BUG_ON(l_packed);
607                                 bkey_copy(out, &l);
608                         }
609                         out = bkey_next(out);
610                 }
611
612                 l = r;
613                 prev = true;
614                 l_packed = bkey_packed(in);
615         }
616
617         if (prev) {
618                 if (!bch2_bkey_pack(out, &l, f)) {
619                         BUG_ON(l_packed);
620                         bkey_copy(out, &l);
621                 }
622                 out = bkey_next(out);
623         }
624
625         return (u64 *) out - (u64 *) dst;
626 }
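
/*
 * Extent whiteouts are ranges, so adjacent or overlapping ones are coalesced
 * on the way out - but a key's packed size and offset fields can only
 * represent values up to bkey_field_max(), so a merged whiteout is grown
 * only up to that cap and anything left over is carried by the next key
 * (the bch2_cut_front() above). A sketch of one merge step on [start, end)
 * ranges with an explicit length cap; wrange and whiteout_merge_step() are
 * invented names, illustrative only.
 */
struct wrange {
        u64     start, end;     /* [start, end) */
};

/*
 * Try to absorb r (which starts at or after l->start) into l, growing l by
 * at most max_len. Returns true if r is fully covered and can be dropped;
 * otherwise r is trimmed to the part l could not cover, and the caller
 * emits l and stages r as the new pending whiteout.
 */
static bool whiteout_merge_step(struct wrange *l, struct wrange *r, u64 max_len)
{
        u64 new_end;

        if (r->start > l->end)
                return false;                   /* disjoint: nothing to merge */

        if (r->end <= l->end)
                return true;                    /* r already covered by l */

        new_end = min(l->start + max_len, r->end);
        if (new_end > l->end)
                l->end = new_end;

        if (r->end <= l->end)
                return true;                    /* grew enough to cover r */

        r->start = l->end;                      /* drop the covered prefix of r */
        return false;
}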