]> git.sesse.net Git - bcachefs-tools-debian/blob - libbcachefs/bkey_sort.c
Merge pull request #24 from brendon-boldt/new-install-distros
[bcachefs-tools-debian] / libbcachefs / bkey_sort.c
1 // SPDX-License-Identifier: GPL-2.0
2 #include "bcachefs.h"
3 #include "bkey_on_stack.h"
4 #include "bkey_sort.h"
5 #include "bset.h"
6 #include "extents.h"
7
/*
 * Comparison callback used to order the sets of a sort_iter. It receives the
 * btree the iterator belongs to and two packed keys; the sift helper treats a
 * result greater than zero as "l must be placed after r".
 */
typedef int (*sort_cmp_fn)(struct btree *b,
			   struct bkey_packed *l,
			   struct bkey_packed *r);
11
12 static inline bool sort_iter_end(struct sort_iter *iter)
13 {
14         return !iter->used;
15 }
16
17 static inline void __sort_iter_sift(struct sort_iter *iter,
18                                     unsigned from,
19                                     sort_cmp_fn cmp)
20 {
21         unsigned i;
22
23         for (i = from;
24              i + 1 < iter->used &&
25              cmp(iter->b, iter->data[i].k, iter->data[i + 1].k) > 0;
26              i++)
27                 swap(iter->data[i], iter->data[i + 1]);
28 }
29
30 static inline void sort_iter_sift(struct sort_iter *iter, sort_cmp_fn cmp)
31 {
32
33         __sort_iter_sift(iter, 0, cmp);
34 }
35
36 static inline void sort_iter_sort(struct sort_iter *iter, sort_cmp_fn cmp)
37 {
38         unsigned i = iter->used;
39
40         while (i--)
41                 __sort_iter_sift(iter, i, cmp);
42 }
43
44 static inline struct bkey_packed *sort_iter_peek(struct sort_iter *iter)
45 {
46         return !sort_iter_end(iter) ? iter->data->k : NULL;
47 }
48
49 static inline void __sort_iter_advance(struct sort_iter *iter,
50                                        unsigned idx, sort_cmp_fn cmp)
51 {
52         struct sort_iter_set *i = iter->data + idx;
53
54         BUG_ON(idx >= iter->used);
55
56         i->k = bkey_next_skip_noops(i->k, i->end);
57
58         BUG_ON(i->k > i->end);
59
60         if (i->k == i->end)
61                 array_remove_item(iter->data, iter->used, idx);
62         else
63                 __sort_iter_sift(iter, idx, cmp);
64 }
65
66 static inline void sort_iter_advance(struct sort_iter *iter, sort_cmp_fn cmp)
67 {
68         __sort_iter_advance(iter, 0, cmp);
69 }
70
71 static inline struct bkey_packed *sort_iter_next(struct sort_iter *iter,
72                                                  sort_cmp_fn cmp)
73 {
74         struct bkey_packed *ret = sort_iter_peek(iter);
75
76         if (ret)
77                 sort_iter_advance(iter, cmp);
78
79         return ret;
80 }
81
/*
 * Order by packed key comparison first; keys that compare equal are ordered
 * by ascending address, so the key at the lower address sorts first.
 */
static inline int key_sort_fix_overlapping_cmp(struct btree *b,
					       struct bkey_packed *l,
					       struct bkey_packed *r)
{
	int ret = bkey_cmp_packed(b, l, r);

	if (ret)
		return ret;

	/* Tie: fall back to comparing the keys' addresses */
	return cmp_int((unsigned long) l, (unsigned long) r);
}
92
93 static inline bool should_drop_next_key(struct sort_iter *iter)
94 {
95         /*
96          * key_sort_cmp() ensures that when keys compare equal the older key
97          * comes first; so if l->k compares equal to r->k then l->k is older
98          * and should be dropped.
99          */
100         return iter->used >= 2 &&
101                 !bkey_cmp_packed(iter->b,
102                                  iter->data[0].k,
103                                  iter->data[1].k);
104 }
105
106 struct btree_nr_keys
107 bch2_key_sort_fix_overlapping(struct bch_fs *c, struct bset *dst,
108                               struct sort_iter *iter)
109 {
110         struct bkey_packed *out = dst->start;
111         struct bkey_packed *k;
112         struct btree_nr_keys nr;
113
114         memset(&nr, 0, sizeof(nr));
115
116         sort_iter_sort(iter, key_sort_fix_overlapping_cmp);
117
118         while ((k = sort_iter_peek(iter))) {
119                 if (!bkey_whiteout(k) &&
120                     !should_drop_next_key(iter)) {
121                         bkey_copy(out, k);
122                         btree_keys_account_key_add(&nr, 0, out);
123                         out = bkey_next(out);
124                 }
125
126                 sort_iter_advance(iter, key_sort_fix_overlapping_cmp);
127         }
128
129         dst->u64s = cpu_to_le16((u64 *) out - dst->_data);
130         return nr;
131 }
132
133 /*
134  * If keys compare equal, compare by pointer order:
135  *
136  * Necessary for sort_fix_overlapping() - if there are multiple keys that
137  * compare equal in different sets, we have to process them newest to oldest.
138  */
139 static inline int extent_sort_fix_overlapping_cmp(struct btree *b,
140                                                   struct bkey_packed *l,
141                                                   struct bkey_packed *r)
142 {
143         struct bkey ul = bkey_unpack_key(b, l);
144         struct bkey ur = bkey_unpack_key(b, r);
145
146         return bkey_cmp(bkey_start_pos(&ul),
147                         bkey_start_pos(&ur)) ?:
148                 cmp_int((unsigned long) r, (unsigned long) l);
149 }
150
/*
 * Finish the key *@prev points at and move *@prev to the next output slot.
 *
 * With no key in progress (*@prev == NULL), *@prev is simply set to @start.
 * Otherwise the key is packed in place with format @f, accounted in @nr, and
 * *@prev is advanced to the position following it.
 */
static void extent_sort_advance_prev(struct bkey_format *f,
				     struct btree_nr_keys *nr,
				     struct bkey_packed *start,
				     struct bkey_packed **prev)
{
	if (!*prev) {
		*prev = start;
		return;
	}

	bch2_bkey_pack(*prev, (void *) *prev, f);

	btree_keys_account_key_add(nr, 0, *prev);
	*prev = bkey_next(*prev);
}
165
166 static void extent_sort_append(struct bch_fs *c,
167                                struct bkey_format *f,
168                                struct btree_nr_keys *nr,
169                                struct bkey_packed *start,
170                                struct bkey_packed **prev,
171                                struct bkey_s k)
172 {
173         if (bkey_whiteout(k.k))
174                 return;
175
176         /*
177          * prev is always unpacked, for key merging - until right before we
178          * advance it:
179          */
180
181         if (*prev &&
182             bch2_bkey_merge(c, bkey_i_to_s((void *) *prev), k) ==
183             BCH_MERGE_MERGE)
184                 return;
185
186         extent_sort_advance_prev(f, nr, start, prev);
187
188         bkey_reassemble((void *) *prev, k.s_c);
189 }
190
191 struct btree_nr_keys
192 bch2_extent_sort_fix_overlapping(struct bch_fs *c, struct bset *dst,
193                                  struct sort_iter *iter)
194 {
195         struct btree *b = iter->b;
196         struct bkey_format *f = &b->format;
197         struct sort_iter_set *_l = iter->data, *_r = iter->data + 1;
198         struct bkey_packed *prev = NULL;
199         struct bkey l_unpacked, r_unpacked;
200         struct bkey_s l, r;
201         struct btree_nr_keys nr;
202         struct bkey_on_stack split;
203
204         memset(&nr, 0, sizeof(nr));
205         bkey_on_stack_init(&split);
206
207         sort_iter_sort(iter, extent_sort_fix_overlapping_cmp);
208
209         while (!sort_iter_end(iter)) {
210                 l = __bkey_disassemble(b, _l->k, &l_unpacked);
211
212                 if (iter->used == 1) {
213                         extent_sort_append(c, f, &nr, dst->start, &prev, l);
214                         sort_iter_advance(iter,
215                                           extent_sort_fix_overlapping_cmp);
216                         continue;
217                 }
218
219                 r = __bkey_disassemble(b, _r->k, &r_unpacked);
220
221                 /* If current key and next key don't overlap, just append */
222                 if (bkey_cmp(l.k->p, bkey_start_pos(r.k)) <= 0) {
223                         extent_sort_append(c, f, &nr, dst->start, &prev, l);
224                         sort_iter_advance(iter,
225                                           extent_sort_fix_overlapping_cmp);
226                         continue;
227                 }
228
229                 /* Skip 0 size keys */
230                 if (!r.k->size) {
231                         __sort_iter_advance(iter, 1,
232                                             extent_sort_fix_overlapping_cmp);
233                         continue;
234                 }
235
236                 /*
237                  * overlap: keep the newer key and trim the older key so they
238                  * don't overlap. comparing pointers tells us which one is
239                  * newer, since the bsets are appended one after the other.
240                  */
241
242                 /* can't happen because of comparison func */
243                 BUG_ON(_l->k < _r->k &&
244                        !bkey_cmp(bkey_start_pos(l.k), bkey_start_pos(r.k)));
245
246                 if (_l->k > _r->k) {
247                         /* l wins, trim r */
248                         if (bkey_cmp(l.k->p, r.k->p) >= 0) {
249                                 __sort_iter_advance(iter, 1,
250                                          extent_sort_fix_overlapping_cmp);
251                         } else {
252                                 bch2_cut_front_s(l.k->p, r);
253                                 extent_save(b, _r->k, r.k);
254                                 __sort_iter_sift(iter, 1,
255                                          extent_sort_fix_overlapping_cmp);
256                         }
257                 } else if (bkey_cmp(l.k->p, r.k->p) > 0) {
258
259                         /*
260                          * r wins, but it overlaps in the middle of l - split l:
261                          */
262                         bkey_on_stack_reassemble(&split, c, l.s_c);
263                         bch2_cut_back(bkey_start_pos(r.k), split.k);
264
265                         bch2_cut_front_s(r.k->p, l);
266                         extent_save(b, _l->k, l.k);
267
268                         __sort_iter_sift(iter, 0,
269                                          extent_sort_fix_overlapping_cmp);
270
271                         extent_sort_append(c, f, &nr, dst->start,
272                                            &prev, bkey_i_to_s(split.k));
273                 } else {
274                         bch2_cut_back_s(bkey_start_pos(r.k), l);
275                         extent_save(b, _l->k, l.k);
276                 }
277         }
278
279         extent_sort_advance_prev(f, &nr, dst->start, &prev);
280
281         dst->u64s = cpu_to_le16((u64 *) prev - dst->_data);
282
283         bkey_on_stack_exit(&split, c);
284         return nr;
285 }
286
287 /* Sort + repack in a new format: */
288 struct btree_nr_keys
289 bch2_sort_repack(struct bset *dst, struct btree *src,
290                  struct btree_node_iter *src_iter,
291                  struct bkey_format *out_f,
292                  bool filter_whiteouts)
293 {
294         struct bkey_format *in_f = &src->format;
295         struct bkey_packed *in, *out = vstruct_last(dst);
296         struct btree_nr_keys nr;
297
298         memset(&nr, 0, sizeof(nr));
299
300         while ((in = bch2_btree_node_iter_next_all(src_iter, src))) {
301                 if (filter_whiteouts && bkey_whiteout(in))
302                         continue;
303
304                 if (bch2_bkey_transform(out_f, out, bkey_packed(in)
305                                        ? in_f : &bch2_bkey_format_current, in))
306                         out->format = KEY_FORMAT_LOCAL_BTREE;
307                 else
308                         bch2_bkey_unpack(src, (void *) out, in);
309
310                 btree_keys_account_key_add(&nr, 0, out);
311                 out = bkey_next(out);
312         }
313
314         dst->u64s = cpu_to_le16((u64 *) out - dst->_data);
315         return nr;
316 }
317
318 /* Sort, repack, and merge: */
319 struct btree_nr_keys
320 bch2_sort_repack_merge(struct bch_fs *c,
321                        struct bset *dst, struct btree *src,
322                        struct btree_node_iter *iter,
323                        struct bkey_format *out_f,
324                        bool filter_whiteouts)
325 {
326         struct bkey_packed *prev = NULL, *k_packed;
327         struct bkey_s k;
328         struct btree_nr_keys nr;
329         struct bkey unpacked;
330
331         memset(&nr, 0, sizeof(nr));
332
333         while ((k_packed = bch2_btree_node_iter_next_all(iter, src))) {
334                 if (filter_whiteouts && bkey_whiteout(k_packed))
335                         continue;
336
337                 k = __bkey_disassemble(src, k_packed, &unpacked);
338
339                 if (filter_whiteouts &&
340                     bch2_bkey_normalize(c, k))
341                         continue;
342
343                 extent_sort_append(c, out_f, &nr, vstruct_last(dst), &prev, k);
344         }
345
346         extent_sort_advance_prev(out_f, &nr, vstruct_last(dst), &prev);
347
348         dst->u64s = cpu_to_le16((u64 *) prev - dst->_data);
349         return nr;
350 }
351
352 static inline int sort_keys_cmp(struct btree *b,
353                                 struct bkey_packed *l,
354                                 struct bkey_packed *r)
355 {
356         return bkey_cmp_packed(b, l, r) ?:
357                 (int) bkey_whiteout(r) - (int) bkey_whiteout(l) ?:
358                 (int) l->needs_whiteout - (int) r->needs_whiteout;
359 }
360
361 unsigned bch2_sort_keys(struct bkey_packed *dst,
362                         struct sort_iter *iter,
363                         bool filter_whiteouts)
364 {
365         const struct bkey_format *f = &iter->b->format;
366         struct bkey_packed *in, *next, *out = dst;
367
368         sort_iter_sort(iter, sort_keys_cmp);
369
370         while ((in = sort_iter_next(iter, sort_keys_cmp))) {
371                 if (bkey_whiteout(in) &&
372                     (filter_whiteouts || !in->needs_whiteout))
373                         continue;
374
375                 if (bkey_whiteout(in) &&
376                     (next = sort_iter_peek(iter)) &&
377                     !bkey_cmp_packed(iter->b, in, next)) {
378                         BUG_ON(in->needs_whiteout &&
379                                next->needs_whiteout);
380                         /*
381                          * XXX racy, called with read lock from write path
382                          *
383                          * leads to spurious BUG_ON() in bkey_unpack_key() in
384                          * debug mode
385                          */
386                         next->needs_whiteout |= in->needs_whiteout;
387                         continue;
388                 }
389
390                 if (bkey_whiteout(in)) {
391                         memcpy_u64s(out, in, bkeyp_key_u64s(f, in));
392                         set_bkeyp_val_u64s(f, out, 0);
393                 } else {
394                         bkey_copy(out, in);
395                 }
396                 out = bkey_next(out);
397         }
398
399         return (u64 *) out - (u64 *) dst;
400 }
401
/*
 * Comparison for bch2_sort_extents(): order by packed key comparison, and
 * among keys that compare equal put non-deleted keys before deleted ones.
 */
static inline int sort_extents_cmp(struct btree *b,
				   struct bkey_packed *l,
				   struct bkey_packed *r)
{
	int ret = bkey_cmp_packed(b, l, r);

	if (ret)
		return ret;

	return (int) bkey_deleted(l) - (int) bkey_deleted(r);
}
409
410 unsigned bch2_sort_extents(struct bkey_packed *dst,
411                            struct sort_iter *iter,
412                            bool filter_whiteouts)
413 {
414         struct bkey_packed *in, *out = dst;
415
416         sort_iter_sort(iter, sort_extents_cmp);
417
418         while ((in = sort_iter_next(iter, sort_extents_cmp))) {
419                 if (bkey_deleted(in))
420                         continue;
421
422                 if (bkey_whiteout(in) &&
423                     (filter_whiteouts || !in->needs_whiteout))
424                         continue;
425
426                 bkey_copy(out, in);
427                 out = bkey_next(out);
428         }
429
430         return (u64 *) out - (u64 *) dst;
431 }
432
433 static inline int sort_extent_whiteouts_cmp(struct btree *b,
434                                             struct bkey_packed *l,
435                                             struct bkey_packed *r)
436 {
437         struct bkey ul = bkey_unpack_key(b, l);
438         struct bkey ur = bkey_unpack_key(b, r);
439
440         return bkey_cmp(bkey_start_pos(&ul), bkey_start_pos(&ur));
441 }
442
443 unsigned bch2_sort_extent_whiteouts(struct bkey_packed *dst,
444                                     struct sort_iter *iter)
445 {
446         const struct bkey_format *f = &iter->b->format;
447         struct bkey_packed *in, *out = dst;
448         struct bkey_i l, r;
449         bool prev = false, l_packed = false;
450         u64 max_packed_size     = bkey_field_max(f, BKEY_FIELD_SIZE);
451         u64 max_packed_offset   = bkey_field_max(f, BKEY_FIELD_OFFSET);
452         u64 new_size;
453
454         max_packed_size = min_t(u64, max_packed_size, KEY_SIZE_MAX);
455
456         sort_iter_sort(iter, sort_extent_whiteouts_cmp);
457
458         while ((in = sort_iter_next(iter, sort_extent_whiteouts_cmp))) {
459                 if (bkey_deleted(in))
460                         continue;
461
462                 EBUG_ON(bkeyp_val_u64s(f, in));
463                 EBUG_ON(in->type != KEY_TYPE_discard);
464
465                 r.k = bkey_unpack_key(iter->b, in);
466
467                 if (prev &&
468                     bkey_cmp(l.k.p, bkey_start_pos(&r.k)) >= 0) {
469                         if (bkey_cmp(l.k.p, r.k.p) >= 0)
470                                 continue;
471
472                         new_size = l_packed
473                                 ? min(max_packed_size, max_packed_offset -
474                                       bkey_start_offset(&l.k))
475                                 : KEY_SIZE_MAX;
476
477                         new_size = min(new_size, r.k.p.offset -
478                                        bkey_start_offset(&l.k));
479
480                         BUG_ON(new_size < l.k.size);
481
482                         bch2_key_resize(&l.k, new_size);
483
484                         if (bkey_cmp(l.k.p, r.k.p) >= 0)
485                                 continue;
486
487                         bch2_cut_front(l.k.p, &r);
488                 }
489
490                 if (prev) {
491                         if (!bch2_bkey_pack(out, &l, f)) {
492                                 BUG_ON(l_packed);
493                                 bkey_copy(out, &l);
494                         }
495                         out = bkey_next(out);
496                 }
497
498                 l = r;
499                 prev = true;
500                 l_packed = bkey_packed(in);
501         }
502
503         if (prev) {
504                 if (!bch2_bkey_pack(out, &l, f)) {
505                         BUG_ON(l_packed);
506                         bkey_copy(out, &l);
507                 }
508                 out = bkey_next(out);
509         }
510
511         return (u64 *) out - (u64 *) dst;
512 }