]> git.sesse.net Git - bcachefs-tools-debian/blob - libbcachefs/bkey_sort.c
839e78d1dc35fb3e71fdaff3407a9a50d58cd50d
[bcachefs-tools-debian] / libbcachefs / bkey_sort.c
1 // SPDX-License-Identifier: GPL-2.0
2 #include "bcachefs.h"
3 #include "bkey_on_stack.h"
4 #include "bkey_sort.h"
5 #include "bset.h"
6 #include "extents.h"
7
/*
 * Comparison callback used to order the sets of a sort_iter: it is handed the
 * iterator's btree node and the current keys of two sets.  A result > 0 means
 * @l sorts after @r.
 */
typedef int (*sort_cmp_fn)(struct btree *b,
			   struct bkey_packed *l,
			   struct bkey_packed *r);
11
12 static inline bool sort_iter_end(struct sort_iter *iter)
13 {
14         return !iter->used;
15 }
16
17 static inline void __sort_iter_sift(struct sort_iter *iter,
18                                     unsigned from,
19                                     sort_cmp_fn cmp)
20 {
21         unsigned i;
22
23         for (i = from;
24              i + 1 < iter->used &&
25              cmp(iter->b, iter->data[i].k, iter->data[i + 1].k) > 0;
26              i++)
27                 swap(iter->data[i], iter->data[i + 1]);
28 }
29
30 static inline void sort_iter_sift(struct sort_iter *iter, sort_cmp_fn cmp)
31 {
32
33         __sort_iter_sift(iter, 0, cmp);
34 }
35
36 static inline void sort_iter_sort(struct sort_iter *iter, sort_cmp_fn cmp)
37 {
38         unsigned i = iter->used;
39
40         while (i--)
41                 __sort_iter_sift(iter, i, cmp);
42 }
43
44 static inline struct bkey_packed *sort_iter_peek(struct sort_iter *iter)
45 {
46         return !sort_iter_end(iter) ? iter->data->k : NULL;
47 }
48
49 static inline void __sort_iter_advance(struct sort_iter *iter,
50                                        unsigned idx, sort_cmp_fn cmp)
51 {
52         struct sort_iter_set *i = iter->data + idx;
53
54         BUG_ON(idx >= iter->used);
55
56         i->k = bkey_next_skip_noops(i->k, i->end);
57
58         BUG_ON(i->k > i->end);
59
60         if (i->k == i->end)
61                 array_remove_item(iter->data, iter->used, idx);
62         else
63                 __sort_iter_sift(iter, idx, cmp);
64 }
65
66 static inline void sort_iter_advance(struct sort_iter *iter, sort_cmp_fn cmp)
67 {
68         __sort_iter_advance(iter, 0, cmp);
69 }
70
71 static inline struct bkey_packed *sort_iter_next(struct sort_iter *iter,
72                                                  sort_cmp_fn cmp)
73 {
74         struct bkey_packed *ret = sort_iter_peek(iter);
75
76         if (ret)
77                 sort_iter_advance(iter, cmp);
78
79         return ret;
80 }
81
/*
 * Order by key; keys that compare equal are ordered by their address, lowest
 * address first:
 */
static inline int key_sort_fix_overlapping_cmp(struct btree *b,
					       struct bkey_packed *l,
					       struct bkey_packed *r)
{
	int ret = bkey_cmp_packed(b, l, r);

	if (ret)
		return ret;

	return cmp_int((unsigned long) l, (unsigned long) r);
}
92
93 static inline bool should_drop_next_key(struct sort_iter *iter)
94 {
95         /*
96          * key_sort_cmp() ensures that when keys compare equal the older key
97          * comes first; so if l->k compares equal to r->k then l->k is older
98          * and should be dropped.
99          */
100         return iter->used >= 2 &&
101                 !bkey_cmp_packed(iter->b,
102                                  iter->data[0].k,
103                                  iter->data[1].k);
104 }
105
106 struct btree_nr_keys
107 bch2_key_sort_fix_overlapping(struct bch_fs *c, struct bset *dst,
108                               struct sort_iter *iter)
109 {
110         struct bkey_packed *out = dst->start;
111         struct bkey_packed *k;
112         struct btree_nr_keys nr;
113
114         memset(&nr, 0, sizeof(nr));
115
116         sort_iter_sort(iter, key_sort_fix_overlapping_cmp);
117
118         while ((k = sort_iter_peek(iter))) {
119                 if (!bkey_whiteout(k) &&
120                     !should_drop_next_key(iter)) {
121                         bkey_copy(out, k);
122                         btree_keys_account_key_add(&nr, 0, out);
123                         out = bkey_next(out);
124                 }
125
126                 sort_iter_advance(iter, key_sort_fix_overlapping_cmp);
127         }
128
129         dst->u64s = cpu_to_le16((u64 *) out - dst->_data);
130         return nr;
131 }
132
133 static void extent_sort_append(struct bch_fs *c,
134                                struct bkey_format *f,
135                                struct btree_nr_keys *nr,
136                                struct bkey_packed **out,
137                                struct bkey_s k)
138 {
139         if (!bkey_whiteout(k.k)) {
140                 if (!bch2_bkey_pack_key(*out, k.k, f))
141                         memcpy_u64s_small(*out, k.k, BKEY_U64s);
142
143                 memcpy_u64s_small(bkeyp_val(f, *out), k.v, bkey_val_u64s(k.k));
144
145                 btree_keys_account_key_add(nr, 0, *out);
146                 *out = bkey_next(*out);
147         }
148 }
149
150 /* Sort + repack in a new format: */
151 struct btree_nr_keys
152 bch2_sort_repack(struct bset *dst, struct btree *src,
153                  struct btree_node_iter *src_iter,
154                  struct bkey_format *out_f,
155                  bool filter_whiteouts)
156 {
157         struct bkey_format *in_f = &src->format;
158         struct bkey_packed *in, *out = vstruct_last(dst);
159         struct btree_nr_keys nr;
160
161         memset(&nr, 0, sizeof(nr));
162
163         while ((in = bch2_btree_node_iter_next_all(src_iter, src))) {
164                 if (filter_whiteouts && bkey_whiteout(in))
165                         continue;
166
167                 if (bch2_bkey_transform(out_f, out, bkey_packed(in)
168                                        ? in_f : &bch2_bkey_format_current, in))
169                         out->format = KEY_FORMAT_LOCAL_BTREE;
170                 else
171                         bch2_bkey_unpack(src, (void *) out, in);
172
173                 btree_keys_account_key_add(&nr, 0, out);
174                 out = bkey_next(out);
175         }
176
177         dst->u64s = cpu_to_le16((u64 *) out - dst->_data);
178         return nr;
179 }
180
181 /* Sort, repack, and call bch2_bkey_normalize() to drop stale pointers: */
182 struct btree_nr_keys
183 bch2_sort_repack_merge(struct bch_fs *c,
184                        struct bset *dst, struct btree *src,
185                        struct btree_node_iter *iter,
186                        struct bkey_format *out_f,
187                        bool filter_whiteouts)
188 {
189         struct bkey_packed *out = vstruct_last(dst), *k_packed;
190         struct bkey_on_stack k;
191         struct btree_nr_keys nr;
192
193         memset(&nr, 0, sizeof(nr));
194         bkey_on_stack_init(&k);
195
196         while ((k_packed = bch2_btree_node_iter_next_all(iter, src))) {
197                 if (filter_whiteouts && bkey_whiteout(k_packed))
198                         continue;
199
200                 /*
201                  * NOTE:
202                  * bch2_bkey_normalize may modify the key we pass it (dropping
203                  * stale pointers) and we don't have a write lock on the src
204                  * node; we have to make a copy of the entire key before calling
205                  * normalize
206                  */
207                 bkey_on_stack_realloc(&k, c, k_packed->u64s + BKEY_U64s);
208                 bch2_bkey_unpack(src, k.k, k_packed);
209
210                 if (filter_whiteouts &&
211                     bch2_bkey_normalize(c, bkey_i_to_s(k.k)))
212                         continue;
213
214                 extent_sort_append(c, out_f, &nr, &out, bkey_i_to_s(k.k));
215         }
216
217         dst->u64s = cpu_to_le16((u64 *) out - dst->_data);
218         bkey_on_stack_exit(&k, c);
219         return nr;
220 }
221
222 static inline int sort_keys_cmp(struct btree *b,
223                                 struct bkey_packed *l,
224                                 struct bkey_packed *r)
225 {
226         return bkey_cmp_packed(b, l, r) ?:
227                 (int) bkey_deleted(r) - (int) bkey_deleted(l) ?:
228                 (int) l->needs_whiteout - (int) r->needs_whiteout;
229 }
230
231 unsigned bch2_sort_keys(struct bkey_packed *dst,
232                         struct sort_iter *iter,
233                         bool filter_whiteouts)
234 {
235         const struct bkey_format *f = &iter->b->format;
236         struct bkey_packed *in, *next, *out = dst;
237
238         sort_iter_sort(iter, sort_keys_cmp);
239
240         while ((in = sort_iter_next(iter, sort_keys_cmp))) {
241                 bool needs_whiteout = false;
242
243                 if (bkey_whiteout(in) &&
244                     (filter_whiteouts || !in->needs_whiteout))
245                         continue;
246
247                 while ((next = sort_iter_peek(iter)) &&
248                        !bkey_cmp_packed(iter->b, in, next)) {
249                         BUG_ON(in->needs_whiteout &&
250                                next->needs_whiteout);
251                         needs_whiteout |= in->needs_whiteout;
252                         in = sort_iter_next(iter, sort_keys_cmp);
253                 }
254
255                 if (bkey_whiteout(in)) {
256                         memcpy_u64s(out, in, bkeyp_key_u64s(f, in));
257                         set_bkeyp_val_u64s(f, out, 0);
258                 } else {
259                         bkey_copy(out, in);
260                 }
261                 out->needs_whiteout |= needs_whiteout;
262                 out = bkey_next(out);
263         }
264
265         return (u64 *) out - (u64 *) dst;
266 }
267
268 /* Compat code for btree_node_old_extent_overwrite: */
269
270 /*
271  * If keys compare equal, compare by pointer order:
272  *
273  * Necessary for sort_fix_overlapping() - if there are multiple keys that
274  * compare equal in different sets, we have to process them newest to oldest.
275  */
276 static inline int extent_sort_fix_overlapping_cmp(struct btree *b,
277                                                   struct bkey_packed *l,
278                                                   struct bkey_packed *r)
279 {
280         struct bkey ul = bkey_unpack_key(b, l);
281         struct bkey ur = bkey_unpack_key(b, r);
282
283         return bkey_cmp(bkey_start_pos(&ul),
284                         bkey_start_pos(&ur)) ?:
285                 cmp_int((unsigned long) r, (unsigned long) l);
286 }
287
288 /*
289  * The algorithm in extent_sort_fix_overlapping() relies on keys in the same
290  * bset being ordered by start offset - but 0 size whiteouts (which are always
291  * KEY_TYPE_deleted) break this ordering, so we need to skip over them:
292  */
293 static void extent_iter_advance(struct sort_iter *iter, unsigned idx)
294 {
295         struct sort_iter_set *i = iter->data + idx;
296
297         do {
298                 i->k = bkey_next_skip_noops(i->k, i->end);
299         } while (i->k != i->end && bkey_deleted(i->k));
300
301         if (i->k == i->end)
302                 array_remove_item(iter->data, iter->used, idx);
303         else
304                 __sort_iter_sift(iter, idx, extent_sort_fix_overlapping_cmp);
305 }
306
307 struct btree_nr_keys
308 bch2_extent_sort_fix_overlapping(struct bch_fs *c, struct bset *dst,
309                                  struct sort_iter *iter)
310 {
311         struct btree *b = iter->b;
312         struct bkey_format *f = &b->format;
313         struct sort_iter_set *_l = iter->data, *_r = iter->data + 1;
314         struct bkey_packed *out = dst->start;
315         struct bkey l_unpacked, r_unpacked;
316         struct bkey_s l, r;
317         struct btree_nr_keys nr;
318         struct bkey_on_stack split;
319         unsigned i;
320
321         memset(&nr, 0, sizeof(nr));
322         bkey_on_stack_init(&split);
323
324         sort_iter_sort(iter, extent_sort_fix_overlapping_cmp);
325         for (i = 0; i < iter->used;) {
326                 if (bkey_deleted(iter->data[i].k))
327                         __sort_iter_advance(iter, i,
328                                             extent_sort_fix_overlapping_cmp);
329                 else
330                         i++;
331         }
332
333         while (!sort_iter_end(iter)) {
334                 l = __bkey_disassemble(b, _l->k, &l_unpacked);
335
336                 if (iter->used == 1) {
337                         extent_sort_append(c, f, &nr, &out, l);
338                         extent_iter_advance(iter, 0);
339                         continue;
340                 }
341
342                 r = __bkey_disassemble(b, _r->k, &r_unpacked);
343
344                 /* If current key and next key don't overlap, just append */
345                 if (bkey_cmp(l.k->p, bkey_start_pos(r.k)) <= 0) {
346                         extent_sort_append(c, f, &nr, &out, l);
347                         extent_iter_advance(iter, 0);
348                         continue;
349                 }
350
351                 /* Skip 0 size keys */
352                 if (!r.k->size) {
353                         extent_iter_advance(iter, 1);
354                         continue;
355                 }
356
357                 /*
358                  * overlap: keep the newer key and trim the older key so they
359                  * don't overlap. comparing pointers tells us which one is
360                  * newer, since the bsets are appended one after the other.
361                  */
362
363                 /* can't happen because of comparison func */
364                 BUG_ON(_l->k < _r->k &&
365                        !bkey_cmp(bkey_start_pos(l.k), bkey_start_pos(r.k)));
366
367                 if (_l->k > _r->k) {
368                         /* l wins, trim r */
369                         if (bkey_cmp(l.k->p, r.k->p) >= 0) {
370                                 extent_iter_advance(iter, 1);
371                         } else {
372                                 bch2_cut_front_s(l.k->p, r);
373                                 extent_save(b, _r->k, r.k);
374                                 __sort_iter_sift(iter, 1,
375                                          extent_sort_fix_overlapping_cmp);
376                         }
377                 } else if (bkey_cmp(l.k->p, r.k->p) > 0) {
378
379                         /*
380                          * r wins, but it overlaps in the middle of l - split l:
381                          */
382                         bkey_on_stack_reassemble(&split, c, l.s_c);
383                         bch2_cut_back(bkey_start_pos(r.k), split.k);
384
385                         bch2_cut_front_s(r.k->p, l);
386                         extent_save(b, _l->k, l.k);
387
388                         __sort_iter_sift(iter, 0,
389                                          extent_sort_fix_overlapping_cmp);
390
391                         extent_sort_append(c, f, &nr, &out,
392                                            bkey_i_to_s(split.k));
393                 } else {
394                         bch2_cut_back_s(bkey_start_pos(r.k), l);
395                         extent_save(b, _l->k, l.k);
396                 }
397         }
398
399         dst->u64s = cpu_to_le16((u64 *) out - dst->_data);
400
401         bkey_on_stack_exit(&split, c);
402         return nr;
403 }
404
/*
 * Ordering used by bch2_sort_extents(): by key first; among equal keys,
 * non-deleted keys come before deleted ones.
 */
static inline int sort_extents_cmp(struct btree *b,
				   struct bkey_packed *l,
				   struct bkey_packed *r)
{
	int ret = bkey_cmp_packed(b, l, r);

	if (ret)
		return ret;

	return (int) bkey_deleted(l) - (int) bkey_deleted(r);
}
412
413 unsigned bch2_sort_extents(struct bkey_packed *dst,
414                            struct sort_iter *iter,
415                            bool filter_whiteouts)
416 {
417         struct bkey_packed *in, *out = dst;
418
419         sort_iter_sort(iter, sort_extents_cmp);
420
421         while ((in = sort_iter_next(iter, sort_extents_cmp))) {
422                 if (bkey_deleted(in))
423                         continue;
424
425                 if (bkey_whiteout(in) &&
426                     (filter_whiteouts || !in->needs_whiteout))
427                         continue;
428
429                 bkey_copy(out, in);
430                 out = bkey_next(out);
431         }
432
433         return (u64 *) out - (u64 *) dst;
434 }
435
436 static inline int sort_extent_whiteouts_cmp(struct btree *b,
437                                             struct bkey_packed *l,
438                                             struct bkey_packed *r)
439 {
440         struct bkey ul = bkey_unpack_key(b, l);
441         struct bkey ur = bkey_unpack_key(b, r);
442
443         return bkey_cmp(bkey_start_pos(&ul), bkey_start_pos(&ur));
444 }
445
446 unsigned bch2_sort_extent_whiteouts(struct bkey_packed *dst,
447                                     struct sort_iter *iter)
448 {
449         const struct bkey_format *f = &iter->b->format;
450         struct bkey_packed *in, *out = dst;
451         struct bkey_i l, r;
452         bool prev = false, l_packed = false;
453         u64 max_packed_size     = bkey_field_max(f, BKEY_FIELD_SIZE);
454         u64 max_packed_offset   = bkey_field_max(f, BKEY_FIELD_OFFSET);
455         u64 new_size;
456
457         max_packed_size = min_t(u64, max_packed_size, KEY_SIZE_MAX);
458
459         sort_iter_sort(iter, sort_extent_whiteouts_cmp);
460
461         while ((in = sort_iter_next(iter, sort_extent_whiteouts_cmp))) {
462                 if (bkey_deleted(in))
463                         continue;
464
465                 EBUG_ON(bkeyp_val_u64s(f, in));
466                 EBUG_ON(in->type != KEY_TYPE_discard);
467
468                 r.k = bkey_unpack_key(iter->b, in);
469
470                 if (prev &&
471                     bkey_cmp(l.k.p, bkey_start_pos(&r.k)) >= 0) {
472                         if (bkey_cmp(l.k.p, r.k.p) >= 0)
473                                 continue;
474
475                         new_size = l_packed
476                                 ? min(max_packed_size, max_packed_offset -
477                                       bkey_start_offset(&l.k))
478                                 : KEY_SIZE_MAX;
479
480                         new_size = min(new_size, r.k.p.offset -
481                                        bkey_start_offset(&l.k));
482
483                         BUG_ON(new_size < l.k.size);
484
485                         bch2_key_resize(&l.k, new_size);
486
487                         if (bkey_cmp(l.k.p, r.k.p) >= 0)
488                                 continue;
489
490                         bch2_cut_front(l.k.p, &r);
491                 }
492
493                 if (prev) {
494                         if (!bch2_bkey_pack(out, &l, f)) {
495                                 BUG_ON(l_packed);
496                                 bkey_copy(out, &l);
497                         }
498                         out = bkey_next(out);
499                 }
500
501                 l = r;
502                 prev = true;
503                 l_packed = bkey_packed(in);
504         }
505
506         if (prev) {
507                 if (!bch2_bkey_pack(out, &l, f)) {
508                         BUG_ON(l_packed);
509                         bkey_copy(out, &l);
510                 }
511                 out = bkey_next(out);
512         }
513
514         return (u64 *) out - (u64 *) dst;
515 }