1 // SPDX-License-Identifier: GPL-2.0
3 #include "bkey_on_stack.h"
/*
 * Comparison callback used by the sort_iter helpers in this file. The sift
 * step invokes it as cmp(iter->b, key_a, key_b) and swaps the two entries
 * when the result is > 0.
 * NOTE(review): only part of this declaration is visible in this view.
 */
8 typedef int (*sort_cmp_fn)(struct btree *,
10 struct bkey_packed *);
/*
 * Predicate on the sort iterator. In this file sort_iter_peek() returns NULL
 * whenever this returns true, and the extent fixup loop runs while it is
 * false.
 * NOTE(review): the body is not visible in this view.
 */
12 static inline bool sort_iter_end(struct sort_iter *iter)
/*
 * Sift step shared by the sort helpers: compares neighbouring entries
 * iter->data[i] and iter->data[i + 1] with cmp and swaps them when the
 * comparison is > 0.
 * NOTE(review): the remaining parameters and the loop header are not visible
 * here; callers in this file pass (iter, start index, cmp).
 */
17 static inline void __sort_iter_sift(struct sort_iter *iter,
25 cmp(iter->b, iter->data[i].k, iter->data[i + 1].k) > 0;
27 swap(iter->data[i], iter->data[i + 1]);
/*
 * Sift starting from the first entry: forwards to __sort_iter_sift() with
 * index 0 and the caller's comparison function.
 */
30 static inline void sort_iter_sift(struct sort_iter *iter, sort_cmp_fn cmp)
33 __sort_iter_sift(iter, 0, cmp);
/*
 * Order the iterator's entries using cmp. A counter is initialised to
 * iter->used and __sort_iter_sift() is called with it.
 * NOTE(review): the loop statement that steps the counter is not visible in
 * this view.
 */
36 static inline void sort_iter_sort(struct sort_iter *iter, sort_cmp_fn cmp)
38 unsigned i = iter->used;
41 __sort_iter_sift(iter, i, cmp);
/*
 * Return the key at the front of the iterator (iter->data->k) without
 * consuming it, or NULL when sort_iter_end() reports true.
 */
44 static inline struct bkey_packed *sort_iter_peek(struct sort_iter *iter)
46 return !sort_iter_end(iter) ? iter->data->k : NULL;
/*
 * Step the set at index idx to its next key.
 *
 * idx must be below iter->used (BUG_ON otherwise). The set's cursor is moved
 * with bkey_next_skip_noops() and must not end up past i->end (BUG_ON
 * otherwise). The visible follow-up calls are array_remove_item() on the set
 * and __sort_iter_sift() from idx.
 * NOTE(review): the condition selecting between removal and re-sift is on
 * lines not visible here - presumably the set is removed once i->k reaches
 * i->end; confirm against the full file.
 */
49 static inline void __sort_iter_advance(struct sort_iter *iter,
50 unsigned idx, sort_cmp_fn cmp)
52 struct sort_iter_set *i = iter->data + idx;
54 BUG_ON(idx >= iter->used);
56 i->k = bkey_next_skip_noops(i->k, i->end);
58 BUG_ON(i->k > i->end);
61 array_remove_item(iter->data, iter->used, idx);
63 __sort_iter_sift(iter, idx, cmp);
/*
 * Advance the front set of the iterator: forwards to __sort_iter_advance()
 * with index 0 and the caller's comparison function.
 */
66 static inline void sort_iter_advance(struct sort_iter *iter, sort_cmp_fn cmp)
68 __sort_iter_advance(iter, 0, cmp);
/*
 * Fetch the current key with sort_iter_peek() and advance the iterator using
 * cmp.
 * NOTE(review): the trailing parameter line, any guard on the advance and the
 * return statement are not visible; presumably the peeked key is returned.
 */
71 static inline struct bkey_packed *sort_iter_next(struct sort_iter *iter,
74 struct bkey_packed *ret = sort_iter_peek(iter);
77 sort_iter_advance(iter, cmp);
83 * If keys compare equal, compare by pointer order:
/*
 * Comparator used by bch2_key_sort_fix_overlapping(): the primary order is
 * bkey_cmp_packed(b, l, r); when that yields 0 the GNU "?:" form falls
 * through to comparing the addresses of l and r with cmp_int().
 */
85 static inline int key_sort_fix_overlapping_cmp(struct btree *b,
86 struct bkey_packed *l,
87 struct bkey_packed *r)
89 return bkey_cmp_packed(b, l, r) ?:
90 cmp_int((unsigned long) l, (unsigned long) r);
93 static inline bool should_drop_next_key(struct sort_iter *iter)
96 * key_sort_fix_overlapping_cmp() ensures that when keys compare equal the older key
97 * comes first; so if l->k compares equal to r->k then l->k is older
98 * and should be dropped.
100 return iter->used >= 2 &&
101 !bkey_cmp_packed(iter->b,
/*
 * Walk the sets in @iter in sorted order and build @dst, starting output at
 * dst->start.
 *
 * The iterator is sorted with key_sort_fix_overlapping_cmp. Each peeked key
 * is tested with bkey_whiteout() and should_drop_next_key(); for keys passing
 * both tests the output key is accounted in a local btree_nr_keys and the
 * output cursor moves on with bkey_next(). dst->u64s is then set to the
 * distance from dst->_data to the output cursor in u64s, stored via
 * cpu_to_le16().
 *
 * NOTE(review): the return-type line, the copy into the output cursor and the
 * return statement are not visible in this view; @c is not referenced on any
 * visible line.
 */
107 bch2_key_sort_fix_overlapping(struct bch_fs *c, struct bset *dst,
108 struct sort_iter *iter)
110 struct bkey_packed *out = dst->start;
111 struct bkey_packed *k;
112 struct btree_nr_keys nr;
114 memset(&nr, 0, sizeof(nr));
116 sort_iter_sort(iter, key_sort_fix_overlapping_cmp);
118 while ((k = sort_iter_peek(iter))) {
119 if (!bkey_whiteout(k) &&
120 !should_drop_next_key(iter)) {
122 btree_keys_account_key_add(&nr, 0, out);
123 out = bkey_next(out);
126 sort_iter_advance(iter, key_sort_fix_overlapping_cmp);
129 dst->u64s = cpu_to_le16((u64 *) out - dst->_data);
/*
 * Finish the key at *prev and step past it: packs it in place with
 * bch2_bkey_pack() using format @f, records it in @nr and moves *prev to the
 * following key position via bkey_next().
 *
 * NOTE(review): the guards around these statements, and any use of @start,
 * are on lines not visible in this view.
 */
133 static void extent_sort_advance_prev(struct bkey_format *f,
134 struct btree_nr_keys *nr,
135 struct bkey_packed *start,
136 struct bkey_packed **prev)
139 bch2_bkey_pack(*prev, (void *) *prev, f);
141 btree_keys_account_key_add(nr, 0, *prev);
142 *prev = bkey_next(*prev);
/*
 * Append key k to the output being built at *prev.
 *
 * Visible steps: a bkey_whiteout() check on k, an attempt to merge k into
 * *prev with bch2_bkey_merge(), then extent_sort_advance_prev() followed by
 * bkey_reassemble() of k into the slot *prev points at afterwards.
 *
 * NOTE(review): the declaration of k, the early-exit paths and the merge
 * result being compared against are not visible in this view.
 */
148 static void extent_sort_append(struct bch_fs *c,
149 struct bkey_format *f,
150 struct btree_nr_keys *nr,
151 struct bkey_packed *start,
152 struct bkey_packed **prev,
155 if (bkey_whiteout(k.k))
159 * prev is always unpacked, for key merging - until right before we
164 bch2_bkey_merge(c, bkey_i_to_s((void *) *prev), k) ==
168 extent_sort_advance_prev(f, nr, start, prev);
170 bkey_reassemble((void *) *prev, k.s_c);
/*
 * Re-encode the keys yielded by @src_iter from @src into format @out_f,
 * appending at vstruct_last(dst).
 *
 * With @filter_whiteouts set, keys for which bkey_whiteout() is true are
 * singled out (presumably skipped - the statement is not visible).
 * bch2_bkey_transform() is tried first, reading the key with @src's format
 * when it is packed and with bch2_bkey_format_current otherwise; on a nonzero
 * result the output key is tagged KEY_FORMAT_LOCAL_BTREE. bch2_bkey_unpack()
 * is the other visible path (presumably the fallback). Each emitted key is
 * accounted in a local btree_nr_keys, and dst->u64s is updated to the new
 * length in u64s via cpu_to_le16().
 *
 * NOTE(review): the return-type line and the return statement are not visible
 * in this view.
 */
173 /* Sort + repack in a new format: */
175 bch2_sort_repack(struct bset *dst, struct btree *src,
176 struct btree_node_iter *src_iter,
177 struct bkey_format *out_f,
178 bool filter_whiteouts)
180 struct bkey_format *in_f = &src->format;
181 struct bkey_packed *in, *out = vstruct_last(dst);
182 struct btree_nr_keys nr;
184 memset(&nr, 0, sizeof(nr));
186 while ((in = bch2_btree_node_iter_next_all(src_iter, src))) {
187 if (filter_whiteouts && bkey_whiteout(in))
190 if (bch2_bkey_transform(out_f, out, bkey_packed(in)
191 ? in_f : &bch2_bkey_format_current, in))
192 out->format = KEY_FORMAT_LOCAL_BTREE;
194 bch2_bkey_unpack(src, (void *) out, in);
196 btree_keys_account_key_add(&nr, 0, out);
197 out = bkey_next(out);
200 dst->u64s = cpu_to_le16((u64 *) out - dst->_data);
/*
 * Variant of the repack path that routes every key through
 * extent_sort_append() so that it can be merged with the previous output key.
 *
 * Each key from @iter is unpacked into an on-stack buffer (grown with
 * bkey_on_stack_realloc() to k_packed->u64s + BKEY_U64s) before any further
 * processing, because bch2_bkey_normalize() may modify the key it is given
 * and @src is not write-locked (see the in-body comment). With
 * @filter_whiteouts set, bkey_whiteout() keys and keys for which
 * bch2_bkey_normalize() returns nonzero are singled out (presumably skipped -
 * the statements are not visible). After the loop the pending key is flushed
 * with extent_sort_advance_prev(), dst->u64s is computed from prev and the
 * on-stack buffer is released.
 *
 * NOTE(review): the return-type line, the skip statements and the return
 * statement are not visible in this view.
 */
204 /* Sort, repack, and merge: */
206 bch2_sort_repack_merge(struct bch_fs *c,
207 struct bset *dst, struct btree *src,
208 struct btree_node_iter *iter,
209 struct bkey_format *out_f,
210 bool filter_whiteouts)
212 struct bkey_packed *prev = NULL, *k_packed;
213 struct bkey_on_stack k;
214 struct btree_nr_keys nr;
216 memset(&nr, 0, sizeof(nr));
217 bkey_on_stack_init(&k);
219 while ((k_packed = bch2_btree_node_iter_next_all(iter, src))) {
220 if (filter_whiteouts && bkey_whiteout(k_packed))
225 * bch2_bkey_normalize may modify the key we pass it (dropping
226 * stale pointers) and we don't have a write lock on the src
227 * node; we have to make a copy of the entire key before calling
230 bkey_on_stack_realloc(&k, c, k_packed->u64s + BKEY_U64s);
231 bch2_bkey_unpack(src, k.k, k_packed);
233 if (filter_whiteouts &&
234 bch2_bkey_normalize(c, bkey_i_to_s(k.k)))
237 extent_sort_append(c, out_f, &nr, vstruct_last(dst),
238 &prev, bkey_i_to_s(k.k));
241 extent_sort_advance_prev(out_f, &nr, vstruct_last(dst), &prev);
243 dst->u64s = cpu_to_le16((u64 *) prev - dst->_data);
244 bkey_on_stack_exit(&k, c);
/*
 * Comparator used by bch2_sort_keys(). Criteria are applied in order, each
 * one only when the previous yields 0 (GNU "?:" chaining):
 *   1. bkey_cmp_packed(b, l, r)
 *   2. bkey_deleted(r) - bkey_deleted(l)
 *   3. l->needs_whiteout - r->needs_whiteout
 */
248 static inline int sort_keys_cmp(struct btree *b,
249 struct bkey_packed *l,
250 struct bkey_packed *r)
252 return bkey_cmp_packed(b, l, r) ?:
253 (int) bkey_deleted(r) - (int) bkey_deleted(l) ?:
254 (int) l->needs_whiteout - (int) r->needs_whiteout;
/*
 * Sort the keys in @iter with sort_keys_cmp and write the result to @dst.
 *
 * Returns the number of u64s written, computed as the distance between the
 * output cursor and @dst.
 *
 * Whiteouts are singled out when @filter_whiteouts is set or when the key
 * does not have needs_whiteout (presumably skipped - the statement is not
 * visible). For a run of keys that compare equal under bkey_cmp_packed(), the
 * inner loop ORs in->needs_whiteout into a local flag and moves on to the
 * next key of the run; two neighbours in a run both having needs_whiteout
 * trips BUG_ON(). A whiteout is copied key-only (bkeyp_key_u64s() u64s) with
 * its value size set to 0, and the accumulated flag is ORed into the emitted
 * key's needs_whiteout.
 *
 * NOTE(review): the non-whiteout copy path is on lines not visible in this
 * view.
 */
257 unsigned bch2_sort_keys(struct bkey_packed *dst,
258 struct sort_iter *iter,
259 bool filter_whiteouts)
261 const struct bkey_format *f = &iter->b->format;
262 struct bkey_packed *in, *next, *out = dst;
264 sort_iter_sort(iter, sort_keys_cmp);
266 while ((in = sort_iter_next(iter, sort_keys_cmp))) {
267 bool needs_whiteout = false;
269 if (bkey_whiteout(in) &&
270 (filter_whiteouts || !in->needs_whiteout))
273 while ((next = sort_iter_peek(iter)) &&
274 !bkey_cmp_packed(iter->b, in, next)) {
275 BUG_ON(in->needs_whiteout &&
276 next->needs_whiteout);
277 needs_whiteout |= in->needs_whiteout;
278 in = sort_iter_next(iter, sort_keys_cmp);
281 if (bkey_whiteout(in)) {
282 memcpy_u64s(out, in, bkeyp_key_u64s(f, in));
283 set_bkeyp_val_u64s(f, out, 0);
287 out->needs_whiteout |= needs_whiteout;
288 out = bkey_next(out);
291 return (u64 *) out - (u64 *) dst;
294 /* Compat code for btree_node_old_extent_overwrite: */
297 * If keys compare equal, compare by pointer order:
299 * Necessary for sort_fix_overlapping() - if there are multiple keys that
300 * compare equal in different sets, we have to process them newest to oldest.
/*
 * Comparator used by the old-style extent overlap fixup: unpacks both keys
 * and orders them by start position (bkey_start_pos()); ties are broken on
 * the key addresses with the operands reversed (r, l), so - assuming
 * cmp_int() is a three-way compare - the higher address sorts first.
 */
302 static inline int extent_sort_fix_overlapping_cmp(struct btree *b,
303 struct bkey_packed *l,
304 struct bkey_packed *r)
306 struct bkey ul = bkey_unpack_key(b, l);
307 struct bkey ur = bkey_unpack_key(b, r);
309 return bkey_cmp(bkey_start_pos(&ul),
310 bkey_start_pos(&ur)) ?:
311 cmp_int((unsigned long) r, (unsigned long) l);
315 * The algorithm in extent_sort_fix_overlapping() relies on keys in the same
316 * bset being ordered by start offset - but 0 size whiteouts (which are always
317 * KEY_TYPE_deleted) break this ordering, so we need to skip over them:
/*
 * Advance the set at idx, skipping deleted keys: the cursor is stepped with
 * bkey_next_skip_noops() and the step repeats while the cursor has not
 * reached i->end and the key it lands on is bkey_deleted(). The visible
 * follow-up calls are array_remove_item() on the set and __sort_iter_sift()
 * from idx with extent_sort_fix_overlapping_cmp.
 * NOTE(review): the condition selecting between removal and re-sift is not
 * visible here - presumably the set is removed once it is exhausted.
 */
319 static void extent_iter_advance(struct sort_iter *iter, unsigned idx)
321 struct sort_iter_set *i = iter->data + idx;
324 i->k = bkey_next_skip_noops(i->k, i->end);
325 } while (i->k != i->end && bkey_deleted(i->k));
328 array_remove_item(iter->data, iter->used, idx);
330 __sort_iter_sift(iter, idx, extent_sort_fix_overlapping_cmp);
/*
 * Compat path: combine the extent sets in @iter into @dst (output starts at
 * dst->start), resolving overlaps between extents from different sets.
 *
 * Outline of the visible lines:
 *  - sort with extent_sort_fix_overlapping_cmp, then advance sets whose
 *    current key is bkey_deleted();
 *  - loop until sort_iter_end(): l is the key of the front set
 *    (_l = iter->data), r the key of the second set (_r = iter->data + 1);
 *  - with a single set left, or when l ends at or before the start of r,
 *    l is appended with extent_sort_append() and set 0 is advanced;
 *  - otherwise the overlap is resolved by trimming: r is advanced past or cut
 *    at the front, or l is split, cut at the front or cut at the back; the
 *    modified key is written back with extent_save(), and the set is
 *    re-sifted after a front cut;
 *  - finally the pending key is flushed with extent_sort_advance_prev(),
 *    dst->u64s is computed from prev and the on-stack split buffer is
 *    released.
 *
 * NOTE(review): the return-type line, several declarations, some branch
 * conditions and the return statement are not visible in this view; the
 * outline above covers visible lines only.
 */
334 bch2_extent_sort_fix_overlapping(struct bch_fs *c, struct bset *dst,
335 struct sort_iter *iter)
337 struct btree *b = iter->b;
338 struct bkey_format *f = &b->format;
339 struct sort_iter_set *_l = iter->data, *_r = iter->data + 1;
340 struct bkey_packed *prev = NULL;
341 struct bkey l_unpacked, r_unpacked;
343 struct btree_nr_keys nr;
344 struct bkey_on_stack split;
347 memset(&nr, 0, sizeof(nr));
348 bkey_on_stack_init(&split);
350 sort_iter_sort(iter, extent_sort_fix_overlapping_cmp);
351 for (i = 0; i < iter->used;) {
352 if (bkey_deleted(iter->data[i].k))
353 __sort_iter_advance(iter, i,
354 extent_sort_fix_overlapping_cmp);
359 while (!sort_iter_end(iter)) {
360 l = __bkey_disassemble(b, _l->k, &l_unpacked);
362 if (iter->used == 1) {
363 extent_sort_append(c, f, &nr, dst->start, &prev, l);
364 extent_iter_advance(iter, 0);
368 r = __bkey_disassemble(b, _r->k, &r_unpacked);
370 /* If current key and next key don't overlap, just append */
371 if (bkey_cmp(l.k->p, bkey_start_pos(r.k)) <= 0) {
372 extent_sort_append(c, f, &nr, dst->start, &prev, l);
373 extent_iter_advance(iter, 0);
377 /* Skip 0 size keys */
379 extent_iter_advance(iter, 1);
384 * overlap: keep the newer key and trim the older key so they
385 * don't overlap. comparing pointers tells us which one is
386 * newer, since the bsets are appended one after the other.
389 /* can't happen because of comparison func */
390 BUG_ON(_l->k < _r->k &&
391 !bkey_cmp(bkey_start_pos(l.k), bkey_start_pos(r.k)));
395 if (bkey_cmp(l.k->p, r.k->p) >= 0) {
396 extent_iter_advance(iter, 1);
398 bch2_cut_front_s(l.k->p, r);
399 extent_save(b, _r->k, r.k);
400 __sort_iter_sift(iter, 1,
401 extent_sort_fix_overlapping_cmp);
403 } else if (bkey_cmp(l.k->p, r.k->p) > 0) {
406 * r wins, but it overlaps in the middle of l - split l:
408 bkey_on_stack_reassemble(&split, c, l.s_c);
409 bch2_cut_back(bkey_start_pos(r.k), split.k);
411 bch2_cut_front_s(r.k->p, l);
412 extent_save(b, _l->k, l.k);
414 __sort_iter_sift(iter, 0,
415 extent_sort_fix_overlapping_cmp);
417 extent_sort_append(c, f, &nr, dst->start,
418 &prev, bkey_i_to_s(split.k));
420 bch2_cut_back_s(bkey_start_pos(r.k), l);
421 extent_save(b, _l->k, l.k);
425 extent_sort_advance_prev(f, &nr, dst->start, &prev);
427 dst->u64s = cpu_to_le16((u64 *) prev - dst->_data);
429 bkey_on_stack_exit(&split, c);
/*
 * Comparator used by bch2_sort_extents(): the primary order is
 * bkey_cmp_packed(b, l, r); when that yields 0 (GNU "?:" form) the result is
 * bkey_deleted(l) - bkey_deleted(r).
 */
433 static inline int sort_extents_cmp(struct btree *b,
434 struct bkey_packed *l,
435 struct bkey_packed *r)
437 return bkey_cmp_packed(b, l, r) ?:
438 (int) bkey_deleted(l) - (int) bkey_deleted(r);
/*
 * Sort the extents in @iter with sort_extents_cmp and write them to @dst.
 *
 * Returns the number of u64s written, computed as the distance between the
 * output cursor and @dst.
 *
 * Keys for which bkey_deleted() is true, and whiteouts when
 * @filter_whiteouts is set or the key lacks needs_whiteout, are singled out
 * (presumably skipped - the statements are not visible).
 *
 * NOTE(review): the copy into the output cursor is on lines not visible in
 * this view.
 */
441 unsigned bch2_sort_extents(struct bkey_packed *dst,
442 struct sort_iter *iter,
443 bool filter_whiteouts)
445 struct bkey_packed *in, *out = dst;
447 sort_iter_sort(iter, sort_extents_cmp);
449 while ((in = sort_iter_next(iter, sort_extents_cmp))) {
450 if (bkey_deleted(in))
453 if (bkey_whiteout(in) &&
454 (filter_whiteouts || !in->needs_whiteout))
458 out = bkey_next(out);
461 return (u64 *) out - (u64 *) dst;
/*
 * Comparator used by bch2_sort_extent_whiteouts(): unpacks both keys and
 * orders them purely by start position (bkey_start_pos()); there is no
 * tie-break.
 */
464 static inline int sort_extent_whiteouts_cmp(struct btree *b,
465 struct bkey_packed *l,
466 struct bkey_packed *r)
468 struct bkey ul = bkey_unpack_key(b, l);
469 struct bkey ur = bkey_unpack_key(b, r);
471 return bkey_cmp(bkey_start_pos(&ul), bkey_start_pos(&ur));
/*
 * Sort the extent whiteouts in @iter by start position and write them to
 * @dst, growing a pending whiteout l over following whiteouts r where
 * possible.
 *
 * Returns the number of u64s written, computed as the distance between the
 * output cursor and @dst.
 *
 * Outline of the visible lines:
 *  - size and offset limits are taken from the btree's key format with
 *    bkey_field_max(), and the size limit is clamped to KEY_SIZE_MAX;
 *  - bkey_deleted() keys are singled out (presumably skipped); the remaining
 *    keys are expected to carry no value and to be KEY_TYPE_discard
 *    (EBUG_ON checks);
 *  - when l ends at or after the start of r, l is resized with
 *    bch2_key_resize() to a new size bounded by the limits above and by the
 *    end of r (BUG_ON if that would shrink l), and r is cut at the front to
 *    start where l now ends;
 *  - l is packed into the output with bch2_bkey_pack(), and once more after
 *    the loop for the last pending whiteout.
 *
 * NOTE(review): the declarations of l, r and new_size, several branch
 * conditions and the handling of a failed pack are not visible in this view.
 */
474 unsigned bch2_sort_extent_whiteouts(struct bkey_packed *dst,
475 struct sort_iter *iter)
477 const struct bkey_format *f = &iter->b->format;
478 struct bkey_packed *in, *out = dst;
480 bool prev = false, l_packed = false;
481 u64 max_packed_size = bkey_field_max(f, BKEY_FIELD_SIZE);
482 u64 max_packed_offset = bkey_field_max(f, BKEY_FIELD_OFFSET);
485 max_packed_size = min_t(u64, max_packed_size, KEY_SIZE_MAX);
487 sort_iter_sort(iter, sort_extent_whiteouts_cmp);
489 while ((in = sort_iter_next(iter, sort_extent_whiteouts_cmp))) {
490 if (bkey_deleted(in))
493 EBUG_ON(bkeyp_val_u64s(f, in));
494 EBUG_ON(in->type != KEY_TYPE_discard);
496 r.k = bkey_unpack_key(iter->b, in);
499 bkey_cmp(l.k.p, bkey_start_pos(&r.k)) >= 0) {
500 if (bkey_cmp(l.k.p, r.k.p) >= 0)
504 ? min(max_packed_size, max_packed_offset -
505 bkey_start_offset(&l.k))
508 new_size = min(new_size, r.k.p.offset -
509 bkey_start_offset(&l.k));
511 BUG_ON(new_size < l.k.size);
513 bch2_key_resize(&l.k, new_size);
515 if (bkey_cmp(l.k.p, r.k.p) >= 0)
518 bch2_cut_front(l.k.p, &r);
522 if (!bch2_bkey_pack(out, &l, f)) {
526 out = bkey_next(out);
531 l_packed = bkey_packed(in);
535 if (!bch2_bkey_pack(out, &l, f)) {
539 out = bkey_next(out);
542 return (u64 *) out - (u64 *) dst;