// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
#include "bkey_on_stack.h"
#include "btree_update.h"
#include "btree_update_interior.h"
#include "buckets.h"
#include "debug.h"
#include "extents.h"
#include "extent_update.h"

/*
 * This counts the number of iterators to the alloc & ec btrees we'll need
 * when inserting/removing this extent:
 */
static unsigned bch2_bkey_nr_alloc_ptrs(struct bkey_s_c k)
{
        struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
        const union bch_extent_entry *entry;
        unsigned ret = 0;

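        /*
         * Both entry types take an iterator: plain pointers for the alloc
         * btree, stripe pointers for the ec btree (see the comment above):
         */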
        bkey_extent_entry_for_each(ptrs, entry) {
                switch (__extent_entry_type(entry)) {
                case BCH_EXTENT_ENTRY_ptr:
                case BCH_EXTENT_ENTRY_stripe_ptr:
                        ret++;
                }
        }

        return ret;
}

static int count_iters_for_insert(struct btree_trans *trans,
                                  struct bkey_s_c k,
                                  unsigned offset,
                                  struct bpos *end,
                                  unsigned *nr_iters,
                                  unsigned max_iters,
                                  bool overwrite)
{
        int ret = 0;

        switch (k.k->type) {
        case KEY_TYPE_extent:
        case KEY_TYPE_reflink_v:
                *nr_iters += bch2_bkey_nr_alloc_ptrs(k);

                if (*nr_iters >= max_iters) {
                        *end = bpos_min(*end, k.k->p);
                        ret = 1;
                }

                break;
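        /*
         * A reflink pointer references its data indirectly: walk the
         * indirect extents it points to in the reflink btree and count
         * their pointers as well:
         */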
        case KEY_TYPE_reflink_p: {
                struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
                u64 idx = le64_to_cpu(p.v->idx);
                unsigned sectors = bpos_min(*end, p.k->p).offset -
                        bkey_start_offset(p.k);
                struct btree_iter *iter;
                struct bkey_s_c r_k;

                for_each_btree_key(trans, iter,
                                   BTREE_ID_REFLINK, POS(0, idx + offset),
                                   BTREE_ITER_SLOTS, r_k, ret) {
                        if (bkey_cmp(bkey_start_pos(r_k.k),
                                     POS(0, idx + sectors)) >= 0)
                                break;

                        *nr_iters += 1 + bch2_bkey_nr_alloc_ptrs(r_k);

                        if (*nr_iters >= max_iters) {
                                struct bpos pos = bkey_start_pos(k.k);
                                pos.offset += r_k.k->p.offset - idx;

                                *end = bpos_min(*end, pos);
                                ret = 1;
                                break;
                        }
                }

                bch2_trans_iter_put(trans, iter);
                break;
        }
        }

        return ret;
}

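/*
 * Budget for the iterators a single extent operation may consume; presumably
 * a third of BTREE_ITER_MAX so the alloc/ec iterators counted above can't
 * exhaust the transaction's iterator budget:
 */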
#define EXTENT_ITERS_MAX        (BTREE_ITER_MAX / 3)

int bch2_extent_atomic_end(struct btree_iter *iter,
                           struct bkey_i *insert,
                           struct bpos *end)
{
        struct btree_trans *trans = iter->trans;
        struct btree *b;
        struct btree_node_iter  node_iter;
        struct bkey_packed      *_k;
        unsigned                nr_iters = 0;
        int ret;

        ret = bch2_btree_iter_traverse(iter);
        if (ret)
                return ret;

        b = iter->l[0].b;
        node_iter = iter->l[0].iter;

        BUG_ON(bkey_cmp(bkey_start_pos(&insert->k), b->data->min_key) < 0);

        *end = bpos_min(insert->k.p, b->key.k.p);

        ret = count_iters_for_insert(trans, bkey_i_to_s_c(insert), 0, end,
                                     &nr_iters, EXTENT_ITERS_MAX / 2, false);
        if (ret < 0)
                return ret;

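        /*
         * Walk the existing keys the insert overlaps: each of them needs
         * iterators too, and count_iters_for_insert() trims *end as soon as
         * the budget is reached:
         */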
        while ((_k = bch2_btree_node_iter_peek_filter(&node_iter, b,
                                                      KEY_TYPE_discard))) {
                struct bkey     unpacked;
                struct bkey_s_c k = bkey_disassemble(b, _k, &unpacked);
                unsigned offset = 0;

                if (bkey_cmp(bkey_start_pos(k.k), *end) >= 0)
                        break;

                if (bkey_cmp(bkey_start_pos(&insert->k),
                             bkey_start_pos(k.k)) > 0)
                        offset = bkey_start_offset(&insert->k) -
                                bkey_start_offset(k.k);

                ret = count_iters_for_insert(trans, k, offset, end,
                                        &nr_iters, EXTENT_ITERS_MAX, true);
                if (ret)
                        break;

                bch2_btree_node_iter_advance(&node_iter, b);
        }

        return ret < 0 ? ret : 0;
}

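/*
 * Cut @k back so that inserting it won't need more iterators than a single
 * transaction can provide:
 */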
int bch2_extent_trim_atomic(struct bkey_i *k, struct btree_iter *iter)
{
        struct bpos end;
        int ret;

        ret = bch2_extent_atomic_end(iter, k, &end);
        if (ret)
                return ret;

        bch2_cut_back(end, k);
        return 0;
}

int bch2_extent_is_atomic(struct bkey_i *k, struct btree_iter *iter)
{
        struct bpos end;
        int ret;

        ret = bch2_extent_atomic_end(iter, k, &end);
        if (ret)
                return ret;

        return !bkey_cmp(end, k->k.p);
}

enum btree_insert_ret
bch2_extent_can_insert(struct btree_trans *trans,
                       struct btree_insert_entry *insert,
                       unsigned *u64s)
{
        struct btree_iter_level *l = &insert->iter->l[0];
        struct btree_node_iter node_iter = l->iter;
        enum bch_extent_overlap overlap;
        struct bkey_packed *_k;
        struct bkey unpacked;
        struct bkey_s_c k;
        int sectors;

        /*
         * We avoid creating whiteouts whenever possible when deleting, but
         * those optimizations mean we may potentially insert two whiteouts
         * instead of one (when we overlap with the front of one extent and the
         * back of another):
         */
        if (bkey_whiteout(&insert->k->k))
                *u64s += BKEY_U64s;

        _k = bch2_btree_node_iter_peek_filter(&node_iter, l->b,
                                              KEY_TYPE_discard);
        if (!_k)
                return BTREE_INSERT_OK;

        k = bkey_disassemble(l->b, _k, &unpacked);

        overlap = bch2_extent_overlap(&insert->k->k, k.k);

        /* account for having to split existing extent: */
        if (overlap == BCH_EXTENT_OVERLAP_MIDDLE)
                *u64s += _k->u64s;

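        /*
         * Splitting a compressed extent leaves both halves referencing the
         * full compressed payload, so the split effectively consumes
         * bch2_bkey_sectors_compressed() additional sectors; reserve them
         * up front:
         */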
        if (overlap == BCH_EXTENT_OVERLAP_MIDDLE &&
            (sectors = bch2_bkey_sectors_compressed(k))) {
                int flags = trans->flags & BTREE_INSERT_NOFAIL
                        ? BCH_DISK_RESERVATION_NOFAIL : 0;

                switch (bch2_disk_reservation_add(trans->c,
                                trans->disk_res,
                                sectors, flags)) {
                case 0:
                        break;
                case -ENOSPC:
                        return BTREE_INSERT_ENOSPC;
                default:
                        BUG();
                }
        }

        return BTREE_INSERT_OK;
}

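/*
 * With expensive debug checks enabled, verify that @insert doesn't overlap
 * either of its neighbours in the node:
 */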
static void verify_extent_nonoverlapping(struct bch_fs *c,
                                         struct btree *b,
                                         struct btree_node_iter *_iter,
                                         struct bkey_i *insert)
{
#ifdef CONFIG_BCACHEFS_DEBUG
        struct btree_node_iter iter;
        struct bkey_packed *k;
        struct bkey uk;

        if (!expensive_debug_checks(c))
                return;

        iter = *_iter;
        k = bch2_btree_node_iter_prev_filter(&iter, b, KEY_TYPE_discard);
        BUG_ON(k &&
               (uk = bkey_unpack_key(b, k),
                bkey_cmp(uk.p, bkey_start_pos(&insert->k)) > 0));

        iter = *_iter;
        k = bch2_btree_node_iter_peek_filter(&iter, b, KEY_TYPE_discard);
#if 0
        BUG_ON(k &&
               (uk = bkey_unpack_key(b, k),
                bkey_cmp(insert->k.p, bkey_start_pos(&uk))) > 0);
#else
        if (k &&
            (uk = bkey_unpack_key(b, k),
             bkey_cmp(insert->k.p, bkey_start_pos(&uk))) > 0) {
                char buf1[100];
                char buf2[100];

                bch2_bkey_to_text(&PBUF(buf1), &insert->k);
                bch2_bkey_to_text(&PBUF(buf2), &uk);

                bch2_dump_btree_node(b);
                panic("insert > next :\n"
                      "insert %s\n"
                      "next   %s\n",
                      buf1, buf2);
        }
#endif

#endif
}

static void extent_bset_insert(struct bch_fs *c, struct btree_iter *iter,
                               struct bkey_i *insert)
{
        struct btree_iter_level *l = &iter->l[0];
        struct bkey_packed *k =
                bch2_btree_node_iter_bset_pos(&l->iter, l->b, bset_tree_last(l->b));

        BUG_ON(insert->k.u64s > bch_btree_keys_u64s_remaining(c, l->b));

        EBUG_ON(bkey_deleted(&insert->k) || !insert->k.size);
        verify_extent_nonoverlapping(c, l->b, &l->iter, insert);

        if (debug_check_bkeys(c))
                bch2_bkey_debugcheck(c, l->b, bkey_i_to_s_c(insert));

        bch2_bset_insert(l->b, &l->iter, k, insert, 0);
        bch2_btree_node_iter_fix(iter, l->b, &l->iter, k, 0, k->u64s);
}

static void
extent_squash(struct bch_fs *c, struct btree_iter *iter,
              struct bkey_i *insert,
              struct bkey_packed *_k, struct bkey_s k,
              enum bch_extent_overlap overlap)
{
        struct btree_iter_level *l = &iter->l[0];
        int u64s_delta;

        switch (overlap) {
        case BCH_EXTENT_OVERLAP_FRONT:
                /* insert overlaps with start of k: */
                u64s_delta = bch2_cut_front_s(insert->k.p, k);
                btree_keys_account_val_delta(l->b, _k, u64s_delta);

                EBUG_ON(bkey_deleted(k.k));
                extent_save(l->b, _k, k.k);
                bch2_btree_iter_fix_key_modified(iter, l->b, _k);
                break;

        case BCH_EXTENT_OVERLAP_BACK:
                /* insert overlaps with end of k: */
                u64s_delta = bch2_cut_back_s(bkey_start_pos(&insert->k), k);
                btree_keys_account_val_delta(l->b, _k, u64s_delta);

                EBUG_ON(bkey_deleted(k.k));
                extent_save(l->b, _k, k.k);

                /*
                 * As the auxiliary tree is indexed by the end of the
                 * key and we've just changed the end, update the
                 * auxiliary tree.
                 */
                bch2_bset_fix_invalidated_key(l->b, _k);
                bch2_btree_node_iter_fix(iter, l->b, &l->iter,
                                         _k, _k->u64s, _k->u64s);
                break;

        case BCH_EXTENT_OVERLAP_ALL: {
                /* The insert key completely covers k, invalidate k */
                if (!bkey_whiteout(k.k))
                        btree_account_key_drop(l->b, _k);

                k.k->size = 0;
                k.k->type = KEY_TYPE_deleted;

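                /*
                 * If k lives in the last, still-open bset we can delete it
                 * outright; keys in already-written bsets can only be
                 * overwritten in place:
                 */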
                if (_k >= btree_bset_last(l->b)->start) {
                        unsigned u64s = _k->u64s;

                        bch2_bset_delete(l->b, _k, _k->u64s);
                        bch2_btree_node_iter_fix(iter, l->b, &l->iter,
                                                 _k, u64s, 0);
                } else {
                        extent_save(l->b, _k, k.k);
                        bch2_btree_iter_fix_key_modified(iter, l->b, _k);
                }

                break;
        }
        case BCH_EXTENT_OVERLAP_MIDDLE: {
                struct bkey_on_stack split;

                bkey_on_stack_init(&split);
                bkey_on_stack_realloc(&split, c, k.k->u64s);

                /*
                 * The insert key falls 'in the middle' of k, splitting k
                 * in 3:
                 * - start only in k, preserve
                 * - middle common section, invalidate in k
                 * - end only in k, preserve
                 *
                 * We update the old key to preserve the start, insert will
                 * be the new common section, and we manually insert the end
                 * that we are preserving.
                 *
                 * Modify k _before_ doing the insert (which will move what
                 * k points to).
                 */
                bkey_reassemble(split.k, k.s_c);
                split.k->k.needs_whiteout |= bkey_written(l->b, _k);

                bch2_cut_back(bkey_start_pos(&insert->k), split.k);
                BUG_ON(bkey_deleted(&split.k->k));

                u64s_delta = bch2_cut_front_s(insert->k.p, k);
                btree_keys_account_val_delta(l->b, _k, u64s_delta);

                BUG_ON(bkey_deleted(k.k));
                extent_save(l->b, _k, k.k);
                bch2_btree_iter_fix_key_modified(iter, l->b, _k);

                extent_bset_insert(c, iter, split.k);
                bkey_on_stack_exit(&split, c);
                break;
        }
        }
}

/**
 * bch2_insert_fixup_extent - insert a new extent and deal with overlaps
 *
 * This may result in not actually doing the insert, or only inserting some
 * subset of the insert key. For cmpxchg operations this is where that logic
 * lives.
 *
 * All subsets of @insert that need to be inserted are inserted and
 * journalled. If the btree node or the disk reservation fills up, only a
 * prefix of @insert is inserted, and @iter->pos is set past the prefix that
 * actually got inserted.
 *
 * BSET INVARIANTS: this function is responsible for maintaining all the
 * invariants for bsets of extents in memory. Things get really hairy with 0
 * size extents.
 *
 * Within one bset:
 *
 * bkey_start_pos(bkey_next(k)) >= k
 * or bkey_start_offset(bkey_next(k)) >= k->offset
 *
 * i.e. strict ordering, no overlapping extents.
 *
 * Across multiple bsets (i.e. the full btree node):
 *
 * ∀ k, j
 *   k.size != 0 ∧ j.size != 0 →
 *     ¬ (k > bkey_start_pos(j) ∧ k < j)
 *
 * i.e. no two overlapping keys _of nonzero size_.
 *
 * We can't realistically maintain this invariant for zero size keys because of
 * the key merging done in bch2_btree_insert_key() - for two mergeable keys k, j
 * there may be another 0 size key between them in another bset, and it will
 * thus overlap with the merged key.
 *
 * In addition, the end of iter->pos indicates how much has been processed.
 * If the end of iter->pos is not the same as the end of insert, then
 * key insertion needs to continue/be retried.
 */
void bch2_insert_fixup_extent(struct btree_trans *trans,
                              struct btree_insert_entry *insert_entry)
{
        struct bch_fs *c = trans->c;
        struct btree_iter *iter = insert_entry->iter;
        struct bkey_i *insert   = insert_entry->k;
        struct btree_iter_level *l = &iter->l[0];
        struct btree_node_iter node_iter = l->iter;
        bool deleting           = bkey_whiteout(&insert->k);
        bool update_journal     = !deleting;
        bool update_btree       = !deleting;
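        /*
         * When deleting we may journal a whiteout rather than @insert
         * itself; keep a copy for that purpose (it is cut down in step with
         * insert below):
         */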
        struct bkey_i whiteout  = *insert;
        struct bkey_packed *_k;
        struct bkey unpacked;

        EBUG_ON(iter->level);
        EBUG_ON(!insert->k.size);
        EBUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&insert->k)));

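        /*
         * Walk every existing key that the insert overlaps, resolving each
         * overlap with extent_squash() and tracking whether we still need a
         * btree update and/or a journal entry:
         */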
        while ((_k = bch2_btree_node_iter_peek_filter(&l->iter, l->b,
                                                      KEY_TYPE_discard))) {
                struct bkey_s k = __bkey_disassemble(l->b, _k, &unpacked);
                struct bpos cur_end = bpos_min(insert->k.p, k.k->p);
                enum bch_extent_overlap overlap =
                        bch2_extent_overlap(&insert->k, k.k);

                if (bkey_cmp(bkey_start_pos(k.k), insert->k.p) >= 0)
                        break;

                if (!bkey_whiteout(k.k))
                        update_journal = true;

                if (!update_journal) {
                        bch2_cut_front(cur_end, insert);
                        bch2_cut_front(cur_end, &whiteout);
                        bch2_btree_iter_set_pos_same_leaf(iter, cur_end);
                        goto next;
                }

                /*
                 * When deleting, if possible just do it by switching the type
                 * of the key we're deleting, instead of creating and inserting
                 * a new whiteout:
                 */
                if (deleting &&
                    !update_btree &&
                    !bkey_cmp(insert->k.p, k.k->p) &&
                    !bkey_cmp(bkey_start_pos(&insert->k), bkey_start_pos(k.k))) {
                        if (!bkey_whiteout(k.k)) {
                                btree_account_key_drop(l->b, _k);
                                _k->type = KEY_TYPE_discard;
                                reserve_whiteout(l->b, _k);
                                bch2_btree_iter_fix_key_modified(iter,
                                                                 l->b, _k);
                        }
                        break;
                }

                if (k.k->needs_whiteout || bkey_written(l->b, _k)) {
                        insert->k.needs_whiteout = true;
                        update_btree = true;
                }

                if (update_btree &&
                    overlap == BCH_EXTENT_OVERLAP_ALL &&
                    bkey_whiteout(k.k) &&
                    k.k->needs_whiteout) {
                        unreserve_whiteout(l->b, _k);
                        _k->needs_whiteout = false;
                }

                extent_squash(c, iter, insert, _k, k, overlap);

                if (!update_btree)
                        bch2_cut_front(cur_end, insert);
next:
                node_iter = l->iter;

                if (overlap == BCH_EXTENT_OVERLAP_FRONT ||
                    overlap == BCH_EXTENT_OVERLAP_MIDDLE)
                        break;
        }

        l->iter = node_iter;
        bch2_btree_iter_set_pos_same_leaf(iter, insert->k.p);

        if (update_btree) {
                if (deleting)
                        insert->k.type = KEY_TYPE_discard;

                EBUG_ON(bkey_deleted(&insert->k) || !insert->k.size);

                extent_bset_insert(c, iter, insert);
        }

        if (update_journal) {
                struct bkey_i *k = !deleting ? insert : &whiteout;

                if (deleting)
                        k->k.type = KEY_TYPE_discard;

                EBUG_ON(bkey_deleted(&k->k) || !k->k.size);

                bch2_btree_journal_key(trans, iter, k);
        }

        bch2_cut_front(insert->k.p, insert);
}