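/*
 * Btree key cache: caches the value at a single btree position so that
 * frequently updated keys (e.g. alloc info, per the "Use cached iterators
 * for alloc btree" change this file came in with) can be updated without
 * touching the btree on every change; dirty cached keys hold journal pins
 * and are flushed back to the btree later.
 */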

#include "bcachefs.h"
#include "btree_iter.h"
#include "btree_key_cache.h"
#include "btree_locking.h"
#include "btree_update.h"
#include "error.h"
#include "journal.h"
#include "journal_reclaim.h"

#include <trace/events/bcachefs.h>

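/* Cached keys live in an rhashtable indexed by (btree_id, pos): */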
static int bch2_btree_key_cache_cmp_fn(struct rhashtable_compare_arg *arg,
                                       const void *obj)
{
        const struct bkey_cached *ck = obj;
        const struct bkey_cached_key *key = arg->key;

        return cmp_int(ck->key.btree_id, key->btree_id) ?:
                bkey_cmp(ck->key.pos, key->pos);
}

static const struct rhashtable_params bch2_btree_key_cache_params = {
        .head_offset    = offsetof(struct bkey_cached, hash),
        .key_offset     = offsetof(struct bkey_cached, key),
        .key_len        = sizeof(struct bkey_cached_key),
        .obj_cmpfn      = bch2_btree_key_cache_cmp_fn,
};

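/* Look up a cached key, returning NULL if it isn't present: */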
__flatten
static inline struct bkey_cached *
btree_key_cache_find(struct bch_fs *c, enum btree_id btree_id, struct bpos pos)
{
        struct bkey_cached_key key = {
                .btree_id       = btree_id,
                .pos            = pos,
        };

        return rhashtable_lookup_fast(&c->btree_key_cache.table, &key,
                                      bch2_btree_key_cache_params);
}

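/*
 * Try to take both intent and write locks on a cached key, backing off if
 * it's dirty - dirty keys can't be evicted until they've been flushed:
 */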
static bool bkey_cached_lock_for_evict(struct bkey_cached *ck)
{
        if (!six_trylock_intent(&ck->c.lock))
                return false;

        if (!six_trylock_write(&ck->c.lock)) {
                six_unlock_intent(&ck->c.lock);
                return false;
        }

        if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
                six_unlock_write(&ck->c.lock);
                six_unlock_intent(&ck->c.lock);
                return false;
        }

        return true;
}

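/*
 * Remove a cached key from the hash table, poisoning the key field so
 * stale lookups can't match it:
 */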
static void bkey_cached_evict(struct btree_key_cache *c,
                              struct bkey_cached *ck)
{
        BUG_ON(rhashtable_remove_fast(&c->table, &ck->hash,
                                      bch2_btree_key_cache_params));
        memset(&ck->key, ~0, sizeof(ck->key));
}

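/*
 * Move an entry to the freed list and drop its allocated key; caller must
 * hold both intent and write locks, which are released here:
 */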
static void bkey_cached_free(struct btree_key_cache *c,
                             struct bkey_cached *ck)
{
        list_move(&ck->list, &c->freed);

        kfree(ck->k);
        ck->k           = NULL;
        ck->u64s        = 0;

        six_unlock_write(&ck->c.lock);
        six_unlock_intent(&ck->c.lock);
}

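/*
 * Allocate a bkey_cached, preferring to reuse an entry from the freed or
 * clean lists before falling back to kzalloc(); returned with intent and
 * write locks held:
 */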
static struct bkey_cached *
bkey_cached_alloc(struct btree_key_cache *c)
{
        struct bkey_cached *ck;

        list_for_each_entry(ck, &c->freed, list)
                if (bkey_cached_lock_for_evict(ck))
                        return ck;

        list_for_each_entry(ck, &c->clean, list)
                if (bkey_cached_lock_for_evict(ck)) {
                        bkey_cached_evict(c, ck);
                        return ck;
                }

        ck = kzalloc(sizeof(*ck), GFP_NOFS);
        if (!ck)
                return NULL;

        INIT_LIST_HEAD(&ck->list);
        six_lock_init(&ck->c.lock);
        BUG_ON(!six_trylock_intent(&ck->c.lock));
        BUG_ON(!six_trylock_write(&ck->c.lock));

        return ck;
}

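/*
 * Create a new cached key for (btree_id, pos) and insert it into the hash
 * table; returns NULL if we raced with another thread creating the same
 * entry:
 */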
static struct bkey_cached *
btree_key_cache_create(struct btree_key_cache *c,
                       enum btree_id btree_id,
                       struct bpos pos)
{
        struct bkey_cached *ck;

        ck = bkey_cached_alloc(c);
        if (!ck)
                return ERR_PTR(-ENOMEM);

        ck->c.level             = 0;
        ck->c.btree_id          = btree_id;
        ck->key.btree_id        = btree_id;
        ck->key.pos             = pos;
        ck->valid               = false;

        BUG_ON(ck->flags);

        if (rhashtable_lookup_insert_fast(&c->table,
                                          &ck->hash,
                                          bch2_btree_key_cache_params)) {
                /* We raced with another fill: */
                bkey_cached_free(c, ck);
                return NULL;
        }

        list_move(&ck->list, &c->clean);
        six_unlock_write(&ck->c.lock);

        return ck;
}

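/*
 * Fill a cached key from the btree: read the current value at this
 * position, reallocating ck->k if it's too small to hold it:
 */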
static int btree_key_cache_fill(struct btree_trans *trans,
                                struct btree_iter *ck_iter,
                                struct bkey_cached *ck)
{
        struct btree_iter *iter;
        struct bkey_s_c k;
        unsigned new_u64s = 0;
        struct bkey_i *new_k = NULL;
        int ret;

        iter = bch2_trans_get_iter(trans, ck->key.btree_id,
                                   ck->key.pos, BTREE_ITER_SLOTS);
        if (IS_ERR(iter))
                return PTR_ERR(iter);

        k = bch2_btree_iter_peek_slot(iter);
        ret = bkey_err(k);
        if (ret) {
                bch2_trans_iter_put(trans, iter);
                return ret;
        }

        if (!bch2_btree_node_relock(ck_iter, 0)) {
                bch2_trans_iter_put(trans, iter);
                trace_transaction_restart_ip(trans->ip, _THIS_IP_);
                return -EINTR;
        }

        if (k.k->u64s > ck->u64s) {
                new_u64s = roundup_pow_of_two(k.k->u64s);
                new_k = kmalloc(new_u64s * sizeof(u64), GFP_NOFS);
                if (!new_k) {
                        bch2_trans_iter_put(trans, iter);
                        return -ENOMEM;
                }
        }

        bch2_btree_node_lock_write(ck_iter->l[0].b, ck_iter);
        if (new_k) {
                kfree(ck->k);
                ck->u64s = new_u64s;
                ck->k = new_k;
        }

        bkey_reassemble(ck->k, k);
        ck->valid = true;
        bch2_btree_node_unlock_write(ck_iter->l[0].b, ck_iter);

        /* We're not likely to need this iterator again: */
        bch2_trans_iter_free(trans, iter);

        return 0;
}

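/*
 * Passed to btree_node_lock() as the check function: fails the lock
 * attempt if the entry no longer matches the position the iterator looked
 * it up under (i.e. it was freed and reused while we waited):
 */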
static int bkey_cached_check_fn(struct six_lock *lock, void *p)
{
        struct bkey_cached *ck = container_of(lock, struct bkey_cached, c.lock);
        const struct btree_iter *iter = p;

        return ck->key.btree_id == iter->btree_id &&
                !bkey_cmp(ck->key.pos, iter->pos) ? 0 : -1;
}

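/*
 * Traverse a BTREE_ITER_CACHED iterator: look up (or create) the cached
 * key for the iterator's position, lock it, and fill it from the btree if
 * it isn't valid yet:
 */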
int bch2_btree_iter_traverse_cached(struct btree_iter *iter)
{
        struct btree_trans *trans = iter->trans;
        struct bch_fs *c = trans->c;
        struct bkey_cached *ck;
        int ret = 0;

        BUG_ON(iter->level);

        if (btree_node_locked(iter, 0)) {
                ck = (void *) iter->l[0].b;
                goto fill;
        }
retry:
        ck = btree_key_cache_find(c, iter->btree_id, iter->pos);
        if (!ck) {
                if (iter->flags & BTREE_ITER_CACHED_NOCREATE) {
                        iter->l[0].b = NULL;
                        return 0;
                }

                mutex_lock(&c->btree_key_cache.lock);
                ck = btree_key_cache_create(&c->btree_key_cache,
                                            iter->btree_id, iter->pos);
                mutex_unlock(&c->btree_key_cache.lock);

                ret = PTR_ERR_OR_ZERO(ck);
                if (ret)
                        goto err;
                if (!ck)
                        goto retry;

                mark_btree_node_locked(iter, 0, SIX_LOCK_intent);
                iter->locks_want = 1;
        } else {
                enum six_lock_type lock_want = __btree_lock_want(iter, 0);

                if (!btree_node_lock((void *) ck, iter->pos, 0, iter, lock_want,
                                     bkey_cached_check_fn, iter)) {
                        if (ck->key.btree_id != iter->btree_id ||
                            bkey_cmp(ck->key.pos, iter->pos)) {
                                goto retry;
                        }

                        trace_transaction_restart_ip(trans->ip, _THIS_IP_);
                        ret = -EINTR;
                        goto err;
                }

                if (ck->key.btree_id != iter->btree_id ||
                    bkey_cmp(ck->key.pos, iter->pos)) {
                        six_unlock_type(&ck->c.lock, lock_want);
                        goto retry;
                }

                mark_btree_node_locked(iter, 0, lock_want);
        }

        iter->l[0].lock_seq     = ck->c.lock.state.seq;
        iter->l[0].b            = (void *) ck;
fill:
        if (!ck->valid && !(iter->flags & BTREE_ITER_CACHED_NOFILL)) {
                if (!btree_node_intent_locked(iter, 0))
                        bch2_btree_iter_upgrade(iter, 1);
                if (!btree_node_intent_locked(iter, 0)) {
                        trace_transaction_restart_ip(trans->ip, _THIS_IP_);
                        ret = -EINTR;
                        goto err;
                }

                ret = btree_key_cache_fill(trans, iter, ck);
                if (ret)
                        goto err;
        }

        iter->uptodate = BTREE_ITER_NEED_PEEK;
        bch2_btree_iter_downgrade(iter);
        return ret;
err:
        if (ret != -EINTR) {
                btree_node_unlock(iter, 0);
                iter->flags |= BTREE_ITER_ERROR;
                iter->l[0].b = BTREE_ITER_NO_NODE_ERROR;
        }
        return ret;
}

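/*
 * Flush a dirty cached key back to the btree as a BTREE_TRIGGER_NORUN
 * update (triggers are not re-run here), then drop its journal pin and
 * prereservation; if @evict, also remove it from the cache. A nonzero
 * @journal_seq means only flush if the key still pins that journal
 * sequence number:
 */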
static int btree_key_cache_flush_pos(struct btree_trans *trans,
                                     struct bkey_cached_key key,
                                     u64 journal_seq,
                                     bool evict)
{
        struct bch_fs *c = trans->c;
        struct journal *j = &c->journal;
        struct btree_iter *c_iter = NULL, *b_iter = NULL;
        struct bkey_cached *ck;
        int ret;

        b_iter = bch2_trans_get_iter(trans, key.btree_id, key.pos,
                                     BTREE_ITER_SLOTS|
                                     BTREE_ITER_INTENT);
        ret = PTR_ERR_OR_ZERO(b_iter);
        if (ret)
                goto out;

        c_iter = bch2_trans_get_iter(trans, key.btree_id, key.pos,
                                     BTREE_ITER_CACHED|
                                     BTREE_ITER_CACHED_NOFILL|
                                     BTREE_ITER_CACHED_NOCREATE|
                                     BTREE_ITER_INTENT);
        ret = PTR_ERR_OR_ZERO(c_iter);
        if (ret)
                goto out;
retry:
        ret = bch2_btree_iter_traverse(c_iter);
        if (ret)
                goto err;

        ck = (void *) c_iter->l[0].b;
        if (!ck ||
            (journal_seq && ck->journal.seq != journal_seq))
                goto out;

        if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
                if (!evict)
                        goto out;
                goto evict;
        }

        ret   = bch2_btree_iter_traverse(b_iter) ?:
                bch2_trans_update(trans, b_iter, ck->k, BTREE_TRIGGER_NORUN) ?:
                bch2_trans_commit(trans, NULL, NULL,
                                  BTREE_INSERT_NOUNLOCK|
                                  BTREE_INSERT_NOCHECK_RW|
                                  BTREE_INSERT_NOFAIL|
                                  BTREE_INSERT_USE_RESERVE|
                                  BTREE_INSERT_USE_ALLOC_RESERVE|
                                  BTREE_INSERT_JOURNAL_RESERVED|
                                  BTREE_INSERT_JOURNAL_RECLAIM);
err:
        if (ret == -EINTR)
                goto retry;

        BUG_ON(ret && !bch2_journal_error(j));

        if (ret)
                goto out;

        bch2_journal_pin_drop(j, &ck->journal);
        bch2_journal_preres_put(j, &ck->res);
        clear_bit(BKEY_CACHED_DIRTY, &ck->flags);

        if (!evict) {
                mutex_lock(&c->btree_key_cache.lock);
                list_move_tail(&ck->list, &c->btree_key_cache.clean);
                mutex_unlock(&c->btree_key_cache.lock);
        } else {
evict:
                BUG_ON(!btree_node_intent_locked(c_iter, 0));

                mark_btree_node_unlocked(c_iter, 0);
                c_iter->l[0].b = NULL;

                six_lock_write(&ck->c.lock, NULL, NULL);

                mutex_lock(&c->btree_key_cache.lock);
                bkey_cached_evict(&c->btree_key_cache, ck);
                bkey_cached_free(&c->btree_key_cache, ck);
                mutex_unlock(&c->btree_key_cache.lock);
        }
out:
        bch2_trans_iter_put(trans, b_iter);
        bch2_trans_iter_put(trans, c_iter);
        return ret;
}

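/*
 * Journal pin flush callback: called by journal reclaim when the journal
 * entry a dirty cached key is pinning needs to be freed up:
 */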
static void btree_key_cache_journal_flush(struct journal *j,
                                          struct journal_entry_pin *pin,
                                          u64 seq)
{
        struct bch_fs *c = container_of(j, struct bch_fs, journal);
        struct bkey_cached *ck =
                container_of(pin, struct bkey_cached, journal);
        struct bkey_cached_key key;
        struct btree_trans trans;

        six_lock_read(&ck->c.lock, NULL, NULL);
        key = READ_ONCE(ck->key);

        if (ck->journal.seq != seq ||
            !test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
                six_unlock_read(&ck->c.lock);
                return;
        }
        six_unlock_read(&ck->c.lock);

        bch2_trans_init(&trans, c, 0, 0);
        btree_key_cache_flush_pos(&trans, key, seq, false);
        bch2_trans_exit(&trans);
}

/*
 * Flush and evict a key from the key cache:
 */
int bch2_btree_key_cache_flush(struct btree_trans *trans,
                               enum btree_id id, struct bpos pos)
{
        struct bch_fs *c = trans->c;
        struct bkey_cached_key key = { id, pos };

        /* Fastpath - assume it won't be found: */
        if (!btree_key_cache_find(c, id, pos))
                return 0;

        return btree_key_cache_flush_pos(trans, key, 0, true);
}

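/*
 * Update a cached key at transaction commit: copy in the new value,
 * transfer journal prereservation from the transaction to the cached
 * entry, mark it dirty, and (re)pin the journal entry it must be flushed
 * before:
 */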
bool bch2_btree_insert_key_cached(struct btree_trans *trans,
                                  struct btree_iter *iter,
                                  struct bkey_i *insert)
{
        struct bch_fs *c = trans->c;
        struct bkey_cached *ck = (void *) iter->l[0].b;

        BUG_ON(insert->u64s > ck->u64s);

        if (likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))) {
                int difference;

                BUG_ON(jset_u64s(insert->u64s) > trans->journal_preres.u64s);

                difference = jset_u64s(insert->u64s) - ck->res.u64s;
                if (difference > 0) {
                        trans->journal_preres.u64s      -= difference;
                        ck->res.u64s                    += difference;
                }
        }

        bkey_copy(ck->k, insert);
        ck->valid = true;

        if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
                mutex_lock(&c->btree_key_cache.lock);
                list_del_init(&ck->list);

                set_bit(BKEY_CACHED_DIRTY, &ck->flags);
                mutex_unlock(&c->btree_key_cache.lock);
        }

        bch2_journal_pin_update(&c->journal, trans->journal_res.seq,
                                &ck->journal, btree_key_cache_journal_flush);
        return true;
}

#ifdef CONFIG_BCACHEFS_DEBUG
void bch2_btree_key_cache_verify_clean(struct btree_trans *trans,
                               enum btree_id id, struct bpos pos)
{
        BUG_ON(btree_key_cache_find(trans->c, id, pos));
}
#endif

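/* Free all cached keys at filesystem shutdown: */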
void bch2_fs_btree_key_cache_exit(struct btree_key_cache *c)
{
        struct bkey_cached *ck, *n;

        mutex_lock(&c->lock);
        list_for_each_entry_safe(ck, n, &c->clean, list) {
                kfree(ck->k);
                kfree(ck);
        }
        list_for_each_entry_safe(ck, n, &c->freed, list)
                kfree(ck);
        mutex_unlock(&c->lock);

        rhashtable_destroy(&c->table);
}

void bch2_fs_btree_key_cache_init_early(struct btree_key_cache *c)
{
        mutex_init(&c->lock);
        INIT_LIST_HEAD(&c->freed);
        INIT_LIST_HEAD(&c->clean);
}

int bch2_fs_btree_key_cache_init(struct btree_key_cache *c)
{
        return rhashtable_init(&c->table, &bch2_btree_key_cache_params);
}