libbcachefs/btree_key_cache.c

#include "bcachefs.h"
#include "btree_cache.h"
#include "btree_iter.h"
#include "btree_key_cache.h"
#include "btree_locking.h"
#include "btree_update.h"
#include "error.h"
#include "journal.h"
#include "journal_reclaim.h"

#include <linux/sched/mm.h>
#include <trace/events/bcachefs.h>

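/*
 * rhashtable compare callback: cached keys are indexed by (btree_id, pos),
 * so an entry only matches if both fields are equal.
 */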
static int bch2_btree_key_cache_cmp_fn(struct rhashtable_compare_arg *arg,
                                       const void *obj)
{
        const struct bkey_cached *ck = obj;
        const struct bkey_cached_key *key = arg->key;

        return cmp_int(ck->key.btree_id, key->btree_id) ?:
                bkey_cmp(ck->key.pos, key->pos);
}

static const struct rhashtable_params bch2_btree_key_cache_params = {
        .head_offset    = offsetof(struct bkey_cached, hash),
        .key_offset     = offsetof(struct bkey_cached, key),
        .key_len        = sizeof(struct bkey_cached_key),
        .obj_cmpfn      = bch2_btree_key_cache_cmp_fn,
};

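/*
 * Look up a cached key by (btree_id, pos) in the key cache hash table;
 * returns NULL if no entry exists.
 */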
__flatten
inline struct bkey_cached *
bch2_btree_key_cache_find(struct bch_fs *c, enum btree_id btree_id, struct bpos pos)
{
        struct bkey_cached_key key = {
                .btree_id       = btree_id,
                .pos            = pos,
        };

        return rhashtable_lookup_fast(&c->btree_key_cache.table, &key,
                                      bch2_btree_key_cache_params);
}

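/*
 * Try to take the intent and write locks on a cached key so it can be
 * evicted; fails without blocking if either lock is contended or the
 * entry is dirty.
 */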
static bool bkey_cached_lock_for_evict(struct bkey_cached *ck)
{
        if (!six_trylock_intent(&ck->c.lock))
                return false;

        if (!six_trylock_write(&ck->c.lock)) {
                six_unlock_intent(&ck->c.lock);
                return false;
        }

        if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
                six_unlock_write(&ck->c.lock);
                six_unlock_intent(&ck->c.lock);
                return false;
        }

        return true;
}

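/*
 * Remove an entry from the hash table; the key is poisoned so stale
 * lookups can't match it. Caller holds the entry's write lock.
 */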
static void bkey_cached_evict(struct btree_key_cache *c,
                              struct bkey_cached *ck)
{
        BUG_ON(rhashtable_remove_fast(&c->table, &ck->hash,
                                      bch2_btree_key_cache_params));
        memset(&ck->key, ~0, sizeof(ck->key));

        c->nr_keys--;
}

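/*
 * Move an entry to the freed list and drop its key buffer. The SRCU
 * sequence is recorded so the shrinker won't kfree() the entry until
 * btree transactions that might still be referencing it have drained.
 */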
static void bkey_cached_free(struct btree_key_cache *bc,
                             struct bkey_cached *ck)
{
        struct bch_fs *c = container_of(bc, struct bch_fs, btree_key_cache);

        ck->btree_trans_barrier_seq =
                start_poll_synchronize_srcu(&c->btree_trans_barrier);

        list_move(&ck->list, &bc->freed);

        kfree(ck->k);
        ck->k           = NULL;
        ck->u64s        = 0;

        six_unlock_write(&ck->c.lock);
        six_unlock_intent(&ck->c.lock);
}

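/*
 * Allocate a bkey_cached, preferring to reuse an entry from the freed or
 * clean lists; falls back to kzalloc(). The returned entry is intent and
 * write locked.
 */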
static struct bkey_cached *
bkey_cached_alloc(struct btree_key_cache *c)
{
        struct bkey_cached *ck;

        list_for_each_entry(ck, &c->freed, list)
                if (bkey_cached_lock_for_evict(ck))
                        return ck;

        list_for_each_entry(ck, &c->clean, list)
                if (bkey_cached_lock_for_evict(ck)) {
                        bkey_cached_evict(c, ck);
                        return ck;
                }

        ck = kzalloc(sizeof(*ck), GFP_NOFS);
        if (!ck)
                return NULL;

        INIT_LIST_HEAD(&ck->list);
        six_lock_init(&ck->c.lock);
        BUG_ON(!six_trylock_intent(&ck->c.lock));
        BUG_ON(!six_trylock_write(&ck->c.lock));

        return ck;
}

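/*
 * Allocate and hash a new, not yet filled, entry for the given position.
 * Returns NULL if we raced with someone else inserting the same key, in
 * which case the caller retries the lookup.
 */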
static struct bkey_cached *
btree_key_cache_create(struct btree_key_cache *c,
                       enum btree_id btree_id,
                       struct bpos pos)
{
        struct bkey_cached *ck;

        ck = bkey_cached_alloc(c);
        if (!ck)
                return ERR_PTR(-ENOMEM);

        ck->c.level             = 0;
        ck->c.btree_id          = btree_id;
        ck->key.btree_id        = btree_id;
        ck->key.pos             = pos;
        ck->valid               = false;

        BUG_ON(ck->flags);

        if (rhashtable_lookup_insert_fast(&c->table,
                                          &ck->hash,
                                          bch2_btree_key_cache_params)) {
                /* We raced with another fill: */
                bkey_cached_free(c, ck);
                return NULL;
        }

        c->nr_keys++;

        list_move(&ck->list, &c->clean);
        six_unlock_write(&ck->c.lock);

        return ck;
}

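/*
 * Read the current value of the key from the btree and copy it into the
 * cached entry, reallocating the entry's buffer if the key is bigger than
 * the space currently allocated.
 */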
static int btree_key_cache_fill(struct btree_trans *trans,
                                struct btree_iter *ck_iter,
                                struct bkey_cached *ck)
{
        struct btree_iter *iter;
        struct bkey_s_c k;
        unsigned new_u64s = 0;
        struct bkey_i *new_k = NULL;
        int ret;

        iter = bch2_trans_get_iter(trans, ck->key.btree_id,
                                   ck->key.pos, BTREE_ITER_SLOTS);
        if (IS_ERR(iter))
                return PTR_ERR(iter);

        k = bch2_btree_iter_peek_slot(iter);
        ret = bkey_err(k);
        if (ret) {
                bch2_trans_iter_put(trans, iter);
                return ret;
        }

        if (!bch2_btree_node_relock(ck_iter, 0)) {
                bch2_trans_iter_put(trans, iter);
                trace_transaction_restart_ip(trans->ip, _THIS_IP_);
                return -EINTR;
        }

        if (k.k->u64s > ck->u64s) {
                new_u64s = roundup_pow_of_two(k.k->u64s);
                new_k = kmalloc(new_u64s * sizeof(u64), GFP_NOFS);
                if (!new_k) {
                        bch2_trans_iter_put(trans, iter);
                        return -ENOMEM;
                }
        }

        bch2_btree_node_lock_write(ck_iter->l[0].b, ck_iter);
        if (new_k) {
                kfree(ck->k);
                ck->u64s = new_u64s;
                ck->k = new_k;
        }

        bkey_reassemble(ck->k, k);
        ck->valid = true;
        bch2_btree_node_unlock_write(ck_iter->l[0].b, ck_iter);

        /* We're not likely to need this iterator again: */
        bch2_trans_iter_free(trans, iter);

        return 0;
}

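/*
 * Lock callback passed to btree_node_lock(): only worth waiting for the
 * lock if the entry still corresponds to the position the iterator wants,
 * since it may have been evicted and reused while we were blocked.
 */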
static int bkey_cached_check_fn(struct six_lock *lock, void *p)
{
        struct bkey_cached *ck = container_of(lock, struct bkey_cached, c.lock);
        const struct btree_iter *iter = p;

        return ck->key.btree_id == iter->btree_id &&
                !bkey_cmp(ck->key.pos, iter->pos) ? 0 : -1;
}

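/*
 * Traverse to a cached key: look it up (creating it if allowed), lock it,
 * and fill it from the btree if it isn't valid yet. On success,
 * iter->l[0].b points at the bkey_cached instead of a btree node.
 */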
__flatten
int bch2_btree_iter_traverse_cached(struct btree_iter *iter)
{
        struct btree_trans *trans = iter->trans;
        struct bch_fs *c = trans->c;
        struct bkey_cached *ck;
        int ret = 0;

        BUG_ON(iter->level);

        if (btree_node_locked(iter, 0)) {
                ck = (void *) iter->l[0].b;
                goto fill;
        }
retry:
        ck = bch2_btree_key_cache_find(c, iter->btree_id, iter->pos);
        if (!ck) {
                if (iter->flags & BTREE_ITER_CACHED_NOCREATE) {
                        iter->l[0].b = NULL;
                        return 0;
                }

                mutex_lock(&c->btree_key_cache.lock);
                ck = btree_key_cache_create(&c->btree_key_cache,
                                            iter->btree_id, iter->pos);
                mutex_unlock(&c->btree_key_cache.lock);

                ret = PTR_ERR_OR_ZERO(ck);
                if (ret)
                        goto err;
                if (!ck)
                        goto retry;

                mark_btree_node_locked(iter, 0, SIX_LOCK_intent);
                iter->locks_want = 1;
        } else {
                enum six_lock_type lock_want = __btree_lock_want(iter, 0);

                if (!btree_node_lock((void *) ck, iter->pos, 0, iter, lock_want,
                                     bkey_cached_check_fn, iter, _THIS_IP_)) {
                        if (ck->key.btree_id != iter->btree_id ||
                            bkey_cmp(ck->key.pos, iter->pos)) {
                                goto retry;
                        }

                        trace_transaction_restart_ip(trans->ip, _THIS_IP_);
                        ret = -EINTR;
                        goto err;
                }

                if (ck->key.btree_id != iter->btree_id ||
                    bkey_cmp(ck->key.pos, iter->pos)) {
                        six_unlock_type(&ck->c.lock, lock_want);
                        goto retry;
                }

                mark_btree_node_locked(iter, 0, lock_want);
        }

        iter->l[0].lock_seq     = ck->c.lock.state.seq;
        iter->l[0].b            = (void *) ck;
fill:
        if (!ck->valid && !(iter->flags & BTREE_ITER_CACHED_NOFILL)) {
                if (!btree_node_intent_locked(iter, 0))
                        bch2_btree_iter_upgrade(iter, 1);
                if (!btree_node_intent_locked(iter, 0)) {
                        trace_transaction_restart_ip(trans->ip, _THIS_IP_);
                        ret = -EINTR;
                        goto err;
                }

                ret = btree_key_cache_fill(trans, iter, ck);
                if (ret)
                        goto err;
        }

        iter->uptodate = BTREE_ITER_NEED_PEEK;
        bch2_btree_iter_downgrade(iter);
        return ret;
err:
        if (ret != -EINTR) {
                btree_node_unlock(iter, 0);
                iter->flags |= BTREE_ITER_ERROR;
                iter->l[0].b = BTREE_ITER_NO_NODE_ERROR;
        }
        return ret;
}

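/*
 * Flush a single cached key back to the btree in its own transaction
 * commit. If @journal_seq is nonzero, only flush if the entry still pins
 * that journal sequence; if @evict is set, also remove the entry from the
 * cache afterwards.
 */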
static int btree_key_cache_flush_pos(struct btree_trans *trans,
                                     struct bkey_cached_key key,
                                     u64 journal_seq,
                                     bool evict)
{
        struct bch_fs *c = trans->c;
        struct journal *j = &c->journal;
        struct btree_iter *c_iter = NULL, *b_iter = NULL;
        struct bkey_cached *ck;
        int ret;

        b_iter = bch2_trans_get_iter(trans, key.btree_id, key.pos,
                                     BTREE_ITER_SLOTS|
                                     BTREE_ITER_INTENT);
        ret = PTR_ERR_OR_ZERO(b_iter);
        if (ret)
                goto out;

        c_iter = bch2_trans_get_iter(trans, key.btree_id, key.pos,
                                     BTREE_ITER_CACHED|
                                     BTREE_ITER_CACHED_NOFILL|
                                     BTREE_ITER_CACHED_NOCREATE|
                                     BTREE_ITER_INTENT);
        ret = PTR_ERR_OR_ZERO(c_iter);
        if (ret)
                goto out;
retry:
        ret = bch2_btree_iter_traverse(c_iter);
        if (ret)
                goto err;

        ck = (void *) c_iter->l[0].b;
        if (!ck ||
            (journal_seq && ck->journal.seq != journal_seq))
                goto out;

        if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
                if (!evict)
                        goto out;
                goto evict;
        }

        ret   = bch2_btree_iter_traverse(b_iter) ?:
                bch2_trans_update(trans, b_iter, ck->k, BTREE_TRIGGER_NORUN) ?:
                bch2_trans_commit(trans, NULL, NULL,
                                  BTREE_INSERT_NOUNLOCK|
                                  BTREE_INSERT_NOCHECK_RW|
                                  BTREE_INSERT_NOFAIL|
                                  BTREE_INSERT_USE_RESERVE|
                                  BTREE_INSERT_USE_ALLOC_RESERVE|
                                  BTREE_INSERT_JOURNAL_RESERVED|
                                  BTREE_INSERT_JOURNAL_RECLAIM);
err:
        if (ret == -EINTR)
                goto retry;

        BUG_ON(ret && !bch2_journal_error(j));

        if (ret)
                goto out;

        bch2_journal_pin_drop(j, &ck->journal);
        bch2_journal_preres_put(j, &ck->res);

        if (!evict) {
                mutex_lock(&c->btree_key_cache.lock);
                if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
                        clear_bit(BKEY_CACHED_DIRTY, &ck->flags);
                        c->btree_key_cache.nr_dirty--;
                }

                list_move_tail(&ck->list, &c->btree_key_cache.clean);
                mutex_unlock(&c->btree_key_cache.lock);
        } else {
evict:
                BUG_ON(!btree_node_intent_locked(c_iter, 0));

                mark_btree_node_unlocked(c_iter, 0);
                c_iter->l[0].b = NULL;

                six_lock_write(&ck->c.lock, NULL, NULL);

                mutex_lock(&c->btree_key_cache.lock);
                if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
                        clear_bit(BKEY_CACHED_DIRTY, &ck->flags);
                        c->btree_key_cache.nr_dirty--;
                }

                bkey_cached_evict(&c->btree_key_cache, ck);
                bkey_cached_free(&c->btree_key_cache, ck);
                mutex_unlock(&c->btree_key_cache.lock);
        }
out:
        bch2_trans_iter_put(trans, b_iter);
        bch2_trans_iter_put(trans, c_iter);
        return ret;
}

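/*
 * Journal pin flush callback, called from journal reclaim when the pin
 * held by a dirty cached key needs to be released: flush that key back to
 * the btree.
 */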
static void btree_key_cache_journal_flush(struct journal *j,
                                          struct journal_entry_pin *pin,
                                          u64 seq)
{
        struct bch_fs *c = container_of(j, struct bch_fs, journal);
        struct bkey_cached *ck =
                container_of(pin, struct bkey_cached, journal);
        struct bkey_cached_key key;
        struct btree_trans trans;

        int srcu_idx = srcu_read_lock(&c->btree_trans_barrier);

        six_lock_read(&ck->c.lock, NULL, NULL);
        key = ck->key;

        if (ck->journal.seq != seq ||
            !test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
                six_unlock_read(&ck->c.lock);
                goto unlock;
        }
        six_unlock_read(&ck->c.lock);

        bch2_trans_init(&trans, c, 0, 0);
        btree_key_cache_flush_pos(&trans, key, seq, false);
        bch2_trans_exit(&trans);
unlock:
        srcu_read_unlock(&c->btree_trans_barrier, srcu_idx);
}

/*
 * Flush and evict a key from the key cache:
 */
int bch2_btree_key_cache_flush(struct btree_trans *trans,
                               enum btree_id id, struct bpos pos)
{
        struct bch_fs *c = trans->c;
        struct bkey_cached_key key = { id, pos };

        /* Fastpath - assume it won't be found: */
        if (!bch2_btree_key_cache_find(c, id, pos))
                return 0;

        return btree_key_cache_flush_pos(trans, key, 0, true);
}

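/*
 * Update a cached key at transaction commit time: copy in the new value,
 * transfer journal prereservation from the transaction to the entry if it
 * needs more, mark the entry dirty and update its journal pin.
 */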
bool bch2_btree_insert_key_cached(struct btree_trans *trans,
                                  struct btree_iter *iter,
                                  struct bkey_i *insert)
{
        struct bch_fs *c = trans->c;
        struct bkey_cached *ck = (void *) iter->l[0].b;

        BUG_ON(insert->u64s > ck->u64s);

        if (likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))) {
                int difference;

                BUG_ON(jset_u64s(insert->u64s) > trans->journal_preres.u64s);

                difference = jset_u64s(insert->u64s) - ck->res.u64s;
                if (difference > 0) {
                        trans->journal_preres.u64s      -= difference;
                        ck->res.u64s                    += difference;
                }
        }

        bkey_copy(ck->k, insert);
        ck->valid = true;

        if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
                mutex_lock(&c->btree_key_cache.lock);
                list_move(&ck->list, &c->btree_key_cache.dirty);

                set_bit(BKEY_CACHED_DIRTY, &ck->flags);
                c->btree_key_cache.nr_dirty++;
                mutex_unlock(&c->btree_key_cache.lock);
        }

        bch2_journal_pin_update(&c->journal, trans->journal_res.seq,
                                &ck->journal, btree_key_cache_journal_flush);
        return true;
}

#ifdef CONFIG_BCACHEFS_DEBUG
void bch2_btree_key_cache_verify_clean(struct btree_trans *trans,
                               enum btree_id id, struct bpos pos)
{
        BUG_ON(bch2_btree_key_cache_find(trans->c, id, pos));
}
#endif

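/*
 * Shrinker scan: free entries on the freed list whose SRCU grace period
 * has elapsed, then evict and free clean entries. Dirty entries are never
 * touched here; they are written back via journal reclaim.
 */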
static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
                                           struct shrink_control *sc)
{
        struct bch_fs *c = container_of(shrink, struct bch_fs,
                                        btree_key_cache.shrink);
        struct btree_key_cache *bc = &c->btree_key_cache;
        struct bkey_cached *ck, *t;
        size_t scanned = 0, freed = 0, nr = sc->nr_to_scan;
        unsigned flags;

        /* Return -1 if we can't do anything right now */
        if (sc->gfp_mask & __GFP_FS)
                mutex_lock(&bc->lock);
        else if (!mutex_trylock(&bc->lock))
                return -1;

        flags = memalloc_nofs_save();

        list_for_each_entry_safe(ck, t, &bc->freed, list) {
                scanned++;

                if (poll_state_synchronize_srcu(&c->btree_trans_barrier,
                                                ck->btree_trans_barrier_seq)) {
                        list_del(&ck->list);
                        kfree(ck);
                        freed++;
                }

                if (scanned >= nr)
                        goto out;
        }

        list_for_each_entry_safe(ck, t, &bc->clean, list) {
                scanned++;

                if (bkey_cached_lock_for_evict(ck)) {
                        bkey_cached_evict(bc, ck);
                        bkey_cached_free(bc, ck);
                }

                if (scanned >= nr) {
                        if (&t->list != &bc->clean)
                                list_move_tail(&bc->clean, &t->list);
                        goto out;
                }
        }
out:
        memalloc_nofs_restore(flags);
        mutex_unlock(&bc->lock);

        return freed;
}

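/* Shrinker count: report the total number of cached keys. */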
static unsigned long bch2_btree_key_cache_count(struct shrinker *shrink,
                                            struct shrink_control *sc)
{
        struct bch_fs *c = container_of(shrink, struct bch_fs,
                                        btree_key_cache.shrink);
        struct btree_key_cache *bc = &c->btree_key_cache;

        return bc->nr_keys;
}

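/*
 * Tear down the key cache at filesystem shutdown: all remaining entries
 * are freed; dirty entries are only expected if the journal is in an
 * error state.
 */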
void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
{
        struct bch_fs *c = container_of(bc, struct bch_fs, btree_key_cache);
        struct bkey_cached *ck, *n;

        if (bc->shrink.list.next)
                unregister_shrinker(&bc->shrink);

        mutex_lock(&bc->lock);
        list_splice(&bc->dirty, &bc->clean);

        list_for_each_entry_safe(ck, n, &bc->clean, list) {
                bch2_journal_pin_drop(&c->journal, &ck->journal);
                bch2_journal_preres_put(&c->journal, &ck->res);

                kfree(ck->k);
                kfree(ck);
                bc->nr_keys--;
        }

        BUG_ON(bc->nr_dirty && !bch2_journal_error(&c->journal));
        BUG_ON(bc->nr_keys);

        list_for_each_entry_safe(ck, n, &bc->freed, list)
                kfree(ck);
        mutex_unlock(&bc->lock);

        rhashtable_destroy(&bc->table);
}

void bch2_fs_btree_key_cache_init_early(struct btree_key_cache *c)
{
        mutex_init(&c->lock);
        INIT_LIST_HEAD(&c->freed);
        INIT_LIST_HEAD(&c->clean);
        INIT_LIST_HEAD(&c->dirty);
}

int bch2_fs_btree_key_cache_init(struct btree_key_cache *c)
{
        c->shrink.count_objects         = bch2_btree_key_cache_count;
        c->shrink.scan_objects          = bch2_btree_key_cache_scan;

        return  register_shrinker(&c->shrink) ?:
                rhashtable_init(&c->table, &bch2_btree_key_cache_params);
}

void bch2_btree_key_cache_to_text(struct printbuf *out, struct btree_key_cache *c)
{
        struct bucket_table *tbl;
        struct bkey_cached *ck;
        struct rhash_head *pos;
        size_t i;

        mutex_lock(&c->lock);
        tbl = rht_dereference_rcu(c->table.tbl, &c->table);

        for (i = 0; i < tbl->size; i++) {
                rht_for_each_entry_rcu(ck, pos, tbl, i, hash) {
                        pr_buf(out, "%s:",
                               bch2_btree_ids[ck->key.btree_id]);
                        bch2_bpos_to_text(out, ck->key.pos);

                        if (test_bit(BKEY_CACHED_DIRTY, &ck->flags))
                                pr_buf(out, " journal seq %llu", ck->journal.seq);
                        pr_buf(out, "\n");
                }
        }
        mutex_unlock(&c->lock);
}