libbcache/movinggc.c
/*
 * Moving/copying garbage collector
 *
 * Copyright 2012 Google, Inc.
 */

#include "bcache.h"
#include "btree_iter.h"
#include "buckets.h"
#include "clock.h"
#include "extents.h"
#include "io.h"
#include "keylist.h"
#include "move.h"
#include "movinggc.h"

#include <trace/events/bcache.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/wait.h>

/* Moving GC - IO loop */

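/*
 * Returns @k's pointer to @ca if @k is a data extent living in a bucket that
 * has been marked for copygc, NULL otherwise:
 */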
static const struct bch_extent_ptr *moving_pred(struct cache *ca,
						struct bkey_s_c k)
{
	const struct bch_extent_ptr *ptr;

	if (bkey_extent_is_data(k.k) &&
	    (ptr = bch_extent_has_device(bkey_s_c_to_extent(k),
					 ca->dev_idx)) &&
	    PTR_BUCKET(ca, ptr)->mark.copygc)
		return ptr;

	return NULL;
}

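/*
 * Re-check the predicate (the bucket may have been reused since we read the
 * key), then queue the extent to be rewritten via the copygc write point:
 */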
static int issue_moving_gc_move(struct cache *ca,
				struct moving_context *ctxt,
				struct bkey_s_c k)
{
	struct cache_set *c = ca->set;
	const struct bch_extent_ptr *ptr;
	int ret;

	ptr = moving_pred(ca, k);
	if (!ptr) /* We raced - bucket's been reused */
		return 0;

	ret = bch_data_move(c, ctxt, &ca->copygc_write_point, k, ptr);
	if (!ret)
		trace_bcache_gc_copy(k.k);
	else
		trace_bcache_moving_gc_alloc_fail(c, k.k->size);
	return ret;
}

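/*
 * Walk the extents btree, moving every extent that still lives in a bucket
 * marked for copygc; on allocation failure, wait for in-flight moves to
 * complete and retry the same key.
 */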
static void read_moving(struct cache *ca, size_t buckets_to_move,
			u64 sectors_to_move)
{
	struct cache_set *c = ca->set;
	struct bucket *g;
	struct moving_context ctxt;
	struct btree_iter iter;
	struct bkey_s_c k;
	u64 sectors_not_moved = 0;
	size_t buckets_not_moved = 0;

	bch_ratelimit_reset(&ca->moving_gc_pd.rate);
	bch_move_ctxt_init(&ctxt, &ca->moving_gc_pd.rate,
			   SECTORS_IN_FLIGHT_PER_DEVICE);
	bch_btree_iter_init(&iter, c, BTREE_ID_EXTENTS, POS_MIN);

	while (1) {
		if (kthread_should_stop())
			goto out;
		if (bch_move_ctxt_wait(&ctxt))
			goto out;
		k = bch_btree_iter_peek(&iter);
		if (!k.k)
			break;
		if (btree_iter_err(k))
			goto out;

		if (!moving_pred(ca, k))
			goto next;

		if (issue_moving_gc_move(ca, &ctxt, k)) {
			bch_btree_iter_unlock(&iter);

			/* memory allocation failure, wait for some IO to finish */
			bch_move_ctxt_wait_for_io(&ctxt);
			continue;
		}
next:
		bch_btree_iter_advance_pos(&iter);
		//bch_btree_iter_cond_resched(&iter);

		/* unlock before calling moving_context_wait() */
		bch_btree_iter_unlock(&iter);
		cond_resched();
	}

	bch_btree_iter_unlock(&iter);
	bch_move_ctxt_exit(&ctxt);
	trace_bcache_moving_gc_end(ca, ctxt.sectors_moved, ctxt.keys_moved,
				   buckets_to_move);

	/* don't check this if we bailed out early: */
	for_each_bucket(g, ca)
		if (g->mark.copygc && bucket_sectors_used(g)) {
			sectors_not_moved += bucket_sectors_used(g);
			buckets_not_moved++;
		}

	if (sectors_not_moved)
		bch_warn(c, "copygc finished but %llu/%llu sectors, %zu/%zu buckets not moved",
			 sectors_not_moved, sectors_to_move,
			 buckets_not_moved, buckets_to_move);
	return;
out:
	bch_btree_iter_unlock(&iter);
	bch_move_ctxt_exit(&ctxt);
	trace_bcache_moving_gc_end(ca, ctxt.sectors_moved, ctxt.keys_moved,
				   buckets_to_move);
}

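/*
 * Copygc writes come out of a dedicated reserve; don't start an iteration
 * until the RESERVE_MOVINGGC freelist holds at least
 * COPYGC_BUCKETS_PER_ITER buckets:
 */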
static bool have_copygc_reserve(struct cache *ca)
{
	bool ret;

	spin_lock(&ca->freelist_lock);
	ret = fifo_used(&ca->free[RESERVE_MOVINGGC]) >=
		COPYGC_BUCKETS_PER_ITER(ca);
	spin_unlock(&ca->freelist_lock);

	return ret;
}

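/*
 * One copygc pass: wait for the reserve to be refilled, pick the least-full
 * non-empty buckets via a heap, mark them copygc, then evacuate them with
 * read_moving():
 */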
static void bch_moving_gc(struct cache *ca)
{
	struct cache_set *c = ca->set;
	struct bucket *g;
	struct bucket_mark new;
	u64 sectors_to_move;
	size_t buckets_to_move, buckets_unused = 0;
	struct bucket_heap_entry e;
	unsigned sectors_used, i;
	int reserve_sectors;

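	/* Wait for the allocator to refill the copygc reserve: */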
	if (!have_copygc_reserve(ca)) {
		struct closure cl;

		closure_init_stack(&cl);
		while (1) {
			closure_wait(&c->freelist_wait, &cl);
			if (have_copygc_reserve(ca))
				break;
			closure_sync(&cl);
		}
		closure_wake_up(&c->freelist_wait);
	}

	reserve_sectors = COPYGC_SECTORS_PER_ITER(ca);

	trace_bcache_moving_gc_start(ca);

	/*
	 * Find buckets with lowest sector counts, skipping completely
	 * empty buckets, by building a maxheap sorted by sector count,
	 * and repeatedly replacing the maximum element until all
	 * buckets have been visited.
	 */

	/*
	 * We need bucket marks to be up to date, so gc can't be recalculating
	 * them, and we don't want the allocator invalidating a bucket after
	 * we've decided to evacuate it but before we set copygc:
	 */
	down_read(&c->gc_lock);
	mutex_lock(&ca->heap_lock);
	mutex_lock(&ca->set->bucket_lock);

	ca->heap.used = 0;
	for_each_bucket(g, ca) {
		bucket_cmpxchg(g, new, new.copygc = 0);

		if (bucket_unused(g)) {
			buckets_unused++;
			continue;
		}

		if (g->mark.owned_by_allocator ||
		    g->mark.is_metadata)
			continue;

		sectors_used = bucket_sectors_used(g);

		if (sectors_used >= ca->mi.bucket_size)
			continue;

		bucket_heap_push(ca, g, sectors_used);
	}

	sectors_to_move = 0;
	for (i = 0; i < ca->heap.used; i++)
		sectors_to_move += ca->heap.data[i].val;

	while (sectors_to_move > COPYGC_SECTORS_PER_ITER(ca)) {
		BUG_ON(!heap_pop(&ca->heap, e, bucket_min_cmp));
		sectors_to_move -= e.val;
	}

	for (i = 0; i < ca->heap.used; i++)
		bucket_cmpxchg(ca->heap.data[i].g, new, new.copygc = 1);

	buckets_to_move = ca->heap.used;

	mutex_unlock(&ca->set->bucket_lock);
	mutex_unlock(&ca->heap_lock);
	up_read(&c->gc_lock);

	read_moving(ca, buckets_to_move, sectors_to_move);
}

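/*
 * Copygc is driven off the write side of the IO clock: when there's more free
 * space than we want to keep in reserve, sleep until enough sectors have been
 * written that it's worth checking again; otherwise run a pass.
 */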
static int bch_moving_gc_thread(void *arg)
{
	struct cache *ca = arg;
	struct cache_set *c = ca->set;
	struct io_clock *clock = &c->io_clock[WRITE];
	unsigned long last;
	u64 available, want, next;

	set_freezable();

	while (!kthread_should_stop()) {
		if (kthread_wait_freezable(c->copy_gc_enabled))
			break;

		last = atomic_long_read(&clock->now);
		/*
		 * Don't start copygc until less than half the gc reserve is
		 * available: gc_reserve_percent is a percentage of the
		 * device's usable buckets, so dividing by 200 gives half the
		 * reserve, in buckets.
		 */
		available = buckets_available_cache(ca);
		want = div64_u64((ca->mi.nbuckets - ca->mi.first_bucket) *
				 c->opts.gc_reserve_percent, 200);
		if (available > want) {
			/*
			 * Sleep on the write IO clock until (available - want)
			 * buckets' worth of sectors have been written, then
			 * recheck:
			 */
			next = last + (available - want) *
				ca->mi.bucket_size;
			bch_kthread_io_clock_wait(clock, next);
			continue;
		}

		bch_moving_gc(ca);
	}

	return 0;
}

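/*
 * Set up the per-device rate limiter that paces copygc IO; the derivative
 * term of the PD controller is disabled.
 */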
void bch_moving_init_cache(struct cache *ca)
{
	bch_pd_controller_init(&ca->moving_gc_pd);
	ca->moving_gc_pd.d_term = 0;
}

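/*
 * Create and start the per-device copygc thread; in nochanges mode we run
 * without one.
 */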
int bch_moving_gc_thread_start(struct cache *ca)
{
	struct task_struct *t;

	/* The moving gc read thread must be stopped */
	BUG_ON(ca->moving_gc_read != NULL);

	if (ca->set->opts.nochanges)
		return 0;

	if (bch_fs_init_fault("moving_gc_start"))
		return -ENOMEM;

	t = kthread_create(bch_moving_gc_thread, ca, "bch_copygc_read");
	if (IS_ERR(t))
		return PTR_ERR(t);

	ca->moving_gc_read = t;
	wake_up_process(ca->moving_gc_read);

	return 0;
}

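/*
 * Crank the rate limit up to maximum so any in-flight moves finish quickly,
 * then stop the copygc thread:
 */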
void bch_moving_gc_stop(struct cache *ca)
{
	ca->moving_gc_pd.rate.rate = UINT_MAX;
	bch_ratelimit_reset(&ca->moving_gc_pd.rate);

	if (ca->moving_gc_read)
		kthread_stop(ca->moving_gc_read);
	ca->moving_gc_read = NULL;
}