#include "bcachefs.h"
#include "alloc_foreground.h"
#include "btree_iter.h"
#include "buckets.h"
#include "clock.h"
#include "disk_groups.h"
#include "extents.h"
#include "io.h"
#include "move.h"
#include "rebalance.h"
#include "super-io.h"

#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/sched/cputime.h>
#include <trace/events/bcachefs.h>

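/*
 * Decide whether a single extent pointer needs to be moved: either it's a
 * non-cached pointer on a device outside the configured background_target,
 * or its data isn't compressed with the requested background_compression
 * type.
 */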
static inline bool rebalance_ptr_pred(struct bch_fs *c,
                                      const struct bch_extent_ptr *ptr,
                                      struct bch_extent_crc_unpacked crc,
                                      struct bch_io_opts *io_opts)
{
        if (io_opts->background_target &&
            !bch2_dev_in_target(c, ptr->dev, io_opts->background_target) &&
            !ptr->cached)
                return true;

        if (io_opts->background_compression &&
            crc.compression_type !=
            bch2_compression_opt_to_type[io_opts->background_compression])
                return true;

        return false;
}

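/*
 * Account new rebalance work for an extent key: for each pointer that
 * rebalance_ptr_pred() flags, add its compressed size to the owning device's
 * rebalance_work counter, waking the rebalance thread on the zero -> nonzero
 * transition. Non-data keys, and filesystems with neither background_target
 * nor background_compression set, are ignored.
 */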
void bch2_rebalance_add_key(struct bch_fs *c,
                            struct bkey_s_c k,
                            struct bch_io_opts *io_opts)
{
        const struct bch_extent_ptr *ptr;
        struct bch_extent_crc_unpacked crc;
        struct bkey_s_c_extent e;

        if (!bkey_extent_is_data(k.k))
                return;

        if (!io_opts->background_target &&
            !io_opts->background_compression)
                return;

        e = bkey_s_c_to_extent(k);

        extent_for_each_ptr_crc(e, ptr, crc)
                if (rebalance_ptr_pred(c, ptr, crc, io_opts)) {
                        struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);

                        if (atomic64_add_return(crc.compressed_size,
                                                &ca->rebalance_work) ==
                            crc.compressed_size)
                                rebalance_wakeup(c);
                }
}

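/*
 * Account rebalance work that can't be attributed to a particular device;
 * wakes the rebalance thread on the zero -> nonzero transition.
 */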
void bch2_rebalance_add_work(struct bch_fs *c, u64 sectors)
{
        if (atomic64_add_return(sectors, &c->rebalance.work_unknown_dev) ==
            sectors)
                rebalance_wakeup(c);
}

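/*
 * Predicate passed to bch2_move_data() by the rebalance thread: skip extents
 * that have no room for an additional pointer or that rebalance_ptr_pred()
 * finds nothing wrong with; otherwise ask the move path to add replicas in
 * the background target.
 */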
static enum data_cmd rebalance_pred(struct bch_fs *c, void *arg,
                                    enum bkey_type type,
                                    struct bkey_s_c_extent e,
                                    struct bch_io_opts *io_opts,
                                    struct data_opts *data_opts)
{
        const struct bch_extent_ptr *ptr;
        struct bch_extent_crc_unpacked crc;

        /* Make sure we have room to add a new pointer: */
        if (bkey_val_u64s(e.k) + BKEY_EXTENT_PTR_U64s_MAX >
            BKEY_EXTENT_VAL_U64s_MAX)
                return DATA_SKIP;

        extent_for_each_ptr_crc(e, ptr, crc)
                if (rebalance_ptr_pred(c, ptr, crc, io_opts))
                        goto found;

        return DATA_SKIP;
found:
        data_opts->target               = io_opts->background_target;
        data_opts->btree_insert_flags   = 0;
        return DATA_ADD_REPLICAS;
}

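/*
 * Summary of outstanding rebalance work: the fullest device (by percentage of
 * its capacity that still needs moving) and the total amount of work, in
 * sectors. rebalance_work_accumulate() folds one device's counters into the
 * summary, clamping against overflow.
 */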
struct rebalance_work {
        int             dev_most_full_idx;
        unsigned        dev_most_full_percent;
        u64             dev_most_full_work;
        u64             dev_most_full_capacity;
        u64             total_work;
};

static void rebalance_work_accumulate(struct rebalance_work *w,
                u64 dev_work, u64 unknown_dev, u64 capacity, int idx)
{
        unsigned percent_full;
        u64 work = dev_work + unknown_dev;

        if (work < dev_work || work < unknown_dev)
                work = U64_MAX;
        work = min(work, capacity);

        percent_full = div64_u64(work * 100, capacity);

        if (percent_full >= w->dev_most_full_percent) {
                w->dev_most_full_idx            = idx;
                w->dev_most_full_percent        = percent_full;
                w->dev_most_full_work           = work;
                w->dev_most_full_capacity       = capacity;
        }

        if (w->total_work + dev_work >= w->total_work &&
            w->total_work + dev_work >= dev_work)
                w->total_work += dev_work;
}

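/*
 * Gather the per-device rebalance_work counters, plus the unknown-device
 * counter, into a single summary for the rebalance thread to act on.
 */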
static struct rebalance_work rebalance_work(struct bch_fs *c)
{
        struct bch_dev *ca;
        struct rebalance_work ret = { .dev_most_full_idx = -1 };
        u64 unknown_dev = atomic64_read(&c->rebalance.work_unknown_dev);
        unsigned i;

        for_each_online_member(ca, c, i)
                rebalance_work_accumulate(&ret,
                        atomic64_read(&ca->rebalance_work),
                        unknown_dev,
                        bucket_to_sector(ca, ca->mi.nbuckets -
                                         ca->mi.first_bucket),
                        i);

        rebalance_work_accumulate(&ret,
                unknown_dev, 0, c->capacity, -1);

        return ret;
}

static void rebalance_work_reset(struct bch_fs *c)
{
        struct bch_dev *ca;
        unsigned i;

        for_each_online_member(ca, c, i)
                atomic64_set(&ca->rebalance_work, 0);

        atomic64_set(&c->rebalance.work_unknown_dev, 0);
}

static unsigned long curr_cputime(void)
{
        u64 utime, stime;

        task_cputime_adjusted(current, &utime, &stime);
        return nsecs_to_jiffies(utime + stime);
}

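/*
 * Main loop of the rebalance thread: sleep until there's work to do, throttle
 * itself when the fullest device has relatively little pending work, then
 * walk the whole keyspace with bch2_move_data() using rebalance_pred() to
 * pick the extents that need moving.
 */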
static int bch2_rebalance_thread(void *arg)
{
        struct bch_fs *c = arg;
        struct bch_fs_rebalance *r = &c->rebalance;
        struct io_clock *clock = &c->io_clock[WRITE];
        struct rebalance_work w, p;
        unsigned long start, prev_start;
        unsigned long prev_run_time, prev_run_cputime;
        unsigned long cputime, prev_cputime;
        unsigned long io_start;
        long throttle;

        set_freezable();

        io_start        = atomic_long_read(&clock->now);
        p               = rebalance_work(c);
        prev_start      = jiffies;
        prev_cputime    = curr_cputime();

        while (!kthread_wait_freezable(r->enabled)) {
                start                   = jiffies;
                cputime                 = curr_cputime();

                prev_run_time           = start - prev_start;
                prev_run_cputime        = cputime - prev_cputime;

                w                       = rebalance_work(c);
                BUG_ON(!w.dev_most_full_capacity);

                if (!w.total_work) {
                        r->state = REBALANCE_WAITING;
                        kthread_wait_freezable(rebalance_work(c).total_work);
                        continue;
                }

                /*
                 * If there isn't much work to do, throttle CPU usage:
                 */
                throttle = prev_run_cputime * 100 /
                        max(1U, w.dev_most_full_percent) -
                        prev_run_time;

                if (w.dev_most_full_percent < 20 && throttle > 0) {
                        r->state = REBALANCE_THROTTLED;
                        r->throttled_until_iotime = io_start +
                                div_u64(w.dev_most_full_capacity *
                                        (20 - w.dev_most_full_percent),
                                        50);
                        r->throttled_until_cputime = start + throttle;

                        bch2_kthread_io_clock_wait(clock,
                                r->throttled_until_iotime,
                                throttle);
                        continue;
                }

                /* minimum 1 MB/sec: */
                r->pd.rate.rate =
                        max_t(u64, 1 << 11,
                              r->pd.rate.rate *
                              max(p.dev_most_full_percent, 1U) /
                              max(w.dev_most_full_percent, 1U));

                io_start        = atomic_long_read(&clock->now);
                p               = w;
                prev_start      = start;
                prev_cputime    = cputime;

                r->state = REBALANCE_RUNNING;
                memset(&r->move_stats, 0, sizeof(r->move_stats));
                rebalance_work_reset(c);

                bch2_move_data(c,
                               /* ratelimiting disabled for now */
                               NULL, /*  &r->pd.rate, */
                               writepoint_ptr(&c->rebalance_write_point),
                               POS_MIN, POS_MAX,
                               rebalance_pred, NULL,
                               &r->move_stats);
        }

        return 0;
}

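/*
 * Format the current rebalance status into @buf: fullest device, total work,
 * current rate, and whether the thread is waiting, throttled or running.
 * Returns the number of bytes written.
 */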
ssize_t bch2_rebalance_work_show(struct bch_fs *c, char *buf)
{
        char *out = buf, *end = out + PAGE_SIZE;
        struct bch_fs_rebalance *r = &c->rebalance;
        struct rebalance_work w = rebalance_work(c);
        char h1[21], h2[21];

        bch2_hprint(h1, w.dev_most_full_work << 9);
        bch2_hprint(h2, w.dev_most_full_capacity << 9);
        out += scnprintf(out, end - out,
                         "fullest_dev (%i):\t%s/%s\n",
                         w.dev_most_full_idx, h1, h2);

        bch2_hprint(h1, w.total_work << 9);
        bch2_hprint(h2, c->capacity << 9);
        out += scnprintf(out, end - out,
                         "total work:\t\t%s/%s\n",
                         h1, h2);

        out += scnprintf(out, end - out,
                         "rate:\t\t\t%u\n",
                         r->pd.rate.rate);

        switch (r->state) {
        case REBALANCE_WAITING:
                out += scnprintf(out, end - out, "waiting\n");
                break;
        case REBALANCE_THROTTLED:
                bch2_hprint(h1,
                            (r->throttled_until_iotime -
                             atomic_long_read(&c->io_clock[WRITE].now)) << 9);
                out += scnprintf(out, end - out,
                                 "throttled for %lu sec or %s io\n",
                                 (r->throttled_until_cputime - jiffies) / HZ,
                                 h1);
                break;
        case REBALANCE_RUNNING:
                out += scnprintf(out, end - out, "running\n");
                out += scnprintf(out, end - out, "pos %llu:%llu\n",
                                 r->move_stats.iter.pos.inode,
                                 r->move_stats.iter.pos.offset);
                break;
        }

        return out - buf;
}

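/*
 * Stop the rebalance thread: open the rate limiter wide and reset it so the
 * thread isn't left sleeping on it, clear the thread pointer, wait an RCU
 * grace period to synchronize with rebalance_wakeup(), then stop the kthread
 * and drop its reference.
 */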
void bch2_rebalance_stop(struct bch_fs *c)
{
        struct task_struct *p;

        c->rebalance.pd.rate.rate = UINT_MAX;
        bch2_ratelimit_reset(&c->rebalance.pd.rate);

        p = rcu_dereference_protected(c->rebalance.thread, 1);
        c->rebalance.thread = NULL;

        if (p) {
                /* for synchronizing with rebalance_wakeup() */
                synchronize_rcu();

                kthread_stop(p);
                put_task_struct(p);
        }
}

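/*
 * Start the rebalance thread, unless the filesystem was mounted with
 * nochanges. A reference is taken on the task so bch2_rebalance_stop() can
 * stop it safely later.
 */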
int bch2_rebalance_start(struct bch_fs *c)
{
        struct task_struct *p;

        if (c->opts.nochanges)
                return 0;

        p = kthread_create(bch2_rebalance_thread, c, "bch_rebalance");
        if (IS_ERR(p))
                return PTR_ERR(p);

        get_task_struct(p);
        rcu_assign_pointer(c->rebalance.thread, p);
        wake_up_process(p);
        return 0;
}

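/*
 * Filesystem startup: initialize the rate controller and start with
 * work_unknown_dev at S64_MAX, so rebalance_work() reports pending work until
 * the thread has completed a full pass and reset the counters.
 */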
void bch2_fs_rebalance_init(struct bch_fs *c)
{
        bch2_pd_controller_init(&c->rebalance.pd);

        atomic64_set(&c->rebalance.work_unknown_dev, S64_MAX);
}