/*
 * bcachefs-tools-debian: libbcachefs/migrate.c
 * (upstream sync point: 0e765bc37c "bcachefs: foreground merging of interior btree...")
 */
1 /*
2  * Code for moving data off a device.
3  */
4
5 #include "bcachefs.h"
6 #include "btree_update.h"
7 #include "buckets.h"
8 #include "extents.h"
9 #include "io.h"
10 #include "journal.h"
11 #include "keylist.h"
12 #include "migrate.h"
13 #include "move.h"
14 #include "super-io.h"
15
16 static bool migrate_pred(void *arg, struct bkey_s_c_extent e)
17 {
18         struct bch_dev *ca = arg;
19
20         return bch2_extent_has_device(e, ca->dev_idx);
21 }
22
23 #define MAX_DATA_OFF_ITER       10
24
25 static int bch2_dev_usrdata_migrate(struct bch_fs *c, struct bch_dev *ca,
26                                     int flags)
27 {
28         struct btree_iter iter;
29         struct bkey_s_c k;
30         struct bch_move_stats stats;
31         unsigned pass = 0;
32         int ret = 0;
33
34         if (!(bch2_dev_has_data(c, ca) & (1 << BCH_DATA_USER)))
35                 return 0;
36
37         /*
38          * XXX: we should be able to do this in one pass, but bch2_move_data()
39          * can spuriously fail to move an extent due to racing with other move
40          * operations
41          */
42         do {
43                 ret = bch2_move_data(c, NULL,
44                                      SECTORS_IN_FLIGHT_PER_DEVICE,
45                                      NULL,
46                                      writepoint_hashed((unsigned long) current),
47                                      0,
48                                      ca->dev_idx,
49                                      migrate_pred, ca,
50                                      &stats);
51                 if (ret) {
52                         bch_err(c, "error migrating data: %i", ret);
53                         return ret;
54                 }
55         } while (atomic64_read(&stats.keys_moved) && pass++ < MAX_DATA_OFF_ITER);
56
57         if (atomic64_read(&stats.keys_moved)) {
58                 bch_err(c, "unable to migrate all data in %d iterations",
59                         MAX_DATA_OFF_ITER);
60                 return -1;
61         }
62
63         mutex_lock(&c->replicas_gc_lock);
64         bch2_replicas_gc_start(c, 1 << BCH_DATA_USER);
65
66         for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS_MIN, BTREE_ITER_PREFETCH, k) {
67                 ret = bch2_check_mark_super(c, BCH_DATA_USER, bch2_bkey_devs(k));
68                 if (ret) {
69                         bch_err(c, "error migrating data %i from check_mark_super()", ret);
70                         break;
71                 }
72         }
73
74         bch2_replicas_gc_end(c, ret);
75         mutex_unlock(&c->replicas_gc_lock);
76         return ret;
77 }
78
/*
 * Migrate all btree metadata off @ca: walk every node of every btree and
 * rewrite any node whose key still points at the device.
 *
 * Returns 0 on success (or if the device holds no btree data), else a
 * negative error code.
 */
static int bch2_dev_metadata_migrate(struct bch_fs *c, struct bch_dev *ca,
				     int flags)
{
	struct btree_iter iter;
	struct btree *b;
	int ret = 0;
	unsigned id;

	/* Nothing to do if the superblock says @ca holds no btree data: */
	if (!(bch2_dev_has_data(c, ca) & (1 << BCH_DATA_BTREE)))
		return 0;

	/* Hold replicas_gc across the walk so the replicas list is rebuilt: */
	mutex_lock(&c->replicas_gc_lock);
	bch2_replicas_gc_start(c, 1 << BCH_DATA_BTREE);

	for (id = 0; id < BTREE_ID_NR; id++) {
		for_each_btree_node(&iter, c, id, POS_MIN, BTREE_ITER_PREFETCH, b) {
			/* A btree node's key is an extent: */
			struct bkey_s_c_extent e = bkey_i_to_s_c_extent(&b->key);

			if (!bch2_extent_has_device(e, ca->dev_idx))
				continue;

			/*
			 * NOTE(review): presumably the rewrite allocates the
			 * replacement node on other devices since @ca is no
			 * longer RW — confirm against bch2_btree_node_rewrite()
			 */
			ret = bch2_btree_node_rewrite(c, &iter, b->data->keys.seq, 0);
			if (ret) {
				/* Drop node locks before bailing out: */
				bch2_btree_iter_unlock(&iter);
				goto err;
			}
		}
		/* Unlock also reports any iteration error: */
		ret = bch2_btree_iter_unlock(&iter);
		if (ret)
			goto err;
	}
err:
	bch2_replicas_gc_end(c, ret);
	mutex_unlock(&c->replicas_gc_lock);
	return ret;
}
115
116 int bch2_dev_data_migrate(struct bch_fs *c, struct bch_dev *ca, int flags)
117 {
118         BUG_ON(ca->mi.state == BCH_MEMBER_STATE_RW &&
119                bch2_dev_is_online(ca));
120
121         return bch2_dev_usrdata_migrate(c, ca, flags) ?:
122                 bch2_dev_metadata_migrate(c, ca, flags);
123 }
124
125 static int drop_dev_ptrs(struct bch_fs *c, struct bkey_s_extent e,
126                          unsigned dev_idx, int flags, bool metadata)
127 {
128         unsigned replicas = metadata ? c->opts.metadata_replicas : c->opts.data_replicas;
129         unsigned lost = metadata ? BCH_FORCE_IF_METADATA_LOST : BCH_FORCE_IF_DATA_LOST;
130         unsigned degraded = metadata ? BCH_FORCE_IF_METADATA_DEGRADED : BCH_FORCE_IF_DATA_DEGRADED;
131         unsigned nr_good;
132
133         bch2_extent_drop_device(e, dev_idx);
134
135         nr_good = bch2_extent_nr_good_ptrs(c, e.c);
136         if ((!nr_good && !(flags & lost)) ||
137             (nr_good < replicas && !(flags & degraded)))
138                 return -EINVAL;
139
140         return 0;
141 }
142
/*
 * Remove @dev_idx's pointers from all user data extents, without moving
 * the data (used when the device is already gone/failed, not when
 * evacuating).
 *
 * Walks the extents btree; for each extent referencing the device,
 * rewrites it in place with that pointer dropped.  Fails with -EINVAL
 * (via drop_dev_ptrs()) if dropping the pointer would lose or degrade
 * data and @flags doesn't permit it.
 */
static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
{
	struct bkey_s_c k;
	struct bkey_s_extent e;
	BKEY_PADDED(key) tmp;
	struct btree_iter iter;
	int ret = 0;

	/* Rebuild the replicas list as we go: */
	mutex_lock(&c->replicas_gc_lock);
	bch2_replicas_gc_start(c, 1 << BCH_DATA_USER);

	bch2_btree_iter_init(&iter, c, BTREE_ID_EXTENTS,
			     POS_MIN, BTREE_ITER_PREFETCH);

	while ((k = bch2_btree_iter_peek(&iter)).k &&
	       !(ret = btree_iter_err(k))) {
		/*
		 * Keys we don't need to modify still get marked in the
		 * in-progress replicas gc:
		 */
		if (!bkey_extent_is_data(k.k) ||
		    !bch2_extent_has_device(bkey_s_c_to_extent(k), dev_idx)) {
			ret = bch2_check_mark_super(c, BCH_DATA_USER,
						    bch2_bkey_devs(k));
			if (ret)
				break;
			bch2_btree_iter_next(&iter);
			continue;
		}

		/* Modify a stack copy, never the key in the btree: */
		bkey_reassemble(&tmp.key, k);
		e = bkey_i_to_s_extent(&tmp.key);

		ret = drop_dev_ptrs(c, e, dev_idx, flags, false);
		if (ret)
			break;

		/*
		 * If the new extent no longer has any pointers, bch2_extent_normalize()
		 * will do the appropriate thing with it (turning it into a
		 * KEY_TYPE_ERROR key, or just a discard if it was a cached extent)
		 */
		bch2_extent_normalize(c, e.s);

		ret = bch2_check_mark_super(c, BCH_DATA_USER,
				bch2_bkey_devs(bkey_i_to_s_c(&tmp.key)));
		if (ret)
			break;

		/* Insert at the start of the extent we're replacing: */
		iter.pos = bkey_start_pos(&tmp.key.k);

		ret = bch2_btree_insert_at(c, NULL, NULL, NULL,
					   BTREE_INSERT_ATOMIC|
					   BTREE_INSERT_NOFAIL,
					   BTREE_INSERT_ENTRY(&iter, &tmp.key));

		/*
		 * don't want to leave ret == -EINTR, since if we raced and
		 * something else overwrote the key we could spuriously return
		 * -EINTR below:
		 */
		if (ret == -EINTR)
			ret = 0;
		if (ret)
			break;
	}

	bch2_btree_iter_unlock(&iter);

	bch2_replicas_gc_end(c, ret);
	mutex_unlock(&c->replicas_gc_lock);

	return ret;
}
213
/*
 * Remove @dev_idx's pointers from all btree node keys, without moving the
 * nodes: walk every node of every btree, and for each node key that
 * references the device, update the key with that pointer dropped.
 *
 * Returns -EINVAL if asked to proceed when metadata would be lost
 * (BCH_FORCE_IF_METADATA_LOST is not handled yet), 0 on success, else a
 * negative error code.
 */
static int bch2_dev_metadata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
{
	struct btree_iter iter;
	struct closure cl;
	struct btree *b;
	unsigned id;
	int ret;

	/* don't handle this yet: */
	if (flags & BCH_FORCE_IF_METADATA_LOST)
		return -EINVAL;

	/* NOTE(review): cl appears unused in this function — confirm */
	closure_init_stack(&cl);

	mutex_lock(&c->replicas_gc_lock);
	bch2_replicas_gc_start(c, 1 << BCH_DATA_BTREE);

	for (id = 0; id < BTREE_ID_NR; id++) {
		for_each_btree_node(&iter, c, id, POS_MIN, BTREE_ITER_PREFETCH, b) {
			/* Big enough for any btree node key: */
			__BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX) tmp;
			struct bkey_i_extent *new_key;
retry:
			if (!bch2_extent_has_device(bkey_i_to_s_c_extent(&b->key),
						    dev_idx)) {
				/*
				 * Node untouched by @dev_idx: drop back to
				 * read locks, just mark its devices in the
				 * in-progress replicas gc:
				 */
				bch2_btree_iter_set_locks_want(&iter, 0);

				ret = bch2_check_mark_super(c, BCH_DATA_BTREE,
						bch2_bkey_devs(bkey_i_to_s_c(&b->key)));
				if (ret)
					goto err;
			} else {
				/* Build the replacement key on the stack: */
				bkey_copy(&tmp.k, &b->key);
				new_key = bkey_i_to_extent(&tmp.k);

				ret = drop_dev_ptrs(c, extent_i_to_s(new_key),
						    dev_idx, flags, true);
				if (ret)
					goto err;

				/*
				 * Updating the key needs intent locks all the
				 * way up; if upgrading fails the iterator was
				 * unlocked, so reacquire the node and retry:
				 */
				if (!bch2_btree_iter_set_locks_want(&iter, U8_MAX)) {
					b = bch2_btree_iter_peek_node(&iter);
					goto retry;
				}

				ret = bch2_btree_node_update_key(c, &iter, b, new_key);
				if (ret == -EINTR) {
					/* Raced; refetch the node and retry: */
					b = bch2_btree_iter_peek_node(&iter);
					goto retry;
				}
				if (ret)
					goto err;
			}
		}
		/*
		 * NOTE(review): unlike bch2_dev_metadata_migrate(), the
		 * unlock's return value is ignored here — confirm whether
		 * iteration errors should be propagated
		 */
		bch2_btree_iter_unlock(&iter);
	}

	ret = 0;
out:
	bch2_replicas_gc_end(c, ret);
	mutex_unlock(&c->replicas_gc_lock);

	return ret;
err:
	bch2_btree_iter_unlock(&iter);
	goto out;
}
280
/*
 * Drop all references to @dev_idx — user data first, then btree
 * metadata — without moving any data (for a dead/removed device).
 */
int bch2_dev_data_drop(struct bch_fs *c, unsigned dev_idx, int flags)
{
	int ret = bch2_dev_usrdata_drop(c, dev_idx, flags);

	if (!ret)
		ret = bch2_dev_metadata_drop(c, dev_idx, flags);

	return ret;
}