]> git.sesse.net Git - bcachefs-tools-debian/blob - libbcachefs/recovery.c
Update bcachefs sources to 15f6e66e86 bcachefs: pass around bset_tree less
[bcachefs-tools-debian] / libbcachefs / recovery.c
1
2 #include "bcachefs.h"
3 #include "alloc.h"
4 #include "btree_gc.h"
5 #include "btree_update.h"
6 #include "btree_update_interior.h"
7 #include "btree_io.h"
8 #include "dirent.h"
9 #include "error.h"
10 #include "fsck.h"
11 #include "journal_io.h"
12 #include "quota.h"
13 #include "recovery.h"
14 #include "super-io.h"
15
16 #include <linux/stat.h>
17
/*
 * Brace initializer for a struct qstr built from the C string @str: the
 * innermost member .len is set to strlen(str) and .name to str itself.
 * @str is expanded twice, so pass an expression without side effects
 * (typically a string literal).
 */
#define QSTR(str) { { { .len = strlen(str) } }, .name = (str) }
19
20 struct bkey_i *btree_root_find(struct bch_fs *c,
21                                struct bch_sb_field_clean *clean,
22                                struct jset *j,
23                                enum btree_id id, unsigned *level)
24 {
25         struct bkey_i *k;
26         struct jset_entry *entry, *start, *end;
27
28         if (clean) {
29                 start = clean->start;
30                 end = vstruct_end(&clean->field);
31         } else {
32                 start = j->start;
33                 end = vstruct_last(j);
34         }
35
36         for (entry = start; entry < end; entry = vstruct_next(entry))
37                 if (entry->type == BCH_JSET_ENTRY_btree_root &&
38                     entry->btree_id == id)
39                         goto found;
40
41         return NULL;
42 found:
43         if (!entry->u64s)
44                 return ERR_PTR(-EINVAL);
45
46         k = entry->start;
47         *level = entry->level;
48         return k;
49 }
50
51 static int verify_superblock_clean(struct bch_fs *c,
52                                    struct bch_sb_field_clean *clean,
53                                    struct jset *j)
54 {
55         unsigned i;
56         int ret = 0;
57
58         if (!clean || !j)
59                 return 0;
60
61         if (mustfix_fsck_err_on(j->seq != clean->journal_seq, c,
62                         "superblock journal seq (%llu) doesn't match journal (%llu) after clean shutdown",
63                         le64_to_cpu(clean->journal_seq),
64                         le64_to_cpu(j->seq)))
65                 bch2_fs_mark_clean(c, false);
66
67         mustfix_fsck_err_on(j->read_clock != clean->read_clock, c,
68                         "superblock read clock doesn't match journal after clean shutdown");
69         mustfix_fsck_err_on(j->write_clock != clean->write_clock, c,
70                         "superblock read clock doesn't match journal after clean shutdown");
71
72         for (i = 0; i < BTREE_ID_NR; i++) {
73                 struct bkey_i *k1, *k2;
74                 unsigned l1 = 0, l2 = 0;
75
76                 k1 = btree_root_find(c, clean, NULL, i, &l1);
77                 k2 = btree_root_find(c, NULL, j, i, &l2);
78
79                 if (!k1 && !k2)
80                         continue;
81
82                 mustfix_fsck_err_on(!k1 || !k2 ||
83                                     IS_ERR(k1) ||
84                                     IS_ERR(k2) ||
85                                     k1->k.u64s != k2->k.u64s ||
86                                     memcmp(k1, k2, bkey_bytes(k1)) ||
87                                     l1 != l2, c,
88                         "superblock btree root doesn't match journal after clean shutdown");
89         }
90 fsck_err:
91         return ret;
92 }
93
94 static bool journal_empty(struct list_head *journal)
95 {
96         struct journal_replay *i;
97         struct jset_entry *entry;
98
99         if (list_empty(journal))
100                 return true;
101
102         i = list_last_entry(journal, struct journal_replay, list);
103
104         if (i->j.last_seq != i->j.seq)
105                 return false;
106
107         list_for_each_entry(i, journal, list) {
108                 vstruct_for_each(&i->j, entry) {
109                         if (entry->type == BCH_JSET_ENTRY_btree_root)
110                                 continue;
111
112                         if (entry->type == BCH_JSET_ENTRY_btree_keys &&
113                             !entry->u64s)
114                                 continue;
115                         return false;
116                 }
117         }
118
119         return true;
120 }
121
122 int bch2_fs_recovery(struct bch_fs *c)
123 {
124         const char *err = "cannot allocate memory";
125         struct bch_sb_field_clean *clean = NULL, *sb_clean = NULL;
126         LIST_HEAD(journal);
127         struct jset *j = NULL;
128         unsigned i;
129         int ret;
130
131         mutex_lock(&c->sb_lock);
132         if (!bch2_sb_get_replicas(c->disk_sb.sb)) {
133                 bch_info(c, "building replicas info");
134                 set_bit(BCH_FS_REBUILD_REPLICAS, &c->flags);
135         }
136
137         if (c->sb.clean)
138                 sb_clean = bch2_sb_get_clean(c->disk_sb.sb);
139         if (sb_clean) {
140                 clean = kmemdup(sb_clean, vstruct_bytes(&sb_clean->field),
141                                 GFP_KERNEL);
142                 if (!clean) {
143                         ret = -ENOMEM;
144                         mutex_unlock(&c->sb_lock);
145                         goto err;
146                 }
147         }
148         mutex_unlock(&c->sb_lock);
149
150         if (clean)
151                 bch_info(c, "recovering from clean shutdown, journal seq %llu",
152                          le64_to_cpu(clean->journal_seq));
153
154         if (!clean || !c->opts.nofsck) {
155                 ret = bch2_journal_read(c, &journal);
156                 if (ret)
157                         goto err;
158
159                 j = &list_entry(journal.prev, struct journal_replay, list)->j;
160         } else {
161                 ret = bch2_journal_set_seq(c,
162                                            le64_to_cpu(clean->journal_seq),
163                                            le64_to_cpu(clean->journal_seq));
164                 BUG_ON(ret);
165         }
166
167         ret = verify_superblock_clean(c, clean, j);
168         if (ret)
169                 goto err;
170
171         fsck_err_on(clean && !journal_empty(&journal), c,
172                     "filesystem marked clean but journal not empty");
173
174         if (clean) {
175                 c->bucket_clock[READ].hand = le16_to_cpu(clean->read_clock);
176                 c->bucket_clock[WRITE].hand = le16_to_cpu(clean->write_clock);
177         } else {
178                 c->bucket_clock[READ].hand = le16_to_cpu(j->read_clock);
179                 c->bucket_clock[WRITE].hand = le16_to_cpu(j->write_clock);
180         }
181
182         for (i = 0; i < BTREE_ID_NR; i++) {
183                 unsigned level;
184                 struct bkey_i *k;
185
186                 k = btree_root_find(c, clean, j, i, &level);
187                 if (!k)
188                         continue;
189
190                 err = "invalid btree root pointer";
191                 if (IS_ERR(k))
192                         goto err;
193
194                 err = "error reading btree root";
195                 if (bch2_btree_root_read(c, i, k, level)) {
196                         if (i != BTREE_ID_ALLOC)
197                                 goto err;
198
199                         mustfix_fsck_err(c, "error reading btree root");
200                 }
201         }
202
203         for (i = 0; i < BTREE_ID_NR; i++)
204                 if (!c->btree_roots[i].b)
205                         bch2_btree_root_alloc(c, i);
206
207         err = "error reading allocation information";
208         ret = bch2_alloc_read(c, &journal);
209         if (ret)
210                 goto err;
211
212         set_bit(BCH_FS_ALLOC_READ_DONE, &c->flags);
213
214         bch_verbose(c, "starting mark and sweep:");
215         err = "error in recovery";
216         ret = bch2_initial_gc(c, &journal);
217         if (ret)
218                 goto err;
219         bch_verbose(c, "mark and sweep done");
220
221         if (c->opts.noreplay)
222                 goto out;
223
224         /*
225          * Mark dirty before journal replay, fsck:
226          * XXX: after a clean shutdown, this could be done lazily only when fsck
227          * finds an error
228          */
229         bch2_fs_mark_clean(c, false);
230
231         /*
232          * bch2_fs_journal_start() can't happen sooner, or btree_gc_finish()
233          * will give spurious errors about oldest_gen > bucket_gen -
234          * this is a hack but oh well.
235          */
236         bch2_fs_journal_start(&c->journal);
237
238         err = "error starting allocator";
239         ret = bch2_fs_allocator_start(c);
240         if (ret)
241                 goto err;
242
243         bch_verbose(c, "starting journal replay:");
244         err = "journal replay failed";
245         ret = bch2_journal_replay(c, &journal);
246         if (ret)
247                 goto err;
248         bch_verbose(c, "journal replay done");
249
250         if (c->opts.norecovery)
251                 goto out;
252
253         err = "error in fsck";
254         ret = bch2_fsck(c);
255         if (ret)
256                 goto err;
257
258         if (!test_bit(BCH_FS_FSCK_UNFIXED_ERRORS, &c->flags)) {
259                 mutex_lock(&c->sb_lock);
260                 c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_ATOMIC_NLINK;
261                 mutex_unlock(&c->sb_lock);
262         }
263
264         if (enabled_qtypes(c)) {
265                 bch_verbose(c, "reading quotas:");
266                 ret = bch2_fs_quota_read(c);
267                 if (ret)
268                         goto err;
269                 bch_verbose(c, "quotas done");
270         }
271
272 out:
273         bch2_journal_entries_free(&journal);
274         kfree(clean);
275         return ret;
276 err:
277 fsck_err:
278         BUG_ON(!ret);
279         goto out;
280 }
281
282 int bch2_fs_initialize(struct bch_fs *c)
283 {
284         struct bch_inode_unpacked root_inode, lostfound_inode;
285         struct bkey_inode_buf packed_inode;
286         struct bch_hash_info root_hash_info;
287         struct qstr lostfound = QSTR("lost+found");
288         const char *err = "cannot allocate memory";
289         struct bch_dev *ca;
290         LIST_HEAD(journal);
291         unsigned i;
292         int ret;
293
294         bch_notice(c, "initializing new filesystem");
295
296         set_bit(BCH_FS_ALLOC_READ_DONE, &c->flags);
297
298         ret = bch2_initial_gc(c, &journal);
299         if (ret)
300                 goto err;
301
302         err = "unable to allocate journal buckets";
303         for_each_online_member(ca, c, i)
304                 if (bch2_dev_journal_alloc(ca)) {
305                         percpu_ref_put(&ca->io_ref);
306                         goto err;
307                 }
308
309         for (i = 0; i < BTREE_ID_NR; i++)
310                 bch2_btree_root_alloc(c, i);
311
312         /*
313          * journal_res_get() will crash if called before this has
314          * set up the journal.pin FIFO and journal.cur pointer:
315          */
316         bch2_fs_journal_start(&c->journal);
317         bch2_journal_set_replay_done(&c->journal);
318
319         err = "error starting allocator";
320         ret = bch2_fs_allocator_start(c);
321         if (ret)
322                 goto err;
323
324         bch2_inode_init(c, &root_inode, 0, 0,
325                         S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, 0, NULL);
326         root_inode.bi_inum = BCACHEFS_ROOT_INO;
327         root_inode.bi_nlink++; /* lost+found */
328         bch2_inode_pack(&packed_inode, &root_inode);
329
330         err = "error creating root directory";
331         ret = bch2_btree_insert(c, BTREE_ID_INODES,
332                                 &packed_inode.inode.k_i,
333                                 NULL, NULL, 0);
334         if (ret)
335                 goto err;
336
337         bch2_inode_init(c, &lostfound_inode, 0, 0,
338                         S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, 0,
339                         &root_inode);
340         lostfound_inode.bi_inum = BCACHEFS_ROOT_INO + 1;
341         bch2_inode_pack(&packed_inode, &lostfound_inode);
342
343         err = "error creating lost+found";
344         ret = bch2_btree_insert(c, BTREE_ID_INODES,
345                                 &packed_inode.inode.k_i,
346                                 NULL, NULL, 0);
347         if (ret)
348                 goto err;
349
350         root_hash_info = bch2_hash_info_init(c, &root_inode);
351
352         ret = bch2_dirent_create(c, BCACHEFS_ROOT_INO, &root_hash_info, DT_DIR,
353                                  &lostfound, lostfound_inode.bi_inum, NULL,
354                                  BTREE_INSERT_NOFAIL);
355         if (ret)
356                 goto err;
357
358         atomic_long_set(&c->nr_inodes, 2);
359
360         if (enabled_qtypes(c)) {
361                 ret = bch2_fs_quota_read(c);
362                 if (ret)
363                         goto err;
364         }
365
366         err = "error writing first journal entry";
367         ret = bch2_journal_meta(&c->journal);
368         if (ret)
369                 goto err;
370
371         mutex_lock(&c->sb_lock);
372         SET_BCH_SB_INITIALIZED(c->disk_sb.sb, true);
373         SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
374         c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_ATOMIC_NLINK;
375
376         bch2_write_super(c);
377         mutex_unlock(&c->sb_lock);
378
379         return 0;
380 err:
381         BUG_ON(!ret);
382         return ret;
383 }