1 // SPDX-License-Identifier: GPL-2.0
4 #include "alloc_background.h"
6 #include "btree_update.h"
7 #include "btree_update_interior.h"
14 #include "journal_io.h"
15 #include "journal_reclaim.h"
16 #include "journal_seq_blacklist.h"
22 #include <linux/sort.h>
23 #include <linux/stat.h>
/*
 * Build a struct qstr compound literal from a C string; only .len and .name
 * are filled in (no hash).
 */
25 #define QSTR(n) { { { .len = strlen(n) } }, .name = n }
27 /* sort and dedup all keys in the journal: */
/*
 * Free every journal_replay on @list.  Each entry was allocated as one
 * buffer: the journal_replay header with the jset stored inline at ->j,
 * hence the size passed to kvpfree() is header offset + jset bytes.
 */
29 static void journal_entries_free(struct list_head *list)
32 while (!list_empty(list)) {
33 struct journal_replay *i =
34 list_first_entry(list, struct journal_replay, list);
36 kvpfree(i, offsetof(struct journal_replay, j) +
37 vstruct_bytes(&i->j));
41 static int journal_sort_key_cmp(const void *_l, const void *_r)
43 const struct journal_key *l = _l;
44 const struct journal_key *r = _r;
46 return cmp_int(l->btree_id, r->btree_id) ?:
47 bkey_cmp(l->pos, r->pos) ?:
48 cmp_int(l->journal_seq, r->journal_seq) ?:
49 cmp_int(l->journal_offset, r->journal_offset);
52 static int journal_sort_seq_cmp(const void *_l, const void *_r)
54 const struct journal_key *l = _l;
55 const struct journal_key *r = _r;
57 return cmp_int(l->journal_seq, r->journal_seq) ?:
58 cmp_int(l->btree_id, r->btree_id) ?:
59 bkey_cmp(l->pos, r->pos);
/*
 * Restore sort order after @i's ->pos was modified in place: bubble @i
 * toward the end of keys->d[] while it compares greater than its successor
 * per journal_sort_key_cmp().  (The swap in the loop body is elided from
 * this view.)
 */
62 static void journal_keys_sift(struct journal_keys *keys, struct journal_key *i)
64 while (i + 1 < keys->d + keys->nr &&
65 journal_sort_key_cmp(i, i + 1) > 0) {
/*
 * Free a journal_keys array.  Walks every key first — presumably to free
 * split keys that were kmalloc'd during dedup (see journal_keys_sort()) —
 * TODO confirm against the elided loop body — then releases keys->d itself.
 */
71 static void journal_keys_free(struct journal_keys *keys)
73 struct journal_key *i;
75 for_each_journal_key(*keys, i)
/*
 * Flatten all keys from the journal entries on @journal_entries into a
 * sorted, deduplicated journal_keys array.  Overlapping extents are
 * resolved in favour of the newer (higher journal_seq/offset) key, by
 * trimming with bch2_cut_front()/bch2_cut_back() and, when an older key
 * straddles a newer one, by kmalloc'ing a split copy of its front part.
 */
83 static struct journal_keys journal_keys_sort(struct list_head *journal_entries)
85 struct journal_replay *p;
86 struct jset_entry *entry;
87 struct bkey_i *k, *_n;
88 struct journal_keys keys = { NULL }, keys_deduped = { NULL };
89 struct journal_key *i;
/* First pass (counting loop; accumulation elided from this view): */
92 list_for_each_entry(p, journal_entries, list)
93 for_each_jset_key(k, _n, entry, &p->j)
/*
 * journal_seq is stored relative to the first entry's seq so it fits
 * the journal_key field.
 */
96 keys.journal_seq_base = keys_deduped.journal_seq_base =
97 le64_to_cpu(list_first_entry(journal_entries,
98 struct journal_replay,
/*
 * keys_deduped gets twice the space: resolving an overlap can split
 * one key into two entries.
 */
101 keys.d = kvmalloc(sizeof(keys.d[0]) * nr_keys, GFP_KERNEL);
105 keys_deduped.d = kvmalloc(sizeof(keys.d[0]) * nr_keys * 2, GFP_KERNEL);
/* Second pass: one journal_key per jset key: */
109 list_for_each_entry(p, journal_entries, list)
110 for_each_jset_key(k, _n, entry, &p->j)
111 keys.d[keys.nr++] = (struct journal_key) {
112 .btree_id = entry->btree_id,
113 .pos = bkey_start_pos(&k->k),
115 .journal_seq = le64_to_cpu(p->j.seq) -
116 keys.journal_seq_base,
117 .journal_offset = k->_data - p->j._data,
/* Sort by (btree_id, pos, journal_seq, journal_offset): */
120 sort(keys.d, nr_keys, sizeof(keys.d[0]), journal_sort_key_cmp, NULL);
/* Dedup/overlap resolution over the sorted array: */
123 while (i < keys.d + keys.nr) {
124 if (i + 1 < keys.d + keys.nr &&
125 i[0].btree_id == i[1].btree_id &&
126 !bkey_cmp(i[0].pos, i[1].pos)) {
/* Same start pos: i[1] is newer (sorted later) and wins. */
127 if (bkey_cmp(i[0].k->k.p, i[1].k->k.p) <= 0) {
/* i[0] extends past i[1]: keep only i[0]'s tail. */
130 bch2_cut_front(i[1].k->k.p, i[0].k);
131 i[0].pos = i[1].k->k.p;
132 journal_keys_sift(&keys, i);
137 if (i + 1 < keys.d + keys.nr &&
138 i[0].btree_id == i[1].btree_id &&
139 bkey_cmp(i[0].k->k.p, bkey_start_pos(&i[1].k->k)) > 0) {
/* i[0] overlaps the start of i[1]: */
140 if ((cmp_int(i[0].journal_seq, i[1].journal_seq) ?:
141 cmp_int(i[0].journal_offset, i[1].journal_offset)) < 0) {
/* i[0] is older, i[1] overrides the overlapping range: */
142 if (bkey_cmp(i[0].k->k.p, i[1].k->k.p) <= 0) {
143 bch2_cut_back(bkey_start_pos(&i[1].k->k), &i[0].k->k);
/*
 * i[0] straddles i[1]: emit a split copy of the part in
 * front of i[1], then trim i[0] down to the tail past it.
 */
145 struct bkey_i *split =
146 kmalloc(bkey_bytes(i[0].k), GFP_KERNEL);
151 bkey_copy(split, i[0].k);
152 bch2_cut_back(bkey_start_pos(&i[1].k->k), &split->k);
153 keys_deduped.d[keys_deduped.nr++] = (struct journal_key) {
154 .btree_id = i[0].btree_id,
156 .pos = bkey_start_pos(&split->k),
158 .journal_seq = i[0].journal_seq,
159 .journal_offset = i[0].journal_offset,
162 bch2_cut_front(i[1].k->k.p, i[0].k);
163 i[0].pos = i[1].k->k.p;
164 journal_keys_sift(&keys, i);
/* i[0] is newer: trim the front of i[1] instead. */
168 if (bkey_cmp(i[0].k->k.p, i[1].k->k.p) >= 0) {
173 bch2_cut_front(i[0].k->k.p, i[1].k);
174 i[1].pos = i[0].k->k.p;
175 journal_keys_sift(&keys, i + 1);
/* No overlap with the next key: emit i[0] unchanged. */
181 keys_deduped.d[keys_deduped.nr++] = *i++;
/* Error path: free partial results, return an empty journal_keys. */
187 journal_keys_free(&keys_deduped);
189 return (struct journal_keys) { NULL };
192 /* journal replay: */
194 static void replay_now_at(struct journal *j, u64 seq)
196 BUG_ON(seq < j->replay_journal_seq);
197 BUG_ON(seq > j->replay_journal_seq_end);
199 while (j->replay_journal_seq < seq)
200 bch2_journal_pin_put(j, j->replay_journal_seq++);
/*
 * Replay a single extent key @k from the journal.  Extents must be inserted
 * in atomic (untrimmed-leaf-node-sized) chunks, so the key is split at
 * iterator positions and inserted piecewise; if splitting a compressed
 * extent we first take a disk reservation, since splitting can increase the
 * number of dirty pointers.
 */
203 static int bch2_extent_replay_key(struct bch_fs *c, struct bkey_i *k)
205 struct btree_trans trans;
206 struct btree_iter *iter, *split_iter;
208 * We might cause compressed extents to be split, so we need to pass in
209 * a disk_reservation:
211 struct disk_reservation disk_res =
212 bch2_disk_reservation_init(c, 0);
213 struct bkey_i *split;
214 bool split_compressed = false;
217 bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
/* Loop (do/while, head elided): insert @k one atomic chunk at a time. */
219 bch2_trans_begin(&trans);
221 iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
222 bkey_start_pos(&k->k),
226 ret = bch2_btree_iter_traverse(iter);
/* split_iter tracks the start of the chunk being inserted: */
230 split_iter = bch2_trans_copy_iter(&trans, iter);
231 ret = PTR_ERR_OR_ZERO(split_iter);
235 split = bch2_trans_kmalloc(&trans, bkey_bytes(&k->k));
236 ret = PTR_ERR_OR_ZERO(split);
/* Reserve space once, the first time we'd split a compressed extent: */
240 if (!split_compressed &&
241 bch2_extent_is_compressed(bkey_i_to_s_c(k)) &&
242 !bch2_extent_is_atomic(k, split_iter)) {
243 ret = bch2_disk_reservation_add(c, &disk_res,
245 bch2_bkey_nr_dirty_ptrs(bkey_i_to_s_c(k)),
246 BCH_DISK_RESERVATION_NOFAIL);
249 split_compressed = true;
/* Trim the copy down to one atomic chunk and queue it for insert: */
253 bch2_cut_front(split_iter->pos, split);
254 bch2_extent_trim_atomic(split, split_iter);
256 bch2_trans_update(&trans, BTREE_INSERT_ENTRY(split_iter, split));
257 bch2_btree_iter_set_pos(iter, split->k.p);
258 } while (bkey_cmp(iter->pos, k->k.p) < 0);
260 if (split_compressed) {
/*
 * We split a compressed extent: mark the overwrite ourselves
 * and commit without the usual overwrite marking.
 */
261 ret = bch2_trans_mark_key(&trans, bkey_i_to_s_c(k),
263 BCH_BUCKET_MARK_OVERWRITE) ?:
264 bch2_trans_commit(&trans, &disk_res, NULL,
267 BTREE_INSERT_LAZY_RW|
268 BTREE_INSERT_NOMARK_OVERWRITES|
269 BTREE_INSERT_NO_CLEAR_REPLICAS);
271 ret = bch2_trans_commit(&trans, &disk_res, NULL,
274 BTREE_INSERT_LAZY_RW|
275 BTREE_INSERT_JOURNAL_REPLAY|
276 BTREE_INSERT_NOMARK);
285 bch2_disk_reservation_put(c, &disk_res);
287 return bch2_trans_exit(&trans) ?: ret;
/*
 * Replay all keys in @keys into the btrees, in journal-sequence order.
 * Alloc keys and extents need special-cased replay paths; everything else
 * goes through a plain bch2_btree_insert() with replay flags.  Returns a
 * journal error code once replay is finished and pins are flushed.
 */
290 static int bch2_journal_replay(struct bch_fs *c,
291 struct journal_keys keys)
293 struct journal *j = &c->journal;
294 struct journal_key *i;
/* Re-sort into replay order (journal_seq first): */
297 sort(keys.d, keys.nr, sizeof(keys.d[0]), journal_sort_seq_cmp, NULL);
299 for_each_journal_key(keys, i) {
/* Advance the replay position so earlier entries can be reclaimed: */
300 replay_now_at(j, keys.journal_seq_base + i->journal_seq);
302 switch (i->btree_id) {
304 ret = bch2_alloc_replay_key(c, i->k);
306 case BTREE_ID_EXTENTS:
307 ret = bch2_extent_replay_key(c, i->k);
310 ret = bch2_btree_insert(c, i->btree_id, i->k,
313 BTREE_INSERT_LAZY_RW|
314 BTREE_INSERT_JOURNAL_REPLAY|
315 BTREE_INSERT_NOMARK);
320 bch_err(c, "journal replay: error %d while replaying key",
/* Done: release remaining pins and hand the journal back: */
328 replay_now_at(j, j->replay_journal_seq_end);
329 j->replay_journal_seq = 0;
331 bch2_journal_set_replay_done(j);
332 bch2_journal_flush_all_pins(j);
333 return bch2_journal_error(j);
336 static bool journal_empty(struct list_head *journal)
338 return list_empty(journal) ||
339 journal_entry_empty(&list_last_entry(journal,
340 struct journal_replay, list)->j);
/*
 * Sanity check the journal entries we read: sequence numbers between the
 * last entry's last_seq and seq must be contiguous (gaps may be covered by
 * the blacklist), and no entry we intend to replay may itself be
 * blacklisted.  Reports problems via fsck_err_on().
 */
344 verify_journal_entries_not_blacklisted_or_missing(struct bch_fs *c,
345 struct list_head *journal)
347 struct journal_replay *i =
348 list_last_entry(journal, struct journal_replay, list);
349 u64 start_seq = le64_to_cpu(i->j.last_seq);
350 u64 end_seq = le64_to_cpu(i->j.seq);
354 list_for_each_entry(i, journal, list) {
/* Gap in sequence numbers => entries are missing: */
355 fsck_err_on(seq != le64_to_cpu(i->j.seq), c,
356 "journal entries %llu-%llu missing! (replaying %llu-%llu)",
357 seq, le64_to_cpu(i->j.seq) - 1,
360 seq = le64_to_cpu(i->j.seq);
/* A blacklisted entry should never have been read for replay: */
362 fsck_err_on(bch2_journal_seq_is_blacklisted(c, seq, false), c,
363 "found blacklisted journal entry %llu", seq);
/* Skip over blacklisted seq numbers when advancing: */
367 } while (bch2_journal_seq_is_blacklisted(c, seq, false));
373 /* journal replay early: */
/*
 * Apply a single non-key jset entry during early replay — before the
 * btrees are usable: btree roots, fs usage counters, replicas usage, and
 * journal seq blacklists.  Returns 0 or an error from the blacklist /
 * replicas helpers.
 */
375 static int journal_replay_entry_early(struct bch_fs *c,
376 struct jset_entry *entry)
380 switch (entry->type) {
381 case BCH_JSET_ENTRY_btree_root: {
382 struct btree_root *r;
384 if (entry->btree_id >= BTREE_ID_NR) {
385 bch_err(c, "filesystem has unknown btree type %u",
390 r = &c->btree_roots[entry->btree_id];
393 r->level = entry->level;
394 bkey_copy(&r->key, &entry->start[0]);
/* Usage entries encode the counter type in ->btree_id: */
402 case BCH_JSET_ENTRY_usage: {
403 struct jset_entry_usage *u =
404 container_of(entry, struct jset_entry_usage, entry);
406 switch (entry->btree_id) {
407 case FS_USAGE_RESERVED:
/* ->level is the reservation's replicas count here: */
408 if (entry->level < BCH_REPLICAS_MAX)
409 c->usage_base->persistent_reserved[entry->level] =
412 case FS_USAGE_INODES:
413 c->usage_base->nr_inodes = le64_to_cpu(u->v);
415 case FS_USAGE_KEY_VERSION:
416 atomic64_set(&c->key_version,
423 case BCH_JSET_ENTRY_data_usage: {
424 struct jset_entry_data_usage *u =
425 container_of(entry, struct jset_entry_data_usage, entry);
426 ret = bch2_replicas_set_usage(c, &u->r,
/* Old-style blacklist entry: a single sequence number: */
430 case BCH_JSET_ENTRY_blacklist: {
431 struct jset_entry_blacklist *bl_entry =
432 container_of(entry, struct jset_entry_blacklist, entry);
434 ret = bch2_journal_seq_blacklist_add(c,
435 le64_to_cpu(bl_entry->seq),
436 le64_to_cpu(bl_entry->seq) + 1);
/* v2 blacklist entry: a [start, end] range: */
439 case BCH_JSET_ENTRY_blacklist_v2: {
440 struct jset_entry_blacklist_v2 *bl_entry =
441 container_of(entry, struct jset_entry_blacklist_v2, entry);
443 ret = bch2_journal_seq_blacklist_add(c,
444 le64_to_cpu(bl_entry->start),
445 le64_to_cpu(bl_entry->end) + 1);
/*
 * Early replay pass: apply all non-key entries (roots, usage, blacklists)
 * from either the superblock clean section (@clean, clean shutdown) or from
 * every journal entry on @journal, and pick up the bucket clock hands.
 * Finishes by recomputing derived usage via bch2_fs_usage_initialize().
 */
453 static int journal_replay_early(struct bch_fs *c,
454 struct bch_sb_field_clean *clean,
455 struct list_head *journal)
457 struct jset_entry *entry;
/* Clean-shutdown path: clock hands and entries come from the sb: */
461 c->bucket_clock[READ].hand = le16_to_cpu(clean->read_clock);
462 c->bucket_clock[WRITE].hand = le16_to_cpu(clean->write_clock);
464 for (entry = clean->start;
465 entry != vstruct_end(&clean->field);
466 entry = vstruct_next(entry)) {
467 ret = journal_replay_entry_early(c, entry);
/* Unclean path: clock hands come from the newest journal entry: */
472 struct journal_replay *i =
473 list_last_entry(journal, struct journal_replay, list);
475 c->bucket_clock[READ].hand = le16_to_cpu(i->j.read_clock);
476 c->bucket_clock[WRITE].hand = le16_to_cpu(i->j.write_clock);
478 list_for_each_entry(i, journal, list)
479 vstruct_for_each(&i->j, entry) {
480 ret = journal_replay_entry_early(c, entry);
486 bch2_fs_usage_initialize(c);
491 /* sb clean section: */
/*
 * Find the btree root entry for btree @id in either a superblock clean
 * section (@clean) or a journal entry (@j) — exactly one of the two is
 * non-NULL.  On success stores the root's depth in *level and returns the
 * root key; returns ERR_PTR(-EINVAL) on a malformed entry.
 */
493 static struct bkey_i *btree_root_find(struct bch_fs *c,
494 struct bch_sb_field_clean *clean,
496 enum btree_id id, unsigned *level)
499 struct jset_entry *entry, *start, *end;
502 start = clean->start;
503 end = vstruct_end(&clean->field);
/* else: scan the journal entry instead of the clean section */
506 end = vstruct_last(j);
509 for (entry = start; entry < end; entry = vstruct_next(entry))
510 if (entry->type == BCH_JSET_ENTRY_btree_root &&
511 entry->btree_id == id)
517 return ERR_PTR(-EINVAL);
520 *level = entry->level;
/*
 * After a clean shutdown, cross-check the superblock clean section against
 * the last journal entry @j: journal seq, both bucket clock hands, and
 * every btree root must agree.  Mismatches are mustfix fsck errors; a seq
 * mismatch invalidates the clean section entirely.
 *
 * Fix: the write_clock check's error message was a copy-paste of the
 * read_clock one ("read clock" twice); it now correctly says "write clock".
 */
524 static int verify_superblock_clean(struct bch_fs *c,
525 struct bch_sb_field_clean **cleanp,
529 struct bch_sb_field_clean *clean = *cleanp;
/* Nothing to verify without both a clean section and a journal entry: */
532 if (!c->sb.clean || !j)
535 if (mustfix_fsck_err_on(j->seq != clean->journal_seq, c,
536 "superblock journal seq (%llu) doesn't match journal (%llu) after clean shutdown",
537 le64_to_cpu(clean->journal_seq),
538 le64_to_cpu(j->seq))) {
544 mustfix_fsck_err_on(j->read_clock != clean->read_clock, c,
545 "superblock read clock doesn't match journal after clean shutdown");
546 mustfix_fsck_err_on(j->write_clock != clean->write_clock, c,
547 "superblock write clock doesn't match journal after clean shutdown");
/* Every btree root must be present and identical in both sources: */
549 for (i = 0; i < BTREE_ID_NR; i++) {
550 struct bkey_i *k1, *k2;
551 unsigned l1 = 0, l2 = 0;
553 k1 = btree_root_find(c, clean, NULL, i, &l1);
554 k2 = btree_root_find(c, NULL, j, i, &l2);
559 mustfix_fsck_err_on(!k1 || !k2 ||
562 k1->k.u64s != k2->k.u64s ||
563 memcmp(k1, k2, bkey_bytes(k1)) ||
565 "superblock btree root doesn't match journal after clean shutdown");
/*
 * Read and duplicate the superblock's clean section (caller frees the
 * kmemdup'd copy).  If the sb claims clean but the section is missing,
 * clears the clean flag via fsck.  Renumbers bkeys for pre-renumber
 * metadata versions.  Returns NULL or ERR_PTR(-ENOMEM) on failure.
 */
571 static struct bch_sb_field_clean *read_superblock_clean(struct bch_fs *c)
573 struct bch_sb_field_clean *clean, *sb_clean;
576 mutex_lock(&c->sb_lock);
577 sb_clean = bch2_sb_get_clean(c->disk_sb.sb);
579 if (fsck_err_on(!sb_clean, c,
580 "superblock marked clean but clean section not present")) {
581 SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
583 mutex_unlock(&c->sb_lock);
/* Copy out so the section stays valid after sb_lock is dropped: */
587 clean = kmemdup(sb_clean, vstruct_bytes(&sb_clean->field),
590 mutex_unlock(&c->sb_lock);
591 return ERR_PTR(-ENOMEM);
594 if (le16_to_cpu(c->disk_sb.sb->version) <
595 bcachefs_metadata_version_bkey_renumber)
596 bch2_sb_clean_renumber(clean, READ);
598 mutex_unlock(&c->sb_lock);
/* fsck_err label / error path (elided): */
602 mutex_unlock(&c->sb_lock);
/*
 * Read in every btree root recorded in c->btree_roots.  The alloc btree is
 * special: its root may be deliberately ignored (test_reconstruct_alloc)
 * or its errors tolerated, since alloc info can be rebuilt — in all those
 * cases the ALLOC_INFO compat bit is cleared so it gets regenerated.
 * Btrees with no root get a fresh empty one allocated.
 */
606 static int read_btree_roots(struct bch_fs *c)
611 for (i = 0; i < BTREE_ID_NR; i++) {
612 struct btree_root *r = &c->btree_roots[i];
617 if (i == BTREE_ID_ALLOC &&
618 test_reconstruct_alloc(c)) {
619 c->sb.compat &= ~(1ULL << BCH_COMPAT_FEAT_ALLOC_INFO);
/* Invalid root: fatal unless it's the (rebuildable) alloc btree: */
625 __fsck_err(c, i == BTREE_ID_ALLOC
626 ? FSCK_CAN_IGNORE : 0,
627 "invalid btree root %s",
629 if (i == BTREE_ID_ALLOC)
630 c->sb.compat &= ~(1ULL << BCH_COMPAT_FEAT_ALLOC_INFO);
633 ret = bch2_btree_root_read(c, i, &r->key, r->level);
635 __fsck_err(c, i == BTREE_ID_ALLOC
636 ? FSCK_CAN_IGNORE : 0,
637 "error reading btree root %s",
639 if (i == BTREE_ID_ALLOC)
640 c->sb.compat &= ~(1ULL << BCH_COMPAT_FEAT_ALLOC_INFO);
644 for (i = 0; i < BTREE_ID_NR; i++)
645 if (!c->btree_roots[i].b)
646 bch2_btree_root_alloc(c, i);
/*
 * Main recovery path, run at mount for an existing filesystem: read the
 * clean section and/or journal, sort/dedup journal keys, read btree roots
 * and alloc info, run GC/fsck as needed, replay the journal, and finally
 * update superblock feature/compat bits.
 */
651 int bch2_fs_recovery(struct bch_fs *c)
653 const char *err = "cannot allocate memory";
654 struct bch_sb_field_clean *clean = NULL;
656 LIST_HEAD(journal_entries);
657 struct journal_keys journal_keys = { NULL };
658 bool wrote = false, write_sb = false;
662 clean = read_superblock_clean(c);
663 ret = PTR_ERR_OR_ZERO(clean);
668 bch_info(c, "recovering from clean shutdown, journal seq %llu",
669 le64_to_cpu(clean->journal_seq));
671 if (!c->replicas.entries) {
672 bch_info(c, "building replicas info");
673 set_bit(BCH_FS_REBUILD_REPLICAS, &c->flags);
/* The journal must be read unless shutdown was clean (and no fsck): */
676 if (!c->sb.clean || c->opts.fsck) {
679 ret = bch2_journal_read(c, &journal_entries);
683 if (mustfix_fsck_err_on(c->sb.clean && !journal_empty(&journal_entries), c,
684 "filesystem marked clean but journal not empty")) {
685 c->sb.compat &= ~(1ULL << BCH_COMPAT_FEAT_ALLOC_INFO);
686 SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
690 if (!c->sb.clean && list_empty(&journal_entries)) {
691 bch_err(c, "no journal entries found");
692 ret = BCH_FSCK_REPAIR_IMPOSSIBLE;
696 journal_keys = journal_keys_sort(&journal_entries);
697 if (!journal_keys.d) {
702 j = &list_last_entry(&journal_entries,
703 struct journal_replay, list)->j;
705 ret = verify_superblock_clean(c, &clean, j);
/* Next journal seq to write: after the last entry / clean section seq: */
709 journal_seq = le64_to_cpu(j->seq) + 1;
711 journal_seq = le64_to_cpu(clean->journal_seq) + 1;
714 ret = journal_replay_early(c, clean, &journal_entries);
/* Blacklist the seq range we may have partially written before crash: */
719 ret = bch2_journal_seq_blacklist_add(c,
723 bch_err(c, "error creating new journal seq blacklist entry");
730 ret = bch2_blacklist_table_initialize(c);
732 if (!list_empty(&journal_entries)) {
733 ret = verify_journal_entries_not_blacklisted_or_missing(c,
739 ret = bch2_fs_journal_start(&c->journal, journal_seq,
744 ret = read_btree_roots(c);
748 bch_verbose(c, "starting alloc read");
749 err = "error reading allocation information";
750 ret = bch2_alloc_read(c, &journal_keys);
753 bch_verbose(c, "alloc read done");
755 bch_verbose(c, "starting stripes_read");
756 err = "error reading stripes";
757 ret = bch2_stripes_read(c, &journal_keys);
760 bch_verbose(c, "stripes_read done");
762 set_bit(BCH_FS_ALLOC_READ_DONE, &c->flags);
764 if ((c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO)) &&
765 !(c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_METADATA))) {
767 * interior btree node updates aren't consistent with the
768 * journal; after an unclean shutdown we have to walk all
769 * pointers to metadata:
771 bch_info(c, "starting metadata mark and sweep");
772 err = "error in mark and sweep";
773 ret = bch2_gc(c, NULL, true, true);
776 bch_verbose(c, "mark and sweep done");
/* Full GC if alloc info is absent/untrusted or replicas need rebuild: */
780 !(c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO)) ||
781 test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags)) {
782 bch_info(c, "starting mark and sweep");
783 err = "error in mark and sweep";
784 ret = bch2_gc(c, &journal_keys, true, false);
787 bch_verbose(c, "mark and sweep done");
790 clear_bit(BCH_FS_REBUILD_REPLICAS, &c->flags);
791 set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags);
794 * Skip past versions that might have possibly been used (as nonces),
795 * but hadn't had their pointers written:
797 if (c->sb.encryption_type && !c->sb.clean)
798 atomic64_add(1 << 16, &c->key_version);
800 if (c->opts.norecovery)
803 bch_verbose(c, "starting journal replay");
804 err = "journal replay failed";
805 ret = bch2_journal_replay(c, journal_keys);
808 bch_verbose(c, "journal replay done");
810 if (!c->opts.nochanges) {
812 * note that even when filesystem was clean there might be work
813 * to do here, if we ran gc (because of fsck) which recalculated
816 bch_verbose(c, "writing allocation info");
817 err = "error writing out alloc info";
818 ret = bch2_stripes_write(c, BTREE_INSERT_LAZY_RW, &wrote) ?:
819 bch2_alloc_write(c, BTREE_INSERT_LAZY_RW, &wrote);
821 bch_err(c, "error writing alloc info");
824 bch_verbose(c, "alloc write done");
/* One-time fsck passes gated on feature bits: */
828 if (!(c->sb.features & (1 << BCH_FEATURE_ATOMIC_NLINK))) {
829 bch_info(c, "checking inode link counts");
830 err = "error in recovery";
831 ret = bch2_fsck_inode_nlink(c);
834 bch_verbose(c, "check inodes done");
837 bch_verbose(c, "checking for deleted inodes");
838 err = "error in recovery";
839 ret = bch2_fsck_walk_inodes_only(c);
842 bch_verbose(c, "check inodes done");
847 bch_info(c, "starting fsck");
848 err = "error in fsck";
849 ret = bch2_fsck_full(c);
852 bch_verbose(c, "fsck done");
855 if (enabled_qtypes(c)) {
856 bch_verbose(c, "reading quotas");
857 ret = bch2_fs_quota_read(c);
860 bch_verbose(c, "quotas done");
863 mutex_lock(&c->sb_lock);
864 if (c->opts.version_upgrade) {
865 if (c->sb.version < bcachefs_metadata_version_new_versioning)
/*
 * NOTE(review): these on-disk __le16 fields are assigned with
 * le16_to_cpu(); cpu_to_le16() is the semantically correct
 * direction (bit-identical for 16-bit swaps, but wrong for
 * sparse endian annotation) — worth confirming/fixing.
 */
866 c->disk_sb.sb->version_min =
867 le16_to_cpu(bcachefs_metadata_version_min);
868 c->disk_sb.sb->version = le16_to_cpu(bcachefs_metadata_version_current);
872 if (!test_bit(BCH_FS_ERROR, &c->flags)) {
873 c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_FEAT_ALLOC_INFO;
878 !test_bit(BCH_FS_ERROR, &c->flags)) {
879 c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_ATOMIC_NLINK;
880 SET_BCH_SB_HAS_ERRORS(c->disk_sb.sb, 0);
886 mutex_unlock(&c->sb_lock);
/* Garbage collect the blacklist table if it has grown large: */
888 if (c->journal_seq_blacklist_table &&
889 c->journal_seq_blacklist_table->nr > 128)
890 queue_work(system_long_wq, &c->journal_seq_blacklist_gc_work);
/* Common exit path: flush fsck errors, free journal keys/entries: */
895 bch2_flush_fsck_errs(c);
896 journal_keys_free(&journal_keys);
897 journal_entries_free(&journal_entries);
900 bch_err(c, "Error in recovery: %s (%i)", err, ret);
902 bch_verbose(c, "ret %i", ret);
/*
 * Format-time initialization of a brand new filesystem: allocate btree
 * roots and journal buckets, go read-write, create the root and lost+found
 * directories, read quotas, write the first journal entry, and mark the
 * superblock initialized.
 */
906 int bch2_fs_initialize(struct bch_fs *c)
908 struct bch_inode_unpacked root_inode, lostfound_inode;
909 struct bkey_inode_buf packed_inode;
910 struct bch_hash_info root_hash_info;
911 struct qstr lostfound = QSTR("lost+found");
912 const char *err = "cannot allocate memory";
918 bch_notice(c, "initializing new filesystem");
920 mutex_lock(&c->sb_lock);
921 for_each_online_member(ca, c, i)
922 bch2_mark_dev_superblock(c, ca, 0);
923 mutex_unlock(&c->sb_lock);
/* No alloc info to read on a fresh fs — mark those stages done: */
925 set_bit(BCH_FS_ALLOC_READ_DONE, &c->flags);
926 set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags);
928 for (i = 0; i < BTREE_ID_NR; i++)
929 bch2_btree_root_alloc(c, i);
931 err = "unable to allocate journal buckets";
932 for_each_online_member(ca, c, i) {
933 ret = bch2_dev_journal_alloc(ca);
935 percpu_ref_put(&ca->io_ref);
941 * journal_res_get() will crash if called before this has
942 * set up the journal.pin FIFO and journal.cur pointer:
944 bch2_fs_journal_start(&c->journal, 1, &journal);
945 bch2_journal_set_replay_done(&c->journal);
947 err = "error going read write";
948 ret = __bch2_fs_read_write(c, true);
952 bch2_inode_init(c, &root_inode, 0, 0,
953 S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, 0, NULL);
954 root_inode.bi_inum = BCACHEFS_ROOT_INO;
955 root_inode.bi_nlink++; /* lost+found */
956 bch2_inode_pack(&packed_inode, &root_inode);
958 err = "error creating root directory";
959 ret = bch2_btree_insert(c, BTREE_ID_INODES,
960 &packed_inode.inode.k_i,
965 bch2_inode_init(c, &lostfound_inode, 0, 0,
966 S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, 0,
968 lostfound_inode.bi_inum = BCACHEFS_ROOT_INO + 1;
969 bch2_inode_pack(&packed_inode, &lostfound_inode);
971 err = "error creating lost+found";
972 ret = bch2_btree_insert(c, BTREE_ID_INODES,
973 &packed_inode.inode.k_i,
/* Link lost+found into the root directory: */
978 root_hash_info = bch2_hash_info_init(c, &root_inode);
980 ret = bch2_dirent_create(c, BCACHEFS_ROOT_INO, &root_hash_info, DT_DIR,
981 &lostfound, lostfound_inode.bi_inum, NULL,
982 BTREE_INSERT_NOFAIL);
986 if (enabled_qtypes(c)) {
987 ret = bch2_fs_quota_read(c);
992 err = "error writing first journal entry";
993 ret = bch2_journal_meta(&c->journal);
997 mutex_lock(&c->sb_lock);
/*
 * NOTE(review): assigning __le16 sb fields with le16_to_cpu();
 * cpu_to_le16() is the semantically correct direction (bit-identical
 * for 16-bit swaps) — worth confirming/fixing.
 */
998 c->disk_sb.sb->version = c->disk_sb.sb->version_min =
999 le16_to_cpu(bcachefs_metadata_version_current);
1000 c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_ATOMIC_NLINK;
1002 SET_BCH_SB_INITIALIZED(c->disk_sb.sb, true);
1003 SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
1005 bch2_write_super(c);
1006 mutex_unlock(&c->sb_lock);
1010 pr_err("Error initializing new filesystem: %s (%i)", err, ret);