From: Kent Overstreet Date: Sun, 9 Jul 2023 19:16:50 +0000 (-0400) Subject: Update bcachefs sources to da7d42a9a2 bcachefs: Add new assertions for shutdown path X-Git-Url: https://git.sesse.net/?a=commitdiff_plain;h=5d507f795b0b679a67e972a48cbd0854c4ad0f02;p=bcachefs-tools-debian Update bcachefs sources to da7d42a9a2 bcachefs: Add new assertions for shutdown path Signed-off-by: Kent Overstreet --- diff --git a/.bcachefs_revision b/.bcachefs_revision index e6dc62b..9f546cc 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -25de2b00dcca9bd801d37efd5e08c15dbf151825 +da7d42a9a2f78ca2a8ccffb0f15bf61bc7a7bc49 diff --git a/cmd_migrate.c b/cmd_migrate.c index 8cf7c98..3958ba6 100644 --- a/cmd_migrate.c +++ b/cmd_migrate.c @@ -229,11 +229,12 @@ static void copy_xattrs(struct bch_fs *c, struct bch_inode_unpacked *dst, die("error getting xattr val: %m"); const struct xattr_handler *h = xattr_resolve_name(&attr); + struct bch_inode_unpacked inode_u; int ret = bch2_trans_do(c, NULL, NULL, 0, bch2_xattr_set(&trans, (subvol_inum) { 1, dst->bi_inum }, - &hash_info, attr, + &inode_u, &hash_info, attr, val, val_size, h->flags, 0)); if (ret < 0) die("error creating xattr: %s", bch2_err_str(ret)); diff --git a/include/linux/slab.h b/include/linux/slab.h index 144e333..a36f6f4 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -20,7 +20,7 @@ #define ARCH_KMALLOC_MINALIGN 16 #define KMALLOC_MAX_SIZE SIZE_MAX -static inline void *_kmalloc(size_t size, gfp_t flags) +static inline void *kmalloc_noprof(size_t size, gfp_t flags) { unsigned i; void *p; @@ -46,7 +46,7 @@ static inline void *_kmalloc(size_t size, gfp_t flags) return p; } -#define kmalloc _kmalloc +#define kmalloc kmalloc_noprof static inline void *krealloc(void *old, size_t size, gfp_t flags) { @@ -97,7 +97,7 @@ static inline void *krealloc_array(void *p, size_t new_n, size_t new_size, gfp_t #define kvzalloc(size, flags) kzalloc(size, flags) #define kvfree(p) kfree(p) -static inline struct page *_alloc_pages(gfp_t flags, unsigned int order) +static inline struct page *alloc_pages_noprof(gfp_t flags, unsigned int order) { size_t size = PAGE_SIZE << order; unsigned i; @@ -117,12 +117,14 @@ static inline struct page *_alloc_pages(gfp_t flags, unsigned int order) return p; } -#define alloc_pages _alloc_pages +#define alloc_pages alloc_pages_noprof #define alloc_page(gfp) alloc_pages(gfp, 0) #define _get_free_pages(gfp, order) ((unsigned long) alloc_pages(gfp, order)) #define __get_free_pages(gfp, order) ((unsigned long) alloc_pages(gfp, order)) +#define get_free_pages_noprof(gfp, order) \ + ((unsigned long) alloc_pages(gfp, order)) #define __get_free_page(gfp) __get_free_pages(gfp, 0) #define __free_pages(page, order) \ diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c index c59629b..8d8481f 100644 --- a/libbcachefs/alloc_background.c +++ b/libbcachefs/alloc_background.c @@ -286,7 +286,7 @@ int bch2_alloc_v4_invalid(const struct bch_fs *c, struct bkey_s_c k, if (rw == WRITE && !(flags & BKEY_INVALID_JOURNAL) && - test_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags)) { + c->curr_recovery_pass > BCH_RECOVERY_PASS_check_btree_backpointers) { unsigned i, bp_len = 0; for (i = 0; i < BCH_ALLOC_V4_NR_BACKPOINTERS(a.v); i++) @@ -336,7 +336,7 @@ int bch2_alloc_v4_invalid(const struct bch_fs *c, struct bkey_s_c k, } if (!a.v->io_time[READ] && - test_bit(BCH_FS_CHECK_ALLOC_TO_LRU_REFS_DONE, &c->flags)) { + c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_to_lru_refs) { prt_printf(err, "cached bucket with read_time == 0"); return -BCH_ERR_invalid_bkey; } @@ -551,40 +551,6 @@ err: return ERR_PTR(ret); } -int bch2_alloc_read(struct bch_fs *c) -{ - struct btree_trans trans; - struct btree_iter iter; - struct bkey_s_c k; - struct bch_alloc_v4 a; - struct bch_dev *ca; - int ret; - - bch2_trans_init(&trans, c, 0, 0); - - for_each_btree_key(&trans, iter, BTREE_ID_alloc, POS_MIN, - BTREE_ITER_PREFETCH, k, ret) { - /* - * Not a fsck error because this is checked/repaired by - * bch2_check_alloc_key() which runs later: - */ - if (!bch2_dev_bucket_exists(c, k.k->p)) - continue; - - ca = bch_dev_bkey_exists(c, k.k->p.inode); - - *bucket_gen(ca, k.k->p.offset) = bch2_alloc_to_v4(k, &a)->gen; - } - bch2_trans_iter_exit(&trans, &iter); - - bch2_trans_exit(&trans); - - if (ret) - bch_err_fn(c, ret); - - return ret; -} - static struct bpos alloc_gens_pos(struct bpos pos, unsigned *offset) { *offset = pos.offset & KEY_TYPE_BUCKET_GENS_MASK; @@ -692,45 +658,67 @@ int bch2_bucket_gens_init(struct bch_fs *c) return ret; } -int bch2_bucket_gens_read(struct bch_fs *c) +int bch2_alloc_read(struct bch_fs *c) { struct btree_trans trans; struct btree_iter iter; struct bkey_s_c k; - const struct bch_bucket_gens *g; struct bch_dev *ca; - u64 b; int ret; + down_read(&c->gc_lock); bch2_trans_init(&trans, c, 0, 0); - for_each_btree_key(&trans, iter, BTREE_ID_bucket_gens, POS_MIN, - BTREE_ITER_PREFETCH, k, ret) { - u64 start = bucket_gens_pos_to_alloc(k.k->p, 0).offset; - u64 end = bucket_gens_pos_to_alloc(bpos_nosnap_successor(k.k->p), 0).offset; + if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_bucket_gens) { + const struct bch_bucket_gens *g; + u64 b; - if (k.k->type != KEY_TYPE_bucket_gens) - continue; + for_each_btree_key(&trans, iter, BTREE_ID_bucket_gens, POS_MIN, + BTREE_ITER_PREFETCH, k, ret) { + u64 start = bucket_gens_pos_to_alloc(k.k->p, 0).offset; + u64 end = bucket_gens_pos_to_alloc(bpos_nosnap_successor(k.k->p), 0).offset; - g = bkey_s_c_to_bucket_gens(k).v; + if (k.k->type != KEY_TYPE_bucket_gens) + continue; - /* - * Not a fsck error because this is checked/repaired by - * bch2_check_alloc_key() which runs later: - */ - if (!bch2_dev_exists2(c, k.k->p.inode)) - continue; + g = bkey_s_c_to_bucket_gens(k).v; + + /* + * Not a fsck error because this is checked/repaired by + * bch2_check_alloc_key() which runs later: + */ + if (!bch2_dev_exists2(c, k.k->p.inode)) + continue; - ca = bch_dev_bkey_exists(c, k.k->p.inode); + ca = bch_dev_bkey_exists(c, k.k->p.inode); + + for (b = max_t(u64, ca->mi.first_bucket, start); + b < min_t(u64, ca->mi.nbuckets, end); + b++) + *bucket_gen(ca, b) = g->gens[b & KEY_TYPE_BUCKET_GENS_MASK]; + } + bch2_trans_iter_exit(&trans, &iter); + } else { + struct bch_alloc_v4 a; - for (b = max_t(u64, ca->mi.first_bucket, start); - b < min_t(u64, ca->mi.nbuckets, end); - b++) - *bucket_gen(ca, b) = g->gens[b & KEY_TYPE_BUCKET_GENS_MASK]; + for_each_btree_key(&trans, iter, BTREE_ID_alloc, POS_MIN, + BTREE_ITER_PREFETCH, k, ret) { + /* + * Not a fsck error because this is checked/repaired by + * bch2_check_alloc_key() which runs later: + */ + if (!bch2_dev_bucket_exists(c, k.k->p)) + continue; + + ca = bch_dev_bkey_exists(c, k.k->p.inode); + + *bucket_gen(ca, k.k->p.offset) = bch2_alloc_to_v4(k, &a)->gen; + } + bch2_trans_iter_exit(&trans, &iter); } - bch2_trans_iter_exit(&trans, &iter); bch2_trans_exit(&trans); + up_read(&c->gc_lock); if (ret) bch_err_fn(c, ret); @@ -789,7 +777,7 @@ static int bch2_bucket_do_index(struct btree_trans *trans, return ret; if (ca->mi.freespace_initialized && - test_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags) && + c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info && bch2_trans_inconsistent_on(old.k->type != old_type, trans, "incorrect key when %s %s:%llu:%llu:0 (got %s should be %s)\n" " for %s", @@ -1232,8 +1220,7 @@ int bch2_check_alloc_hole_bucket_gens(struct btree_trans *trans, unsigned i, gens_offset, gens_end_offset; int ret; - if (c->sb.version < bcachefs_metadata_version_bucket_gens && - !c->opts.version_upgrade) + if (c->sb.version < bcachefs_metadata_version_bucket_gens) return 0; bch2_btree_iter_set_pos(bucket_gens_iter, alloc_gens_pos(start, &gens_offset)); @@ -1676,7 +1663,7 @@ static int bch2_discard_one_bucket(struct btree_trans *trans, } if (a->v.journal_seq > c->journal.flushed_seq_ondisk) { - if (test_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags)) { + if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info) { bch2_trans_inconsistent(trans, "clearing need_discard but journal_seq %llu > flushed_seq %llu\n" "%s", @@ -1689,7 +1676,7 @@ static int bch2_discard_one_bucket(struct btree_trans *trans, } if (a->v.data_type != BCH_DATA_need_discard) { - if (test_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags)) { + if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info) { bch2_trans_inconsistent(trans, "bucket incorrectly set in need_discard btree\n" "%s", @@ -1857,7 +1844,7 @@ err: bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&a->k_i)); bch_err(c, "%s", buf.buf); - if (test_bit(BCH_FS_CHECK_LRUS_DONE, &c->flags)) { + if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_lrus) { bch2_inconsistent_error(c); ret = -EINVAL; } diff --git a/libbcachefs/alloc_background.h b/libbcachefs/alloc_background.h index d1bf45a..c0914fe 100644 --- a/libbcachefs/alloc_background.h +++ b/libbcachefs/alloc_background.h @@ -212,7 +212,6 @@ static inline bool bkey_is_alloc(const struct bkey *k) } int bch2_alloc_read(struct bch_fs *); -int bch2_bucket_gens_read(struct bch_fs *); int bch2_trans_mark_alloc(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_i *, unsigned); diff --git a/libbcachefs/alloc_foreground.c b/libbcachefs/alloc_foreground.c index 6650c00..fcb7311 100644 --- a/libbcachefs/alloc_foreground.c +++ b/libbcachefs/alloc_foreground.c @@ -192,6 +192,7 @@ static inline unsigned open_buckets_reserved(enum bch_watermark watermark) { switch (watermark) { case BCH_WATERMARK_reclaim: + return 0; case BCH_WATERMARK_btree: case BCH_WATERMARK_btree_copygc: return OPEN_BUCKETS_COUNT / 4; @@ -323,7 +324,7 @@ static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bc a = bch2_alloc_to_v4(k, &a_convert); if (a->data_type != BCH_DATA_free) { - if (!test_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags)) { + if (c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_alloc_info) { ob = NULL; goto err; } @@ -339,7 +340,7 @@ static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bc } if (genbits != (alloc_freespace_genbits(*a) >> 56) && - test_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags)) { + c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info) { prt_printf(&buf, "bucket in freespace btree with wrong genbits (got %u should be %llu)\n" " freespace key ", genbits, alloc_freespace_genbits(*a) >> 56); @@ -349,10 +350,9 @@ static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bc bch2_trans_inconsistent(trans, "%s", buf.buf); ob = ERR_PTR(-EIO); goto err; - } - if (!test_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags)) { + if (c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_extents_to_backpointers) { struct bch_backpointer bp; struct bpos bp_pos = POS_MIN; @@ -555,7 +555,7 @@ alloc: if (s.skipped_need_journal_commit * 2 > avail) bch2_journal_flush_async(&c->journal, NULL); - if (!ob && freespace && !test_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags)) { + if (!ob && freespace && c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_alloc_info) { freespace = false; goto alloc; } @@ -1193,6 +1193,7 @@ static bool try_decrease_writepoints(struct btree_trans *trans, unsigned old_nr) bch2_trans_mutex_lock_norelock(trans, &wp->lock); open_bucket_for_each(c, &wp->ptrs, ob, i) open_bucket_free_unused(c, ob); + wp->ptrs.nr = 0; mutex_unlock(&wp->lock); return true; } diff --git a/libbcachefs/backpointers.c b/libbcachefs/backpointers.c index f5ce9c9..d412bae 100644 --- a/libbcachefs/backpointers.c +++ b/libbcachefs/backpointers.c @@ -104,7 +104,7 @@ static noinline int backpointer_mod_err(struct btree_trans *trans, bch2_bkey_val_to_text(&buf, c, orig_k); bch_err(c, "%s", buf.buf); - } else if (test_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags)) { + } else if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers) { prt_printf(&buf, "backpointer not found when deleting"); prt_newline(&buf); printbuf_indent_add(&buf, 2); @@ -125,7 +125,7 @@ static noinline int backpointer_mod_err(struct btree_trans *trans, printbuf_exit(&buf); - if (test_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags)) { + if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers) { bch2_inconsistent_error(c); return -EIO; } else { @@ -258,7 +258,7 @@ static void backpointer_not_found(struct btree_trans *trans, bch2_backpointer_to_text(&buf, &bp); prt_printf(&buf, "\n "); bch2_bkey_val_to_text(&buf, c, k); - if (!test_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags)) + if (c->curr_recovery_pass >= BCH_RECOVERY_PASS_check_extents_to_backpointers) bch_err_ratelimited(c, "%s", buf.buf); else bch2_trans_inconsistent(trans, "%s", buf.buf); @@ -651,9 +651,6 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans, for (btree_id = 0; btree_id < btree_id_nr_alive(c); btree_id++) { unsigned depth = btree_type_has_ptrs(btree_id) ? 0 : 1; - if (!bch2_btree_id_root(c, btree_id)->alive) - continue; - bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 0, depth, BTREE_ITER_ALL_LEVELS| diff --git a/libbcachefs/backpointers.h b/libbcachefs/backpointers.h index 778b677..87e31aa 100644 --- a/libbcachefs/backpointers.h +++ b/libbcachefs/backpointers.h @@ -84,7 +84,7 @@ static inline int bch2_bucket_backpointer_mod(struct btree_trans *trans, set_bkey_val_u64s(&bp_k->k, 0); } - return bch2_trans_update_buffered(trans, BTREE_ID_backpointers, &bp_k->k_i, !insert); + return bch2_trans_update_buffered(trans, BTREE_ID_backpointers, &bp_k->k_i); } static inline enum bch_data_type bkey_ptr_data_type(enum btree_id btree_id, unsigned level, diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h index a8488d4..cfd4a7b 100644 --- a/libbcachefs/bcachefs.h +++ b/libbcachefs/bcachefs.h @@ -564,15 +564,11 @@ enum { /* fsck passes: */ BCH_FS_TOPOLOGY_REPAIR_DONE, - BCH_FS_INITIAL_GC_DONE, /* kill when we enumerate fsck passes */ - BCH_FS_CHECK_ALLOC_DONE, - BCH_FS_CHECK_LRUS_DONE, - BCH_FS_CHECK_BACKPOINTERS_DONE, - BCH_FS_CHECK_ALLOC_TO_LRU_REFS_DONE, BCH_FS_FSCK_DONE, BCH_FS_INITIAL_GC_UNFIXED, /* kill when we enumerate fsck errors */ BCH_FS_NEED_ANOTHER_GC, + BCH_FS_VERSION_UPGRADE, BCH_FS_HAVE_DELETED_SNAPSHOTS, /* errors: */ @@ -661,6 +657,48 @@ enum bch_write_ref { BCH_WRITE_REF_NR, }; +#define PASS_SILENT BIT(0) +#define PASS_FSCK BIT(1) +#define PASS_UNCLEAN BIT(2) +#define PASS_ALWAYS BIT(3) +#define PASS_UPGRADE(v) ((v) << 4) + +#define BCH_RECOVERY_PASSES() \ + x(alloc_read, PASS_ALWAYS) \ + x(stripes_read, PASS_ALWAYS) \ + x(initialize_subvolumes, PASS_UPGRADE(bcachefs_metadata_version_snapshot_2)) \ + x(snapshots_read, PASS_ALWAYS) \ + x(check_allocations, PASS_FSCK) \ + x(set_may_go_rw, PASS_ALWAYS|PASS_SILENT) \ + x(journal_replay, PASS_ALWAYS) \ + x(check_alloc_info, PASS_FSCK) \ + x(check_lrus, PASS_FSCK) \ + x(check_btree_backpointers, PASS_FSCK) \ + x(check_backpointers_to_extents,PASS_FSCK) \ + x(check_extents_to_backpointers,PASS_FSCK) \ + x(check_alloc_to_lru_refs, PASS_FSCK) \ + x(fs_freespace_init, PASS_ALWAYS|PASS_SILENT) \ + x(bucket_gens_init, PASS_UPGRADE(bcachefs_metadata_version_bucket_gens)) \ + x(fs_upgrade_for_subvolumes, PASS_UPGRADE(bcachefs_metadata_version_snapshot_2)) \ + x(check_snapshot_trees, PASS_FSCK) \ + x(check_snapshots, PASS_FSCK) \ + x(check_subvols, PASS_FSCK) \ + x(delete_dead_snapshots, PASS_FSCK|PASS_UNCLEAN|PASS_SILENT) \ + x(check_inodes, PASS_FSCK|PASS_UNCLEAN) \ + x(check_extents, PASS_FSCK) \ + x(check_dirents, PASS_FSCK) \ + x(check_xattrs, PASS_FSCK) \ + x(check_root, PASS_FSCK) \ + x(check_directory_structure, PASS_FSCK) \ + x(check_nlinks, PASS_FSCK) \ + x(fix_reflink_p, PASS_UPGRADE(bcachefs_metadata_version_reflink_p_fix)) \ + +enum bch_recovery_pass { +#define x(n, when) BCH_RECOVERY_PASS_##n, + BCH_RECOVERY_PASSES() +#undef x +}; + struct bch_fs { struct closure cl; @@ -712,6 +750,7 @@ struct bch_fs { u16 version; u16 version_min; + u16 version_upgrade_complete; u8 nr_devices; u8 clean; @@ -991,6 +1030,11 @@ struct bch_fs { /* QUOTAS */ struct bch_memquota_type quotas[QTYP_NR]; + /* RECOVERY */ + u64 journal_replay_seq_start; + u64 journal_replay_seq_end; + enum bch_recovery_pass curr_recovery_pass; + /* DEBUG JUNK */ struct dentry *fs_debug_dir; struct dentry *btree_debug_dir; @@ -1134,6 +1178,12 @@ static inline bool bch2_dev_exists2(const struct bch_fs *c, unsigned dev) return dev < c->sb.nr_devices && c->devs[dev]; } +static inline bool bch2_version_upgrading_to(const struct bch_fs *c, unsigned new_version) +{ + return c->sb.version_upgrade_complete < new_version && + c->sb.version >= new_version; +} + #define BKEY_PADDED_ONSTACK(key, pad) \ struct { struct bkey_i key; __u64 key ## _pad[pad]; } diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h index 49b86bf..8a0f90a 100644 --- a/libbcachefs/bcachefs_format.h +++ b/libbcachefs/bcachefs_format.h @@ -1747,6 +1747,12 @@ LE64_BITMASK(BCH_SB_JOURNAL_RECLAIM_DELAY,struct bch_sb, flags[4], 0, 32); LE64_BITMASK(BCH_SB_JOURNAL_TRANSACTION_NAMES,struct bch_sb, flags[4], 32, 33); LE64_BITMASK(BCH_SB_NOCOW, struct bch_sb, flags[4], 33, 34); LE64_BITMASK(BCH_SB_WRITE_BUFFER_SIZE, struct bch_sb, flags[4], 34, 54); +LE64_BITMASK(BCH_SB_VERSION_UPGRADE, struct bch_sb, flags[4], 54, 56); + +/* flags[4] 56-64 unused: */ + +LE64_BITMASK(BCH_SB_VERSION_UPGRADE_COMPLETE, + struct bch_sb, flags[5], 0, 16); /* * Features: @@ -1814,6 +1820,17 @@ enum bch_sb_compat { /* options: */ +#define BCH_VERSION_UPGRADE_OPTS() \ + x(compatible, 0) \ + x(incompatible, 1) \ + x(none, 2) + +enum bch_version_upgrade_opts { +#define x(t, n) BCH_VERSION_UPGRADE_##t = n, + BCH_VERSION_UPGRADE_OPTS() +#undef x +}; + #define BCH_REPLICAS_MAX 4U #define BCH_BKEY_PTRS_MAX 16U diff --git a/libbcachefs/bkey_methods.h b/libbcachefs/bkey_methods.h index 36a08e3..0f3dc15 100644 --- a/libbcachefs/bkey_methods.h +++ b/libbcachefs/bkey_methods.h @@ -122,6 +122,16 @@ enum btree_update_flags { #define BTREE_TRIGGER_BUCKET_INVALIDATE (1U << __BTREE_TRIGGER_BUCKET_INVALIDATE) #define BTREE_TRIGGER_NOATOMIC (1U << __BTREE_TRIGGER_NOATOMIC) +#define BTREE_TRIGGER_WANTS_OLD_AND_NEW \ + ((1U << KEY_TYPE_alloc)| \ + (1U << KEY_TYPE_alloc_v2)| \ + (1U << KEY_TYPE_alloc_v3)| \ + (1U << KEY_TYPE_alloc_v4)| \ + (1U << KEY_TYPE_stripe)| \ + (1U << KEY_TYPE_inode)| \ + (1U << KEY_TYPE_inode_v2)| \ + (1U << KEY_TYPE_snapshot)) + static inline int bch2_trans_mark_key(struct btree_trans *trans, enum btree_id btree_id, unsigned level, struct bkey_s_c old, struct bkey_i *new, diff --git a/libbcachefs/btree_cache.c b/libbcachefs/btree_cache.c index 191d6c1..13c88d9 100644 --- a/libbcachefs/btree_cache.c +++ b/libbcachefs/btree_cache.c @@ -505,21 +505,17 @@ int bch2_fs_btree_cache_init(struct bch_fs *c) unsigned i; int ret = 0; - pr_verbose_init(c->opts, ""); - ret = rhashtable_init(&bc->table, &bch_btree_cache_params); if (ret) - goto out; + goto err; bc->table_init_done = true; bch2_recalc_btree_reserve(c); for (i = 0; i < bc->reserve; i++) - if (!__bch2_btree_node_mem_alloc(c)) { - ret = -BCH_ERR_ENOMEM_fs_btree_cache_init; - goto out; - } + if (!__bch2_btree_node_mem_alloc(c)) + goto err; list_splice_init(&bc->live, &bc->freeable); @@ -530,9 +526,12 @@ int bch2_fs_btree_cache_init(struct bch_fs *c) bc->shrink.to_text = bch2_btree_cache_shrinker_to_text; bc->shrink.seeks = 4; ret = register_shrinker(&bc->shrink, "%s/btree_cache", c->name); -out: - pr_verbose_init(c->opts, "ret %i", ret); - return ret; + if (ret) + goto err; + + return 0; +err: + return -BCH_ERR_ENOMEM_fs_btree_cache_init; } void bch2_fs_btree_cache_init_early(struct btree_cache *bc) @@ -826,7 +825,7 @@ static noinline void btree_bad_header(struct bch_fs *c, struct btree *b) { struct printbuf buf = PRINTBUF; - if (!test_bit(BCH_FS_INITIAL_GC_DONE, &c->flags)) + if (c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_allocations) return; prt_printf(&buf, diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c index 1fc3867..c47d5d8 100644 --- a/libbcachefs/btree_gc.c +++ b/libbcachefs/btree_gc.c @@ -1810,7 +1810,7 @@ again: if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) || (BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb) && - !test_bit(BCH_FS_INITIAL_GC_DONE, &c->flags) && + c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_allocations && c->opts.fix_errors != FSCK_OPT_NO)) { bch_info(c, "Starting topology repair pass"); ret = bch2_repair_topology(c); @@ -1825,7 +1825,7 @@ again: if (ret == -BCH_ERR_need_topology_repair && !test_bit(BCH_FS_TOPOLOGY_REPAIR_DONE, &c->flags) && - !test_bit(BCH_FS_INITIAL_GC_DONE, &c->flags)) { + c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_allocations) { set_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags); SET_BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb, true); ret = 0; diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c index a8197c5..1face38 100644 --- a/libbcachefs/btree_io.c +++ b/libbcachefs/btree_io.c @@ -105,8 +105,8 @@ static void btree_bounce_free(struct bch_fs *c, size_t size, vpfree(p, size); } -static void *btree_bounce_alloc(struct bch_fs *c, size_t size, - bool *used_mempool) +static void *btree_bounce_alloc_noprof(struct bch_fs *c, size_t size, + bool *used_mempool) { unsigned flags = memalloc_nofs_save(); void *p; @@ -114,7 +114,7 @@ static void *btree_bounce_alloc(struct bch_fs *c, size_t size, BUG_ON(size > btree_bytes(c)); *used_mempool = false; - p = vpmalloc(size, __GFP_NOWARN|GFP_NOWAIT); + p = vpmalloc_noprof(size, __GFP_NOWARN|GFP_NOWAIT); if (!p) { *used_mempool = true; p = mempool_alloc(&c->btree_bounce_pool, GFP_NOFS); @@ -122,6 +122,8 @@ static void *btree_bounce_alloc(struct bch_fs *c, size_t size, memalloc_nofs_restore(flags); return p; } +#define btree_bounce_alloc(_c, _size, _used_mempool) \ + alloc_hooks(btree_bounce_alloc_noprof(_c, _size, _used_mempool)) static void sort_bkey_ptrs(const struct btree *bt, struct bkey_packed **ptrs, unsigned nr) diff --git a/libbcachefs/btree_key_cache.c b/libbcachefs/btree_key_cache.c index a71db7a..de38981 100644 --- a/libbcachefs/btree_key_cache.c +++ b/libbcachefs/btree_key_cache.c @@ -781,6 +781,7 @@ bool bch2_btree_insert_key_cached(struct btree_trans *trans, ck->valid = true; if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags)) { + EBUG_ON(test_bit(BCH_FS_CLEAN_SHUTDOWN, &c->flags)); set_bit(BKEY_CACHED_DIRTY, &ck->flags); atomic_long_inc(&c->btree_key_cache.nr_dirty); diff --git a/libbcachefs/btree_update.h b/libbcachefs/btree_update.h index 93d2e54..f794c9d 100644 --- a/libbcachefs/btree_update.h +++ b/libbcachefs/btree_update.h @@ -112,7 +112,7 @@ int bch2_bkey_get_empty_slot(struct btree_trans *, struct btree_iter *, int __must_check bch2_trans_update(struct btree_trans *, struct btree_iter *, struct bkey_i *, enum btree_update_flags); int __must_check bch2_trans_update_buffered(struct btree_trans *, - enum btree_id, struct bkey_i *, bool); + enum btree_id, struct bkey_i *); void bch2_trans_commit_hook(struct btree_trans *, struct btree_trans_commit_hook *); diff --git a/libbcachefs/btree_update_leaf.c b/libbcachefs/btree_update_leaf.c index f7ffd68..6e12e8e 100644 --- a/libbcachefs/btree_update_leaf.c +++ b/libbcachefs/btree_update_leaf.c @@ -272,8 +272,10 @@ inline void bch2_btree_insert_key_leaf(struct btree_trans *trans, bch2_btree_add_journal_pin(c, b, journal_seq); - if (unlikely(!btree_node_dirty(b))) + if (unlikely(!btree_node_dirty(b))) { + EBUG_ON(test_bit(BCH_FS_CLEAN_SHUTDOWN, &c->flags)); set_btree_node_dirty_acct(c, b); + } live_u64s_added = (int) b->nr.live_u64s - old_live_u64s; u64s_added = (int) bset_u64s(t) - old_u64s; @@ -419,7 +421,8 @@ static int run_one_mem_trigger(struct btree_trans *trans, if (!btree_node_type_needs_gc(i->btree_id)) return 0; - if (old_ops->atomic_trigger == new_ops->atomic_trigger) { + if (old_ops->atomic_trigger == new_ops->atomic_trigger && + ((1U << old.k->type) & BTREE_TRIGGER_WANTS_OLD_AND_NEW)) { ret = bch2_mark_key(trans, i->btree_id, i->level, old, bkey_i_to_s_c(new), BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE|flags); @@ -461,7 +464,8 @@ static int run_one_trans_trigger(struct btree_trans *trans, struct btree_insert_ if (!i->insert_trigger_run && !i->overwrite_trigger_run && - old_ops->trans_trigger == new_ops->trans_trigger) { + old_ops->trans_trigger == new_ops->trans_trigger && + ((1U << old.k->type) & BTREE_TRIGGER_WANTS_OLD_AND_NEW)) { i->overwrite_trigger_run = true; i->insert_trigger_run = true; return bch2_trans_mark_key(trans, i->btree_id, i->level, old, i->k, @@ -1720,14 +1724,21 @@ int __must_check bch2_trans_update(struct btree_trans *trans, struct btree_iter int __must_check bch2_trans_update_buffered(struct btree_trans *trans, enum btree_id btree, - struct bkey_i *k, - bool head) + struct bkey_i *k) { - int ret, pos; + struct btree_write_buffered_key *i; + int ret; EBUG_ON(trans->nr_wb_updates > trans->wb_updates_size); EBUG_ON(k->k.u64s > BTREE_WRITE_BUFERED_U64s_MAX); + trans_for_each_wb_update(trans, i) { + if (i->btree == btree && bpos_eq(i->k.k.p, k->k.p)) { + bkey_copy(&i->k, k); + return 0; + } + } + if (!trans->wb_updates || trans->nr_wb_updates == trans->wb_updates_size) { struct btree_write_buffered_key *u; @@ -1754,18 +1765,13 @@ int __must_check bch2_trans_update_buffered(struct btree_trans *trans, trans->wb_updates = u; } - if (head) { - memmove(&trans->wb_updates[1], - &trans->wb_updates[0], - sizeof(trans->wb_updates[0]) * trans->nr_wb_updates); - pos = 0; - } else { - pos = trans->nr_wb_updates; - } + trans->wb_updates[trans->nr_wb_updates] = (struct btree_write_buffered_key) { + .btree = btree, + }; - trans->wb_updates[pos] = (struct btree_write_buffered_key) { .btree = btree, }; - bkey_copy(&trans->wb_updates[pos].k, k); + bkey_copy(&trans->wb_updates[trans->nr_wb_updates].k, k); trans->nr_wb_updates++; + return 0; } @@ -1886,7 +1892,7 @@ int bch2_btree_delete_at_buffered(struct btree_trans *trans, bkey_init(&k->k); k->k.p = pos; - return bch2_trans_update_buffered(trans, btree, k, false); + return bch2_trans_update_buffered(trans, btree, k); } int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id, diff --git a/libbcachefs/btree_write_buffer.c b/libbcachefs/btree_write_buffer.c index 3a3e36c..b502263 100644 --- a/libbcachefs/btree_write_buffer.c +++ b/libbcachefs/btree_write_buffer.c @@ -281,7 +281,6 @@ int bch2_btree_insert_keys_write_buffer(struct btree_trans *trans) struct btree_write_buffer *wb = &c->btree_write_buffer; struct btree_write_buffered_key *i; union btree_write_buffer_state old, new; - unsigned offset = 0; int ret = 0; u64 v; @@ -289,8 +288,7 @@ int bch2_btree_insert_keys_write_buffer(struct btree_trans *trans) EBUG_ON(i->k.k.u64s > BTREE_WRITE_BUFERED_U64s_MAX); i->journal_seq = trans->journal_res.seq; - i->journal_offset = trans->journal_res.offset + offset; - offset++; + i->journal_offset = trans->journal_res.offset; } preempt_disable(); diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c index 4aa0209..797ef5e 100644 --- a/libbcachefs/buckets.c +++ b/libbcachefs/buckets.c @@ -948,12 +948,14 @@ static int bch2_mark_stripe_ptr(struct btree_trans *trans, return 0; } -static int __mark_extent(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c k, unsigned flags) +int bch2_mark_extent(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c old, struct bkey_s_c new, + unsigned flags) { u64 journal_seq = trans->journal_res.seq; struct bch_fs *c = trans->c; + struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? old : new; struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const union bch_extent_entry *entry; struct extent_ptr_decoded p; @@ -1029,14 +1031,6 @@ static int __mark_extent(struct btree_trans *trans, return 0; } -int bch2_mark_extent(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c old, struct bkey_s_c new, - unsigned flags) -{ - return mem_trigger_run_insert_then_overwrite(__mark_extent, trans, btree_id, level, old, new, flags); -} - int bch2_mark_stripe(struct btree_trans *trans, enum btree_id btree_id, unsigned level, struct bkey_s_c old, struct bkey_s_c new, @@ -1175,11 +1169,13 @@ int bch2_mark_inode(struct btree_trans *trans, return 0; } -static int __mark_reservation(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c k, unsigned flags) +int bch2_mark_reservation(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c old, struct bkey_s_c new, + unsigned flags) { struct bch_fs *c = trans->c; + struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? old : new; struct bch_fs_usage *fs_usage; unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas; s64 sectors = (s64) k.k->size; @@ -1206,14 +1202,6 @@ static int __mark_reservation(struct btree_trans *trans, return 0; } -int bch2_mark_reservation(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c old, struct bkey_s_c new, - unsigned flags) -{ - return mem_trigger_run_insert_then_overwrite(__mark_reservation, trans, btree_id, level, old, new, flags); -} - static s64 __bch2_mark_reflink_p(struct btree_trans *trans, struct bkey_s_c_reflink_p p, u64 start, u64 end, @@ -1268,11 +1256,13 @@ fsck_err: return ret; } -static int __mark_reflink_p(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c k, unsigned flags) +int bch2_mark_reflink_p(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c old, struct bkey_s_c new, + unsigned flags) { struct bch_fs *c = trans->c; + struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? old : new; struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); struct reflink_gc *ref; size_t l, r, m; @@ -1306,14 +1296,6 @@ static int __mark_reflink_p(struct btree_trans *trans, return ret; } -int bch2_mark_reflink_p(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c old, struct bkey_s_c new, - unsigned flags) -{ - return mem_trigger_run_insert_then_overwrite(__mark_reflink_p, trans, btree_id, level, old, new, flags); -} - void bch2_trans_fs_usage_revert(struct btree_trans *trans, struct replicas_delta_list *deltas) { @@ -1515,11 +1497,15 @@ err: return ret; } -static int __trans_mark_extent(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c k, unsigned flags) +int bch2_trans_mark_extent(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c old, struct bkey_i *new, + unsigned flags) { struct bch_fs *c = trans->c; + struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE + ? old + : bkey_i_to_s_c(new); struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const union bch_extent_entry *entry; struct extent_ptr_decoded p; @@ -1576,14 +1562,6 @@ static int __trans_mark_extent(struct btree_trans *trans, return ret; } -int bch2_trans_mark_extent(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c old, struct bkey_i *new, - unsigned flags) -{ - return trigger_run_insert_then_overwrite(__trans_mark_extent, trans, btree_id, level, old, new, flags); -} - static int bch2_trans_mark_stripe_bucket(struct btree_trans *trans, struct bkey_s_c_stripe s, unsigned idx, bool deleting) @@ -1758,10 +1736,15 @@ int bch2_trans_mark_inode(struct btree_trans *trans, return 0; } -static int __trans_mark_reservation(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c k, unsigned flags) +int bch2_trans_mark_reservation(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c old, + struct bkey_i *new, + unsigned flags) { + struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE + ? old + : bkey_i_to_s_c(new); unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas; s64 sectors = (s64) k.k->size; struct replicas_delta_list *d; @@ -1783,16 +1766,7 @@ static int __trans_mark_reservation(struct btree_trans *trans, return 0; } -int bch2_trans_mark_reservation(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c old, - struct bkey_i *new, - unsigned flags) -{ - return trigger_run_insert_then_overwrite(__trans_mark_reservation, trans, btree_id, level, old, new, flags); -} - -static int trans_mark_reflink_p_segment(struct btree_trans *trans, +static int __bch2_trans_mark_reflink_p(struct btree_trans *trans, struct bkey_s_c_reflink_p p, u64 *idx, unsigned flags) { @@ -1859,36 +1833,33 @@ err: return ret; } -static int __trans_mark_reflink_p(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c k, unsigned flags) -{ - struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); - u64 idx, end_idx; - int ret = 0; - - idx = le64_to_cpu(p.v->idx) - le32_to_cpu(p.v->front_pad); - end_idx = le64_to_cpu(p.v->idx) + p.k->size + - le32_to_cpu(p.v->back_pad); - - while (idx < end_idx && !ret) - ret = trans_mark_reflink_p_segment(trans, p, &idx, flags); - return ret; -} - int bch2_trans_mark_reflink_p(struct btree_trans *trans, enum btree_id btree_id, unsigned level, struct bkey_s_c old, struct bkey_i *new, unsigned flags) { + struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE + ? old + : bkey_i_to_s_c(new); + struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); + u64 idx, end_idx; + int ret = 0; + if (flags & BTREE_TRIGGER_INSERT) { - struct bch_reflink_p *v = &bkey_i_to_reflink_p(new)->v; + struct bch_reflink_p *v = (struct bch_reflink_p *) p.v; v->front_pad = v->back_pad = 0; } - return trigger_run_insert_then_overwrite(__trans_mark_reflink_p, trans, btree_id, level, old, new, flags); + idx = le64_to_cpu(p.v->idx) - le32_to_cpu(p.v->front_pad); + end_idx = le64_to_cpu(p.v->idx) + p.k->size + + le32_to_cpu(p.v->back_pad); + + while (idx < end_idx && !ret) + ret = __bch2_trans_mark_reflink_p(trans, p, &idx, flags); + + return ret; } static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, diff --git a/libbcachefs/buckets.h b/libbcachefs/buckets.h index 400d105..f9d7dda 100644 --- a/libbcachefs/buckets.h +++ b/libbcachefs/buckets.h @@ -274,20 +274,6 @@ int bch2_trans_mark_inode(struct btree_trans *, enum btree_id, unsigned, struct int bch2_trans_mark_reservation(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_i *, unsigned); int bch2_trans_mark_reflink_p(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_i *, unsigned); -#define mem_trigger_run_insert_then_overwrite(_fn, _trans, _btree_id, _level, _old, _new, _flags)\ -({ \ - int ret = 0; \ - \ - if (_new.k->type) \ - ret = _fn(_trans, _btree_id, _level, _new, _flags & ~BTREE_TRIGGER_OVERWRITE); \ - if (_old.k->type && !ret) \ - ret = _fn(_trans, _btree_id, _level, _old, _flags & ~BTREE_TRIGGER_INSERT); \ - ret; \ -}) - -#define trigger_run_insert_then_overwrite(_fn, _trans, _btree_id, _level, _old, _new, _flags) \ - mem_trigger_run_insert_then_overwrite(_fn, _trans, _btree_id, _level, _old, bkey_i_to_s_c(_new), _flags) - void bch2_trans_fs_usage_revert(struct btree_trans *, struct replicas_delta_list *); int bch2_trans_fs_usage_apply(struct btree_trans *, struct replicas_delta_list *); diff --git a/libbcachefs/checksum.c b/libbcachefs/checksum.c index 843e138..a08997a 100644 --- a/libbcachefs/checksum.c +++ b/libbcachefs/checksum.c @@ -680,8 +680,6 @@ int bch2_fs_encryption_init(struct bch_fs *c) struct bch_key key; int ret = 0; - pr_verbose_init(c->opts, ""); - c->sha256 = crypto_alloc_shash("sha256", 0, 0); ret = PTR_ERR_OR_ZERO(c->sha256); if (ret) { @@ -707,6 +705,5 @@ int bch2_fs_encryption_init(struct bch_fs *c) goto out; out: memzero_explicit(&key, sizeof(key)); - pr_verbose_init(c->opts, "ret %i", ret); return ret; } diff --git a/libbcachefs/compress.c b/libbcachefs/compress.c index 38a3475..48427a2 100644 --- a/libbcachefs/compress.c +++ b/libbcachefs/compress.c @@ -542,7 +542,7 @@ void bch2_fs_compress_exit(struct bch_fs *c) mempool_exit(&c->compression_bounce[READ]); } -static int _bch2_fs_compress_init(struct bch_fs *c, u64 features) +static int __bch2_fs_compress_init(struct bch_fs *c, u64 features) { size_t decompress_workspace_size = 0; bool decompress_workspace_needed; @@ -612,17 +612,6 @@ static int _bch2_fs_compress_init(struct bch_fs *c, u64 features) return 0; } -static int __bch2_fs_compress_init(struct bch_fs *c, u64 features) -{ - int ret; - - pr_verbose_init(c->opts, ""); - ret = _bch2_fs_compress_init(c, features); - pr_verbose_init(c->opts, "ret %i", ret); - - return ret; -} - int bch2_fs_compress_init(struct bch_fs *c) { u64 f = c->sb.features; diff --git a/libbcachefs/ec.c b/libbcachefs/ec.c index d35a59e..efbb7cf 100644 --- a/libbcachefs/ec.c +++ b/libbcachefs/ec.c @@ -1024,7 +1024,7 @@ static void zero_out_rest_of_ec_bucket(struct bch_fs *c, int ret; if (!bch2_dev_get_ioref(ca, WRITE)) { - s->err = -EROFS; + s->err = -BCH_ERR_erofs_no_writes; return; } @@ -1401,7 +1401,7 @@ __bch2_ec_stripe_head_get(struct btree_trans *trans, return ERR_PTR(ret); if (test_bit(BCH_FS_GOING_RO, &c->flags)) { - h = ERR_PTR(-EROFS); + h = ERR_PTR(-BCH_ERR_erofs_no_writes); goto found; } @@ -1774,7 +1774,7 @@ static void __bch2_ec_stop(struct bch_fs *c, struct bch_dev *ca) } goto unlock; found: - h->s->err = -EROFS; + h->s->err = -BCH_ERR_erofs_no_writes; ec_stripe_set_pending(c, h); unlock: mutex_unlock(&h->lock); diff --git a/libbcachefs/errcode.h b/libbcachefs/errcode.h index 1e06d95..d5277ec 100644 --- a/libbcachefs/errcode.h +++ b/libbcachefs/errcode.h @@ -175,6 +175,9 @@ x(EROFS, erofs_no_writes) \ x(EROFS, erofs_journal_err) \ x(EROFS, erofs_sb_err) \ + x(EROFS, erofs_unfixed_errors) \ + x(EROFS, erofs_norecovery) \ + x(EROFS, erofs_nochanges) \ x(EROFS, insufficient_devices) \ x(0, operation_blocked) \ x(BCH_ERR_operation_blocked, btree_cache_cannibalize_lock_blocked) \ diff --git a/libbcachefs/fs-io.c b/libbcachefs/fs-io.c index 36e2886..a806005 100644 --- a/libbcachefs/fs-io.c +++ b/libbcachefs/fs-io.c @@ -3918,10 +3918,6 @@ void bch2_fs_fsio_exit(struct bch_fs *c) int bch2_fs_fsio_init(struct bch_fs *c) { - int ret = 0; - - pr_verbose_init(c->opts, ""); - if (bioset_init(&c->writepage_bioset, 4, offsetof(struct bch_writepage_io, op.wbio.bio), BIOSET_NEED_BVECS)) @@ -3941,8 +3937,7 @@ int bch2_fs_fsio_init(struct bch_fs *c) 1, offsetof(struct nocow_flush, bio), 0)) return -BCH_ERR_ENOMEM_nocow_flush_bioset_init; - pr_verbose_init(c->opts, "ret %i", ret); - return ret; + return 0; } #endif /* NO_BCACHEFS_FS */ diff --git a/libbcachefs/fs.h b/libbcachefs/fs.h index 2e63cb6..6170d21 100644 --- a/libbcachefs/fs.h +++ b/libbcachefs/fs.h @@ -196,6 +196,8 @@ int bch2_vfs_init(void); #else +#define bch2_inode_update_after_write(_trans, _inode, _inode_u, _fields) do {} while (0) + static inline void bch2_evict_subvolume_inodes(struct bch_fs *c, snapshot_id_list *s) {} static inline void bch2_vfs_exit(void) {} diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c index 98fde0b..ddc2782 100644 --- a/libbcachefs/fsck.c +++ b/libbcachefs/fsck.c @@ -350,7 +350,7 @@ static int lookup_lostfound(struct btree_trans *trans, u32 subvol, } /* - * The check_dirents pass has already run, dangling dirents + * The bch2_check_dirents pass has already run, dangling dirents * shouldn't exist here: */ return __lookup_inode(trans, inum, lostfound, &snapshot); @@ -1008,8 +1008,9 @@ fsck_err: } noinline_for_stack -static int check_inodes(struct bch_fs *c, bool full) +int bch2_check_inodes(struct bch_fs *c) { + bool full = c->opts.fsck; struct btree_trans trans; struct btree_iter iter; struct bch_inode_unpacked prev = { 0 }; @@ -1404,8 +1405,7 @@ fsck_err: * Walk extents: verify that extents have a corresponding S_ISREG inode, and * that i_size an i_sectors are consistent */ -noinline_for_stack -static int check_extents(struct bch_fs *c) +int bch2_check_extents(struct bch_fs *c) { struct inode_walker w = inode_walker_init(); struct snapshots_seen s; @@ -1419,8 +1419,6 @@ static int check_extents(struct bch_fs *c) snapshots_seen_init(&s); bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); - bch_verbose(c, "checking extents"); - ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_extents, POS(BCACHEFS_ROOT_INO, 0), BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, @@ -1772,8 +1770,7 @@ fsck_err: * Walk dirents: verify that they all have a corresponding S_ISDIR inode, * validate d_type */ -noinline_for_stack -static int check_dirents(struct bch_fs *c) +int bch2_check_dirents(struct bch_fs *c) { struct inode_walker dir = inode_walker_init(); struct inode_walker target = inode_walker_init(); @@ -1784,8 +1781,6 @@ static int check_dirents(struct bch_fs *c) struct bkey_s_c k; int ret = 0; - bch_verbose(c, "checking dirents"); - snapshots_seen_init(&s); bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); @@ -1847,8 +1842,7 @@ fsck_err: /* * Walk xattrs: verify that they all have a corresponding inode */ -noinline_for_stack -static int check_xattrs(struct bch_fs *c) +int bch2_check_xattrs(struct bch_fs *c) { struct inode_walker inode = inode_walker_init(); struct bch_hash_info hash_info; @@ -1857,8 +1851,6 @@ static int check_xattrs(struct bch_fs *c) struct bkey_s_c k; int ret = 0; - bch_verbose(c, "checking xattrs"); - bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_xattrs, @@ -1932,13 +1924,10 @@ fsck_err: } /* Get root directory, create if it doesn't exist: */ -noinline_for_stack -static int check_root(struct bch_fs *c) +int bch2_check_root(struct bch_fs *c) { int ret; - bch_verbose(c, "checking root directory"); - ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_NOFAIL| BTREE_INSERT_LAZY_RW, @@ -2089,11 +2078,10 @@ fsck_err: /* * Check for unreachable inodes, as well as loops in the directory structure: - * After check_dirents(), if an inode backpointer doesn't exist that means it's + * After bch2_check_dirents(), if an inode backpointer doesn't exist that means it's * unreachable: */ -noinline_for_stack -static int check_directory_structure(struct bch_fs *c) +int bch2_check_directory_structure(struct bch_fs *c) { struct btree_trans trans; struct btree_iter iter; @@ -2376,15 +2364,12 @@ static int check_nlinks_update_hardlinks(struct bch_fs *c, return 0; } -noinline_for_stack -static int check_nlinks(struct bch_fs *c) +int bch2_check_nlinks(struct bch_fs *c) { struct nlink_table links = { 0 }; u64 this_iter_range_start, next_iter_range_start = 0; int ret = 0; - bch_verbose(c, "checking inode nlinks"); - do { this_iter_range_start = next_iter_range_start; next_iter_range_start = U64_MAX; @@ -2442,8 +2427,7 @@ static int fix_reflink_p_key(struct btree_trans *trans, struct btree_iter *iter, return bch2_trans_update(trans, iter, &u->k_i, BTREE_TRIGGER_NORUN); } -noinline_for_stack -static int fix_reflink_p(struct bch_fs *c) +int bch2_fix_reflink_p(struct bch_fs *c) { struct btree_iter iter; struct bkey_s_c k; @@ -2452,8 +2436,6 @@ static int fix_reflink_p(struct bch_fs *c) if (c->sb.version >= bcachefs_metadata_version_reflink_p_fix) return 0; - bch_verbose(c, "fixing reflink_p keys"); - ret = bch2_trans_run(c, for_each_btree_key_commit(&trans, iter, BTREE_ID_extents, POS_MIN, @@ -2466,40 +2448,3 @@ static int fix_reflink_p(struct bch_fs *c) bch_err_fn(c, ret); return ret; } - -/* - * Checks for inconsistencies that shouldn't happen, unless we have a bug. - * Doesn't fix them yet, mainly because they haven't yet been observed: - */ -int bch2_fsck_full(struct bch_fs *c) -{ - int ret; -again: - ret = bch2_fs_check_snapshot_trees(c); - bch2_fs_check_snapshots(c) ?: - bch2_fs_check_subvols(c) ?: - bch2_delete_dead_snapshots(c) ?: - check_inodes(c, true) ?: - check_extents(c) ?: - check_dirents(c) ?: - check_xattrs(c) ?: - check_root(c) ?: - check_directory_structure(c) ?: - check_nlinks(c) ?: - fix_reflink_p(c); - - if (bch2_err_matches(ret, BCH_ERR_need_snapshot_cleanup)) { - set_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags); - goto again; - } - - return ret; -} - -int bch2_fsck_walk_inodes_only(struct bch_fs *c) -{ - return bch2_fs_check_snapshots(c) ?: - bch2_fs_check_subvols(c) ?: - bch2_delete_dead_snapshots(c) ?: - check_inodes(c, false); -} diff --git a/libbcachefs/fsck.h b/libbcachefs/fsck.h index 264f270..90c87b5 100644 --- a/libbcachefs/fsck.h +++ b/libbcachefs/fsck.h @@ -2,7 +2,13 @@ #ifndef _BCACHEFS_FSCK_H #define _BCACHEFS_FSCK_H -int bch2_fsck_full(struct bch_fs *); -int bch2_fsck_walk_inodes_only(struct bch_fs *); +int bch2_check_inodes(struct bch_fs *); +int bch2_check_extents(struct bch_fs *); +int bch2_check_dirents(struct bch_fs *); +int bch2_check_xattrs(struct bch_fs *); +int bch2_check_root(struct bch_fs *); +int bch2_check_directory_structure(struct bch_fs *); +int bch2_check_nlinks(struct bch_fs *); +int bch2_fix_reflink_p(struct bch_fs *); #endif /* _BCACHEFS_FSCK_H */ diff --git a/libbcachefs/io.c b/libbcachefs/io.c index 0e9c23b..33762e4 100644 --- a/libbcachefs/io.c +++ b/libbcachefs/io.c @@ -1645,7 +1645,7 @@ err_bucket_stale: percpu_ref_put(&bch_dev_bkey_exists(c, buckets[i].b.inode)->io_ref); /* We can retry this: */ - ret = BCH_ERR_transaction_restart; + ret = -BCH_ERR_transaction_restart; goto out; } diff --git a/libbcachefs/journal.c b/libbcachefs/journal.c index f33ab45..80a612c 100644 --- a/libbcachefs/journal.c +++ b/libbcachefs/journal.c @@ -494,7 +494,7 @@ unlock: } return ret == JOURNAL_ERR_insufficient_devices - ? -EROFS + ? -BCH_ERR_erofs_journal_err : -BCH_ERR_journal_res_get_blocked; } @@ -1220,12 +1220,8 @@ void bch2_fs_journal_exit(struct journal *j) int bch2_fs_journal_init(struct journal *j) { - struct bch_fs *c = container_of(j, struct bch_fs, journal); static struct lock_class_key res_key; unsigned i; - int ret = 0; - - pr_verbose_init(c->opts, ""); spin_lock_init(&j->lock); spin_lock_init(&j->err_lock); @@ -1242,24 +1238,18 @@ int bch2_fs_journal_init(struct journal *j) ((union journal_res_state) { .cur_entry_offset = JOURNAL_ENTRY_CLOSED_VAL }).v); - if (!(init_fifo(&j->pin, JOURNAL_PIN, GFP_KERNEL))) { - ret = -BCH_ERR_ENOMEM_journal_pin_fifo; - goto out; - } + if (!(init_fifo(&j->pin, JOURNAL_PIN, GFP_KERNEL))) + return -BCH_ERR_ENOMEM_journal_pin_fifo; for (i = 0; i < ARRAY_SIZE(j->buf); i++) { j->buf[i].buf_size = JOURNAL_ENTRY_SIZE_MIN; j->buf[i].data = kvpmalloc(j->buf[i].buf_size, GFP_KERNEL); - if (!j->buf[i].data) { - ret = -BCH_ERR_ENOMEM_journal_buf; - goto out; - } + if (!j->buf[i].data) + return -BCH_ERR_ENOMEM_journal_buf; } j->pin.front = j->pin.back = 1; -out: - pr_verbose_init(c->opts, "ret %i", ret); - return ret; + return 0; } /* debug: */ diff --git a/libbcachefs/journal_reclaim.c b/libbcachefs/journal_reclaim.c index 72486f1..8de83e1 100644 --- a/libbcachefs/journal_reclaim.c +++ b/libbcachefs/journal_reclaim.c @@ -845,10 +845,8 @@ int bch2_journal_flush_device_pins(struct journal *j, int dev_idx) * expects to find devices marked for journal data on unclean mount. */ ret = bch2_journal_meta(&c->journal); - if (ret) { - mutex_unlock(&c->replicas_gc_lock); - return ret; - } + if (ret) + goto err; seq = 0; spin_lock(&j->lock); @@ -867,7 +865,7 @@ int bch2_journal_flush_device_pins(struct journal *j, int dev_idx) spin_lock(&j->lock); } spin_unlock(&j->lock); - +err: ret = bch2_replicas_gc_end(c, ret); mutex_unlock(&c->replicas_gc_lock); diff --git a/libbcachefs/lru.c b/libbcachefs/lru.c index 2387e08..07d1929 100644 --- a/libbcachefs/lru.c +++ b/libbcachefs/lru.c @@ -62,8 +62,7 @@ static int __bch2_lru_set(struct btree_trans *trans, u16 lru_id, EBUG_ON(lru_pos_time(k->k.p) != time); EBUG_ON(k->k.p.offset != dev_bucket); - return bch2_trans_update_buffered(trans, BTREE_ID_lru, k, - key_type == KEY_TYPE_deleted); + return bch2_trans_update_buffered(trans, BTREE_ID_lru, k); } int bch2_lru_del(struct btree_trans *trans, u16 lru_id, u64 dev_bucket, u64 time) diff --git a/libbcachefs/opts.c b/libbcachefs/opts.c index a05c389..0c0c83f 100644 --- a/libbcachefs/opts.c +++ b/libbcachefs/opts.c @@ -16,6 +16,11 @@ const char * const bch2_error_actions[] = { NULL }; +const char * const bch2_version_upgrade_opts[] = { + BCH_VERSION_UPGRADE_OPTS() + NULL +}; + const char * const bch2_sb_features[] = { BCH_SB_FEATURES() NULL diff --git a/libbcachefs/opts.h b/libbcachefs/opts.h index e7cf7e9..e105a74 100644 --- a/libbcachefs/opts.h +++ b/libbcachefs/opts.h @@ -9,6 +9,7 @@ #include "bcachefs_format.h" extern const char * const bch2_error_actions[]; +extern const char * const bch2_version_upgrade_opts[]; extern const char * const bch2_sb_features[]; extern const char * const bch2_sb_compat[]; extern const char * const bch2_btree_ids[]; @@ -388,8 +389,8 @@ enum opt_type { NULL, "Reconstruct alloc btree") \ x(version_upgrade, u8, \ OPT_FS|OPT_MOUNT, \ - OPT_BOOL(), \ - BCH2_NO_SB_OPT, false, \ + OPT_STR(bch2_version_upgrade_opts), \ + BCH_SB_VERSION_UPGRADE, BCH_VERSION_UPGRADE_compatible, \ NULL, "Set superblock to latest version,\n" \ "allowing any new features to be used") \ x(buckets_nouse, u8, \ diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c index 9b49a6b..3b9120b 100644 --- a/libbcachefs/recovery.c +++ b/libbcachefs/recovery.c @@ -624,11 +624,13 @@ static int journal_sort_seq_cmp(const void *_l, const void *_r) return cmp_int(l->journal_seq, r->journal_seq); } -static int bch2_journal_replay(struct bch_fs *c, u64 start_seq, u64 end_seq) +static int bch2_journal_replay(struct bch_fs *c) { struct journal_keys *keys = &c->journal_keys; struct journal_key **keys_sorted, *k; struct journal *j = &c->journal; + u64 start_seq = c->journal_replay_seq_start; + u64 end_seq = c->journal_replay_seq_start; size_t i; int ret; @@ -1026,7 +1028,7 @@ fsck_err: return ret; } -static int bch2_fs_initialize_subvolumes(struct bch_fs *c) +static int bch2_initialize_subvolumes(struct bch_fs *c) { struct bkey_i_snapshot_tree root_tree; struct bkey_i_snapshot root_snapshot; @@ -1107,6 +1109,118 @@ static int bch2_fs_upgrade_for_subvolumes(struct bch_fs *c) return ret; } +static void check_version_upgrade(struct bch_fs *c) +{ + unsigned version = c->sb.version_upgrade_complete ?: c->sb.version; + + if (version < bcachefs_metadata_required_upgrade_below || + (version < bcachefs_metadata_version_current && + c->opts.version_upgrade != BCH_VERSION_UPGRADE_none)) { + struct printbuf buf = PRINTBUF; + + if (version != c->sb.version) { + prt_str(&buf, "version upgrade to "); + bch2_version_to_text(&buf, c->sb.version); + prt_str(&buf, " incomplete:\n"); + } + + prt_str(&buf, "version "); + bch2_version_to_text(&buf, version); + prt_str(&buf, " prior to "); + bch2_version_to_text(&buf, bcachefs_metadata_required_upgrade_below); + prt_str(&buf, ", upgrade and fsck required"); + + bch_info(c, "%s", buf.buf); + printbuf_exit(&buf); + + c->opts.fsck = true; + c->opts.fix_errors = FSCK_OPT_YES; + set_bit(BCH_FS_VERSION_UPGRADE, &c->flags); + } +} + +static int bch2_check_allocations(struct bch_fs *c) +{ + return bch2_gc(c, true, c->opts.norecovery); +} + +static int bch2_set_may_go_rw(struct bch_fs *c) +{ + set_bit(BCH_FS_MAY_GO_RW, &c->flags); + return 0; +} + +struct recovery_pass_fn { + int (*fn)(struct bch_fs *); + const char *name; + unsigned when; +}; + +static struct recovery_pass_fn recovery_passes[] = { +#define x(_fn, _when) { .fn = bch2_##_fn, .name = #_fn, .when = _when }, + BCH_RECOVERY_PASSES() +#undef x +}; + +static bool should_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass) +{ + struct recovery_pass_fn *p = recovery_passes + c->curr_recovery_pass; + + if (c->opts.norecovery && pass > BCH_RECOVERY_PASS_snapshots_read) + return false; + if ((p->when & PASS_FSCK) && c->opts.fsck) + return true; + if ((p->when & PASS_UNCLEAN) && !c->sb.clean) + return true; + if (p->when & PASS_ALWAYS) + return true; + if (p->when >= PASS_UPGRADE(0) && + bch2_version_upgrading_to(c, p->when >> 4)) + return true; + return false; +} + +static int bch2_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass) +{ + int ret; + + c->curr_recovery_pass = pass; + + if (should_run_recovery_pass(c, pass)) { + struct recovery_pass_fn *p = recovery_passes + pass; + + if (!(p->when & PASS_SILENT)) + printk(KERN_INFO bch2_log_msg(c, "%s..."), p->name); + ret = p->fn(c); + if (ret) + return ret; + if (!(p->when & PASS_SILENT)) + printk(KERN_CONT " done\n"); + } + + return 0; +} + +static int bch2_run_recovery_passes(struct bch_fs *c) +{ + int ret = 0; +again: + while (c->curr_recovery_pass < ARRAY_SIZE(recovery_passes)) { + ret = bch2_run_recovery_pass(c, c->curr_recovery_pass); + if (ret) + break; + c->curr_recovery_pass++; + } + + if (bch2_err_matches(ret, BCH_ERR_need_snapshot_cleanup)) { + set_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags); + c->curr_recovery_pass = BCH_RECOVERY_PASS_delete_dead_snapshots; + goto again; + } + + return ret; +} + int bch2_fs_recovery(struct bch_fs *c) { struct bch_sb_field_clean *clean = NULL; @@ -1146,23 +1260,8 @@ int bch2_fs_recovery(struct bch_fs *c) goto err; } - if (!c->opts.nochanges && - c->sb.version < bcachefs_metadata_required_upgrade_below) { - struct printbuf buf = PRINTBUF; - - prt_str(&buf, "version "); - bch2_version_to_text(&buf, c->sb.version); - prt_str(&buf, " prior to "); - bch2_version_to_text(&buf, bcachefs_metadata_required_upgrade_below); - prt_str(&buf, ", upgrade and fsck required"); - - bch_info(c, "%s", buf.buf); - printbuf_exit(&buf); - - c->opts.version_upgrade = true; - c->opts.fsck = true; - c->opts.fix_errors = FSCK_OPT_YES; - } + if (!c->opts.nochanges) + check_version_upgrade(c); if (c->opts.fsck && c->opts.norecovery) { bch_err(c, "cannot select both norecovery and fsck"); @@ -1241,6 +1340,9 @@ use_clean: blacklist_seq = journal_seq = le64_to_cpu(clean->journal_seq) + 1; } + c->journal_replay_seq_start = last_seq; + c->journal_replay_seq_end = blacklist_seq - 1;; + if (c->opts.reconstruct_alloc) { c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info); drop_alloc_keys(&c->journal_keys); @@ -1293,147 +1395,10 @@ use_clean: if (ret) goto err; - bch_verbose(c, "starting alloc read"); - down_read(&c->gc_lock); - ret = c->sb.version < bcachefs_metadata_version_bucket_gens - ? bch2_alloc_read(c) - : bch2_bucket_gens_read(c); - up_read(&c->gc_lock); - if (ret) - goto err; - bch_verbose(c, "alloc read done"); - - bch_verbose(c, "starting stripes_read"); - ret = bch2_stripes_read(c); - if (ret) - goto err; - bch_verbose(c, "stripes_read done"); - - if (c->sb.version < bcachefs_metadata_version_snapshot_2) { - ret = bch2_fs_initialize_subvolumes(c); - if (ret) - goto err; - } - - bch_verbose(c, "reading snapshots table"); - ret = bch2_fs_snapshots_start(c); - if (ret) - goto err; - bch_verbose(c, "reading snapshots done"); - - if (c->opts.fsck) { - bool metadata_only = c->opts.norecovery; - - bch_info(c, "checking allocations"); - ret = bch2_gc(c, true, metadata_only); - if (ret) - goto err; - bch_verbose(c, "done checking allocations"); - - set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags); - - set_bit(BCH_FS_MAY_GO_RW, &c->flags); - - bch_info(c, "starting journal replay, %zu keys", c->journal_keys.nr); - ret = bch2_journal_replay(c, last_seq, blacklist_seq - 1); - if (ret) - goto err; - if (c->opts.verbose || !c->sb.clean) - bch_info(c, "journal replay done"); - - bch_info(c, "checking need_discard and freespace btrees"); - ret = bch2_check_alloc_info(c); - if (ret) - goto err; - bch_verbose(c, "done checking need_discard and freespace btrees"); - - set_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags); - - bch_info(c, "checking lrus"); - ret = bch2_check_lrus(c); - if (ret) - goto err; - bch_verbose(c, "done checking lrus"); - set_bit(BCH_FS_CHECK_LRUS_DONE, &c->flags); - - bch_info(c, "checking backpointers to alloc keys"); - ret = bch2_check_btree_backpointers(c); - if (ret) - goto err; - bch_verbose(c, "done checking backpointers to alloc keys"); - - bch_info(c, "checking backpointers to extents"); - ret = bch2_check_backpointers_to_extents(c); - if (ret) - goto err; - bch_verbose(c, "done checking backpointers to extents"); - - bch_info(c, "checking extents to backpointers"); - ret = bch2_check_extents_to_backpointers(c); - if (ret) - goto err; - bch_verbose(c, "done checking extents to backpointers"); - set_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags); - - bch_info(c, "checking alloc to lru refs"); - ret = bch2_check_alloc_to_lru_refs(c); - if (ret) - goto err; - bch_verbose(c, "done checking alloc to lru refs"); - set_bit(BCH_FS_CHECK_ALLOC_TO_LRU_REFS_DONE, &c->flags); - } else { - set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags); - set_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags); - set_bit(BCH_FS_CHECK_LRUS_DONE, &c->flags); - set_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags); - set_bit(BCH_FS_CHECK_ALLOC_TO_LRU_REFS_DONE, &c->flags); - set_bit(BCH_FS_FSCK_DONE, &c->flags); - - if (c->opts.norecovery) - goto out; - - set_bit(BCH_FS_MAY_GO_RW, &c->flags); - - bch_verbose(c, "starting journal replay, %zu keys", c->journal_keys.nr); - ret = bch2_journal_replay(c, last_seq, blacklist_seq - 1); - if (ret) - goto err; - if (c->opts.verbose || !c->sb.clean) - bch_info(c, "journal replay done"); - } - - ret = bch2_fs_freespace_init(c); + ret = bch2_run_recovery_passes(c); if (ret) goto err; - if (c->sb.version < bcachefs_metadata_version_bucket_gens && - c->opts.version_upgrade) { - bch_info(c, "initializing bucket_gens"); - ret = bch2_bucket_gens_init(c); - if (ret) - goto err; - bch_verbose(c, "bucket_gens init done"); - } - - if (c->sb.version < bcachefs_metadata_version_snapshot_2) { - ret = bch2_fs_upgrade_for_subvolumes(c); - if (ret) - goto err; - } - - if (c->opts.fsck) { - ret = bch2_fsck_full(c); - if (ret) - goto err; - bch_verbose(c, "fsck done"); - } else if (!c->sb.clean) { - bch_verbose(c, "checking for deleted inodes"); - ret = bch2_fsck_walk_inodes_only(c); - if (ret) - goto err; - bch_verbose(c, "check inodes done"); - } - if (enabled_qtypes(c)) { bch_verbose(c, "reading quotas"); ret = bch2_fs_quota_read(c); @@ -1443,9 +1408,8 @@ use_clean: } mutex_lock(&c->sb_lock); - if (c->opts.version_upgrade) { - c->disk_sb.sb->version = cpu_to_le16(bcachefs_metadata_version_current); - c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALL); + if (BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb) != c->sb.version) { + SET_BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb, c->sb.version); write_sb = true; } @@ -1504,8 +1468,6 @@ out: if (ret) bch_err_fn(c, ret); - else - bch_verbose(c, "ret %s", bch2_err_str(ret)); return ret; err: fsck_err: @@ -1528,20 +1490,15 @@ int bch2_fs_initialize(struct bch_fs *c) c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done); c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_bformat_overflow_done); - if (c->sb.version < bcachefs_metadata_version_inode_v3) - c->opts.version_upgrade = true; - - if (c->opts.version_upgrade) { + if (c->opts.version_upgrade != BCH_VERSION_UPGRADE_none) { c->disk_sb.sb->version = cpu_to_le16(bcachefs_metadata_version_current); + SET_BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb, bcachefs_metadata_version_current); c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALL); bch2_write_super(c); } mutex_unlock(&c->sb_lock); - set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags); - set_bit(BCH_FS_CHECK_LRUS_DONE, &c->flags); - set_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags); - set_bit(BCH_FS_CHECK_ALLOC_TO_LRU_REFS_DONE, &c->flags); + c->curr_recovery_pass = ARRAY_SIZE(recovery_passes); set_bit(BCH_FS_MAY_GO_RW, &c->flags); set_bit(BCH_FS_FSCK_DONE, &c->flags); @@ -1589,12 +1546,12 @@ int bch2_fs_initialize(struct bch_fs *c) if (ret) goto err; - ret = bch2_fs_initialize_subvolumes(c); + ret = bch2_initialize_subvolumes(c); if (ret) goto err; bch_verbose(c, "reading snapshots table"); - ret = bch2_fs_snapshots_start(c); + ret = bch2_snapshots_read(c); if (ret) goto err; bch_verbose(c, "reading snapshots done"); diff --git a/libbcachefs/reflink.c b/libbcachefs/reflink.c index 08a05cc..39f711d 100644 --- a/libbcachefs/reflink.c +++ b/libbcachefs/reflink.c @@ -97,22 +97,21 @@ bool bch2_reflink_v_merge(struct bch_fs *c, struct bkey_s _l, struct bkey_s_c _r return l.v->refcount == r.v->refcount && bch2_extent_merge(c, _l, _r); } -static inline void check_indirect_extent_deleting(struct bkey_i *new, unsigned *flags) -{ - if ((*flags & BTREE_TRIGGER_INSERT) && !*bkey_refcount(new)) { - new->k.type = KEY_TYPE_deleted; - new->k.size = 0; - set_bkey_val_u64s(&new->k, 0);; - *flags &= ~BTREE_TRIGGER_INSERT; - } -} - int bch2_trans_mark_reflink_v(struct btree_trans *trans, enum btree_id btree_id, unsigned level, struct bkey_s_c old, struct bkey_i *new, unsigned flags) { - check_indirect_extent_deleting(new, &flags); + if (!(flags & BTREE_TRIGGER_OVERWRITE)) { + struct bkey_i_reflink_v *r = bkey_i_to_reflink_v(new); + + if (!r->v.refcount) { + r->k.type = KEY_TYPE_deleted; + r->k.size = 0; + set_bkey_val_u64s(&r->k, 0); + return 0; + } + } return bch2_trans_mark_extent(trans, btree_id, level, old, new, flags); } @@ -127,7 +126,7 @@ int bch2_indirect_inline_data_invalid(const struct bch_fs *c, struct bkey_s_c k, } void bch2_indirect_inline_data_to_text(struct printbuf *out, - struct bch_fs *c, struct bkey_s_c k) + struct bch_fs *c, struct bkey_s_c k) { struct bkey_s_c_indirect_inline_data d = bkey_s_c_to_indirect_inline_data(k); unsigned datalen = bkey_inline_data_bytes(k.k); @@ -142,7 +141,16 @@ int bch2_trans_mark_indirect_inline_data(struct btree_trans *trans, struct bkey_s_c old, struct bkey_i *new, unsigned flags) { - check_indirect_extent_deleting(new, &flags); + if (!(flags & BTREE_TRIGGER_OVERWRITE)) { + struct bkey_i_indirect_inline_data *r = + bkey_i_to_indirect_inline_data(new); + + if (!r->v.refcount) { + r->k.type = KEY_TYPE_deleted; + r->k.size = 0; + set_bkey_val_u64s(&r->k, 0); + } + } return 0; } diff --git a/libbcachefs/replicas.c b/libbcachefs/replicas.c index d4c1d43..5b591c5 100644 --- a/libbcachefs/replicas.c +++ b/libbcachefs/replicas.c @@ -462,6 +462,9 @@ int bch2_replicas_gc_end(struct bch_fs *c, int ret) { lockdep_assert_held(&c->replicas_gc_lock); + if (ret) + goto err; + mutex_lock(&c->sb_lock); percpu_down_write(&c->mark_lock); diff --git a/libbcachefs/subvolume.c b/libbcachefs/subvolume.c index f26397a..f3852c4 100644 --- a/libbcachefs/subvolume.c +++ b/libbcachefs/subvolume.c @@ -408,7 +408,7 @@ fsck_err: * And, make sure it points to a subvolume within that snapshot tree, or correct * it to point to the oldest subvolume within that snapshot tree. */ -int bch2_fs_check_snapshot_trees(struct bch_fs *c) +int bch2_check_snapshot_trees(struct bch_fs *c) { struct btree_iter iter; struct bkey_s_c k; @@ -612,7 +612,7 @@ fsck_err: return ret; } -int bch2_fs_check_snapshots(struct bch_fs *c) +int bch2_check_snapshots(struct bch_fs *c) { struct btree_iter iter; struct bkey_s_c k; @@ -692,7 +692,7 @@ fsck_err: return ret; } -int bch2_fs_check_subvols(struct bch_fs *c) +int bch2_check_subvols(struct bch_fs *c) { struct btree_iter iter; struct bkey_s_c k; @@ -713,7 +713,7 @@ void bch2_fs_snapshots_exit(struct bch_fs *c) genradix_free(&c->snapshots); } -int bch2_fs_snapshots_start(struct bch_fs *c) +int bch2_snapshots_read(struct bch_fs *c) { struct btree_iter iter; struct bkey_s_c k; @@ -1151,7 +1151,7 @@ static int bch2_delete_dead_snapshots_hook(struct btree_trans *trans, set_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags); - if (!test_bit(BCH_FS_FSCK_DONE, &c->flags)) + if (c->curr_recovery_pass <= BCH_RECOVERY_PASS_delete_dead_snapshots) return 0; bch2_delete_dead_snapshots_async(c); diff --git a/libbcachefs/subvolume.h b/libbcachefs/subvolume.h index 105410e..daa9a6b 100644 --- a/libbcachefs/subvolume.h +++ b/libbcachefs/subvolume.h @@ -130,12 +130,12 @@ static inline int snapshot_list_add(struct bch_fs *c, snapshot_id_list *s, u32 i return ret; } -int bch2_fs_check_snapshot_trees(struct bch_fs *); -int bch2_fs_check_snapshots(struct bch_fs *); -int bch2_fs_check_subvols(struct bch_fs *); +int bch2_check_snapshot_trees(struct bch_fs *); +int bch2_check_snapshots(struct bch_fs *); +int bch2_check_subvols(struct bch_fs *); void bch2_fs_snapshots_exit(struct bch_fs *); -int bch2_fs_snapshots_start(struct bch_fs *); +int bch2_snapshots_read(struct bch_fs *); int bch2_subvolume_invalid(const struct bch_fs *, struct bkey_s_c, unsigned, struct printbuf *); diff --git a/libbcachefs/super-io.c b/libbcachefs/super-io.c index 472f5b2..b174003 100644 --- a/libbcachefs/super-io.c +++ b/libbcachefs/super-io.c @@ -445,6 +445,7 @@ static void bch2_sb_update(struct bch_fs *c) c->sb.user_uuid = src->user_uuid; c->sb.version = le16_to_cpu(src->version); c->sb.version_min = le16_to_cpu(src->version_min); + c->sb.version_upgrade_complete = BCH_SB_VERSION_UPGRADE_COMPLETE(src) ?: c->sb.version; c->sb.nr_devices = src->nr_devices; c->sb.clean = BCH_SB_CLEAN(src); c->sb.encryption_type = BCH_SB_ENCRYPTION_TYPE(src); @@ -611,8 +612,6 @@ int bch2_read_super(const char *path, struct bch_opts *opts, __le64 *i; int ret; - pr_verbose_init(*opts, ""); - memset(sb, 0, sizeof(*sb)); sb->mode = FMODE_READ; sb->have_bio = true; @@ -719,7 +718,6 @@ got_super: goto err_no_print; } out: - pr_verbose_init(*opts, "ret %i", ret); printbuf_exit(&err); return ret; err: @@ -811,7 +809,7 @@ int bch2_write_super(struct bch_fs *c) closure_init_stack(cl); memset(&sb_written, 0, sizeof(sb_written)); - if (c->opts.version_upgrade) { + if (test_bit(BCH_FS_VERSION_UPGRADE, &c->flags)) { c->disk_sb.sb->magic = BCHFS_MAGIC; c->disk_sb.sb->layout.magic = BCHFS_MAGIC; } @@ -1188,7 +1186,19 @@ int bch2_fs_mark_dirty(struct bch_fs *c) mutex_lock(&c->sb_lock); SET_BCH_SB_CLEAN(c->disk_sb.sb, false); + + if (BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb) > bcachefs_metadata_version_current) + SET_BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb, bcachefs_metadata_version_current); + + if (test_bit(BCH_FS_VERSION_UPGRADE, &c->flags) || + c->sb.version > bcachefs_metadata_version_current) + c->disk_sb.sb->version = cpu_to_le16(bcachefs_metadata_version_current); + + if (test_bit(BCH_FS_VERSION_UPGRADE, &c->flags)) + c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALL); + c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALWAYS); + c->disk_sb.sb->compat[0] &= cpu_to_le64((1ULL << BCH_COMPAT_NR) - 1); ret = bch2_write_super(c); mutex_unlock(&c->sb_lock); @@ -1532,6 +1542,11 @@ void bch2_sb_to_text(struct printbuf *out, struct bch_sb *sb, bch2_version_to_text(out, le16_to_cpu(sb->version)); prt_newline(out); + prt_str(out, "Version upgrade complete:"); + prt_tab(out); + bch2_version_to_text(out, BCH_SB_VERSION_UPGRADE_COMPLETE(sb)); + prt_newline(out); + prt_printf(out, "Oldest version on disk:"); prt_tab(out); bch2_version_to_text(out, le16_to_cpu(sb->version_min)); diff --git a/libbcachefs/super.c b/libbcachefs/super.c index 6fa805c..426d2ac 100644 --- a/libbcachefs/super.c +++ b/libbcachefs/super.c @@ -361,20 +361,21 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) if (test_bit(BCH_FS_INITIAL_GC_UNFIXED, &c->flags)) { bch_err(c, "cannot go rw, unfixed btree errors"); - return -EROFS; + return -BCH_ERR_erofs_unfixed_errors; } if (test_bit(BCH_FS_RW, &c->flags)) return 0; + if (c->opts.norecovery) + return -BCH_ERR_erofs_norecovery; + /* * nochanges is used for fsck -n mode - we have to allow going rw * during recovery for that to work: */ - if (c->opts.norecovery || - (c->opts.nochanges && - (!early || c->opts.read_only))) - return -EROFS; + if (c->opts.nochanges && (!early || c->opts.read_only)) + return -BCH_ERR_erofs_nochanges; bch_info(c, "going read-write"); @@ -653,8 +654,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) unsigned i, iter_size; int ret = 0; - pr_verbose_init(opts, ""); - c = kvpmalloc(sizeof(struct bch_fs), GFP_KERNEL|__GFP_ZERO); if (!c) { c = ERR_PTR(-BCH_ERR_ENOMEM_fs_alloc); @@ -865,7 +864,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) if (ret) goto err; out: - pr_verbose_init(opts, "ret %i", PTR_ERR_OR_ZERO(c)); return c; err: bch2_fs_free(c); @@ -1181,8 +1179,6 @@ static int bch2_dev_alloc(struct bch_fs *c, unsigned dev_idx) struct bch_dev *ca = NULL; int ret = 0; - pr_verbose_init(c->opts, ""); - if (bch2_fs_init_fault("dev_alloc")) goto err; @@ -1193,14 +1189,11 @@ static int bch2_dev_alloc(struct bch_fs *c, unsigned dev_idx) ca->fs = c; bch2_dev_attach(c, ca, dev_idx); -out: - pr_verbose_init(c->opts, "ret %i", ret); return ret; err: if (ca) bch2_dev_free(ca); - ret = -BCH_ERR_ENOMEM_dev_alloc; - goto out; + return -BCH_ERR_ENOMEM_dev_alloc; } static int __bch2_dev_attach_bdev(struct bch_dev *ca, struct bch_sb_handle *sb) @@ -1878,8 +1871,6 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices, if (!try_module_get(THIS_MODULE)) return ERR_PTR(-ENODEV); - pr_verbose_init(opts, ""); - if (!nr_devices) { ret = -EINVAL; goto err; @@ -1951,8 +1942,6 @@ out: kfree(sb); printbuf_exit(&errbuf); module_put(THIS_MODULE); - pr_verbose_init(opts, "ret %s (%i)", bch2_err_str(PTR_ERR_OR_ZERO(c)), - PTR_ERR_OR_ZERO(c)); return c; err_print: pr_err("bch_fs_open err opening %s: %s", diff --git a/libbcachefs/util.c b/libbcachefs/util.c index 38886bf..ae4f6de 100644 --- a/libbcachefs/util.c +++ b/libbcachefs/util.c @@ -756,10 +756,10 @@ void bch2_bio_map(struct bio *bio, void *base, size_t size) } } -int bch2_bio_alloc_pages(struct bio *bio, size_t size, gfp_t gfp_mask) +int bch2_bio_alloc_pages_noprof(struct bio *bio, size_t size, gfp_t gfp_mask) { while (size) { - struct page *page = alloc_pages(gfp_mask, 0); + struct page *page = alloc_pages_noprof(gfp_mask, 0); unsigned len = min_t(size_t, PAGE_SIZE, size); if (!page) diff --git a/libbcachefs/util.h b/libbcachefs/util.h index 70bde2e..ca1b799 100644 --- a/libbcachefs/util.h +++ b/libbcachefs/util.h @@ -60,12 +60,13 @@ static inline void vpfree(void *p, size_t size) free_pages((unsigned long) p, get_order(size)); } -static inline void *vpmalloc(size_t size, gfp_t gfp_mask) +static inline void *vpmalloc_noprof(size_t size, gfp_t gfp_mask) { - return (void *) __get_free_pages(gfp_mask|__GFP_NOWARN, - get_order(size)) ?: + return (void *) get_free_pages_noprof(gfp_mask|__GFP_NOWARN, + get_order(size)) ?: __vmalloc(size, gfp_mask); } +#define vpmalloc(_size, _gfp) alloc_hooks(vpmalloc_noprof(_size, _gfp)) static inline void kvpfree(void *p, size_t size) { @@ -75,12 +76,13 @@ static inline void kvpfree(void *p, size_t size) vpfree(p, size); } -static inline void *kvpmalloc(size_t size, gfp_t gfp_mask) +static inline void *kvpmalloc_noprof(size_t size, gfp_t gfp_mask) { return size < PAGE_SIZE - ? kmalloc(size, gfp_mask) - : vpmalloc(size, gfp_mask); + ? kmalloc_noprof(size, gfp_mask) + : vpmalloc_noprof(size, gfp_mask); } +#define kvpmalloc(_size, _gfp) alloc_hooks(kvpmalloc_noprof(_size, _gfp)) int mempool_init_kvpmalloc_pool(mempool_t *, int, size_t); @@ -530,7 +532,9 @@ static inline unsigned fract_exp_two(unsigned x, unsigned fract_bits) } void bch2_bio_map(struct bio *bio, void *base, size_t); -int bch2_bio_alloc_pages(struct bio *, size_t, gfp_t); +int bch2_bio_alloc_pages_noprof(struct bio *, size_t, gfp_t); +#define bch2_bio_alloc_pages(_bio, _size, _gfp) \ + alloc_hooks(bch2_bio_alloc_pages_noprof(_bio, _size, _gfp)) static inline sector_t bdev_sectors(struct block_device *bdev) { diff --git a/libbcachefs/xattr.c b/libbcachefs/xattr.c index 867cc68..70f7800 100644 --- a/libbcachefs/xattr.c +++ b/libbcachefs/xattr.c @@ -166,31 +166,23 @@ err1: return ret < 0 && bch2_err_matches(ret, ENOENT) ? -ENODATA : ret; } -int bch2_xattr_get(struct bch_fs *c, struct bch_inode_info *inode, - const char *name, void *buffer, size_t size, int type) -{ - return bch2_trans_do(c, NULL, NULL, 0, - bch2_xattr_get_trans(&trans, inode, name, buffer, size, type)); -} - int bch2_xattr_set(struct btree_trans *trans, subvol_inum inum, + struct bch_inode_unpacked *inode_u, const struct bch_hash_info *hash_info, const char *name, const void *value, size_t size, int type, int flags) { + struct bch_fs *c = trans->c; struct btree_iter inode_iter = { NULL }; - struct bch_inode_unpacked inode_u; int ret; - /* - * We need to do an inode update so that bi_journal_sync gets updated - * and fsync works: - * - * Perhaps we should be updating bi_mtime too? - */ + ret = bch2_inode_peek(trans, &inode_iter, inode_u, inum, BTREE_ITER_INTENT); + if (ret) + return ret; + + inode_u->bi_ctime = bch2_current_time(c); - ret = bch2_inode_peek(trans, &inode_iter, &inode_u, inum, BTREE_ITER_INTENT) ?: - bch2_inode_write(trans, &inode_iter, &inode_u); + ret = bch2_inode_write(trans, &inode_iter, inode_u); bch2_trans_iter_exit(trans, &inode_iter); if (ret) @@ -365,9 +357,9 @@ static int bch2_xattr_get_handler(const struct xattr_handler *handler, { struct bch_inode_info *inode = to_bch_ei(vinode); struct bch_fs *c = inode->v.i_sb->s_fs_info; - int ret; + int ret = bch2_trans_do(c, NULL, NULL, 0, + bch2_xattr_get_trans(&trans, inode, name, buffer, size, handler->flags)); - ret = bch2_xattr_get(c, inode, name, buffer, size, handler->flags); return bch2_err_class(ret); } @@ -380,12 +372,20 @@ static int bch2_xattr_set_handler(const struct xattr_handler *handler, struct bch_inode_info *inode = to_bch_ei(vinode); struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch_hash_info hash = bch2_hash_info_init(c, &inode->ei_inode); + struct bch_inode_unpacked inode_u; + struct btree_trans trans; int ret; - ret = bch2_trans_do(c, NULL, NULL, 0, - bch2_xattr_set(&trans, inode_inum(inode), &hash, - name, value, size, + bch2_trans_init(&trans, c, 0, 0); + + ret = commit_do(&trans, NULL, NULL, 0, + bch2_xattr_set(&trans, inode_inum(inode), &inode_u, + &hash, name, value, size, handler->flags, flags)); + if (!ret) + bch2_inode_update_after_write(&trans, inode, &inode_u, ATTR_CTIME); + bch2_trans_exit(&trans); + return bch2_err_class(ret); } diff --git a/libbcachefs/xattr.h b/libbcachefs/xattr.h index 214cbba..f5a52e3 100644 --- a/libbcachefs/xattr.h +++ b/libbcachefs/xattr.h @@ -38,11 +38,9 @@ struct xattr_handler; struct bch_hash_info; struct bch_inode_info; -int bch2_xattr_get(struct bch_fs *, struct bch_inode_info *, - const char *, void *, size_t, int); - +/* Exported for cmd_migrate.c in tools: */ int bch2_xattr_set(struct btree_trans *, subvol_inum, - const struct bch_hash_info *, + struct bch_inode_unpacked *, const struct bch_hash_info *, const char *, const void *, size_t, int, int); ssize_t bch2_xattr_list(struct dentry *, char *, size_t);