X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libbcachefs%2Fbcachefs.h;h=82b0706a89365ee4ebdf2f1a77069c3cab93d2c5;hb=87179c7a6e2a210ea57951d444a3055e883d08fa;hp=7f9c1087f168ffab421bd04b195c08f10a42350e;hpb=da6a35689518599b381c285cd9505ab8d58f7c73;p=bcachefs-tools-debian diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h index 7f9c108..82b0706 100644 --- a/libbcachefs/bcachefs.h +++ b/libbcachefs/bcachefs.h @@ -208,14 +208,18 @@ #include "fifo.h" #include "nocow_locking_types.h" #include "opts.h" +#include "seqmutex.h" #include "util.h" #ifdef CONFIG_BCACHEFS_DEBUG #define BCH_WRITE_REF_DEBUG #endif +#ifndef dynamic_fault #define dynamic_fault(...) 0 -#define race_fault(...) 0 +#endif + +#define race_fault(...) dynamic_fault("bcachefs:race") #define trace_and_count(_c, _name, ...) \ do { \ @@ -287,6 +291,11 @@ do { \ #define bch_err_inum_offset_ratelimited(c, _inum, _offset, fmt, ...) \ printk_ratelimited(KERN_ERR bch2_fmt_inum_offset(c, _inum, _offset, fmt), ##__VA_ARGS__) +#define bch_err_fn(_c, _ret) \ + bch_err(_c, "%s(): error %s", __func__, bch2_err_str(_ret)) +#define bch_err_msg(_c, _ret, _msg) \ + bch_err(_c, "%s(): error " _msg " %s", __func__, bch2_err_str(_ret)) + #define bch_verbose(c, fmt, ...) \ do { \ if ((c)->opts.verbose) \ @@ -442,6 +451,7 @@ enum gc_phase { GC_PHASE_BTREE_need_discard, GC_PHASE_BTREE_backpointers, GC_PHASE_BTREE_bucket_gens, + GC_PHASE_BTREE_snapshot_trees, GC_PHASE_PENDING_DELETE, }; @@ -479,7 +489,7 @@ struct bch_dev { * Committed by bch2_write_super() -> bch_fs_mi_update() */ struct bch_member_cpu mi; - uuid_le uuid; + __uuid_t uuid; char name[BDEVNAME_SIZE]; struct bch_sb_handle disk_sb; @@ -516,9 +526,6 @@ struct bch_dev { unsigned nr_open_buckets; unsigned nr_btree_reserve; - open_bucket_idx_t open_buckets_partial[OPEN_BUCKETS_COUNT]; - open_bucket_idx_t open_buckets_partial_nr; - size_t inc_gen_needs_gc; size_t inc_gen_really_needs_gc; size_t buckets_waiting_on_journal; @@ -556,12 +563,6 @@ enum { BCH_FS_CLEAN_SHUTDOWN, /* fsck passes: */ - BCH_FS_TOPOLOGY_REPAIR_DONE, - BCH_FS_INITIAL_GC_DONE, /* kill when we enumerate fsck passes */ - BCH_FS_CHECK_ALLOC_DONE, - BCH_FS_CHECK_LRUS_DONE, - BCH_FS_CHECK_BACKPOINTERS_DONE, - BCH_FS_CHECK_ALLOC_TO_LRU_REFS_DONE, BCH_FS_FSCK_DONE, BCH_FS_INITIAL_GC_UNFIXED, /* kill when we enumerate fsck errors */ BCH_FS_NEED_ANOTHER_GC, @@ -629,18 +630,6 @@ struct btree_path_buf { #define REPLICAS_DELTA_LIST_MAX (1U << 16) -struct snapshot_t { - u32 parent; - u32 children[2]; - u32 subvol; /* Nonzero only if a subvolume points to this node: */ - u32 equiv; -}; - -typedef struct { - u32 subvol; - u64 inum; -} subvol_inum; - #define BCACHEFS_ROOT_SUBVOL_INUM \ ((subvol_inum) { BCACHEFS_ROOT_SUBVOL, BCACHEFS_ROOT_INO }) @@ -655,7 +644,6 @@ typedef struct { x(fallocate) \ x(discard) \ x(invalidate) \ - x(move) \ x(delete_dead_snapshots) \ x(snapshot_delete_pagecache) \ x(sysfs) @@ -667,6 +655,48 @@ enum bch_write_ref { BCH_WRITE_REF_NR, }; +#define PASS_SILENT BIT(0) +#define PASS_FSCK BIT(1) +#define PASS_UNCLEAN BIT(2) +#define PASS_ALWAYS BIT(3) + +#define BCH_RECOVERY_PASSES() \ + x(alloc_read, PASS_ALWAYS) \ + x(stripes_read, PASS_ALWAYS) \ + x(initialize_subvolumes, 0) \ + x(snapshots_read, PASS_ALWAYS) \ + x(check_topology, 0) \ + x(check_allocations, PASS_FSCK) \ + x(set_may_go_rw, PASS_ALWAYS|PASS_SILENT) \ + x(journal_replay, PASS_ALWAYS) \ + x(check_alloc_info, PASS_FSCK) \ + x(check_lrus, PASS_FSCK) \ + x(check_btree_backpointers, PASS_FSCK) \ + x(check_backpointers_to_extents,PASS_FSCK) \ + 
x(check_extents_to_backpointers,PASS_FSCK) \ + x(check_alloc_to_lru_refs, PASS_FSCK) \ + x(fs_freespace_init, PASS_ALWAYS|PASS_SILENT) \ + x(bucket_gens_init, 0) \ + x(check_snapshot_trees, PASS_FSCK) \ + x(check_snapshots, PASS_FSCK) \ + x(check_subvols, PASS_FSCK) \ + x(delete_dead_snapshots, PASS_FSCK|PASS_UNCLEAN) \ + x(fs_upgrade_for_subvolumes, 0) \ + x(check_inodes, PASS_FSCK|PASS_UNCLEAN) \ + x(check_extents, PASS_FSCK) \ + x(check_dirents, PASS_FSCK) \ + x(check_xattrs, PASS_FSCK) \ + x(check_root, PASS_FSCK) \ + x(check_directory_structure, PASS_FSCK) \ + x(check_nlinks, PASS_FSCK) \ + x(fix_reflink_p, 0) \ + +enum bch_recovery_pass { +#define x(n, when) BCH_RECOVERY_PASS_##n, + BCH_RECOVERY_PASSES() +#undef x +}; + struct bch_fs { struct closure cl; @@ -713,11 +743,12 @@ struct bch_fs { /* Updated by bch2_sb_update():*/ struct { - uuid_le uuid; - uuid_le user_uuid; + __uuid_t uuid; + __uuid_t user_uuid; u16 version; u16 version_min; + u16 version_upgrade_complete; u8 nr_devices; u8 clean; @@ -743,9 +774,10 @@ struct bch_fs { struct mutex sb_lock; /* snapshot.c: */ - GENRADIX(struct snapshot_t) snapshots; - struct bch_snapshot_table __rcu *snapshot_table; + struct snapshot_table __rcu *snapshots; + size_t snapshot_table_size; struct mutex snapshot_table_lock; + struct work_struct snapshot_delete_work; struct work_struct snapshot_wait_for_pagecache_and_delete_work; snapshot_id_list snapshots_unlinked; @@ -755,7 +787,8 @@ struct bch_fs { struct bio_set btree_bio; struct workqueue_struct *io_complete_wq; - struct btree_root btree_roots[BTREE_ID_NR]; + struct btree_root btree_roots_known[BTREE_ID_NR]; + DARRAY(struct btree_root) btree_roots_extra; struct mutex btree_root_lock; struct btree_cache btree_cache; @@ -791,7 +824,7 @@ struct bch_fs { } btree_write_stats[BTREE_WRITE_TYPE_NR]; /* btree_iter.c: */ - struct mutex btree_trans_lock; + struct seqmutex btree_trans_lock; struct list_head btree_trans_list; mempool_t btree_paths_pool; mempool_t btree_trans_mem_pool; @@ -809,6 +842,12 @@ struct bch_fs { struct workqueue_struct *btree_io_complete_wq; /* copygc needs its own workqueue for index updates.. */ struct workqueue_struct *copygc_wq; + /* + * Use a dedicated wq for write ref holder tasks. Required to avoid + * dependency problems with other wq tasks that can block on ref + * draining, such as read-only transition. 
+ */ + struct workqueue_struct *write_ref_wq; /* ALLOCATION */ struct bch_devs_mask rw_devs[BCH_DATA_NR]; @@ -859,6 +898,9 @@ struct bch_fs { struct open_bucket open_buckets[OPEN_BUCKETS_COUNT]; open_bucket_idx_t open_buckets_hash[OPEN_BUCKETS_COUNT]; + open_bucket_idx_t open_buckets_partial[OPEN_BUCKETS_COUNT]; + open_bucket_idx_t open_buckets_partial_nr; + struct write_point btree_write_point; struct write_point rebalance_write_point; @@ -922,20 +964,24 @@ struct bch_fs { mempool_t large_bkey_pool; + /* MOVE.C */ + struct list_head moving_context_list; + struct mutex moving_context_lock; + + struct list_head data_progress_list; + struct mutex data_progress_lock; + /* REBALANCE */ struct bch_fs_rebalance rebalance; /* COPYGC */ struct task_struct *copygc_thread; struct write_point copygc_write_point; + s64 copygc_wait_at; s64 copygc_wait; bool copygc_running; wait_queue_head_t copygc_running_wq; - /* DATA PROGRESS STATS */ - struct list_head data_progress_list; - struct mutex data_progress_lock; - /* STRIPES: */ GENRADIX(struct stripe) stripes; GENRADIX(struct gc_stripe) gc_stripes; @@ -952,20 +998,23 @@ struct bch_fs { struct list_head ec_stripe_new_list; struct mutex ec_stripe_new_lock; + wait_queue_head_t ec_stripe_new_wait; struct work_struct ec_stripe_create_work; u64 ec_stripe_hint; - struct bio_set ec_bioset; - struct work_struct ec_stripe_delete_work; - struct llist_head ec_stripe_delete_list; + + struct bio_set ec_bioset; /* REFLINK */ - u64 reflink_hint; reflink_gc_table reflink_gc_table; size_t reflink_gc_nr; + /* fs.c */ + struct list_head vfs_inodes_list; + struct mutex vfs_inodes_lock; + /* VFS IO PATH - fs-io.c */ struct bio_set writepage_bioset; struct bio_set dio_write_bioset; @@ -980,6 +1029,13 @@ struct bch_fs { /* QUOTAS */ struct bch_memquota_type quotas[QTYP_NR]; + /* RECOVERY */ + u64 journal_replay_seq_start; + u64 journal_replay_seq_end; + enum bch_recovery_pass curr_recovery_pass; + /* bitmap of explicitly enabled recovery passes: */ + u64 recovery_passes_explicit; + /* DEBUG JUNK */ struct dentry *fs_debug_dir; struct dentry *btree_debug_dir; @@ -1123,4 +1179,23 @@ static inline bool bch2_dev_exists2(const struct bch_fs *c, unsigned dev) return dev < c->sb.nr_devices && c->devs[dev]; } +/* + * For when we need to rewind recovery passes and run a pass we skipped: + */ +static inline int bch2_run_explicit_recovery_pass(struct bch_fs *c, + enum bch_recovery_pass pass) +{ + c->recovery_passes_explicit |= BIT_ULL(pass); + + if (c->curr_recovery_pass >= pass) { + c->curr_recovery_pass = pass; + return -BCH_ERR_restart_recovery; + } else { + return 0; + } +} + +#define BKEY_PADDED_ONSTACK(key, pad) \ + struct { struct bkey_i key; __u64 key ## _pad[pad]; } + #endif /* _BCACHEFS_H */
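
The recovery-pass machinery added above follows the usual x-macro pattern: enum bch_recovery_pass expands BCH_RECOVERY_PASSES() with x(n, when) -> BCH_RECOVERY_PASS_##n, so each pass gets a sequentially numbered constant in list order; struct bch_fs tracks the pass currently being run (curr_recovery_pass), and recovery_passes_explicit is a bitmap of passes that were explicitly requested, which is what bch2_run_explicit_recovery_pass() sets before rewinding with -BCH_ERR_restart_recovery. The sketch below shows how a driver loop could consume the same x-macro. It is an illustration only, not taken from this diff: the names recovery_pass_fn, recovery_passes[], should_run_recovery_pass(), run_recovery_passes() and the generated bch2_<pass>() callbacks are assumptions, PASS_SILENT is assumed to affect only log output, and the PASS_FSCK/PASS_UNCLEAN checks against c->opts.fsck and c->sb.clean are a plausible reading of the flag names rather than a statement of what recovery.c actually does.

/*
 * Sketch: build a table of passes from the same x-macro that generated
 * enum bch_recovery_pass, then walk it under control of the new bch_fs
 * fields.  Illustrative names throughout.
 */
struct recovery_pass_fn {
	int		(*fn)(struct bch_fs *);	/* assumed callback signature */
	const char	*name;
	unsigned	when;			/* PASS_* flags from above */
};

static const struct recovery_pass_fn recovery_passes[] = {
#define x(_fn, _when)	{ .fn = bch2_##_fn, .name = #_fn, .when = _when },
	BCH_RECOVERY_PASSES()
#undef x
};

static bool should_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass)
{
	const struct recovery_pass_fn *p = recovery_passes + pass;

	/* Explicitly requested (see bch2_run_explicit_recovery_pass()): */
	if (c->recovery_passes_explicit & BIT_ULL(pass))
		return true;
	if ((p->when & PASS_FSCK) && c->opts.fsck)
		return true;
	if ((p->when & PASS_UNCLEAN) && !c->sb.clean)
		return true;
	if (p->when & PASS_ALWAYS)
		return true;
	return false;
}

static int run_recovery_passes(struct bch_fs *c)
{
	while (c->curr_recovery_pass < ARRAY_SIZE(recovery_passes)) {
		const struct recovery_pass_fn *p =
			recovery_passes + c->curr_recovery_pass;

		if (should_run_recovery_pass(c, c->curr_recovery_pass)) {
			int ret = p->fn(c);

			/*
			 * A pass may call bch2_run_explicit_recovery_pass()
			 * on an earlier pass; that rewinds curr_recovery_pass
			 * and returns this error so we restart from there:
			 */
			if (ret == -BCH_ERR_restart_recovery)
				continue;
			if (ret)
				return ret;
		}
		c->curr_recovery_pass++;
	}
	return 0;
}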
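
Likewise for the new write_ref_wq field: its comment explains that work items holding filesystem write refs need their own workqueue so they cannot queue behind an item that is itself waiting for those refs to drain (such as the read-only transition). A minimal sketch of how such a workqueue might be set up and used, assuming the standard kernel workqueue API; the workqueue name, flags and the example work item are assumptions, only the write_ref_wq field itself comes from this diff:

	/* At filesystem startup (illustrative name and flags): */
	c->write_ref_wq = alloc_workqueue("bcachefs_write_ref",
					  WQ_HIGHPRI, 0);
	if (!c->write_ref_wq)
		return -ENOMEM;

	/*
	 * Work whose handler takes one of the write refs listed above
	 * (discard, invalidate, ...) is then queued here instead of on a
	 * shared workqueue, so ref draining can always make progress:
	 */
	queue_work(c->write_ref_wq, &example_write_ref_holding_work);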