X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libbcachefs%2Fbtree_types.h;h=d530307046f4cf93bdb4c4063409a9fff5e705c4;hb=799439a88ab7afe99e5052894c20ea77133a1551;hp=cab3de0def4e3407d27914a0f29384072ac7c6d2;hpb=ae43a58d97fc00e31770142da832fb8a249808eb;p=bcachefs-tools-debian diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h index cab3de0..d530307 100644 --- a/libbcachefs/btree_types.h +++ b/libbcachefs/btree_types.h @@ -4,12 +4,14 @@ #include #include -#include -//#include "bkey_methods.h" +#include "btree_key_cache_types.h" #include "buckets_types.h" #include "darray.h" +#include "errcode.h" #include "journal_types.h" +#include "replicas_types.h" +#include "six.h" struct open_bucket; struct btree_update; @@ -160,18 +162,8 @@ struct btree_cache { /* Number of elements in live + freeable lists */ unsigned used; unsigned reserve; - unsigned freed; - unsigned not_freed_lock_intent; - unsigned not_freed_lock_write; - unsigned not_freed_dirty; - unsigned not_freed_read_in_flight; - unsigned not_freed_write_in_flight; - unsigned not_freed_noevict; - unsigned not_freed_write_blocked; - unsigned not_freed_will_make_reachable; - unsigned not_freed_access_bit; atomic_t dirty; - struct shrinker shrink; + struct shrinker *shrink; /* * If we need to allocate memory for a new btree node and that @@ -192,31 +184,33 @@ struct btree_node_iter { /* * Iterate over all possible positions, synthesizing deleted keys for holes: */ -#define BTREE_ITER_SLOTS (1 << 0) -#define BTREE_ITER_ALL_LEVELS (1 << 1) +static const __maybe_unused u16 BTREE_ITER_SLOTS = 1 << 0; /* * Indicates that intent locks should be taken on leaf nodes, because we expect * to be doing updates: */ -#define BTREE_ITER_INTENT (1 << 2) +static const __maybe_unused u16 BTREE_ITER_INTENT = 1 << 1; /* * Causes the btree iterator code to prefetch additional btree nodes from disk: */ -#define BTREE_ITER_PREFETCH (1 << 3) +static const __maybe_unused u16 BTREE_ITER_PREFETCH = 1 << 2; /* * Used in bch2_btree_iter_traverse(), to indicate whether we're searching for * @pos or the first key strictly greater than @pos */ -#define BTREE_ITER_IS_EXTENTS (1 << 4) -#define BTREE_ITER_NOT_EXTENTS (1 << 5) -#define BTREE_ITER_CACHED (1 << 6) -#define BTREE_ITER_WITH_KEY_CACHE (1 << 7) -#define BTREE_ITER_WITH_UPDATES (1 << 8) -#define BTREE_ITER_WITH_JOURNAL (1 << 9) -#define __BTREE_ITER_ALL_SNAPSHOTS (1 << 10) -#define BTREE_ITER_ALL_SNAPSHOTS (1 << 11) -#define BTREE_ITER_FILTER_SNAPSHOTS (1 << 12) -#define BTREE_ITER_NOPRESERVE (1 << 13) +static const __maybe_unused u16 BTREE_ITER_IS_EXTENTS = 1 << 3; +static const __maybe_unused u16 BTREE_ITER_NOT_EXTENTS = 1 << 4; +static const __maybe_unused u16 BTREE_ITER_CACHED = 1 << 5; +static const __maybe_unused u16 BTREE_ITER_WITH_KEY_CACHE = 1 << 6; +static const __maybe_unused u16 BTREE_ITER_WITH_UPDATES = 1 << 7; +static const __maybe_unused u16 BTREE_ITER_WITH_JOURNAL = 1 << 8; +static const __maybe_unused u16 __BTREE_ITER_ALL_SNAPSHOTS = 1 << 9; +static const __maybe_unused u16 BTREE_ITER_ALL_SNAPSHOTS = 1 << 10; +static const __maybe_unused u16 BTREE_ITER_FILTER_SNAPSHOTS = 1 << 11; +static const __maybe_unused u16 BTREE_ITER_NOPRESERVE = 1 << 12; +static const __maybe_unused u16 BTREE_ITER_CACHED_NOFILL = 1 << 13; +static const __maybe_unused u16 BTREE_ITER_KEY_CACHE_FILL = 1 << 14; +#define __BTREE_ITER_FLAGS_END 15 enum btree_path_uptodate { BTREE_ITER_UPTODATE = 0, @@ -224,16 +218,21 @@ enum btree_path_uptodate { BTREE_ITER_NEED_TRAVERSE = 2, }; +#if defined(CONFIG_BCACHEFS_LOCK_TIME_STATS) || defined(CONFIG_BCACHEFS_DEBUG) +#define TRACK_PATH_ALLOCATED +#endif + +typedef u16 btree_path_idx_t; + struct btree_path { - u8 idx; - u8 sorted_idx; + btree_path_idx_t sorted_idx; u8 ref; u8 intent_ref; /* btree_iter_copy starts here: */ struct bpos pos; - enum btree_id btree_id:4; + enum btree_id btree_id:5; bool cached:1; bool preserve:1; enum btree_path_uptodate uptodate:2; @@ -243,7 +242,7 @@ struct btree_path { */ bool should_be_locked:1; unsigned level:3, - locks_want:4; + locks_want:3; u8 nodes_locked; struct btree_path_level { @@ -254,7 +253,7 @@ struct btree_path { u64 lock_taken_time; #endif } l[BTREE_MAX_DEPTH]; -#ifdef CONFIG_BCACHEFS_DEBUG +#ifdef TRACK_PATH_ALLOCATED unsigned long ip_allocated; #endif }; @@ -264,6 +263,15 @@ static inline struct btree_path_level *path_l(struct btree_path *path) return path->l + path->level; } +static inline unsigned long btree_path_ip_allocated(struct btree_path *path) +{ +#ifdef TRACK_PATH_ALLOCATED + return path->ip_allocated; +#else + return _THIS_IP_; +#endif +} + /* * @pos - iterator's current position * @level - current btree depth @@ -273,13 +281,12 @@ static inline struct btree_path_level *path_l(struct btree_path *path) */ struct btree_iter { struct btree_trans *trans; - struct btree_path *path; - struct btree_path *update_path; - struct btree_path *key_cache_path; + btree_path_idx_t path; + btree_path_idx_t update_path; + btree_path_idx_t key_cache_path; - enum btree_id btree_id:4; - unsigned min_depth:3; - unsigned advanced:1; + enum btree_id btree_id:8; + u8 min_depth; /* btree_iter_copy starts here: */ u16 flags; @@ -288,7 +295,6 @@ struct btree_iter { unsigned snapshot; struct bpos pos; - struct bpos pos_after_commit; /* * Current unpacked key - so that bch2_btree_iter_next()/ * bch2_btree_iter_next_slot() can correctly advance pos. @@ -297,37 +303,11 @@ struct btree_iter { /* BTREE_ITER_WITH_JOURNAL: */ size_t journal_idx; - struct bpos journal_pos; -#ifdef CONFIG_BCACHEFS_DEBUG +#ifdef TRACK_PATH_ALLOCATED unsigned long ip_allocated; #endif }; -struct btree_key_cache_freelist { - struct bkey_cached *objs[16]; - unsigned nr; -}; - -struct btree_key_cache { - struct mutex lock; - struct rhashtable table; - bool table_init_done; - struct list_head freed_pcpu; - struct list_head freed_nonpcpu; - struct shrinker shrink; - unsigned shrink_iter; - struct btree_key_cache_freelist __percpu *pcpu_freed; - - atomic_long_t nr_freed; - atomic_long_t nr_keys; - atomic_long_t nr_dirty; -}; - -struct bkey_cached_key { - u32 btree_id; - struct bpos pos; -} __packed __aligned(4); - #define BKEY_CACHED_ACCESSED 0 #define BKEY_CACHED_DIRTY 1 @@ -343,8 +323,8 @@ struct bkey_cached { struct rhash_head hash; struct list_head list; - struct journal_preres res; struct journal_entry_pin journal; + u64 seq; struct bkey_i *k; }; @@ -370,19 +350,16 @@ struct btree_insert_entry { * to the size of the key being overwritten in the btree: */ u8 old_btree_u64s; + btree_path_idx_t path; struct bkey_i *k; - struct btree_path *path; /* key being overwritten: */ struct bkey old_k; const struct bch_val *old_v; unsigned long ip_allocated; }; -#ifndef CONFIG_LOCKDEP -#define BTREE_ITER_MAX 64 -#else -#define BTREE_ITER_MAX 32 -#endif +#define BTREE_ITER_INITIAL 64 +#define BTREE_ITER_MAX (1U << 10) struct btree_trans_commit_hook; typedef int (btree_trans_commit_hook_fn)(struct btree_trans *, struct btree_trans_commit_hook *); @@ -396,66 +373,108 @@ struct btree_trans_commit_hook { #define BTREE_TRANS_MAX_LOCK_HOLD_TIME_NS 10000 +struct btree_trans_paths { + unsigned long nr_paths; + struct btree_path paths[]; +}; + struct btree_trans { struct bch_fs *c; - const char *fn; - struct closure ref; - struct list_head list; - u64 last_begin_time; - u8 lock_may_not_fail; - u8 lock_must_abort; - struct btree_bkey_cached_common *locking; - struct six_lock_waiter locking_wait; + unsigned long *paths_allocated; + struct btree_path *paths; + btree_path_idx_t *sorted; + struct btree_insert_entry *updates; - int srcu_idx; + void *mem; + unsigned mem_top; + unsigned mem_bytes; + btree_path_idx_t nr_sorted; + btree_path_idx_t nr_paths; + btree_path_idx_t nr_paths_max; u8 fn_idx; - u8 nr_sorted; u8 nr_updates; - u8 traverse_all_idx; + u8 lock_must_abort; + bool lock_may_not_fail:1; + bool srcu_held:1; bool used_mempool:1; bool in_traverse_all:1; + bool paths_sorted:1; bool memory_allocation_failure:1; - bool is_initial_gc:1; + bool journal_transaction_names:1; bool journal_replay_not_finished:1; + bool notrace_relock_fail:1; + bool write_locked:1; enum bch_errcode restarted:16; u32 restart_count; - unsigned long last_restarted_ip; - - /* - * For when bch2_trans_update notices we'll be splitting a compressed - * extent: - */ - unsigned extra_journal_res; - unsigned nr_max_paths; - - u64 paths_allocated; - unsigned mem_top; - unsigned mem_max; - unsigned mem_bytes; - void *mem; + u64 last_begin_time; + unsigned long last_begin_ip; + unsigned long last_restarted_ip; + unsigned long srcu_lock_time; - u8 sorted[BTREE_ITER_MAX]; - struct btree_path *paths; - struct btree_insert_entry *updates; + const char *fn; + struct btree_bkey_cached_common *locking; + struct six_lock_waiter locking_wait; + int srcu_idx; /* update path: */ + u16 journal_entries_u64s; + u16 journal_entries_size; + struct jset_entry *journal_entries; + struct btree_trans_commit_hook *hooks; - DARRAY(u64) extra_journal_entries; struct journal_entry_pin *journal_pin; struct journal_res journal_res; - struct journal_preres journal_preres; u64 *journal_seq; struct disk_reservation *disk_res; - unsigned flags; unsigned journal_u64s; - unsigned journal_preres_u64s; + unsigned extra_disk_res; /* XXX kill */ struct replicas_delta_list *fs_usage_deltas; + + /* Entries before this are zeroed out on every bch2_trans_get() call */ + + struct list_head list; + struct closure ref; + + unsigned long _paths_allocated[BITS_TO_LONGS(BTREE_ITER_INITIAL)]; + struct btree_trans_paths trans_paths; + struct btree_path _paths[BTREE_ITER_INITIAL]; + btree_path_idx_t _sorted[BTREE_ITER_INITIAL + 4]; + struct btree_insert_entry _updates[BTREE_ITER_INITIAL]; }; +static inline struct btree_path *btree_iter_path(struct btree_trans *trans, struct btree_iter *iter) +{ + return trans->paths + iter->path; +} + +static inline struct btree_path *btree_iter_key_cache_path(struct btree_trans *trans, struct btree_iter *iter) +{ + return iter->key_cache_path + ? trans->paths + iter->key_cache_path + : NULL; +} + +#define BCH_BTREE_WRITE_TYPES() \ + x(initial, 0) \ + x(init_next_bset, 1) \ + x(cache_reclaim, 2) \ + x(journal_reclaim, 3) \ + x(interior, 4) + +enum btree_write_type { +#define x(t, n) BTREE_WRITE_##t, + BCH_BTREE_WRITE_TYPES() +#undef x + BTREE_WRITE_TYPE_NR, +}; + +#define BTREE_WRITE_TYPE_MASK (roundup_pow_of_two(BTREE_WRITE_TYPE_NR) - 1) +#define BTREE_WRITE_TYPE_BITS ilog2(roundup_pow_of_two(BTREE_WRITE_TYPE_NR)) + #define BTREE_FLAGS() \ x(read_in_flight) \ x(read_error) \ @@ -475,6 +494,8 @@ struct btree_trans { x(never_write) enum btree_flags { + /* First bits for btree node write type */ + BTREE_NODE_FLAGS_START = BTREE_WRITE_TYPE_BITS - 1, #define x(flag) BTREE_NODE_##flag, BTREE_FLAGS() #undef x @@ -603,16 +624,17 @@ static inline unsigned bset_byte_offset(struct btree *b, void *i) } enum btree_node_type { -#define x(kwd, val) BKEY_TYPE_##kwd = val, + BKEY_TYPE_btree, +#define x(kwd, val, ...) BKEY_TYPE_##kwd = val + 1, BCH_BTREE_IDS() #undef x - BKEY_TYPE_btree, + BKEY_TYPE_NR }; /* Type of a key in btree @id at level @level: */ static inline enum btree_node_type __btree_node_type(unsigned level, enum btree_id id) { - return level ? BKEY_TYPE_btree : (enum btree_node_type) id; + return level ? BKEY_TYPE_btree : (unsigned) id + 1; } /* Type of keys @b contains: */ @@ -621,57 +643,78 @@ static inline enum btree_node_type btree_node_type(struct btree *b) return __btree_node_type(b->c.level, b->c.btree_id); } +const char *bch2_btree_node_type_str(enum btree_node_type); + #define BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS \ - ((1U << BKEY_TYPE_extents)| \ - (1U << BKEY_TYPE_alloc)| \ - (1U << BKEY_TYPE_inodes)| \ - (1U << BKEY_TYPE_stripes)| \ - (1U << BKEY_TYPE_reflink)| \ - (1U << BKEY_TYPE_btree)) + (BIT_ULL(BKEY_TYPE_extents)| \ + BIT_ULL(BKEY_TYPE_alloc)| \ + BIT_ULL(BKEY_TYPE_inodes)| \ + BIT_ULL(BKEY_TYPE_stripes)| \ + BIT_ULL(BKEY_TYPE_reflink)| \ + BIT_ULL(BKEY_TYPE_btree)) #define BTREE_NODE_TYPE_HAS_MEM_TRIGGERS \ - ((1U << BKEY_TYPE_alloc)| \ - (1U << BKEY_TYPE_inodes)| \ - (1U << BKEY_TYPE_stripes)| \ - (1U << BKEY_TYPE_snapshots)) + (BIT_ULL(BKEY_TYPE_alloc)| \ + BIT_ULL(BKEY_TYPE_inodes)| \ + BIT_ULL(BKEY_TYPE_stripes)| \ + BIT_ULL(BKEY_TYPE_snapshots)) #define BTREE_NODE_TYPE_HAS_TRIGGERS \ (BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS| \ BTREE_NODE_TYPE_HAS_MEM_TRIGGERS) -#define BTREE_ID_IS_EXTENTS \ - ((1U << BTREE_ID_extents)| \ - (1U << BTREE_ID_reflink)| \ - (1U << BTREE_ID_freespace)) +static inline bool btree_node_type_needs_gc(enum btree_node_type type) +{ + return BTREE_NODE_TYPE_HAS_TRIGGERS & BIT_ULL(type); +} static inline bool btree_node_type_is_extents(enum btree_node_type type) { - return (1U << type) & BTREE_ID_IS_EXTENTS; -} + const unsigned mask = 0 +#define x(name, nr, flags, ...) |((!!((flags) & BTREE_ID_EXTENTS)) << (nr + 1)) + BCH_BTREE_IDS() +#undef x + ; -#define BTREE_ID_HAS_SNAPSHOTS \ - ((1U << BTREE_ID_extents)| \ - (1U << BTREE_ID_inodes)| \ - (1U << BTREE_ID_dirents)| \ - (1U << BTREE_ID_xattrs)) + return (1U << type) & mask; +} -#define BTREE_ID_HAS_PTRS \ - ((1U << BTREE_ID_extents)| \ - (1U << BTREE_ID_reflink)) +static inline bool btree_id_is_extents(enum btree_id btree) +{ + return btree_node_type_is_extents(__btree_node_type(0, btree)); +} static inline bool btree_type_has_snapshots(enum btree_id id) { - return (1 << id) & BTREE_ID_HAS_SNAPSHOTS; + const unsigned mask = 0 +#define x(name, nr, flags, ...) |((!!((flags) & BTREE_ID_SNAPSHOTS)) << nr) + BCH_BTREE_IDS() +#undef x + ; + + return (1U << id) & mask; } -static inline bool btree_type_has_ptrs(enum btree_id id) +static inline bool btree_type_has_snapshot_field(enum btree_id id) { - return (1 << id) & BTREE_ID_HAS_PTRS; + const unsigned mask = 0 +#define x(name, nr, flags, ...) |((!!((flags) & (BTREE_ID_SNAPSHOT_FIELD|BTREE_ID_SNAPSHOTS))) << nr) + BCH_BTREE_IDS() +#undef x + ; + + return (1U << id) & mask; } -static inline bool btree_node_type_needs_gc(enum btree_node_type type) +static inline bool btree_type_has_ptrs(enum btree_id id) { - return BTREE_NODE_TYPE_HAS_TRIGGERS & (1U << type); + const unsigned mask = 0 +#define x(name, nr, flags, ...) |((!!((flags) & BTREE_ID_DATA)) << nr) + BCH_BTREE_IDS() +#undef x + ; + + return (1U << id) & mask; } struct btree_root { @@ -684,15 +727,6 @@ struct btree_root { s8 error; }; -enum btree_insert_ret { - BTREE_INSERT_OK, - /* leaf node needs to be split */ - BTREE_INSERT_BTREE_NODE_FULL, - BTREE_INSERT_NEED_MARK_REPLICAS, - BTREE_INSERT_NEED_JOURNAL_RES, - BTREE_INSERT_NEED_JOURNAL_RECLAIM, -}; - enum btree_gc_coalesce_fail_reason { BTREE_GC_COALESCE_FAIL_RESERVE_GET, BTREE_GC_COALESCE_FAIL_KEYLIST_REALLOC,