X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libbcachefs%2Fbtree_types.h;h=6b6333df88f2efb701ad409058cb4b164a2acb21;hb=15b24c732749339e3f65f030e7e68624b1b4bfbd;hp=f942ccf62ff428a07bca323703e8e0a3b04542d8;hpb=b422ff58ba8eedcfef3b67b66468660f07b0cfc1;p=bcachefs-tools-debian

diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h
index f942ccf..6b6333d 100644
--- a/libbcachefs/btree_types.h
+++ b/libbcachefs/btree_types.h
@@ -6,9 +6,12 @@
 #include <linux/list.h>
 #include <linux/rhashtable.h>
 
-#include "bkey_methods.h"
+//#include "bkey_methods.h"
 #include "buckets_types.h"
+#include "darray.h"
+#include "errcode.h"
 #include "journal_types.h"
+#include "replicas_types.h"
 
 struct open_bucket;
 struct btree_update;
@@ -62,6 +65,7 @@ struct btree_bkey_cached_common {
 struct six_lock lock;
 u8 level;
 u8 btree_id;
+ bool cached;
 };
 
 struct btree {
@@ -152,11 +156,22 @@ struct btree_cache {
 struct mutex lock;
 struct list_head live;
 struct list_head freeable;
- struct list_head freed;
+ struct list_head freed_pcpu;
+ struct list_head freed_nonpcpu;
 
 /* Number of elements in live + freeable lists */
 unsigned used;
 unsigned reserve;
+ unsigned freed;
+ unsigned not_freed_lock_intent;
+ unsigned not_freed_lock_write;
+ unsigned not_freed_dirty;
+ unsigned not_freed_read_in_flight;
+ unsigned not_freed_write_in_flight;
+ unsigned not_freed_noevict;
+ unsigned not_freed_write_blocked;
+ unsigned not_freed_will_make_reachable;
+ unsigned not_freed_access_bit;
 atomic_t dirty;
 struct shrinker shrink;
@@ -176,60 +191,96 @@ struct btree_node_iter {
 } data[MAX_BSETS];
 };
 
-enum btree_iter_type {
- BTREE_ITER_KEYS,
- BTREE_ITER_NODES,
- BTREE_ITER_CACHED,
-};
-
-#define BTREE_ITER_TYPE ((1 << 2) - 1)
-
 /*
 * Iterate over all possible positions, synthesizing deleted keys for holes:
 */
-#define BTREE_ITER_SLOTS (1 << 2)
+static const u16 BTREE_ITER_SLOTS = 1 << 0;
+static const u16 BTREE_ITER_ALL_LEVELS = 1 << 1;
 /*
 * Indicates that intent locks should be taken on leaf nodes, because we expect
 * to be doing updates:
 */
-#define BTREE_ITER_INTENT (1 << 3)
+static const u16 BTREE_ITER_INTENT = 1 << 2;
 /*
 * Causes the btree iterator code to prefetch additional btree nodes from disk:
 */
-#define BTREE_ITER_PREFETCH (1 << 4)
-/*
- * Indicates that this iterator should not be reused until transaction commit,
- * either because a pending update references it or because the update depends
- * on that particular key being locked (e.g. by the str_hash code, for hash
- * table consistency)
- */
-#define BTREE_ITER_KEEP_UNTIL_COMMIT (1 << 5)
+static const u16 BTREE_ITER_PREFETCH = 1 << 3;
 /*
 * Used in bch2_btree_iter_traverse(), to indicate whether we're searching for
 * @pos or the first key strictly greater than @pos
 */
-#define BTREE_ITER_IS_EXTENTS (1 << 6)
-#define BTREE_ITER_ERROR (1 << 7)
-#define BTREE_ITER_SET_POS_AFTER_COMMIT (1 << 8)
-#define BTREE_ITER_CACHED_NOFILL (1 << 9)
-#define BTREE_ITER_CACHED_NOCREATE (1 << 10)
-#define BTREE_ITER_NOT_EXTENTS (1 << 11)
-#define BTREE_ITER_ALL_SNAPSHOTS (1 << 12)
-
-enum btree_iter_uptodate {
+static const u16 BTREE_ITER_IS_EXTENTS = 1 << 4;
+static const u16 BTREE_ITER_NOT_EXTENTS = 1 << 5;
+static const u16 BTREE_ITER_CACHED = 1 << 6;
+static const u16 BTREE_ITER_WITH_KEY_CACHE = 1 << 7;
+static const u16 BTREE_ITER_WITH_UPDATES = 1 << 8;
+static const u16 BTREE_ITER_WITH_JOURNAL = 1 << 9;
+static const u16 __BTREE_ITER_ALL_SNAPSHOTS = 1 << 10;
+static const u16 BTREE_ITER_ALL_SNAPSHOTS = 1 << 11;
+static const u16 BTREE_ITER_FILTER_SNAPSHOTS = 1 << 12;
+static const u16 BTREE_ITER_NOPRESERVE = 1 << 13;
+static const u16 BTREE_ITER_CACHED_NOFILL = 1 << 14;
+static const u16 BTREE_ITER_KEY_CACHE_FILL = 1 << 15;
+#define __BTREE_ITER_FLAGS_END 16
+
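Note: the patch converts the iterator flags from #defines to typed u16 constants; __BTREE_ITER_FLAGS_END records that all 16 bits of the iterator's flags word are now spoken for. A self-contained sketch of how such typed flag constants compose and are tested (illustrative names, not the bcachefs API):

	#include <stdint.h>
	#include <stdio.h>

	static const uint16_t ITER_SLOTS    = 1 << 0;	/* synthesize keys for holes */
	static const uint16_t ITER_INTENT   = 1 << 2;	/* take intent locks on leaves */
	static const uint16_t ITER_PREFETCH = 1 << 3;	/* prefetch btree nodes */

	int main(void)
	{
		uint16_t flags = ITER_SLOTS | ITER_INTENT;

		if (flags & ITER_INTENT)
			printf("intent locks will be taken\n");
		if (!(flags & ITER_PREFETCH))
			printf("no prefetch requested\n");
		return 0;
	}

Unlike #defines, typed constants are visible to the compiler and debugger, at the cost of not being usable in preprocessor conditionals.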
+enum btree_path_uptodate {
 BTREE_ITER_UPTODATE = 0,
- BTREE_ITER_NEED_PEEK = 1,
- BTREE_ITER_NEED_RELOCK = 2,
- BTREE_ITER_NEED_TRAVERSE = 3,
+ BTREE_ITER_NEED_RELOCK = 1,
+ BTREE_ITER_NEED_TRAVERSE = 2,
 };
 
-#define BTREE_ITER_NO_NODE_GET_LOCKS ((struct btree *) 1)
-#define BTREE_ITER_NO_NODE_DROP ((struct btree *) 2)
-#define BTREE_ITER_NO_NODE_LOCK_ROOT ((struct btree *) 3)
-#define BTREE_ITER_NO_NODE_UP ((struct btree *) 4)
-#define BTREE_ITER_NO_NODE_DOWN ((struct btree *) 5)
-#define BTREE_ITER_NO_NODE_INIT ((struct btree *) 6)
-#define BTREE_ITER_NO_NODE_ERROR ((struct btree *) 7)
+#if defined(CONFIG_BCACHEFS_LOCK_TIME_STATS) || defined(CONFIG_BCACHEFS_DEBUG)
+#define TRACK_PATH_ALLOCATED
+#endif
+
+struct btree_path {
+ u8 idx;
+ u8 sorted_idx;
+ u8 ref;
+ u8 intent_ref;
+
+ /* btree_iter_copy starts here: */
+ struct bpos pos;
+
+ enum btree_id btree_id:5;
+ bool cached:1;
+ bool preserve:1;
+ enum btree_path_uptodate uptodate:2;
+ /*
+ * When true, failing to relock this path will cause the transaction to
+ * restart:
+ */
+ bool should_be_locked:1;
+ unsigned level:3,
+ locks_want:3;
+ u8 nodes_locked;
+
+ struct btree_path_level {
+ struct btree *b;
+ struct btree_node_iter iter;
+ u32 lock_seq;
+#ifdef CONFIG_BCACHEFS_LOCK_TIME_STATS
+ u64 lock_taken_time;
+#endif
+ } l[BTREE_MAX_DEPTH];
+#ifdef TRACK_PATH_ALLOCATED
+ unsigned long ip_allocated;
+#endif
+};
+
+static inline struct btree_path_level *path_l(struct btree_path *path)
+{
+ return path->l + path->level;
+}
+
+static inline unsigned long btree_path_ip_allocated(struct btree_path *path)
+{
+#ifdef TRACK_PATH_ALLOCATED
+ return path->ip_allocated;
+#else
+ return _THIS_IP_;
+#endif
+}
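Note: a btree_path holds one btree_path_level per depth, and path_l() above simply indexes that array at the path's current level. A minimal sketch of the same shape (illustrative types, not the bcachefs structures):

	#include <stdio.h>

	#define MAX_DEPTH 4

	struct level { unsigned lock_seq; };

	struct path {
		unsigned level;			/* current depth, 0 = leaf */
		struct level l[MAX_DEPTH];	/* one entry per btree level */
	};

	/* same pointer arithmetic as path_l() */
	static struct level *cur_l(struct path *p)
	{
		return p->l + p->level;
	}

	int main(void)
	{
		struct path p = { .level = 1 };

		cur_l(&p)->lock_seq = 42;
		printf("%u\n", p.l[1].lock_seq);	/* prints 42 */
		return 0;
	}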
 
 /*
 * @pos - iterator's current position
@@ -240,63 +291,51 @@ enum btree_iter_uptodate {
 */
 struct btree_iter {
 struct btree_trans *trans;
- struct bpos pos;
- /* what we're searching for/what the iterator actually points to: */
- struct bpos real_pos;
- struct bpos pos_after_commit;
- /* When we're filtering by snapshot, the snapshot ID we're looking for: */
- unsigned snapshot;
+ struct btree_path *path;
+ struct btree_path *update_path;
+ struct btree_path *key_cache_path;
 
- u16 flags;
- u8 idx;
+ enum btree_id btree_id:8;
+ unsigned min_depth:3;
+ unsigned advanced:1;
 
- enum btree_id btree_id:4;
- enum btree_iter_uptodate uptodate:4;
- unsigned level:4,
- min_depth:4,
- locks_want:4,
- nodes_locked:4,
- nodes_intent_locked:4;
+ /* btree_iter_copy starts here: */
+ u16 flags;
 
- struct btree_iter_level {
- struct btree *b;
- struct btree_node_iter iter;
- u32 lock_seq;
- } l[BTREE_MAX_DEPTH];
+ /* When we're filtering by snapshot, the snapshot ID we're looking for: */
+ unsigned snapshot;
+ struct bpos pos;
 
 /*
 * Current unpacked key - so that bch2_btree_iter_next()/
 * bch2_btree_iter_next_slot() can correctly advance pos.
 */
 struct bkey k;
+
+ /* BTREE_ITER_WITH_JOURNAL: */
+ size_t journal_idx;
+ struct bpos journal_pos;
+#ifdef TRACK_PATH_ALLOCATED
 unsigned long ip_allocated;
+#endif
 };
 
-static inline enum btree_iter_type
-btree_iter_type(const struct btree_iter *iter)
-{
- return iter->flags & BTREE_ITER_TYPE;
-}
-
-static inline bool btree_iter_is_cached(const struct btree_iter *iter)
-{
- return btree_iter_type(iter) == BTREE_ITER_CACHED;
-}
-
-static inline struct btree_iter_level *iter_l(struct btree_iter *iter)
-{
- return iter->l + iter->level;
-}
+struct btree_key_cache_freelist {
+ struct bkey_cached *objs[16];
+ unsigned nr;
+};
 
 struct btree_key_cache {
 struct mutex lock;
 struct rhashtable table;
 bool table_init_done;
- struct list_head freed;
+ struct list_head freed_pcpu;
+ struct list_head freed_nonpcpu;
 struct shrinker shrink;
 unsigned shrink_iter;
+ struct btree_key_cache_freelist __percpu *pcpu_freed;
 
- size_t nr_freed;
+ atomic_long_t nr_freed;
 atomic_long_t nr_keys;
 atomic_long_t nr_dirty;
 };
@@ -304,7 +343,7 @@ struct btree_key_cache {
 struct bkey_cached_key {
 u32 btree_id;
 struct bpos pos;
-} __attribute__((packed, aligned(4)));
+} __packed __aligned(4);
 
 #define BKEY_CACHED_ACCESSED 0
 #define BKEY_CACHED_DIRTY 1
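Note: the switch to __packed __aligned(4) is only a spelling change, but the attributes themselves matter: bkey_cached_key is hashed as raw bytes by the rhashtable, so compiler-inserted padding would feed uninitialized bytes into the hash. A sketch of the invariant (the bpos stand-in below is illustrative, not the real struct):

	#include <stdint.h>

	struct pos {
		uint64_t inode, offset;
		uint32_t snapshot;
	} __attribute__((packed));

	struct cached_key {
		uint32_t btree_id;
		struct pos pos;
	} __attribute__((packed, aligned(4)));

	/* no padding anywhere: the key hashes deterministically */
	_Static_assert(sizeof(struct cached_key) == 4 + 20, "unexpected padding");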
@@ -313,7 +352,7 @@ struct bkey_cached {
 struct btree_bkey_cached_common c;
 
 unsigned long flags;
- u8 u64s;
+ u16 u64s;
 bool valid;
 u32 btree_trans_barrier_seq;
 struct bkey_cached_key key;
@@ -323,26 +362,42 @@ struct bkey_cached {
 struct journal_preres res;
 struct journal_entry_pin journal;
+ u64 seq;
 
 struct bkey_i *k;
 };
 
+static inline struct bpos btree_node_pos(struct btree_bkey_cached_common *b)
+{
+ return !b->cached
+ ? container_of(b, struct btree, c)->key.k.p
+ : container_of(b, struct bkey_cached, c)->key.pos;
+}
+
 struct btree_insert_entry {
- unsigned trigger_flags;
+ unsigned flags;
 u8 bkey_type;
 enum btree_id btree_id:8;
- u8 level;
- unsigned trans_triggers_run:1;
- unsigned is_extent:1;
+ u8 level:4;
+ bool cached:1;
+ bool insert_trigger_run:1;
+ bool overwrite_trigger_run:1;
+ bool key_cache_already_flushed:1;
+ /*
+ * @old_k may be a key from the journal; @old_btree_u64s always refers
+ * to the size of the key being overwritten in the btree:
+ */
+ u8 old_btree_u64s;
 struct bkey_i *k;
- struct btree_iter *iter;
+ struct btree_path *path;
+ u64 seq;
+ /* key being overwritten: */
+ struct bkey old_k;
+ const struct bch_val *old_v;
+ unsigned long ip_allocated;
 };
 
-#ifndef CONFIG_LOCKDEP
 #define BTREE_ITER_MAX 64
-#else
-#define BTREE_ITER_MAX 32
-#endif
 
 struct btree_trans_commit_hook;
 typedef int (btree_trans_commit_hook_fn)(struct btree_trans *, struct btree_trans_commit_hook *);
@@ -352,55 +407,120 @@ struct btree_trans_commit_hook {
 struct btree_trans_commit_hook *next;
 };
 
+#define BTREE_TRANS_MEM_MAX (1U << 16)
+
+#define BTREE_TRANS_MAX_LOCK_HOLD_TIME_NS 10000
+
 struct btree_trans {
 struct bch_fs *c;
-#ifdef CONFIG_BCACHEFS_DEBUG
+ const char *fn;
+ struct closure ref;
 struct list_head list;
- struct btree *locking;
- unsigned locking_iter_idx;
- struct bpos locking_pos;
- u8 locking_btree_id;
- u8 locking_level;
- pid_t pid;
-#endif
- unsigned long ip;
+ u64 last_begin_time;
+
+ u8 lock_may_not_fail;
+ u8 lock_must_abort;
+ struct btree_bkey_cached_common *locking;
+ struct six_lock_waiter locking_wait;
 
+ int srcu_idx;
+
+ u8 fn_idx;
+ u8 nr_sorted;
 u8 nr_updates;
- u8 nr_updates2;
- unsigned used_mempool:1;
- unsigned error:1;
- unsigned in_traverse_all:1;
+ u8 nr_wb_updates;
+ u8 wb_updates_size;
+ bool used_mempool:1;
+ bool in_traverse_all:1;
+ bool paths_sorted:1;
+ bool memory_allocation_failure:1;
+ bool journal_transaction_names:1;
+ bool journal_replay_not_finished:1;
+ bool is_initial_gc:1;
+ bool notrace_relock_fail:1;
+ enum bch_errcode restarted:16;
+ u32 restart_count;
+ unsigned long last_begin_ip;
+ unsigned long last_restarted_ip;
+ unsigned long srcu_lock_time;
 
- u64 iters_linked;
- u64 iters_live;
- u64 iters_touched;
+ /*
+ * For when bch2_trans_update notices we'll be splitting a compressed
+ * extent:
+ */
+ unsigned extra_journal_res;
+ unsigned nr_max_paths;
+
+ u64 paths_allocated;
 
 unsigned mem_top;
+ unsigned mem_max;
 unsigned mem_bytes;
 void *mem;
 
- struct btree_iter *iters;
+ u8 sorted[BTREE_ITER_MAX + 8];
+ struct btree_path *paths;
 struct btree_insert_entry *updates;
- struct btree_insert_entry *updates2;
+ struct btree_write_buffered_key *wb_updates;
 
 /* update path: */
 struct btree_trans_commit_hook *hooks;
- struct jset_entry *extra_journal_entries;
- unsigned extra_journal_entry_u64s;
+ darray_u64 extra_journal_entries;
 struct journal_entry_pin *journal_pin;
 
 struct journal_res journal_res;
 struct journal_preres journal_preres;
 u64 *journal_seq;
 struct disk_reservation *disk_res;
- unsigned flags;
 unsigned journal_u64s;
 unsigned journal_preres_u64s;
 struct replicas_delta_list *fs_usage_deltas;
 };
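Note: restarted, restart_count and last_restarted_ip above exist because every bcachefs transaction is restartable: any operation may fail with a transaction-restart error code, after which the caller loops back to the top. A self-contained sketch of that retry loop (illustrative names, not the bcachefs API):

	#include <stdio.h>

	enum errcode { OK = 0, ERR_TRANSACTION_RESTART = -1 };

	struct trans { unsigned restart_count; };

	/* resets per-iteration state and counts the attempt */
	static void trans_begin(struct trans *t)
	{
		t->restart_count++;
	}

	static enum errcode do_op(struct trans *t)
	{
		/* pretend the first attempt races with another thread */
		return t->restart_count < 2 ? ERR_TRANSACTION_RESTART : OK;
	}

	int main(void)
	{
		struct trans t = { 0 };
		enum errcode ret;

		do {
			trans_begin(&t);
			ret = do_op(&t);
		} while (ret == ERR_TRANSACTION_RESTART);

		printf("committed after %u attempts\n", t.restart_count);
		return 0;
	}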
 
-#define BTREE_FLAG(flag) \
+#define BCH_BTREE_WRITE_TYPES() \
+ x(initial, 0) \
+ x(init_next_bset, 1) \
+ x(cache_reclaim, 2) \
+ x(journal_reclaim, 3) \
+ x(interior, 4)
+
+enum btree_write_type {
+#define x(t, n) BTREE_WRITE_##t,
+ BCH_BTREE_WRITE_TYPES()
+#undef x
+ BTREE_WRITE_TYPE_NR,
+};
+
+#define BTREE_WRITE_TYPE_MASK (roundup_pow_of_two(BTREE_WRITE_TYPE_NR) - 1)
+#define BTREE_WRITE_TYPE_BITS ilog2(roundup_pow_of_two(BTREE_WRITE_TYPE_NR))
+
+#define BTREE_FLAGS() \
+ x(read_in_flight) \
+ x(read_error) \
+ x(dirty) \
+ x(need_write) \
+ x(write_blocked) \
+ x(will_make_reachable) \
+ x(noevict) \
+ x(write_idx) \
+ x(accessed) \
+ x(write_in_flight) \
+ x(write_in_flight_inner) \
+ x(just_written) \
+ x(dying) \
+ x(fake) \
+ x(need_rewrite) \
+ x(never_write)
+
+enum btree_flags {
+ /* First bits for btree node write type */
+ BTREE_NODE_FLAGS_START = BTREE_WRITE_TYPE_BITS - 1,
+#define x(flag) BTREE_NODE_##flag,
+ BTREE_FLAGS()
+#undef x
+};
+
+#define x(flag) \
 static inline bool btree_node_ ## flag(struct btree *b) \
 { return test_bit(BTREE_NODE_ ## flag, &b->flags); } \
 \
@@ -410,34 +530,8 @@ static inline void set_btree_node_ ## flag(struct btree *b) \
 { set_bit(BTREE_NODE_ ## flag, &b->flags); } \
 \
 static inline void clear_btree_node_ ## flag(struct btree *b) \
 { clear_bit(BTREE_NODE_ ## flag, &b->flags); }
 
-enum btree_flags {
- BTREE_NODE_read_in_flight,
- BTREE_NODE_read_error,
- BTREE_NODE_dirty,
- BTREE_NODE_need_write,
- BTREE_NODE_noevict,
- BTREE_NODE_write_idx,
- BTREE_NODE_accessed,
- BTREE_NODE_write_in_flight,
- BTREE_NODE_just_written,
- BTREE_NODE_dying,
- BTREE_NODE_fake,
- BTREE_NODE_need_rewrite,
- BTREE_NODE_never_write,
-};
-
-BTREE_FLAG(read_in_flight);
-BTREE_FLAG(read_error);
-BTREE_FLAG(need_write);
-BTREE_FLAG(noevict);
-BTREE_FLAG(write_idx);
-BTREE_FLAG(accessed);
-BTREE_FLAG(write_in_flight);
-BTREE_FLAG(just_written);
-BTREE_FLAG(dying);
-BTREE_FLAG(fake);
-BTREE_FLAG(need_rewrite);
-BTREE_FLAG(never_write);
+BTREE_FLAGS()
+#undef x
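Note: BTREE_FLAGS() is an x-macro: the single flag list is expanded once into enum values and once into the test/set/clear accessors, so the two can no longer drift apart the way the old hand-written enum and BTREE_FLAG() calls could. A self-contained sketch of the pattern (illustrative flag names):

	#include <stdio.h>

	#define FLAGS() x(dirty) x(fake)

	enum flag {
	#define x(f) FLAG_##f,
		FLAGS()
	#undef x
	};

	/* one accessor per flag, generated from the same list */
	#define x(f) \
	static int is_##f(unsigned long bits) { return (bits >> FLAG_##f) & 1; }
	FLAGS()
	#undef x

	int main(void)
	{
		unsigned long bits = 1UL << FLAG_dirty;

		printf("dirty=%d fake=%d\n", is_dirty(bits), is_fake(bits));	/* 1 0 */
		return 0;
	}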
 
 static inline struct btree_write *btree_current_write(struct btree *b)
 {
@@ -549,7 +643,7 @@ static inline unsigned bset_byte_offset(struct btree *b, void *i)
 }
 
 enum btree_node_type {
-#define x(kwd, val) BKEY_TYPE_##kwd = val,
+#define x(kwd, val, ...) BKEY_TYPE_##kwd = val,
 BCH_BTREE_IDS()
 #undef x
 BKEY_TYPE_btree,
@@ -567,87 +661,65 @@ static inline enum btree_node_type btree_node_type(struct btree *b)
 return __btree_node_type(b->c.level, b->c.btree_id);
 }
 
-static inline bool btree_node_type_is_extents(enum btree_node_type type)
-{
- switch (type) {
- case BKEY_TYPE_extents:
- case BKEY_TYPE_reflink:
- return true;
- default:
- return false;
- }
-}
-
-static inline bool btree_node_is_extents(struct btree *b)
-{
- return btree_node_type_is_extents(btree_node_type(b));
-}
-
-static inline enum btree_node_type btree_iter_key_type(struct btree_iter *iter)
-{
- return __btree_node_type(iter->level, iter->btree_id);
-}
-
-static inline bool btree_iter_is_extents(struct btree_iter *iter)
-{
- return btree_node_type_is_extents(btree_iter_key_type(iter));
-}
-
 #define BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS \
- ((1U << BKEY_TYPE_extents)| \
- (1U << BKEY_TYPE_inodes)| \
- (1U << BKEY_TYPE_stripes)| \
- (1U << BKEY_TYPE_reflink)| \
- (1U << BKEY_TYPE_btree))
+ (BIT(BKEY_TYPE_extents)| \
+ BIT(BKEY_TYPE_alloc)| \
+ BIT(BKEY_TYPE_inodes)| \
+ BIT(BKEY_TYPE_stripes)| \
+ BIT(BKEY_TYPE_reflink)| \
+ BIT(BKEY_TYPE_btree))
 
 #define BTREE_NODE_TYPE_HAS_MEM_TRIGGERS \
- ((1U << BKEY_TYPE_alloc)| \
- (1U << BKEY_TYPE_stripes))
+ (BIT(BKEY_TYPE_alloc)| \
+ BIT(BKEY_TYPE_inodes)| \
+ BIT(BKEY_TYPE_stripes)| \
+ BIT(BKEY_TYPE_snapshots))
 
 #define BTREE_NODE_TYPE_HAS_TRIGGERS \
 (BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS| \
 BTREE_NODE_TYPE_HAS_MEM_TRIGGERS)
 
-#define BTREE_ID_HAS_SNAPSHOTS \
- ((1U << BTREE_ID_extents)| \
- (1U << BTREE_ID_inodes)| \
- (1U << BTREE_ID_dirents)| \
- (1U << BTREE_ID_xattrs))
-
-#define BTREE_ID_HAS_PTRS \
- ((1U << BTREE_ID_extents)| \
- (1U << BTREE_ID_reflink))
-
-static inline bool btree_type_has_snapshots(enum btree_id id)
+static inline bool btree_node_type_needs_gc(enum btree_node_type type)
 {
- return (1 << id) & BTREE_ID_HAS_SNAPSHOTS;
+ return BTREE_NODE_TYPE_HAS_TRIGGERS & (1U << type);
 }
 
-enum btree_trigger_flags {
- __BTREE_TRIGGER_NORUN, /* Don't run triggers at all */
-
- __BTREE_TRIGGER_INSERT,
- __BTREE_TRIGGER_OVERWRITE,
- __BTREE_TRIGGER_OVERWRITE_SPLIT,
+static inline bool btree_node_type_is_extents(enum btree_node_type type)
+{
+ const unsigned mask = 0
+#define x(name, nr, flags, ...) |((!!((flags) & BTREE_ID_EXTENTS)) << nr)
+ BCH_BTREE_IDS()
+#undef x
+ ;
 
- __BTREE_TRIGGER_GC,
- __BTREE_TRIGGER_BUCKET_INVALIDATE,
- __BTREE_TRIGGER_NOATOMIC,
-};
+ return (1U << type) & mask;
+}
 
-#define BTREE_TRIGGER_NORUN (1U << __BTREE_TRIGGER_NORUN)
+static inline bool btree_id_is_extents(enum btree_id btree)
+{
+ return btree_node_type_is_extents((enum btree_node_type) btree);
+}
 
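Note: btree_node_type_is_extents() above replaces a hand-maintained switch with a bitmask folded out of the btree ID list at compile time; each x() expansion contributes one bit. A self-contained sketch of the same trick (the id list and flag are illustrative):

	#include <stdio.h>

	#define ID_EXTENTS 1	/* per-id flag bit */

	#define IDS() \
		x(extents, 0, ID_EXTENTS) \
		x(inodes,  1, 0) \
		x(reflink, 2, ID_EXTENTS)

	static int id_is_extents(unsigned id)
	{
		const unsigned mask = 0
	#define x(name, nr, flags) |((!!((flags) & ID_EXTENTS)) << nr)
		IDS()
	#undef x
		;	/* mask == 0b101, computed entirely at compile time */

		return (1U << id) & mask;
	}

	int main(void)
	{
		printf("%d %d %d\n", id_is_extents(0), id_is_extents(1), id_is_extents(2));	/* 1 0 1 */
		return 0;
	}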
-#define BTREE_TRIGGER_INSERT (1U << __BTREE_TRIGGER_INSERT)
-#define BTREE_TRIGGER_OVERWRITE (1U << __BTREE_TRIGGER_OVERWRITE)
-#define BTREE_TRIGGER_OVERWRITE_SPLIT (1U << __BTREE_TRIGGER_OVERWRITE_SPLIT)
+static inline bool btree_type_has_snapshots(enum btree_id id)
+{
+ const unsigned mask = 0
+#define x(name, nr, flags, ...) |((!!((flags) & BTREE_ID_SNAPSHOTS)) << nr)
+ BCH_BTREE_IDS()
+#undef x
+ ;
 
-#define BTREE_TRIGGER_GC (1U << __BTREE_TRIGGER_GC)
-#define BTREE_TRIGGER_BUCKET_INVALIDATE (1U << __BTREE_TRIGGER_BUCKET_INVALIDATE)
-#define BTREE_TRIGGER_NOATOMIC (1U << __BTREE_TRIGGER_NOATOMIC)
+ return (1U << id) & mask;
+}
 
-static inline bool btree_node_type_needs_gc(enum btree_node_type type)
+static inline bool btree_type_has_ptrs(enum btree_id id)
 {
- return BTREE_NODE_TYPE_HAS_TRIGGERS & (1U << type);
+ const unsigned mask = 0
+#define x(name, nr, flags, ...) |((!!((flags) & BTREE_ID_DATA)) << nr)
+ BCH_BTREE_IDS()
+#undef x
+ ;
+
+ return (1U << id) & mask;
 }
 
 struct btree_root {
@@ -660,21 +732,6 @@ struct btree_root {
 s8 error;
 };
 
-/*
- * Optional hook that will be called just prior to a btree node update, when
- * we're holding the write lock and we know what key is about to be overwritten:
- */
-
-enum btree_insert_ret {
- BTREE_INSERT_OK,
- /* leaf node needs to be split */
- BTREE_INSERT_BTREE_NODE_FULL,
- BTREE_INSERT_ENOSPC,
- BTREE_INSERT_NEED_MARK_REPLICAS,
- BTREE_INSERT_NEED_JOURNAL_RES,
- BTREE_INSERT_NEED_JOURNAL_RECLAIM,
-};
-
 enum btree_gc_coalesce_fail_reason {
 BTREE_GC_COALESCE_FAIL_RESERVE_GET,
 BTREE_GC_COALESCE_FAIL_KEYLIST_REALLOC,
@@ -686,8 +743,4 @@ enum btree_node_sibling {
 btree_next_sib,
 };
 
-typedef struct btree_nr_keys (*sort_fix_overlapping_fn)(struct bset *,
- struct btree *,
- struct btree_node_iter *);
-
 #endif /* _BCACHEFS_BTREE_TYPES_H */