#ifndef _BCACHEFS_BTREE_TYPES_H
#define _BCACHEFS_BTREE_TYPES_H
+#include <linux/darray_types.h>
#include <linux/list.h>
#include <linux/rhashtable.h>
-#include <linux/six.h>
-#include "bkey_methods.h"
+#include "bbpos_types.h"
+#include "btree_key_cache_types.h"
#include "buckets_types.h"
-#include "darray.h"
+#include "errcode.h"
#include "journal_types.h"
+#include "replicas_types.h"
+#include "six.h"
struct open_bucket;
struct btree_update;
struct six_lock lock;
u8 level;
u8 btree_id;
+ /* true for key cache entries (struct bkey_cached), false for btree nodes: */
+ bool cached;
};
struct btree {
unsigned used;
unsigned reserve;
atomic_t dirty;
- struct shrinker shrink;
+ struct shrinker *shrink;
/*
 * If we need to allocate memory for a new btree node and that
 * allocation fails, we can cannibalize another node in the btree cache
 * to satisfy the allocation - lock to guarantee only one thread does
 * this at a time:
 */
struct task_struct *alloc_lock;
struct closure_waitlist alloc_wait;
+
+ struct bbpos pinned_nodes_start;
+ struct bbpos pinned_nodes_end;
+ u64 pinned_nodes_leaf_mask;
+ u64 pinned_nodes_interior_mask;
};
struct btree_node_iter {
/*
* Iterate over all possible positions, synthesizing deleted keys for holes:
*/
-#define BTREE_ITER_SLOTS (1 << 0)
-#define BTREE_ITER_ALL_LEVELS (1 << 1)
+static const __maybe_unused u16 BTREE_ITER_SLOTS = 1 << 0;
/*
* Indicates that intent locks should be taken on leaf nodes, because we expect
* to be doing updates:
*/
-#define BTREE_ITER_INTENT (1 << 2)
+static const __maybe_unused u16 BTREE_ITER_INTENT = 1 << 1;
/*
* Causes the btree iterator code to prefetch additional btree nodes from disk:
*/
-#define BTREE_ITER_PREFETCH (1 << 3)
+static const __maybe_unused u16 BTREE_ITER_PREFETCH = 1 << 2;
/*
* Used in bch2_btree_iter_traverse(), to indicate whether we're searching for
* @pos or the first key strictly greater than @pos
*/
-#define BTREE_ITER_IS_EXTENTS (1 << 4)
-#define BTREE_ITER_NOT_EXTENTS (1 << 5)
-#define BTREE_ITER_CACHED (1 << 6)
-#define BTREE_ITER_WITH_KEY_CACHE (1 << 7)
-#define BTREE_ITER_WITH_UPDATES (1 << 8)
-#define BTREE_ITER_WITH_JOURNAL (1 << 9)
-#define __BTREE_ITER_ALL_SNAPSHOTS (1 << 10)
-#define BTREE_ITER_ALL_SNAPSHOTS (1 << 11)
-#define BTREE_ITER_FILTER_SNAPSHOTS (1 << 12)
-#define BTREE_ITER_NOPRESERVE (1 << 13)
+static const __maybe_unused u16 BTREE_ITER_IS_EXTENTS = 1 << 3;
+static const __maybe_unused u16 BTREE_ITER_NOT_EXTENTS = 1 << 4;
+static const __maybe_unused u16 BTREE_ITER_CACHED = 1 << 5;
+static const __maybe_unused u16 BTREE_ITER_WITH_KEY_CACHE = 1 << 6;
+static const __maybe_unused u16 BTREE_ITER_WITH_UPDATES = 1 << 7;
+static const __maybe_unused u16 BTREE_ITER_WITH_JOURNAL = 1 << 8;
+static const __maybe_unused u16 __BTREE_ITER_ALL_SNAPSHOTS = 1 << 9;
+static const __maybe_unused u16 BTREE_ITER_ALL_SNAPSHOTS = 1 << 10;
+static const __maybe_unused u16 BTREE_ITER_FILTER_SNAPSHOTS = 1 << 11;
+static const __maybe_unused u16 BTREE_ITER_NOPRESERVE = 1 << 12;
+static const __maybe_unused u16 BTREE_ITER_CACHED_NOFILL = 1 << 13;
+static const __maybe_unused u16 BTREE_ITER_KEY_CACHE_FILL = 1 << 14;
+#define __BTREE_ITER_FLAGS_END 15
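+
+/*
+ * The iterator flags are now typed u16 constants rather than #defines; they
+ * still combine with bitwise OR into btree_iter.flags (a u16, see struct
+ * btree_iter below), e.g. BTREE_ITER_INTENT|BTREE_ITER_PREFETCH for an
+ * iterator that expects to do updates. __BTREE_ITER_FLAGS_END is the first
+ * unused bit position.
+ */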
enum btree_path_uptodate {
BTREE_ITER_UPTODATE = 0,
BTREE_ITER_NEED_TRAVERSE = 2,
};
+#if defined(CONFIG_BCACHEFS_LOCK_TIME_STATS) || defined(CONFIG_BCACHEFS_DEBUG)
+#define TRACK_PATH_ALLOCATED
+#endif
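+
+/*
+ * When TRACK_PATH_ALLOCATED is defined, btree_path and btree_iter record the
+ * call site that allocated them (the ip_allocated fields below); this is now
+ * enabled for CONFIG_BCACHEFS_LOCK_TIME_STATS builds as well as
+ * CONFIG_BCACHEFS_DEBUG.
+ */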
+
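+/*
+ * btree_paths are now referred to by index into the transaction's paths
+ * array instead of by pointer, which allows the array to be reallocated and
+ * grown past the initial BTREE_ITER_INITIAL entries (see struct btree_trans
+ * below):
+ */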
+typedef u16 btree_path_idx_t;
+
struct btree_path {
- u8 idx;
- u8 sorted_idx;
+ btree_path_idx_t sorted_idx;
u8 ref;
u8 intent_ref;
/* btree_iter_copy starts here: */
struct bpos pos;
- enum btree_id btree_id:4;
+ enum btree_id btree_id:5;
bool cached:1;
bool preserve:1;
enum btree_path_uptodate uptodate:2;
/*
 * When true, failing to relock this path will cause the transaction to
 * restart:
 */
bool should_be_locked:1;
unsigned level:3,
- locks_want:4,
- nodes_locked:4,
- nodes_intent_locked:4;
+ locks_want:3;
+ u8 nodes_locked;
struct btree_path_level {
struct btree *b;
u64 lock_taken_time;
#endif
} l[BTREE_MAX_DEPTH];
-#ifdef CONFIG_BCACHEFS_DEBUG
+#ifdef TRACK_PATH_ALLOCATED
unsigned long ip_allocated;
#endif
};
return path->l + path->level;
}
+static inline unsigned long btree_path_ip_allocated(struct btree_path *path)
+{
+#ifdef TRACK_PATH_ALLOCATED
+ return path->ip_allocated;
+#else
+ return _THIS_IP_;
+#endif
+}
+
/*
* @pos - iterator's current position
* @level - current btree depth
*/
struct btree_iter {
struct btree_trans *trans;
- struct btree_path *path;
- struct btree_path *update_path;
- struct btree_path *key_cache_path;
+ btree_path_idx_t path;
+ btree_path_idx_t update_path;
+ btree_path_idx_t key_cache_path;
- enum btree_id btree_id:4;
- unsigned min_depth:3;
- unsigned advanced:1;
+ enum btree_id btree_id:8;
+ u8 min_depth;
/* btree_iter_copy starts here: */
u16 flags;
unsigned snapshot;
struct bpos pos;
- struct bpos pos_after_commit;
/*
* Current unpacked key - so that bch2_btree_iter_next()/
* bch2_btree_iter_next_slot() can correctly advance pos.
/* BTREE_ITER_WITH_JOURNAL: */
size_t journal_idx;
- struct bpos journal_pos;
-#ifdef CONFIG_BCACHEFS_DEBUG
+#ifdef TRACK_PATH_ALLOCATED
unsigned long ip_allocated;
#endif
};
-struct btree_key_cache_freelist {
- struct bkey_cached *objs[16];
- unsigned nr;
-};
-
-struct btree_key_cache {
- struct mutex lock;
- struct rhashtable table;
- bool table_init_done;
- struct list_head freed;
- struct shrinker shrink;
- unsigned shrink_iter;
- struct btree_key_cache_freelist __percpu *pcpu_freed;
-
- atomic_long_t nr_freed;
- atomic_long_t nr_keys;
- atomic_long_t nr_dirty;
-};
-
-struct bkey_cached_key {
- u32 btree_id;
- struct bpos pos;
-} __attribute__((packed, aligned(4)));
-
#define BKEY_CACHED_ACCESSED 0
#define BKEY_CACHED_DIRTY 1
struct rhash_head hash;
struct list_head list;
- struct journal_preres res;
struct journal_entry_pin journal;
+ u64 seq;
struct bkey_i *k;
};
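+/*
+ * struct btree and struct bkey_cached both embed a struct
+ * btree_bkey_cached_common (member "c"); the new "cached" flag says which
+ * container a pointer to the common part lives in, letting us recover the
+ * position from either:
+ */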
+static inline struct bpos btree_node_pos(struct btree_bkey_cached_common *b)
+{
+ return !b->cached
+ ? container_of(b, struct btree, c)->key.k.p
+ : container_of(b, struct bkey_cached, c)->key.pos;
+}
+
struct btree_insert_entry {
unsigned flags;
u8 bkey_type;
/*
 * @old_k may be a key from the journal; @old_btree_u64s always refers
 * to the size of the key being overwritten in the btree:
*/
u8 old_btree_u64s;
+ btree_path_idx_t path;
struct bkey_i *k;
- struct btree_path *path;
/* key being overwritten: */
struct bkey old_k;
const struct bch_val *old_v;
unsigned long ip_allocated;
};
-#ifndef CONFIG_LOCKDEP
-#define BTREE_ITER_MAX 64
-#else
-#define BTREE_ITER_MAX 32
-#endif
+#define BTREE_ITER_INITIAL 64
+#define BTREE_ITER_MAX (1U << 10)
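+
+/*
+ * A transaction starts out with BTREE_ITER_INITIAL paths, sort entries and
+ * updates allocated inline (the _paths[], _sorted[] and _updates[] arrays at
+ * the end of struct btree_trans); the paths array may then grow up to
+ * BTREE_ITER_MAX entries, hence btree_path_idx_t being a u16.
+ */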
struct btree_trans_commit_hook;
typedef int (btree_trans_commit_hook_fn)(struct btree_trans *, struct btree_trans_commit_hook *);
#define BTREE_TRANS_MAX_LOCK_HOLD_TIME_NS 10000
+struct btree_trans_paths {
+ unsigned long nr_paths;
+ struct btree_path paths[];
+};
+
struct btree_trans {
struct bch_fs *c;
- const char *fn;
- struct list_head list;
- u64 last_begin_time;
- struct btree_bkey_cached_common *locking;
- unsigned locking_path_idx;
- struct bpos locking_pos;
- u8 locking_btree_id;
- u8 locking_level;
- u8 locking_lock_type;
- struct task_struct *task;
- int srcu_idx;
+ unsigned long *paths_allocated;
+ struct btree_path *paths;
+ btree_path_idx_t *sorted;
+ struct btree_insert_entry *updates;
+
+ void *mem;
+ unsigned mem_top;
+ unsigned mem_bytes;
+
+ btree_path_idx_t nr_sorted;
+ btree_path_idx_t nr_paths;
+ btree_path_idx_t nr_paths_max;
u8 fn_idx;
- u8 nr_sorted;
u8 nr_updates;
- u8 traverse_all_idx;
+ u8 lock_must_abort;
+ bool lock_may_not_fail:1;
+ bool srcu_held:1;
bool used_mempool:1;
bool in_traverse_all:1;
+ bool paths_sorted:1;
bool memory_allocation_failure:1;
- bool is_initial_gc:1;
+ bool journal_transaction_names:1;
+ bool journal_replay_not_finished:1;
+ bool notrace_relock_fail:1;
+ bool write_locked:1;
enum bch_errcode restarted:16;
u32 restart_count;
- unsigned long last_restarted_ip;
-
- /*
- * For when bch2_trans_update notices we'll be splitting a compressed
- * extent:
- */
- unsigned extra_journal_res;
-
- u64 paths_allocated;
- unsigned mem_top;
- unsigned mem_bytes;
- void *mem;
+ u64 last_begin_time;
+ unsigned long last_begin_ip;
+ unsigned long last_restarted_ip;
+ unsigned long srcu_lock_time;
- u8 sorted[BTREE_ITER_MAX];
- struct btree_path *paths;
- struct btree_insert_entry *updates;
+ const char *fn;
+ struct btree_bkey_cached_common *locking;
+ struct six_lock_waiter locking_wait;
+ int srcu_idx;
/* update path: */
+ u16 journal_entries_u64s;
+ u16 journal_entries_size;
+ struct jset_entry *journal_entries;
+
struct btree_trans_commit_hook *hooks;
- DARRAY(u64) extra_journal_entries;
struct journal_entry_pin *journal_pin;
struct journal_res journal_res;
- struct journal_preres journal_preres;
u64 *journal_seq;
struct disk_reservation *disk_res;
- unsigned flags;
+
+ struct bch_fs_usage_base fs_usage_delta;
+
unsigned journal_u64s;
- unsigned journal_preres_u64s;
+ unsigned extra_disk_res; /* XXX kill */
struct replicas_delta_list *fs_usage_deltas;
+
+ /* Entries before this are zeroed out on every bch2_trans_get() call */
+
+ struct list_head list;
+ struct closure ref;
+
+ unsigned long _paths_allocated[BITS_TO_LONGS(BTREE_ITER_INITIAL)];
+ struct btree_trans_paths trans_paths;
+ struct btree_path _paths[BTREE_ITER_INITIAL];
+ btree_path_idx_t _sorted[BTREE_ITER_INITIAL + 4];
+ struct btree_insert_entry _updates[BTREE_ITER_INITIAL];
};
+static inline struct btree_path *btree_iter_path(struct btree_trans *trans, struct btree_iter *iter)
+{
+ return trans->paths + iter->path;
+}
+
+static inline struct btree_path *btree_iter_key_cache_path(struct btree_trans *trans, struct btree_iter *iter)
+{
+ return iter->key_cache_path
+ ? trans->paths + iter->key_cache_path
+ : NULL;
+}
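+
+/*
+ * Since iterators now hold path indices rather than pointers, code that used
+ * to dereference iter->path goes through these helpers; a minimal sketch:
+ *
+ *	struct btree_path *path = btree_iter_path(trans, iter);
+ *
+ *	if (path->uptodate == BTREE_ITER_UPTODATE)
+ *		...
+ *
+ * Path index 0 doubles as "no path", which is why btree_iter_key_cache_path()
+ * returns NULL when iter->key_cache_path is 0.
+ */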
+
+#define BCH_BTREE_WRITE_TYPES() \
+ x(initial, 0) \
+ x(init_next_bset, 1) \
+ x(cache_reclaim, 2) \
+ x(journal_reclaim, 3) \
+ x(interior, 4)
+
+enum btree_write_type {
+#define x(t, n) BTREE_WRITE_##t,
+ BCH_BTREE_WRITE_TYPES()
+#undef x
+ BTREE_WRITE_TYPE_NR,
+};
+
+#define BTREE_WRITE_TYPE_MASK (roundup_pow_of_two(BTREE_WRITE_TYPE_NR) - 1)
+#define BTREE_WRITE_TYPE_BITS ilog2(roundup_pow_of_two(BTREE_WRITE_TYPE_NR))
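+
+/*
+ * The write type is stored in the low BTREE_WRITE_TYPE_BITS bits of the btree
+ * node flags word; the BTREE_FLAGS() flags below are allocated starting just
+ * above it (BTREE_NODE_FLAGS_START).
+ */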
+
#define BTREE_FLAGS() \
x(read_in_flight) \
x(read_error) \
x(never_write)
enum btree_flags {
+ /* First bits for btree node write type */
+ BTREE_NODE_FLAGS_START = BTREE_WRITE_TYPE_BITS - 1,
#define x(flag) BTREE_NODE_##flag,
BTREE_FLAGS()
#undef x
}
enum btree_node_type {
-#define x(kwd, val) BKEY_TYPE_##kwd = val,
+ BKEY_TYPE_btree,
+#define x(kwd, val, ...) BKEY_TYPE_##kwd = val + 1,
BCH_BTREE_IDS()
#undef x
- BKEY_TYPE_btree,
+ BKEY_TYPE_NR
};
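+/*
+ * BKEY_TYPE_btree is now 0 and the per-btree key types are shifted up by one,
+ * i.e. BKEY_TYPE_<name> == BTREE_ID_<name> + 1; __btree_node_type() below and
+ * the nr + 1 shift in btree_node_type_is_extents() rely on this.
+ */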
/* Type of a key in btree @id at level @level: */
static inline enum btree_node_type __btree_node_type(unsigned level, enum btree_id id)
{
- return level ? BKEY_TYPE_btree : (enum btree_node_type) id;
+ return level ? BKEY_TYPE_btree : (unsigned) id + 1;
}
/* Type of keys @b contains: */
return __btree_node_type(b->c.level, b->c.btree_id);
}
+const char *bch2_btree_node_type_str(enum btree_node_type);
+
#define BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS \
- ((1U << BKEY_TYPE_extents)| \
- (1U << BKEY_TYPE_alloc)| \
- (1U << BKEY_TYPE_inodes)| \
- (1U << BKEY_TYPE_stripes)| \
- (1U << BKEY_TYPE_reflink)| \
- (1U << BKEY_TYPE_btree))
-
-#define BTREE_NODE_TYPE_HAS_MEM_TRIGGERS \
- ((1U << BKEY_TYPE_alloc)| \
- (1U << BKEY_TYPE_inodes)| \
- (1U << BKEY_TYPE_stripes)| \
- (1U << BKEY_TYPE_snapshots))
+ (BIT_ULL(BKEY_TYPE_extents)| \
+ BIT_ULL(BKEY_TYPE_alloc)| \
+ BIT_ULL(BKEY_TYPE_inodes)| \
+ BIT_ULL(BKEY_TYPE_stripes)| \
+ BIT_ULL(BKEY_TYPE_reflink)| \
+ BIT_ULL(BKEY_TYPE_subvolumes)| \
+ BIT_ULL(BKEY_TYPE_btree))
+
+#define BTREE_NODE_TYPE_HAS_ATOMIC_TRIGGERS \
+ (BIT_ULL(BKEY_TYPE_alloc)| \
+ BIT_ULL(BKEY_TYPE_inodes)| \
+ BIT_ULL(BKEY_TYPE_stripes)| \
+ BIT_ULL(BKEY_TYPE_snapshots))
#define BTREE_NODE_TYPE_HAS_TRIGGERS \
(BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS| \
- BTREE_NODE_TYPE_HAS_MEM_TRIGGERS)
+ BTREE_NODE_TYPE_HAS_ATOMIC_TRIGGERS)
-#define BTREE_ID_IS_EXTENTS \
- ((1U << BTREE_ID_extents)| \
- (1U << BTREE_ID_reflink)| \
- (1U << BTREE_ID_freespace))
+static inline bool btree_node_type_needs_gc(enum btree_node_type type)
+{
+ return BTREE_NODE_TYPE_HAS_TRIGGERS & BIT_ULL(type);
+}
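+
+/*
+ * The helpers below build their masks at compile time from the per-btree
+ * flags in BCH_BTREE_IDS(): each x(name, nr, flags, ...) entry contributes a
+ * bit at position nr (nr + 1 for btree_node_type_is_extents(), since
+ * BKEY_TYPE_btree occupies bit 0):
+ */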
static inline bool btree_node_type_is_extents(enum btree_node_type type)
{
- return (1U << type) & BTREE_ID_IS_EXTENTS;
-}
+ const unsigned mask = 0
+#define x(name, nr, flags, ...) |((!!((flags) & BTREE_ID_EXTENTS)) << (nr + 1))
+ BCH_BTREE_IDS()
+#undef x
+ ;
-#define BTREE_ID_HAS_SNAPSHOTS \
- ((1U << BTREE_ID_extents)| \
- (1U << BTREE_ID_inodes)| \
- (1U << BTREE_ID_dirents)| \
- (1U << BTREE_ID_xattrs))
+ return (1U << type) & mask;
+}
-#define BTREE_ID_HAS_PTRS \
- ((1U << BTREE_ID_extents)| \
- (1U << BTREE_ID_reflink))
+static inline bool btree_id_is_extents(enum btree_id btree)
+{
+ return btree_node_type_is_extents(__btree_node_type(0, btree));
+}
static inline bool btree_type_has_snapshots(enum btree_id id)
{
- return (1 << id) & BTREE_ID_HAS_SNAPSHOTS;
+ const unsigned mask = 0
+#define x(name, nr, flags, ...) |((!!((flags) & BTREE_ID_SNAPSHOTS)) << nr)
+ BCH_BTREE_IDS()
+#undef x
+ ;
+
+ return (1U << id) & mask;
}
-static inline bool btree_type_has_ptrs(enum btree_id id)
+static inline bool btree_type_has_snapshot_field(enum btree_id id)
{
- return (1 << id) & BTREE_ID_HAS_PTRS;
+ const unsigned mask = 0
+#define x(name, nr, flags, ...) |((!!((flags) & (BTREE_ID_SNAPSHOT_FIELD|BTREE_ID_SNAPSHOTS))) << nr)
+ BCH_BTREE_IDS()
+#undef x
+ ;
+
+ return (1U << id) & mask;
}
-static inline bool btree_node_type_needs_gc(enum btree_node_type type)
+static inline bool btree_type_has_ptrs(enum btree_id id)
{
- return BTREE_NODE_TYPE_HAS_TRIGGERS & (1U << type);
+ const unsigned mask = 0
+#define x(name, nr, flags, ...) |((!!((flags) & BTREE_ID_DATA)) << nr)
+ BCH_BTREE_IDS()
+#undef x
+ ;
+
+ return (1U << id) & mask;
}
struct btree_root {
__BKEY_PADDED(key, BKEY_BTREE_PTR_VAL_U64s_MAX);
u8 level;
u8 alive;
- s8 error;
-};
-
-enum btree_insert_ret {
- BTREE_INSERT_OK,
- /* leaf node needs to be split */
- BTREE_INSERT_BTREE_NODE_FULL,
- BTREE_INSERT_NEED_MARK_REPLICAS,
- BTREE_INSERT_NEED_JOURNAL_RES,
- BTREE_INSERT_NEED_JOURNAL_RECLAIM,
+ s16 error;
};
enum btree_gc_coalesce_fail_reason {
btree_next_sib,
};
+struct get_locks_fail {
+ unsigned l;
+ struct btree *b;
+};
+
#endif /* _BCACHEFS_BTREE_TYPES_H */