#include <linux/list.h>
#include <linux/rhashtable.h>
-#include <linux/six.h>
-#include "bkey_methods.h"
+#include "btree_key_cache_types.h"
#include "buckets_types.h"
#include "darray.h"
+#include "errcode.h"
#include "journal_types.h"
+#include "replicas_types.h"
+#include "six.h"
struct open_bucket;
struct btree_update;
struct six_lock lock;
u8 level;
u8 btree_id;
+ bool cached;
};
struct btree {
unsigned used;
unsigned reserve;
atomic_t dirty;
- struct shrinker shrink;
+ struct shrinker *shrink;
/*
* If we need to allocate memory for a new btree node and that
/*
* Iterate over all possible positions, synthesizing deleted keys for holes:
*/
-#define BTREE_ITER_SLOTS (1 << 0)
-#define BTREE_ITER_ALL_LEVELS (1 << 1)
+static const __maybe_unused u16 BTREE_ITER_SLOTS = 1 << 0;
/*
* Indicates that intent locks should be taken on leaf nodes, because we expect
* to be doing updates:
*/
-#define BTREE_ITER_INTENT (1 << 2)
+static const __maybe_unused u16 BTREE_ITER_INTENT = 1 << 1;
/*
* Causes the btree iterator code to prefetch additional btree nodes from disk:
*/
-#define BTREE_ITER_PREFETCH (1 << 3)
+static const __maybe_unused u16 BTREE_ITER_PREFETCH = 1 << 2;
/*
* Used in bch2_btree_iter_traverse(), to indicate whether we're searching for
* @pos or the first key strictly greater than @pos
*/
-#define BTREE_ITER_IS_EXTENTS (1 << 4)
-#define BTREE_ITER_NOT_EXTENTS (1 << 5)
-#define BTREE_ITER_CACHED (1 << 6)
-#define BTREE_ITER_WITH_KEY_CACHE (1 << 7)
-#define BTREE_ITER_WITH_UPDATES (1 << 8)
-#define BTREE_ITER_WITH_JOURNAL (1 << 9)
-#define __BTREE_ITER_ALL_SNAPSHOTS (1 << 10)
-#define BTREE_ITER_ALL_SNAPSHOTS (1 << 11)
-#define BTREE_ITER_FILTER_SNAPSHOTS (1 << 12)
-#define BTREE_ITER_NOPRESERVE (1 << 13)
+static const __maybe_unused u16 BTREE_ITER_IS_EXTENTS = 1 << 3;
+static const __maybe_unused u16 BTREE_ITER_NOT_EXTENTS = 1 << 4;
+static const __maybe_unused u16 BTREE_ITER_CACHED = 1 << 5;
+static const __maybe_unused u16 BTREE_ITER_WITH_KEY_CACHE = 1 << 6;
+static const __maybe_unused u16 BTREE_ITER_WITH_UPDATES = 1 << 7;
+static const __maybe_unused u16 BTREE_ITER_WITH_JOURNAL = 1 << 8;
+static const __maybe_unused u16 __BTREE_ITER_ALL_SNAPSHOTS = 1 << 9;
+static const __maybe_unused u16 BTREE_ITER_ALL_SNAPSHOTS = 1 << 10;
+static const __maybe_unused u16 BTREE_ITER_FILTER_SNAPSHOTS = 1 << 11;
+static const __maybe_unused u16 BTREE_ITER_NOPRESERVE = 1 << 12;
+static const __maybe_unused u16 BTREE_ITER_CACHED_NOFILL = 1 << 13;
+static const __maybe_unused u16 BTREE_ITER_KEY_CACHE_FILL = 1 << 14;
+/*
+ * One past the highest flag bit in use: new flags must keep this below 16,
+ * since the flags are u16 (see btree_iter's u16 flags field).
+ */
+#define __BTREE_ITER_FLAGS_END 15
enum btree_path_uptodate {
BTREE_ITER_UPTODATE = 0,
BTREE_ITER_NEED_TRAVERSE = 2,
};
+#if defined(CONFIG_BCACHEFS_LOCK_TIME_STATS) || defined(CONFIG_BCACHEFS_DEBUG)
+#define TRACK_PATH_ALLOCATED
+#endif
+
struct btree_path {
u8 idx;
u8 sorted_idx;
u8 ref;
u8 intent_ref;
+ u32 alloc_seq;
+ u32 downgrade_seq;
/* btree_iter_copy starts here: */
struct bpos pos;
- enum btree_id btree_id:4;
+ enum btree_id btree_id:5;
bool cached:1;
bool preserve:1;
enum btree_path_uptodate uptodate:2;
*/
bool should_be_locked:1;
unsigned level:3,
- locks_want:4,
- nodes_locked:4,
- nodes_intent_locked:4;
+ locks_want:3;
+ u8 nodes_locked;
struct btree_path_level {
struct btree *b;
u64 lock_taken_time;
#endif
} l[BTREE_MAX_DEPTH];
-#ifdef CONFIG_BCACHEFS_DEBUG
+#ifdef TRACK_PATH_ALLOCATED
unsigned long ip_allocated;
#endif
};
return path->l + path->level;
}
+/*
+ * Instruction pointer where @path was allocated, for debug reporting.
+ * When TRACK_PATH_ALLOCATED is compiled out there is no ip_allocated field,
+ * so fall back to the caller's own IP to keep callers unconditional.
+ */
+static inline unsigned long btree_path_ip_allocated(struct btree_path *path)
+{
+#ifdef TRACK_PATH_ALLOCATED
+	return path->ip_allocated;
+#else
+	return _THIS_IP_;
+#endif
+}
+
/*
* @pos - iterator's current position
* @level - current btree depth
struct btree_path *update_path;
struct btree_path *key_cache_path;
- enum btree_id btree_id:4;
- unsigned min_depth:3;
- unsigned advanced:1;
+ enum btree_id btree_id:8;
+ u8 min_depth;
/* btree_iter_copy starts here: */
u16 flags;
unsigned snapshot;
struct bpos pos;
- struct bpos pos_after_commit;
/*
* Current unpacked key - so that bch2_btree_iter_next()/
* bch2_btree_iter_next_slot() can correctly advance pos.
/* BTREE_ITER_WITH_JOURNAL: */
size_t journal_idx;
- struct bpos journal_pos;
-#ifdef CONFIG_BCACHEFS_DEBUG
+#ifdef TRACK_PATH_ALLOCATED
unsigned long ip_allocated;
#endif
};
-struct btree_key_cache_freelist {
- struct bkey_cached *objs[16];
- unsigned nr;
-};
-
-struct btree_key_cache {
- struct mutex lock;
- struct rhashtable table;
- bool table_init_done;
- struct list_head freed;
- struct shrinker shrink;
- unsigned shrink_iter;
- struct btree_key_cache_freelist __percpu *pcpu_freed;
-
- atomic_long_t nr_freed;
- atomic_long_t nr_keys;
- atomic_long_t nr_dirty;
-};
-
-struct bkey_cached_key {
- u32 btree_id;
- struct bpos pos;
-} __attribute__((packed, aligned(4)));
-
#define BKEY_CACHED_ACCESSED 0
#define BKEY_CACHED_DIRTY 1
struct rhash_head hash;
struct list_head list;
- struct journal_preres res;
struct journal_entry_pin journal;
+ u64 seq;
struct bkey_i *k;
};
+/*
+ * Position of a locked object in the key space: for a real btree node this is
+ * the node's key position (key.k.p); for a key cache entry (b->cached) it is
+ * the cached key's position. @b is the common header embedded in both.
+ */
+static inline struct bpos btree_node_pos(struct btree_bkey_cached_common *b)
+{
+	return !b->cached
+		? container_of(b, struct btree, c)->key.k.p
+		: container_of(b, struct bkey_cached, c)->key.pos;
+}
+
struct btree_insert_entry {
unsigned flags;
u8 bkey_type;
unsigned long ip_allocated;
};
-#ifndef CONFIG_LOCKDEP
#define BTREE_ITER_MAX 64
-#else
-#define BTREE_ITER_MAX 32
-#endif
struct btree_trans_commit_hook;
typedef int (btree_trans_commit_hook_fn)(struct btree_trans *, struct btree_trans_commit_hook *);
struct btree_trans {
struct bch_fs *c;
const char *fn;
+ struct closure ref;
struct list_head list;
u64 last_begin_time;
+
+ u8 lock_may_not_fail;
+ u8 lock_must_abort;
struct btree_bkey_cached_common *locking;
- unsigned locking_path_idx;
- struct bpos locking_pos;
- u8 locking_btree_id;
- u8 locking_level;
- u8 locking_lock_type;
- struct task_struct *task;
+ struct six_lock_waiter locking_wait;
+
int srcu_idx;
+ u8 fn_idx;
u8 nr_sorted;
u8 nr_updates;
- u8 traverse_all_idx;
+ u8 nr_wb_updates;
+ u8 wb_updates_size;
+ bool srcu_held:1;
bool used_mempool:1;
bool in_traverse_all:1;
+ bool paths_sorted:1;
bool memory_allocation_failure:1;
- bool is_initial_gc:1;
+ bool journal_transaction_names:1;
+ bool journal_replay_not_finished:1;
+ bool notrace_relock_fail:1;
+ bool write_locked:1;
enum bch_errcode restarted:16;
u32 restart_count;
+ unsigned long last_begin_ip;
unsigned long last_restarted_ip;
+ unsigned long srcu_lock_time;
/*
* For when bch2_trans_update notices we'll be splitting a compressed
* extent:
*/
unsigned extra_journal_res;
+ unsigned nr_max_paths;
- u64 paths_allocated;
+ unsigned long paths_allocated[BITS_TO_LONGS(BTREE_ITER_MAX)];
unsigned mem_top;
+ unsigned mem_max;
unsigned mem_bytes;
void *mem;
- u8 sorted[BTREE_ITER_MAX];
- struct btree_path *paths;
- struct btree_insert_entry *updates;
+ u8 sorted[BTREE_ITER_MAX + 8];
+ struct btree_path paths[BTREE_ITER_MAX];
+ struct btree_insert_entry updates[BTREE_ITER_MAX];
+ struct btree_write_buffered_key *wb_updates;
/* update path: */
struct btree_trans_commit_hook *hooks;
- DARRAY(u64) extra_journal_entries;
+ darray_u64 extra_journal_entries;
struct journal_entry_pin *journal_pin;
struct journal_res journal_res;
- struct journal_preres journal_preres;
u64 *journal_seq;
struct disk_reservation *disk_res;
- unsigned flags;
unsigned journal_u64s;
- unsigned journal_preres_u64s;
struct replicas_delta_list *fs_usage_deltas;
- int lock_name_idx;
};
+/* Reasons a btree node write was issued, x(name, discriminant): */
+#define BCH_BTREE_WRITE_TYPES()						\
+	x(initial,		0)					\
+	x(init_next_bset,	1)					\
+	x(cache_reclaim,	2)					\
+	x(journal_reclaim,	3)					\
+	x(interior,		4)
+
+enum btree_write_type {
+#define x(t, n) BTREE_WRITE_##t,
+	BCH_BTREE_WRITE_TYPES()
+#undef x
+	BTREE_WRITE_TYPE_NR,
+};
+
+/* Mask/width for packing the write type into the low bits of a flags word: */
+#define BTREE_WRITE_TYPE_MASK	(roundup_pow_of_two(BTREE_WRITE_TYPE_NR) - 1)
+#define BTREE_WRITE_TYPE_BITS	ilog2(roundup_pow_of_two(BTREE_WRITE_TYPE_NR))
+
#define BTREE_FLAGS() \
x(read_in_flight) \
x(read_error) \
x(never_write)
enum btree_flags {
+ /* First bits for btree node write type */
+ BTREE_NODE_FLAGS_START = BTREE_WRITE_TYPE_BITS - 1,
#define x(flag) BTREE_NODE_##flag,
BTREE_FLAGS()
#undef x
}
 enum btree_node_type {
+	/* Slot 0 is reserved for interior nodes, so leaf types are id + 1: */
+	BKEY_TYPE_btree,
-#define x(kwd, val) BKEY_TYPE_##kwd = val,
+#define x(kwd, val, ...) BKEY_TYPE_##kwd = val + 1,
 	BCH_BTREE_IDS()
 #undef x
-	BKEY_TYPE_btree,
+	BKEY_TYPE_NR
 };
 /* Type of a key in btree @id at level @level: */
 static inline enum btree_node_type __btree_node_type(unsigned level, enum btree_id id)
 {
-	return level ? BKEY_TYPE_btree : (enum btree_node_type) id;
+	/* + 1 because BKEY_TYPE_btree now occupies slot 0 of the enum: */
+	return level ? BKEY_TYPE_btree : (unsigned) id + 1;
 }
/* Type of keys @b contains: */
return __btree_node_type(b->c.level, b->c.btree_id);
}
+const char *bch2_btree_node_type_str(enum btree_node_type);
+
#define BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS \
- ((1U << BKEY_TYPE_extents)| \
- (1U << BKEY_TYPE_alloc)| \
- (1U << BKEY_TYPE_inodes)| \
- (1U << BKEY_TYPE_stripes)| \
- (1U << BKEY_TYPE_reflink)| \
- (1U << BKEY_TYPE_btree))
+ (BIT_ULL(BKEY_TYPE_extents)| \
+ BIT_ULL(BKEY_TYPE_alloc)| \
+ BIT_ULL(BKEY_TYPE_inodes)| \
+ BIT_ULL(BKEY_TYPE_stripes)| \
+ BIT_ULL(BKEY_TYPE_reflink)| \
+ BIT_ULL(BKEY_TYPE_btree))
#define BTREE_NODE_TYPE_HAS_MEM_TRIGGERS \
- ((1U << BKEY_TYPE_alloc)| \
- (1U << BKEY_TYPE_inodes)| \
- (1U << BKEY_TYPE_stripes)| \
- (1U << BKEY_TYPE_snapshots))
+ (BIT_ULL(BKEY_TYPE_alloc)| \
+ BIT_ULL(BKEY_TYPE_inodes)| \
+ BIT_ULL(BKEY_TYPE_stripes)| \
+ BIT_ULL(BKEY_TYPE_snapshots))
#define BTREE_NODE_TYPE_HAS_TRIGGERS \
(BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS| \
BTREE_NODE_TYPE_HAS_MEM_TRIGGERS)
-#define BTREE_ID_IS_EXTENTS \
- ((1U << BTREE_ID_extents)| \
- (1U << BTREE_ID_reflink)| \
- (1U << BTREE_ID_freespace))
+/* True if keys of @type have transactional or in-memory triggers: */
+static inline bool btree_node_type_needs_gc(enum btree_node_type type)
+{
+	return BTREE_NODE_TYPE_HAS_TRIGGERS & BIT_ULL(type);
+}
 static inline bool btree_node_type_is_extents(enum btree_node_type type)
 {
-	return (1U << type) & BTREE_ID_IS_EXTENTS;
-}
+	/*
+	 * Build the mask from the per-btree flags in BCH_BTREE_IDS();
+	 * nr + 1 because btree_node_type values are btree id + 1
+	 * (slot 0 is BKEY_TYPE_btree).
+	 */
+	const unsigned mask = 0
+#define x(name, nr, flags, ...)	|((!!((flags) & BTREE_ID_EXTENTS)) << (nr + 1))
+	BCH_BTREE_IDS()
+#undef x
+	;
-#define BTREE_ID_HAS_SNAPSHOTS				\
-	((1U << BTREE_ID_extents)|			\
-	 (1U << BTREE_ID_inodes)|			\
-	 (1U << BTREE_ID_dirents)|			\
-	 (1U << BTREE_ID_xattrs))
+	return (1U << type) & mask;
+}
-#define BTREE_ID_HAS_PTRS \
- ((1U << BTREE_ID_extents)| \
- (1U << BTREE_ID_reflink))
+/* Extent-ness by btree id: wraps the node-type check at leaf level (0): */
+static inline bool btree_id_is_extents(enum btree_id btree)
+{
+	return btree_node_type_is_extents(__btree_node_type(0, btree));
+}
 static inline bool btree_type_has_snapshots(enum btree_id id)
 {
-	return (1 << id) & BTREE_ID_HAS_SNAPSHOTS;
+	/* Mask of btrees whose BCH_BTREE_IDS() entry sets BTREE_ID_SNAPSHOTS: */
+	const unsigned mask = 0
+#define x(name, nr, flags, ...)	|((!!((flags) & BTREE_ID_SNAPSHOTS)) << nr)
+	BCH_BTREE_IDS()
+#undef x
+	;
+
+	return (1U << id) & mask;
 }
-static inline bool btree_type_has_ptrs(enum btree_id id)
+/* True if keys in @id carry a snapshot field (snapshotted or snapshot-aware): */
+static inline bool btree_type_has_snapshot_field(enum btree_id id)
 {
-	return (1 << id) & BTREE_ID_HAS_PTRS;
+	const unsigned mask = 0
+#define x(name, nr, flags, ...)	|((!!((flags) & (BTREE_ID_SNAPSHOT_FIELD|BTREE_ID_SNAPSHOTS))) << nr)
+	BCH_BTREE_IDS()
+#undef x
+	;
+
+	return (1U << id) & mask;
 }
-static inline bool btree_node_type_needs_gc(enum btree_node_type type)
+/* True if keys in @id contain data pointers (BTREE_ID_DATA flag): */
+static inline bool btree_type_has_ptrs(enum btree_id id)
 {
-	return BTREE_NODE_TYPE_HAS_TRIGGERS & (1U << type);
+	const unsigned mask = 0
+#define x(name, nr, flags, ...)	|((!!((flags) & BTREE_ID_DATA)) << nr)
+	BCH_BTREE_IDS()
+#undef x
+	;
+
+	return (1U << id) & mask;
 }
struct btree_root {
s8 error;
};
-enum btree_insert_ret {
- BTREE_INSERT_OK,
- /* leaf node needs to be split */
- BTREE_INSERT_BTREE_NODE_FULL,
- BTREE_INSERT_NEED_MARK_REPLICAS,
- BTREE_INSERT_NEED_JOURNAL_RES,
- BTREE_INSERT_NEED_JOURNAL_RECLAIM,
-};
-
enum btree_gc_coalesce_fail_reason {
BTREE_GC_COALESCE_FAIL_RESERVE_GET,
BTREE_GC_COALESCE_FAIL_KEYLIST_REALLOC,