-#ifndef _BCACHE_BTREE_TYPES_H
-#define _BCACHE_BTREE_TYPES_H
+#ifndef _BCACHEFS_BTREE_TYPES_H
+#define _BCACHEFS_BTREE_TYPES_H
#include <linux/list.h>
#include <linux/rhashtable.h>
-#include <linux/semaphore.h>
-#include <linux/workqueue.h>
#include "bkey_methods.h"
#include "journal_types.h"
#include "six.h"
struct open_bucket;
-struct btree_interior_update;
+struct btree_update;
#define MAX_BSETS 3U
struct closure_waitlist wait;
};
+struct btree_ob_ref {
+ u8 nr;
+ u8 refs[BCH_REPLICAS_MAX];
+};
+
+struct btree_alloc {
+ struct btree_ob_ref ob;
+ BKEY_PADDED(k);
+};
+
struct btree {
/* Hottest entries first */
struct rhash_head hash;
* node to point to them: we update the parent in memory immediately,
* but then we must wait until the children have been written out before
* the update to the parent can be written - this is a list of the
- * btree_interior_updates that are blocking this node from being
+ * btree_updates that are blocking this node from being
* written:
*/
struct list_head write_blocked;
* another write - because that write also won't yet be reachable and
* marking it as completed before it's reachable would be incorrect:
*/
- struct list_head reachable;
+ unsigned long will_make_reachable;
- struct open_bucket *ob;
+ struct btree_ob_ref ob;
/* lru list */
struct list_head list;
#endif
};
+struct btree_cache {
+ struct rhashtable table;
+ bool table_init_done;
+ /*
+ * We never free a struct btree, except on shutdown - we just put it on
+ * the btree_cache_freed list and reuse it later. This simplifies the
+ * code, and it doesn't cost us much memory as the memory usage is
+ * dominated by buffers that hold the actual btree node data and those
+ * can be freed - and the number of struct btrees allocated is
+ * effectively bounded.
+ *
+ * btree_cache_freeable effectively is a small cache - we use it because
+ * high order page allocations can be rather expensive, and it's quite
+ * common to delete and allocate btree nodes in quick succession. It
+ * should never grow past ~2-3 nodes in practice.
+ */
+ struct mutex lock;
+ struct list_head live;
+ struct list_head freeable;
+ struct list_head freed;
+
+ /* Number of elements in live + freeable lists */
+ unsigned used;
+ unsigned reserve;
+ struct shrinker shrink;
+
+ /*
+ * If we need to allocate memory for a new btree node and that
+ * allocation fails, we can cannibalize another node in the btree cache
+ * to satisfy the allocation - lock to guarantee only one thread does
+ * this at a time:
+ */
+ struct task_struct *alloc_lock;
+ struct closure_waitlist alloc_wait;
+};
+
+struct btree_node_iter {
+ u8 is_extents;
+
+ struct btree_node_iter_set {
+ u16 k, end;
+ } data[MAX_BSETS];
+};
+
+enum btree_iter_type {
+ BTREE_ITER_KEYS,
+ BTREE_ITER_SLOTS,
+ BTREE_ITER_NODES,
+};
+
+#define BTREE_ITER_TYPE ((1 << 2) - 1)
+
+#define BTREE_ITER_INTENT (1 << 2)
+#define BTREE_ITER_PREFETCH (1 << 3)
+/*
+ * Used in bch2_btree_iter_traverse(), to indicate whether we're searching for
+ * @pos or the first key strictly greater than @pos
+ */
+#define BTREE_ITER_IS_EXTENTS (1 << 4)
+/*
+ * indicates we need to call bch2_btree_iter_traverse() to revalidate iterator:
+ */
+#define BTREE_ITER_AT_END_OF_LEAF (1 << 5)
+#define BTREE_ITER_ERROR (1 << 6)
+
+enum btree_iter_uptodate {
+ BTREE_ITER_UPTODATE = 0,
+ BTREE_ITER_NEED_PEEK = 1,
+ BTREE_ITER_NEED_RELOCK = 2,
+ BTREE_ITER_NEED_TRAVERSE = 3,
+};
+
+/*
+ * @pos - iterator's current position
+ * @level - current btree depth
+ * @locks_want - btree level below which we start taking intent locks
+ * @nodes_locked - bitmask indicating which nodes in @nodes are locked
+ * @nodes_intent_locked - bitmask indicating which locks are intent locks
+ */
+struct btree_iter {
+ struct bch_fs *c;
+ struct bpos pos;
+
+ u8 flags;
+ enum btree_iter_uptodate uptodate:4;
+ enum btree_id btree_id:4;
+ unsigned level:4,
+ locks_want:4,
+ nodes_locked:4,
+ nodes_intent_locked:4;
+
+ struct btree_iter_level {
+ struct btree *b;
+ struct btree_node_iter iter;
+ } l[BTREE_MAX_DEPTH];
+
+ u32 lock_seq[BTREE_MAX_DEPTH];
+
+ /*
+ * Current unpacked key - so that bch2_btree_iter_next()/
+ * bch2_btree_iter_next_slot() can correctly advance pos.
+ */
+ struct bkey k;
+
+ /*
+ * Circular linked list of linked iterators: linked iterators share
+ * locks (e.g. two linked iterators may have the same node intent
+ * locked, or read and write locked, at the same time), and insertions
+ * through one iterator won't invalidate the other linked iterators.
+ */
+
+ /* Must come last: */
+ struct btree_iter *next;
+};
+
#define BTREE_FLAG(flag) \
static inline bool btree_node_ ## flag(struct btree *b) \
{ return test_bit(BTREE_NODE_ ## flag, &b->flags); } \
{ clear_bit(BTREE_NODE_ ## flag, &b->flags); }
enum btree_flags {
+ BTREE_NODE_read_in_flight,
BTREE_NODE_read_error,
- BTREE_NODE_write_error,
BTREE_NODE_dirty,
BTREE_NODE_need_write,
BTREE_NODE_noevict,
BTREE_NODE_accessed,
BTREE_NODE_write_in_flight,
BTREE_NODE_just_written,
+ BTREE_NODE_dying,
+ BTREE_NODE_fake,
};
+BTREE_FLAG(read_in_flight);
BTREE_FLAG(read_error);
-BTREE_FLAG(write_error);
BTREE_FLAG(dirty);
BTREE_FLAG(need_write);
BTREE_FLAG(noevict);
BTREE_FLAG(accessed);
BTREE_FLAG(write_in_flight);
BTREE_FLAG(just_written);
+BTREE_FLAG(dying);
+BTREE_FLAG(fake);
static inline struct btree_write *btree_current_write(struct btree *b)
{
static inline const struct bkey_ops *btree_node_ops(struct btree *b)
{
- return bch2_bkey_ops[btree_node_type(b)];
+ return &bch2_bkey_ops[btree_node_type(b)];
}
static inline bool btree_node_has_ptrs(struct btree *b)
struct btree_root {
struct btree *b;
- struct btree_interior_update *as;
+ struct btree_update *as;
/* On disk root - see async splits: */
__BKEY_PADDED(key, BKEY_BTREE_PTR_VAL_U64s_MAX);
struct btree_iter;
struct btree_node_iter;
-enum extent_insert_hook_ret {
- BTREE_HOOK_DO_INSERT,
- BTREE_HOOK_NO_INSERT,
- BTREE_HOOK_RESTART_TRANS,
-};
-
-struct extent_insert_hook {
- enum extent_insert_hook_ret
- (*fn)(struct extent_insert_hook *, struct bpos, struct bpos,
- struct bkey_s_c, const struct bkey_i *);
-};
-
enum btree_insert_ret {
BTREE_INSERT_OK,
/* extent spanned multiple leaf nodes: have to traverse to next node: */
BTREE_INSERT_NEED_GC_LOCK,
};
+struct extent_insert_hook {
+ enum btree_insert_ret
+ (*fn)(struct extent_insert_hook *, struct bpos, struct bpos,
+ struct bkey_s_c, const struct bkey_i *);
+};
+
enum btree_gc_coalesce_fail_reason {
BTREE_GC_COALESCE_FAIL_RESERVE_GET,
BTREE_GC_COALESCE_FAIL_KEYLIST_REALLOC,
BTREE_GC_COALESCE_FAIL_FORMAT_FITS,
};
+enum btree_node_sibling {
+ btree_prev_sib,
+ btree_next_sib,
+};
+
typedef struct btree_nr_keys (*sort_fix_overlapping_fn)(struct bset *,
struct btree *,
struct btree_node_iter *);
-#endif /* _BCACHE_BTREE_TYPES_H */
+#endif /* _BCACHEFS_BTREE_TYPES_H */