+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_BSET_H
#define _BCACHEFS_BSET_H
#include <linux/kernel.h>
#include <linux/types.h>
-#include "bcachefs_format.h"
+#include "bcachefs.h"
#include "bkey.h"
#include "bkey_methods.h"
#include "btree_types.h"
* first key in that range of bytes again.
*/
-extern bool bch2_expensive_debug_checks;
-
-static inline bool btree_keys_expensive_checks(const struct btree *b)
-{
-#ifdef CONFIG_BCACHEFS_DEBUG
- return bch2_expensive_debug_checks || *b->expensive_debug_checks;
-#else
- return false;
-#endif
-}
-
-struct btree_node_iter;
-struct btree_node_iter_set;
-
enum bset_aux_tree_type {
BSET_NO_AUX_TREE,
BSET_RO_AUX_TREE,
}
}
-typedef void (*compiled_unpack_fn)(struct bkey *, const struct bkey_packed *);
-
-static inline struct bkey
-bkey_unpack_key_format_checked(const struct btree *b,
- const struct bkey_packed *src)
-{
- struct bkey dst;
-
-#ifdef HAVE_BCACHEFS_COMPILED_UNPACK
- {
- compiled_unpack_fn unpack_fn = b->aux_data;
- unpack_fn(&dst, src);
-
- if (btree_keys_expensive_checks(b)) {
- struct bkey dst2 = __bch2_bkey_unpack_key(&b->format, src);
-
- /*
- * hack around a harmless race when compacting whiteouts
- * for a write:
- */
- dst2.needs_whiteout = dst.needs_whiteout;
-
- BUG_ON(memcmp(&dst, &dst2, sizeof(dst)));
- }
- }
-#else
- dst = __bch2_bkey_unpack_key(&b->format, src);
-#endif
- return dst;
-}
-
-/**
- * bkey_unpack_key -- unpack just the key, not the value
+/*
+ * BSET_CACHELINE was originally intended to match the hardware cacheline size -
+ * it used to be 64, but I realized the lookup code would touch slightly less
+ * memory if it was 128.
+ *
+ * It defines the number of bytes (in struct bset) per struct bkey_float in
+ * the auxiliary search tree - when we're done searching the bkey_float tree we
+ * have this many bytes left that we do a linear search over.
+ *
+ * Since (after level 5) every level of the bset_tree is on a new cacheline,
+ * we're touching one fewer cacheline in the bset tree in exchange for one more
+ * cacheline in the linear search - but the linear search might stop before it
+ * gets to the second cacheline.
*/
-static inline struct bkey bkey_unpack_key(const struct btree *b,
- const struct bkey_packed *src)
-{
- return likely(bkey_packed(src))
- ? bkey_unpack_key_format_checked(b, src)
- : *packed_to_bkey_c(src);
-}
-static inline struct bpos
-bkey_unpack_pos_format_checked(const struct btree *b,
- const struct bkey_packed *src)
-{
-#ifdef HAVE_BCACHEFS_COMPILED_UNPACK
- return bkey_unpack_key_format_checked(b, src).p;
-#else
- return __bkey_unpack_pos(&b->format, src);
-#endif
-}
+#define BSET_CACHELINE 256
-static inline struct bpos bkey_unpack_pos(const struct btree *b,
- const struct bkey_packed *src)
+static inline size_t btree_keys_cachelines(const struct btree *b)
{
- return likely(bkey_packed(src))
- ? bkey_unpack_pos_format_checked(b, src)
- : packed_to_bkey_c(src)->p;
+ return (1U << b->byte_order) / BSET_CACHELINE;
}
-/* Disassembled bkeys */
-
-static inline struct bkey_s_c bkey_disassemble(struct btree *b,
- const struct bkey_packed *k,
- struct bkey *u)
+static inline size_t btree_aux_data_bytes(const struct btree *b)
{
- *u = bkey_unpack_key(b, k);
-
- return (struct bkey_s_c) { u, bkeyp_val(&b->format, k), };
+ return btree_keys_cachelines(b) * 8;
}
-/* non const version: */
-static inline struct bkey_s __bkey_disassemble(struct btree *b,
- struct bkey_packed *k,
- struct bkey *u)
+static inline size_t btree_aux_data_u64s(const struct btree *b)
{
- *u = bkey_unpack_key(b, k);
-
- return (struct bkey_s) { .k = u, .v = bkeyp_val(&b->format, k), };
+ return btree_aux_data_bytes(b) / sizeof(u64);
}
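+
+/*
+ * Worked example (illustrative numbers, not from this patch): for a btree
+ * node with b->byte_order == 16 (i.e. 64k of keys, per the shift above):
+ *
+ *	btree_keys_cachelines(b) == (1U << 16) / 256 == 256
+ *	btree_aux_data_bytes(b)  == 256 * 8          == 2048
+ *	btree_aux_data_u64s(b)   == 2048 / 8         == 256
+ *
+ * i.e. one u64 of aux data per BSET_CACHELINE-sized chunk of the node.
+ */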
-#define for_each_bset(_b, _t) \
+#define for_each_bset(_b, _t) \
for (_t = (_b)->set; _t < (_b)->set + (_b)->nsets; _t++)
-static inline bool bset_has_ro_aux_tree(struct bset_tree *t)
+#define bset_tree_for_each_key(_b, _t, _k) \
+ for (_k = btree_bkey_first(_b, _t); \
+ _k != btree_bkey_last(_b, _t); \
+ _k = bkey_p_next(_k))
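+
+/*
+ * Example usage (sketch, not part of this patch) - count the live keys in a
+ * node:
+ *
+ *	struct bset_tree *t;
+ *	struct bkey_packed *k;
+ *	unsigned live = 0;
+ *
+ *	for_each_bset(b, t)
+ *		bset_tree_for_each_key(b, t, k)
+ *			if (!bkey_deleted(k))
+ *				live++;
+ */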
+
+static inline bool bset_has_ro_aux_tree(const struct bset_tree *t)
{
return bset_aux_tree_type(t) == BSET_RO_AUX_TREE;
}
return ((void *) i) + round_up(vstruct_bytes(i), block_bytes);
}
-void bch2_btree_keys_free(struct btree *);
-int bch2_btree_keys_alloc(struct btree *, unsigned, gfp_t);
-void bch2_btree_keys_init(struct btree *, bool *);
+void bch2_btree_keys_init(struct btree *);
void bch2_bset_init_first(struct btree *, struct bset *);
-void bch2_bset_init_next(struct btree *, struct bset *);
+void bch2_bset_init_next(struct bch_fs *, struct btree *,
+ struct btree_node_entry *);
void bch2_bset_build_aux_tree(struct btree *, struct bset_tree *, bool);
-void bch2_bset_fix_invalidated_key(struct btree *, struct bset_tree *,
- struct bkey_packed *);
void bch2_bset_insert(struct btree *, struct btree_node_iter *,
struct bkey_packed *, struct bkey_i *, unsigned);
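+
+/*
+ * Compare a packed key @l against a search position: @r is the position, and
+ * @r_packed is an optional pre-packed copy of it (NULL when @r doesn't pack
+ * into this node's format), letting the common case compare in packed form:
+ */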
static inline int bkey_cmp_p_or_unp(const struct btree *b,
const struct bkey_packed *l,
const struct bkey_packed *r_packed,
- struct bpos *r)
+ const struct bpos *r)
{
EBUG_ON(r_packed && !bkey_packed(r_packed));
if (unlikely(!bkey_packed(l)))
- return bkey_cmp(packed_to_bkey_c(l)->p, *r);
+ return bpos_cmp(packed_to_bkey_c(l)->p, *r);
if (likely(r_packed))
return __bch2_bkey_cmp_packed_format_checked(l, r_packed, b);
return __bch2_bkey_cmp_left_packed_format_checked(b, l, r);
}
-/* Returns true if @k is after iterator position @pos */
-static inline bool btree_iter_pos_cmp(struct bpos pos, const struct bkey *k,
- bool strictly_greater)
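+
+/*
+ * Map a key to the bset it lives in via its byte offset within the node;
+ * each bset_tree covers a contiguous range of offsets ending at end_offset:
+ */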
+static inline struct bset_tree *
+bch2_bkey_to_bset_inlined(struct btree *b, struct bkey_packed *k)
{
- int cmp = bkey_cmp(k->p, pos);
+ unsigned offset = __btree_node_key_to_offset(b, k);
+ struct bset_tree *t;
- return cmp > 0 ||
- (cmp == 0 && !strictly_greater && !bkey_deleted(k));
+ for_each_bset(b, t)
+ if (offset <= t->end_offset) {
+ EBUG_ON(offset < btree_bkey_first_offset(t));
+ return t;
+ }
+
+ BUG();
}
-static inline bool btree_iter_pos_cmp_packed(const struct btree *b,
- struct bpos *pos,
- const struct bkey_packed *k,
- bool strictly_greater)
-{
- int cmp = bkey_cmp_left_packed(b, k, pos);
+struct bset_tree *bch2_bkey_to_bset(struct btree *, struct bkey_packed *);
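+
+/*
+ * Walk backwards from @k within @t; the unsigned argument is a minimum key
+ * type - per the wrappers below, 0 (bch2_bkey_prev_all) returns everything,
+ * deleted keys included, while 1 (bch2_bkey_prev) skips over deleted keys:
+ */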
- return cmp > 0 ||
- (cmp == 0 && !strictly_greater && !bkey_deleted(k));
-}
+struct bkey_packed *bch2_bkey_prev_filter(struct btree *, struct bset_tree *,
+ struct bkey_packed *, unsigned);
-static inline bool btree_iter_pos_cmp_p_or_unp(const struct btree *b,
- struct bpos pos,
- const struct bkey_packed *pos_packed,
- const struct bkey_packed *k,
- bool strictly_greater)
+static inline struct bkey_packed *
+bch2_bkey_prev_all(struct btree *b, struct bset_tree *t, struct bkey_packed *k)
{
- int cmp = bkey_cmp_p_or_unp(b, k, pos_packed, &pos);
-
- return cmp > 0 ||
- (cmp == 0 && !strictly_greater && !bkey_deleted(k));
+ return bch2_bkey_prev_filter(b, t, k, 0);
}
-struct bset_tree *bch2_bkey_to_bset(struct btree *, struct bkey_packed *);
-struct bkey_packed *bch2_bkey_prev_all(struct btree *, struct bset_tree *,
- struct bkey_packed *);
-struct bkey_packed *bch2_bkey_prev(struct btree *, struct bset_tree *,
- struct bkey_packed *);
-
-enum bch_extent_overlap {
- BCH_EXTENT_OVERLAP_ALL = 0,
- BCH_EXTENT_OVERLAP_BACK = 1,
- BCH_EXTENT_OVERLAP_FRONT = 2,
- BCH_EXTENT_OVERLAP_MIDDLE = 3,
-};
-
-/* Returns how k overlaps with m */
-static inline enum bch_extent_overlap bch2_extent_overlap(const struct bkey *k,
- const struct bkey *m)
+static inline struct bkey_packed *
+bch2_bkey_prev(struct btree *b, struct bset_tree *t, struct bkey_packed *k)
{
- int cmp1 = bkey_cmp(k->p, m->p) < 0;
- int cmp2 = bkey_cmp(bkey_start_pos(k),
- bkey_start_pos(m)) > 0;
-
- return (cmp1 << 1) + cmp2;
+ return bch2_bkey_prev_filter(b, t, k, 1);
}
/* Btree key iteration */
-struct btree_node_iter {
- u8 is_extents;
- u16 used;
-
- struct btree_node_iter_set {
- u16 k, end;
- } data[MAX_BSETS];
-};
-
-static inline void __bch2_btree_node_iter_init(struct btree_node_iter *iter,
- bool is_extents)
-{
- iter->used = 0;
- iter->is_extents = is_extents;
-}
-
void bch2_btree_node_iter_push(struct btree_node_iter *, struct btree *,
const struct bkey_packed *,
const struct bkey_packed *);
void bch2_btree_node_iter_init(struct btree_node_iter *, struct btree *,
- struct bpos, bool, bool);
+ struct bpos *);
void bch2_btree_node_iter_init_from_start(struct btree_node_iter *,
- struct btree *, bool);
+ struct btree *);
struct bkey_packed *bch2_btree_node_iter_bset_pos(struct btree_node_iter *,
struct btree *,
struct bset_tree *);
void bch2_btree_node_iter_sort(struct btree_node_iter *, struct btree *);
+void bch2_btree_node_iter_set_drop(struct btree_node_iter *,
+ struct btree_node_iter_set *);
void bch2_btree_node_iter_advance(struct btree_node_iter *, struct btree *);
-#define btree_node_iter_for_each(_iter, _set) \
- for (_set = (_iter)->data; \
- _set < (_iter)->data + (_iter)->used; \
+#define btree_node_iter_for_each(_iter, _set) \
+ for (_set = (_iter)->data; \
+ _set < (_iter)->data + ARRAY_SIZE((_iter)->data) && \
+ (_set)->k != (_set)->end; \
_set++)
+static inline bool __btree_node_iter_set_end(struct btree_node_iter *iter,
+ unsigned i)
+{
+ return iter->data[i].k == iter->data[i].end;
+}
+
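+/*
+ * Iterator sets are kept sorted, and exhausted sets are dropped (see
+ * bch2_btree_node_iter_set_drop()), so the whole iterator is empty exactly
+ * when its first set is:
+ */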
static inline bool bch2_btree_node_iter_end(struct btree_node_iter *iter)
{
- return !iter->used;
+ return __btree_node_iter_set_end(iter, 0);
}
-static inline int __btree_node_iter_cmp(bool is_extents,
- struct btree *b,
- struct bkey_packed *l,
- struct bkey_packed *r)
+/*
+ * When keys compare equal, deleted keys compare first:
+ *
+ * XXX: only need to compare pointers for keys that are both within a
+ * btree_node_iterator - we need to break ties for prev() to work correctly
+ */
+static inline int bkey_iter_cmp(const struct btree *b,
+ const struct bkey_packed *l,
+ const struct bkey_packed *r)
{
- /*
- * For non extents, when keys compare equal the deleted keys have to
- * come first - so that bch2_btree_node_iter_next_check() can detect
- * duplicate nondeleted keys (and possibly other reasons?)
- *
- * For extents, bkey_deleted() is used as a proxy for k->size == 0, so
- * deleted keys have to sort last.
- */
- return bkey_cmp_packed(b, l, r) ?: is_extents
- ? (int) bkey_deleted(l) - (int) bkey_deleted(r)
- : (int) bkey_deleted(r) - (int) bkey_deleted(l);
+ return bch2_bkey_cmp_packed(b, l, r)
+ ?: (int) bkey_deleted(r) - (int) bkey_deleted(l)
+ ?: cmp_int(l, r);
}
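+
+/*
+ * e.g. for a deleted key l and a live key r at the same position:
+ * bkey_deleted(r) - bkey_deleted(l) == 0 - 1 == -1, so the deleted key sorts
+ * first; two bitwise-equal keys in different bsets fall through to
+ * cmp_int(l, r), keeping the ordering total so prev() has a unique answer.
+ */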
-static inline int btree_node_iter_cmp(struct btree_node_iter *iter,
- struct btree *b,
+static inline int btree_node_iter_cmp(const struct btree *b,
struct btree_node_iter_set l,
struct btree_node_iter_set r)
{
- return __btree_node_iter_cmp(iter->is_extents, b,
+ return bkey_iter_cmp(b,
__btree_node_offset_to_key(b, l.k),
__btree_node_offset_to_key(b, r.k));
}
-static inline void __bch2_btree_node_iter_push(struct btree_node_iter *iter,
- struct btree *b,
- const struct bkey_packed *k,
- const struct bkey_packed *end)
+/* These assume r (the search key) is not a deleted key: */
+static inline int bkey_iter_pos_cmp(const struct btree *b,
+ const struct bkey_packed *l,
+ const struct bpos *r)
{
- if (k != end)
- iter->data[iter->used++] = (struct btree_node_iter_set) {
- __btree_node_key_to_offset(b, k),
- __btree_node_key_to_offset(b, end)
- };
+ return bkey_cmp_left_packed(b, l, r)
+ ?: -((int) bkey_deleted(l));
+}
+
+static inline int bkey_iter_cmp_p_or_unp(const struct btree *b,
+ const struct bkey_packed *l,
+ const struct bkey_packed *r_packed,
+ const struct bpos *r)
+{
+ return bkey_cmp_p_or_unp(b, l, r_packed, r)
+ ?: -((int) bkey_deleted(l));
}
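+
+/*
+ * In both helpers the -bkey_deleted(l) term makes a deleted key at the
+ * search position compare less than it, so searches land after any deleted
+ * keys sitting exactly at @r.
+ */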
static inline struct bkey_packed *
__bch2_btree_node_iter_peek_all(struct btree_node_iter *iter,
- struct btree *b)
+ struct btree *b)
{
return __btree_node_offset_to_key(b, iter->data->k);
}
static inline struct bkey_packed *
-bch2_btree_node_iter_peek_all(struct btree_node_iter *iter,
- struct btree *b)
+bch2_btree_node_iter_peek_all(struct btree_node_iter *iter, struct btree *b)
{
- return bch2_btree_node_iter_end(iter)
- ? NULL
- : __bch2_btree_node_iter_peek_all(iter, b);
+ return !bch2_btree_node_iter_end(iter)
+ ? __btree_node_offset_to_key(b, iter->data->k)
+ : NULL;
}
static inline struct bkey_packed *
bch2_btree_node_iter_peek(struct btree_node_iter *iter, struct btree *b)
{
- struct bkey_packed *ret;
+ struct bkey_packed *k;
- while ((ret = bch2_btree_node_iter_peek_all(iter, b)) &&
- bkey_deleted(ret))
+ while ((k = bch2_btree_node_iter_peek_all(iter, b)) &&
+ bkey_deleted(k))
bch2_btree_node_iter_advance(iter, b);
- return ret;
+ return k;
}
static inline struct bkey_packed *
}
struct bkey_packed *bch2_btree_node_iter_prev_all(struct btree_node_iter *,
- struct btree *);
+ struct btree *);
struct bkey_packed *bch2_btree_node_iter_prev(struct btree_node_iter *,
- struct btree *);
-
-/*
- * Iterates over all _live_ keys - skipping deleted (and potentially
- * overlapping) keys
- */
-#define for_each_btree_node_key(b, k, iter, _is_extents) \
- for (bch2_btree_node_iter_init_from_start((iter), (b), (_is_extents));\
- ((k) = bch2_btree_node_iter_peek(iter, b)); \
- bch2_btree_node_iter_advance(iter, b))
+ struct btree *);
struct bkey_s_c bch2_btree_node_iter_peek_unpack(struct btree_node_iter *,
struct btree *,
struct bkey *);
-#define for_each_btree_node_key_unpack(b, k, iter, _is_extents, unpacked)\
- for (bch2_btree_node_iter_init_from_start((iter), (b), (_is_extents));\
+#define for_each_btree_node_key(b, k, iter) \
+ for (bch2_btree_node_iter_init_from_start((iter), (b)); \
+ (k = bch2_btree_node_iter_peek((iter), (b))); \
+ bch2_btree_node_iter_advance(iter, b))
+
+#define for_each_btree_node_key_unpack(b, k, iter, unpacked) \
+ for (bch2_btree_node_iter_init_from_start((iter), (b)); \
(k = bch2_btree_node_iter_peek_unpack((iter), (b), (unpacked))).k;\
bch2_btree_node_iter_advance(iter, b))
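+
+/*
+ * Example usage (sketch): print every live key in a node, assuming a
+ * struct printbuf buf and struct bch_fs *c are in scope:
+ *
+ *	struct btree_node_iter iter;
+ *	struct bkey unpacked;
+ *	struct bkey_s_c k;
+ *
+ *	for_each_btree_node_key_unpack(b, k, &iter, &unpacked)
+ *		bch2_bkey_val_to_text(&buf, c, k);
+ */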
n->unpacked_keys += sign;
}
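+
+/*
+ * For when a value is resized in place, with no key added or dropped - only
+ * the u64s counts need adjusting:
+ */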
+static inline void btree_keys_account_val_delta(struct btree *b,
+ struct bkey_packed *k,
+ int delta)
+{
+ struct bset_tree *t = bch2_bkey_to_bset(b, k);
+
+ b->nr.live_u64s += delta;
+ b->nr.bset_u64s[t - b->set] += delta;
+}
+
#define btree_keys_account_key_add(_nr, _bset_idx, _k) \
btree_keys_account_key(_nr, _bset_idx, _k, 1)
#define btree_keys_account_key_drop(_nr, _bset_idx, _k) \
btree_keys_account_key(_nr, _bset_idx, _k, -1)
+#define btree_account_key_add(_b, _k) \
+ btree_keys_account_key(&(_b)->nr, \
+ bch2_bkey_to_bset(_b, _k) - (_b)->set, _k, 1)
+#define btree_account_key_drop(_b, _k) \
+ btree_keys_account_key(&(_b)->nr, \
+ bch2_bkey_to_bset(_b, _k) - (_b)->set, _k, -1)
+
struct bset_stats {
struct {
size_t nr, bytes;
} sets[BSET_TREE_NR_TYPES];
size_t floats;
- size_t failed_unpacked;
- size_t failed_prev;
- size_t failed_overflow;
+ size_t failed;
};
-void bch2_btree_keys_stats(struct btree *, struct bset_stats *);
-int bch2_bkey_print_bfloat(struct btree *, struct bkey_packed *,
- char *, size_t);
+void bch2_btree_keys_stats(const struct btree *, struct bset_stats *);
+void bch2_bfloat_to_text(struct printbuf *, struct btree *,
+ struct bkey_packed *);
/* Debug stuff */
-void bch2_dump_bset(struct btree *, struct bset *, unsigned);
-void bch2_dump_btree_node(struct btree *);
+void bch2_dump_bset(struct bch_fs *, struct btree *, struct bset *, unsigned);
+void bch2_dump_btree_node(struct bch_fs *, struct btree *);
void bch2_dump_btree_node_iter(struct btree *, struct btree_node_iter *);
#ifdef CONFIG_BCACHEFS_DEBUG
void __bch2_verify_btree_nr_keys(struct btree *);
void bch2_btree_node_iter_verify(struct btree_node_iter *, struct btree *);
-void bch2_verify_key_order(struct btree *, struct btree_node_iter *,
- struct bkey_packed *);
+void bch2_verify_insert_pos(struct btree *, struct bkey_packed *,
+ struct bkey_packed *, unsigned);
#else
static inline void __bch2_verify_btree_nr_keys(struct btree *b) {}
static inline void bch2_btree_node_iter_verify(struct btree_node_iter *iter,
struct btree *b) {}
-static inline void bch2_verify_key_order(struct btree *b,
- struct btree_node_iter *iter,
- struct bkey_packed *where) {}
+static inline void bch2_verify_insert_pos(struct btree *b,
+ struct bkey_packed *where,
+ struct bkey_packed *insert,
+ unsigned clobber_u64s) {}
#endif
static inline void bch2_verify_btree_nr_keys(struct btree *b)
{
- if (btree_keys_expensive_checks(b))
+ if (bch2_debug_check_btree_accounting)
__bch2_verify_btree_nr_keys(b);
}