-#ifndef _BCACHE_ALLOC_TYPES_H
-#define _BCACHE_ALLOC_TYPES_H
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_ALLOC_TYPES_H
+#define _BCACHEFS_ALLOC_TYPES_H
#include <linux/mutex.h>
+#include <linux/spinlock.h>
#include "clock_types.h"
+#include "fifo.h"
+
+struct bucket_alloc_state { /* five u64 counters; NOTE(review): presumably tallied by the bucket allocator during one allocation attempt - confirm */
+ u64 buckets_seen; /* NOTE(review): presumably candidate buckets examined - confirm */
+ u64 skipped_open; /* NOTE(review): presumably candidates rejected because they were already open - confirm */
+ u64 skipped_need_journal_commit; /* NOTE(review): presumably rejected pending a journal commit - confirm */
+ u64 skipped_nocow; /* NOTE(review): presumably rejected due to nocow state - confirm */
+ u64 skipped_nouse; /* NOTE(review): presumably rejected because marked not-for-use - confirm */
+};
+
+#define BCH_WATERMARKS() /* X-macro list: invokes x(name) once per watermark; the order here fixes the enum values */ \
+ x(stripe) \
+ x(normal) \
+ x(copygc) \
+ x(btree) \
+ x(btree_copygc) \
+ x(reclaim)
+
+enum bch_watermark { /* one BCH_WATERMARK_<name> enumerator per BCH_WATERMARKS() entry, numbered from 0 in list order */
+#define x(name) BCH_WATERMARK_##name,
+ BCH_WATERMARKS()
+#undef x
+ BCH_WATERMARK_NR, /* number of watermarks (6 with the current list); not itself a watermark */
+};
+
+#define BCH_WATERMARK_BITS 3 /* 3 bits cover 0..7, enough for the six watermarks and BCH_WATERMARK_NR */
+#define BCH_WATERMARK_MASK ~(~0U << BCH_WATERMARK_BITS) /* unsigned mask with the low BCH_WATERMARK_BITS bits set (0x7) */
+
+#define OPEN_BUCKETS_COUNT 1024 /* NOTE(review): presumably the number of open_bucket slots; fits in open_bucket_idx_t (u16) - confirm */
+
+#define WRITE_POINT_HASH_NR 32 /* NOTE(review): presumably the number of write point hash heads - confirm */
+#define WRITE_POINT_MAX 32 /* NOTE(review): presumably the maximum number of write points - confirm */
/*
- * There's two of these clocks, one for reads and one for writes:
- *
- * All fields protected by bucket_lock
+ * 0 is never a valid open_bucket_idx_t:
*/
-struct prio_clock {
+typedef u16 open_bucket_idx_t; /* 16-bit open bucket index; type of open_bucket.freelist/.hash and of open_buckets.nr/.v[] */
+
+struct open_bucket { /* per-open-bucket state; NOTE(review): presumably describes the bucket numbered 'bucket' on device 'dev' - confirm */
+ spinlock_t lock;
+ atomic_t pin; /* NOTE(review): presumably a reference count on this entry - confirm */
+ open_bucket_idx_t freelist; /* NOTE(review): presumably the index of the next entry on a free list - confirm */
+ open_bucket_idx_t hash; /* NOTE(review): presumably the index of the next entry in a hash chain - confirm */
+
 /*
- * "now" in (read/write) IO time - incremented whenever we do X amount
- * of reads or writes.
- *
- * Goes with the bucket read/write prios: when we read or write to a
- * bucket we reset the bucket's prio to the current hand; thus hand -
- * prio = time since bucket was last read/written.
- *
- * The units are some amount (bytes/sectors) of data read/written, and
- * the units can change on the fly if we need to rescale to fit
- * everything in a u16 - your only guarantee is that the units are
- * consistent.
+ * When an open bucket has an ec_stripe attached, this is the index of
+ * the block in the stripe this open_bucket corresponds to:
 */
- u16 hand;
- u16 min_prio;
+ u8 ec_idx;
+ enum bch_data_type data_type:6; /* data_type (6) + valid (1) + on_partial_list (1) are bitfields totalling 8 bits */
+ unsigned valid:1;
+ unsigned on_partial_list:1;
+
+ u8 dev; /* NOTE(review): presumably the member device index - confirm */
+ u8 gen; /* NOTE(review): presumably the bucket generation number - confirm */
+ u32 sectors_free; /* NOTE(review): presumably sectors still unallocated in this bucket - confirm */
+ u64 bucket; /* NOTE(review): presumably the bucket number on 'dev' - confirm */
+ struct ec_stripe_new *ec; /* NOTE(review): presumably the attached stripe that ec_idx indexes into; may be unset - confirm */
+};
- int rw;
+#define OPEN_BUCKET_LIST_MAX 15 /* capacity of open_buckets.v[]; with nr that is sixteen u16s, 32 bytes */
- struct io_timer rescale;
+struct open_buckets { /* fixed-capacity array of open bucket indices plus a count field */
+ open_bucket_idx_t nr; /* NOTE(review): presumably the number of v[] entries in use - confirm */
+ open_bucket_idx_t v[OPEN_BUCKET_LIST_MAX]; /* index 0 is never valid, per the comment on open_bucket_idx_t */
};
-/* There is one reserve for each type of btree, one for prios and gens
- * and one for moving GC */
-enum alloc_reserve {
- RESERVE_PRIO,
- RESERVE_BTREE,
- RESERVE_METADATA_LAST = RESERVE_BTREE,
- RESERVE_MOVINGGC,
-
- RESERVE_NONE,
- RESERVE_NR,
+struct dev_stripe_state { /* holds BCH_SB_MEMBERS_MAX u64 slots */
+ u64 next_alloc[BCH_SB_MEMBERS_MAX]; /* NOTE(review): presumably indexed by device and used to spread allocations across devices - confirm */
};
-static inline bool allocation_is_metadata(enum alloc_reserve id)
-{
- return id <= RESERVE_METADATA_LAST;
-}
+#define WRITE_POINT_STATES() /* X-macro list: invokes x(name) once per write point state; the order here fixes the enum values */ \
+ x(stopped) \
+ x(waiting_io) \
+ x(waiting_work) \
+ x(running)
+
+enum write_point_state { /* one WRITE_POINT_<name> enumerator per WRITE_POINT_STATES() entry, numbered from 0 in list order */
+#define x(n) WRITE_POINT_##n,
+ WRITE_POINT_STATES()
+#undef x
+ WRITE_POINT_STATE_NR /* number of states (4 with the current list); sizes write_point.time[] */
+};
-struct dev_group {
- spinlock_t lock;
- unsigned nr;
- unsigned cur_device;
+struct write_point { /* two anonymous member structs, each declared __aligned(SMP_CACHE_BYTES) */
 struct {
- u64 weight;
- struct bch_dev *dev;
- } d[BCH_SB_MEMBERS_MAX];
-};
+ struct hlist_node node; /* NOTE(review): presumably links this write point into a hash table - confirm */
+ struct mutex lock;
+ u64 last_used;
+ unsigned long write_point; /* NOTE(review): presumably the same value carried in write_point_specifier.v - confirm */
+ enum bch_data_type data_type;
-/* Enough for 16 cache devices, 2 tiers and some left over for pipelining */
-#define OPEN_BUCKETS_COUNT 256
+ /* calculated based on how many pointers we're actually going to use: */
+ unsigned sectors_free;
-#define WRITE_POINT_COUNT 16
+ struct open_buckets ptrs; /* embedded list of up to OPEN_BUCKET_LIST_MAX open bucket indices */
+ struct dev_stripe_state stripe; /* embedded next_alloc[] array */
-struct open_bucket {
- struct list_head list;
- struct mutex lock;
- atomic_t pin;
- bool has_full_ptrs;
- /*
- * recalculated every time we allocate from this open_bucket based on
- * how many pointers we're actually going to use:
- */
- unsigned sectors_free;
- unsigned nr_ptrs;
- struct bch_extent_ptr ptrs[BCH_REPLICAS_MAX];
- unsigned ptr_offset[BCH_REPLICAS_MAX];
-};
+ u64 sectors_allocated;
+ } __aligned(SMP_CACHE_BYTES);
-struct write_point {
- struct open_bucket *b;
+ struct {
+ struct work_struct index_update_work;
- /*
- * Throttle writes to this write point if tier 0 is full?
- */
- bool throttle;
+ struct list_head writes;
+ spinlock_t writes_lock; /* NOTE(review): presumably protects the 'writes' list - confirm */
- /*
- * If not NULL, cache group for tiering, promotion and moving GC -
- * always allocates a single replica
- */
- struct dev_group *group;
+ enum write_point_state state;
+ u64 last_state_change; /* NOTE(review): presumably a timestamp of the last change to 'state' - confirm */
+ u64 time[WRITE_POINT_STATE_NR]; /* one u64 per write_point_state; NOTE(review): presumably accumulated time per state - confirm */
+ } __aligned(SMP_CACHE_BYTES);
+};
- /*
- * Otherwise do a normal replicated bucket allocation that could come
- * from any device in tier 0 (foreground write)
- */
+struct write_point_specifier { /* struct wrapper around a single unsigned long */
+ unsigned long v; /* NOTE(review): presumably matched against write_point.write_point when looking up a write point - confirm */
};
-#endif /* _BCACHE_ALLOC_TYPES_H */
+#endif /* _BCACHEFS_ALLOC_TYPES_H */