X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libbcachefs%2Falloc_types.h;h=b91b7a46105608d089828db3bd65d1cc359475af;hb=d101ad4a61ce48c498936b28eedcf0e01a568d49;hp=1bf48ef93a7390734c4bb9a8e0a1d042b39c402d;hpb=5ec39af8eaba49aee7bafa44c661da39e2f40dc3;p=bcachefs-tools-debian

diff --git a/libbcachefs/alloc_types.h b/libbcachefs/alloc_types.h
index 1bf48ef..b91b7a4 100644
--- a/libbcachefs/alloc_types.h
+++ b/libbcachefs/alloc_types.h
@@ -1,102 +1,126 @@
-#ifndef _BCACHE_ALLOC_TYPES_H
-#define _BCACHE_ALLOC_TYPES_H
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_ALLOC_TYPES_H
+#define _BCACHEFS_ALLOC_TYPES_H
 
 #include <linux/mutex.h>
+#include <linux/spinlock.h>
 
 #include "clock_types.h"
+#include "fifo.h"
+
+struct bucket_alloc_state {	/* five u64 fields; going by the names, counters kept during bucket allocation — confirm against users */
+	u64	buckets_seen;
+	u64	skipped_open;	/* skipped_*: presumably buckets passed over for the named reason — confirm */
+	u64	skipped_need_journal_commit;
+	u64	skipped_nocow;
+	u64	skipped_nouse;
+};
+
+#define BCH_WATERMARKS()		\
+	x(stripe)			\
+	x(normal)			\
+	x(copygc)			\
+	x(btree)			\
+	x(btree_copygc)			\
+	x(reclaim)
+
+enum bch_watermark {	/* one BCH_WATERMARK_<name> per x() entry of BCH_WATERMARKS(), numbered in list order from 0 */
+#define x(name)	BCH_WATERMARK_##name,
+	BCH_WATERMARKS()
+#undef x
+	BCH_WATERMARK_NR,	/* count of entries generated by the list (6) */
+};
+
+#define BCH_WATERMARK_BITS	3	/* bit width of BCH_WATERMARK_MASK; 3 bits hold values 0-7 */
+#define BCH_WATERMARK_MASK	(~(~0U << BCH_WATERMARK_BITS))	/* low BCH_WATERMARK_BITS bits set, i.e. 0x7; parenthesized so it expands as one operand */
+
+#define OPEN_BUCKETS_COUNT	1024	/* fits in open_bucket_idx_t (u16); presumably the number of struct open_bucket slots — confirm */
+
+#define WRITE_POINT_HASH_NR	32	/* presumably the number of hash heads that write_point.node chains onto — confirm */
+#define WRITE_POINT_MAX		32	/* presumably the maximum number of write points — confirm */
 
 /*
- * There's two of these clocks, one for reads and one for writes:
- *
- * All fields protected by bucket_lock
+ * 0 is never a valid open_bucket_idx_t:
  */
-struct prio_clock {
+typedef u16			open_bucket_idx_t;	/* type of open_bucket.freelist/.hash and of open_buckets.nr/.v[] */
+
+struct open_bucket {
+	spinlock_t		lock;
+	atomic_t		pin;
+	open_bucket_idx_t	freelist;	/* presumably the next-entry link on a free list — confirm */
+	open_bucket_idx_t	hash;		/* presumably the next-entry link in a hash chain — confirm */
+
 	/*
-	 * "now" in (read/write) IO time - incremented whenever we do X amount
-	 * of reads or writes.
-	 *
-	 * Goes with the bucket read/write prios: when we read or write to a
-	 * bucket we reset the bucket's prio to the current hand; thus hand -
-	 * prio = time since bucket was last read/written.
-	 *
-	 * The units are some amount (bytes/sectors) of data read/written, and
-	 * the units can change on the fly if we need to rescale to fit
-	 * everything in a u16 - your only guarantee is that the units are
-	 * consistent.
+	 * When an open bucket has an ec_stripe attached, this is the index of
+	 * the block in the stripe this open_bucket corresponds to:
 	 */
-	u16			hand;
-	u16			min_prio;
+	u8			ec_idx;
+	enum bch_data_type	data_type:6;	/* data_type:6 + valid:1 + on_partial_list:1 = 8 bits of bitfield */
+	unsigned		valid:1;
+	unsigned		on_partial_list:1;
+
+	u8			dev;
+	u8			gen;
+	u32			sectors_free;
+	u64			bucket;
+	struct ec_stripe_new	*ec;	/* presumably the attached stripe that ec_idx indexes into; NULL if none — confirm */
+};
 
-	int			rw;
+#define OPEN_BUCKET_LIST_MAX	15	/* nr + v[15] = 16 open_bucket_idx_t (u16) = 32 bytes for struct open_buckets */
 
-	struct io_timer		rescale;
+struct open_buckets {
+	open_bucket_idx_t	nr;	/* presumably the number of entries of v[] in use — confirm */
+	open_bucket_idx_t	v[OPEN_BUCKET_LIST_MAX];
 };
 
-/* There is one reserve for each type of btree, one for prios and gens
- * and one for moving GC */
-enum alloc_reserve {
-	RESERVE_PRIO,
-	RESERVE_BTREE,
-	RESERVE_METADATA_LAST = RESERVE_BTREE,
-	RESERVE_MOVINGGC,
-
-	RESERVE_NONE,
-	RESERVE_NR,
+struct dev_stripe_state {
+	u64			next_alloc[BCH_SB_MEMBERS_MAX];	/* one u64 per possible member device; what the value encodes is not visible in this header */
 };
 
-static inline bool allocation_is_metadata(enum alloc_reserve id)
-{
-	return id <= RESERVE_METADATA_LAST;
-}
+#define WRITE_POINT_STATES()		\
+	x(stopped)			\
+	x(waiting_io)			\
+	x(waiting_work)			\
+	x(running)
+
+enum write_point_state {	/* one WRITE_POINT_<n> per x() entry of WRITE_POINT_STATES(), numbered in list order from 0 */
+#define x(n)	WRITE_POINT_##n,
+	WRITE_POINT_STATES()
+#undef x
+	WRITE_POINT_STATE_NR	/* count of states (4); sizes write_point.time[] */
+};
 
-struct dev_group {
-	spinlock_t		lock;
-	unsigned		nr;
-	unsigned		cur_device;
+struct write_point {	/* two anonymous member groups, each declared __aligned(SMP_CACHE_BYTES) */
 	struct {
-		u64		weight;
-		struct bch_dev	*dev;
-	}			d[BCH_SB_MEMBERS_MAX];
-};
+		struct hlist_node	node;
+		struct mutex		lock;
+		u64			last_used;
+		unsigned long		write_point;	/* same type as write_point_specifier.v; presumably the key this entry is looked up by — confirm */
+		enum bch_data_type	data_type;
 
-/* Enough for 16 cache devices, 2 tiers and some left over for pipelining */
-#define OPEN_BUCKETS_COUNT	256
+		/* calculated based on how many pointers we're actually going to use: */
+		unsigned		sectors_free;
 
-#define WRITE_POINT_COUNT	16
+		struct open_buckets	ptrs;
+		struct dev_stripe_state	stripe;
 
-struct open_bucket {
-	struct list_head	list;
-	struct mutex		lock;
-	atomic_t		pin;
-	bool			has_full_ptrs;
-	/*
-	 * recalculated every time we allocate from this open_bucket based on
-	 * how many pointers we're actually going to use:
-	 */
-	unsigned		sectors_free;
-	unsigned		nr_ptrs;
-	struct bch_extent_ptr	ptrs[BCH_REPLICAS_MAX];
-	unsigned		ptr_offset[BCH_REPLICAS_MAX];
-};
+		u64			sectors_allocated;
+	} __aligned(SMP_CACHE_BYTES);
 
-struct write_point {
-	struct open_bucket	*b;
+	struct {
+		struct work_struct	index_update_work;
 
-	/*
-	 * Throttle writes to this write point if tier 0 is full?
-	 */
-	bool			throttle;
+		struct list_head	writes;
+		spinlock_t		writes_lock;	/* presumably protects the writes list — confirm */
 
-	/*
-	 * If not NULL, cache group for tiering, promotion and moving GC -
-	 * always allocates a single replica
-	 */
-	struct dev_group	*group;
+		enum write_point_state	state;
+		u64			last_state_change;
+		u64			time[WRITE_POINT_STATE_NR];	/* one u64 per enum write_point_state value */
+	} __aligned(SMP_CACHE_BYTES);
+};
 
-	/*
-	 * Otherwise do a normal replicated bucket allocation that could come
-	 * from any device in tier 0 (foreground write)
-	 */
+struct write_point_specifier {	/* wrapper struct holding a single unsigned long; how v is interpreted is not visible in this header */
+	unsigned long		v;
 };
 
-#endif /* _BCACHE_ALLOC_TYPES_H */
+#endif /* _BCACHEFS_ALLOC_TYPES_H */