X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libbcachefs%2Falloc_types.h;h=b91b7a46105608d089828db3bd65d1cc359475af;hb=d101ad4a61ce48c498936b28eedcf0e01a568d49;hp=1bf48ef93a7390734c4bb9a8e0a1d042b39c402d;hpb=5ec39af8eaba49aee7bafa44c661da39e2f40dc3;p=bcachefs-tools-debian diff --git a/libbcachefs/alloc_types.h b/libbcachefs/alloc_types.h index 1bf48ef..b91b7a4 100644 --- a/libbcachefs/alloc_types.h +++ b/libbcachefs/alloc_types.h @@ -1,102 +1,126 @@ -#ifndef _BCACHE_ALLOC_TYPES_H -#define _BCACHE_ALLOC_TYPES_H +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_ALLOC_TYPES_H +#define _BCACHEFS_ALLOC_TYPES_H #include +#include #include "clock_types.h" +#include "fifo.h" + +struct bucket_alloc_state { + u64 buckets_seen; + u64 skipped_open; + u64 skipped_need_journal_commit; + u64 skipped_nocow; + u64 skipped_nouse; +}; + +#define BCH_WATERMARKS() \ + x(stripe) \ + x(normal) \ + x(copygc) \ + x(btree) \ + x(btree_copygc) \ + x(reclaim) + +enum bch_watermark { +#define x(name) BCH_WATERMARK_##name, + BCH_WATERMARKS() +#undef x + BCH_WATERMARK_NR, +}; + +#define BCH_WATERMARK_BITS 3 +#define BCH_WATERMARK_MASK ~(~0U << BCH_WATERMARK_BITS) + +#define OPEN_BUCKETS_COUNT 1024 + +#define WRITE_POINT_HASH_NR 32 +#define WRITE_POINT_MAX 32 /* - * There's two of these clocks, one for reads and one for writes: - * - * All fields protected by bucket_lock + * 0 is never a valid open_bucket_idx_t: */ -struct prio_clock { +typedef u16 open_bucket_idx_t; + +struct open_bucket { + spinlock_t lock; + atomic_t pin; + open_bucket_idx_t freelist; + open_bucket_idx_t hash; + /* - * "now" in (read/write) IO time - incremented whenever we do X amount - * of reads or writes. - * - * Goes with the bucket read/write prios: when we read or write to a - * bucket we reset the bucket's prio to the current hand; thus hand - - * prio = time since bucket was last read/written. - * - * The units are some amount (bytes/sectors) of data read/written, and - * the units can change on the fly if we need to rescale to fit - * everything in a u16 - your only guarantee is that the units are - * consistent. + * When an open bucket has an ec_stripe attached, this is the index of + * the block in the stripe this open_bucket corresponds to: */ - u16 hand; - u16 min_prio; + u8 ec_idx; + enum bch_data_type data_type:6; + unsigned valid:1; + unsigned on_partial_list:1; + + u8 dev; + u8 gen; + u32 sectors_free; + u64 bucket; + struct ec_stripe_new *ec; +}; - int rw; +#define OPEN_BUCKET_LIST_MAX 15 - struct io_timer rescale; +struct open_buckets { + open_bucket_idx_t nr; + open_bucket_idx_t v[OPEN_BUCKET_LIST_MAX]; }; -/* There is one reserve for each type of btree, one for prios and gens - * and one for moving GC */ -enum alloc_reserve { - RESERVE_PRIO, - RESERVE_BTREE, - RESERVE_METADATA_LAST = RESERVE_BTREE, - RESERVE_MOVINGGC, - - RESERVE_NONE, - RESERVE_NR, +struct dev_stripe_state { + u64 next_alloc[BCH_SB_MEMBERS_MAX]; }; -static inline bool allocation_is_metadata(enum alloc_reserve id) -{ - return id <= RESERVE_METADATA_LAST; -} +#define WRITE_POINT_STATES() \ + x(stopped) \ + x(waiting_io) \ + x(waiting_work) \ + x(running) + +enum write_point_state { +#define x(n) WRITE_POINT_##n, + WRITE_POINT_STATES() +#undef x + WRITE_POINT_STATE_NR +}; -struct dev_group { - spinlock_t lock; - unsigned nr; - unsigned cur_device; +struct write_point { struct { - u64 weight; - struct bch_dev *dev; - } d[BCH_SB_MEMBERS_MAX]; -}; + struct hlist_node node; + struct mutex lock; + u64 last_used; + unsigned long write_point; + enum bch_data_type data_type; -/* Enough for 16 cache devices, 2 tiers and some left over for pipelining */ -#define OPEN_BUCKETS_COUNT 256 + /* calculated based on how many pointers we're actually going to use: */ + unsigned sectors_free; -#define WRITE_POINT_COUNT 16 + struct open_buckets ptrs; + struct dev_stripe_state stripe; -struct open_bucket { - struct list_head list; - struct mutex lock; - atomic_t pin; - bool has_full_ptrs; - /* - * recalculated every time we allocate from this open_bucket based on - * how many pointers we're actually going to use: - */ - unsigned sectors_free; - unsigned nr_ptrs; - struct bch_extent_ptr ptrs[BCH_REPLICAS_MAX]; - unsigned ptr_offset[BCH_REPLICAS_MAX]; -}; + u64 sectors_allocated; + } __aligned(SMP_CACHE_BYTES); -struct write_point { - struct open_bucket *b; + struct { + struct work_struct index_update_work; - /* - * Throttle writes to this write point if tier 0 is full? - */ - bool throttle; + struct list_head writes; + spinlock_t writes_lock; - /* - * If not NULL, cache group for tiering, promotion and moving GC - - * always allocates a single replica - */ - struct dev_group *group; + enum write_point_state state; + u64 last_state_change; + u64 time[WRITE_POINT_STATE_NR]; + } __aligned(SMP_CACHE_BYTES); +}; - /* - * Otherwise do a normal replicated bucket allocation that could come - * from any device in tier 0 (foreground write) - */ +struct write_point_specifier { + unsigned long v; }; -#endif /* _BCACHE_ALLOC_TYPES_H */ +#endif /* _BCACHEFS_ALLOC_TYPES_H */