git.sesse.net Git - bcachefs-tools-debian/blobdiff - libbcachefs/alloc_types.h
Update bcachefs sources to dab31ca168 bcachefs: Add some logging for btree node rewri...
[bcachefs-tools-debian] / libbcachefs / alloc_types.h
index 1bf48ef93a7390734c4bb9a8e0a1d042b39c402d..2e6f4806925871bf513f17235e6680e067e4471f 100644 (file)
-#ifndef _BCACHE_ALLOC_TYPES_H
-#define _BCACHE_ALLOC_TYPES_H
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_ALLOC_TYPES_H
+#define _BCACHEFS_ALLOC_TYPES_H
 
 #include <linux/mutex.h>
+#include <linux/spinlock.h>
 
 #include "clock_types.h"
+#include "fifo.h"
+
+struct bucket_alloc_state {    /* five u64 fields; NOTE(review): names suggest per-allocation statistics (buckets examined vs. skipped, by reason) - confirm against the allocator */
+       u64     buckets_seen;   /* presumably total buckets examined - TODO confirm */
+       u64     skipped_open;   /* presumably skipped because the bucket was already open - TODO confirm */
+       u64     skipped_need_journal_commit;    /* presumably skipped pending a journal commit - TODO confirm */
+       u64     skipped_nocow;  /* presumably skipped due to nocow state - TODO confirm */
+       u64     skipped_nouse;  /* presumably skipped because marked not-for-use - TODO confirm */
+};
+
+struct ec_bucket_buf;
+
+#define BCH_ALLOC_RESERVES()           \
+       x(btree_movinggc)               \
+       x(btree)                        \
+       x(movinggc)                     \
+       x(none)        /* x-macro list: the user defines x(name) before expanding BCH_ALLOC_RESERVES() */
+
+enum alloc_reserve {   /* generated from BCH_ALLOC_RESERVES(): RESERVE_btree_movinggc = 0, RESERVE_btree = 1, RESERVE_movinggc = 2, RESERVE_none = 3 */
+#define x(name)        RESERVE_##name, /* each list entry becomes one RESERVE_<name> enumerator, in list order */
+       BCH_ALLOC_RESERVES()
+#undef x       /* x is only defined for the duration of this expansion; no trailing count enumerator is emitted here */
+};
+
+#define OPEN_BUCKETS_COUNT     1024    /* NOTE(review): presumably the size of the open_bucket pool; 1024 is representable in open_bucket_idx_t (u16) */
+
+#define WRITE_POINT_HASH_NR    32      /* NOTE(review): presumably the number of write-point hash heads - confirm at the use site */
+#define WRITE_POINT_MAX                32      /* NOTE(review): presumably the maximum number of write points - confirm at the use site */
 
 /*
- * There's two of these clocks, one for reads and one for writes:
- *
- * All fields protected by bucket_lock
+ * 0 is never a valid open_bucket_idx_t:
  */
-struct prio_clock {
+typedef u16                    open_bucket_idx_t;      /* 16-bit index type used for open_bucket links and for the open_buckets list entries */
+
+struct open_bucket {   /* per-bucket state referenced by open_bucket_idx_t; NOTE(review): presumably one entry per bucket currently being written to - confirm */
+       spinlock_t              lock;
+       atomic_t                pin;    /* NOTE(review): presumably a reference count keeping the entry alive - confirm */
+       open_bucket_idx_t       freelist;       /* index link, not a pointer; presumably next entry on a free list - TODO confirm */
+       open_bucket_idx_t       hash;   /* index link, not a pointer; presumably next entry on a hash chain - TODO confirm */
+
        /*
-        * "now" in (read/write) IO time - incremented whenever we do X amount
-        * of reads or writes.
-        *
-        * Goes with the bucket read/write prios: when we read or write to a
-        * bucket we reset the bucket's prio to the current hand; thus hand -
-        * prio = time since bucket was last read/written.
-        *
-        * The units are some amount (bytes/sectors) of data read/written, and
-        * the units can change on the fly if we need to rescale to fit
-        * everything in a u16 - your only guarantee is that the units are
-        * consistent.
+        * When an open bucket has an ec_stripe attached, this is the index of
+        * the block in the stripe this open_bucket corresponds to:
         */
-       u16                     hand;
-       u16                     min_prio;
+       u8                      ec_idx;
+       enum bch_data_type      data_type:8;    /* enum stored in an 8-bit bitfield */
+       unsigned                valid:1;        /* 1-bit flag */
+       unsigned                on_partial_list:1;      /* 1-bit flag */
+       unsigned                alloc_reserve:3;        /* 3 bits (0..7); presumably holds an enum alloc_reserve value - TODO confirm */
+
+       u8                      dev;    /* NOTE(review): presumably the member-device index of the bucket - confirm */
+       u8                      gen;    /* NOTE(review): presumably the bucket generation number - confirm */
+       u32                     sectors_free;
+       u64                     bucket; /* NOTE(review): presumably the bucket number on dev - confirm */
+       struct ec_stripe_new    *ec;    /* type is not declared in this header; see the ec_idx comment for the stripe relationship */
+};
 
-       int                     rw;
+#define OPEN_BUCKET_LIST_MAX   15      /* capacity of the v[] array in struct open_buckets */
 
-       struct io_timer         rescale;
+struct open_buckets {  /* fixed-capacity list of open_bucket indices: one nr field plus OPEN_BUCKET_LIST_MAX index slots */
+       open_bucket_idx_t       nr;     /* NOTE(review): presumably the number of valid entries in v[] - confirm */
+       open_bucket_idx_t       v[OPEN_BUCKET_LIST_MAX];        /* entries are indices (open_bucket_idx_t), not pointers */
 };
 
-/* There is one reserve for each type of btree, one for prios and gens
- * and one for moving GC */
-enum alloc_reserve {
-       RESERVE_PRIO,
-       RESERVE_BTREE,
-       RESERVE_METADATA_LAST = RESERVE_BTREE,
-       RESERVE_MOVINGGC,
-
-       RESERVE_NONE,
-       RESERVE_NR,
+struct dev_stripe_state {      /* NOTE(review): presumably per-device bookkeeping for spreading allocations across member devices - confirm */
+       u64                     next_alloc[BCH_SB_MEMBERS_MAX]; /* one u64 slot per possible member (array sized by BCH_SB_MEMBERS_MAX, defined outside this header) */
 };
 
-static inline bool allocation_is_metadata(enum alloc_reserve id)
-{
-       return id <= RESERVE_METADATA_LAST;
-}
+#define WRITE_POINT_STATES()           \
+       x(stopped)                      \
+       x(waiting_io)                   \
+       x(waiting_work)                 \
+       x(running)     /* x-macro list: the user defines x(n) before expanding WRITE_POINT_STATES() */
+
+enum write_point_state {       /* generated from WRITE_POINT_STATES(): WRITE_POINT_stopped = 0 ... WRITE_POINT_running = 3 */
+#define x(n)   WRITE_POINT_##n,        /* each list entry becomes one WRITE_POINT_<n> enumerator, in list order */
+       WRITE_POINT_STATES()
+#undef x
+       WRITE_POINT_STATE_NR    /* follows the generated entries, so it equals the number of states; sizes write_point's time[] array */
+};
 
-struct dev_group {
-       spinlock_t              lock;
-       unsigned                nr;
-       unsigned                cur_device;
+struct write_point {   /* two anonymous member groups, each aligned to SMP_CACHE_BYTES; NOTE(review): presumably split so the two groups do not share a cache line - confirm */
        struct {
-               u64             weight;
-               struct bch_dev  *dev;
-       }                       d[BCH_SB_MEMBERS_MAX];
-};
+               struct hlist_node       node;   /* NOTE(review): presumably links this write point into a hash table (cf. WRITE_POINT_HASH_NR) - confirm */
+               struct mutex            lock;
+               u64                     last_used;
+               unsigned long           write_point;    /* same type as write_point_specifier.v */
+               enum bch_data_type      data_type;
 
-/* Enough for 16 cache devices, 2 tiers and some left over for pipelining */
-#define OPEN_BUCKETS_COUNT     256
+               /* calculated based on how many pointers we're actually going to use: */
+               unsigned                sectors_free;
 
-#define WRITE_POINT_COUNT      16
+               struct open_buckets     ptrs;   /* embedded by value: list of open_bucket indices */
+               struct dev_stripe_state stripe; /* embedded by value */
 
-struct open_bucket {
-       struct list_head        list;
-       struct mutex            lock;
-       atomic_t                pin;
-       bool                    has_full_ptrs;
-       /*
-        * recalculated every time we allocate from this open_bucket based on
-        * how many pointers we're actually going to use:
-        */
-       unsigned                sectors_free;
-       unsigned                nr_ptrs;
-       struct bch_extent_ptr   ptrs[BCH_REPLICAS_MAX];
-       unsigned                ptr_offset[BCH_REPLICAS_MAX];
-};
+               u64                     sectors_allocated;
+       } __attribute__((__aligned__(SMP_CACHE_BYTES)));        /* first group starts on an SMP_CACHE_BYTES boundary */
 
-struct write_point {
-       struct open_bucket      *b;
+       struct {
+               struct work_struct      index_update_work;
 
-       /*
-        * Throttle writes to this write point if tier 0 is full?
-        */
-       bool                    throttle;
+               struct list_head        writes;
+               spinlock_t              writes_lock;    /* NOTE(review): presumably protects the writes list - confirm */
 
-       /*
-        * If not NULL, cache group for tiering, promotion and moving GC -
-        * always allocates a single replica
-        */
-       struct dev_group        *group;
+               enum write_point_state  state;
+               u64                     last_state_change;
+               u64                     time[WRITE_POINT_STATE_NR];     /* one u64 slot per write_point_state value; presumably accumulated time per state - TODO confirm */
+       } __attribute__((__aligned__(SMP_CACHE_BYTES)));        /* second group also starts on an SMP_CACHE_BYTES boundary */
+};
 
-       /*
-        * Otherwise do a normal replicated bucket allocation that could come
-        * from any device in tier 0 (foreground write)
-        */
+struct write_point_specifier { /* single-member wrapper around an unsigned long; NOTE(review): presumably exists so specifiers are a distinct type from plain integers - confirm */
+       unsigned long           v;      /* same type as the write_point field inside struct write_point */
 };
 
-#endif /* _BCACHE_ALLOC_TYPES_H */
+#endif /* _BCACHEFS_ALLOC_TYPES_H */