X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libbcachefs%2Fjournal_types.h;h=8eea12a03c06e424918e592c85ee02c5b8769a75;hb=fec2d17f20a2fd290a1f85eee0a5e1a1c5e9ddfd;hp=ebc340adbe2319218074e579b43c0eed58c0468c;hpb=5ec39af8eaba49aee7bafa44c661da39e2f40dc3;p=bcachefs-tools-debian diff --git a/libbcachefs/journal_types.h b/libbcachefs/journal_types.h index ebc340a..8eea12a 100644 --- a/libbcachefs/journal_types.h +++ b/libbcachefs/journal_types.h @@ -1,10 +1,12 @@ -#ifndef _BCACHE_JOURNAL_TYPES_H -#define _BCACHE_JOURNAL_TYPES_H +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_JOURNAL_TYPES_H +#define _BCACHEFS_JOURNAL_TYPES_H #include #include #include "alloc_types.h" +#include "super_types.h" #include "fifo.h" struct journal_res; @@ -15,14 +17,16 @@ struct journal_res; */ struct journal_buf { struct jset *data; - struct closure_waitlist wait; - /* - * ugh, prio_buckets are stupid - need to convert them to new - * transaction machinery when it arrives - */ - unsigned nr_prio_buckets; + BKEY_PADDED(key); + struct closure_waitlist wait; + + unsigned buf_size; /* size in bytes of @data */ + unsigned sectors; /* maximum size for current entry */ + unsigned disk_sectors; /* maximum size entry could have been, if + buf_size was bigger */ + unsigned u64s_reserved; /* bloom filter: */ unsigned long has_inode[1024 / sizeof(unsigned long)]; }; @@ -34,34 +38,20 @@ struct journal_buf { struct journal_entry_pin_list { struct list_head list; + struct list_head flushed; atomic_t count; + struct bch_devs_list devs; }; struct journal; struct journal_entry_pin; -typedef void (*journal_pin_flush_fn)(struct journal *j, struct journal_entry_pin *); +typedef void (*journal_pin_flush_fn)(struct journal *j, + struct journal_entry_pin *, u64); struct journal_entry_pin { struct list_head list; journal_pin_flush_fn flush; - struct journal_entry_pin_list *pin_list; -}; - -/* corresponds to a btree node with a blacklisted bset: */ -struct blacklisted_node { - __le64 seq; - enum btree_id btree_id; - struct bpos pos; -}; - -struct journal_seq_blacklist { - struct list_head list; - u64 seq; - bool written; - struct journal_entry_pin pin; - - struct blacklisted_node *entries; - size_t nr_entries; + u64 seq; }; struct journal_res { @@ -72,6 +62,14 @@ struct journal_res { u64 seq; }; +/* + * For reserving space in the journal prior to getting a reservation on a + * particular journal entry: + */ +struct journal_preres { + unsigned u64s; +}; + union journal_res_state { struct { atomic64_t counter; @@ -90,11 +88,28 @@ union journal_res_state { }; }; -/* 4 mb, in bytes: */ -#define JOURNAL_ENTRY_SIZE_MAX (4U << 20) +union journal_preres_state { + struct { + atomic64_t counter; + }; + + struct { + u64 v; + }; + + struct { + u32 reserved; + u32 remaining; + }; +}; + +/* bytes: */ +#define JOURNAL_ENTRY_SIZE_MIN (64U << 10) /* 64k */ +#define JOURNAL_ENTRY_SIZE_MAX (4U << 20) /* 4M */ /* * We stash some journal state as sentinal values in cur_entry_offset: + * note - cur_entry_offset is in units of u64s */ #define JOURNAL_ENTRY_OFFSET_MAX ((1U << 20) - 1) @@ -111,6 +126,8 @@ enum { JOURNAL_REPLAY_DONE, JOURNAL_STARTED, JOURNAL_NEED_WRITE, + JOURNAL_NOT_EMPTY, + JOURNAL_MAY_GET_UNRESERVED, }; /* Embedded in struct bch_fs */ @@ -120,10 +137,23 @@ struct journal { unsigned long flags; union journal_res_state reservations; + + /* Max size of current journal entry */ unsigned cur_entry_u64s; - unsigned prev_buf_sectors; - unsigned cur_buf_sectors; - unsigned entry_size_max; /* bytes */ + unsigned cur_entry_sectors; + + /* + * 0, or -ENOSPC if waiting on journal reclaim, or -EROFS if + * insufficient devices: + */ + int cur_entry_error; + + union journal_preres_state prereserved; + + /* Reserved space in journal entry to be used just prior to write */ + unsigned entry_u64s_reserved; + + unsigned buf_size_want; /* * Two journal entries -- one is currently open for new entries, the @@ -133,8 +163,13 @@ struct journal { spinlock_t lock; + /* if nonzero, we may not open a new journal entry: */ + unsigned blocked; + /* Used when waiting because the journal was full */ wait_queue_head_t wait; + struct closure_waitlist async_wait; + struct closure_waitlist preres_wait; struct closure io; struct delayed_work write_work; @@ -142,7 +177,8 @@ struct journal { /* Sequence number of most recent journal entry (last entry in @pin) */ atomic64_t seq; - /* last_seq from the most recent journal entry written */ + /* seq, last_seq from the most recent journal entry successfully written */ + u64 seq_ondisk; u64 last_seq_ondisk; /* @@ -161,33 +197,26 @@ struct journal { * needed. When all journal entries in the oldest journal bucket are no * longer needed, the bucket can be discarded and reused. */ - DECLARE_FIFO(struct journal_entry_pin_list, pin); - struct journal_entry_pin_list *cur_pin_list; - - /* - * Protects the pin lists - the fifo itself is still protected by - * j->lock though: - */ - spinlock_t pin_lock; + struct { + u64 front, back, size, mask; + struct journal_entry_pin_list *data; + } pin; - struct mutex blacklist_lock; - struct list_head seq_blacklist; + u64 replay_journal_seq; + u64 replay_journal_seq_end; - BKEY_PADDED(key); - struct dev_group devs; + struct write_point wp; + spinlock_t err_lock; struct delayed_work reclaim_work; + struct mutex reclaim_lock; unsigned long last_flushed; + struct journal_entry_pin *flush_in_progress; + wait_queue_head_t pin_flush_wait; - /* protects advancing ja->last_idx: */ - struct mutex reclaim_lock; - - /* - * ugh: need to get prio_buckets converted over to the eventual new - * transaction machinery - */ - __le64 prio_buckets[BCH_SB_MEMBERS_MAX]; - unsigned nr_prio_buckets; + /* protects advancing ja->discard_idx: */ + struct mutex discard_lock; + bool can_discard; unsigned write_delay_ms; unsigned reclaim_delay_ms; @@ -219,17 +248,15 @@ struct journal_device { unsigned sectors_free; - /* Journal bucket we're currently writing to */ - unsigned cur_idx; - - /* Last journal bucket that still contains an open journal entry */ - /* - * j->lock and j->reclaim_lock must both be held to modify, j->lock - * sufficient to read: + * discard_idx <= dirty_idx_ondisk <= dirty_idx <= cur_idx: */ - unsigned last_idx; + unsigned discard_idx; /* Next bucket to discard */ + unsigned dirty_idx_ondisk; + unsigned dirty_idx; + unsigned cur_idx; /* Journal bucket we're currently writing to */ unsigned nr; + u64 *buckets; /* Bio for journal reads/writes to this device */ @@ -239,4 +266,11 @@ struct journal_device { struct closure read; }; -#endif /* _BCACHE_JOURNAL_TYPES_H */ +/* + * journal_entry_res - reserve space in every journal entry: + */ +struct journal_entry_res { + unsigned u64s; +}; + +#endif /* _BCACHEFS_JOURNAL_TYPES_H */