]> git.sesse.net Git - bcachefs-tools-debian/blob - libbcachefs/journal_types.h
Update bcachefs sources to 313b24b652 bcachefs: Fix an assertion
[bcachefs-tools-debian] / libbcachefs / journal_types.h
1 /* SPDX-License-Identifier: GPL-2.0 */
2 #ifndef _BCACHEFS_JOURNAL_TYPES_H
3 #define _BCACHEFS_JOURNAL_TYPES_H
4
5 #include <linux/cache.h>
6 #include <linux/workqueue.h>
7
8 #include "alloc_types.h"
9 #include "super_types.h"
10 #include "fifo.h"
11
12 #define JOURNAL_BUF_BITS        2 /* log2 of the number of journal buffers */
13 #define JOURNAL_BUF_NR          (1U << JOURNAL_BUF_BITS) /* == 4; size of journal.buf[] */
14 #define JOURNAL_BUF_MASK        (JOURNAL_BUF_NR - 1) /* == 3; low JOURNAL_BUF_BITS bits set */
15
16 /*
17  * We put JOURNAL_BUF_NR of these in struct journal; we use them for writes to
18  * the journal that are being staged or in flight.
19  */
20 struct journal_buf {
21         struct jset             *data;          /* buffer of @buf_size bytes */
22
23         __BKEY_PADDED(key, BCH_REPLICAS_MAX);
24
25         struct closure_waitlist wait;
26
27         unsigned                buf_size;       /* size in bytes of @data */
28         unsigned                sectors;        /* maximum size for current entry */
29         unsigned                disk_sectors;   /* maximum size entry could have been, if
30                                                    buf_size was bigger */
31         unsigned                u64s_reserved;  /* NOTE(review): presumably relates to journal.entry_u64s_reserved - confirm */
32         bool                    noflush;        /* write has already been kicked off, and was noflush */
33         bool                    must_flush;     /* something wants a flush */
34         bool                    separate_flush; /* NOTE(review): semantics not visible in this header - confirm */
35         /* bloom filter, 1024 bytes in total: */
36         unsigned long           has_inode[1024 / sizeof(unsigned long)];
37 };
38
39 /*
40  * Something that makes a journal entry dirty - i.e. a btree node that has to be
41  * flushed:
42  */
43
44 struct journal_entry_pin_list { /* element type of the journal.pin FIFO */
45         struct list_head                list;    /* NOTE(review): presumably journal_entry_pins not yet flushed - confirm */
46         struct list_head                flushed; /* NOTE(review): presumably journal_entry_pins already flushed - confirm */
47         atomic_t                        count;   /* reference count; see the comment on journal.pin */
48         struct bch_devs_list            devs;
49 };
50
51 struct journal; /* forward declarations needed by the callback type below */
52 struct journal_entry_pin;
53 typedef void (*journal_pin_flush_fn)(struct journal *j,
54                                 struct journal_entry_pin *, u64); /* type of journal_entry_pin.flush; NOTE(review): u64 is presumably a journal sequence number - confirm */
55
56 struct journal_entry_pin {
57         struct list_head                list;  /* NOTE(review): presumably links into a journal_entry_pin_list list head - confirm */
58         journal_pin_flush_fn            flush; /* callback; receives the journal and this pin */
59         u64                             seq;   /* NOTE(review): presumably the journal sequence number being pinned - confirm */
60 };
61
62 struct journal_res {
63         bool                    ref;
64         u8                      idx;    /* NOTE(review): presumably an index into journal.buf[] - confirm */
65         u16                     u64s;   /* NOTE(review): presumably a size in units of u64s - confirm */
66         u32                     offset; /* NOTE(review): presumably an offset in u64s, like cur_entry_offset - confirm */
67         u64                     seq;    /* NOTE(review): presumably a journal sequence number - confirm */
68 };
69
70 /*
71  * For reserving space in the journal prior to getting a reservation on a
72  * particular journal entry:
73  */
74 struct journal_preres {
75         unsigned                u64s; /* NOTE(review): presumably the amount reserved, in units of u64s - confirm */
76 };
77
78 union journal_res_state { /* three overlapping views of one 64-bit word */
79         struct {
80                 atomic64_t      counter; /* atomic view */
81         };
82
83         struct {
84                 u64             v; /* plain integer view */
85         };
86
87         struct { /* bitfield view; widths sum to 64 (20 + 2 + 2 + 4 * 10) */
88                 u64             cur_entry_offset:20, /* in units of u64s; see JOURNAL_ENTRY_OFFSET_MAX */
89                                 idx:2, /* JOURNAL_BUF_BITS wide */
90                                 unwritten_idx:2, /* JOURNAL_BUF_BITS wide */
91                                 buf0_count:10, /* NOTE(review): presumably one counter per journal.buf[] slot - confirm */
92                                 buf1_count:10,
93                                 buf2_count:10,
94                                 buf3_count:10;
95         };
96 };
97
98 union journal_preres_state { /* three overlapping views of one 64-bit word */
99         struct {
100                 atomic64_t      counter; /* atomic view */
101         };
102
103         struct {
104                 u64             v; /* plain integer view */
105         };
106
107         struct { /* two 32-bit halves of the same word */
108                 u32             reserved;
109                 u32             remaining;
110         };
111 };
112
113 /* bytes: */
114 #define JOURNAL_ENTRY_SIZE_MIN          (64U << 10) /* 64k */
115 #define JOURNAL_ENTRY_SIZE_MAX          (4U  << 20) /* 4M */
116
117 /*
118  * We stash some journal state as sentinel values in cur_entry_offset:
119  * note - cur_entry_offset is in units of u64s
120  */
121 #define JOURNAL_ENTRY_OFFSET_MAX        ((1U << 20) - 1) /* 0xfffff: largest value of the 20-bit cur_entry_offset */
122
123 #define JOURNAL_ENTRY_CLOSED_VAL        (JOURNAL_ENTRY_OFFSET_MAX - 1) /* 0xffffe */
124 #define JOURNAL_ENTRY_ERROR_VAL         (JOURNAL_ENTRY_OFFSET_MAX) /* 0xfffff */
125
126 struct journal_space {
127         /* Units of 512-byte sectors: */
128         unsigned        next_entry; /* How big the next journal entry can be */
129         unsigned        total; /* NOTE(review): presumably total space of this kind - confirm */
130 };
131
132 enum journal_space_from { /* values 0..3 followed by a count */
133         journal_space_discarded,
134         journal_space_clean_ondisk,
135         journal_space_clean,
136         journal_space_total,
137         journal_space_nr, /* number of kinds above; sizes journal.space[] */
138 };
139
140 /*
141  * JOURNAL_NEED_WRITE - current (pending) journal entry should be written ASAP,
142  * either because something's waiting on the write to complete or because it's
143  * been dirty too long and the timer's expired.
144  */
145
146 enum { /* NOTE(review): presumably bit numbers within journal.flags - confirm */
147         JOURNAL_REPLAY_DONE,
148         JOURNAL_STARTED,
149         JOURNAL_RECLAIM_STARTED,
150         JOURNAL_NEED_WRITE, /* described in the comment preceding this enum */
151         JOURNAL_MAY_GET_UNRESERVED,
152         JOURNAL_MAY_SKIP_FLUSH,
153 };
154
155 /* Embedded in struct bch_fs */
156 struct journal {
157         /* Fastpath stuff up front: */
158
159         unsigned long           flags; /* NOTE(review): presumably bits named by the JOURNAL_* enum - confirm */
160
161         union journal_res_state reservations;
162
163         /* Max size of current journal entry */
164         unsigned                cur_entry_u64s;
165         unsigned                cur_entry_sectors;
166
167         /*
168          * One of the cur_entry_* codes below (cur_entry_ok == 0), not an
169          * errno - NOTE(review): confirm how these map to -ENOSPC/-EROFS:
170          */
171         enum {
172                 cur_entry_ok,
173                 cur_entry_blocked,
174                 cur_entry_journal_full,
175                 cur_entry_journal_pin_full,
176                 cur_entry_journal_stuck,
177                 cur_entry_insufficient_devices,
178         }                       cur_entry_error;
179
180         union journal_preres_state prereserved;
181
182         /* Reserved space in journal entry to be used just prior to write */
183         unsigned                entry_u64s_reserved;
184
185         unsigned                buf_size_want;
186
187         /*
188          * JOURNAL_BUF_NR journal entry buffers - used for writes to the
189          * journal that are being staged or in flight.
190          */
191         struct journal_buf      buf[JOURNAL_BUF_NR];
192
193         spinlock_t              lock;
194
195         /* if nonzero, we may not open a new journal entry: */
196         unsigned                blocked;
197
198         /* Used when waiting because the journal was full */
199         wait_queue_head_t       wait;
200         struct closure_waitlist async_wait;
201         struct closure_waitlist preres_wait;
202
203         struct closure          io;
204         struct delayed_work     write_work;
205
206         /* Sequence number of most recent journal entry (last entry in @pin) */
207         atomic64_t              seq;
208
209         /* seq, last_seq from the most recent journal entry successfully written */
210         u64                     seq_ondisk;
211         u64                     flushed_seq_ondisk;
212         u64                     last_seq_ondisk;
213         u64                     err_seq;
214         u64                     last_empty_seq;
215
216         /*
217          * FIFO of journal entries whose btree updates have not yet been
218          * written out.
219          *
220          * Each entry is a reference count. The position in the FIFO is the
221          * entry's sequence number relative to @seq.
222          *
223          * The journal entry itself holds a reference count, put when the
224          * journal entry is written out. Each btree node modified by the journal
225          * entry also holds a reference count, put when the btree node is
226          * written.
227          *
228          * When a reference count reaches zero, the journal entry is no longer
229          * needed. When all journal entries in the oldest journal bucket are no
230          * longer needed, the bucket can be discarded and reused.
231          */
232         struct {
233                 u64 front, back, size, mask;
234                 struct journal_entry_pin_list *data;
235         }                       pin;
236
237         struct journal_space    space[journal_space_nr]; /* one slot per enum journal_space_from value */
238
239         u64                     replay_journal_seq;
240         u64                     replay_journal_seq_end;
241
242         struct write_point      wp;
243         spinlock_t              err_lock;
244
245         struct mutex            reclaim_lock;
246         struct task_struct      *reclaim_thread;
247         bool                    reclaim_kicked;
248         u64                     nr_direct_reclaim;
249         u64                     nr_background_reclaim;
250
251         unsigned long           last_flushed;
252         struct journal_entry_pin *flush_in_progress;
253         wait_queue_head_t       pin_flush_wait;
254
255         /* protects advancing ja->discard_idx: */
256         struct mutex            discard_lock;
257         bool                    can_discard;
258
259         unsigned                write_delay_ms;
260         unsigned                reclaim_delay_ms;
261         unsigned long           last_flush_write;
262
263         u64                     res_get_blocked_start;
264         u64                     need_write_time;
265         u64                     write_start_time;
266
267         u64                     nr_flush_writes;
268         u64                     nr_noflush_writes;
269
270         struct time_stats       *write_time;
271         struct time_stats       *delay_time;
272         struct time_stats       *blocked_time;
273         struct time_stats       *flush_seq_time;
274
275 #ifdef CONFIG_DEBUG_LOCK_ALLOC
276         struct lockdep_map      res_map; /* present only when CONFIG_DEBUG_LOCK_ALLOC is defined */
277 #endif
278 };
279
280 /*
281  * Embedded in struct bch_dev. First three fields refer to the array of journal
282  * buckets, in bch_sb.
283  */
284 struct journal_device {
285         /*
286          * For each journal bucket, contains the max sequence number of the
287          * journal writes it contains - so we know when a bucket can be reused.
288          */
289         u64                     *bucket_seq;
290
291         unsigned                sectors_free;
292
293         /*
294          * discard_idx <= dirty_idx_ondisk <= dirty_idx <= cur_idx:
295          */
296         unsigned                discard_idx;            /* Next bucket to discard */
297         unsigned                dirty_idx_ondisk;
298         unsigned                dirty_idx;
299         unsigned                cur_idx;                /* Journal bucket we're currently writing to */
300         unsigned                nr;                     /* NOTE(review): presumably the number of journal buckets - confirm */
301
302         u64                     *buckets;               /* NOTE(review): presumably @nr bucket numbers - confirm */
303
304         /* Bio for journal reads/writes to this device */
305         struct bio              *bio;
306
307         /* for bch_journal_read_device */
308         struct closure          read;
309 };
310
311 /*
312  * journal_entry_res - reserve space in every journal entry:
313  */
314 struct journal_entry_res {
315         unsigned                u64s; /* NOTE(review): presumably the amount reserved, in units of u64s - confirm */
316 };
317
318 #endif /* _BCACHEFS_JOURNAL_TYPES_H */