3 #include "btree_iter.h"
5 #include "journal_seq_blacklist.h"
/*
 * journal_seq_blacklist machinery:
 *
 * To guarantee order of btree updates after a crash, we need to detect when a
 * btree node entry (bset) is newer than the newest journal entry that was
 * successfully written, and ignore it - effectively ignoring any btree updates
 * that didn't make it into the journal.
 *
 * If we didn't do this, we might have two btree nodes, a and b, both with
 * updates that weren't written to the journal yet: if b was updated after a,
 * but b was flushed and not a - oops; on recovery we'll find that the updates
 * to b happened, but not the updates to a that happened before it.
 *
 * Ignoring bsets that are newer than the newest journal entry is always safe,
 * because everything they contain will also have been journalled - and must
 * still be present in the journal on disk until a journal entry has been
 * written _after_ that bset was written.
 *
 * To accomplish this, bsets record the newest journal sequence number they
 * contain updates for; then, on startup, the btree code queries the journal
 * code to ask "Is this sequence number newer than the newest journal entry? If
 * so, ignore it."
 *
 * When this happens, we must blacklist that journal sequence number: the
 * journal must not write any entries with that sequence number, and it must
 * record that it was blacklisted so that a) on recovery we don't think we have
 * missing journal entries and b) so that the btree code continues to ignore
 * that bset, until that btree node is rewritten.
 */
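
/*
 * Concrete example (illustrative numbers, not from the original comment):
 * suppose the newest journal entry that made it to disk has seq 100, but on
 * startup we find a bset claiming journal seq 102. The updates in that bset
 * were never journalled, so we ignore the bset and blacklist seqs 101-102:
 * the journal will skip those sequence numbers, and every future mount keeps
 * ignoring the bset until the btree node is rewritten.
 */

/* Number of entries currently held by the superblock field: */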
static unsigned
blacklist_nr_entries(struct bch_sb_field_journal_seq_blacklist *bl)
{
        return bl
                ? ((vstruct_end(&bl->field) - (void *) &bl->start[0]) /
                   sizeof(struct journal_seq_blacklist_entry))
                : 0;
}
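
/* Size, in u64s, of the superblock field needed to hold @nr entries: */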
static unsigned sb_blacklist_u64s(unsigned nr)
{
        struct bch_sb_field_journal_seq_blacklist *bl;

        return (sizeof(*bl) + sizeof(bl->start[0]) * nr) / sizeof(u64);
}
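
/*
 * Merge entry @i with entry @i + 1 if their ranges touch or overlap,
 * shrinking the superblock field accordingly:
 */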
static struct bch_sb_field_journal_seq_blacklist *
blacklist_entry_try_merge(struct bch_fs *c,
                          struct bch_sb_field_journal_seq_blacklist *bl,
                          unsigned i)
{
        unsigned nr = blacklist_nr_entries(bl);

        if (le64_to_cpu(bl->start[i].end) >=
            le64_to_cpu(bl->start[i + 1].start)) {
                bl->start[i].end = bl->start[i + 1].end;
                --nr;
                /* delete entry i + 1, which entry i now covers: */
                memmove(&bl->start[i + 1],
                        &bl->start[i + 2],
                        sizeof(bl->start[0]) * (nr - (i + 1)));
                bl = bch2_sb_resize_journal_seq_blacklist(&c->disk_sb,
                                        sb_blacklist_u64s(nr));
                BUG_ON(!bl);
        }

        return bl;
}
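
/*
 * Add the range [start, end) to the on-disk blacklist: reuse or widen an
 * existing entry when possible, otherwise append a new one, then write the
 * superblock:
 */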
int bch2_journal_seq_blacklist_add(struct bch_fs *c, u64 start, u64 end)
{
        struct bch_sb_field_journal_seq_blacklist *bl;
        unsigned i, nr;
        int ret = 0;

        mutex_lock(&c->sb_lock);
        bl = bch2_sb_get_journal_seq_blacklist(c->disk_sb.sb);
        nr = blacklist_nr_entries(bl);

        if (bl) {
                for (i = 0; i < nr; i++) {
                        struct journal_seq_blacklist_entry *e =
                                bl->start + i;

                        if (start == le64_to_cpu(e->start) &&
                            end   == le64_to_cpu(e->end))
                                goto out;

                        if (start <= le64_to_cpu(e->start) &&
                            end   >= le64_to_cpu(e->end)) {
                                e->start = cpu_to_le64(start);
                                e->end   = cpu_to_le64(end);

                                if (i + 1 < nr)
                                        bl = blacklist_entry_try_merge(c,
                                                                bl, i);
                                if (i)
                                        bl = blacklist_entry_try_merge(c,
                                                                bl, i - 1);
                                goto out_write_sb;
                        }
                }
        }

        bl = bch2_sb_resize_journal_seq_blacklist(&c->disk_sb,
                                        sb_blacklist_u64s(nr + 1));
        if (!bl) {
                ret = -ENOMEM;
                goto out;
        }

        bl->start[nr].start     = cpu_to_le64(start);
        bl->start[nr].end       = cpu_to_le64(end);
out_write_sb:
        c->disk_sb.sb->features[0] |=
                1ULL << BCH_FEATURE_JOURNAL_SEQ_BLACKLIST_V3;

        ret = bch2_write_super(c);
out:
        mutex_unlock(&c->sb_lock);

        return ret ?: bch2_blacklist_table_initialize(c);
}
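
/*
 * The superblock field is the persistent record; for fast lookups we also
 * keep the entries in memory, sorted in eytzinger0 (cache-optimized binary
 * search tree) order, in c->journal_seq_blacklist_table:
 */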
static int journal_seq_blacklist_table_cmp(const void *_l,
                                           const void *_r, size_t size)
{
        const struct journal_seq_blacklist_table_entry *l = _l;
        const struct journal_seq_blacklist_table_entry *r = _r;

        return cmp_int(l->start, r->start);
}
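
/*
 * Returns true if @seq is blacklisted; if @dirty is set, also mark the
 * matching entry as still referenced, so that the GC work below won't
 * drop it:
 */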
bool bch2_journal_seq_is_blacklisted(struct bch_fs *c, u64 seq,
                                     bool dirty)
{
        struct journal_seq_blacklist_table *t = c->journal_seq_blacklist_table;
        struct journal_seq_blacklist_table_entry search = { .start = seq };
        int idx;

        if (!t)
                return false;

        idx = eytzinger0_find_le(t->entries, t->nr,
                                 sizeof(t->entries[0]),
                                 journal_seq_blacklist_table_cmp,
                                 &search);
        if (idx < 0)
                return false;

        BUG_ON(t->entries[idx].start > seq);

        if (seq >= t->entries[idx].end)
                return false;

        if (dirty)
                t->entries[idx].dirty = true;
        return true;
}
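
/* Build the in-memory lookup table from the superblock field: */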
int bch2_blacklist_table_initialize(struct bch_fs *c)
{
        struct bch_sb_field_journal_seq_blacklist *bl =
                bch2_sb_get_journal_seq_blacklist(c->disk_sb.sb);
        struct journal_seq_blacklist_table *t;
        unsigned i, nr = blacklist_nr_entries(bl);

        BUG_ON(c->journal_seq_blacklist_table);

        if (!bl)
                return 0;

        t = kzalloc(sizeof(*t) + sizeof(t->entries[0]) * nr,
                    GFP_KERNEL);
        if (!t)
                return -ENOMEM;

        t->nr = nr;

        for (i = 0; i < nr; i++) {
                t->entries[i].start     = le64_to_cpu(bl->start[i].start);
                t->entries[i].end       = le64_to_cpu(bl->start[i].end);
        }

        eytzinger0_sort(t->entries,
                        t->nr,
                        sizeof(t->entries[0]),
                        journal_seq_blacklist_table_cmp,
                        NULL);

        c->journal_seq_blacklist_table = t;
        return 0;
}
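
/* Entries in the superblock field must be well formed (start < end) and sorted: */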
static const char *
bch2_sb_journal_seq_blacklist_validate(struct bch_sb *sb,
                                       struct bch_sb_field *f)
{
        struct bch_sb_field_journal_seq_blacklist *bl =
                field_to_type(f, journal_seq_blacklist);
        struct journal_seq_blacklist_entry *i;
        unsigned nr = blacklist_nr_entries(bl);

        for (i = bl->start; i < bl->start + nr; i++) {
                if (le64_to_cpu(i->start) >=
                    le64_to_cpu(i->end))
                        return "entry start >= end";

                if (i + 1 < bl->start + nr &&
                    le64_to_cpu(i[0].end) >
                    le64_to_cpu(i[1].start))
                        return "entries out of order";
        }

        return NULL;
}
static void bch2_sb_journal_seq_blacklist_to_text(struct printbuf *out,
                                                  struct bch_sb *sb,
                                                  struct bch_sb_field *f)
{
        struct bch_sb_field_journal_seq_blacklist *bl =
                field_to_type(f, journal_seq_blacklist);
        struct journal_seq_blacklist_entry *i;
        unsigned nr = blacklist_nr_entries(bl);

        for (i = bl->start; i < bl->start + nr; i++) {
                if (i != bl->start)
                        pr_buf(out, " ");

                pr_buf(out, "%llu-%llu",
                       le64_to_cpu(i->start),
                       le64_to_cpu(i->end));
        }
}

const struct bch_sb_field_ops bch_sb_field_ops_journal_seq_blacklist = {
        .validate       = bch2_sb_journal_seq_blacklist_validate,
        .to_text        = bch2_sb_journal_seq_blacklist_to_text
};
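
/*
 * Garbage collect blacklist entries that are no longer needed: walk every
 * btree node, so that each blacklist entry still referenced by some bset
 * gets its dirty flag set (bch2_journal_seq_is_blacklisted() is called with
 * @dirty set from the btree node read path); entries left clean after the
 * walk are unreferenced and are dropped from the superblock:
 */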
void bch2_blacklist_entries_gc(struct work_struct *work)
{
        struct bch_fs *c = container_of(work, struct bch_fs,
                                        journal_seq_blacklist_gc_work);
        struct journal_seq_blacklist_table *t;
        struct bch_sb_field_journal_seq_blacklist *bl;
        struct journal_seq_blacklist_entry *src, *dst;
        struct btree_trans trans;
        unsigned i, nr, new_nr;
        int ret;

        bch2_trans_init(&trans, c, 0, 0);

        for (i = 0; i < BTREE_ID_NR; i++) {
                struct btree_iter *iter;
                struct btree *b;

                for_each_btree_node(&trans, iter, i, POS_MIN,
                                    BTREE_ITER_PREFETCH, b)
                        if (test_bit(BCH_FS_STOPPING, &c->flags)) {
                                bch2_trans_exit(&trans);
                                return;
                        }
                bch2_trans_iter_free(&trans, iter);
        }

        ret = bch2_trans_exit(&trans);
        if (ret)
                return;

        mutex_lock(&c->sb_lock);
        bl = bch2_sb_get_journal_seq_blacklist(c->disk_sb.sb);
        if (!bl)
                goto out;

        nr = blacklist_nr_entries(bl);
        dst = bl->start;

        t = c->journal_seq_blacklist_table;
        BUG_ON(nr != t->nr);

        for (src = bl->start, i = eytzinger0_first(t->nr);
             src < bl->start + nr;
             src++, i = eytzinger0_next(i, nr)) {
                BUG_ON(t->entries[i].start != le64_to_cpu(src->start));
                BUG_ON(t->entries[i].end   != le64_to_cpu(src->end));

                if (t->entries[i].dirty)
                        *dst++ = *src;
        }

        new_nr = dst - bl->start;

        bch_info(c, "nr blacklist entries was %u, now %u", nr, new_nr);

        if (new_nr != nr) {
                bl = bch2_sb_resize_journal_seq_blacklist(&c->disk_sb,
                                new_nr ? sb_blacklist_u64s(new_nr) : 0);
                BUG_ON(new_nr && !bl);

                if (!new_nr)
                        c->disk_sb.sb->features[0] &=
                                ~(1ULL << BCH_FEATURE_JOURNAL_SEQ_BLACKLIST_V3);

                bch2_write_super(c);
        }
out:
        mutex_unlock(&c->sb_lock);
}