X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libbcachefs%2Fjournal_io.c;h=c5bc58247146a2cdbc1eecb987db7ba667e9677f;hb=d101ad4a61ce48c498936b28eedcf0e01a568d49;hp=109c1157eba1d0c18aa510b94ac134356324e8af;hpb=7fd6c3ffe45b3b42c0bc8a8c5d1387a5e3316a54;p=bcachefs-tools-debian diff --git a/libbcachefs/journal_io.c b/libbcachefs/journal_io.c index 109c115..c5bc582 100644 --- a/libbcachefs/journal_io.c +++ b/libbcachefs/journal_io.c @@ -4,6 +4,7 @@ #include "alloc_foreground.h" #include "btree_io.h" #include "btree_update_interior.h" +#include "btree_write_buffer.h" #include "buckets.h" #include "checksum.h" #include "disk_groups.h" @@ -408,8 +409,10 @@ static int journal_entry_btree_root_validate(struct bch_fs *c, return 0; } - return journal_validate_key(c, jset, entry, 1, entry->btree_id, k, - version, big_endian, flags); + ret = journal_validate_key(c, jset, entry, 1, entry->btree_id, k, + version, big_endian, flags); + if (ret == FSCK_DELETED_KEY) + ret = 0; fsck_err: return ret; } @@ -547,6 +550,7 @@ static int journal_entry_data_usage_validate(struct bch_fs *c, struct jset_entry_data_usage *u = container_of(entry, struct jset_entry_data_usage, entry); unsigned bytes = jset_u64s(le16_to_cpu(entry->u64s)) * sizeof(u64); + struct printbuf err = PRINTBUF; int ret = 0; if (journal_entry_err_on(bytes < sizeof(*u) || @@ -555,10 +559,19 @@ static int journal_entry_data_usage_validate(struct bch_fs *c, journal_entry_data_usage_bad_size, "invalid journal entry usage: bad size")) { journal_entry_null_range(entry, vstruct_next(entry)); - return ret; + goto out; } + if (journal_entry_err_on(bch2_replicas_entry_validate(&u->r, c->disk_sb.sb, &err), + c, version, jset, entry, + journal_entry_data_usage_bad_size, + "invalid journal entry usage: %s", err.buf)) { + journal_entry_null_range(entry, vstruct_next(entry)); + goto out; + } +out: fsck_err: + printbuf_exit(&err); return ret; } @@ -675,8 +688,6 @@ static void journal_entry_dev_usage_to_text(struct printbuf *out, struct bch_fs le64_to_cpu(u->d[i].sectors), le64_to_cpu(u->d[i].fragmented)); } - - prt_printf(out, " buckets_ec: %llu", le64_to_cpu(u->buckets_ec)); } static int journal_entry_log_validate(struct bch_fs *c, @@ -713,6 +724,22 @@ static void journal_entry_overwrite_to_text(struct printbuf *out, struct bch_fs journal_entry_btree_keys_to_text(out, c, entry); } +static int journal_entry_write_buffer_keys_validate(struct bch_fs *c, + struct jset *jset, + struct jset_entry *entry, + unsigned version, int big_endian, + enum bkey_invalid_flags flags) +{ + return journal_entry_btree_keys_validate(c, jset, entry, + version, big_endian, READ); +} + +static void journal_entry_write_buffer_keys_to_text(struct printbuf *out, struct bch_fs *c, + struct jset_entry *entry) +{ + journal_entry_btree_keys_to_text(out, c, entry); +} + struct jset_entry_ops { int (*validate)(struct bch_fs *, struct jset *, struct jset_entry *, unsigned, int, @@ -756,7 +783,6 @@ void bch2_journal_entry_to_text(struct printbuf *out, struct bch_fs *c, static int jset_validate_entries(struct bch_fs *c, struct jset *jset, enum bkey_invalid_flags flags) { - struct jset_entry *entry; unsigned version = le32_to_cpu(jset->version); int ret = 0; @@ -1025,10 +1051,9 @@ next_block: return 0; } -static void bch2_journal_read_device(struct closure *cl) +static CLOSURE_CALLBACK(bch2_journal_read_device) { - struct journal_device *ja = - container_of(cl, struct journal_device, read); + closure_type(ja, struct journal_device, read); struct bch_dev *ca = container_of(ja, struct bch_dev, journal); struct bch_fs *c = ca->fs; struct journal_list *jlist = @@ -1145,8 +1170,6 @@ int bch2_journal_read(struct bch_fs *c, struct journal_list jlist; struct journal_replay *i, **_i, *prev = NULL; struct genradix_iter radix_iter; - struct bch_dev *ca; - unsigned iter; struct printbuf buf = PRINTBUF; bool degraded = false, last_write_torn = false; u64 seq; @@ -1157,7 +1180,7 @@ int bch2_journal_read(struct bch_fs *c, jlist.last_seq = 0; jlist.ret = 0; - for_each_member_device(ca, c, iter) { + for_each_member_device(c, ca) { if (!c->opts.fsck && !(bch2_dev_has_data(c, ca) & (1 << BCH_DATA_journal))) continue; @@ -1323,7 +1346,7 @@ int bch2_journal_read(struct bch_fs *c, continue; for (ptr = 0; ptr < i->nr_ptrs; ptr++) { - ca = bch_dev_bkey_exists(c, i->ptrs[ptr].dev); + struct bch_dev *ca = bch_dev_bkey_exists(c, i->ptrs[ptr].dev); if (!i->ptrs[ptr].csum_good) bch_err_dev_offset(ca, i->ptrs[ptr].sector, @@ -1494,6 +1517,8 @@ done: static void journal_buf_realloc(struct journal *j, struct journal_buf *buf) { + struct bch_fs *c = container_of(j, struct bch_fs, journal); + /* we aren't holding j->lock: */ unsigned new_size = READ_ONCE(j->buf_size_want); void *new_buf; @@ -1501,6 +1526,11 @@ static void journal_buf_realloc(struct journal *j, struct journal_buf *buf) if (buf->buf_size >= new_size) return; + size_t btree_write_buffer_size = new_size / 64; + + if (bch2_btree_write_buffer_resize(c, btree_write_buffer_size)) + return; + new_buf = kvpmalloc(new_size, GFP_NOFS|__GFP_NOWARN); if (!new_buf) return; @@ -1520,9 +1550,9 @@ static inline struct journal_buf *journal_last_unwritten_buf(struct journal *j) return j->buf + (journal_last_unwritten_seq(j) & JOURNAL_BUF_MASK); } -static void journal_write_done(struct closure *cl) +static CLOSURE_CALLBACK(journal_write_done) { - struct journal *j = container_of(cl, struct journal, io); + closure_type(j, struct journal, io); struct bch_fs *c = container_of(j, struct bch_fs, journal); struct journal_buf *w = journal_last_unwritten_buf(j); struct bch_replicas_padded replicas; @@ -1590,6 +1620,7 @@ static void journal_write_done(struct closure *cl) } while ((v = atomic64_cmpxchg(&j->reservations.counter, old.v, new.v)) != old.v); + bch2_journal_reclaim_fast(j); bch2_journal_space_available(j); track_event_change(&c->times[BCH_TIME_blocked_journal_max_in_flight], @@ -1641,13 +1672,12 @@ static void journal_write_endio(struct bio *bio) percpu_ref_put(&ca->io_ref); } -static void do_journal_write(struct closure *cl) +static CLOSURE_CALLBACK(do_journal_write) { - struct journal *j = container_of(cl, struct journal, io); + closure_type(j, struct journal, io); struct bch_fs *c = container_of(j, struct bch_fs, journal); struct bch_dev *ca; struct journal_buf *w = journal_last_unwritten_buf(j); - struct bch_extent_ptr *ptr; struct bio *bio; unsigned sectors = vstruct_sectors(w->data, c->block_bits); @@ -1691,11 +1721,13 @@ static void do_journal_write(struct closure *cl) static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w) { struct bch_fs *c = container_of(j, struct bch_fs, journal); - struct jset_entry *start, *end, *i, *next, *prev = NULL; + struct jset_entry *start, *end; struct jset *jset = w->data; + struct journal_keys_to_wb wb = { NULL }; unsigned sectors, bytes, u64s; - bool validate_before_checksum = false; unsigned long btree_roots_have = 0; + bool validate_before_checksum = false; + u64 seq = le64_to_cpu(jset->seq); int ret; /* @@ -1706,7 +1738,7 @@ static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w) * If we wanted to be really fancy here, we could sort all the keys in * the jset and drop keys that were overwritten - probably not worth it: */ - vstruct_for_each_safe(jset, i, next) { + vstruct_for_each(jset, i) { unsigned u64s = le16_to_cpu(i->u64s); /* Empty entry: */ @@ -1723,40 +1755,40 @@ static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w) * to c->btree_roots we have to get any missing btree roots and * add them to this journal entry: */ - if (i->type == BCH_JSET_ENTRY_btree_root) { + switch (i->type) { + case BCH_JSET_ENTRY_btree_root: bch2_journal_entry_to_btree_root(c, i); __set_bit(i->btree_id, &btree_roots_have); + break; + case BCH_JSET_ENTRY_write_buffer_keys: + EBUG_ON(!w->need_flush_to_write_buffer); + + if (!wb.wb) + bch2_journal_keys_to_write_buffer_start(c, &wb, seq); + + struct bkey_i *k; + jset_entry_for_each_key(i, k) { + ret = bch2_journal_key_to_wb(c, &wb, i->btree_id, k); + if (ret) { + bch2_fs_fatal_error(c, "-ENOMEM flushing journal keys to btree write buffer"); + bch2_journal_keys_to_write_buffer_end(c, &wb); + return ret; + } + } + i->type = BCH_JSET_ENTRY_btree_keys; + break; } - - /* Can we merge with previous entry? */ - if (prev && - i->btree_id == prev->btree_id && - i->level == prev->level && - i->type == prev->type && - i->type == BCH_JSET_ENTRY_btree_keys && - le16_to_cpu(prev->u64s) + u64s <= U16_MAX) { - memmove_u64s_down(vstruct_next(prev), - i->_data, - u64s); - le16_add_cpu(&prev->u64s, u64s); - continue; - } - - /* Couldn't merge, move i into new position (after prev): */ - prev = prev ? vstruct_next(prev) : jset->start; - if (i != prev) - memmove_u64s_down(prev, i, jset_u64s(u64s)); } - prev = prev ? vstruct_next(prev) : jset->start; - jset->u64s = cpu_to_le32((u64 *) prev - jset->_data); + if (wb.wb) + bch2_journal_keys_to_write_buffer_end(c, &wb); + w->need_flush_to_write_buffer = false; start = end = vstruct_last(jset); end = bch2_btree_roots_to_journal_entries(c, end, btree_roots_have); - bch2_journal_super_entries_add_common(c, &end, - le64_to_cpu(jset->seq)); + bch2_journal_super_entries_add_common(c, &end, seq); u64s = (u64 *) end - (u64 *) start; BUG_ON(u64s > j->entry_u64s_reserved); @@ -1779,7 +1811,7 @@ static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w) SET_JSET_CSUM_TYPE(jset, bch2_meta_checksum_type(c)); if (!JSET_NO_FLUSH(jset) && journal_entry_empty(jset)) - j->last_empty_seq = le64_to_cpu(jset->seq); + j->last_empty_seq = seq; if (bch2_csum_type_is_encryption(JSET_CSUM_TYPE(jset))) validate_before_checksum = true; @@ -1838,7 +1870,7 @@ static int bch2_journal_write_pick_flush(struct journal *j, struct journal_buf * (!w->must_flush && (jiffies - j->last_flush_write) < msecs_to_jiffies(c->opts.journal_flush_delay) && test_bit(JOURNAL_MAY_SKIP_FLUSH, &j->flags))) { - w->noflush = true; + w->noflush = true; SET_JSET_NO_FLUSH(w->data, true); w->data->last_seq = 0; w->last_seq = 0; @@ -1853,16 +1885,15 @@ static int bch2_journal_write_pick_flush(struct journal *j, struct journal_buf * return 0; } -void bch2_journal_write(struct closure *cl) +CLOSURE_CALLBACK(bch2_journal_write) { - struct journal *j = container_of(cl, struct journal, io); + closure_type(j, struct journal, io); struct bch_fs *c = container_of(j, struct bch_fs, journal); - struct bch_dev *ca; struct journal_buf *w = journal_last_unwritten_buf(j); struct bch_replicas_padded replicas; struct bio *bio; struct printbuf journal_debug_buf = PRINTBUF; - unsigned i, nr_rw_members = 0; + unsigned nr_rw_members = 0; int ret; BUG_ON(BCH_SB_CLEAN(c->disk_sb.sb)); @@ -1875,9 +1906,11 @@ void bch2_journal_write(struct closure *cl) if (ret) goto err; + mutex_lock(&j->buf_lock); journal_buf_realloc(j, w); ret = bch2_journal_write_prep(j, w); + mutex_unlock(&j->buf_lock); if (ret) goto err; @@ -1920,7 +1953,7 @@ void bch2_journal_write(struct closure *cl) if (c->opts.nochanges) goto no_io; - for_each_rw_member(ca, c, i) + for_each_rw_member(c, ca) nr_rw_members++; if (nr_rw_members > 1) @@ -1937,7 +1970,7 @@ void bch2_journal_write(struct closure *cl) goto err; if (!JSET_NO_FLUSH(w->data) && w->separate_flush) { - for_each_rw_member(ca, c, i) { + for_each_rw_member(c, ca) { percpu_ref_get(&ca->io_ref); bio = ca->journal.bio;