#include "journal_reclaim.h"
#include "journal_seq_blacklist.h"
#include "replicas.h"
-
-#include <trace/events/bcachefs.h>
+#include "trace.h"
static struct nonce journal_nonce(const struct jset *jset)
{
journal_entry_radix_idx(c, le64_to_cpu(j->seq)),
GFP_KERNEL);
if (!_i)
- return -ENOMEM;
+ return -BCH_ERR_ENOMEM_journal_entry_add;
/*
* Duplicate journal entries? If so we want the one that didn't have a
replace:
i = kvpmalloc(offsetof(struct journal_replay, j) + bytes, GFP_KERNEL);
if (!i)
- return -ENOMEM;
+ return -BCH_ERR_ENOMEM_journal_entry_add;
i->nr_ptrs = 0;
i->csum_good = entry_ptr.csum_good;
struct bkey_i *k;
bool first = true;
- vstruct_for_each(entry, k) {
+ jset_entry_for_each_key(entry, k) {
if (!first) {
prt_newline(out);
prt_printf(out, "%s: ", bch2_jset_entry_types[entry->type]);
/* the bios are sized for this many pages, max: */
if (new_size > JOURNAL_ENTRY_SIZE_MAX)
- return -ENOMEM;
+ return -BCH_ERR_ENOMEM_journal_read_buf_realloc;
new_size = roundup_pow_of_two(new_size);
n = kvpmalloc(new_size, GFP_KERNEL);
if (!n)
- return -ENOMEM;
+ return -BCH_ERR_ENOMEM_journal_read_buf_realloc;
kvpfree(b->data, b->size);
b->data = n;
if (!ca->mi.durability ||
ca->mi.state != BCH_MEMBER_STATE_rw ||
!ja->nr ||
- bch2_bkey_has_device(bkey_i_to_s_c(&w->key),
- ca->dev_idx) ||
+ bch2_bkey_has_device_c(bkey_i_to_s_c(&w->key), ca->dev_idx) ||
sectors > ja->sectors_free)
continue;
struct journal *j = container_of(cl, struct journal, io);
struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct journal_buf *w = journal_last_unwritten_buf(j);
- struct bch_replicas_padded replicas;
union journal_res_state old, new;
u64 v, seq;
int err = 0;
if (!w->devs_written.nr) {
bch_err(c, "unable to write journal to sufficient devices");
err = -EIO;
- } else {
- bch2_devlist_to_replicas(&replicas.e, BCH_DATA_journal,
- w->devs_written);
- if (bch2_mark_replicas(c, &replicas.e))
- err = -EIO;
}
-
if (err)
bch2_fatal_error(c);
return;
}
+static void bch2_journal_entries_postprocess(struct bch_fs *c, struct jset *jset)
+{
+ struct jset_entry *i, *next, *prev = NULL;
+
+ /*
+ * Compact the entries of @jset in place and, along the way, pass every
+ * non-empty btree_root entry to bch2_journal_entry_to_btree_root().
+ * Called from bch2_journal_write().
+ *
+ * Simple compaction, dropping empty jset_entries (from journal
+ * reservations that weren't fully used) and merging jset_entries that
+ * can be.
+ *
+ * If we wanted to be really fancy here, we could sort all the keys in
+ * the jset and drop keys that were overwritten - probably not worth it:
+ *
+ * @i walks the original entries; @prev is the last entry already
+ * placed in the compacted output (NULL until the first non-empty
+ * entry has been seen). Once the walk is done, jset->u64s is
+ * recomputed as the distance, in u64s, from jset->_data to the end of
+ * the last kept entry.
+ *
+ * NOTE(review): the _safe iterator is presumably required because the
+ * merge path below can copy payload over @i's own header (when
+ * vstruct_next(prev) == i) - confirm against the macro definition.
+ */
+ vstruct_for_each_safe(jset, i, next) {
+ unsigned u64s = le16_to_cpu(i->u64s);
+
+ /* Empty entry: never copied forward, so it is dropped. */
+ if (!u64s)
+ continue;
+
+ if (i->type == BCH_JSET_ENTRY_btree_root)
+ bch2_journal_entry_to_btree_root(c, i);
+
+ /*
+ * Can we merge with previous entry? Only btree_keys entries with
+ * the same btree_id and level are merged, and only while the
+ * combined size still fits in prev's 16-bit u64s field. Merging
+ * appends i's payload directly after prev's and grows prev->u64s;
+ * i's header is discarded.
+ */
+ if (prev &&
+ i->btree_id == prev->btree_id &&
+ i->level == prev->level &&
+ i->type == prev->type &&
+ i->type == BCH_JSET_ENTRY_btree_keys &&
+ le16_to_cpu(prev->u64s) + u64s <= U16_MAX) {
+ memmove_u64s_down(vstruct_next(prev),
+ i->_data,
+ u64s);
+ le16_add_cpu(&prev->u64s, u64s);
+ continue;
+ }
+
+ /*
+ * Couldn't merge, move i into new position (after prev): the
+ * first kept entry goes to jset->start. Nothing is copied when i
+ * is already in place. jset_u64s(u64s) presumably covers the
+ * entry header plus its payload - TODO confirm.
+ */
+ prev = prev ? vstruct_next(prev) : jset->start;
+ if (i != prev)
+ memmove_u64s_down(prev, i, jset_u64s(u64s));
+ }
+
+ prev = prev ? vstruct_next(prev) : jset->start; /* end of compacted entries */
+ jset->u64s = cpu_to_le32((u64 *) prev - jset->_data);
+}
+
void bch2_journal_write(struct closure *cl)
{
struct journal *j = container_of(cl, struct journal, io);
struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct bch_dev *ca;
struct journal_buf *w = journal_last_unwritten_buf(j);
+ struct bch_replicas_padded replicas;
struct jset_entry *start, *end;
struct jset *jset;
struct bio *bio;
* entry:
*/
- bch2_journal_entries_to_btree_roots(c, jset);
+ bch2_journal_entries_postprocess(c, jset);
start = end = vstruct_last(jset);
BUG_ON(u64s > j->entry_u64s_reserved);
le32_add_cpu(&jset->u64s, u64s);
- BUG_ON(vstruct_sectors(jset, c->block_bits) > w->sectors);
+
+ sectors = vstruct_sectors(jset, c->block_bits);
+ bytes = vstruct_bytes(jset);
+
+ if (sectors > w->sectors) {
+ bch2_fs_fatal_error(c, "aieeee! journal write overran available space, %zu > %u (extra %u reserved %u/%u)",
+ vstruct_bytes(jset), w->sectors << 9,
+ u64s, w->u64s_reserved, j->entry_u64s_reserved);
+ goto err;
+ }
jset->magic = cpu_to_le64(jset_magic(c));
jset->version = c->sb.version < bcachefs_metadata_version_bkey_renumber
jset_validate(c, NULL, jset, 0, WRITE))
goto err;
- sectors = vstruct_sectors(jset, c->block_bits);
- BUG_ON(sectors > w->sectors);
-
- bytes = vstruct_bytes(jset);
memset((void *) jset + bytes, 0, (sectors << 9) - bytes);
retry_alloc:
bch_err(c, "Unable to allocate journal write:\n%s",
journal_debug_buf.buf);
printbuf_exit(&journal_debug_buf);
- bch2_fatal_error(c);
- continue_at(cl, journal_write_done, c->io_complete_wq);
- return;
+ goto err;
}
w->devs_written = bch2_bkey_devs(bkey_i_to_s_c(&w->key));
if (nr_rw_members > 1)
w->separate_flush = true;
+ /*
+ * Mark journal replicas before we submit the write to guarantee
+ * recovery will find the journal entries after a crash.
+ */
+ bch2_devlist_to_replicas(&replicas.e, BCH_DATA_journal,
+ w->devs_written);
+ ret = bch2_mark_replicas(c, &replicas.e);
+ if (ret)
+ goto err;
+
if (!JSET_NO_FLUSH(jset) && w->separate_flush) {
for_each_rw_member(ca, c, i) {
percpu_ref_get(&ca->io_ref);