#include <linux/sort.h>
+static int bch2_btree_write_buffer_journal_flush(struct journal *,
+ struct journal_entry_pin *, u64);
+
static int btree_write_buffered_key_cmp(const void *_l, const void *_r)
{
const struct btree_write_buffered_key *l = _l;
if (ret)
return ret;
+ /*
+ * We can't clone a path that has write locks: unshare it now, before
+ * set_pos and traverse():
+ */
+ if (iter->path->ref > 1)
+ iter->path = __bch2_btree_path_make_mut(trans, iter->path, true, _THIS_IP_);
+
path = iter->path;
if (!*write_locked) {
bch2_btree_insert_key_leaf(trans, path, &wb->k, wb->journal_seq);
(*fast)++;
-
- if (path->ref > 1) {
- /*
- * We can't clone a path that has write locks: if the path is
- * shared, unlock before set_pos(), traverse():
- */
- bch2_btree_node_unlock_write(trans, path, path->l[0].b);
- *write_locked = false;
- }
return 0;
trans_commit:
- return bch2_trans_update(trans, iter, &wb->k, 0) ?:
+ trans->journal_res.seq = wb->journal_seq;
+
+ return bch2_trans_update(trans, iter, &wb->k,
+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
bch2_trans_commit(trans, NULL, NULL,
commit_flags|
- BTREE_INSERT_NOCHECK_RW|
- BTREE_INSERT_NOFAIL|
- BTREE_INSERT_JOURNAL_RECLAIM);
+ BCH_TRANS_COMMIT_no_check_rw|
+ BCH_TRANS_COMMIT_no_enospc|
+ BCH_TRANS_COMMIT_no_journal_res|
+ BCH_TRANS_COMMIT_journal_reclaim);
}
static union btree_write_buffer_state btree_write_buffer_switch(struct btree_write_buffer *wb)
return old;
}
+/*
+ * Update a btree with a write buffered key using the journal seq of the
+ * original write buffer insert.
+ *
+ * It is not safe to rejournal the key once it has been inserted into the write
+ * buffer because that may break recovery ordering. For example, the key may
+ * have already been modified in the active write buffer in a seq that comes
+ * before the current transaction. If we were to journal this key again and
+ * crash, recovery would process updates in the wrong order.
+ */
+static int
+btree_write_buffered_insert(struct btree_trans *trans,
+ struct btree_write_buffered_key *wb)
+{
+ struct btree_iter iter;
+ int ret;
+
+ bch2_trans_iter_init(trans, &iter, wb->btree, bkey_start_pos(&wb->k.k),
+ BTREE_ITER_CACHED|BTREE_ITER_INTENT);
+
+ trans->journal_res.seq = wb->journal_seq;
+
+ ret = bch2_btree_iter_traverse(&iter) ?:
+ bch2_trans_update(trans, &iter, &wb->k,
+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
int __bch2_btree_write_buffer_flush(struct btree_trans *trans, unsigned commit_flags,
bool locked)
{
if (!locked && !mutex_trylock(&wb->flush_lock))
return 0;
- bch2_journal_pin_copy(j, &pin, &wb->journal_pin, NULL);
+ bch2_journal_pin_copy(j, &pin, &wb->journal_pin,
+ bch2_btree_write_buffer_journal_flush);
bch2_journal_pin_drop(j, &wb->journal_pin);
s = btree_write_buffer_switch(wb);
keys = wb->keys[s.idx];
nr = s.nr;
+ if (race_fault())
+ goto slowpath;
+
/*
* We first sort so that we can detect and skip redundant updates, and
* then we attempt to flush in sorted btree order, as this is most
* However, since we're not flushing in the order they appear in the
* journal we won't be able to drop our journal pin until everything is
* flushed - which means this could deadlock the journal if we weren't
- * passing BTREE_INSERT_JOURNAL_RECLAIM. This causes the update to fail
+ * passing BCH_TRANS_COMMIT_journal_reclaim. This causes the update to fail
* if it would block taking a journal reservation.
*
* If that happens, simply skip the key so we can optimistically insert
if (!iter.path || iter.path->btree_id != i->btree) {
bch2_trans_iter_exit(trans, &iter);
- bch2_trans_iter_init(trans, &iter, i->btree, i->k.k.p, BTREE_ITER_INTENT);
+ bch2_trans_iter_init(trans, &iter, i->btree, i->k.k.p,
+ BTREE_ITER_INTENT|BTREE_ITER_ALL_SNAPSHOTS);
}
bch2_btree_iter_set_pos(&iter, i->k.k.p);
if (!i->journal_seq)
continue;
- if (i->journal_seq > pin.seq) {
- struct journal_entry_pin pin2;
-
- memset(&pin2, 0, sizeof(pin2));
-
- bch2_journal_pin_add(j, i->journal_seq, &pin2, NULL);
- bch2_journal_pin_drop(j, &pin);
- bch2_journal_pin_copy(j, &pin, &pin2, NULL);
- bch2_journal_pin_drop(j, &pin2);
- }
+ bch2_journal_pin_update(j, i->journal_seq, &pin,
+ bch2_btree_write_buffer_journal_flush);
ret = commit_do(trans, NULL, NULL,
commit_flags|
- BTREE_INSERT_NOFAIL|
- BTREE_INSERT_JOURNAL_RECLAIM,
- __bch2_btree_insert(trans, i->btree, &i->k, 0));
+ BCH_TRANS_COMMIT_no_enospc|
+ BCH_TRANS_COMMIT_no_journal_res|
+ BCH_TRANS_COMMIT_journal_reclaim,
+ btree_write_buffered_insert(trans, i));
if (bch2_fs_fatal_err_on(ret, c, "%s: insert error %s", __func__, bch2_err_str(ret)))
break;
}
mutex_lock(&wb->flush_lock);
return bch2_trans_run(c,
- __bch2_btree_write_buffer_flush(&trans, BTREE_INSERT_NOCHECK_RW, true));
+ __bch2_btree_write_buffer_flush(trans, BCH_TRANS_COMMIT_no_check_rw, true));
}
static inline u64 btree_write_buffer_ref(int idx)