#include "recovery.h"
#include "subvolume.h"
#include "replicas.h"
+#include "trace.h"
#include <linux/prefetch.h>
#include <linux/sort.h>
-#include <trace/events/bcachefs.h>
/*
* bch2_btree_path_peek_slot() for a cached iterator might return a key in a
return 0;
}
-static int btree_node_flush0(struct journal *j, struct journal_entry_pin *pin, u64 seq)
+int bch2_btree_node_flush0(struct journal *j, struct journal_entry_pin *pin, u64 seq)
{
return __btree_node_flush(j, pin, 0, seq);
}
-static int btree_node_flush1(struct journal *j, struct journal_entry_pin *pin, u64 seq)
+int bch2_btree_node_flush1(struct journal *j, struct journal_entry_pin *pin, u64 seq)
{
return __btree_node_flush(j, pin, 1, seq);
}
bch2_journal_pin_add(&c->journal, seq, &w->journal,
btree_node_write_idx(b) == 0
- ? btree_node_flush0
- : btree_node_flush1);
+ ? bch2_btree_node_flush0
+ : bch2_btree_node_flush1);
}
/**
bch2_trans_journal_preres_get_cold(struct btree_trans *trans, unsigned flags,
unsigned long trace_ip)
{
- struct bch_fs *c = trans->c;
- int ret;
-
- bch2_trans_unlock(trans);
-
- ret = bch2_journal_preres_get(&c->journal,
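+ /* drop_locks_do(): unlock, run the expression, then relock (erroring if the relock fails) */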
+ return drop_locks_do(trans,
+ bch2_journal_preres_get(&trans->c->journal,
&trans->journal_preres,
trans->journal_preres_u64s,
- (flags & JOURNAL_WATERMARK_MASK));
- if (ret)
- return ret;
-
- ret = bch2_trans_relock(trans);
- if (ret) {
- trace_and_count(c, trans_restart_journal_preres_get, trans, trace_ip, 0);
- return ret;
- }
-
- return 0;
+ (flags & JOURNAL_WATERMARK_MASK)));
}
static __always_inline int bch2_trans_journal_res_get(struct btree_trans *trans,
if (!new_k) {
bch_err(c, "error allocating memory for key cache key, btree %s u64s %u",
bch2_btree_ids[path->btree_id], new_u64s);
- return -ENOMEM;
+ return -BCH_ERR_ENOMEM_btree_key_cache_insert;
}
trans_for_each_update(trans, i)
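+ /*
+ * If the old and new key types share an atomic trigger and the type
+ * wants to see both keys at once, run the trigger a single time with
+ * old and new together; otherwise run it separately for the insert
+ * and the overwrite, with a deleted key standing in for the missing
+ * half:
+ */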
if (bch2_bkey_ops[old.k->type].atomic_trigger ==
bch2_bkey_ops[i->k->k.type].atomic_trigger &&
((1U << old.k->type) & BTREE_TRIGGER_WANTS_OLD_AND_NEW)) {
- ret = bch2_mark_key(trans, old, bkey_i_to_s_c(new),
+ ret = bch2_mark_key(trans, i->btree_id, i->level,
+ old, bkey_i_to_s_c(new),
BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE|flags);
} else {
struct bkey _deleted = KEY(0, 0, 0);
_deleted.p = i->path->pos;
- ret = bch2_mark_key(trans, deleted, bkey_i_to_s_c(new),
+ ret = bch2_mark_key(trans, i->btree_id, i->level,
+ deleted, bkey_i_to_s_c(new),
BTREE_TRIGGER_INSERT|flags) ?:
- bch2_mark_key(trans, old, deleted,
+ bch2_mark_key(trans, i->btree_id, i->level,
+ old, deleted,
BTREE_TRIGGER_OVERWRITE|flags);
}
prefetch(&trans->c->journal.flags);
- h = trans->hooks;
- while (h) {
- ret = h->fn(trans, h);
- if (ret)
- return ret;
- h = h->next;
- }
-
trans_for_each_update(trans, i) {
/* Multiple inserts might go to same leaf: */
if (!same_leaf_as_prev(trans, i))
goto revert_fs_usage;
}
+ h = trans->hooks;
+ while (h) {
+ ret = h->fn(trans, h);
+ if (ret)
+ goto revert_fs_usage;
+ h = h->next;
+ }
+
trans_for_each_update(trans, i)
if (BTREE_NODE_TYPE_HAS_MEM_TRIGGERS & (1U << i->bkey_type)) {
ret = run_one_mem_trigger(trans, i, i->flags);
if (!i->cached)
btree_insert_key_leaf(trans, i);
else if (!i->key_cache_already_flushed)
- bch2_btree_insert_key_cached(trans, flags, i->path, i->k);
+ bch2_btree_insert_key_cached(trans, flags, i);
else {
bch2_btree_key_cache_drop(trans, i->path);
btree_path_set_dirty(i->path, BTREE_ITER_NEED_TRAVERSE);
trace_and_count(c, trans_restart_btree_node_split, trans, trace_ip, i->path);
break;
case -BCH_ERR_btree_insert_need_mark_replicas:
- bch2_trans_unlock(trans);
-
- ret = bch2_replicas_delta_list_mark(c, trans->fs_usage_deltas);
- if (ret)
- break;
-
- ret = bch2_trans_relock(trans);
- if (ret)
- trace_and_count(c, trans_restart_mark_replicas, trans, trace_ip);
+ ret = drop_locks_do(trans,
+ bch2_replicas_delta_list_mark(c, trans->fs_usage_deltas));
break;
case -BCH_ERR_journal_res_get_blocked:
- bch2_trans_unlock(trans);
-
if ((flags & BTREE_INSERT_JOURNAL_RECLAIM) &&
!(flags & JOURNAL_WATERMARK_reserved)) {
ret = -BCH_ERR_journal_reclaim_would_deadlock;
break;
}
- ret = bch2_trans_journal_res_get(trans,
+ ret = drop_locks_do(trans,
+ bch2_trans_journal_res_get(trans,
(flags & JOURNAL_WATERMARK_MASK)|
- JOURNAL_RES_GET_CHECK);
- if (ret)
- break;
-
- ret = bch2_trans_relock(trans);
- if (ret)
- trace_and_count(c, trans_restart_journal_res_get, trans, trace_ip);
+ JOURNAL_RES_GET_CHECK));
break;
case -BCH_ERR_btree_insert_need_journal_reclaim:
bch2_trans_unlock(trans);
break;
ret = bch2_trans_relock(trans);
- if (ret)
- trace_and_count(c, trans_restart_journal_reclaim, trans, trace_ip);
break;
case -BCH_ERR_btree_insert_need_flush_buffer: {
struct btree_write_buffer *wb = &c->btree_write_buffer;
ret = 0;
if (wb->state.nr > wb->size * 3 / 4) {
- bch2_trans_reset_updates(trans);
bch2_trans_unlock(trans);
-
mutex_lock(&wb->flush_lock);
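+ /* someone else may have flushed while we waited on flush_lock: recheck */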
- if (wb->state.nr > wb->size * 3 / 4)
+ if (wb->state.nr > wb->size * 3 / 4) {
+ bch2_trans_begin(trans);
ret = __bch2_btree_write_buffer_flush(trans,
flags|BTREE_INSERT_NOCHECK_RW, true);
- else
+ if (!ret) {
+ trace_and_count(c, trans_restart_write_buffer_flush, trans, _THIS_IP_);
+ ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_write_buffer_flush);
+ }
+ } else {
mutex_unlock(&wb->flush_lock);
-
- if (!ret) {
- trace_and_count(c, trans_restart_write_buffer_flush, trans, _THIS_IP_);
- ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_write_buffer_flush);
+ ret = bch2_trans_relock(trans);
}
}
break;
test_bit(BCH_FS_STARTED, &c->flags))
return -BCH_ERR_erofs_trans_commit;
- bch2_trans_unlock(trans);
-
- ret = bch2_fs_read_write_early(c) ?:
- bch2_trans_relock(trans);
+ ret = drop_locks_do(trans, bch2_fs_read_write_early(c));
if (ret)
return ret;
struct bkey_i *update;
int ret;
- update = bch2_bkey_make_mut(trans, k);
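+ /* make_mut_noupdate: get a mutable copy without queueing a btree update */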
+ update = bch2_bkey_make_mut_noupdate(trans, k);
ret = PTR_ERR_OR_ZERO(update);
if (ret)
return ret;
return ret;
}
+
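+/*
+ * When a key is moved from @old_pos to @new_pos (as when an extent is trimmed
+ * during an overwrite), keys at @old_pos may still exist in ancestor
+ * snapshots. For each such ancestor, emit a whiteout at @new_pos (unless a
+ * key already exists there), so the move doesn't change what those snapshots
+ * and their descendants see; ancestors already covered by a snapshot we've
+ * visited are skipped.
+ */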
+int __bch2_insert_snapshot_whiteouts(struct btree_trans *trans,
+ enum btree_id id,
+ struct bpos old_pos,
+ struct bpos new_pos)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_iter old_iter, new_iter;
+ struct bkey_s_c old_k, new_k;
+ snapshot_id_list s;
+ struct bkey_i *update;
+ int ret;
+
+ if (!bch2_snapshot_has_children(c, old_pos.snapshot))
+ return 0;
+
+ darray_init(&s);
+
+ bch2_trans_iter_init(trans, &old_iter, id, old_pos,
+ BTREE_ITER_NOT_EXTENTS|
+ BTREE_ITER_ALL_SNAPSHOTS);
+ while ((old_k = bch2_btree_iter_prev(&old_iter)).k &&
+ !(ret = bkey_err(old_k)) &&
+ bkey_eq(old_pos, old_k.k->p)) {
+ struct bpos whiteout_pos =
+ SPOS(new_pos.inode, new_pos.offset, old_k.k->p.snapshot);
+
+ if (!bch2_snapshot_is_ancestor(c, old_k.k->p.snapshot, old_pos.snapshot) ||
+ snapshot_list_has_ancestor(c, &s, old_k.k->p.snapshot))
+ continue;
+
+ new_k = bch2_bkey_get_iter(trans, &new_iter, id, whiteout_pos,
+ BTREE_ITER_NOT_EXTENTS|
+ BTREE_ITER_INTENT);
+ ret = bkey_err(new_k);
+ if (ret)
+ break;
+
+ if (new_k.k->type == KEY_TYPE_deleted) {
+ update = bch2_trans_kmalloc(trans, sizeof(struct bkey_i));
+ ret = PTR_ERR_OR_ZERO(update);
+ if (ret)
+ break;
+
+ bkey_init(&update->k);
+ update->k.p = whiteout_pos;
+ update->k.type = KEY_TYPE_whiteout;
+
+ ret = bch2_trans_update(trans, &new_iter, update,
+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
+ }
+ bch2_trans_iter_exit(trans, &new_iter);
+
+ ret = snapshot_list_add(c, &s, old_k.k->p.snapshot);
+ if (ret)
+ break;
+ }
+ bch2_trans_iter_exit(trans, &old_iter);
+ darray_exit(&s);
+
+ return ret;
+}
+
int bch2_trans_update_extent(struct btree_trans *trans,
struct btree_iter *orig_iter,
struct bkey_i *insert,
trans->extra_journal_res += compressed_sectors;
if (front_split) {
- update = bch2_bkey_make_mut(trans, k);
+ update = bch2_bkey_make_mut_noupdate(trans, k);
if ((ret = PTR_ERR_OR_ZERO(update)))
goto err;
bch2_cut_back(start, update);
- ret = bch2_btree_insert_nonextent(trans, btree_id, update,
- BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|flags);
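+ /* the trimmed copy ends at a new pos, which may need whiteouts in ancestor snapshots: */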
+ ret = bch2_insert_snapshot_whiteouts(trans, btree_id,
+ k.k->p, update->k.p) ?:
+ bch2_btree_insert_nonextent(trans, btree_id, update,
+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|flags);
if (ret)
goto err;
}
if (k.k->p.snapshot != insert->k.p.snapshot &&
(front_split || back_split)) {
- update = bch2_bkey_make_mut(trans, k);
+ update = bch2_bkey_make_mut_noupdate(trans, k);
if ((ret = PTR_ERR_OR_ZERO(update)))
goto err;
bch2_cut_front(start, update);
bch2_cut_back(insert->k.p, update);
- ret = bch2_btree_insert_nonextent(trans, btree_id, update,
+ ret = bch2_insert_snapshot_whiteouts(trans, btree_id,
+ k.k->p, update->k.p) ?:
+ bch2_btree_insert_nonextent(trans, btree_id, update,
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|flags);
if (ret)
goto err;
update->k.p = k.k->p;
update->k.p.snapshot = insert->k.p.snapshot;
- if (insert->k.p.snapshot != k.k->p.snapshot ||
- (btree_type_has_snapshots(btree_id) &&
- need_whiteout_for_snapshot(trans, btree_id, update->k.p)))
+ if (insert->k.p.snapshot != k.k->p.snapshot) {
update->k.type = KEY_TYPE_whiteout;
+ } else if (btree_type_has_snapshots(btree_id)) {
+ ret = need_whiteout_for_snapshot(trans, btree_id, update->k.p);
+ if (ret < 0)
+ goto err;
+ if (ret)
+ update->k.type = KEY_TYPE_whiteout;
+ }
ret = bch2_btree_insert_nonextent(trans, btree_id, update,
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|flags);
}
if (back_split) {
- update = bch2_bkey_make_mut(trans, k);
+ update = bch2_bkey_make_mut_noupdate(trans, k);
if ((ret = PTR_ERR_OR_ZERO(update)))
goto err;
unsigned long ip)
{
struct btree_path *btree_path;
+ struct bkey k;
int ret;
- i->key_cache_already_flushed = true;
- i->flags |= BTREE_TRIGGER_NORUN;
-
btree_path = bch2_path_get(trans, path->btree_id, path->pos, 1, 0,
BTREE_ITER_INTENT, _THIS_IP_);
-
ret = bch2_btree_path_traverse(trans, btree_path, 0);
if (ret)
- goto err;
+ goto out;
+
+ /*
+ * The old key in the insert entry might actually refer to an existing
+ * key in the btree that has been deleted from cache and not yet
+ * flushed. Check for this and skip the flush so we don't run triggers
+ * against a stale key.
+ */
+ bch2_btree_path_peek_slot_exact(btree_path, &k);
+ if (!bkey_deleted(&k))
+ goto out;
+
+ i->key_cache_already_flushed = true;
+ i->flags |= BTREE_TRIGGER_NORUN;
+
btree_path_set_should_be_locked(btree_path);
ret = bch2_trans_update_by_path_trace(trans, btree_path, i->k, flags, ip);
-err:
+out:
bch2_path_put(trans, btree_path, true);
return ret;
}
* the key cache - but the key has to exist in the btree for that to
* work:
*/
- if (path->cached &&
- bkey_deleted(&i->old_k) &&
- !(flags & BTREE_UPDATE_NO_KEY_CACHE_COHERENCY))
+ if (path->cached && bkey_deleted(&i->old_k))
return flush_new_cached_update(trans, path, i, flags, ip);
return 0;
return 0;
}
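+
+/*
+ * Walk back from POS_MAX to the last key in @btree, then return the empty
+ * slot just after it, failing with -BCH_ERR_ENOSPC_btree_slot if that slot
+ * is past @end. On success the iterator is left pointing at the slot and the
+ * caller must exit it; on error it's exited here.
+ */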
+int bch2_bkey_get_empty_slot(struct btree_trans *trans, struct btree_iter *iter,
+ enum btree_id btree, struct bpos end)
+{
+ struct bkey_s_c k;
+ int ret = 0;
+
+ bch2_trans_iter_init(trans, iter, btree, POS_MAX, BTREE_ITER_INTENT);
+ k = bch2_btree_iter_prev(iter);
+ ret = bkey_err(k);
+ if (ret)
+ goto err;
+
+ bch2_btree_iter_advance(iter);
+ k = bch2_btree_iter_peek_slot(iter);
+ ret = bkey_err(k);
+ if (ret)
+ goto err;
+
+ BUG_ON(k.k->type != KEY_TYPE_deleted);
+
+ if (bkey_gt(k.k->p, end)) {
+ ret = -BCH_ERR_ENOSPC_btree_slot;
+ goto err;
+ }
+
+ return 0;
+err:
+ bch2_trans_iter_exit(trans, iter);
+ return ret;
+}
+
void bch2_trans_commit_hook(struct btree_trans *trans,
struct btree_trans_commit_hook *h)
{
return bch2_btree_delete_extent_at(trans, iter, 0, update_flags);
}
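+
+/*
+ * Queue a deletion at @pos through the btree write buffer instead of updating
+ * the btree directly: allocates a zeroed key (KEY_TYPE_deleted) and hands it
+ * to bch2_trans_update_buffered().
+ */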
+int bch2_btree_delete_at_buffered(struct btree_trans *trans,
+ enum btree_id btree, struct bpos pos)
+{
+ struct bkey_i *k;
+
+ k = bch2_trans_kmalloc(trans, sizeof(*k));
+ if (IS_ERR(k))
+ return PTR_ERR(k);
+
+ bkey_init(&k->k);
+ k->k.p = pos;
+ return bch2_trans_update_buffered(trans, btree, k);
+}
+
int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id,
struct bpos start, struct bpos end,
unsigned update_flags,
int ret;
prt_vprintf(&buf, fmt, args);
- ret = buf.allocation_failure ? -ENOMEM : 0;
+ ret = buf.allocation_failure ? -BCH_ERR_ENOMEM_trans_log_msg : 0;
if (ret)
goto err;
return ret;
}
-int bch2_trans_log_msg(struct btree_trans *trans, const char *fmt, ...)
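+/*
+ * Before the journal has been started (early in recovery) the message is
+ * stashed in early_journal_entries, to be written out with the first journal
+ * entry; afterwards it's committed through a normal btree transaction.
+ */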
+static int
+__bch2_fs_log_msg(struct bch_fs *c, unsigned commit_flags, const char *fmt,
+ va_list args)
{
- va_list args;
int ret;
- va_start(args, fmt);
- ret = __bch2_trans_log_msg(&trans->extra_journal_entries, fmt, args);
- va_end(args);
+ if (!test_bit(JOURNAL_STARTED, &c->journal.flags)) {
+ ret = __bch2_trans_log_msg(&c->journal.early_journal_entries, fmt, args);
+ } else {
+ ret = bch2_trans_do(c, NULL, NULL,
+ BTREE_INSERT_LAZY_RW|commit_flags,
+ __bch2_trans_log_msg(&trans.extra_journal_entries, fmt, args));
+ }
return ret;
}
int ret;
va_start(args, fmt);
+ ret = __bch2_fs_log_msg(c, 0, fmt, args);
+ va_end(args);
+ return ret;
+}
+
- if (!test_bit(JOURNAL_STARTED, &c->journal.flags)) {
- ret = __bch2_trans_log_msg(&c->journal.early_journal_entries, fmt, args);
- } else {
- ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_LAZY_RW,
- __bch2_trans_log_msg(&trans.extra_journal_entries, fmt, args));
- }
+/*
+ * Use for logging messages during recovery: this commits with the reserved
+ * journal watermark, so it won't block if the journal is low on space.
+ */
+int bch2_journal_log_msg(struct bch_fs *c, const char *fmt, ...)
+{
+ va_list args;
+ int ret;
+ va_start(args, fmt);
+ ret = __bch2_fs_log_msg(c, JOURNAL_WATERMARK_reserved, fmt, args);
va_end(args);
-
return ret;
-
}