#include "snapshot.h"
#include "trace.h"
+#include <linux/darray.h>
+
static inline int btree_insert_entry_cmp(const struct btree_insert_entry *l,
					 const struct btree_insert_entry *r)
{
	return   cmp_int(l->btree_id, r->btree_id) ?:
		 cmp_int(l->cached, r->cached) ?:
		 -cmp_int(l->level, r->level) ?:
		 bpos_cmp(l->k->k.p, r->k->k.p);
}
static int __must_check
-bch2_trans_update_by_path(struct btree_trans *, struct btree_path *,
+bch2_trans_update_by_path(struct btree_trans *, btree_path_idx_t,
struct bkey_i *, enum btree_update_flags,
unsigned long ip);
-static noinline int __check_pos_snapshot_overwritten(struct btree_trans *trans,
- enum btree_id id,
- struct bpos pos)
-{
- struct bch_fs *c = trans->c;
- struct btree_iter iter;
- struct bkey_s_c k;
- int ret;
-
- bch2_trans_iter_init(trans, &iter, id, pos,
- BTREE_ITER_NOT_EXTENTS|
- BTREE_ITER_ALL_SNAPSHOTS);
- while (1) {
- k = bch2_btree_iter_prev(&iter);
- ret = bkey_err(k);
- if (ret)
- break;
-
- if (!k.k)
- break;
-
- if (!bkey_eq(pos, k.k->p))
- break;
-
- if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, pos.snapshot)) {
- ret = 1;
- break;
- }
- }
- bch2_trans_iter_exit(trans, &iter);
-
- return ret;
-}
-
-static inline int check_pos_snapshot_overwritten(struct btree_trans *trans,
- enum btree_id id,
- struct bpos pos)
-{
- if (!btree_type_has_snapshots(id) ||
- bch2_snapshot_is_leaf(trans->c, pos.snapshot))
- return 0;
-
- return __check_pos_snapshot_overwritten(trans, id, pos);
-}
-
static noinline int extent_front_merge(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_s_c k,
if (!bch2_bkey_merge(c, bkey_i_to_s(update), bkey_i_to_s_c(*insert)))
return 0;
- ret = check_pos_snapshot_overwritten(trans, iter->btree_id, k.k->p) ?:
- check_pos_snapshot_overwritten(trans, iter->btree_id, (*insert)->k.p);
+ ret = bch2_key_has_snapshot_overwrites(trans, iter->btree_id, k.k->p) ?:
+ bch2_key_has_snapshot_overwrites(trans, iter->btree_id, (*insert)->k.p);
if (ret < 0)
return ret;
if (ret)
struct bch_fs *c = trans->c;
int ret;
- ret = check_pos_snapshot_overwritten(trans, iter->btree_id, insert->k.p) ?:
- check_pos_snapshot_overwritten(trans, iter->btree_id, k.k->p);
+ ret = bch2_key_has_snapshot_overwrites(trans, iter->btree_id, insert->k.p) ?:
+ bch2_key_has_snapshot_overwrites(trans, iter->btree_id, k.k->p);
if (ret < 0)
return ret;
if (ret)
struct bkey_s_c old_k, new_k;
snapshot_id_list s;
struct bkey_i *update;
- int ret;
+ int ret = 0;
if (!bch2_snapshot_has_children(c, old_pos.snapshot))
return 0;
enum btree_id btree_id = iter->btree_id;
struct bkey_i *update;
struct bpos new_start = bkey_start_pos(new.k);
- bool front_split = bkey_lt(bkey_start_pos(old.k), new_start);
- bool back_split = bkey_gt(old.k->p, new.k->p);
+ unsigned front_split = bkey_lt(bkey_start_pos(old.k), new_start);
+ unsigned back_split = bkey_gt(old.k->p, new.k->p);
+ unsigned middle_split = (front_split || back_split) &&
+ old.k->p.snapshot != new.k->p.snapshot;
+ unsigned nr_splits = front_split + back_split + middle_split;
int ret = 0, compressed_sectors;
/*
 * If we're going to be splitting a compressed extent, note it
 * so that __bch2_trans_commit() can increase our disk
 * reservation:
 */
- if (((front_split && back_split) ||
- ((front_split || back_split) && old.k->p.snapshot != new.k->p.snapshot)) &&
+ if (nr_splits > 1 &&
(compressed_sectors = bch2_bkey_sectors_compressed(old)))
- trans->extra_journal_res += compressed_sectors;
+ trans->extra_disk_res += compressed_sectors * (nr_splits - 1);
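/*
 * Worked example (editor's note, not part of the patch): overwriting the
 * middle of a compressed extent from a different snapshot produces a front
 * split, a back split and a middle split, so nr_splits == 3. Each split
 * beyond the first may need its own on-disk copy of the compressed
 * payload, hence the extra compressed_sectors * (nr_splits - 1) sectors
 * of disk reservation.
 */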
if (front_split) {
update = bch2_bkey_make_mut_noupdate(trans, old);
}
/* If we're overwriting in a different snapshot - middle split: */
- if (old.k->p.snapshot != new.k->p.snapshot &&
- (front_split || back_split)) {
+ if (middle_split) {
update = bch2_bkey_make_mut_noupdate(trans, old);
if ((ret = PTR_ERR_OR_ZERO(update)))
return ret;
}
static noinline int flush_new_cached_update(struct btree_trans *trans,
- struct btree_path *path,
struct btree_insert_entry *i,
enum btree_update_flags flags,
unsigned long ip)
{
- struct btree_path *btree_path;
struct bkey k;
int ret;
- btree_path = bch2_path_get(trans, path->btree_id, path->pos, 1, 0,
- BTREE_ITER_INTENT, _THIS_IP_);
- ret = bch2_btree_path_traverse(trans, btree_path, 0);
+ btree_path_idx_t path_idx =
+ bch2_path_get(trans, i->btree_id, i->old_k.p, 1, 0,
+ BTREE_ITER_INTENT, _THIS_IP_);
+ ret = bch2_btree_path_traverse(trans, path_idx, 0);
if (ret)
goto out;
+ struct btree_path *btree_path = trans->paths + path_idx;
+
/*
 * The old key in the insert entry might actually refer to an existing
 * key in the btree that has been deleted from cache and not yet
 * flushed. Check for this and skip the flush so we don't run triggers
 * against a stale key.
 */
bch2_btree_path_peek_slot_exact(btree_path, &k);
if (!bkey_deleted(&k))
	goto out;

i->key_cache_already_flushed = true;
i->flags |= BTREE_TRIGGER_NORUN;
btree_path_set_should_be_locked(btree_path);
- ret = bch2_trans_update_by_path(trans, btree_path, i->k, flags, ip);
+ ret = bch2_trans_update_by_path(trans, path_idx, i->k, flags, ip);
out:
- bch2_path_put(trans, btree_path, true);
+ bch2_path_put(trans, path_idx, true);
return ret;
}
static int __must_check
-bch2_trans_update_by_path(struct btree_trans *trans, struct btree_path *path,
+bch2_trans_update_by_path(struct btree_trans *trans, btree_path_idx_t path_idx,
struct bkey_i *k, enum btree_update_flags flags,
unsigned long ip)
{
struct bch_fs *c = trans->c;
struct btree_insert_entry *i, n;
- u64 seq = 0;
int cmp;
+ struct btree_path *path = trans->paths + path_idx;
EBUG_ON(!path->should_be_locked);
- EBUG_ON(trans->nr_updates >= BTREE_ITER_MAX);
+ EBUG_ON(trans->nr_updates >= trans->nr_paths);
EBUG_ON(!bpos_eq(k->k.p, path->pos));
- /*
- * The transaction journal res hasn't been allocated at this point.
- * That occurs at commit time. Reuse the seq field to pass in the seq
- * of a prejournaled key.
- */
- if (flags & BTREE_UPDATE_PREJOURNAL)
- seq = trans->journal_res.seq;
-
n = (struct btree_insert_entry) {
.flags = flags,
.bkey_type = __btree_node_type(path->level, path->btree_id),
.btree_id = path->btree_id,
.level = path->level,
.cached = path->cached,
- .path = path,
+ .path = path_idx,
.k = k,
- .seq = seq,
.ip_allocated = ip,
};
/*
 * Pending updates are kept sorted: first, find position of new update,
* then delete/trim any updates the new update overwrites:
*/
- trans_for_each_update(trans, i) {
+ for (i = trans->updates; i < trans->updates + trans->nr_updates; i++) {
cmp = btree_insert_entry_cmp(&n, i);
if (cmp <= 0)
break;
i->cached = n.cached;
i->k = n.k;
i->path = n.path;
- i->seq = n.seq;
i->ip_allocated = n.ip_allocated;
} else {
array_insert_item(trans->updates, trans->nr_updates,
}
}
- __btree_path_get(i->path, true);
+ __btree_path_get(trans->paths + i->path, true);
/*
 * If a key is present in the key cache, it must also exist in the
 * btree - this is necessary for cache coherency. When iterating over
 * a btree that's cached in the key cache, the btree iter code checks
 * the key cache - but the key has to exist in the btree for that to
 * work:
 */
if (path->cached && bkey_deleted(&i->old_k))
- return flush_new_cached_update(trans, path, i, flags, ip);
+ return flush_new_cached_update(trans, i, flags, ip);
+
+ return 0;
+}
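/*
 * Editor's sketch (positions hypothetical): btree_insert_entry_cmp()
 * orders entries by (btree_id, cached, -level, pos), so pending updates
 * might look like
 *   { extents, 0:4096 }, { extents, 0:8192 }, { inodes, 0:12 }
 * and the loop above can stop at the first entry that sorts >= the new
 * update, overwriting an equal entry or inserting just before it.
 */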
+
+static noinline int bch2_trans_update_get_key_cache(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct btree_path *path)
+{
+ struct btree_path *key_cache_path = btree_iter_key_cache_path(trans, iter);
+
+ if (!key_cache_path ||
+ !key_cache_path->should_be_locked ||
+ !bpos_eq(key_cache_path->pos, iter->pos)) {
+ struct bkey_cached *ck;
+ int ret;
+
+ if (!iter->key_cache_path)
+ iter->key_cache_path =
+ bch2_path_get(trans, path->btree_id, path->pos, 1, 0,
+ BTREE_ITER_INTENT|
+ BTREE_ITER_CACHED, _THIS_IP_);
+
+ iter->key_cache_path =
+ bch2_btree_path_set_pos(trans, iter->key_cache_path, path->pos,
+ iter->flags & BTREE_ITER_INTENT,
+ _THIS_IP_);
+
+ ret = bch2_btree_path_traverse(trans, iter->key_cache_path, BTREE_ITER_CACHED);
+ if (unlikely(ret))
+ return ret;
+
+ ck = (void *) trans->paths[iter->key_cache_path].l[0].b;
+
+ if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
+ trace_and_count(trans->c, trans_restart_key_cache_raced, trans, _RET_IP_);
+ return btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_raced);
+ }
+
+ btree_path_set_should_be_locked(trans->paths + iter->key_cache_path);
+ }
return 0;
}
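/*
 * Editor's note: if the freshly traversed cache entry is already dirty,
 * another update appears to have raced in ahead of us and our view of
 * the key may be stale, so the transaction restarts rather than trying
 * to reconcile (hence the trans_restart_key_cache_raced trace event).
 */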
int __must_check bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter,
struct bkey_i *k, enum btree_update_flags flags)
{
- struct btree_path *path = iter->update_path ?: iter->path;
- struct bkey_cached *ck;
+ btree_path_idx_t path_idx = iter->update_path ?: iter->path;
int ret;
if (iter->flags & BTREE_ITER_IS_EXTENTS)
/*
* Ensure that updates to cached btrees go to the key cache:
*/
+ struct btree_path *path = trans->paths + path_idx;
if (!(flags & BTREE_UPDATE_KEY_CACHE_RECLAIM) &&
!path->cached &&
!path->level &&
btree_id_cached(trans->c, path->btree_id)) {
- if (!iter->key_cache_path ||
- !iter->key_cache_path->should_be_locked ||
- !bpos_eq(iter->key_cache_path->pos, k->k.p)) {
- if (!iter->key_cache_path)
- iter->key_cache_path =
- bch2_path_get(trans, path->btree_id, path->pos, 1, 0,
- BTREE_ITER_INTENT|
- BTREE_ITER_CACHED, _THIS_IP_);
-
- iter->key_cache_path =
- bch2_btree_path_set_pos(trans, iter->key_cache_path, path->pos,
- iter->flags & BTREE_ITER_INTENT,
- _THIS_IP_);
-
- ret = bch2_btree_path_traverse(trans, iter->key_cache_path,
- BTREE_ITER_CACHED);
- if (unlikely(ret))
- return ret;
-
- ck = (void *) iter->key_cache_path->l[0].b;
-
- if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
- trace_and_count(trans->c, trans_restart_key_cache_raced, trans, _RET_IP_);
- return btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_raced);
- }
-
- btree_path_set_should_be_locked(iter->key_cache_path);
- }
+ ret = bch2_trans_update_get_key_cache(trans, iter, path);
+ if (ret)
+ return ret;
- path = iter->key_cache_path;
+ path_idx = iter->key_cache_path;
}
- return bch2_trans_update_by_path(trans, path, k, flags, _RET_IP_);
+ return bch2_trans_update_by_path(trans, path_idx, k, flags, _RET_IP_);
}
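/*
 * Usage sketch (editor's illustration; function name and btree id are
 * hypothetical): the common pattern around bch2_trans_update() is
 * traverse, queue the update, and let the caller commit the transaction.
 */
static int example_update_key(struct btree_trans *trans, struct bpos pos,
			      struct bkey_i *k)
{
	struct btree_iter iter;

	bch2_trans_iter_init(trans, &iter, BTREE_ID_inodes, pos,
			     BTREE_ITER_INTENT);
	int ret = bch2_btree_iter_traverse(&iter) ?:
		  bch2_trans_update(trans, &iter, k, 0);
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}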
-/*
- * Add a transaction update for a key that has already been journaled.
- */
-int __must_check bch2_trans_update_seq(struct btree_trans *trans, u64 seq,
- struct btree_iter *iter, struct bkey_i *k,
- enum btree_update_flags flags)
+int bch2_btree_insert_clone_trans(struct btree_trans *trans,
+ enum btree_id btree,
+ struct bkey_i *k)
{
- trans->journal_res.seq = seq;
- return bch2_trans_update(trans, iter, k, flags|BTREE_UPDATE_NOJOURNAL|
- BTREE_UPDATE_PREJOURNAL);
+ struct bkey_i *n = bch2_trans_kmalloc(trans, bkey_bytes(&k->k));
+ int ret = PTR_ERR_OR_ZERO(n);
+ if (ret)
+ return ret;
+
+ bkey_copy(n, k);
+ return bch2_btree_insert_trans(trans, btree, n, 0);
}
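/*
 * Editor's note: the copy matters because @k may live in caller stack
 * space; memory from bch2_trans_kmalloc() stays valid until the
 * transaction commits or restarts, so the queued update can safely
 * reference the clone after this function returns.
 */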
-int __must_check bch2_trans_update_buffered(struct btree_trans *trans,
- enum btree_id btree,
- struct bkey_i *k)
+struct jset_entry *__bch2_trans_jset_entry_alloc(struct btree_trans *trans, unsigned u64s)
{
- struct btree_write_buffered_key *i;
- int ret;
+ unsigned new_top = trans->journal_entries_u64s + u64s;
+ unsigned old_size = trans->journal_entries_size;
- EBUG_ON(trans->nr_wb_updates > trans->wb_updates_size);
- EBUG_ON(k->k.u64s > BTREE_WRITE_BUFERED_U64s_MAX);
+ if (new_top > trans->journal_entries_size) {
+ trans->journal_entries_size = roundup_pow_of_two(new_top);
- trans_for_each_wb_update(trans, i) {
- if (i->btree == btree && bpos_eq(i->k.k.p, k->k.p)) {
- bkey_copy(&i->k, k);
- return 0;
- }
+ btree_trans_stats(trans)->journal_entries_size = trans->journal_entries_size;
}
- if (!trans->wb_updates ||
- trans->nr_wb_updates == trans->wb_updates_size) {
- struct btree_write_buffered_key *u;
+ struct jset_entry *n =
+ bch2_trans_kmalloc_nomemzero(trans,
+ trans->journal_entries_size * sizeof(u64));
+ if (IS_ERR(n))
+ return ERR_CAST(n);
- if (trans->nr_wb_updates == trans->wb_updates_size) {
- struct btree_transaction_stats *s = btree_trans_stats(trans);
+ if (trans->journal_entries)
+ memcpy(n, trans->journal_entries, old_size * sizeof(u64));
+ trans->journal_entries = n;
- BUG_ON(trans->wb_updates_size > U8_MAX / 2);
- trans->wb_updates_size = max(1, trans->wb_updates_size * 2);
- if (s)
- s->wb_updates_size = trans->wb_updates_size;
- }
-
- u = bch2_trans_kmalloc_nomemzero(trans,
- trans->wb_updates_size *
- sizeof(struct btree_write_buffered_key));
- ret = PTR_ERR_OR_ZERO(u);
- if (ret)
- return ret;
-
- if (trans->nr_wb_updates)
- memcpy(u, trans->wb_updates, trans->nr_wb_updates *
- sizeof(struct btree_write_buffered_key));
- trans->wb_updates = u;
- }
-
- trans->wb_updates[trans->nr_wb_updates] = (struct btree_write_buffered_key) {
- .btree = btree,
- };
-
- bkey_copy(&trans->wb_updates[trans->nr_wb_updates].k, k);
- trans->nr_wb_updates++;
-
- return 0;
+ struct jset_entry *e = btree_trans_journal_entries_top(trans);
+ trans->journal_entries_u64s = new_top;
+ return e;
}
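/*
 * Worked example (editor's note): with journal_entries_u64s == 5,
 * journal_entries_size == 8 and a request for u64s == 4, new_top == 9,
 * so the buffer grows to roundup_pow_of_two(9) == 16 u64s, the old
 * buffer is copied across, and the caller gets the entry at offset 5
 * with journal_entries_u64s advanced to 9.
 */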
int bch2_bkey_get_empty_slot(struct btree_trans *trans, struct btree_iter *iter,
int ret;
bch2_trans_iter_init(trans, &iter, btree, k->k.p,
+ BTREE_ITER_CACHED|
BTREE_ITER_NOT_EXTENTS|
BTREE_ITER_INTENT);
ret = bch2_btree_iter_traverse(&iter) ?:
return ret;
}
-int __bch2_btree_insert(struct btree_trans *trans, enum btree_id id,
- struct bkey_i *k, enum btree_update_flags flags)
+int bch2_btree_insert_trans(struct btree_trans *trans, enum btree_id id,
+ struct bkey_i *k, enum btree_update_flags flags)
{
struct btree_iter iter;
int ret;
/**
 * bch2_btree_insert - insert a key into a btree
* @c: pointer to struct bch_fs
* @id: btree to insert into
- * @insert_keys: list of keys to insert
- * @hook: insert callback
+ * @k: key to insert
+ * @disk_res: must be non-NULL whenever inserting or potentially
+ * splitting data extents
+ * @flags: transaction commit flags
+ *
+ * Returns: 0 on success, error code on failure
*/
-int bch2_btree_insert(struct bch_fs *c, enum btree_id id,
- struct bkey_i *k,
- struct disk_reservation *disk_res,
- u64 *journal_seq, int flags)
+int bch2_btree_insert(struct bch_fs *c, enum btree_id id, struct bkey_i *k,
+ struct disk_reservation *disk_res, int flags)
{
- return bch2_trans_do(c, disk_res, journal_seq, flags,
- __bch2_btree_insert(&trans, id, k, 0));
+ return bch2_trans_do(c, disk_res, NULL, flags,
+ bch2_btree_insert_trans(trans, id, k, 0));
}
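/*
 * Usage sketch (editor's illustration; the btree id is arbitrary): a
 * plain non-extent key can't split data extents, so per the kernel-doc
 * above, NULL is fine for @disk_res here.
 */
static int example_insert(struct bch_fs *c, struct bkey_i *k)
{
	return bch2_btree_insert(c, BTREE_ID_inodes, k, NULL, 0);
}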
int bch2_btree_delete_extent_at(struct btree_trans *trans, struct btree_iter *iter,
return bch2_btree_delete_extent_at(trans, iter, 0, update_flags);
}
-int bch2_btree_delete_at_buffered(struct btree_trans *trans,
- enum btree_id btree, struct bpos pos)
+int bch2_btree_delete(struct btree_trans *trans,
+ enum btree_id btree, struct bpos pos,
+ unsigned update_flags)
{
- struct bkey_i *k;
+ struct btree_iter iter;
+ int ret;
- k = bch2_trans_kmalloc(trans, sizeof(*k));
- if (IS_ERR(k))
- return PTR_ERR(k);
+ bch2_trans_iter_init(trans, &iter, btree, pos,
+ BTREE_ITER_CACHED|
+ BTREE_ITER_INTENT);
+ ret = bch2_btree_iter_traverse(&iter) ?:
+ bch2_btree_delete_at(trans, &iter, update_flags);
+ bch2_trans_iter_exit(trans, &iter);
- bkey_init(&k->k);
- k->k.p = pos;
- return bch2_trans_update_buffered(trans, btree, k);
+ return ret;
}
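/*
 * Usage sketch (editor's illustration): the helper composes with
 * bch2_trans_run() for a one-off delete outside a larger transaction.
 */
static int example_delete(struct bch_fs *c, enum btree_id btree, struct bpos pos)
{
	return bch2_trans_run(c, bch2_btree_delete(trans, btree, pos, 0));
}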
int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id,
ret = bch2_trans_update(trans, &iter, &delete, update_flags) ?:
bch2_trans_commit(trans, &disk_res, journal_seq,
- BTREE_INSERT_NOFAIL);
+ BCH_TRANS_COMMIT_no_enospc);
bch2_disk_reservation_put(trans->c, &disk_res);
err:
/*
 * the bch2_trans_begin() call is in a weird place because we
 * need to call it after every transaction restart, to avoid
 * path overflow:
 */
bch2_trans_begin(trans);

if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
	ret = 0;
if (ret)
	break;
}
bch2_trans_iter_exit(trans, &iter);
- if (!ret && trans_was_restarted(trans, restart_count))
- ret = -BCH_ERR_transaction_restart_nested;
- return ret;
+ return ret ?: trans_was_restarted(trans, restart_count);
}
/*
u64 *journal_seq)
{
int ret = bch2_trans_run(c,
- bch2_btree_delete_range_trans(&trans, id, start, end,
+ bch2_btree_delete_range_trans(trans, id, start, end,
update_flags, journal_seq));
if (ret == -BCH_ERR_transaction_restart_nested)
ret = 0;
int bch2_btree_bit_mod(struct btree_trans *trans, enum btree_id btree,
struct bpos pos, bool set)
{
- struct bkey_i *k;
- int ret = 0;
-
- k = bch2_trans_kmalloc_nomemzero(trans, sizeof(*k));
- ret = PTR_ERR_OR_ZERO(k);
- if (unlikely(ret))
+ struct bkey_i *k = bch2_trans_kmalloc(trans, sizeof(*k));
+ int ret = PTR_ERR_OR_ZERO(k);
+ if (ret)
return ret;
bkey_init(&k->k);
k->k.type = set ? KEY_TYPE_set : KEY_TYPE_deleted;
k->k.p = pos;
- return bch2_trans_update_buffered(trans, btree, k);
+ struct btree_iter iter;
+ bch2_trans_iter_init(trans, &iter, btree, pos, BTREE_ITER_INTENT);
+
+ ret = bch2_btree_iter_traverse(&iter) ?:
+ bch2_trans_update(trans, &iter, k, 0);
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
}
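/*
 * Editor's note: this helper and bch2_btree_bit_mod_buffered() below
 * write the same KEY_TYPE_set/KEY_TYPE_deleted marker key; the
 * difference is transport - an ordinary btree iterator here versus the
 * btree write buffer below.
 */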
-static int __bch2_trans_log_msg(darray_u64 *entries, const char *fmt, va_list args)
+int bch2_btree_bit_mod_buffered(struct btree_trans *trans, enum btree_id btree,
+ struct bpos pos, bool set)
{
- struct printbuf buf = PRINTBUF;
- struct jset_entry_log *l;
- unsigned u64s;
- int ret;
+ struct bkey_i k;
- prt_vprintf(&buf, fmt, args);
- ret = buf.allocation_failure ? -BCH_ERR_ENOMEM_trans_log_msg : 0;
- if (ret)
- goto err;
+ bkey_init(&k.k);
+ k.k.type = set ? KEY_TYPE_set : KEY_TYPE_deleted;
+ k.k.p = pos;
- u64s = DIV_ROUND_UP(buf.pos, sizeof(u64));
+ return bch2_trans_update_buffered(trans, btree, &k);
+}
- ret = darray_make_room(entries, jset_u64s(u64s));
+static int __bch2_trans_log_msg(struct btree_trans *trans, struct printbuf *buf, unsigned u64s)
+{
+ struct jset_entry *e = bch2_trans_jset_entry_alloc(trans, jset_u64s(u64s));
+ int ret = PTR_ERR_OR_ZERO(e);
if (ret)
- goto err;
+ return ret;
- l = (void *) &darray_top(*entries);
- l->entry.u64s = cpu_to_le16(u64s);
- l->entry.btree_id = 0;
- l->entry.level = 1;
- l->entry.type = BCH_JSET_ENTRY_log;
- l->entry.pad[0] = 0;
- l->entry.pad[1] = 0;
- l->entry.pad[2] = 0;
- memcpy(l->d, buf.buf, buf.pos);
- while (buf.pos & 7)
- l->d[buf.pos++] = '\0';
-
- entries->nr += jset_u64s(u64s);
-err:
- printbuf_exit(&buf);
- return ret;
+ struct jset_entry_log *l = container_of(e, struct jset_entry_log, entry);
+ journal_entry_init(e, BCH_JSET_ENTRY_log, 0, 1, u64s);
+ memcpy(l->d, buf->buf, buf->pos);
+ return 0;
}
+__printf(3, 0)
static int
__bch2_fs_log_msg(struct bch_fs *c, unsigned commit_flags, const char *fmt,
va_list args)
{
- int ret;
+ struct printbuf buf = PRINTBUF;
+ prt_vprintf(&buf, fmt, args);
+
+ unsigned u64s = DIV_ROUND_UP(buf.pos, sizeof(u64));
+ prt_chars(&buf, '\0', u64s * sizeof(u64) - buf.pos);
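+	/*
+	 * Worked example (editor's note): a 13-byte message gives
+	 * u64s == DIV_ROUND_UP(13, 8) == 2, so three '\0' bytes are appended
+	 * and buf.pos becomes 16 - exactly the u64s * sizeof(u64) bytes that
+	 * __bch2_trans_log_msg() copies into the journal entry.
+	 */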
+
+ int ret = buf.allocation_failure ? -BCH_ERR_ENOMEM_trans_log_msg : 0;
+ if (ret)
+ goto err;
if (!test_bit(JOURNAL_STARTED, &c->journal.flags)) {
- ret = __bch2_trans_log_msg(&c->journal.early_journal_entries, fmt, args);
+ ret = darray_make_room(&c->journal.early_journal_entries, jset_u64s(u64s));
+ if (ret)
+ goto err;
+
+ struct jset_entry_log *l = (void *) &darray_top(c->journal.early_journal_entries);
+ journal_entry_init(&l->entry, BCH_JSET_ENTRY_log, 0, 1, u64s);
+ memcpy(l->d, buf.buf, buf.pos);
+ c->journal.early_journal_entries.nr += jset_u64s(u64s);
} else {
ret = bch2_trans_do(c, NULL, NULL,
- BTREE_INSERT_LAZY_RW|commit_flags,
- __bch2_trans_log_msg(&trans.extra_journal_entries, fmt, args));
+ BCH_TRANS_COMMIT_lazy_rw|commit_flags,
+ __bch2_trans_log_msg(trans, &buf, u64s));
}
-
+err:
+ printbuf_exit(&buf);
return ret;
}
+__printf(2, 3)
int bch2_fs_log_msg(struct bch_fs *c, const char *fmt, ...)
{
va_list args;
* Use for logging messages during recovery to enable reserved space and avoid
* blocking.
*/
+__printf(2, 3)
int bch2_journal_log_msg(struct bch_fs *c, const char *fmt, ...)
{
va_list args;