From 2b8c1bb0910534e8687ea3e5abf6d8bbba758247 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 9 Sep 2021 19:06:29 -0400 Subject: [PATCH] Update bcachefs sources to 3f3f969859 bcachefs: Fix some compiler warnings --- .bcachefs_revision | 2 +- cmd_debug.c | 24 +- cmd_fusemount.c | 18 +- include/trace/events/bcachefs.h | 260 +-- libbcachefs/acl.c | 41 +- libbcachefs/alloc_background.c | 55 +- libbcachefs/bcachefs.h | 12 +- libbcachefs/bkey.h | 31 - libbcachefs/bkey_methods.c | 2 + libbcachefs/bset.c | 4 +- libbcachefs/btree_cache.c | 61 +- libbcachefs/btree_cache.h | 4 +- libbcachefs/btree_gc.c | 45 +- libbcachefs/btree_io.c | 11 +- libbcachefs/btree_io.h | 3 +- libbcachefs/btree_iter.c | 2486 +++++++++++++-------------- libbcachefs/btree_iter.h | 268 ++- libbcachefs/btree_key_cache.c | 136 +- libbcachefs/btree_key_cache.h | 5 +- libbcachefs/btree_locking.h | 141 +- libbcachefs/btree_types.h | 153 +- libbcachefs/btree_update.h | 24 +- libbcachefs/btree_update_interior.c | 335 ++-- libbcachefs/btree_update_interior.h | 36 +- libbcachefs/btree_update_leaf.c | 671 ++++---- libbcachefs/buckets.c | 197 +-- libbcachefs/buckets.h | 6 +- libbcachefs/debug.c | 32 +- libbcachefs/dirent.c | 77 +- libbcachefs/dirent.h | 3 +- libbcachefs/ec.c | 87 +- libbcachefs/extent_update.c | 32 +- libbcachefs/extent_update.h | 8 +- libbcachefs/extents.c | 4 +- libbcachefs/fs-common.c | 113 +- libbcachefs/fs-io.c | 82 +- libbcachefs/fs.c | 44 +- libbcachefs/fsck.c | 159 +- libbcachefs/inode.c | 61 +- libbcachefs/inode.h | 8 +- libbcachefs/io.c | 115 +- libbcachefs/journal_seq_blacklist.c | 4 +- libbcachefs/migrate.c | 26 +- libbcachefs/move.c | 118 +- libbcachefs/move.h | 4 + libbcachefs/move_types.h | 2 + libbcachefs/movinggc.c | 4 +- libbcachefs/opts.h | 2 +- libbcachefs/quota.c | 20 +- libbcachefs/rebalance.c | 11 +- libbcachefs/rebalance_types.h | 1 - libbcachefs/recovery.c | 38 +- libbcachefs/reflink.c | 76 +- libbcachefs/str_hash.h | 65 +- libbcachefs/super.c | 11 +- libbcachefs/sysfs.c | 43 +- libbcachefs/tests.c | 197 +-- libbcachefs/varint.c | 2 +- libbcachefs/xattr.c | 26 +- 59 files changed, 3117 insertions(+), 3389 deletions(-) diff --git a/.bcachefs_revision b/.bcachefs_revision index ab237af..e80bf48 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -60fbf06f49679fdb2b37e1e863c321dfddfc3a4a +3f3f9698592290e98a727f5023115c1775be7d5f diff --git a/cmd_debug.c b/cmd_debug.c index 2f56e41..b3a6ea0 100644 --- a/cmd_debug.c +++ b/cmd_debug.c @@ -64,7 +64,7 @@ static void dump_one_device(struct bch_fs *c, struct bch_dev *ca, int fd) const struct bch_extent_ptr *ptr; struct bkey_ptrs_c ptrs; struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct btree *b; bch2_trans_init(&trans, c, 0, 0); @@ -95,6 +95,8 @@ static void dump_one_device(struct bch_fs *c, struct bch_dev *ca, int fd) ptr->offset << 9, btree_bytes(c)); } + + bch2_trans_iter_exit(&trans, &iter); bch2_trans_exit(&trans); } @@ -181,7 +183,7 @@ static void list_keys(struct bch_fs *c, enum btree_id btree_id, struct bpos start, struct bpos end) { struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c k; char buf[512]; int ret; @@ -196,7 +198,7 @@ static void list_keys(struct bch_fs *c, enum btree_id btree_id, bch2_bkey_val_to_text(&PBUF(buf), c, k); puts(buf); } - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); bch2_trans_exit(&trans); } @@ -205,7 +207,7 @@ static void list_btree_formats(struct bch_fs *c, enum btree_id btree_id, 
unsigne struct bpos start, struct bpos end) { struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct btree *b; char buf[4096]; @@ -218,7 +220,7 @@ static void list_btree_formats(struct bch_fs *c, enum btree_id btree_id, unsigne bch2_btree_node_to_text(&PBUF(buf), c, b); puts(buf); } - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); bch2_trans_exit(&trans); } @@ -227,7 +229,7 @@ static void list_nodes(struct bch_fs *c, enum btree_id btree_id, unsigned level, struct bpos start, struct bpos end) { struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct btree *b; char buf[4096]; @@ -241,7 +243,7 @@ static void list_nodes(struct bch_fs *c, enum btree_id btree_id, unsigned level, fputs(buf, stdout); putchar('\n'); } - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); bch2_trans_exit(&trans); } @@ -346,7 +348,7 @@ static void list_nodes_ondisk(struct bch_fs *c, enum btree_id btree_id, unsigned struct bpos start, struct bpos end) { struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct btree *b; char buf[4096]; @@ -362,7 +364,7 @@ static void list_nodes_ondisk(struct bch_fs *c, enum btree_id btree_id, unsigned print_node_ondisk(c, b); } - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); bch2_trans_exit(&trans); } @@ -371,7 +373,7 @@ static void list_nodes_keys(struct bch_fs *c, enum btree_id btree_id, unsigned l struct bpos start, struct bpos end) { struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct btree_node_iter node_iter; struct bkey unpacked; struct bkey_s_c k; @@ -393,7 +395,7 @@ static void list_nodes_keys(struct bch_fs *c, enum btree_id btree_id, unsigned l puts(buf); } } - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); bch2_trans_exit(&trans); } diff --git a/cmd_fusemount.c b/cmd_fusemount.c index 2b6b2d7..216094f 100644 --- a/cmd_fusemount.c +++ b/cmd_fusemount.c @@ -171,7 +171,7 @@ static void bcachefs_fuse_setattr(fuse_req_t req, fuse_ino_t inum, struct bch_fs *c = fuse_req_userdata(req); struct bch_inode_unpacked inode_u; struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; u64 now; int ret; @@ -185,8 +185,7 @@ retry: bch2_trans_begin(&trans); now = bch2_current_time(c); - iter = bch2_inode_peek(&trans, &inode_u, inum, BTREE_ITER_INTENT); - ret = PTR_ERR_OR_ZERO(iter); + ret = bch2_inode_peek(&trans, &iter, &inode_u, inum, BTREE_ITER_INTENT); if (ret) goto err; @@ -208,11 +207,11 @@ retry: inode_u.bi_mtime = now; /* TODO: CTIME? 
*/ - ret = bch2_inode_write(&trans, iter, &inode_u) ?: + ret = bch2_inode_write(&trans, &iter, &inode_u) ?: bch2_trans_commit(&trans, NULL, NULL, BTREE_INSERT_NOFAIL); err: - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); if (ret == -EINTR) goto retry; @@ -523,7 +522,7 @@ static void bcachefs_fuse_read(fuse_req_t req, fuse_ino_t inum, static int inode_update_times(struct bch_fs *c, fuse_ino_t inum) { struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct bch_inode_unpacked inode_u; int ret = 0; u64 now; @@ -533,15 +532,14 @@ retry: bch2_trans_begin(&trans); now = bch2_current_time(c); - iter = bch2_inode_peek(&trans, &inode_u, inum, BTREE_ITER_INTENT); - ret = PTR_ERR_OR_ZERO(iter); + ret = bch2_inode_peek(&trans, &iter, &inode_u, inum, BTREE_ITER_INTENT); if (ret) goto err; inode_u.bi_mtime = now; inode_u.bi_ctime = now; - ret = bch2_inode_write(&trans, iter, &inode_u); + ret = bch2_inode_write(&trans, &iter, &inode_u); if (ret) goto err; @@ -549,7 +547,7 @@ retry: BTREE_INSERT_NOFAIL); err: - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); if (ret == -EINTR) goto retry; diff --git a/include/trace/events/bcachefs.h b/include/trace/events/bcachefs.h index a11bb5f..fce3146 100644 --- a/include/trace/events/bcachefs.h +++ b/include/trace/events/bcachefs.h @@ -298,28 +298,6 @@ TRACE_EVENT(btree_reserve_get_fail, __entry->required, __entry->cl) ); -TRACE_EVENT(btree_insert_key, - TP_PROTO(struct bch_fs *c, struct btree *b, struct bkey_i *k), - TP_ARGS(c, b, k), - - TP_STRUCT__entry( - __field(u8, id ) - __field(u64, inode ) - __field(u64, offset ) - __field(u32, size ) - ), - - TP_fast_assign( - __entry->id = b->c.btree_id; - __entry->inode = k->k.p.inode; - __entry->offset = k->k.p.offset; - __entry->size = k->k.size; - ), - - TP_printk("btree %u: %llu:%llu len %u", __entry->id, - __entry->inode, __entry->offset, __entry->size) -); - DEFINE_EVENT(btree_node, btree_split, TP_PROTO(struct bch_fs *c, struct btree *b), TP_ARGS(c, b) @@ -540,69 +518,6 @@ TRACE_EVENT(copygc_wait, __entry->wait_amount, __entry->until) ); -TRACE_EVENT(trans_get_iter, - TP_PROTO(unsigned long trans_ip, - unsigned long caller_ip, - enum btree_id btree_id, - struct bpos *got_pos, - unsigned got_locks, - unsigned got_uptodate, - struct bpos *src_pos, - unsigned src_locks, - unsigned src_uptodate), - TP_ARGS(trans_ip, caller_ip, btree_id, - got_pos, got_locks, got_uptodate, - src_pos, src_locks, src_uptodate), - - TP_STRUCT__entry( - __field(unsigned long, trans_ip ) - __field(unsigned long, caller_ip ) - __field(u8, btree_id ) - __field(u64, got_pos_inode ) - __field(u64, got_pos_offset ) - __field(u32, got_pos_snapshot ) - __field(u8, got_locks ) - __field(u8, got_uptodate ) - __field(u64, src_pos_inode ) - __field(u64, src_pos_offset ) - __field(u32, src_pos_snapshot ) - __field(u8, src_locks ) - __field(u8, src_uptodate ) - ), - - TP_fast_assign( - __entry->trans_ip = trans_ip; - __entry->caller_ip = caller_ip; - __entry->btree_id = btree_id; - __entry->got_pos_inode = got_pos->inode; - __entry->got_pos_offset = got_pos->offset; - __entry->got_pos_snapshot = got_pos->snapshot; - __entry->got_locks = got_locks; - __entry->got_uptodate = got_uptodate; - __entry->src_pos_inode = src_pos->inode; - __entry->src_pos_offset = src_pos->offset; - __entry->src_pos_snapshot = src_pos->snapshot; - __entry->src_locks = src_locks; - __entry->src_uptodate = src_uptodate; - ), - - TP_printk("%ps %pS btree %u got %llu:%llu:%u l %u u %u " - "src 
%llu:%llu:%u l %u u %u", - (void *) __entry->trans_ip, - (void *) __entry->caller_ip, - __entry->btree_id, - __entry->got_pos_inode, - __entry->got_pos_offset, - __entry->got_pos_snapshot, - __entry->got_locks, - __entry->got_uptodate, - __entry->src_pos_inode, - __entry->src_pos_offset, - __entry->src_pos_snapshot, - __entry->src_locks, - __entry->src_uptodate) -); - TRACE_EVENT(transaction_restart_ip, TP_PROTO(unsigned long caller, unsigned long ip), TP_ARGS(caller, ip), @@ -772,96 +687,6 @@ DEFINE_EVENT(transaction_restart_iter, trans_restart_traverse, TP_ARGS(trans_ip, caller_ip, btree_id, pos) ); -TRACE_EVENT(iter_traverse, - TP_PROTO(unsigned long trans_ip, - unsigned long caller_ip, - bool key_cache, - enum btree_id btree_id, - struct bpos *pos, - int ret), - TP_ARGS(trans_ip, caller_ip, key_cache, btree_id, pos, ret), - - TP_STRUCT__entry( - __field(unsigned long, trans_ip ) - __field(unsigned long, caller_ip ) - __field(u8, key_cache ) - __field(u8, btree_id ) - __field(u64, pos_inode ) - __field(u64, pos_offset ) - __field(u32, pos_snapshot ) - __field(s32, ret ) - ), - - TP_fast_assign( - __entry->trans_ip = trans_ip; - __entry->caller_ip = caller_ip; - __entry->key_cache = key_cache; - __entry->btree_id = btree_id; - __entry->pos_inode = pos->inode; - __entry->pos_offset = pos->offset; - __entry->pos_snapshot = pos->snapshot; - __entry->ret = ret; - ), - - TP_printk("%ps %pS key cache %u btree %u %llu:%llu:%u ret %i", - (void *) __entry->trans_ip, - (void *) __entry->caller_ip, - __entry->key_cache, - __entry->btree_id, - __entry->pos_inode, - __entry->pos_offset, - __entry->pos_snapshot, - __entry->ret) -); - -TRACE_EVENT(iter_set_search_pos, - TP_PROTO(unsigned long trans_ip, - unsigned long caller_ip, - enum btree_id btree_id, - struct bpos *old_pos, - struct bpos *new_pos, - unsigned good_level), - TP_ARGS(trans_ip, caller_ip, btree_id, old_pos, new_pos, good_level), - - TP_STRUCT__entry( - __field(unsigned long, trans_ip ) - __field(unsigned long, caller_ip ) - __field(u8, btree_id ) - __field(u64, old_pos_inode ) - __field(u64, old_pos_offset ) - __field(u32, old_pos_snapshot ) - __field(u64, new_pos_inode ) - __field(u64, new_pos_offset ) - __field(u32, new_pos_snapshot ) - __field(u8, good_level ) - ), - - TP_fast_assign( - __entry->trans_ip = trans_ip; - __entry->caller_ip = caller_ip; - __entry->btree_id = btree_id; - __entry->old_pos_inode = old_pos->inode; - __entry->old_pos_offset = old_pos->offset; - __entry->old_pos_snapshot = old_pos->snapshot; - __entry->new_pos_inode = new_pos->inode; - __entry->new_pos_offset = new_pos->offset; - __entry->new_pos_snapshot = new_pos->snapshot; - __entry->good_level = good_level; - ), - - TP_printk("%ps %pS btree %u old pos %llu:%llu:%u new pos %llu:%llu:%u l %u", - (void *) __entry->trans_ip, - (void *) __entry->caller_ip, - __entry->btree_id, - __entry->old_pos_inode, - __entry->old_pos_offset, - __entry->old_pos_snapshot, - __entry->new_pos_inode, - __entry->new_pos_offset, - __entry->new_pos_snapshot, - __entry->good_level) -); - TRACE_EVENT(trans_restart_would_deadlock, TP_PROTO(unsigned long trans_ip, unsigned long caller_ip, @@ -931,99 +756,42 @@ TRACE_EVENT(trans_restart_would_deadlock, __entry->want_pos_snapshot) ); -TRACE_EVENT(trans_restart_mem_realloced, - TP_PROTO(unsigned long trans_ip, unsigned long caller_ip, - unsigned long bytes), - TP_ARGS(trans_ip, caller_ip, bytes), +TRACE_EVENT(trans_restart_would_deadlock_write, + TP_PROTO(unsigned long trans_ip), + TP_ARGS(trans_ip), TP_STRUCT__entry( __field(unsigned 
long, trans_ip ) - __field(unsigned long, caller_ip ) - __field(unsigned long, bytes ) ), TP_fast_assign( __entry->trans_ip = trans_ip; - __entry->caller_ip = caller_ip; - __entry->bytes = bytes; ), - TP_printk("%ps %pS bytes %lu", - (void *) __entry->trans_ip, - (void *) __entry->caller_ip, - __entry->bytes) + TP_printk("%ps", (void *) __entry->trans_ip) ); -DECLARE_EVENT_CLASS(node_lock_fail, - TP_PROTO(unsigned long trans_ip, - unsigned long caller_ip, - bool key_cache, - enum btree_id btree_id, - struct bpos *pos, - unsigned level, u32 iter_seq, unsigned node, u32 node_seq), - TP_ARGS(trans_ip, caller_ip, key_cache, btree_id, pos, - level, iter_seq, node, node_seq), +TRACE_EVENT(trans_restart_mem_realloced, + TP_PROTO(unsigned long trans_ip, unsigned long caller_ip, + unsigned long bytes), + TP_ARGS(trans_ip, caller_ip, bytes), TP_STRUCT__entry( __field(unsigned long, trans_ip ) __field(unsigned long, caller_ip ) - __field(u8, key_cache ) - __field(u8, btree_id ) - __field(u64, pos_inode ) - __field(u64, pos_offset ) - __field(u32, pos_snapshot ) - __field(u32, level ) - __field(u32, iter_seq ) - __field(u32, node ) - __field(u32, node_seq ) + __field(unsigned long, bytes ) ), TP_fast_assign( - __entry->trans_ip = trans_ip; - __entry->caller_ip = caller_ip; - __entry->key_cache = key_cache; - __entry->btree_id = btree_id; - __entry->pos_inode = pos->inode; - __entry->pos_offset = pos->offset; - __entry->pos_snapshot = pos->snapshot; - __entry->level = level; - __entry->iter_seq = iter_seq; - __entry->node = node; - __entry->node_seq = node_seq; + __entry->trans_ip = trans_ip; + __entry->caller_ip = caller_ip; + __entry->bytes = bytes; ), - TP_printk("%ps %pS key cache %u btree %u pos %llu:%llu:%u level %u iter seq %u node %u node seq %u", + TP_printk("%ps %pS bytes %lu", (void *) __entry->trans_ip, (void *) __entry->caller_ip, - __entry->key_cache, - __entry->btree_id, - __entry->pos_inode, - __entry->pos_offset, - __entry->pos_snapshot, - __entry->level, __entry->iter_seq, - __entry->node, __entry->node_seq) -); - -DEFINE_EVENT(node_lock_fail, node_upgrade_fail, - TP_PROTO(unsigned long trans_ip, - unsigned long caller_ip, - bool key_cache, - enum btree_id btree_id, - struct bpos *pos, - unsigned level, u32 iter_seq, unsigned node, u32 node_seq), - TP_ARGS(trans_ip, caller_ip, key_cache, btree_id, pos, - level, iter_seq, node, node_seq) -); - -DEFINE_EVENT(node_lock_fail, node_relock_fail, - TP_PROTO(unsigned long trans_ip, - unsigned long caller_ip, - bool key_cache, - enum btree_id btree_id, - struct bpos *pos, - unsigned level, u32 iter_seq, unsigned node, u32 node_seq), - TP_ARGS(trans_ip, caller_ip, key_cache, btree_id, pos, - level, iter_seq, node, node_seq) + __entry->bytes) ); #endif /* _TRACE_BCACHE_H */ diff --git a/libbcachefs/acl.c b/libbcachefs/acl.c index eb907e5..2146a63 100644 --- a/libbcachefs/acl.c +++ b/libbcachefs/acl.c @@ -218,7 +218,7 @@ struct posix_acl *bch2_get_acl(struct inode *vinode, int type) struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch_hash_info hash = bch2_hash_info_init(c, &inode->ei_inode); struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter = { NULL }; struct bkey_s_c_xattr xattr; struct posix_acl *acl = NULL; struct bkey_s_c k; @@ -228,20 +228,19 @@ struct posix_acl *bch2_get_acl(struct inode *vinode, int type) retry: bch2_trans_begin(&trans); - iter = bch2_hash_lookup(&trans, bch2_xattr_hash_desc, + ret = bch2_hash_lookup(&trans, &iter, bch2_xattr_hash_desc, &hash, inode->v.i_ino, 
&X_SEARCH(acl_to_xattr_type(type), "", 0), 0); - if (IS_ERR(iter)) { - if (PTR_ERR(iter) == -EINTR) + if (ret) { + if (ret == -EINTR) goto retry; - - if (PTR_ERR(iter) != -ENOENT) - acl = ERR_CAST(iter); + if (ret != -ENOENT) + acl = ERR_PTR(ret); goto out; } - k = bch2_btree_iter_peek_slot(iter); + k = bch2_btree_iter_peek_slot(&iter); ret = bkey_err(k); if (ret) { acl = ERR_PTR(ret); @@ -254,8 +253,8 @@ retry: if (!IS_ERR(acl)) set_cached_acl(&inode->v, type, acl); - bch2_trans_iter_put(&trans, iter); out: + bch2_trans_iter_exit(&trans, &iter); bch2_trans_exit(&trans); return acl; } @@ -296,7 +295,7 @@ int bch2_set_acl(struct user_namespace *mnt_userns, struct bch_inode_info *inode = to_bch_ei(vinode); struct bch_fs *c = inode->v.i_sb->s_fs_info; struct btree_trans trans; - struct btree_iter *inode_iter; + struct btree_iter inode_iter = { NULL }; struct bch_inode_unpacked inode_u; struct bch_hash_info hash_info; struct posix_acl *acl; @@ -309,9 +308,8 @@ retry: bch2_trans_begin(&trans); acl = _acl; - inode_iter = bch2_inode_peek(&trans, &inode_u, inode->v.i_ino, - BTREE_ITER_INTENT); - ret = PTR_ERR_OR_ZERO(inode_iter); + ret = bch2_inode_peek(&trans, &inode_iter, &inode_u, inode->v.i_ino, + BTREE_ITER_INTENT); if (ret) goto btree_err; @@ -332,11 +330,11 @@ retry: inode_u.bi_ctime = bch2_current_time(c); inode_u.bi_mode = mode; - ret = bch2_inode_write(&trans, inode_iter, &inode_u) ?: + ret = bch2_inode_write(&trans, &inode_iter, &inode_u) ?: bch2_trans_commit(&trans, NULL, &inode->ei_journal_seq, 0); btree_err: - bch2_trans_iter_put(&trans, inode_iter); + bch2_trans_iter_exit(&trans, &inode_iter); if (ret == -EINTR) goto retry; @@ -360,22 +358,21 @@ int bch2_acl_chmod(struct btree_trans *trans, struct posix_acl **new_acl) { struct bch_hash_info hash_info = bch2_hash_info_init(trans->c, inode); - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c_xattr xattr; struct bkey_i_xattr *new; struct posix_acl *acl; struct bkey_s_c k; int ret; - iter = bch2_hash_lookup(trans, bch2_xattr_hash_desc, + ret = bch2_hash_lookup(trans, &iter, bch2_xattr_hash_desc, &hash_info, inode->bi_inum, &X_SEARCH(KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS, "", 0), BTREE_ITER_INTENT); - ret = PTR_ERR_OR_ZERO(iter); if (ret) return ret == -ENOENT ? 
0 : ret; - k = bch2_btree_iter_peek_slot(iter); + k = bch2_btree_iter_peek_slot(&iter); xattr = bkey_s_c_to_xattr(k); if (ret) goto err; @@ -396,12 +393,12 @@ int bch2_acl_chmod(struct btree_trans *trans, goto err; } - new->k.p = iter->pos; - ret = bch2_trans_update(trans, iter, &new->k_i, 0); + new->k.p = iter.pos; + ret = bch2_trans_update(trans, &iter, &new->k_i, 0); *new_acl = acl; acl = NULL; err: - bch2_trans_iter_put(trans, iter); + bch2_trans_iter_exit(trans, &iter); if (!IS_ERR_OR_NULL(acl)) kfree(acl); return ret; diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c index 886861a..87fa924 100644 --- a/libbcachefs/alloc_background.c +++ b/libbcachefs/alloc_background.c @@ -353,32 +353,32 @@ err: int bch2_alloc_write(struct bch_fs *c, unsigned flags) { struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct bch_dev *ca; unsigned i; int ret = 0; bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); - iter = bch2_trans_get_iter(&trans, BTREE_ID_alloc, POS_MIN, - BTREE_ITER_SLOTS|BTREE_ITER_INTENT); + bch2_trans_iter_init(&trans, &iter, BTREE_ID_alloc, POS_MIN, + BTREE_ITER_SLOTS|BTREE_ITER_INTENT); for_each_member_device(ca, c, i) { - bch2_btree_iter_set_pos(iter, + bch2_btree_iter_set_pos(&iter, POS(ca->dev_idx, ca->mi.first_bucket)); - while (iter->pos.offset < ca->mi.nbuckets) { + while (iter.pos.offset < ca->mi.nbuckets) { bch2_trans_cond_resched(&trans); - ret = bch2_alloc_write_key(&trans, iter, flags); + ret = bch2_alloc_write_key(&trans, &iter, flags); if (ret) { percpu_ref_put(&ca->ref); goto err; } - bch2_btree_iter_advance(iter); + bch2_btree_iter_advance(&iter); } } err: - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); bch2_trans_exit(&trans); return ret; } @@ -390,18 +390,18 @@ int bch2_bucket_io_time_reset(struct btree_trans *trans, unsigned dev, { struct bch_fs *c = trans->c; struct bch_dev *ca = bch_dev_bkey_exists(c, dev); - struct btree_iter *iter; + struct btree_iter iter; struct bucket *g; struct bkey_alloc_buf *a; struct bkey_alloc_unpacked u; u64 *time, now; int ret = 0; - iter = bch2_trans_get_iter(trans, BTREE_ID_alloc, POS(dev, bucket_nr), - BTREE_ITER_CACHED| - BTREE_ITER_CACHED_NOFILL| - BTREE_ITER_INTENT); - ret = bch2_btree_iter_traverse(iter); + bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, POS(dev, bucket_nr), + BTREE_ITER_CACHED| + BTREE_ITER_CACHED_NOFILL| + BTREE_ITER_INTENT); + ret = bch2_btree_iter_traverse(&iter); if (ret) goto out; @@ -412,7 +412,7 @@ int bch2_bucket_io_time_reset(struct btree_trans *trans, unsigned dev, percpu_down_read(&c->mark_lock); g = bucket(ca, bucket_nr); - u = alloc_mem_to_key(iter, g, READ_ONCE(g->mark)); + u = alloc_mem_to_key(&iter, g, READ_ONCE(g->mark)); percpu_up_read(&c->mark_lock); time = rw == READ ? 
&u.read_time : &u.write_time; @@ -423,10 +423,10 @@ int bch2_bucket_io_time_reset(struct btree_trans *trans, unsigned dev, *time = now; bch2_alloc_pack(c, a, u); - ret = bch2_trans_update(trans, iter, &a->k, 0) ?: + ret = bch2_trans_update(trans, &iter, &a->k, 0) ?: bch2_trans_commit(trans, NULL, NULL, 0); out: - bch2_trans_iter_put(trans, iter); + bch2_trans_iter_exit(trans, &iter); return ret; } @@ -695,27 +695,28 @@ static int bucket_invalidate_btree(struct btree_trans *trans, struct bkey_alloc_unpacked u; struct bucket *g; struct bucket_mark m; - struct btree_iter *iter = - bch2_trans_get_iter(trans, BTREE_ID_alloc, - POS(ca->dev_idx, b), - BTREE_ITER_CACHED| - BTREE_ITER_CACHED_NOFILL| - BTREE_ITER_INTENT); + struct btree_iter iter; int ret; + bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, + POS(ca->dev_idx, b), + BTREE_ITER_CACHED| + BTREE_ITER_CACHED_NOFILL| + BTREE_ITER_INTENT); + a = bch2_trans_kmalloc(trans, sizeof(*a)); ret = PTR_ERR_OR_ZERO(a); if (ret) goto err; - ret = bch2_btree_iter_traverse(iter); + ret = bch2_btree_iter_traverse(&iter); if (ret) goto err; percpu_down_read(&c->mark_lock); g = bucket(ca, b); m = READ_ONCE(g->mark); - u = alloc_mem_to_key(iter, g, m); + u = alloc_mem_to_key(&iter, g, m); percpu_up_read(&c->mark_lock); u.gen++; @@ -726,10 +727,10 @@ static int bucket_invalidate_btree(struct btree_trans *trans, u.write_time = atomic64_read(&c->io_clock[WRITE].now); bch2_alloc_pack(c, a, u); - ret = bch2_trans_update(trans, iter, &a->k, + ret = bch2_trans_update(trans, &iter, &a->k, BTREE_TRIGGER_BUCKET_INVALIDATE); err: - bch2_trans_iter_put(trans, iter); + bch2_trans_iter_exit(trans, &iter); return ret; } diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h index 051aba6..9975fc1 100644 --- a/libbcachefs/bcachefs.h +++ b/libbcachefs/bcachefs.h @@ -557,8 +557,8 @@ struct journal_keys { u64 journal_seq_base; }; -struct btree_iter_buf { - struct btree_iter *iter; +struct btree_path_buf { + struct btree_path *path; }; #define REPLICAS_DELTA_LIST_MAX (1U << 16) @@ -666,9 +666,9 @@ struct bch_fs { /* btree_iter.c: */ struct mutex btree_trans_lock; struct list_head btree_trans_list; - mempool_t btree_iters_pool; + mempool_t btree_paths_pool; mempool_t btree_trans_mem_pool; - struct btree_iter_buf __percpu *btree_iters_bufs; + struct btree_path_buf __percpu *btree_paths_bufs; struct srcu_struct btree_trans_barrier; @@ -791,6 +791,10 @@ struct bch_fs { struct write_point copygc_write_point; s64 copygc_wait; + /* DATA PROGRESS STATS */ + struct list_head data_progress_list; + struct mutex data_progress_lock; + /* STRIPES: */ GENRADIX(struct stripe) stripes[2]; diff --git a/libbcachefs/bkey.h b/libbcachefs/bkey.h index 2e45d88..c4a66f2 100644 --- a/libbcachefs/bkey.h +++ b/libbcachefs/bkey.h @@ -163,37 +163,6 @@ static inline struct bpos bpos_max(struct bpos l, struct bpos r) return bpos_cmp(l, r) > 0 ? 
l : r; } -#define sbb(a, b, borrow) \ -do { \ - typeof(a) d1, d2; \ - \ - d1 = a - borrow; \ - borrow = d1 > a; \ - \ - d2 = d1 - b; \ - borrow += d2 > d1; \ - a = d2; \ -} while (0) - -/* returns a - b: */ -static inline struct bpos bpos_sub(struct bpos a, struct bpos b) -{ - int borrow = 0; - - sbb(a.snapshot, b.snapshot, borrow); - sbb(a.offset, b.offset, borrow); - sbb(a.inode, b.inode, borrow); - return a; -} - -static inline struct bpos bpos_diff(struct bpos l, struct bpos r) -{ - if (bpos_cmp(l, r) > 0) - swap(l, r); - - return bpos_sub(r, l); -} - void bch2_bpos_swab(struct bpos *); void bch2_bkey_swab_key(const struct bkey_format *, struct bkey_packed *); diff --git a/libbcachefs/bkey_methods.c b/libbcachefs/bkey_methods.c index f8adbf4..a03b551 100644 --- a/libbcachefs/bkey_methods.c +++ b/libbcachefs/bkey_methods.c @@ -215,6 +215,8 @@ void bch2_bpos_to_text(struct printbuf *out, struct bpos pos) pr_buf(out, "POS_MIN"); else if (!bpos_cmp(pos, POS_MAX)) pr_buf(out, "POS_MAX"); + else if (!bpos_cmp(pos, SPOS_MAX)) + pr_buf(out, "SPOS_MAX"); else { if (pos.inode == U64_MAX) pr_buf(out, "U64_MAX"); diff --git a/libbcachefs/bset.c b/libbcachefs/bset.c index 0eb85ac..59e4c1d 100644 --- a/libbcachefs/bset.c +++ b/libbcachefs/bset.c @@ -197,9 +197,11 @@ void bch2_btree_node_iter_verify(struct btree_node_iter *iter, return; /* Verify no duplicates: */ - btree_node_iter_for_each(iter, set) + btree_node_iter_for_each(iter, set) { + BUG_ON(set->k > set->end); btree_node_iter_for_each(iter, s2) BUG_ON(set != s2 && set->end == s2->end); + } /* Verify that set->end is correct: */ btree_node_iter_for_each(iter, set) { diff --git a/libbcachefs/btree_cache.c b/libbcachefs/btree_cache.c index cd0c500..5f9ab81 100644 --- a/libbcachefs/btree_cache.c +++ b/libbcachefs/btree_cache.c @@ -128,7 +128,8 @@ struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *c) void bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b) { - rhashtable_remove_fast(&bc->table, &b->hash, bch_btree_cache_params); + int ret = rhashtable_remove_fast(&bc->table, &b->hash, bch_btree_cache_params); + BUG_ON(ret); /* Cause future lookups for this node to fail: */ b->hash_val = 0; @@ -632,7 +633,8 @@ err: /* Slowpath, don't want it inlined into btree_iter_traverse() */ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c, - struct btree_iter *iter, + struct btree_trans *trans, + struct btree_path *path, const struct bkey_i *k, enum btree_id btree_id, unsigned level, @@ -648,8 +650,8 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c, * Parent node must be locked, else we could read in a btree node that's * been freed: */ - if (iter && !bch2_btree_node_relock(iter, level + 1)) { - btree_trans_restart(iter->trans); + if (trans && !bch2_btree_node_relock(trans, path, level + 1)) { + btree_trans_restart(trans); return ERR_PTR(-EINTR); } @@ -680,23 +682,23 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c, six_unlock_intent(&b->c.lock); /* Unlock before doing IO: */ - if (iter && sync) - bch2_trans_unlock(iter->trans); + if (trans && sync) + bch2_trans_unlock(trans); bch2_btree_node_read(c, b, sync); if (!sync) return NULL; - if (iter && - (!bch2_trans_relock(iter->trans) || - !bch2_btree_iter_relock_intent(iter))) { - BUG_ON(!iter->trans->restarted); + if (trans && + (!bch2_trans_relock(trans) || + !bch2_btree_path_relock_intent(trans, path))) { + BUG_ON(!trans->restarted); return ERR_PTR(-EINTR); } if (!six_relock_type(&b->c.lock, lock_type, seq)) { - 
btree_trans_restart(iter->trans); + btree_trans_restart(trans); return ERR_PTR(-EINTR); } @@ -754,7 +756,7 @@ static inline void btree_check_header(struct bch_fs *c, struct btree *b) * The btree node will have either a read or a write lock held, depending on * the @write parameter. */ -struct btree *bch2_btree_node_get(struct btree_trans *trans, struct btree_iter *iter, +struct btree *bch2_btree_node_get(struct btree_trans *trans, struct btree_path *path, const struct bkey_i *k, unsigned level, enum six_lock_type lock_type, unsigned long trace_ip) @@ -779,7 +781,7 @@ retry: * else we could read in a btree node from disk that's been * freed: */ - b = bch2_btree_node_fill(c, iter, k, iter->btree_id, + b = bch2_btree_node_fill(c, trans, path, k, path->btree_id, level, lock_type, true); /* We raced and found the btree node in the cache */ @@ -818,10 +820,10 @@ lock_node: * the parent was modified, when the pointer to the node we want * was removed - and we'll bail out: */ - if (btree_node_read_locked(iter, level + 1)) - btree_node_unlock(iter, level + 1); + if (btree_node_read_locked(path, level + 1)) + btree_node_unlock(path, level + 1); - if (!btree_node_lock(b, k->k.p, level, iter, lock_type, + if (!btree_node_lock(trans, path, b, k->k.p, level, lock_type, lock_node_check_fn, (void *) k, trace_ip)) { if (!trans->restarted) goto retry; @@ -832,13 +834,13 @@ lock_node: b->c.level != level || race_fault())) { six_unlock_type(&b->c.lock, lock_type); - if (bch2_btree_node_relock(iter, level + 1)) + if (bch2_btree_node_relock(trans, path, level + 1)) goto retry; trace_trans_restart_btree_node_reused(trans->ip, trace_ip, - iter->btree_id, - &iter->real_pos); + path->btree_id, + &path->pos); btree_trans_restart(trans); return ERR_PTR(-EINTR); } @@ -853,12 +855,12 @@ lock_node: bch2_btree_node_wait_on_read(b); /* - * should_be_locked is not set on this iterator yet, so we need - * to relock it specifically: + * should_be_locked is not set on this path yet, so we need to + * relock it specifically: */ - if (iter && + if (trans && (!bch2_trans_relock(trans) || - !bch2_btree_iter_relock_intent(iter))) { + !bch2_btree_path_relock_intent(trans, path))) { BUG_ON(!trans->restarted); return ERR_PTR(-EINTR); } @@ -886,7 +888,7 @@ lock_node: return ERR_PTR(-EIO); } - EBUG_ON(b->c.btree_id != iter->btree_id); + EBUG_ON(b->c.btree_id != path->btree_id); EBUG_ON(BTREE_NODE_LEVEL(b->data) != level); btree_check_header(c, b); @@ -917,7 +919,7 @@ retry: if (nofill) goto out; - b = bch2_btree_node_fill(c, NULL, k, btree_id, + b = bch2_btree_node_fill(c, NULL, NULL, k, btree_id, level, SIX_LOCK_read, true); /* We raced and found the btree node in the cache */ @@ -975,21 +977,24 @@ out: return b; } -int bch2_btree_node_prefetch(struct bch_fs *c, struct btree_iter *iter, +int bch2_btree_node_prefetch(struct bch_fs *c, + struct btree_trans *trans, + struct btree_path *path, const struct bkey_i *k, enum btree_id btree_id, unsigned level) { struct btree_cache *bc = &c->btree_cache; struct btree *b; - BUG_ON(iter && !btree_node_locked(iter, level + 1)); + BUG_ON(trans && !btree_node_locked(path, level + 1)); BUG_ON(level >= BTREE_MAX_DEPTH); b = btree_cache_find(bc, k); if (b) return 0; - b = bch2_btree_node_fill(c, iter, k, btree_id, level, SIX_LOCK_read, false); + b = bch2_btree_node_fill(c, trans, path, k, btree_id, + level, SIX_LOCK_read, false); return PTR_ERR_OR_ZERO(b); } diff --git a/libbcachefs/btree_cache.h b/libbcachefs/btree_cache.h index 5032293..402cec1 100644 --- a/libbcachefs/btree_cache.h +++ 
b/libbcachefs/btree_cache.h @@ -22,14 +22,14 @@ int bch2_btree_cache_cannibalize_lock(struct bch_fs *, struct closure *); struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *); struct btree *bch2_btree_node_mem_alloc(struct bch_fs *); -struct btree *bch2_btree_node_get(struct btree_trans *, struct btree_iter *, +struct btree *bch2_btree_node_get(struct btree_trans *, struct btree_path *, const struct bkey_i *, unsigned, enum six_lock_type, unsigned long); struct btree *bch2_btree_node_get_noiter(struct bch_fs *, const struct bkey_i *, enum btree_id, unsigned, bool); -int bch2_btree_node_prefetch(struct bch_fs *, struct btree_iter *, +int bch2_btree_node_prefetch(struct bch_fs *, struct btree_trans *, struct btree_path *, const struct bkey_i *, enum btree_id, unsigned); void bch2_btree_node_evict(struct bch_fs *, const struct bkey_i *); diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c index 3dd1094..307f287 100644 --- a/libbcachefs/btree_gc.c +++ b/libbcachefs/btree_gc.c @@ -775,7 +775,7 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id, bool initial, bool metadata_only) { struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct btree *b; unsigned depth = metadata_only ? 1 : bch2_expensive_debug_checks ? 0 @@ -800,13 +800,13 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id, if (!initial) { if (max_stale > 64) - bch2_btree_node_rewrite(&trans, iter, + bch2_btree_node_rewrite(&trans, &iter, b->data->keys.seq, BTREE_INSERT_NOWAIT| BTREE_INSERT_GC_LOCK_HELD); else if (!bch2_btree_gc_rewrite_disabled && (bch2_btree_gc_always_rewrite || max_stale > 16)) - bch2_btree_node_rewrite(&trans, iter, + bch2_btree_node_rewrite(&trans, &iter, b->data->keys.seq, BTREE_INSERT_NOWAIT| BTREE_INSERT_GC_LOCK_HELD); @@ -814,7 +814,7 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id, bch2_trans_cond_resched(&trans); } - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); ret = bch2_trans_exit(&trans) ?: ret; if (ret) @@ -1414,7 +1414,7 @@ static int bch2_gc_reflink_done(struct bch_fs *c, bool initial, bool metadata_only) { struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c k; struct reflink_gc *r; size_t idx = 0; @@ -1480,7 +1480,7 @@ static int bch2_gc_reflink_done(struct bch_fs *c, bool initial, } } fsck_err: - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); bch2_trans_exit(&trans); out: genradix_free(&c->reflink_gc_table); @@ -1512,7 +1512,7 @@ static int bch2_gc_reflink_start(struct bch_fs *c, bool initial, bool metadata_only) { struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c k; struct reflink_gc *r; int ret; @@ -1547,7 +1547,7 @@ static int bch2_gc_reflink_start(struct bch_fs *c, bool initial, r->size = k.k->size; r->refcount = 0; } - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); bch2_trans_exit(&trans); return 0; @@ -1722,7 +1722,7 @@ static bool gc_btree_gens_key(struct bch_fs *c, struct bkey_s_c k) static int bch2_gc_btree_gens(struct bch_fs *c, enum btree_id btree_id) { struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c k; struct bkey_buf sk; int ret = 0, commit_err = 0; @@ -1730,14 +1730,21 @@ static int bch2_gc_btree_gens(struct bch_fs *c, enum btree_id btree_id) bch2_bkey_buf_init(&sk); bch2_trans_init(&trans, c, 0, 0); - iter = bch2_trans_get_iter(&trans, btree_id, POS_MIN, - 
BTREE_ITER_PREFETCH| - BTREE_ITER_NOT_EXTENTS| - BTREE_ITER_ALL_SNAPSHOTS); + bch2_trans_iter_init(&trans, &iter, btree_id, POS_MIN, + BTREE_ITER_PREFETCH| + BTREE_ITER_NOT_EXTENTS| + BTREE_ITER_ALL_SNAPSHOTS); - while ((k = bch2_btree_iter_peek(iter)).k && - !(ret = bkey_err(k))) { - c->gc_gens_pos = iter->pos; + while ((bch2_trans_begin(&trans), + k = bch2_btree_iter_peek(&iter)).k) { + ret = bkey_err(k); + + if (ret == -EINTR) + continue; + if (ret) + break; + + c->gc_gens_pos = iter.pos; if (gc_btree_gens_key(c, k) && !commit_err) { bch2_bkey_buf_reassemble(&sk, c, k); @@ -1745,7 +1752,7 @@ static int bch2_gc_btree_gens(struct bch_fs *c, enum btree_id btree_id) commit_err = - bch2_trans_update(&trans, iter, sk.k, 0) ?: + bch2_trans_update(&trans, &iter, sk.k, 0) ?: bch2_trans_commit(&trans, NULL, NULL, BTREE_INSERT_NOWAIT| BTREE_INSERT_NOFAIL); @@ -1755,9 +1762,9 @@ static int bch2_gc_btree_gens(struct bch_fs *c, enum btree_id btree_id) } } - bch2_btree_iter_advance(iter); + bch2_btree_iter_advance(&iter); } - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); bch2_trans_exit(&trans); bch2_bkey_buf_exit(&sk, c); diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c index 40fa011..f11fcab 100644 --- a/libbcachefs/btree_io.c +++ b/libbcachefs/btree_io.c @@ -465,16 +465,13 @@ void bch2_btree_build_aux_trees(struct btree *b) * * Returns true if we sorted (i.e. invalidated iterators */ -void bch2_btree_init_next(struct btree_trans *trans, - struct btree_iter *iter, - struct btree *b) +void bch2_btree_init_next(struct btree_trans *trans, struct btree *b) { struct bch_fs *c = trans->c; struct btree_node_entry *bne; bool reinit_iter = false; EBUG_ON(!(b->c.lock.state.seq & 1)); - EBUG_ON(iter && iter->l[b->c.level].b != b); BUG_ON(bset_written(b, bset(b, &b->set[1]))); if (b->nsets == MAX_BSETS && @@ -503,8 +500,8 @@ void bch2_btree_init_next(struct btree_trans *trans, bch2_btree_build_aux_trees(b); - if (iter && reinit_iter) - bch2_btree_iter_reinit_node(iter, b); + if (reinit_iter) + bch2_trans_node_reinit_iter(trans, b); } static void btree_pos_to_text(struct printbuf *out, struct bch_fs *c, @@ -1260,7 +1257,7 @@ static void btree_node_read_all_replicas_done(struct closure *cl) bool dump_bset_maps = false; bool have_retry = false; int ret = 0, best = -1, write = READ; - unsigned i, written, written2; + unsigned i, written = 0, written2 = 0; __le64 seq = b->key.k.type == KEY_TYPE_btree_ptr_v2 ? 
bkey_i_to_btree_ptr_v2(&b->key)->v.seq : 0; diff --git a/libbcachefs/btree_io.h b/libbcachefs/btree_io.h index 7fdcf87..0f20224 100644 --- a/libbcachefs/btree_io.h +++ b/libbcachefs/btree_io.h @@ -134,8 +134,7 @@ void bch2_btree_sort_into(struct bch_fs *, struct btree *, struct btree *); void bch2_btree_node_drop_keys_outside_node(struct btree *); void bch2_btree_build_aux_trees(struct btree *); -void bch2_btree_init_next(struct btree_trans *, struct btree_iter *, - struct btree *); +void bch2_btree_init_next(struct btree_trans *, struct btree *); int bch2_btree_node_read_done(struct bch_fs *, struct bch_dev *, struct btree *, bool); diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c index fe710d1..ce4d7c7 100644 --- a/libbcachefs/btree_iter.c +++ b/libbcachefs/btree_iter.c @@ -17,26 +17,35 @@ #include #include -static void btree_iter_set_search_pos(struct btree_iter *, struct bpos); -static void btree_trans_sort_iters(struct btree_trans *); -static void btree_iter_check_sort(struct btree_trans *, struct btree_iter *); -static struct btree_iter *btree_iter_child_alloc(struct btree_iter *, unsigned long); -static struct btree_iter *btree_trans_iter_alloc(struct btree_trans *, - struct btree_iter *); -static void btree_iter_copy(struct btree_iter *, struct btree_iter *); +static void btree_trans_verify_sorted(struct btree_trans *); +static void btree_path_check_sort(struct btree_trans *, struct btree_path *, int); -static inline int btree_iter_cmp(const struct btree_iter *l, - const struct btree_iter *r) +static inline void btree_path_list_remove(struct btree_trans *, struct btree_path *); +static inline void btree_path_list_add(struct btree_trans *, struct btree_path *, + struct btree_path *); + +static struct btree_path *btree_path_alloc(struct btree_trans *, struct btree_path *); + +static inline int __btree_path_cmp(const struct btree_path *l, + enum btree_id r_btree_id, + bool r_cached, + struct bpos r_pos, + unsigned r_level) { - return cmp_int(l->btree_id, r->btree_id) ?: - -cmp_int(btree_iter_is_cached(l), btree_iter_is_cached(r)) ?: - bkey_cmp(l->real_pos, r->real_pos); + return cmp_int(l->btree_id, r_btree_id) ?: + cmp_int(l->cached, r_cached) ?: + bpos_cmp(l->pos, r_pos) ?: + -cmp_int(l->level, r_level); } -static inline struct bpos bkey_successor(struct btree_iter *iter, struct bpos p) +static inline int btree_path_cmp(const struct btree_path *l, + const struct btree_path *r) { - EBUG_ON(btree_iter_type(iter) == BTREE_ITER_NODES); + return __btree_path_cmp(l, r->btree_id, r->cached, r->pos, r->level); +} +static inline struct bpos bkey_successor(struct btree_iter *iter, struct bpos p) +{ /* Are we iterating over keys in all snapshots? */ if (iter->flags & BTREE_ITER_ALL_SNAPSHOTS) { p = bpos_successor(p); @@ -50,8 +59,6 @@ static inline struct bpos bkey_successor(struct btree_iter *iter, struct bpos p) static inline struct bpos bkey_predecessor(struct btree_iter *iter, struct bpos p) { - EBUG_ON(btree_iter_type(iter) == BTREE_ITER_NODES); - /* Are we iterating over keys in all snapshots? 
*/ if (iter->flags & BTREE_ITER_ALL_SNAPSHOTS) { p = bpos_predecessor(p); @@ -63,10 +70,10 @@ static inline struct bpos bkey_predecessor(struct btree_iter *iter, struct bpos return p; } -static inline bool is_btree_node(struct btree_iter *iter, unsigned l) +static inline bool is_btree_node(struct btree_path *path, unsigned l) { return l < BTREE_MAX_DEPTH && - (unsigned long) iter->l[l].b >= 128; + (unsigned long) path->l[l].b >= 128; } static inline struct bpos btree_iter_search_key(struct btree_iter *iter) @@ -79,41 +86,40 @@ static inline struct bpos btree_iter_search_key(struct btree_iter *iter) return pos; } -static inline bool btree_iter_pos_before_node(struct btree_iter *iter, +static inline bool btree_path_pos_before_node(struct btree_path *path, struct btree *b) { - return bpos_cmp(iter->real_pos, b->data->min_key) < 0; + return bpos_cmp(path->pos, b->data->min_key) < 0; } -static inline bool btree_iter_pos_after_node(struct btree_iter *iter, +static inline bool btree_path_pos_after_node(struct btree_path *path, struct btree *b) { - return bpos_cmp(b->key.k.p, iter->real_pos) < 0; + return bpos_cmp(b->key.k.p, path->pos) < 0; } -static inline bool btree_iter_pos_in_node(struct btree_iter *iter, +static inline bool btree_path_pos_in_node(struct btree_path *path, struct btree *b) { - return iter->btree_id == b->c.btree_id && - !btree_iter_pos_before_node(iter, b) && - !btree_iter_pos_after_node(iter, b); + return path->btree_id == b->c.btree_id && + !btree_path_pos_before_node(path, b) && + !btree_path_pos_after_node(path, b); } /* Btree node locking: */ -void bch2_btree_node_unlock_write(struct btree *b, struct btree_iter *iter) +void bch2_btree_node_unlock_write(struct btree_trans *trans, + struct btree_path *path, struct btree *b) { - bch2_btree_node_unlock_write_inlined(b, iter); + bch2_btree_node_unlock_write_inlined(trans, path, b); } -void __bch2_btree_node_lock_write(struct btree *b, struct btree_iter *iter) +void __bch2_btree_node_lock_write(struct btree_trans *trans, struct btree *b) { - struct btree_iter *linked; + struct btree_path *linked; unsigned readers = 0; - EBUG_ON(!btree_node_intent_locked(iter, b->c.level)); - - trans_for_each_iter(iter->trans, linked) + trans_for_each_path(trans, linked) if (linked->l[b->c.level].b == b && btree_node_read_locked(linked, b->c.level)) readers++; @@ -126,138 +132,132 @@ void __bch2_btree_node_lock_write(struct btree *b, struct btree_iter *iter) */ atomic64_sub(__SIX_VAL(read_lock, readers), &b->c.lock.state.counter); - btree_node_lock_type(iter->trans->c, b, SIX_LOCK_write); + btree_node_lock_type(trans->c, b, SIX_LOCK_write); atomic64_add(__SIX_VAL(read_lock, readers), &b->c.lock.state.counter); } -bool __bch2_btree_node_relock(struct btree_iter *iter, unsigned level) +bool __bch2_btree_node_relock(struct btree_trans *trans, + struct btree_path *path, unsigned level) { - struct btree *b = btree_iter_node(iter, level); - int want = __btree_lock_want(iter, level); + struct btree *b = btree_path_node(path, level); + int want = __btree_lock_want(path, level); - if (!is_btree_node(iter, level)) + if (!is_btree_node(path, level)) return false; if (race_fault()) return false; - if (six_relock_type(&b->c.lock, want, iter->l[level].lock_seq) || - (btree_node_lock_seq_matches(iter, b, level) && - btree_node_lock_increment(iter->trans, b, level, want))) { - mark_btree_node_locked(iter, level, want); + if (six_relock_type(&b->c.lock, want, path->l[level].lock_seq) || + (btree_node_lock_seq_matches(path, b, level) && + 
btree_node_lock_increment(trans, b, level, want))) { + mark_btree_node_locked(trans, path, level, want); return true; } else { return false; } } -static bool bch2_btree_node_upgrade(struct btree_iter *iter, unsigned level) +static bool bch2_btree_node_upgrade(struct btree_trans *trans, + struct btree_path *path, unsigned level) { - struct btree *b = iter->l[level].b; + struct btree *b = path->l[level].b; - EBUG_ON(btree_lock_want(iter, level) != BTREE_NODE_INTENT_LOCKED); + EBUG_ON(btree_lock_want(path, level) != BTREE_NODE_INTENT_LOCKED); - if (!is_btree_node(iter, level)) + if (!is_btree_node(path, level)) return false; - if (btree_node_intent_locked(iter, level)) + if (btree_node_intent_locked(path, level)) return true; if (race_fault()) return false; - if (btree_node_locked(iter, level) + if (btree_node_locked(path, level) ? six_lock_tryupgrade(&b->c.lock) - : six_relock_type(&b->c.lock, SIX_LOCK_intent, iter->l[level].lock_seq)) + : six_relock_type(&b->c.lock, SIX_LOCK_intent, path->l[level].lock_seq)) goto success; - if (btree_node_lock_seq_matches(iter, b, level) && - btree_node_lock_increment(iter->trans, b, level, BTREE_NODE_INTENT_LOCKED)) { - btree_node_unlock(iter, level); + if (btree_node_lock_seq_matches(path, b, level) && + btree_node_lock_increment(trans, b, level, BTREE_NODE_INTENT_LOCKED)) { + btree_node_unlock(path, level); goto success; } return false; success: - mark_btree_node_intent_locked(iter, level); + mark_btree_node_intent_locked(trans, path, level); return true; } -static inline bool btree_iter_get_locks(struct btree_iter *iter, bool upgrade, - unsigned long trace_ip) +static inline bool btree_path_get_locks(struct btree_trans *trans, + struct btree_path *path, + bool upgrade, unsigned long trace_ip) { - unsigned l = iter->level; + unsigned l = path->level; int fail_idx = -1; do { - if (!btree_iter_node(iter, l)) + if (!btree_path_node(path, l)) break; if (!(upgrade - ? bch2_btree_node_upgrade(iter, l) - : bch2_btree_node_relock(iter, l))) { - (upgrade - ? trace_node_upgrade_fail - : trace_node_relock_fail)(iter->trans->ip, trace_ip, - btree_iter_type(iter) == BTREE_ITER_CACHED, - iter->btree_id, &iter->real_pos, - l, iter->l[l].lock_seq, - is_btree_node(iter, l) - ? 0 - : (unsigned long) iter->l[l].b, - is_btree_node(iter, l) - ? iter->l[l].b->c.lock.state.seq - : 0); + ? 
bch2_btree_node_upgrade(trans, path, l) + : bch2_btree_node_relock(trans, path, l))) fail_idx = l; - btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE); - } l++; - } while (l < iter->locks_want); + } while (l < path->locks_want); /* * When we fail to get a lock, we have to ensure that any child nodes - * can't be relocked so bch2_btree_iter_traverse has to walk back up to + * can't be relocked so bch2_btree_path_traverse has to walk back up to * the node that we failed to relock: */ - while (fail_idx >= 0) { - btree_node_unlock(iter, fail_idx); - iter->l[fail_idx].b = BTREE_ITER_NO_NODE_GET_LOCKS; - --fail_idx; + if (fail_idx >= 0) { + __bch2_btree_path_unlock(path); + btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE); + + do { + path->l[fail_idx].b = BTREE_ITER_NO_NODE_GET_LOCKS; + --fail_idx; + } while (fail_idx >= 0); } - if (iter->uptodate == BTREE_ITER_NEED_RELOCK) - iter->uptodate = BTREE_ITER_NEED_PEEK; + if (path->uptodate == BTREE_ITER_NEED_RELOCK) + path->uptodate = BTREE_ITER_UPTODATE; - bch2_btree_trans_verify_locks(iter->trans); + bch2_trans_verify_locks(trans); - return iter->uptodate < BTREE_ITER_NEED_RELOCK; + return path->uptodate < BTREE_ITER_NEED_RELOCK; } static struct bpos btree_node_pos(struct btree_bkey_cached_common *_b, - enum btree_iter_type type) + bool cached) { - return type != BTREE_ITER_CACHED + return !cached ? container_of(_b, struct btree, c)->key.k.p : container_of(_b, struct bkey_cached, c)->key.pos; } /* Slowpath: */ -bool __bch2_btree_node_lock(struct btree *b, struct bpos pos, - unsigned level, struct btree_iter *iter, +bool __bch2_btree_node_lock(struct btree_trans *trans, + struct btree_path *path, + struct btree *b, + struct bpos pos, unsigned level, enum six_lock_type type, six_lock_should_sleep_fn should_sleep_fn, void *p, unsigned long ip) { - struct btree_trans *trans = iter->trans; - struct btree_iter *linked, *deadlock_iter = NULL; + struct btree_path *linked, *deadlock_path = NULL; u64 start_time = local_clock(); unsigned reason = 9; bool ret; /* Check if it's safe to block: */ - trans_for_each_iter(trans, linked) { + trans_for_each_path(trans, linked) { if (!linked->nodes_locked) continue; @@ -275,25 +275,25 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos, */ if (type == SIX_LOCK_intent && linked->nodes_locked != linked->nodes_intent_locked) { - deadlock_iter = linked; + deadlock_path = linked; reason = 1; } - if (linked->btree_id != iter->btree_id) { - if (linked->btree_id > iter->btree_id) { - deadlock_iter = linked; + if (linked->btree_id != path->btree_id) { + if (linked->btree_id > path->btree_id) { + deadlock_path = linked; reason = 3; } continue; } /* - * Within the same btree, cached iterators come before non - * cached iterators: + * Within the same btree, cached paths come before non + * cached paths: */ - if (btree_iter_is_cached(linked) != btree_iter_is_cached(iter)) { - if (btree_iter_is_cached(iter)) { - deadlock_iter = linked; + if (linked->cached != path->cached) { + if (path->cached) { + deadlock_path = linked; reason = 4; } continue; @@ -301,32 +301,32 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos, /* * Interior nodes must be locked before their descendants: if - * another iterator has possible descendants locked of the node + * another path has possible descendants locked of the node * we're about to lock, it must have the ancestors locked too: */ if (level > __fls(linked->nodes_locked)) { - deadlock_iter = linked; + deadlock_path = linked; reason = 5; } /* Must lock btree nodes in 
key order: */ if (btree_node_locked(linked, level) && bpos_cmp(pos, btree_node_pos((void *) linked->l[level].b, - btree_iter_type(linked))) <= 0) { - deadlock_iter = linked; + linked->cached)) <= 0) { + deadlock_path = linked; reason = 7; BUG_ON(trans->in_traverse_all); } } - if (unlikely(deadlock_iter)) { + if (unlikely(deadlock_path)) { trace_trans_restart_would_deadlock(trans->ip, ip, trans->in_traverse_all, reason, - deadlock_iter->btree_id, - btree_iter_type(deadlock_iter), - &deadlock_iter->real_pos, - iter->btree_id, - btree_iter_type(iter), + deadlock_path->btree_id, + deadlock_path->cached, + &deadlock_path->pos, + path->btree_id, + path->cached, &pos); btree_trans_restart(trans); return false; @@ -336,9 +336,9 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos, return true; #ifdef CONFIG_BCACHEFS_DEBUG - trans->locking_iter_idx = iter->idx; + trans->locking_path_idx = path->idx; trans->locking_pos = pos; - trans->locking_btree_id = iter->btree_id; + trans->locking_btree_id = path->btree_id; trans->locking_level = level; trans->locking = b; #endif @@ -357,59 +357,49 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos, /* Btree iterator locking: */ #ifdef CONFIG_BCACHEFS_DEBUG -static void bch2_btree_iter_verify_locks(struct btree_iter *iter) + +static void bch2_btree_path_verify_locks(struct btree_path *path) { unsigned l; - if (!(iter->trans->iters_linked & (1ULL << iter->idx))) { - BUG_ON(iter->nodes_locked); + if (!path->nodes_locked) { + BUG_ON(path->uptodate == BTREE_ITER_UPTODATE); return; } - for (l = 0; btree_iter_node(iter, l); l++) { - if (iter->uptodate >= BTREE_ITER_NEED_RELOCK && - !btree_node_locked(iter, l)) - continue; - - BUG_ON(btree_lock_want(iter, l) != - btree_node_locked_type(iter, l)); - } + for (l = 0; btree_path_node(path, l); l++) + BUG_ON(btree_lock_want(path, l) != + btree_node_locked_type(path, l)); } -void bch2_btree_trans_verify_locks(struct btree_trans *trans) +void bch2_trans_verify_locks(struct btree_trans *trans) { - struct btree_iter *iter; + struct btree_path *path; - trans_for_each_iter(trans, iter) - bch2_btree_iter_verify_locks(iter); + trans_for_each_path(trans, path) + bch2_btree_path_verify_locks(path); } #else -static inline void bch2_btree_iter_verify_locks(struct btree_iter *iter) {} +static inline void bch2_btree_path_verify_locks(struct btree_path *path) {} #endif +/* Btree path locking: */ + /* * Only for btree_cache.c - only relocks intent locks */ -bool bch2_btree_iter_relock_intent(struct btree_iter *iter) +bool bch2_btree_path_relock_intent(struct btree_trans *trans, + struct btree_path *path) { unsigned l; - for (l = iter->level; - l < iter->locks_want && btree_iter_node(iter, l); + for (l = path->level; + l < path->locks_want && btree_path_node(path, l); l++) { - if (!bch2_btree_node_relock(iter, l)) { - trace_node_relock_fail(iter->trans->ip, _RET_IP_, - btree_iter_type(iter) == BTREE_ITER_CACHED, - iter->btree_id, &iter->real_pos, - l, iter->l[l].lock_seq, - is_btree_node(iter, l) - ? 0 - : (unsigned long) iter->l[l].b, - is_btree_node(iter, l) - ? 
iter->l[l].b->c.lock.state.seq - : 0); - btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE); - btree_trans_restart(iter->trans); + if (!bch2_btree_node_relock(trans, path, l)) { + __bch2_btree_path_unlock(path); + btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE); + btree_trans_restart(trans); return false; } } @@ -418,25 +408,27 @@ bool bch2_btree_iter_relock_intent(struct btree_iter *iter) } __flatten -bool bch2_btree_iter_relock(struct btree_iter *iter, unsigned long trace_ip) +static bool bch2_btree_path_relock(struct btree_trans *trans, + struct btree_path *path, unsigned long trace_ip) { - bool ret = btree_iter_get_locks(iter, false, trace_ip); + bool ret = btree_path_get_locks(trans, path, false, trace_ip); if (!ret) - btree_trans_restart(iter->trans); + btree_trans_restart(trans); return ret; } -bool __bch2_btree_iter_upgrade(struct btree_iter *iter, +bool __bch2_btree_path_upgrade(struct btree_trans *trans, + struct btree_path *path, unsigned new_locks_want) { - struct btree_iter *linked; + struct btree_path *linked; - EBUG_ON(iter->locks_want >= new_locks_want); + EBUG_ON(path->locks_want >= new_locks_want); - iter->locks_want = new_locks_want; + path->locks_want = new_locks_want; - if (btree_iter_get_locks(iter, true, _THIS_IP_)) + if (btree_path_get_locks(trans, path, true, _THIS_IP_)) return true; /* @@ -444,7 +436,7 @@ bool __bch2_btree_iter_upgrade(struct btree_iter *iter, * iterators in the btree_trans here. * * On failure to upgrade the iterator, setting iter->locks_want and - * calling get_locks() is sufficient to make bch2_btree_iter_traverse() + * calling get_locks() is sufficient to make bch2_btree_path_traverse() * get the locks we want on transaction restart. * * But if this iterator was a clone, on transaction restart what we did @@ -456,75 +448,67 @@ bool __bch2_btree_iter_upgrade(struct btree_iter *iter, * * The code below used to be needed to ensure ancestor nodes get locked * before interior nodes - now that's handled by - * bch2_btree_iter_traverse_all(). + * bch2_btree_path_traverse_all(). 
*/ - trans_for_each_iter(iter->trans, linked) - if (linked != iter && - btree_iter_type(linked) == btree_iter_type(iter) && - linked->btree_id == iter->btree_id && + trans_for_each_path(trans, linked) + if (linked != path && + linked->cached == path->cached && + linked->btree_id == path->btree_id && linked->locks_want < new_locks_want) { linked->locks_want = new_locks_want; - btree_iter_get_locks(linked, true, _THIS_IP_); + btree_path_get_locks(trans, linked, true, _THIS_IP_); } - if (iter->should_be_locked) - btree_trans_restart(iter->trans); return false; } -void __bch2_btree_iter_downgrade(struct btree_iter *iter, +void __bch2_btree_path_downgrade(struct btree_path *path, unsigned new_locks_want) { unsigned l; - EBUG_ON(iter->locks_want < new_locks_want); + EBUG_ON(path->locks_want < new_locks_want); - iter->locks_want = new_locks_want; + path->locks_want = new_locks_want; - while (iter->nodes_locked && - (l = __fls(iter->nodes_locked)) >= iter->locks_want) { - if (l > iter->level) { - btree_node_unlock(iter, l); + while (path->nodes_locked && + (l = __fls(path->nodes_locked)) >= path->locks_want) { + if (l > path->level) { + btree_node_unlock(path, l); } else { - if (btree_node_intent_locked(iter, l)) { - six_lock_downgrade(&iter->l[l].b->c.lock); - iter->nodes_intent_locked ^= 1 << l; + if (btree_node_intent_locked(path, l)) { + six_lock_downgrade(&path->l[l].b->c.lock); + path->nodes_intent_locked ^= 1 << l; } break; } } - bch2_btree_trans_verify_locks(iter->trans); + bch2_btree_path_verify_locks(path); } void bch2_trans_downgrade(struct btree_trans *trans) { - struct btree_iter *iter; + struct btree_path *path; - trans_for_each_iter(trans, iter) - bch2_btree_iter_downgrade(iter); + trans_for_each_path(trans, path) + bch2_btree_path_downgrade(path); } /* Btree transaction locking: */ -static inline bool btree_iter_should_be_locked(struct btree_iter *iter) -{ - return (iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT) || - iter->should_be_locked; -} - bool bch2_trans_relock(struct btree_trans *trans) { - struct btree_iter *iter; + struct btree_path *path; if (unlikely(trans->restarted)) return false; - trans_for_each_iter(trans, iter) - if (btree_iter_should_be_locked(iter) && - !bch2_btree_iter_relock(iter, _RET_IP_)) { + trans_for_each_path(trans, path) + if (path->should_be_locked && + !bch2_btree_path_relock(trans, path, _RET_IP_)) { trace_trans_restart_relock(trans->ip, _RET_IP_, - iter->btree_id, &iter->real_pos); + path->btree_id, &path->pos); BUG_ON(!trans->restarted); return false; } @@ -533,10 +517,10 @@ bool bch2_trans_relock(struct btree_trans *trans) void bch2_trans_unlock(struct btree_trans *trans) { - struct btree_iter *iter; + struct btree_path *path; - trans_for_each_iter(trans, iter) - __bch2_btree_iter_unlock(iter); + trans_for_each_path(trans, path) + __bch2_btree_path_unlock(path); BUG_ON(lock_class_is_held(&bch2_btree_node_lock_key)); } @@ -545,26 +529,27 @@ void bch2_trans_unlock(struct btree_trans *trans) #ifdef CONFIG_BCACHEFS_DEBUG -static void bch2_btree_iter_verify_cached(struct btree_iter *iter) +static void bch2_btree_path_verify_cached(struct btree_trans *trans, + struct btree_path *path) { struct bkey_cached *ck; - bool locked = btree_node_locked(iter, 0); + bool locked = btree_node_locked(path, 0); - if (!bch2_btree_node_relock(iter, 0)) + if (!bch2_btree_node_relock(trans, path, 0)) return; - ck = (void *) iter->l[0].b; - BUG_ON(ck->key.btree_id != iter->btree_id || - bkey_cmp(ck->key.pos, iter->pos)); + ck = (void *) path->l[0].b; + BUG_ON(ck->key.btree_id 
!= path->btree_id || + bkey_cmp(ck->key.pos, path->pos)); if (!locked) - btree_node_unlock(iter, 0); + btree_node_unlock(path, 0); } -static void bch2_btree_iter_verify_level(struct btree_iter *iter, - unsigned level) +static void bch2_btree_path_verify_level(struct btree_trans *trans, + struct btree_path *path, unsigned level) { - struct btree_iter_level *l; + struct btree_path_level *l; struct btree_node_iter tmp; bool locked; struct bkey_packed *p, *k; @@ -574,65 +559,52 @@ static void bch2_btree_iter_verify_level(struct btree_iter *iter, if (!bch2_debug_check_iterators) return; - l = &iter->l[level]; + l = &path->l[level]; tmp = l->iter; - locked = btree_node_locked(iter, level); + locked = btree_node_locked(path, level); - if (btree_iter_type(iter) == BTREE_ITER_CACHED) { + if (path->cached) { if (!level) - bch2_btree_iter_verify_cached(iter); + bch2_btree_path_verify_cached(trans, path); return; } - BUG_ON(iter->level < iter->min_depth); - - if (!btree_iter_node(iter, level)) + if (!btree_path_node(path, level)) return; - if (!bch2_btree_node_relock(iter, level)) + if (!bch2_btree_node_relock(trans, path, level)) return; - BUG_ON(!btree_iter_pos_in_node(iter, l->b)); - - /* - * node iterators don't use leaf node iterator: - */ - if (btree_iter_type(iter) == BTREE_ITER_NODES && - level <= iter->min_depth) - goto unlock; + BUG_ON(!btree_path_pos_in_node(path, l->b)); bch2_btree_node_iter_verify(&l->iter, l->b); /* - * For interior nodes, the iterator will have skipped past - * deleted keys: - * - * For extents, the iterator may have skipped past deleted keys (but not - * whiteouts) + * For interior nodes, the iterator will have skipped past deleted keys: */ - p = level || btree_node_type_is_extents(iter->btree_id) + p = level ? bch2_btree_node_iter_prev(&tmp, l->b) : bch2_btree_node_iter_prev_all(&tmp, l->b); k = bch2_btree_node_iter_peek_all(&l->iter, l->b); - if (p && bkey_iter_pos_cmp(l->b, p, &iter->real_pos) >= 0) { + if (p && bkey_iter_pos_cmp(l->b, p, &path->pos) >= 0) { msg = "before"; goto err; } - if (k && bkey_iter_pos_cmp(l->b, k, &iter->real_pos) < 0) { + if (k && bkey_iter_pos_cmp(l->b, k, &path->pos) < 0) { msg = "after"; goto err; } -unlock: + if (!locked) - btree_node_unlock(iter, level); + btree_node_unlock(path, level); return; err: strcpy(buf2, "(none)"); strcpy(buf3, "(none)"); - bch2_bpos_to_text(&PBUF(buf1), iter->real_pos); + bch2_bpos_to_text(&PBUF(buf1), path->pos); if (p) { struct bkey uk = bkey_unpack_key(l->b, p); @@ -644,79 +616,84 @@ err: bch2_bkey_to_text(&PBUF(buf3), &uk); } - panic("iterator should be %s key at level %u:\n" - "iter pos %s\n" + panic("path should be %s key at level %u:\n" + "path pos %s\n" "prev key %s\n" "cur key %s\n", msg, level, buf1, buf2, buf3); } -static void bch2_btree_iter_verify(struct btree_iter *iter) +static void bch2_btree_path_verify(struct btree_trans *trans, + struct btree_path *path) { - struct btree_trans *trans = iter->trans; struct bch_fs *c = trans->c; - enum btree_iter_type type = btree_iter_type(iter); unsigned i; - EBUG_ON(iter->btree_id >= BTREE_ID_NR); - - BUG_ON(!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS) && - iter->pos.snapshot != iter->snapshot); - - BUG_ON((iter->flags & BTREE_ITER_IS_EXTENTS) && - (iter->flags & BTREE_ITER_ALL_SNAPSHOTS)); - - BUG_ON(type == BTREE_ITER_NODES && - !(iter->flags & BTREE_ITER_ALL_SNAPSHOTS)); - - BUG_ON(type != BTREE_ITER_NODES && - (iter->flags & BTREE_ITER_ALL_SNAPSHOTS) && - !btree_type_has_snapshots(iter->btree_id)); + EBUG_ON(path->btree_id >= BTREE_ID_NR); - for (i = 0; 
i < (type != BTREE_ITER_CACHED ? BTREE_MAX_DEPTH : 1); i++) { - if (!iter->l[i].b) { - BUG_ON(c->btree_roots[iter->btree_id].b->c.level > i); + for (i = 0; i < (!path->cached ? BTREE_MAX_DEPTH : 1); i++) { + if (!path->l[i].b) { + BUG_ON(c->btree_roots[path->btree_id].b->c.level > i); break; } - bch2_btree_iter_verify_level(iter, i); + bch2_btree_path_verify_level(trans, path, i); } - bch2_btree_iter_verify_locks(iter); + bch2_btree_path_verify_locks(path); } -static void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter) +void bch2_trans_verify_paths(struct btree_trans *trans) +{ + struct btree_path *path; + + trans_for_each_path(trans, path) + bch2_btree_path_verify(trans, path); +} + +static void bch2_btree_iter_verify(struct btree_iter *iter) { - enum btree_iter_type type = btree_iter_type(iter); + struct btree_trans *trans = iter->trans; + + BUG_ON(iter->btree_id >= BTREE_ID_NR); + + BUG_ON(!!(iter->flags & BTREE_ITER_CACHED) != iter->path->cached); BUG_ON(!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS) && iter->pos.snapshot != iter->snapshot); - BUG_ON((type == BTREE_ITER_KEYS || - type == BTREE_ITER_CACHED) && - (bkey_cmp(iter->pos, bkey_start_pos(&iter->k)) < 0 || - bkey_cmp(iter->pos, iter->k.p) > 0)); + BUG_ON((iter->flags & BTREE_ITER_IS_EXTENTS) && + (iter->flags & BTREE_ITER_ALL_SNAPSHOTS)); + + BUG_ON(!(iter->flags & __BTREE_ITER_ALL_SNAPSHOTS) && + (iter->flags & BTREE_ITER_ALL_SNAPSHOTS) && + !btree_type_has_snapshots(iter->btree_id)); + + bch2_btree_path_verify(trans, iter->path); } -void bch2_btree_trans_verify_iters(struct btree_trans *trans, struct btree *b) +static void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter) { - struct btree_iter *iter; - - if (!bch2_debug_check_iterators) - return; + BUG_ON(!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS) && + iter->pos.snapshot != iter->snapshot); - trans_for_each_iter_with_node(trans, b, iter) - bch2_btree_iter_verify_level(iter, b->c.level); + BUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&iter->k)) < 0 || + bkey_cmp(iter->pos, iter->k.p) > 0); } #else -static inline void bch2_btree_iter_verify_level(struct btree_iter *iter, unsigned l) {} +static inline void bch2_btree_path_verify_level(struct btree_trans *trans, + struct btree_path *path, unsigned l) {} +static inline void bch2_btree_path_verify(struct btree_trans *trans, + struct btree_path *path) {} static inline void bch2_btree_iter_verify(struct btree_iter *iter) {} static inline void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter) {} #endif +/* Btree path: fixups after btree updates */ + static void btree_node_iter_set_set_pos(struct btree_node_iter *iter, struct btree *b, struct bset_tree *t, @@ -734,40 +711,38 @@ static void btree_node_iter_set_set_pos(struct btree_node_iter *iter, bch2_btree_node_iter_push(iter, b, k, btree_bkey_last(b, t)); } -static void __bch2_btree_iter_fix_key_modified(struct btree_iter *iter, +static void __bch2_btree_path_fix_key_modified(struct btree_path *path, struct btree *b, struct bkey_packed *where) { - struct btree_iter_level *l = &iter->l[b->c.level]; + struct btree_path_level *l = &path->l[b->c.level]; if (where != bch2_btree_node_iter_peek_all(&l->iter, l->b)) return; - if (bkey_iter_pos_cmp(l->b, where, &iter->real_pos) < 0) + if (bkey_iter_pos_cmp(l->b, where, &path->pos) < 0) bch2_btree_node_iter_advance(&l->iter, l->b); - - btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK); } -void bch2_btree_iter_fix_key_modified(struct btree_iter *iter, +void bch2_btree_path_fix_key_modified(struct btree_trans *trans, struct 
btree *b, struct bkey_packed *where) { - struct btree_iter *linked; + struct btree_path *path; - trans_for_each_iter_with_node(iter->trans, b, linked) { - __bch2_btree_iter_fix_key_modified(linked, b, where); - bch2_btree_iter_verify_level(linked, b->c.level); + trans_for_each_path_with_node(trans, b, path) { + __bch2_btree_path_fix_key_modified(path, b, where); + bch2_btree_path_verify_level(trans, path, b->c.level); } } -static void __bch2_btree_node_iter_fix(struct btree_iter *iter, - struct btree *b, - struct btree_node_iter *node_iter, - struct bset_tree *t, - struct bkey_packed *where, - unsigned clobber_u64s, - unsigned new_u64s) +static void __bch2_btree_node_iter_fix(struct btree_path *path, + struct btree *b, + struct btree_node_iter *node_iter, + struct bset_tree *t, + struct bkey_packed *where, + unsigned clobber_u64s, + unsigned new_u64s) { const struct bkey_packed *end = btree_bkey_last(b, t); struct btree_node_iter_set *set; @@ -785,7 +760,7 @@ static void __bch2_btree_node_iter_fix(struct btree_iter *iter, /* didn't find the bset in the iterator - might have to readd it: */ if (new_u64s && - bkey_iter_pos_cmp(b, where, &iter->real_pos) >= 0) { + bkey_iter_pos_cmp(b, where, &path->pos) >= 0) { bch2_btree_node_iter_push(node_iter, b, where, end); goto fixup_done; } else { @@ -800,7 +775,7 @@ found: return; if (new_u64s && - bkey_iter_pos_cmp(b, where, &iter->real_pos) >= 0) { + bkey_iter_pos_cmp(b, where, &path->pos) >= 0) { set->k = offset; } else if (set->k < offset + clobber_u64s) { set->k = offset + new_u64s; @@ -826,8 +801,7 @@ fixup_done: */ if (!bch2_btree_node_iter_end(node_iter) && iter_current_key_modified && - (b->c.level || - btree_node_type_is_extents(iter->btree_id))) { + b->c.level) { struct bset_tree *t; struct bkey_packed *k, *k2, *p; @@ -852,14 +826,10 @@ fixup_done: b, t, k2); } } - - if (!b->c.level && - node_iter == &iter->l[0].iter && - iter_current_key_modified) - btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK); } -void bch2_btree_node_iter_fix(struct btree_iter *iter, +void bch2_btree_node_iter_fix(struct btree_trans *trans, + struct btree_path *path, struct btree *b, struct btree_node_iter *node_iter, struct bkey_packed *where, @@ -867,26 +837,28 @@ void bch2_btree_node_iter_fix(struct btree_iter *iter, unsigned new_u64s) { struct bset_tree *t = bch2_bkey_to_bset(b, where); - struct btree_iter *linked; + struct btree_path *linked; - if (node_iter != &iter->l[b->c.level].iter) { - __bch2_btree_node_iter_fix(iter, b, node_iter, t, + if (node_iter != &path->l[b->c.level].iter) { + __bch2_btree_node_iter_fix(path, b, node_iter, t, where, clobber_u64s, new_u64s); if (bch2_debug_check_iterators) bch2_btree_node_iter_verify(node_iter, b); } - trans_for_each_iter_with_node(iter->trans, b, linked) { + trans_for_each_path_with_node(trans, b, linked) { __bch2_btree_node_iter_fix(linked, b, &linked->l[b->c.level].iter, t, where, clobber_u64s, new_u64s); - bch2_btree_iter_verify_level(linked, b->c.level); + bch2_btree_path_verify_level(trans, linked, b->c.level); } } -static inline struct bkey_s_c __btree_iter_unpack(struct btree_iter *iter, - struct btree_iter_level *l, +/* Btree path level: pointer to a particular btree node and node iter */ + +static inline struct bkey_s_c __btree_iter_unpack(struct bch_fs *c, + struct btree_path_level *l, struct bkey *u, struct bkey_packed *k) { @@ -911,48 +883,52 @@ static inline struct bkey_s_c __btree_iter_unpack(struct btree_iter *iter, * assertion here: */ if (bch2_debug_check_bkeys && !bkey_deleted(ret.k)) - 
bch2_bkey_debugcheck(iter->trans->c, l->b, ret); + bch2_bkey_debugcheck(c, l->b, ret); return ret; } -/* peek_all() doesn't skip deleted keys */ -static inline struct bkey_s_c btree_iter_level_peek_all(struct btree_iter *iter, - struct btree_iter_level *l) +static inline struct bkey_s_c btree_path_level_peek_all(struct bch_fs *c, + struct btree_path_level *l, + struct bkey *u) { - return __btree_iter_unpack(iter, l, &iter->k, + return __btree_iter_unpack(c, l, u, bch2_btree_node_iter_peek_all(&l->iter, l->b)); } -static inline struct bkey_s_c btree_iter_level_peek(struct btree_iter *iter, - struct btree_iter_level *l) +static inline struct bkey_s_c btree_path_level_peek(struct btree_trans *trans, + struct btree_path *path, + struct btree_path_level *l, + struct bkey *u) { - struct bkey_s_c k = __btree_iter_unpack(iter, l, &iter->k, + struct bkey_s_c k = __btree_iter_unpack(trans->c, l, u, bch2_btree_node_iter_peek(&l->iter, l->b)); - iter->real_pos = k.k ? k.k->p : l->b->key.k.p; + path->pos = k.k ? k.k->p : l->b->key.k.p; return k; } -static inline struct bkey_s_c btree_iter_level_prev(struct btree_iter *iter, - struct btree_iter_level *l) +static inline struct bkey_s_c btree_path_level_prev(struct bch_fs *c, + struct btree_path *path, + struct btree_path_level *l, + struct bkey *u) { - struct bkey_s_c k = __btree_iter_unpack(iter, l, &iter->k, + struct bkey_s_c k = __btree_iter_unpack(c, l, u, bch2_btree_node_iter_prev(&l->iter, l->b)); - iter->real_pos = k.k ? k.k->p : l->b->data->min_key; + path->pos = k.k ? k.k->p : l->b->data->min_key; return k; } -static inline bool btree_iter_advance_to_pos(struct btree_iter *iter, - struct btree_iter_level *l, +static inline bool btree_path_advance_to_pos(struct btree_path *path, + struct btree_path_level *l, int max_advance) { struct bkey_packed *k; int nr_advanced = 0; while ((k = bch2_btree_node_iter_peek_all(&l->iter, l->b)) && - bkey_iter_pos_cmp(l->b, k, &iter->real_pos) < 0) { + bkey_iter_pos_cmp(l->b, k, &path->pos) < 0) { if (max_advance > 0 && nr_advanced >= max_advance) return false; @@ -966,9 +942,10 @@ static inline bool btree_iter_advance_to_pos(struct btree_iter *iter, /* * Verify that iterator for parent node points to child node: */ -static void btree_iter_verify_new_node(struct btree_iter *iter, struct btree *b) +static void btree_path_verify_new_node(struct btree_trans *trans, + struct btree_path *path, struct btree *b) { - struct btree_iter_level *l; + struct btree_path_level *l; unsigned plevel; bool parent_locked; struct bkey_packed *k; @@ -977,15 +954,15 @@ static void btree_iter_verify_new_node(struct btree_iter *iter, struct btree *b) return; plevel = b->c.level + 1; - if (!btree_iter_node(iter, plevel)) + if (!btree_path_node(path, plevel)) return; - parent_locked = btree_node_locked(iter, plevel); + parent_locked = btree_node_locked(path, plevel); - if (!bch2_btree_node_relock(iter, plevel)) + if (!bch2_btree_node_relock(trans, path, plevel)) return; - l = &iter->l[plevel]; + l = &path->l[plevel]; k = bch2_btree_node_iter_peek_all(&l->iter, l->b); if (!k || bkey_deleted(k) || @@ -996,8 +973,8 @@ static void btree_iter_verify_new_node(struct btree_iter *iter, struct btree *b) char buf4[100]; struct bkey uk = bkey_unpack_key(b, k); - bch2_dump_btree_node(iter->trans->c, l->b); - bch2_bpos_to_text(&PBUF(buf1), iter->real_pos); + bch2_dump_btree_node(trans->c, l->b); + bch2_bpos_to_text(&PBUF(buf1), path->pos); bch2_bkey_to_text(&PBUF(buf2), &uk); bch2_bpos_to_text(&PBUF(buf3), b->data->min_key); 
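	/* buf1-buf4: iter pos, iter key, and the new node's min/max keys for the panic below */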
bch2_bpos_to_text(&PBUF(buf4), b->data->max_key);
@@ -1005,20 +982,20 @@ static void btree_iter_verify_new_node(struct btree_iter *iter, struct btree *b)
	      "iter pos %s %s\n"
	      "iter key %s\n"
	      "new node %s-%s\n",
-	      bch2_btree_ids[iter->btree_id], buf1,
+	      bch2_btree_ids[path->btree_id], buf1,
	      buf2, buf3, buf4);
	}

	if (!parent_locked)
-		btree_node_unlock(iter, b->c.level + 1);
+		btree_node_unlock(path, plevel);
}

-static inline void __btree_iter_init(struct btree_iter *iter,
-				     unsigned level)
+static inline void __btree_path_level_init(struct btree_path *path,
+					   unsigned level)
{
-	struct btree_iter_level *l = &iter->l[level];
+	struct btree_path_level *l = &path->l[level];

-	bch2_btree_node_iter_init(&l->iter, l->b, &iter->real_pos);
+	bch2_btree_node_iter_init(&l->iter, l->b, &path->pos);

	/*
	 * Iterators to interior nodes should always be pointed at the first non
@@ -1026,63 +1003,48 @@ static inline void __btree_iter_init(struct btree_iter *iter,
	 */
	if (level)
		bch2_btree_node_iter_peek(&l->iter, l->b);
-
-	btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK);
}

-static inline void btree_iter_node_set(struct btree_iter *iter,
-				       struct btree *b)
+static inline void btree_path_level_init(struct btree_trans *trans,
+					 struct btree_path *path,
+					 struct btree *b)
{
-	BUG_ON(btree_iter_type(iter) == BTREE_ITER_CACHED);
+	BUG_ON(path->cached);

-	btree_iter_verify_new_node(iter, b);
+	btree_path_verify_new_node(trans, path, b);

-	EBUG_ON(!btree_iter_pos_in_node(iter, b));
+	EBUG_ON(!btree_path_pos_in_node(path, b));
	EBUG_ON(b->c.lock.state.seq & 1);

-	iter->l[b->c.level].lock_seq = b->c.lock.state.seq;
-	iter->l[b->c.level].b = b;
-	__btree_iter_init(iter, b->c.level);
+	path->l[b->c.level].lock_seq = b->c.lock.state.seq;
+	path->l[b->c.level].b = b;
+	__btree_path_level_init(path, b->c.level);
}

+/* Btree path: fixups after btree node updates: */
+
/*
 * A btree node is being replaced - update the iterator to point to the new
 * node:
 */
-void bch2_btree_iter_node_replace(struct btree_iter *iter, struct btree *b)
+void bch2_trans_node_add(struct btree_trans *trans, struct btree *b)
{
-	enum btree_node_locked_type t;
-	struct btree_iter *linked;
+	struct btree_path *path;

-	trans_for_each_iter(iter->trans, linked)
-		if (btree_iter_type(linked) != BTREE_ITER_CACHED &&
-		    btree_iter_pos_in_node(linked, b)) {
-			/*
-			 * bch2_btree_iter_node_drop() has already been called -
-			 * the old node we're replacing has already been
-			 * unlocked and the pointer invalidated
-			 */
-			BUG_ON(btree_node_locked(linked, b->c.level));
+	trans_for_each_path(trans, path)
+		if (!path->cached &&
+		    btree_path_pos_in_node(path, b)) {
+			enum btree_node_locked_type t =
+				btree_lock_want(path, b->c.level);

-			t = btree_lock_want(linked, b->c.level);
-			if (t != BTREE_NODE_UNLOCKED) {
+			if (path->nodes_locked &&
+			    t != BTREE_NODE_UNLOCKED) {
+				btree_node_unlock(path, b->c.level);
				six_lock_increment(&b->c.lock, t);
-				mark_btree_node_locked(linked, b->c.level, t);
+				mark_btree_node_locked(trans, path, b->c.level, t);
			}

-			btree_iter_node_set(linked, b);
-		}
-}
-
-void bch2_btree_iter_node_drop(struct btree_iter *iter, struct btree *b)
-{
-	struct btree_iter *linked;
-	unsigned level = b->c.level;
-
-	trans_for_each_iter(iter->trans, linked)
-		if (linked->l[level].b == b) {
-			btree_node_unlock(linked, level);
-			linked->l[level].b = BTREE_ITER_NO_NODE_DROP;
+			btree_path_level_init(trans, path, b);
		}
}

@@ -1090,14 +1052,16 @@ void bch2_btree_iter_node_drop(struct btree_iter *iter, struct btree *b)
 * A btree node has been modified in such a way as to 
invalidate iterators - fix * them: */ -void bch2_btree_iter_reinit_node(struct btree_iter *iter, struct btree *b) +void bch2_trans_node_reinit_iter(struct btree_trans *trans, struct btree *b) { - struct btree_iter *linked; + struct btree_path *path; - trans_for_each_iter_with_node(iter->trans, b, linked) - __btree_iter_init(linked, b->c.level); + trans_for_each_path_with_node(trans, b, path) + __btree_path_level_init(path, b->c.level); } +/* Btree path: traverse, set_pos: */ + static int lock_root_check_fn(struct six_lock *lock, void *p) { struct btree *b = container_of(lock, struct btree, c.lock); @@ -1106,38 +1070,38 @@ static int lock_root_check_fn(struct six_lock *lock, void *p) return b == *rootp ? 0 : -1; } -static inline int btree_iter_lock_root(struct btree_trans *trans, - struct btree_iter *iter, +static inline int btree_path_lock_root(struct btree_trans *trans, + struct btree_path *path, unsigned depth_want, unsigned long trace_ip) { struct bch_fs *c = trans->c; - struct btree *b, **rootp = &c->btree_roots[iter->btree_id].b; + struct btree *b, **rootp = &c->btree_roots[path->btree_id].b; enum six_lock_type lock_type; unsigned i; - EBUG_ON(iter->nodes_locked); + EBUG_ON(path->nodes_locked); while (1) { b = READ_ONCE(*rootp); - iter->level = READ_ONCE(b->c.level); + path->level = READ_ONCE(b->c.level); - if (unlikely(iter->level < depth_want)) { + if (unlikely(path->level < depth_want)) { /* * the root is at a lower depth than the depth we want: * got to the end of the btree, or we're walking nodes * greater than some depth and there are no nodes >= * that depth */ - iter->level = depth_want; - for (i = iter->level; i < BTREE_MAX_DEPTH; i++) - iter->l[i].b = NULL; + path->level = depth_want; + for (i = path->level; i < BTREE_MAX_DEPTH; i++) + path->l[i].b = NULL; return 1; } - lock_type = __btree_lock_want(iter, iter->level); - if (unlikely(!btree_node_lock(b, SPOS_MAX, iter->level, - iter, lock_type, + lock_type = __btree_lock_want(path, path->level); + if (unlikely(!btree_node_lock(trans, path, b, SPOS_MAX, + path->level, lock_type, lock_root_check_fn, rootp, trace_ip))) { if (trans->restarted) @@ -1146,16 +1110,16 @@ static inline int btree_iter_lock_root(struct btree_trans *trans, } if (likely(b == READ_ONCE(*rootp) && - b->c.level == iter->level && + b->c.level == path->level && !race_fault())) { - for (i = 0; i < iter->level; i++) - iter->l[i].b = BTREE_ITER_NO_NODE_LOCK_ROOT; - iter->l[iter->level].b = b; - for (i = iter->level + 1; i < BTREE_MAX_DEPTH; i++) - iter->l[i].b = NULL; - - mark_btree_node_locked(iter, iter->level, lock_type); - btree_iter_node_set(iter, b); + for (i = 0; i < path->level; i++) + path->l[i].b = BTREE_ITER_NO_NODE_LOCK_ROOT; + path->l[path->level].b = b; + for (i = path->level + 1; i < BTREE_MAX_DEPTH; i++) + path->l[i].b = NULL; + + mark_btree_node_locked(trans, path, path->level, lock_type); + btree_path_level_init(trans, path, b); return 0; } @@ -1164,23 +1128,23 @@ static inline int btree_iter_lock_root(struct btree_trans *trans, } noinline -static int btree_iter_prefetch(struct btree_iter *iter) +static int btree_path_prefetch(struct btree_trans *trans, struct btree_path *path) { - struct bch_fs *c = iter->trans->c; - struct btree_iter_level *l = &iter->l[iter->level]; + struct bch_fs *c = trans->c; + struct btree_path_level *l = path_l(path); struct btree_node_iter node_iter = l->iter; struct bkey_packed *k; struct bkey_buf tmp; unsigned nr = test_bit(BCH_FS_STARTED, &c->flags) - ? (iter->level > 1 ? 0 : 2) - : (iter->level > 1 ? 
1 : 16); - bool was_locked = btree_node_locked(iter, iter->level); + ? (path->level > 1 ? 0 : 2) + : (path->level > 1 ? 1 : 16); + bool was_locked = btree_node_locked(path, path->level); int ret = 0; bch2_bkey_buf_init(&tmp); while (nr && !ret) { - if (!bch2_btree_node_relock(iter, iter->level)) + if (!bch2_btree_node_relock(trans, path, path->level)) break; bch2_btree_node_iter_advance(&node_iter, l->b); @@ -1189,26 +1153,27 @@ static int btree_iter_prefetch(struct btree_iter *iter) break; bch2_bkey_buf_unpack(&tmp, c, l->b, k); - ret = bch2_btree_node_prefetch(c, iter, tmp.k, iter->btree_id, - iter->level - 1); + ret = bch2_btree_node_prefetch(c, trans, path, tmp.k, path->btree_id, + path->level - 1); } if (!was_locked) - btree_node_unlock(iter, iter->level); + btree_node_unlock(path, path->level); bch2_bkey_buf_exit(&tmp, c); return ret; } -static noinline void btree_node_mem_ptr_set(struct btree_iter *iter, +static noinline void btree_node_mem_ptr_set(struct btree_trans *trans, + struct btree_path *path, unsigned plevel, struct btree *b) { - struct btree_iter_level *l = &iter->l[plevel]; - bool locked = btree_node_locked(iter, plevel); + struct btree_path_level *l = &path->l[plevel]; + bool locked = btree_node_locked(path, plevel); struct bkey_packed *k; struct bch_btree_ptr_v2 *bp; - if (!bch2_btree_node_relock(iter, plevel)) + if (!bch2_btree_node_relock(trans, path, plevel)) return; k = bch2_btree_node_iter_peek_all(&l->iter, l->b); @@ -1218,59 +1183,61 @@ static noinline void btree_node_mem_ptr_set(struct btree_iter *iter, bp->mem_ptr = (unsigned long)b; if (!locked) - btree_node_unlock(iter, plevel); + btree_node_unlock(path, plevel); } -static __always_inline int btree_iter_down(struct btree_trans *trans, - struct btree_iter *iter, +static __always_inline int btree_path_down(struct btree_trans *trans, + struct btree_path *path, + unsigned flags, unsigned long trace_ip) { struct bch_fs *c = trans->c; - struct btree_iter_level *l = &iter->l[iter->level]; + struct btree_path_level *l = path_l(path); struct btree *b; - unsigned level = iter->level - 1; - enum six_lock_type lock_type = __btree_lock_want(iter, level); + unsigned level = path->level - 1; + enum six_lock_type lock_type = __btree_lock_want(path, level); struct bkey_buf tmp; int ret; - EBUG_ON(!btree_node_locked(iter, iter->level)); + EBUG_ON(!btree_node_locked(path, path->level)); bch2_bkey_buf_init(&tmp); bch2_bkey_buf_unpack(&tmp, c, l->b, bch2_btree_node_iter_peek(&l->iter, l->b)); - b = bch2_btree_node_get(trans, iter, tmp.k, level, lock_type, trace_ip); + b = bch2_btree_node_get(trans, path, tmp.k, level, lock_type, trace_ip); ret = PTR_ERR_OR_ZERO(b); if (unlikely(ret)) goto err; - mark_btree_node_locked(iter, level, lock_type); - btree_iter_node_set(iter, b); + mark_btree_node_locked(trans, path, level, lock_type); + btree_path_level_init(trans, path, b); if (tmp.k->k.type == KEY_TYPE_btree_ptr_v2 && unlikely(b != btree_node_mem_ptr(tmp.k))) - btree_node_mem_ptr_set(iter, level + 1, b); + btree_node_mem_ptr_set(trans, path, level + 1, b); - if (iter->flags & BTREE_ITER_PREFETCH) - ret = btree_iter_prefetch(iter); + if (flags & BTREE_ITER_PREFETCH) + ret = btree_path_prefetch(trans, path); - if (btree_node_read_locked(iter, level + 1)) - btree_node_unlock(iter, level + 1); - iter->level = level; + if (btree_node_read_locked(path, level + 1)) + btree_node_unlock(path, level + 1); + path->level = level; - bch2_btree_iter_verify_locks(iter); + bch2_btree_path_verify_locks(path); err: bch2_bkey_buf_exit(&tmp, c); return 
ret; } -static int btree_iter_traverse_one(struct btree_iter *, unsigned long); +static int btree_path_traverse_one(struct btree_trans *, struct btree_path *, + unsigned, unsigned long); -static int __btree_iter_traverse_all(struct btree_trans *trans, int ret, +static int __btree_path_traverse_all(struct btree_trans *trans, int ret, unsigned long trace_ip) { struct bch_fs *c = trans->c; - struct btree_iter *iter; + struct btree_path *path; int i; if (trans->in_traverse_all) @@ -1280,20 +1247,24 @@ static int __btree_iter_traverse_all(struct btree_trans *trans, int ret, retry_all: trans->restarted = false; - trans_for_each_iter(trans, iter) - iter->should_be_locked = false; + trans_for_each_path(trans, path) + path->should_be_locked = false; + + btree_trans_verify_sorted(trans); - btree_trans_sort_iters(trans); +#ifdef CONFIG_BCACHEFS_DEBUG + trans->traverse_all_idx = U8_MAX; +#endif for (i = trans->nr_sorted - 2; i >= 0; --i) { - struct btree_iter *iter1 = trans->iters + trans->sorted[i]; - struct btree_iter *iter2 = trans->iters + trans->sorted[i + 1]; - - if (iter1->btree_id == iter2->btree_id && - iter1->locks_want < iter2->locks_want) - __bch2_btree_iter_upgrade(iter1, iter2->locks_want); - else if (!iter1->locks_want && iter2->locks_want) - __bch2_btree_iter_upgrade(iter1, 1); + struct btree_path *path1 = trans->paths + trans->sorted[i]; + struct btree_path *path2 = trans->paths + trans->sorted[i + 1]; + + if (path1->btree_id == path2->btree_id && + path1->locks_want < path2->locks_want) + __bch2_btree_path_upgrade(trans, path1, path2->locks_want); + else if (!path1->locks_want && path2->locks_want) + __bch2_btree_path_upgrade(trans, path1, 1); } bch2_trans_unlock(trans); @@ -1318,18 +1289,32 @@ retry_all: BUG_ON(ret && ret != -EINTR); /* Now, redo traversals in correct order: */ - trans_for_each_iter_inorder(trans, iter) { - EBUG_ON(!(trans->iters_linked & (1ULL << iter->idx))); + i = 0; + while (i < trans->nr_sorted) { + path = trans->paths + trans->sorted[i]; + + EBUG_ON(!(trans->paths_allocated & (1ULL << path->idx))); +#ifdef CONFIG_BCACHEFS_DEBUG + trans->traverse_all_idx = path->idx; +#endif - ret = btree_iter_traverse_one(iter, _THIS_IP_); + ret = btree_path_traverse_one(trans, path, 0, _THIS_IP_); if (ret) goto retry_all; - EBUG_ON(!(trans->iters_linked & (1ULL << iter->idx))); + EBUG_ON(!(trans->paths_allocated & (1ULL << path->idx))); + + if (path->nodes_locked) + i++; } - trans_for_each_iter(trans, iter) - BUG_ON(iter->uptodate > BTREE_ITER_NEED_PEEK); + /* + * BTREE_ITER_NEED_RELOCK is ok here - if we called bch2_trans_unlock() + * and relock(), relock() won't relock since path->should_be_locked + * isn't set yet, which is all fine + */ + trans_for_each_path(trans, path) + BUG_ON(path->uptodate >= BTREE_ITER_NEED_TRAVERSE); out: bch2_btree_cache_cannibalize_unlock(c); @@ -1339,37 +1324,50 @@ out: return ret; } -static int bch2_btree_iter_traverse_all(struct btree_trans *trans) +static int bch2_btree_path_traverse_all(struct btree_trans *trans) { - return __btree_iter_traverse_all(trans, 0, _RET_IP_); + return __btree_path_traverse_all(trans, 0, _RET_IP_); } -static inline bool btree_iter_good_node(struct btree_iter *iter, +static inline bool btree_path_good_node(struct btree_trans *trans, + struct btree_path *path, unsigned l, int check_pos) { - if (!is_btree_node(iter, l) || - !bch2_btree_node_relock(iter, l)) + if (!is_btree_node(path, l) || + !bch2_btree_node_relock(trans, path, l)) return false; - if (check_pos < 0 && btree_iter_pos_before_node(iter, iter->l[l].b)) + 
if (check_pos < 0 && btree_path_pos_before_node(path, path->l[l].b)) return false; - if (check_pos > 0 && btree_iter_pos_after_node(iter, iter->l[l].b)) + if (check_pos > 0 && btree_path_pos_after_node(path, path->l[l].b)) return false; return true; } -static inline unsigned btree_iter_up_until_good_node(struct btree_iter *iter, +static inline unsigned btree_path_up_until_good_node(struct btree_trans *trans, + struct btree_path *path, int check_pos) { - unsigned l = iter->level; + unsigned i, l = path->level; - while (btree_iter_node(iter, l) && - !btree_iter_good_node(iter, l, check_pos)) { - btree_node_unlock(iter, l); - iter->l[l].b = BTREE_ITER_NO_NODE_UP; + while (btree_path_node(path, l) && + !btree_path_good_node(trans, path, l, check_pos)) { + btree_node_unlock(path, l); + path->l[l].b = BTREE_ITER_NO_NODE_UP; l++; } + /* If we need intent locks, take them too: */ + for (i = l + 1; + i < path->locks_want && btree_path_node(path, i); + i++) + if (!bch2_btree_node_relock(trans, path, i)) + while (l <= i) { + btree_node_unlock(path, l); + path->l[l].b = BTREE_ITER_NO_NODE_UP; + l++; + } + return l; } @@ -1382,249 +1380,167 @@ static inline unsigned btree_iter_up_until_good_node(struct btree_iter *iter, * On error, caller (peek_node()/peek_key()) must return NULL; the error is * stashed in the iterator and returned from bch2_trans_exit(). */ -static int btree_iter_traverse_one(struct btree_iter *iter, +static int btree_path_traverse_one(struct btree_trans *trans, + struct btree_path *path, + unsigned flags, unsigned long trace_ip) { - struct btree_trans *trans = iter->trans; - unsigned l, depth_want = iter->level; + unsigned depth_want = path->level; int ret = 0; /* - * Ensure we obey iter->should_be_locked: if it's set, we can't unlock - * and re-traverse the iterator without a transaction restart: + * Ensure we obey path->should_be_locked: if it's set, we can't unlock + * and re-traverse the path without a transaction restart: */ - if (iter->should_be_locked) { - ret = bch2_btree_iter_relock(iter, trace_ip) ? 0 : -EINTR; + if (path->should_be_locked) { + ret = bch2_btree_path_relock(trans, path, trace_ip) ? 0 : -EINTR; goto out; } - if (btree_iter_type(iter) == BTREE_ITER_CACHED) { - ret = bch2_btree_iter_traverse_cached(iter); + if (path->cached) { + ret = bch2_btree_path_traverse_cached(trans, path, flags); goto out; } - if (unlikely(iter->level >= BTREE_MAX_DEPTH)) + if (unlikely(path->level >= BTREE_MAX_DEPTH)) goto out; - iter->level = btree_iter_up_until_good_node(iter, 0); - - /* If we need intent locks, take them too: */ - for (l = iter->level + 1; - l < iter->locks_want && btree_iter_node(iter, l); - l++) - if (!bch2_btree_node_relock(iter, l)) - while (iter->level <= l) { - btree_node_unlock(iter, iter->level); - iter->l[iter->level].b = BTREE_ITER_NO_NODE_UP; - iter->level++; - } + path->level = btree_path_up_until_good_node(trans, path, 0); /* - * Note: iter->nodes[iter->level] may be temporarily NULL here - that + * Note: path->nodes[path->level] may be temporarily NULL here - that * would indicate to other code that we got to the end of the btree, * here it indicates that relocking the root failed - it's critical that - * btree_iter_lock_root() comes next and that it can't fail + * btree_path_lock_root() comes next and that it can't fail */ - while (iter->level > depth_want) { - ret = btree_iter_node(iter, iter->level) - ? 
btree_iter_down(trans, iter, trace_ip) - : btree_iter_lock_root(trans, iter, depth_want, trace_ip); + while (path->level > depth_want) { + ret = btree_path_node(path, path->level) + ? btree_path_down(trans, path, flags, trace_ip) + : btree_path_lock_root(trans, path, depth_want, trace_ip); if (unlikely(ret)) { if (ret == 1) { /* - * Got to the end of the btree (in - * BTREE_ITER_NODES mode) + * No nodes at this level - got to the end of + * the btree: */ ret = 0; goto out; } - __bch2_btree_iter_unlock(iter); - iter->level = depth_want; + __bch2_btree_path_unlock(path); + path->level = depth_want; - if (ret == -EIO) { - iter->flags |= BTREE_ITER_ERROR; - iter->l[iter->level].b = + if (ret == -EIO) + path->l[path->level].b = BTREE_ITER_NO_NODE_ERROR; - } else { - iter->l[iter->level].b = + else + path->l[path->level].b = BTREE_ITER_NO_NODE_DOWN; - } goto out; } } - iter->uptodate = BTREE_ITER_NEED_PEEK; + path->uptodate = BTREE_ITER_UPTODATE; out: BUG_ON((ret == -EINTR) != !!trans->restarted); - trace_iter_traverse(trans->ip, trace_ip, - btree_iter_type(iter) == BTREE_ITER_CACHED, - iter->btree_id, &iter->real_pos, ret); - bch2_btree_iter_verify(iter); + bch2_btree_path_verify(trans, path); return ret; } -static int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter) +static int __btree_path_traverse_all(struct btree_trans *, int, unsigned long); + +int __must_check bch2_btree_path_traverse(struct btree_trans *trans, + struct btree_path *path, unsigned flags) { - struct btree_trans *trans = iter->trans; int ret; + if (path->uptodate < BTREE_ITER_NEED_RELOCK) + return 0; + ret = bch2_trans_cond_resched(trans) ?: - btree_iter_traverse_one(iter, _RET_IP_); - if (unlikely(ret) && hweight64(trans->iters_linked) == 1) { - ret = __btree_iter_traverse_all(trans, ret, _RET_IP_); + btree_path_traverse_one(trans, path, flags, _RET_IP_); + if (unlikely(ret) && hweight64(trans->paths_allocated) == 1) { + ret = __btree_path_traverse_all(trans, ret, _RET_IP_); BUG_ON(ret == -EINTR); } return ret; } -/* - * Note: - * bch2_btree_iter_traverse() is for external users, btree_iter_traverse() is - * for internal btree iterator users - * - * bch2_btree_iter_traverse sets iter->real_pos to iter->pos, - * btree_iter_traverse() does not: - */ -static inline int __must_check -btree_iter_traverse(struct btree_iter *iter) -{ - return iter->uptodate >= BTREE_ITER_NEED_RELOCK - ? 
__bch2_btree_iter_traverse(iter) - : 0; -} - -int __must_check -bch2_btree_iter_traverse(struct btree_iter *iter) +static void btree_path_copy(struct btree_trans *trans, struct btree_path *dst, + struct btree_path *src) { - int ret; + unsigned i; - btree_iter_set_search_pos(iter, btree_iter_search_key(iter)); + memcpy(&dst->pos, &src->pos, + sizeof(struct btree_path) - offsetof(struct btree_path, pos)); - ret = btree_iter_traverse(iter); - if (ret) - return ret; + for (i = 0; i < BTREE_MAX_DEPTH; i++) + if (btree_node_locked(dst, i)) + six_lock_increment(&dst->l[i].b->c.lock, + __btree_lock_want(dst, i)); - iter->should_be_locked = true; - return 0; + btree_path_check_sort(trans, dst, 0); } -/* Iterate across nodes (leaf and interior nodes) */ - -struct btree *bch2_btree_iter_peek_node(struct btree_iter *iter) +static struct btree_path *btree_path_clone(struct btree_trans *trans, struct btree_path *src, + bool intent) { - struct btree *b; - int ret; - - EBUG_ON(btree_iter_type(iter) != BTREE_ITER_NODES); - bch2_btree_iter_verify(iter); - - ret = btree_iter_traverse(iter); - if (ret) - return NULL; - - b = btree_iter_node(iter, iter->level); - if (!b) - return NULL; + struct btree_path *new = btree_path_alloc(trans, src); - BUG_ON(bpos_cmp(b->key.k.p, iter->pos) < 0); - - iter->pos = iter->real_pos = b->key.k.p; - - bch2_btree_iter_verify(iter); - iter->should_be_locked = true; - - return b; + btree_path_copy(trans, new, src); + __btree_path_get(new, intent); + return new; } -struct btree *bch2_btree_iter_next_node(struct btree_iter *iter) +inline struct btree_path * __must_check +bch2_btree_path_make_mut(struct btree_trans *trans, + struct btree_path *path, bool intent) { - struct btree *b; - int ret; - - EBUG_ON(btree_iter_type(iter) != BTREE_ITER_NODES); - bch2_btree_iter_verify(iter); - - /* already got to end? */ - if (!btree_iter_node(iter, iter->level)) - return NULL; - - bch2_trans_cond_resched(iter->trans); - - btree_node_unlock(iter, iter->level); - iter->l[iter->level].b = BTREE_ITER_NO_NODE_UP; - iter->level++; - - btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE); - ret = btree_iter_traverse(iter); - if (ret) - return NULL; - - /* got to end? 
*/ - b = btree_iter_node(iter, iter->level); - if (!b) - return NULL; - - if (bpos_cmp(iter->pos, b->key.k.p) < 0) { - /* - * Haven't gotten to the end of the parent node: go back down to - * the next child node - */ - btree_iter_set_search_pos(iter, bpos_successor(iter->pos)); - - /* Unlock to avoid screwing up our lock invariants: */ - btree_node_unlock(iter, iter->level); - - iter->level = iter->min_depth; - btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE); - bch2_btree_iter_verify(iter); - - ret = btree_iter_traverse(iter); - if (ret) - return NULL; - - b = iter->l[iter->level].b; + if (path->ref > 1 || path->preserve) { + __btree_path_put(path, intent); + path = btree_path_clone(trans, path, intent); + path->preserve = false; +#ifdef CONFIG_BCACHEFS_DEBUG + path->ip_allocated = _RET_IP_; +#endif + btree_trans_verify_sorted(trans); } - iter->pos = iter->real_pos = b->key.k.p; - - bch2_btree_iter_verify(iter); - iter->should_be_locked = true; - - return b; + return path; } -/* Iterate across keys (in leaf nodes only) */ - -static void btree_iter_set_search_pos(struct btree_iter *iter, struct bpos new_pos) +static struct btree_path * __must_check +btree_path_set_pos(struct btree_trans *trans, + struct btree_path *path, struct bpos new_pos, + bool intent) { -#ifdef CONFIG_BCACHEFS_DEBUG - struct bpos old_pos = iter->real_pos; -#endif - int cmp = bpos_cmp(new_pos, iter->real_pos); - unsigned l = iter->level; + int cmp = bpos_cmp(new_pos, path->pos); + unsigned l = path->level; - EBUG_ON(iter->trans->restarted); + EBUG_ON(trans->restarted); + EBUG_ON(!path->ref); if (!cmp) - goto out; + return path; - iter->real_pos = new_pos; - iter->should_be_locked = false; + path = bch2_btree_path_make_mut(trans, path, intent); - btree_iter_check_sort(iter->trans, iter); + path->pos = new_pos; + path->should_be_locked = false; - if (unlikely(btree_iter_type(iter) == BTREE_ITER_CACHED)) { - btree_node_unlock(iter, 0); - iter->l[0].b = BTREE_ITER_NO_NODE_CACHED; - btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE); - return; + btree_path_check_sort(trans, path, cmp); + + if (unlikely(path->cached)) { + btree_node_unlock(path, 0); + path->l[0].b = BTREE_ITER_NO_NODE_CACHED; + btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE); + goto out; } - l = btree_iter_up_until_good_node(iter, cmp); + l = btree_path_up_until_good_node(trans, path, cmp); - if (btree_iter_node(iter, l)) { + if (btree_path_node(path, l)) { /* * We might have to skip over many keys, or just a few: try * advancing the node iterator, and if we have to skip over too @@ -1632,143 +1548,457 @@ static void btree_iter_set_search_pos(struct btree_iter *iter, struct bpos new_p * is expensive). 
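 * (with the max_advance of 8 passed to btree_path_advance_to_pos()
 * below, we give up after eight advances and fall back to
 * __btree_path_level_init(), which re-seeks the node iterator at the
 * new position)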
*/ if (cmp < 0 || - !btree_iter_advance_to_pos(iter, &iter->l[l], 8)) - __btree_iter_init(iter, l); + !btree_path_advance_to_pos(path, &path->l[l], 8)) + __btree_path_level_init(path, l); + } - /* Don't leave it locked if we're not supposed to: */ - if (btree_lock_want(iter, l) == BTREE_NODE_UNLOCKED) - btree_node_unlock(iter, l); + if (l != path->level) { + btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE); + __bch2_btree_path_unlock(path); } out: - if (l != iter->level) - btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE); - else - btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK); - - bch2_btree_iter_verify(iter); -#ifdef CONFIG_BCACHEFS_DEBUG - trace_iter_set_search_pos(iter->trans->ip, _RET_IP_, - iter->btree_id, - &old_pos, &new_pos, l); -#endif + bch2_btree_path_verify(trans, path); + return path; } -inline bool bch2_btree_iter_advance(struct btree_iter *iter) +/* Btree path: main interface: */ + +static struct btree_path *have_path_at_pos(struct btree_trans *trans, struct btree_path *path) { - struct bpos pos = iter->k.p; - bool ret = bpos_cmp(pos, SPOS_MAX) != 0; + struct btree_path *next; - if (ret && !(iter->flags & BTREE_ITER_IS_EXTENTS)) - pos = bkey_successor(iter, pos); - bch2_btree_iter_set_pos(iter, pos); - return ret; + next = prev_btree_path(trans, path); + if (next && !btree_path_cmp(next, path)) + return next; + + next = next_btree_path(trans, path); + if (next && !btree_path_cmp(next, path)) + return next; + + return NULL; } -inline bool bch2_btree_iter_rewind(struct btree_iter *iter) +static bool have_node_at_pos(struct btree_trans *trans, struct btree_path *path) { - struct bpos pos = bkey_start_pos(&iter->k); - bool ret = (iter->flags & BTREE_ITER_ALL_SNAPSHOTS - ? bpos_cmp(pos, POS_MIN) - : bkey_cmp(pos, POS_MIN)) != 0; + struct btree_path *next; - if (ret && !(iter->flags & BTREE_ITER_IS_EXTENTS)) - pos = bkey_predecessor(iter, pos); - bch2_btree_iter_set_pos(iter, pos); - return ret; + next = prev_btree_path(trans, path); + if (next && path_l(next)->b == path_l(path)->b) + return true; + + next = next_btree_path(trans, path); + if (next && path_l(next)->b == path_l(path)->b) + return true; + + return false; +} + +static inline void __bch2_path_free(struct btree_trans *trans, struct btree_path *path) +{ + __bch2_btree_path_unlock(path); + btree_path_list_remove(trans, path); + trans->paths_allocated &= ~(1ULL << path->idx); } -static inline bool btree_iter_set_pos_to_next_leaf(struct btree_iter *iter) +void bch2_path_put(struct btree_trans *trans, struct btree_path *path, bool intent) { - struct bpos next_pos = iter->l[0].b->key.k.p; - bool ret = bpos_cmp(next_pos, SPOS_MAX) != 0; + struct btree_path *dup; + + EBUG_ON(trans->paths + path->idx != path); + EBUG_ON(!path->ref); + + if (!__btree_path_put(path, intent)) + return; /* - * Typically, we don't want to modify iter->pos here, since that - * indicates where we searched from - unless we got to the end of the - * btree, in that case we want iter->pos to reflect that: + * Perhaps instead we should check for duplicate paths in traverse_all: */ - if (ret) - btree_iter_set_search_pos(iter, bpos_successor(next_pos)); - else - bch2_btree_iter_set_pos(iter, SPOS_MAX); + if (path->preserve && + (dup = have_path_at_pos(trans, path))) { + dup->preserve = true; + path->preserve = false; + } - return ret; + if (!path->preserve && + have_node_at_pos(trans, path)) + __bch2_path_free(trans, path); } -static inline bool btree_iter_set_pos_to_prev_leaf(struct btree_iter *iter) +noinline __cold +void 
bch2_dump_trans_paths_updates(struct btree_trans *trans)
{
-	struct bpos next_pos = iter->l[0].b->data->min_key;
-	bool ret = bpos_cmp(next_pos, POS_MIN) != 0;
+	struct btree_path *path;
+	struct btree_insert_entry *i;
+	unsigned idx;
+	char buf[300];

-	if (ret)
-		btree_iter_set_search_pos(iter, bpos_predecessor(next_pos));
-	else
-		bch2_btree_iter_set_pos(iter, POS_MIN);
+	btree_trans_verify_sorted(trans);

-	return ret;
+	trans_for_each_path_inorder(trans, path, idx)
+		printk(KERN_ERR "path: idx %u ref %u:%u%s btree %s pos %s %pS\n",
+		       path->idx, path->ref, path->intent_ref,
+		       path->preserve ? " preserve" : "",
+		       bch2_btree_ids[path->btree_id],
+		       (bch2_bpos_to_text(&PBUF(buf), path->pos), buf),
+#ifdef CONFIG_BCACHEFS_DEBUG
+		       (void *) path->ip_allocated
+#else
+		       NULL
+#endif
+		       );
+
+	trans_for_each_update(trans, i)
+		printk(KERN_ERR "update: btree %s %s %pS\n",
+		       bch2_btree_ids[i->btree_id],
+		       (bch2_bkey_val_to_text(&PBUF(buf), trans->c, bkey_i_to_s_c(i->k)), buf),
+		       (void *) i->ip_allocated);
}

-static inline struct bkey_i *btree_trans_peek_updates(struct btree_iter *iter,
-						      struct bpos pos)
+static struct btree_path *btree_path_alloc(struct btree_trans *trans,
+					   struct btree_path *pos)
{
-	struct btree_insert_entry *i;
+	struct btree_path *path;
+	unsigned idx;

-	if (!(iter->flags & BTREE_ITER_WITH_UPDATES))
-		return NULL;
+	if (unlikely(trans->paths_allocated ==
+		     ~((~0ULL << 1) << (BTREE_ITER_MAX - 1)))) {
+		bch2_dump_trans_paths_updates(trans);
+		panic("trans path overflow\n");
+	}

-	trans_for_each_update(iter->trans, i)
-		if ((cmp_int(iter->btree_id, i->iter->btree_id) ?:
-		     bkey_cmp(pos, i->k->k.p)) <= 0) {
-			if (iter->btree_id == i->iter->btree_id)
-				return i->k;
-			break;
-		}
+	idx = __ffs64(~trans->paths_allocated);
+	trans->paths_allocated |= 1ULL << idx;

-	return NULL;
+	path = &trans->paths[idx];
+
+	path->idx = idx;
+	path->ref = 0;
+	path->intent_ref = 0;
+	path->nodes_locked = 0;
+	path->nodes_intent_locked = 0;
+
+	btree_path_list_add(trans, pos, path);
+	return path;
}

-/**
- * bch2_btree_iter_peek: returns first key greater than or equal to iterator's
- * current position
- */
-struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
+struct btree_path *bch2_path_get(struct btree_trans *trans, bool cached,
+				 enum btree_id btree_id, struct bpos pos,
+				 unsigned locks_want, unsigned level,
+				 bool intent)
{
-	struct bpos search_key = btree_iter_search_key(iter);
-	struct bkey_i *next_update;
-	struct bkey_s_c k;
-	int ret;
-
-	EBUG_ON(btree_iter_type(iter) != BTREE_ITER_KEYS);
-	bch2_btree_iter_verify(iter);
-	bch2_btree_iter_verify_entry_exit(iter);
-start:
-	next_update = btree_trans_peek_updates(iter, search_key);
-	btree_iter_set_search_pos(iter, search_key);
+	struct btree_path *path, *path_pos = NULL;
+	int i;

-	while (1) {
-		ret = btree_iter_traverse(iter);
-		if (unlikely(ret))
-			return bkey_s_c_err(ret);
+	BUG_ON(trans->restarted);

-		k = btree_iter_level_peek(iter, &iter->l[0]);
+	trans_for_each_path_inorder(trans, path, i) {
+		if (__btree_path_cmp(path,
+				     btree_id,
+				     cached,
+				     pos,
+				     level) > 0)
+			break;

-		if (next_update &&
-		    bpos_cmp(next_update->k.p, iter->real_pos) <= 0) {
-			iter->k = next_update->k;
-			k = bkey_i_to_s_c(next_update);
-		}
+		path_pos = path;
+	}

-		if (likely(k.k)) {
-			if (bkey_deleted(k.k)) {
-				search_key = bkey_successor(iter, k.k->p);
-				goto start;
-			}
+	if (path_pos &&
+	    path_pos->cached == cached &&
+	    path_pos->btree_id == btree_id &&
+	    path_pos->level == level) {
+		__btree_path_get(path_pos, intent);
+		path = btree_path_set_pos(trans, 
path_pos, pos, intent); + path->preserve = true; + } else { + path = btree_path_alloc(trans, path_pos); + path_pos = NULL; + + __btree_path_get(path, intent); + path->pos = pos; + path->btree_id = btree_id; + path->cached = cached; + path->preserve = true; + path->uptodate = BTREE_ITER_NEED_TRAVERSE; + path->should_be_locked = false; + path->level = level; + path->locks_want = locks_want; + path->nodes_locked = 0; + path->nodes_intent_locked = 0; + for (i = 0; i < ARRAY_SIZE(path->l); i++) + path->l[i].b = BTREE_ITER_NO_NODE_INIT; +#ifdef CONFIG_BCACHEFS_DEBUG + path->ip_allocated = _RET_IP_; +#endif + btree_trans_verify_sorted(trans); + } - break; + if (path->intent_ref) + locks_want = max(locks_want, level + 1); + + /* + * If the path has locks_want greater than requested, we don't downgrade + * it here - on transaction restart because btree node split needs to + * upgrade locks, we might be putting/getting the iterator again. + * Downgrading iterators only happens via bch2_trans_downgrade(), after + * a successful transaction commit. + */ + + locks_want = min(locks_want, BTREE_MAX_DEPTH); + if (locks_want > path->locks_want) { + path->locks_want = locks_want; + btree_path_get_locks(trans, path, true, _THIS_IP_); + } + + return path; +} + +inline struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *path, struct bkey *u) +{ + + struct bkey_s_c k; + + BUG_ON(path->uptodate != BTREE_ITER_UPTODATE); + + if (!path->cached) { + struct btree_path_level *l = path_l(path); + struct bkey_packed *_k = + bch2_btree_node_iter_peek_all(&l->iter, l->b); + + k = _k ? bkey_disassemble(l->b, _k, u) : bkey_s_c_null; + + EBUG_ON(k.k && bkey_deleted(k.k) && bpos_cmp(k.k->p, path->pos) == 0); + + if (!k.k || bpos_cmp(path->pos, k.k->p)) + goto hole; + } else { + struct bkey_cached *ck = (void *) path->l[0].b; + + EBUG_ON(path->btree_id != ck->key.btree_id || + bkey_cmp(path->pos, ck->key.pos)); + + /* BTREE_ITER_CACHED_NOFILL? 
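+	 * a cached key that hasn't been filled in yet (!ck->valid) is
+	 * returned as a hole at the current position rather than read through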
*/ + if (unlikely(!ck->valid)) + goto hole; + + k = bkey_i_to_s_c(ck->k); + } + + return k; +hole: + bkey_init(u); + u->p = path->pos; + return (struct bkey_s_c) { u, NULL }; +} + +/* Btree iterators: */ + +int __must_check +__bch2_btree_iter_traverse(struct btree_iter *iter) +{ + return bch2_btree_path_traverse(iter->trans, iter->path, iter->flags); +} + +int __must_check +bch2_btree_iter_traverse(struct btree_iter *iter) +{ + int ret; + + iter->path = btree_path_set_pos(iter->trans, iter->path, + btree_iter_search_key(iter), + iter->flags & BTREE_ITER_INTENT); + + ret = bch2_btree_path_traverse(iter->trans, iter->path, iter->flags); + if (ret) + return ret; + + iter->path->should_be_locked = true; + return 0; +} + +/* Iterate across nodes (leaf and interior nodes) */ + +struct btree *bch2_btree_iter_peek_node(struct btree_iter *iter) +{ + struct btree *b = NULL; + int ret; + + EBUG_ON(iter->path->cached); + bch2_btree_iter_verify(iter); + + ret = bch2_btree_path_traverse(iter->trans, iter->path, iter->flags); + if (ret) + goto out; + + b = btree_path_node(iter->path, iter->path->level); + if (!b) + goto out; + + BUG_ON(bpos_cmp(b->key.k.p, iter->pos) < 0); + + bkey_init(&iter->k); + iter->k.p = iter->pos = b->key.k.p; + iter->path->should_be_locked = true; +out: + bch2_btree_iter_verify_entry_exit(iter); + bch2_btree_iter_verify(iter); + + return b; +} + +struct btree *bch2_btree_iter_next_node(struct btree_iter *iter) +{ + struct btree_trans *trans = iter->trans; + struct btree_path *path = iter->path; + struct btree *b = NULL; + int ret; + + EBUG_ON(iter->path->cached); + bch2_btree_iter_verify(iter); + + /* already got to end? */ + if (!btree_path_node(path, path->level)) + goto out; + + bch2_trans_cond_resched(trans); + + btree_node_unlock(path, path->level); + path->l[path->level].b = BTREE_ITER_NO_NODE_UP; + path->level++; + + btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE); + ret = bch2_btree_path_traverse(trans, path, iter->flags); + if (ret) + goto out; + + /* got to end? */ + b = btree_path_node(path, path->level); + if (!b) + goto out; + + if (bpos_cmp(iter->pos, b->key.k.p) < 0) { + /* + * Haven't gotten to the end of the parent node: go back down to + * the next child node + */ + path = iter->path = + btree_path_set_pos(trans, path, bpos_successor(iter->pos), + iter->flags & BTREE_ITER_INTENT); + + /* Unlock to avoid screwing up our lock invariants: */ + btree_node_unlock(path, path->level); + + path->level = iter->min_depth; + btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE); + bch2_btree_iter_verify(iter); + + ret = bch2_btree_path_traverse(trans, path, iter->flags); + if (ret) { + b = NULL; + goto out; } - if (!btree_iter_set_pos_to_next_leaf(iter)) - return bkey_s_c_null; + b = path->l[path->level].b; + } + + bkey_init(&iter->k); + iter->k.p = iter->pos = b->key.k.p; + iter->path->should_be_locked = true; +out: + bch2_btree_iter_verify_entry_exit(iter); + bch2_btree_iter_verify(iter); + + return b; +} + +/* Iterate across keys (in leaf nodes only) */ + +inline bool bch2_btree_iter_advance(struct btree_iter *iter) +{ + struct bpos pos = iter->k.p; + bool ret = bpos_cmp(pos, SPOS_MAX) != 0; + + if (ret && !(iter->flags & BTREE_ITER_IS_EXTENTS)) + pos = bkey_successor(iter, pos); + bch2_btree_iter_set_pos(iter, pos); + return ret; +} + +inline bool bch2_btree_iter_rewind(struct btree_iter *iter) +{ + struct bpos pos = bkey_start_pos(&iter->k); + bool ret = (iter->flags & BTREE_ITER_ALL_SNAPSHOTS + ? 
bpos_cmp(pos, POS_MIN) + : bkey_cmp(pos, POS_MIN)) != 0; + + if (ret && !(iter->flags & BTREE_ITER_IS_EXTENTS)) + pos = bkey_predecessor(iter, pos); + bch2_btree_iter_set_pos(iter, pos); + return ret; +} + +/** + * bch2_btree_iter_peek: returns first key greater than or equal to iterator's + * current position + */ +struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter) +{ + struct btree_trans *trans = iter->trans; + struct bpos search_key = btree_iter_search_key(iter); + struct bkey_i *next_update; + struct bkey_s_c k; + int ret, cmp; + + EBUG_ON(iter->path->cached || iter->path->level); + bch2_btree_iter_verify(iter); + bch2_btree_iter_verify_entry_exit(iter); + + while (1) { + iter->path = btree_path_set_pos(trans, iter->path, search_key, + iter->flags & BTREE_ITER_INTENT); + + ret = bch2_btree_path_traverse(trans, iter->path, iter->flags); + if (unlikely(ret)) { + /* ensure that iter->k is consistent with iter->pos: */ + bch2_btree_iter_set_pos(iter, iter->pos); + k = bkey_s_c_err(ret); + goto out; + } + + next_update = iter->flags & BTREE_ITER_WITH_UPDATES + ? btree_trans_peek_updates(trans, iter->btree_id, search_key) + : NULL; + k = btree_path_level_peek_all(trans->c, &iter->path->l[0], &iter->k); + + /* * In the btree, deleted keys sort before non deleted: */ + if (k.k && bkey_deleted(k.k) && + (!next_update || + bpos_cmp(k.k->p, next_update->k.p) <= 0)) { + search_key = k.k->p; + continue; + } + + if (next_update && + bpos_cmp(next_update->k.p, + k.k ? k.k->p : iter->path->l[0].b->key.k.p) <= 0) { + iter->k = next_update->k; + k = bkey_i_to_s_c(next_update); + } + + if (likely(k.k)) { + if (likely(!bkey_deleted(k.k))) + break; + + /* Advance to next key: */ + search_key = bkey_successor(iter, k.k->p); + } else if (likely(bpos_cmp(iter->path->l[0].b->key.k.p, SPOS_MAX))) { + /* Advance to next leaf node: */ + search_key = bpos_successor(iter->path->l[0].b->key.k.p); + } else { + /* End of btree: */ + bch2_btree_iter_set_pos(iter, SPOS_MAX); + k = bkey_s_c_null; + goto out; + } } /* @@ -1780,9 +2010,18 @@ start: else if (bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0) iter->pos = bkey_start_pos(k.k); + cmp = bpos_cmp(k.k->p, iter->path->pos); + if (cmp) { + iter->path = bch2_btree_path_make_mut(trans, iter->path, + iter->flags & BTREE_ITER_INTENT); + iter->path->pos = k.k->p; + btree_path_check_sort(trans, iter->path, cmp); + } +out: + iter->path->should_be_locked = true; + bch2_btree_iter_verify_entry_exit(iter); bch2_btree_iter_verify(iter); - iter->should_be_locked = true; return k; } @@ -1804,37 +2043,49 @@ struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter) */ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter) { - struct btree_iter_level *l = &iter->l[0]; + struct btree_trans *trans = iter->trans; + struct bpos search_key = iter->pos; struct bkey_s_c k; int ret; - EBUG_ON(btree_iter_type(iter) != BTREE_ITER_KEYS); + EBUG_ON(iter->path->cached || iter->path->level); EBUG_ON(iter->flags & BTREE_ITER_WITH_UPDATES); bch2_btree_iter_verify(iter); bch2_btree_iter_verify_entry_exit(iter); - btree_iter_set_search_pos(iter, iter->pos); - while (1) { - ret = btree_iter_traverse(iter); + iter->path = btree_path_set_pos(trans, iter->path, search_key, + iter->flags & BTREE_ITER_INTENT); + + ret = bch2_btree_path_traverse(trans, iter->path, iter->flags); if (unlikely(ret)) { + /* ensure that iter->k is consistent with iter->pos: */ + bch2_btree_iter_set_pos(iter, iter->pos); k = bkey_s_c_err(ret); - goto no_key; + goto out; } - k = btree_iter_level_peek(iter, 
l); + k = btree_path_level_peek(trans, iter->path, + &iter->path->l[0], &iter->k); if (!k.k || ((iter->flags & BTREE_ITER_IS_EXTENTS) ? bkey_cmp(bkey_start_pos(k.k), iter->pos) >= 0 : bkey_cmp(k.k->p, iter->pos) > 0)) - k = btree_iter_level_prev(iter, l); + k = btree_path_level_prev(trans->c, iter->path, + &iter->path->l[0], &iter->k); - if (likely(k.k)) - break; + btree_path_check_sort(trans, iter->path, 0); - if (!btree_iter_set_pos_to_prev_leaf(iter)) { + if (likely(k.k)) { + break; + } else if (likely(bpos_cmp(iter->path->l[0].b->data->min_key, POS_MIN))) { + /* Advance to previous leaf node: */ + search_key = bpos_predecessor(iter->path->l[0].b->data->min_key); + } else { + /* Start of btree: */ + bch2_btree_iter_set_pos(iter, POS_MIN); k = bkey_s_c_null; - goto no_key; + goto out; } } @@ -1844,19 +2095,12 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter) if (bkey_cmp(k.k->p, iter->pos) < 0) iter->pos = k.k->p; out: + iter->path->should_be_locked = true; + bch2_btree_iter_verify_entry_exit(iter); bch2_btree_iter_verify(iter); - iter->should_be_locked = true; + return k; -no_key: - /* - * btree_iter_level_peek() may have set iter->k to a key we didn't want, and - * then we errored going to the previous leaf - make sure it's - * consistent with iter->pos: - */ - bkey_init(&iter->k); - iter->k.p = iter->pos; - goto out; } /** @@ -1873,12 +2117,12 @@ struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *iter) struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) { + struct btree_trans *trans = iter->trans; struct bpos search_key; struct bkey_s_c k; int ret; - EBUG_ON(btree_iter_type(iter) != BTREE_ITER_KEYS && - btree_iter_type(iter) != BTREE_ITER_CACHED); + EBUG_ON(iter->path->level); bch2_btree_iter_verify(iter); bch2_btree_iter_verify_entry_exit(iter); @@ -1892,50 +2136,41 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) } search_key = btree_iter_search_key(iter); - btree_iter_set_search_pos(iter, search_key); + iter->path = btree_path_set_pos(trans, iter->path, search_key, + iter->flags & BTREE_ITER_INTENT); - ret = btree_iter_traverse(iter); + ret = bch2_btree_path_traverse(trans, iter->path, iter->flags); if (unlikely(ret)) return bkey_s_c_err(ret); - if (btree_iter_type(iter) == BTREE_ITER_CACHED || - !(iter->flags & BTREE_ITER_IS_EXTENTS)) { + if (!(iter->flags & BTREE_ITER_IS_EXTENTS)) { struct bkey_i *next_update; - struct bkey_cached *ck; - switch (btree_iter_type(iter)) { - case BTREE_ITER_KEYS: - k = btree_iter_level_peek_all(iter, &iter->l[0]); - EBUG_ON(k.k && bkey_deleted(k.k) && bpos_cmp(k.k->p, iter->pos) == 0); - break; - case BTREE_ITER_CACHED: - ck = (void *) iter->l[0].b; - EBUG_ON(iter->btree_id != ck->key.btree_id || - bkey_cmp(iter->pos, ck->key.pos)); - BUG_ON(!ck->valid); - - k = bkey_i_to_s_c(ck->k); - break; - case BTREE_ITER_NODES: - BUG(); - } + next_update = iter->flags & BTREE_ITER_WITH_UPDATES + ? 
btree_trans_peek_updates(trans, iter->btree_id, search_key) + : NULL; - next_update = btree_trans_peek_updates(iter, search_key); if (next_update && - (!k.k || bpos_cmp(next_update->k.p, k.k->p) <= 0)) { + !bpos_cmp(next_update->k.p, iter->pos)) { iter->k = next_update->k; k = bkey_i_to_s_c(next_update); + } else { + k = bch2_btree_path_peek_slot(iter->path, &iter->k); } } else { - if ((iter->flags & BTREE_ITER_INTENT)) { - struct btree_iter *child = - btree_iter_child_alloc(iter, _THIS_IP_); + struct bpos next; + + if (iter->flags & BTREE_ITER_INTENT) { + struct btree_iter iter2; - btree_iter_copy(child, iter); - k = bch2_btree_iter_peek(child); + bch2_trans_copy_iter(&iter2, iter); + k = bch2_btree_iter_peek(&iter2); - if (k.k && !bkey_err(k)) - iter->k = child->k; + if (k.k && !bkey_err(k)) { + iter->k = iter2.k; + k.k = &iter->k; + } + bch2_trans_iter_exit(trans, &iter2); } else { struct bpos pos = iter->pos; @@ -1945,19 +2180,8 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) if (unlikely(bkey_err(k))) return k; - } - if (!(iter->flags & BTREE_ITER_IS_EXTENTS)) { - if (!k.k || - ((iter->flags & BTREE_ITER_ALL_SNAPSHOTS) - ? bpos_cmp(iter->pos, k.k->p) - : bkey_cmp(iter->pos, k.k->p))) { - bkey_init(&iter->k); - iter->k.p = iter->pos; - k = (struct bkey_s_c) { &iter->k, NULL }; - } - } else { - struct bpos next = k.k ? bkey_start_pos(k.k) : POS_MAX; + next = k.k ? bkey_start_pos(k.k) : POS_MAX; if (bkey_cmp(iter->pos, next) < 0) { bkey_init(&iter->k); @@ -1974,9 +2198,10 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) } } + iter->path->should_be_locked = true; + bch2_btree_iter_verify_entry_exit(iter); bch2_btree_iter_verify(iter); - iter->should_be_locked = true; return k; } @@ -1997,35 +2222,14 @@ struct bkey_s_c bch2_btree_iter_prev_slot(struct btree_iter *iter) return bch2_btree_iter_peek_slot(iter); } -static inline void bch2_btree_iter_init(struct btree_trans *trans, - struct btree_iter *iter, enum btree_id btree_id) -{ - struct bch_fs *c = trans->c; - unsigned i; - - iter->trans = trans; - iter->uptodate = BTREE_ITER_NEED_TRAVERSE; - iter->btree_id = btree_id; - iter->real_pos = POS_MIN; - iter->level = 0; - iter->min_depth = 0; - iter->locks_want = 0; - iter->nodes_locked = 0; - iter->nodes_intent_locked = 0; - for (i = 0; i < ARRAY_SIZE(iter->l); i++) - iter->l[i].b = BTREE_ITER_NO_NODE_INIT; - - prefetch(c->btree_roots[btree_id].b); -} - /* new transactional stuff: */ -static inline void btree_iter_verify_sorted_ref(struct btree_trans *trans, - struct btree_iter *iter) +static inline void btree_path_verify_sorted_ref(struct btree_trans *trans, + struct btree_path *path) { - EBUG_ON(iter->sorted_idx >= trans->nr_sorted); - EBUG_ON(trans->sorted[iter->sorted_idx] != iter->idx); - EBUG_ON(!(trans->iters_linked & (1ULL << iter->idx))); + EBUG_ON(path->sorted_idx >= trans->nr_sorted); + EBUG_ON(trans->sorted[path->sorted_idx] != path->idx); + EBUG_ON(!(trans->paths_allocated & (1ULL << path->idx))); } static inline void btree_trans_verify_sorted_refs(struct btree_trans *trans) @@ -2034,432 +2238,180 @@ static inline void btree_trans_verify_sorted_refs(struct btree_trans *trans) unsigned i; for (i = 0; i < trans->nr_sorted; i++) - btree_iter_verify_sorted_ref(trans, trans->iters + trans->sorted[i]); + btree_path_verify_sorted_ref(trans, trans->paths + trans->sorted[i]); #endif } -static inline void btree_trans_verify_sorted(struct btree_trans *trans) +static void btree_trans_verify_sorted(struct btree_trans *trans) { #ifdef 
CONFIG_BCACHEFS_DEBUG - struct btree_iter *iter, *prev = NULL; + struct btree_path *path, *prev = NULL; + unsigned i; - trans_for_each_iter_inorder(trans, iter) - BUG_ON(prev && btree_iter_cmp(prev, iter) > 0); + trans_for_each_path_inorder(trans, path, i) { + BUG_ON(prev && btree_path_cmp(prev, path) > 0); + prev = path; + } #endif } -static inline void btree_iter_swap(struct btree_trans *trans, - struct btree_iter *l, struct btree_iter *r) +static inline void btree_path_swap(struct btree_trans *trans, + struct btree_path *l, struct btree_path *r) { swap(l->sorted_idx, r->sorted_idx); swap(trans->sorted[l->sorted_idx], trans->sorted[r->sorted_idx]); - btree_iter_verify_sorted_ref(trans, l); - btree_iter_verify_sorted_ref(trans, r); + btree_path_verify_sorted_ref(trans, l); + btree_path_verify_sorted_ref(trans, r); } -static void btree_trans_sort_iters(struct btree_trans *trans) +static void btree_path_check_sort(struct btree_trans *trans, struct btree_path *path, + int cmp) { - bool swapped = false; - int i, l = 0, r = trans->nr_sorted; - - while (1) { - for (i = l; i + 1 < r; i++) { - if (btree_iter_cmp(trans->iters + trans->sorted[i], - trans->iters + trans->sorted[i + 1]) > 0) { - swap(trans->sorted[i], trans->sorted[i + 1]); - trans->iters[trans->sorted[i]].sorted_idx = i; - trans->iters[trans->sorted[i + 1]].sorted_idx = i + 1; - swapped = true; - } - } + struct btree_path *n; - if (!swapped) - break; + if (cmp <= 0) { + n = prev_btree_path(trans, path); + if (n && btree_path_cmp(n, path) > 0) { + do { + btree_path_swap(trans, n, path); + n = prev_btree_path(trans, path); + } while (n && btree_path_cmp(n, path) > 0); - r--; - swapped = false; - - for (i = r - 2; i >= l; --i) { - if (btree_iter_cmp(trans->iters + trans->sorted[i], - trans->iters + trans->sorted[i + 1]) > 0) { - swap(trans->sorted[i], - trans->sorted[i + 1]); - trans->iters[trans->sorted[i]].sorted_idx = i; - trans->iters[trans->sorted[i + 1]].sorted_idx = i + 1; - swapped = true; - } + goto out; } - - if (!swapped) - break; - - l++; - swapped = false; - } - - btree_trans_verify_sorted_refs(trans); - btree_trans_verify_sorted(trans); -} - -static void btree_iter_check_sort(struct btree_trans *trans, struct btree_iter *iter) -{ - struct btree_iter *n; - - EBUG_ON(iter->sorted_idx == U8_MAX); - - n = next_btree_iter(trans, iter); - if (n && btree_iter_cmp(iter, n) > 0) { - do { - btree_iter_swap(trans, iter, n); - n = next_btree_iter(trans, iter); - } while (n && btree_iter_cmp(iter, n) > 0); - - return; } - n = prev_btree_iter(trans, iter); - if (n && btree_iter_cmp(n, iter) > 0) { - do { - btree_iter_swap(trans, n, iter); - n = prev_btree_iter(trans, iter); - } while (n && btree_iter_cmp(n, iter) > 0); + if (cmp >= 0) { + n = next_btree_path(trans, path); + if (n && btree_path_cmp(path, n) > 0) { + do { + btree_path_swap(trans, path, n); + n = next_btree_path(trans, path); + } while (n && btree_path_cmp(path, n) > 0); + } } - +out: btree_trans_verify_sorted(trans); } -static inline void btree_iter_list_remove(struct btree_trans *trans, - struct btree_iter *iter) +static inline void btree_path_list_remove(struct btree_trans *trans, + struct btree_path *path) { unsigned i; - EBUG_ON(iter->sorted_idx >= trans->nr_sorted); + EBUG_ON(path->sorted_idx >= trans->nr_sorted); - array_remove_item(trans->sorted, trans->nr_sorted, iter->sorted_idx); + array_remove_item(trans->sorted, trans->nr_sorted, path->sorted_idx); - for (i = iter->sorted_idx; i < trans->nr_sorted; i++) - trans->iters[trans->sorted[i]].sorted_idx = i; + for 
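
The deleted btree_trans_sort_iters() re-sorted the whole array with a bidirectional bubble sort on every allocation failure path; btree_path_check_sort() instead bubbles just the one path whose position changed, in the direction given by the cmp hint. The same idea on a plain int array (check_sort() and swap_int() are invented names for this sketch):

#include <stdio.h>

static void swap_int(int *a, int *b) { int t = *a; *a = *b; *b = t; }

/*
 * a[i] just changed; restore ascending order by swapping it with its
 * neighbors -- O(distance moved) instead of a full re-sort.  The cmp
 * hint mirrors btree_path_check_sort(): <= 0 means the element may
 * have moved left, >= 0 right.
 */
static void check_sort(int *a, int n, int i, int cmp)
{
	if (cmp <= 0 && i > 0 && a[i - 1] > a[i]) {
		do {
			swap_int(&a[i - 1], &a[i]);
			i--;
		} while (i > 0 && a[i - 1] > a[i]);
		return;
	}

	if (cmp >= 0)
		while (i + 1 < n && a[i] > a[i + 1]) {
			swap_int(&a[i], &a[i + 1]);
			i++;
		}
}

int main(void)
{
	int a[] = { 1, 3, 9, 5, 7 };	/* a[2] was just bumped to 9 */

	check_sort(a, 5, 2, 1);		/* cmp > 0: it can only move right */
	for (int i = 0; i < 5; i++)
		printf("%d ", a[i]);	/* 1 3 5 7 9 */
	printf("\n");
	return 0;
}
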
(i = path->sorted_idx; i < trans->nr_sorted; i++) + trans->paths[trans->sorted[i]].sorted_idx = i; - iter->sorted_idx = U8_MAX; + path->sorted_idx = U8_MAX; btree_trans_verify_sorted_refs(trans); } -static inline void btree_iter_list_add(struct btree_trans *trans, - struct btree_iter *pos, - struct btree_iter *iter) +static inline void btree_path_list_add(struct btree_trans *trans, + struct btree_path *pos, + struct btree_path *path) { unsigned i; btree_trans_verify_sorted_refs(trans); - iter->sorted_idx = pos ? pos->sorted_idx : trans->nr_sorted; + path->sorted_idx = pos ? pos->sorted_idx + 1 : 0; - array_insert_item(trans->sorted, trans->nr_sorted, iter->sorted_idx, iter->idx); + array_insert_item(trans->sorted, trans->nr_sorted, path->sorted_idx, path->idx); - for (i = iter->sorted_idx; i < trans->nr_sorted; i++) - trans->iters[trans->sorted[i]].sorted_idx = i; + for (i = path->sorted_idx; i < trans->nr_sorted; i++) + trans->paths[trans->sorted[i]].sorted_idx = i; btree_trans_verify_sorted_refs(trans); } -static void btree_iter_child_free(struct btree_iter *iter) -{ - struct btree_iter *child = btree_iter_child(iter); - - if (child) { - bch2_trans_iter_free(iter->trans, child); - iter->child_idx = U8_MAX; - } -} - -static struct btree_iter *btree_iter_child_alloc(struct btree_iter *iter, - unsigned long ip) -{ - struct btree_trans *trans = iter->trans; - struct btree_iter *child = btree_iter_child(iter); - - if (!child) { - child = btree_trans_iter_alloc(trans, iter); - child->ip_allocated = ip; - iter->child_idx = child->idx; - - trans->iters_live |= 1ULL << child->idx; - trans->iters_touched |= 1ULL << child->idx; - } - - return child; -} - -static inline void __bch2_trans_iter_free(struct btree_trans *trans, - unsigned idx) -{ - btree_iter_child_free(&trans->iters[idx]); - - btree_iter_list_remove(trans, &trans->iters[idx]); - - __bch2_btree_iter_unlock(&trans->iters[idx]); - trans->iters_linked &= ~(1ULL << idx); - trans->iters_live &= ~(1ULL << idx); - trans->iters_touched &= ~(1ULL << idx); -} - -int bch2_trans_iter_put(struct btree_trans *trans, - struct btree_iter *iter) -{ - int ret; - - if (IS_ERR_OR_NULL(iter)) - return 0; - - BUG_ON(trans->iters + iter->idx != iter); - BUG_ON(!btree_iter_live(trans, iter)); - - ret = btree_iter_err(iter); - - if (!(trans->iters_touched & (1ULL << iter->idx)) && - !(iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT)) - __bch2_trans_iter_free(trans, iter->idx); - - trans->iters_live &= ~(1ULL << iter->idx); - return ret; -} - -int bch2_trans_iter_free(struct btree_trans *trans, - struct btree_iter *iter) -{ - if (IS_ERR_OR_NULL(iter)) - return 0; - - set_btree_iter_dontneed(trans, iter); - - return bch2_trans_iter_put(trans, iter); -} - -noinline __cold -static void btree_trans_iter_alloc_fail(struct btree_trans *trans) -{ - - struct btree_iter *iter; - struct btree_insert_entry *i; - char buf[100]; - - btree_trans_sort_iters(trans); - - trans_for_each_iter_inorder(trans, iter) - printk(KERN_ERR "iter: btree %s pos %s%s%s%s %pS\n", - bch2_btree_ids[iter->btree_id], - (bch2_bpos_to_text(&PBUF(buf), iter->real_pos), buf), - btree_iter_live(trans, iter) ? " live" : "", - (trans->iters_touched & (1ULL << iter->idx)) ? " touched" : "", - iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT ? 
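
btree_path_list_add()/_remove() keep two views consistent: sorted[], the array of path indices in key order, and each path's sorted_idx back-pointer; every insertion or removal has to renumber the tail of sorted[]. Reduced to a toy (entries[], list_add(), list_remove() are invented; one-byte elements so the memmove counts are in elements):

#include <assert.h>
#include <string.h>

#define MAX_ENTRIES 64

struct entry { unsigned char sorted_idx; };

static struct entry entries[MAX_ENTRIES];
static unsigned char sorted[MAX_ENTRIES];	/* indices into entries[], in key order */
static unsigned nr_sorted;

/* Insert entries[idx] at position pos in sorted[], then renumber the tail: */
static void list_add(unsigned pos, unsigned idx)
{
	memmove(&sorted[pos + 1], &sorted[pos], nr_sorted - pos);
	sorted[pos] = idx;
	nr_sorted++;

	for (unsigned i = pos; i < nr_sorted; i++)
		entries[sorted[i]].sorted_idx = i;
}

static void list_remove(unsigned idx)
{
	unsigned pos = entries[idx].sorted_idx;

	memmove(&sorted[pos], &sorted[pos + 1], nr_sorted - 1 - pos);
	nr_sorted--;

	for (unsigned i = pos; i < nr_sorted; i++)
		entries[sorted[i]].sorted_idx = i;
}

int main(void)
{
	list_add(0, 5);
	list_add(0, 9);			/* entries[9] sorts first now */
	assert(entries[9].sorted_idx == 0 && entries[5].sorted_idx == 1);

	list_remove(9);
	assert(nr_sorted == 1 && entries[5].sorted_idx == 0);
	return 0;
}
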
" keep" : "", - (void *) iter->ip_allocated); - - trans_for_each_update(trans, i) { - char buf[300]; - - bch2_bkey_val_to_text(&PBUF(buf), trans->c, bkey_i_to_s_c(i->k)); - printk(KERN_ERR "update: btree %s %s\n", - bch2_btree_ids[i->iter->btree_id], buf); - } - panic("trans iter oveflow\n"); -} - -static struct btree_iter *btree_trans_iter_alloc(struct btree_trans *trans, - struct btree_iter *pos) -{ - struct btree_iter *iter; - unsigned idx; - - if (unlikely(trans->iters_linked == - ~((~0ULL << 1) << (BTREE_ITER_MAX - 1)))) - btree_trans_iter_alloc_fail(trans); - - idx = __ffs64(~trans->iters_linked); - iter = &trans->iters[idx]; - - iter->trans = trans; - iter->idx = idx; - iter->child_idx = U8_MAX; - iter->sorted_idx = U8_MAX; - iter->flags = 0; - iter->nodes_locked = 0; - iter->nodes_intent_locked = 0; - trans->iters_linked |= 1ULL << idx; - - btree_iter_list_add(trans, pos, iter); - return iter; -} - -static void btree_iter_copy(struct btree_iter *dst, struct btree_iter *src) +void bch2_trans_iter_exit(struct btree_trans *trans, struct btree_iter *iter) { - unsigned i; - - __bch2_btree_iter_unlock(dst); - btree_iter_child_free(dst); - - memcpy(&dst->flags, &src->flags, - sizeof(struct btree_iter) - offsetof(struct btree_iter, flags)); - - for (i = 0; i < BTREE_MAX_DEPTH; i++) - if (btree_node_locked(dst, i)) - six_lock_increment(&dst->l[i].b->c.lock, - __btree_lock_want(dst, i)); - - dst->flags &= ~BTREE_ITER_KEEP_UNTIL_COMMIT; - dst->flags &= ~BTREE_ITER_SET_POS_AFTER_COMMIT; - - btree_iter_check_sort(dst->trans, dst); + if (iter->path) + bch2_path_put(trans, iter->path, + iter->flags & BTREE_ITER_INTENT); + iter->path = NULL; } -struct btree_iter *__bch2_trans_get_iter(struct btree_trans *trans, - unsigned btree_id, struct bpos pos, - unsigned locks_want, - unsigned depth, - unsigned flags) +static void __bch2_trans_iter_init(struct btree_trans *trans, + struct btree_iter *iter, + unsigned btree_id, struct bpos pos, + unsigned locks_want, + unsigned depth, + unsigned flags) { - struct btree_iter *iter, *best = NULL; - struct bpos real_pos, pos_min = POS_MIN; - EBUG_ON(trans->restarted); - if ((flags & BTREE_ITER_TYPE) != BTREE_ITER_NODES && - btree_node_type_is_extents(btree_id) && - !(flags & BTREE_ITER_NOT_EXTENTS) && - !(flags & BTREE_ITER_ALL_SNAPSHOTS)) + if (!(flags & (BTREE_ITER_ALL_SNAPSHOTS|BTREE_ITER_NOT_EXTENTS)) && + btree_node_type_is_extents(btree_id)) flags |= BTREE_ITER_IS_EXTENTS; - if ((flags & BTREE_ITER_TYPE) != BTREE_ITER_NODES && - !btree_type_has_snapshots(btree_id)) + if (!btree_type_has_snapshots(btree_id) && + !(flags & __BTREE_ITER_ALL_SNAPSHOTS)) flags &= ~BTREE_ITER_ALL_SNAPSHOTS; if (!(flags & BTREE_ITER_ALL_SNAPSHOTS)) pos.snapshot = btree_type_has_snapshots(btree_id) ? 
U32_MAX : 0; - real_pos = pos; - - if ((flags & BTREE_ITER_IS_EXTENTS) && - bkey_cmp(pos, POS_MAX)) - real_pos = bpos_nosnap_successor(pos); - - trans_for_each_iter(trans, iter) { - if (btree_iter_type(iter) != (flags & BTREE_ITER_TYPE)) - continue; - - if (iter->btree_id != btree_id) - continue; - - if (best) { - int cmp = bkey_cmp(bpos_diff(best->real_pos, real_pos), - bpos_diff(iter->real_pos, real_pos)); - - if (cmp < 0 || - ((cmp == 0 && btree_iter_keep(trans, iter)))) - continue; - } - - best = iter; - } - - if (!best) { - iter = btree_trans_iter_alloc(trans, NULL); - bch2_btree_iter_init(trans, iter, btree_id); - } else if (btree_iter_keep(trans, best)) { - iter = btree_trans_iter_alloc(trans, best); - btree_iter_copy(iter, best); - } else { - iter = best; - } - - trans->iters_live |= 1ULL << iter->idx; - trans->iters_touched |= 1ULL << iter->idx; - - iter->flags = flags; - - iter->snapshot = pos.snapshot; - - /* - * If the iterator has locks_want greater than requested, we explicitly - * do not downgrade it here - on transaction restart because btree node - * split needs to upgrade locks, we might be putting/getting the - * iterator again. Downgrading iterators only happens via an explicit - * bch2_trans_downgrade(). - */ - - locks_want = min(locks_want, BTREE_MAX_DEPTH); - if (locks_want > iter->locks_want) { - iter->locks_want = locks_want; - btree_iter_get_locks(iter, true, _THIS_IP_); - } - - while (iter->level != depth) { - btree_node_unlock(iter, iter->level); - iter->l[iter->level].b = BTREE_ITER_NO_NODE_INIT; - iter->uptodate = BTREE_ITER_NEED_TRAVERSE; - if (iter->level < depth) - iter->level++; - else - iter->level--; - } - + iter->trans = trans; + iter->path = NULL; + iter->btree_id = btree_id; iter->min_depth = depth; + iter->flags = flags; + iter->snapshot = pos.snapshot; + iter->pos = pos; + iter->k.type = KEY_TYPE_deleted; + iter->k.p = pos; + iter->k.size = 0; - bch2_btree_iter_set_pos(iter, pos); - btree_iter_set_search_pos(iter, real_pos); - - trace_trans_get_iter(_RET_IP_, trans->ip, - btree_id, - &real_pos, locks_want, iter->uptodate, - best ? &best->real_pos : &pos_min, - best ? best->locks_want : U8_MAX, - best ? 
best->uptodate : U8_MAX); - - return iter; + iter->path = bch2_path_get(trans, + flags & BTREE_ITER_CACHED, + btree_id, + iter->pos, + locks_want, + depth, + flags & BTREE_ITER_INTENT); } -struct btree_iter *bch2_trans_get_node_iter(struct btree_trans *trans, - enum btree_id btree_id, - struct bpos pos, - unsigned locks_want, - unsigned depth, - unsigned flags) +void bch2_trans_iter_init(struct btree_trans *trans, + struct btree_iter *iter, + unsigned btree_id, struct bpos pos, + unsigned flags) { - struct btree_iter *iter = - __bch2_trans_get_iter(trans, btree_id, pos, - locks_want, depth, - BTREE_ITER_NODES| - BTREE_ITER_NOT_EXTENTS| - BTREE_ITER_ALL_SNAPSHOTS| - flags); - - BUG_ON(bkey_cmp(iter->pos, pos)); - BUG_ON(iter->locks_want != min(locks_want, BTREE_MAX_DEPTH)); - BUG_ON(iter->level != depth); - BUG_ON(iter->min_depth != depth); - iter->ip_allocated = _RET_IP_; - - return iter; + __bch2_trans_iter_init(trans, iter, btree_id, pos, + 0, 0, flags); } -struct btree_iter *__bch2_trans_copy_iter(struct btree_trans *trans, - struct btree_iter *src) +void bch2_trans_node_iter_init(struct btree_trans *trans, + struct btree_iter *iter, + enum btree_id btree_id, + struct bpos pos, + unsigned locks_want, + unsigned depth, + unsigned flags) { - struct btree_iter *iter; - - iter = btree_trans_iter_alloc(trans, src); - btree_iter_copy(iter, src); - - trans->iters_live |= 1ULL << iter->idx; - /* - * We don't need to preserve this iter since it's cheap to copy it - * again - this will cause trans_iter_put() to free it right away: - */ - set_btree_iter_dontneed(trans, iter); + __bch2_trans_iter_init(trans, iter, btree_id, pos, locks_want, depth, + BTREE_ITER_NOT_EXTENTS| + __BTREE_ITER_ALL_SNAPSHOTS| + BTREE_ITER_ALL_SNAPSHOTS| + flags); + BUG_ON(iter->path->locks_want < min(locks_want, BTREE_MAX_DEPTH)); + BUG_ON(iter->path->level != depth); + BUG_ON(iter->min_depth != depth); +} - return iter; +void bch2_trans_copy_iter(struct btree_iter *dst, struct btree_iter *src) +{ + *dst = *src; + if (src->path) + __btree_path_get(src->path, src->flags & BTREE_ITER_INTENT); } void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size) @@ -2500,20 +2452,6 @@ void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size) return p; } -inline void bch2_trans_unlink_iters(struct btree_trans *trans) -{ - u64 iters = trans->iters_linked & - ~trans->iters_touched & - ~trans->iters_live; - - while (iters) { - unsigned idx = __ffs64(iters); - - iters &= ~(1ULL << idx); - __bch2_trans_iter_free(trans, idx); - } -} - /** * bch2_trans_begin() - reset a transaction after a interrupted attempt * @trans: transaction to reset @@ -2524,18 +2462,11 @@ inline void bch2_trans_unlink_iters(struct btree_trans *trans) */ void bch2_trans_begin(struct btree_trans *trans) { - struct btree_iter *iter; - - trans_for_each_iter(trans, iter) - iter->flags &= ~(BTREE_ITER_KEEP_UNTIL_COMMIT| - BTREE_ITER_SET_POS_AFTER_COMMIT); + struct btree_insert_entry *i; + struct btree_path *path; - /* - * XXX: we shouldn't be doing this if the transaction was restarted, but - * currently we still overflow transaction iterators if we do that - * */ - bch2_trans_unlink_iters(trans); - trans->iters_touched &= trans->iters_live; + trans_for_each_update(trans, i) + __btree_path_put(i->path, true); trans->extra_journal_res = 0; trans->nr_updates = 0; @@ -2552,32 +2483,44 @@ void bch2_trans_begin(struct btree_trans *trans) (void *) &trans->fs_usage_deltas->memset_start); } + trans_for_each_path(trans, path) { + path->should_be_locked = false; + + /* 
+ * XXX: we probably shouldn't be doing this if the transaction + * was restarted, but currently we still overflow transaction + * iterators if we do that + */ + if (!path->ref && !path->preserve) + __bch2_path_free(trans, path); + else if (!path->ref) + path->preserve = false; + } + bch2_trans_cond_resched(trans); if (trans->restarted) - bch2_btree_iter_traverse_all(trans); + bch2_btree_path_traverse_all(trans); trans->restarted = false; } -static void bch2_trans_alloc_iters(struct btree_trans *trans, struct bch_fs *c) +static void bch2_trans_alloc_paths(struct btree_trans *trans, struct bch_fs *c) { - size_t iters_bytes = sizeof(struct btree_iter) * BTREE_ITER_MAX; + size_t paths_bytes = sizeof(struct btree_path) * BTREE_ITER_MAX; size_t updates_bytes = sizeof(struct btree_insert_entry) * BTREE_ITER_MAX; - size_t sorted_bytes = sizeof(u8) * BTREE_ITER_MAX; void *p = NULL; BUG_ON(trans->used_mempool); #ifdef __KERNEL__ - p = this_cpu_xchg(c->btree_iters_bufs->iter, NULL); + p = this_cpu_xchg(c->btree_paths_bufs->path , NULL); #endif if (!p) - p = mempool_alloc(&trans->c->btree_iters_pool, GFP_NOFS); + p = mempool_alloc(&trans->c->btree_paths_pool, GFP_NOFS); - trans->iters = p; p += iters_bytes; + trans->paths = p; p += paths_bytes; trans->updates = p; p += updates_bytes; - trans->sorted = p; p += sorted_bytes; } void bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, @@ -2589,11 +2532,7 @@ void bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, trans->c = c; trans->ip = _RET_IP_; - /* - * reallocating iterators currently completely breaks - * bch2_trans_iter_put(), we always allocate the max: - */ - bch2_trans_alloc_iters(trans, c); + bch2_trans_alloc_paths(trans, c); if (expected_mem_bytes) { trans->mem_bytes = roundup_pow_of_two(expected_mem_bytes); @@ -2615,54 +2554,63 @@ void bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, #endif } +static void check_btree_paths_leaked(struct btree_trans *trans) +{ +#ifdef CONFIG_BCACHEFS_DEBUG + struct bch_fs *c = trans->c; + struct btree_path *path; + + trans_for_each_path(trans, path) + if (path->ref) + goto leaked; + return; +leaked: + bch_err(c, "btree paths leaked from %pS!", (void *) trans->ip); + trans_for_each_path(trans, path) + if (path->ref) + printk(KERN_ERR " btree %s %pS\n", + bch2_btree_ids[path->btree_id], + (void *) path->ip_allocated); + /* Be noisy about this: */ + bch2_fatal_error(c); +#endif +} + int bch2_trans_exit(struct btree_trans *trans) __releases(&c->btree_trans_barrier) { + struct btree_insert_entry *i; struct bch_fs *c = trans->c; bch2_trans_unlock(trans); -#ifdef CONFIG_BCACHEFS_DEBUG - if (trans->iters_live) { - struct btree_iter *iter; - - trans_for_each_iter(trans, iter) - btree_iter_child_free(iter); - } + trans_for_each_update(trans, i) + __btree_path_put(i->path, true); + trans->nr_updates = 0; - if (trans->iters_live) { - struct btree_iter *iter; - - bch_err(c, "btree iterators leaked!"); - trans_for_each_iter(trans, iter) - if (btree_iter_live(trans, iter)) - printk(KERN_ERR " btree %s allocated at %pS\n", - bch2_btree_ids[iter->btree_id], - (void *) iter->ip_allocated); - /* Be noisy about this: */ - bch2_fatal_error(c); - } + check_btree_paths_leaked(trans); - mutex_lock(&trans->c->btree_trans_lock); +#ifdef CONFIG_BCACHEFS_DEBUG + mutex_lock(&c->btree_trans_lock); list_del(&trans->list); - mutex_unlock(&trans->c->btree_trans_lock); + mutex_unlock(&c->btree_trans_lock); #endif srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx); - 
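
bch2_trans_alloc_paths() first tries to grab a previously used buffer with this_cpu_xchg() and only falls back to the mempool when that slot is empty. The pattern, modeled in userspace with a single global slot standing in for the per-CPU one and malloc() standing in for mempool_alloc() (buf_get()/buf_put() are invented):

#include <stdatomic.h>
#include <stdlib.h>

#define BUF_SIZE 4096		/* stands in for the paths + updates arrays */

static void *_Atomic spare_buf;	/* single slot; the kernel keeps one per CPU */

static void *buf_get(void)
{
	void *p = atomic_exchange(&spare_buf, NULL);

	return p ? p : malloc(BUF_SIZE);
}

static void buf_put(void *p)
{
	p = atomic_exchange(&spare_buf, p);
	free(p);	/* NULL if the slot was empty; free(NULL) is a no-op */
}

int main(void)
{
	void *a = buf_get();

	buf_put(a);
	a = buf_get();		/* gets the cached buffer back, no allocation */
	buf_put(a);

	free(atomic_exchange(&spare_buf, NULL));
	return 0;
}
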
bch2_journal_preres_put(&trans->c->journal, &trans->journal_preres); + bch2_journal_preres_put(&c->journal, &trans->journal_preres); if (trans->fs_usage_deltas) { if (trans->fs_usage_deltas->size + sizeof(trans->fs_usage_deltas) == REPLICAS_DELTA_LIST_MAX) mempool_free(trans->fs_usage_deltas, - &trans->c->replicas_delta_pool); + &c->replicas_delta_pool); else kfree(trans->fs_usage_deltas); } if (trans->mem_bytes == BTREE_TRANS_MEM_MAX) - mempool_free(trans->mem, &trans->c->btree_trans_mem_pool); + mempool_free(trans->mem, &c->btree_trans_mem_pool); else kfree(trans->mem); @@ -2670,36 +2618,35 @@ int bch2_trans_exit(struct btree_trans *trans) /* * Userspace doesn't have a real percpu implementation: */ - trans->iters = this_cpu_xchg(c->btree_iters_bufs->iter, trans->iters); + trans->paths = this_cpu_xchg(c->btree_paths_bufs->path, trans->paths); #endif - if (trans->iters) - mempool_free(trans->iters, &trans->c->btree_iters_pool); + if (trans->paths) + mempool_free(trans->paths, &c->btree_paths_pool); trans->mem = (void *) 0x1; - trans->iters = (void *) 0x1; + trans->paths = (void *) 0x1; return trans->error ? -EIO : 0; } static void __maybe_unused -bch2_btree_iter_node_to_text(struct printbuf *out, +bch2_btree_path_node_to_text(struct printbuf *out, struct btree_bkey_cached_common *_b, - enum btree_iter_type type) + bool cached) { pr_buf(out, " l=%u %s:", _b->level, bch2_btree_ids[_b->btree_id]); - bch2_bpos_to_text(out, btree_node_pos(_b, type)); + bch2_bpos_to_text(out, btree_node_pos(_b, cached)); } #ifdef CONFIG_BCACHEFS_DEBUG -static bool trans_has_btree_nodes_locked(struct btree_trans *trans) +static bool trans_has_locks(struct btree_trans *trans) { - struct btree_iter *iter; + struct btree_path *path; - trans_for_each_iter(trans, iter) - if (btree_iter_type(iter) != BTREE_ITER_CACHED && - iter->nodes_locked) + trans_for_each_path(trans, path) + if (path->nodes_locked) return true; return false; } @@ -2709,35 +2656,36 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct bch_fs *c) { #ifdef CONFIG_BCACHEFS_DEBUG struct btree_trans *trans; - struct btree_iter *iter; + struct btree_path *path; struct btree *b; unsigned l; mutex_lock(&c->btree_trans_lock); list_for_each_entry(trans, &c->btree_trans_list, list) { - if (!trans_has_btree_nodes_locked(trans)) + if (!trans_has_locks(trans)) continue; pr_buf(out, "%i %ps\n", trans->pid, (void *) trans->ip); - trans_for_each_iter(trans, iter) { - if (!iter->nodes_locked) + trans_for_each_path(trans, path) { + if (!path->nodes_locked) continue; - pr_buf(out, " iter %u %c %s:", - iter->idx, - btree_iter_type(iter) == BTREE_ITER_CACHED ? 'c' : 'b', - bch2_btree_ids[iter->btree_id]); - bch2_bpos_to_text(out, iter->pos); + pr_buf(out, " path %u %c l=%u %s:", + path->idx, + path->cached ? 'c' : 'b', + path->level, + bch2_btree_ids[path->btree_id]); + bch2_bpos_to_text(out, path->pos); pr_buf(out, "\n"); for (l = 0; l < BTREE_MAX_DEPTH; l++) { - if (btree_node_locked(iter, l)) { + if (btree_node_locked(path, l)) { pr_buf(out, " %s l=%u ", - btree_node_intent_locked(iter, l) ? "i" : "r", l); - bch2_btree_iter_node_to_text(out, - (void *) iter->l[l].b, - btree_iter_type(iter)); + btree_node_intent_locked(path, l) ? 
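
check_btree_paths_leaked() is the debug-build analogue of the old "btree iterators leaked" check: any path still holding a ref when the transaction exits gets reported and treated as fatal. A minimal version of the same scan (struct path and check_leaks() are invented stand-ins):

#include <stdio.h>

#define NR_PATHS 8

struct path { unsigned ref; const char *btree; };

/* Any ref still held at transaction exit is a leak -- report it loudly: */
static int check_leaks(const struct path *paths, unsigned nr)
{
	int leaked = 0;

	for (unsigned i = 0; i < nr; i++)
		if (paths[i].ref) {
			fprintf(stderr, "leaked path %u (btree %s), ref %u\n",
				i, paths[i].btree, paths[i].ref);
			leaked = 1;
		}
	return leaked;
}

int main(void)
{
	struct path paths[NR_PATHS] = { [3] = { .ref = 1, .btree = "extents" } };

	return check_leaks(paths, NR_PATHS);	/* exits nonzero: path 3 leaked */
}
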
"i" : "r", l); + bch2_btree_path_node_to_text(out, + (void *) path->l[l].b, + path->cached); pr_buf(out, "\n"); } } @@ -2745,18 +2693,17 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct bch_fs *c) b = READ_ONCE(trans->locking); if (b) { - iter = &trans->iters[trans->locking_iter_idx]; - pr_buf(out, " locking iter %u %c l=%u %s:", - trans->locking_iter_idx, - btree_iter_type(iter) == BTREE_ITER_CACHED ? 'c' : 'b', + path = &trans->paths[trans->locking_path_idx]; + pr_buf(out, " locking path %u %c l=%u %s:", + trans->locking_path_idx, + path->cached ? 'c' : 'b', trans->locking_level, bch2_btree_ids[trans->locking_btree_id]); bch2_bpos_to_text(out, trans->locking_pos); pr_buf(out, " node "); - bch2_btree_iter_node_to_text(out, - (void *) b, - btree_iter_type(iter)); + bch2_btree_path_node_to_text(out, + (void *) b, path->cached); pr_buf(out, "\n"); } } @@ -2767,7 +2714,7 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct bch_fs *c) void bch2_fs_btree_iter_exit(struct bch_fs *c) { mempool_exit(&c->btree_trans_mem_pool); - mempool_exit(&c->btree_iters_pool); + mempool_exit(&c->btree_paths_pool); cleanup_srcu_struct(&c->btree_trans_barrier); } @@ -2779,9 +2726,8 @@ int bch2_fs_btree_iter_init(struct bch_fs *c) mutex_init(&c->btree_trans_lock); return init_srcu_struct(&c->btree_trans_barrier) ?: - mempool_init_kmalloc_pool(&c->btree_iters_pool, 1, - sizeof(u8) * nr + - sizeof(struct btree_iter) * nr + + mempool_init_kmalloc_pool(&c->btree_paths_pool, 1, + sizeof(struct btree_path) * nr + sizeof(struct btree_insert_entry) * nr) ?: mempool_init_kmalloc_pool(&c->btree_trans_mem_pool, 1, BTREE_TRANS_MEM_MAX); diff --git a/libbcachefs/btree_iter.h b/libbcachefs/btree_iter.h index 39124e6..be1bb48 100644 --- a/libbcachefs/btree_iter.h +++ b/libbcachefs/btree_iter.h @@ -5,40 +5,49 @@ #include "bset.h" #include "btree_types.h" -static inline void btree_iter_set_dirty(struct btree_iter *iter, - enum btree_iter_uptodate u) +static inline void __btree_path_get(struct btree_path *path, bool intent) { - iter->uptodate = max_t(unsigned, iter->uptodate, u); + path->ref++; + path->intent_ref += intent; } -static inline struct btree *btree_iter_node(struct btree_iter *iter, +static inline bool __btree_path_put(struct btree_path *path, bool intent) +{ + EBUG_ON(!path->ref); + EBUG_ON(!path->intent_ref && intent); + path->intent_ref -= intent; + return --path->ref == 0; +} + +static inline void btree_path_set_dirty(struct btree_path *path, + enum btree_path_uptodate u) +{ + path->uptodate = max_t(unsigned, path->uptodate, u); +} + +static inline struct btree *btree_path_node(struct btree_path *path, unsigned level) { - return level < BTREE_MAX_DEPTH ? iter->l[level].b : NULL; + return level < BTREE_MAX_DEPTH ? path->l[level].b : NULL; } -static inline bool btree_node_lock_seq_matches(const struct btree_iter *iter, +static inline bool btree_node_lock_seq_matches(const struct btree_path *path, const struct btree *b, unsigned level) { /* * We don't compare the low bits of the lock sequence numbers because - * @iter might have taken a write lock on @b, and we don't want to skip - * the linked iterator if the sequence numbers were equal before taking - * that write lock. The lock sequence number is incremented by taking - * and releasing write locks and is even when unlocked: + * @path might have taken a write lock on @b, and we don't want to skip + * the linked path if the sequence numbers were equal before taking that + * write lock. 
The lock sequence number is incremented by taking and + * releasing write locks and is even when unlocked: */ - return iter->l[level].lock_seq >> 1 == b->c.lock.state.seq >> 1; + return path->l[level].lock_seq >> 1 == b->c.lock.state.seq >> 1; } -static inline struct btree *btree_node_parent(struct btree_iter *iter, +static inline struct btree *btree_node_parent(struct btree_path *path, struct btree *b) { - return btree_iter_node(iter, b->c.level + 1); -} - -static inline bool btree_trans_has_multiple_iters(const struct btree_trans *trans) -{ - return hweight64(trans->iters_linked) > 1; + return btree_path_node(path, b->c.level + 1); } static inline int btree_iter_err(const struct btree_iter *iter) @@ -46,97 +55,105 @@ static inline int btree_iter_err(const struct btree_iter *iter) return iter->flags & BTREE_ITER_ERROR ? -EIO : 0; } -/* Iterate over iters within a transaction: */ +/* Iterate over paths within a transaction: */ -static inline struct btree_iter * -__trans_next_iter(struct btree_trans *trans, unsigned idx) +static inline struct btree_path * +__trans_next_path(struct btree_trans *trans, unsigned idx) { u64 l; if (idx == BTREE_ITER_MAX) return NULL; - l = trans->iters_linked >> idx; + l = trans->paths_allocated >> idx; if (!l) return NULL; idx += __ffs64(l); EBUG_ON(idx >= BTREE_ITER_MAX); - EBUG_ON(trans->iters[idx].idx != idx); - return &trans->iters[idx]; + EBUG_ON(trans->paths[idx].idx != idx); + return &trans->paths[idx]; } -#define trans_for_each_iter(_trans, _iter) \ - for (_iter = __trans_next_iter((_trans), 0); \ - (_iter); \ - _iter = __trans_next_iter((_trans), (_iter)->idx + 1)) +#define trans_for_each_path(_trans, _path) \ + for (_path = __trans_next_path((_trans), 0); \ + (_path); \ + _path = __trans_next_path((_trans), (_path)->idx + 1)) -static inline struct btree_iter *next_btree_iter(struct btree_trans *trans, struct btree_iter *iter) +static inline struct btree_path *next_btree_path(struct btree_trans *trans, struct btree_path *path) { - unsigned idx = iter ? iter->sorted_idx + 1 : 0; + unsigned idx = path ? path->sorted_idx + 1 : 0; EBUG_ON(idx > trans->nr_sorted); return idx < trans->nr_sorted - ? trans->iters + trans->sorted[idx] + ? trans->paths + trans->sorted[idx] : NULL; } -static inline struct btree_iter *prev_btree_iter(struct btree_trans *trans, struct btree_iter *iter) +static inline struct btree_path *prev_btree_path(struct btree_trans *trans, struct btree_path *path) { - EBUG_ON(iter->sorted_idx >= trans->nr_sorted); - return iter->sorted_idx - ? trans->iters + trans->sorted[iter->sorted_idx - 1] + EBUG_ON(path->sorted_idx >= trans->nr_sorted); + return path->sorted_idx + ? 
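
The lock_seq comparison above relies on the six-lock write sequence being odd while write-locked and bumped on both acquire and release; shifting off the low bit therefore treats "we took the write lock ourselves" as still matching, while any completed write invalidates the cached value. Modeled with a plain counter (struct node and seq_matches() are invented):

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

struct node { uint32_t seq; };	/* even = unlocked, odd = write-locked */

static void write_lock(struct node *n)   { n->seq++; }
static void write_unlock(struct node *n) { n->seq++; }

/*
 * Dropping the low bit means a seq we cached before taking the write
 * lock ourselves still matches, but any completed write invalidates it:
 */
static bool seq_matches(uint32_t cached, uint32_t cur)
{
	return cached >> 1 == cur >> 1;
}

int main(void)
{
	struct node n = { .seq = 4 };
	uint32_t cached = n.seq;

	write_lock(&n);				/* seq 5: our own write lock */
	assert(seq_matches(cached, n.seq));
	write_unlock(&n);			/* seq 6: node was modified */
	assert(!seq_matches(cached, n.seq));
	return 0;
}
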
trans->paths + trans->sorted[path->sorted_idx - 1] : NULL; } -#define trans_for_each_iter_inorder(_trans, _iter) \ - for (_iter = next_btree_iter(trans, NULL); \ - (_iter); \ - _iter = next_btree_iter((_trans), (_iter))) +#define trans_for_each_path_inorder(_trans, _path, _i) \ + for (_i = 0; \ + ((_path) = (_trans)->paths + trans->sorted[_i]), (_i) < (_trans)->nr_sorted;\ + _i++) -static inline bool __iter_has_node(const struct btree_iter *iter, +static inline bool __path_has_node(const struct btree_path *path, const struct btree *b) { - return iter->l[b->c.level].b == b && - btree_node_lock_seq_matches(iter, b, b->c.level); + return path->l[b->c.level].b == b && + btree_node_lock_seq_matches(path, b, b->c.level); } -static inline struct btree_iter * -__trans_next_iter_with_node(struct btree_trans *trans, struct btree *b, +static inline struct btree_path * +__trans_next_path_with_node(struct btree_trans *trans, struct btree *b, unsigned idx) { - struct btree_iter *iter = __trans_next_iter(trans, idx); + struct btree_path *path = __trans_next_path(trans, idx); - while (iter && !__iter_has_node(iter, b)) - iter = __trans_next_iter(trans, iter->idx + 1); + while (path && !__path_has_node(path, b)) + path = __trans_next_path(trans, path->idx + 1); - return iter; + return path; } -#define trans_for_each_iter_with_node(_trans, _b, _iter) \ - for (_iter = __trans_next_iter_with_node((_trans), (_b), 0); \ - (_iter); \ - _iter = __trans_next_iter_with_node((_trans), (_b), \ - (_iter)->idx + 1)) +#define trans_for_each_path_with_node(_trans, _b, _path) \ + for (_path = __trans_next_path_with_node((_trans), (_b), 0); \ + (_path); \ + _path = __trans_next_path_with_node((_trans), (_b), \ + (_path)->idx + 1)) + +struct btree_path * __must_check +bch2_btree_path_make_mut(struct btree_trans *, struct btree_path *, bool); +int __must_check bch2_btree_path_traverse(struct btree_trans *, + struct btree_path *, unsigned); +struct btree_path *bch2_path_get(struct btree_trans *, bool, enum btree_id, + struct bpos, unsigned, unsigned, bool); +inline struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *, struct bkey *); #ifdef CONFIG_BCACHEFS_DEBUG -void bch2_btree_trans_verify_iters(struct btree_trans *, struct btree *); -void bch2_btree_trans_verify_locks(struct btree_trans *); +void bch2_trans_verify_paths(struct btree_trans *); +void bch2_trans_verify_locks(struct btree_trans *); #else -static inline void bch2_btree_trans_verify_iters(struct btree_trans *trans, - struct btree *b) {} -static inline void bch2_btree_trans_verify_locks(struct btree_trans *iter) {} +static inline void bch2_trans_verify_paths(struct btree_trans *trans) {} +static inline void bch2_trans_verify_locks(struct btree_trans *trans) {} #endif -void bch2_btree_iter_fix_key_modified(struct btree_iter *, struct btree *, - struct bkey_packed *); -void bch2_btree_node_iter_fix(struct btree_iter *, struct btree *, - struct btree_node_iter *, struct bkey_packed *, - unsigned, unsigned); +void bch2_btree_path_fix_key_modified(struct btree_trans *trans, + struct btree *, struct bkey_packed *); +void bch2_btree_node_iter_fix(struct btree_trans *trans, struct btree_path *, + struct btree *, struct btree_node_iter *, + struct bkey_packed *, unsigned, unsigned); + +bool bch2_btree_path_relock_intent(struct btree_trans *, struct btree_path *); -bool bch2_btree_iter_relock_intent(struct btree_iter *); -bool bch2_btree_iter_relock(struct btree_iter *, unsigned long); +void bch2_path_put(struct btree_trans *, struct btree_path *, bool); bool 
bch2_trans_relock(struct btree_trans *); void bch2_trans_unlock(struct btree_trans *); @@ -149,35 +166,36 @@ static inline int btree_trans_restart(struct btree_trans *trans) return -EINTR; } -bool __bch2_btree_iter_upgrade(struct btree_iter *, unsigned); +bool __bch2_btree_path_upgrade(struct btree_trans *, + struct btree_path *, unsigned); -static inline bool bch2_btree_iter_upgrade(struct btree_iter *iter, +static inline bool bch2_btree_path_upgrade(struct btree_trans *trans, + struct btree_path *path, unsigned new_locks_want) { new_locks_want = min(new_locks_want, BTREE_MAX_DEPTH); - return iter->locks_want < new_locks_want - ? __bch2_btree_iter_upgrade(iter, new_locks_want) - : iter->uptodate <= BTREE_ITER_NEED_PEEK; + return path->locks_want < new_locks_want + ? __bch2_btree_path_upgrade(trans, path, new_locks_want) + : path->uptodate == BTREE_ITER_UPTODATE; } -void __bch2_btree_iter_downgrade(struct btree_iter *, unsigned); +void __bch2_btree_path_downgrade(struct btree_path *, unsigned); -static inline void bch2_btree_iter_downgrade(struct btree_iter *iter) +static inline void bch2_btree_path_downgrade(struct btree_path *path) { - unsigned new_locks_want = iter->level + !!(iter->flags & BTREE_ITER_INTENT); + unsigned new_locks_want = path->level + !!path->intent_ref; - if (iter->locks_want > new_locks_want) - __bch2_btree_iter_downgrade(iter, new_locks_want); + if (path->locks_want > new_locks_want) + __bch2_btree_path_downgrade(path, new_locks_want); } void bch2_trans_downgrade(struct btree_trans *); -void bch2_btree_iter_node_replace(struct btree_iter *, struct btree *); -void bch2_btree_iter_node_drop(struct btree_iter *, struct btree *); - -void bch2_btree_iter_reinit_node(struct btree_iter *, struct btree *); +void bch2_trans_node_add(struct btree_trans *trans, struct btree *); +void bch2_trans_node_reinit_iter(struct btree_trans *, struct btree *); +int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter); int __must_check bch2_btree_iter_traverse(struct btree_iter *); struct btree *bch2_btree_iter_peek_node(struct btree_iter *); @@ -206,7 +224,8 @@ static inline void bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos iter->k.p.offset = iter->pos.offset = new_pos.offset; iter->k.p.snapshot = iter->pos.snapshot = new_pos.snapshot; iter->k.size = 0; - iter->should_be_locked = false; + if (iter->path->ref == 1) + iter->path->should_be_locked = false; } static inline void bch2_btree_iter_set_pos_to_extent_start(struct btree_iter *iter) @@ -215,16 +234,6 @@ static inline void bch2_btree_iter_set_pos_to_extent_start(struct btree_iter *it iter->pos = bkey_start_pos(&iter->k); } -static inline struct btree_iter *idx_to_btree_iter(struct btree_trans *trans, unsigned idx) -{ - return idx != U8_MAX ? 
trans->iters + idx : NULL; -} - -static inline struct btree_iter *btree_iter_child(struct btree_iter *iter) -{ - return idx_to_btree_iter(iter->trans, iter->child_idx); -} - /* * Unlocks before scheduling * Note: does not revalidate iterator @@ -242,11 +251,11 @@ static inline int bch2_trans_cond_resched(struct btree_trans *trans) #define __for_each_btree_node(_trans, _iter, _btree_id, _start, \ _locks_want, _depth, _flags, _b) \ - for (iter = bch2_trans_get_node_iter((_trans), (_btree_id), \ + for (bch2_trans_node_iter_init((_trans), &(_iter), (_btree_id), \ _start, _locks_want, _depth, _flags), \ - _b = bch2_btree_iter_peek_node(_iter); \ + _b = bch2_btree_iter_peek_node(&(_iter)); \ (_b); \ - (_b) = bch2_btree_iter_next_node(_iter)) + (_b) = bch2_btree_iter_next_node(&(_iter))) #define for_each_btree_node(_trans, _iter, _btree_id, _start, \ _flags, _b) \ @@ -276,75 +285,36 @@ static inline int bkey_err(struct bkey_s_c k) #define for_each_btree_key(_trans, _iter, _btree_id, \ _start, _flags, _k, _ret) \ - for ((_iter) = bch2_trans_get_iter((_trans), (_btree_id), \ - (_start), (_flags)), \ - (_k) = __bch2_btree_iter_peek(_iter, _flags); \ + for (bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \ + (_start), (_flags)), \ + (_k) = __bch2_btree_iter_peek(&(_iter), _flags); \ !((_ret) = bkey_err(_k)) && (_k).k; \ - (_k) = __bch2_btree_iter_next(_iter, _flags)) + (_k) = __bch2_btree_iter_next(&(_iter), _flags)) #define for_each_btree_key_continue(_iter, _flags, _k, _ret) \ - for ((_k) = __bch2_btree_iter_peek(_iter, _flags); \ + for ((_k) = __bch2_btree_iter_peek(&(_iter), _flags); \ !((_ret) = bkey_err(_k)) && (_k).k; \ - (_k) = __bch2_btree_iter_next(_iter, _flags)) + (_k) = __bch2_btree_iter_next(&(_iter), _flags)) /* new multiple iterator interface: */ -int bch2_trans_iter_put(struct btree_trans *, struct btree_iter *); -int bch2_trans_iter_free(struct btree_trans *, struct btree_iter *); +void bch2_dump_trans_paths_updates(struct btree_trans *); -void bch2_trans_unlink_iters(struct btree_trans *); +void bch2_trans_iter_exit(struct btree_trans *, struct btree_iter *); +void bch2_trans_iter_init(struct btree_trans *, struct btree_iter *, + unsigned, struct bpos, unsigned); +void bch2_trans_node_iter_init(struct btree_trans *, struct btree_iter *, + enum btree_id, struct bpos, + unsigned, unsigned, unsigned); +void bch2_trans_copy_iter(struct btree_iter *, struct btree_iter *); -struct btree_iter *__bch2_trans_get_iter(struct btree_trans *, enum btree_id, - struct bpos, unsigned, - unsigned, unsigned); - -static inline struct btree_iter * -bch2_trans_get_iter(struct btree_trans *trans, enum btree_id btree_id, - struct bpos pos, unsigned flags) -{ - struct btree_iter *iter = - __bch2_trans_get_iter(trans, btree_id, pos, - (flags & BTREE_ITER_INTENT) != 0, 0, - flags); - iter->ip_allocated = _THIS_IP_; - return iter; -} - -struct btree_iter *__bch2_trans_copy_iter(struct btree_trans *, - struct btree_iter *); -static inline struct btree_iter * -bch2_trans_copy_iter(struct btree_trans *trans, struct btree_iter *src) -{ - struct btree_iter *iter = - __bch2_trans_copy_iter(trans, src); - - iter->ip_allocated = _THIS_IP_; - return iter; -} - -struct btree_iter *bch2_trans_get_node_iter(struct btree_trans *, - enum btree_id, struct bpos, - unsigned, unsigned, unsigned); - -static inline bool btree_iter_live(struct btree_trans *trans, struct btree_iter *iter) +static inline void set_btree_iter_dontneed(struct btree_iter *iter) { - return (trans->iters_live & (1ULL << iter->idx)) != 0; + 
iter->path->preserve = false; } -static inline bool btree_iter_keep(struct btree_trans *trans, struct btree_iter *iter) -{ - return btree_iter_live(trans, iter) || - (iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT); -} - -static inline void set_btree_iter_dontneed(struct btree_trans *trans, struct btree_iter *iter) -{ - trans->iters_touched &= ~(1ULL << iter->idx); -} - -void bch2_trans_begin(struct btree_trans *); - void *bch2_trans_kmalloc(struct btree_trans *, size_t); +void bch2_trans_begin(struct btree_trans *); void bch2_trans_init(struct btree_trans *, struct bch_fs *, unsigned, size_t); int bch2_trans_exit(struct btree_trans *); diff --git a/libbcachefs/btree_key_cache.c b/libbcachefs/btree_key_cache.c index e327ef3..938ced3 100644 --- a/libbcachefs/btree_key_cache.c +++ b/libbcachefs/btree_key_cache.c @@ -196,23 +196,23 @@ btree_key_cache_create(struct btree_key_cache *c, } static int btree_key_cache_fill(struct btree_trans *trans, - struct btree_iter *ck_iter, + struct btree_path *ck_path, struct bkey_cached *ck) { - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c k; unsigned new_u64s = 0; struct bkey_i *new_k = NULL; int ret; - iter = bch2_trans_get_iter(trans, ck->key.btree_id, - ck->key.pos, BTREE_ITER_SLOTS); - k = bch2_btree_iter_peek_slot(iter); + bch2_trans_iter_init(trans, &iter, ck->key.btree_id, + ck->key.pos, BTREE_ITER_SLOTS); + k = bch2_btree_iter_peek_slot(&iter); ret = bkey_err(k); if (ret) goto err; - if (!bch2_btree_node_relock(ck_iter, 0)) { + if (!bch2_btree_node_relock(trans, ck_path, 0)) { trace_transaction_restart_ip(trans->ip, _THIS_IP_); ret = btree_trans_restart(trans); goto err; @@ -237,7 +237,7 @@ static int btree_key_cache_fill(struct btree_trans *trans, * XXX: not allowed to be holding read locks when we take a write lock, * currently */ - bch2_btree_node_lock_write(ck_iter->l[0].b, ck_iter); + bch2_btree_node_lock_write(trans, ck_path, ck_path->l[0].b); if (new_k) { kfree(ck->k); ck->u64s = new_u64s; @@ -246,63 +246,64 @@ static int btree_key_cache_fill(struct btree_trans *trans, bkey_reassemble(ck->k, k); ck->valid = true; - bch2_btree_node_unlock_write(ck_iter->l[0].b, ck_iter); + bch2_btree_node_unlock_write(trans, ck_path, ck_path->l[0].b); /* We're not likely to need this iterator again: */ - set_btree_iter_dontneed(trans, iter); + set_btree_iter_dontneed(&iter); err: - bch2_trans_iter_put(trans, iter); + bch2_trans_iter_exit(trans, &iter); return ret; } static int bkey_cached_check_fn(struct six_lock *lock, void *p) { struct bkey_cached *ck = container_of(lock, struct bkey_cached, c.lock); - const struct btree_iter *iter = p; + const struct btree_path *path = p; - return ck->key.btree_id == iter->btree_id && - !bpos_cmp(ck->key.pos, iter->pos) ? 0 : -1; + return ck->key.btree_id == path->btree_id && + !bpos_cmp(ck->key.pos, path->pos) ? 
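
The caller-visible upshot of these macro changes: iterators are now stack objects initialized in place, and the old get/put pair becomes init/exit. A typical loop after this patch would look something like the following (a fragment, not a complete program; assumes a struct bch_fs *c in scope, error handling elided):

	struct btree_trans trans;
	struct btree_iter iter;
	struct bkey_s_c k;
	int ret;

	bch2_trans_init(&trans, c, 0, 0);

	for_each_btree_key(&trans, iter, BTREE_ID_extents, POS_MIN,
			   BTREE_ITER_PREFETCH, k, ret) {
		/* use k; note iter is a struct now, not a pointer */
	}
	bch2_trans_iter_exit(&trans, &iter);	/* drops the path ref */

	bch2_trans_exit(&trans);
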
0 : -1; } __flatten -int bch2_btree_iter_traverse_cached(struct btree_iter *iter) +int bch2_btree_path_traverse_cached(struct btree_trans *trans, struct btree_path *path, + unsigned flags) { - struct btree_trans *trans = iter->trans; struct bch_fs *c = trans->c; struct bkey_cached *ck; int ret = 0; - BUG_ON(iter->level); + BUG_ON(path->level); - iter->l[1].b = NULL; + path->l[1].b = NULL; - if (bch2_btree_node_relock(iter, 0)) { - ck = (void *) iter->l[0].b; + if (bch2_btree_node_relock(trans, path, 0)) { + ck = (void *) path->l[0].b; goto fill; } retry: - ck = bch2_btree_key_cache_find(c, iter->btree_id, iter->pos); + ck = bch2_btree_key_cache_find(c, path->btree_id, path->pos); if (!ck) { - if (iter->flags & BTREE_ITER_CACHED_NOCREATE) { - iter->l[0].b = NULL; + if (flags & BTREE_ITER_CACHED_NOCREATE) { + path->l[0].b = NULL; return 0; } ck = btree_key_cache_create(&c->btree_key_cache, - iter->btree_id, iter->pos); + path->btree_id, path->pos); ret = PTR_ERR_OR_ZERO(ck); if (ret) goto err; if (!ck) goto retry; - mark_btree_node_locked(iter, 0, SIX_LOCK_intent); - iter->locks_want = 1; + mark_btree_node_locked(trans, path, 0, SIX_LOCK_intent); + path->locks_want = 1; } else { - enum six_lock_type lock_want = __btree_lock_want(iter, 0); + enum six_lock_type lock_want = __btree_lock_want(path, 0); - if (!btree_node_lock((void *) ck, iter->pos, 0, iter, lock_want, - bkey_cached_check_fn, iter, _THIS_IP_)) { + if (!btree_node_lock(trans, path, (void *) ck, path->pos, 0, + lock_want, + bkey_cached_check_fn, path, _THIS_IP_)) { if (!trans->restarted) goto retry; @@ -311,28 +312,27 @@ retry: goto err; } - if (ck->key.btree_id != iter->btree_id || - bpos_cmp(ck->key.pos, iter->pos)) { + if (ck->key.btree_id != path->btree_id || + bpos_cmp(ck->key.pos, path->pos)) { six_unlock_type(&ck->c.lock, lock_want); goto retry; } - mark_btree_node_locked(iter, 0, lock_want); + mark_btree_node_locked(trans, path, 0, lock_want); } - iter->l[0].lock_seq = ck->c.lock.state.seq; - iter->l[0].b = (void *) ck; + path->l[0].lock_seq = ck->c.lock.state.seq; + path->l[0].b = (void *) ck; fill: - if (!ck->valid && !(iter->flags & BTREE_ITER_CACHED_NOFILL)) { - if (!iter->locks_want && - !!__bch2_btree_iter_upgrade(iter, 1)) { + if (!ck->valid && !(flags & BTREE_ITER_CACHED_NOFILL)) { + if (!path->locks_want && + !__bch2_btree_path_upgrade(trans, path, 1)) { trace_transaction_restart_ip(trans->ip, _THIS_IP_); - BUG_ON(!trans->restarted); - ret = -EINTR; + ret = btree_trans_restart(trans); goto err; } - ret = btree_key_cache_fill(trans, iter, ck); + ret = btree_key_cache_fill(trans, path, ck); if (ret) goto err; } @@ -340,22 +340,14 @@ fill: if (!test_bit(BKEY_CACHED_ACCESSED, &ck->flags)) set_bit(BKEY_CACHED_ACCESSED, &ck->flags); - iter->uptodate = BTREE_ITER_NEED_PEEK; - - if ((iter->flags & BTREE_ITER_INTENT) && - !bch2_btree_iter_upgrade(iter, 1)) { - BUG_ON(!trans->restarted); - ret = -EINTR; - } - - BUG_ON(!ret && !btree_node_locked(iter, 0)); + path->uptodate = BTREE_ITER_UPTODATE; + BUG_ON(btree_node_locked_type(path, 0) != btree_lock_want(path, 0)); return ret; err: if (ret != -EINTR) { - btree_node_unlock(iter, 0); - iter->flags |= BTREE_ITER_ERROR; - iter->l[0].b = BTREE_ITER_NO_NODE_ERROR; + btree_node_unlock(path, 0); + path->l[0].b = BTREE_ITER_NO_NODE_ERROR; } return ret; } @@ -368,23 +360,23 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans, { struct bch_fs *c = trans->c; struct journal *j = &c->journal; - struct btree_iter *c_iter = NULL, *b_iter = NULL; + struct btree_iter c_iter, 
b_iter; struct bkey_cached *ck = NULL; int ret; - b_iter = bch2_trans_get_iter(trans, key.btree_id, key.pos, - BTREE_ITER_SLOTS| - BTREE_ITER_INTENT); - c_iter = bch2_trans_get_iter(trans, key.btree_id, key.pos, - BTREE_ITER_CACHED| - BTREE_ITER_CACHED_NOFILL| - BTREE_ITER_CACHED_NOCREATE| - BTREE_ITER_INTENT); - ret = bch2_btree_iter_traverse(c_iter); + bch2_trans_iter_init(trans, &b_iter, key.btree_id, key.pos, + BTREE_ITER_SLOTS| + BTREE_ITER_INTENT); + bch2_trans_iter_init(trans, &c_iter, key.btree_id, key.pos, + BTREE_ITER_CACHED| + BTREE_ITER_CACHED_NOFILL| + BTREE_ITER_CACHED_NOCREATE| + BTREE_ITER_INTENT); + ret = bch2_btree_iter_traverse(&c_iter); if (ret) goto out; - ck = (void *) c_iter->l[0].b; + ck = (void *) c_iter.path->l[0].b; if (!ck || (journal_seq && ck->journal.seq != journal_seq)) goto out; @@ -400,8 +392,8 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans, * allocator/copygc depend on journal reclaim making progress, we need * to be using alloc reserves: * */ - ret = bch2_btree_iter_traverse(b_iter) ?: - bch2_trans_update(trans, b_iter, ck->k, + ret = bch2_btree_iter_traverse(&b_iter) ?: + bch2_trans_update(trans, &b_iter, ck->k, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE| BTREE_TRIGGER_NORUN) ?: bch2_trans_commit(trans, NULL, NULL, @@ -423,7 +415,7 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans, bch2_journal_pin_drop(j, &ck->journal); bch2_journal_preres_put(j, &ck->res); - BUG_ON(!btree_node_locked(c_iter, 0)); + BUG_ON(!btree_node_locked(c_iter.path, 0)); if (!evict) { if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) { @@ -432,10 +424,10 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans, } } else { evict: - BUG_ON(!btree_node_intent_locked(c_iter, 0)); + BUG_ON(!btree_node_intent_locked(c_iter.path, 0)); - mark_btree_node_unlocked(c_iter, 0); - c_iter->l[0].b = NULL; + mark_btree_node_unlocked(c_iter.path, 0); + c_iter.path->l[0].b = NULL; six_lock_write(&ck->c.lock, NULL, NULL); @@ -451,8 +443,8 @@ evict: mutex_unlock(&c->btree_key_cache.lock); } out: - bch2_trans_iter_put(trans, b_iter); - bch2_trans_iter_put(trans, c_iter); + bch2_trans_iter_exit(trans, &b_iter); + bch2_trans_iter_exit(trans, &c_iter); return ret; } @@ -503,11 +495,11 @@ int bch2_btree_key_cache_flush(struct btree_trans *trans, } bool bch2_btree_insert_key_cached(struct btree_trans *trans, - struct btree_iter *iter, + struct btree_path *path, struct bkey_i *insert) { struct bch_fs *c = trans->c; - struct bkey_cached *ck = (void *) iter->l[0].b; + struct bkey_cached *ck = (void *) path->l[0].b; bool kick_reclaim = false; BUG_ON(insert->u64s > ck->u64s); diff --git a/libbcachefs/btree_key_cache.h b/libbcachefs/btree_key_cache.h index 7e2b0a0..0768ef3 100644 --- a/libbcachefs/btree_key_cache.h +++ b/libbcachefs/btree_key_cache.h @@ -26,10 +26,11 @@ int bch2_btree_key_cache_journal_flush(struct journal *, struct bkey_cached * bch2_btree_key_cache_find(struct bch_fs *, enum btree_id, struct bpos); -int bch2_btree_iter_traverse_cached(struct btree_iter *); +int bch2_btree_path_traverse_cached(struct btree_trans *, struct btree_path *, + unsigned); bool bch2_btree_insert_key_cached(struct btree_trans *, - struct btree_iter *, struct bkey_i *); + struct btree_path *, struct bkey_i *); int bch2_btree_key_cache_flush(struct btree_trans *, enum btree_id, struct bpos); #ifdef CONFIG_BCACHEFS_DEBUG diff --git a/libbcachefs/btree_locking.h b/libbcachefs/btree_locking.h index 7532bcd..5c6b758 100644 --- a/libbcachefs/btree_locking.h +++ b/libbcachefs/btree_locking.h @@ 
-21,7 +21,7 @@ enum btree_node_locked_type { BTREE_NODE_INTENT_LOCKED = SIX_LOCK_intent, }; -static inline int btree_node_locked_type(struct btree_iter *iter, +static inline int btree_node_locked_type(struct btree_path *path, unsigned level) { /* @@ -30,35 +30,36 @@ static inline int btree_node_locked_type(struct btree_iter *iter, * branches: */ return BTREE_NODE_UNLOCKED + - ((iter->nodes_locked >> level) & 1) + - ((iter->nodes_intent_locked >> level) & 1); + ((path->nodes_locked >> level) & 1) + + ((path->nodes_intent_locked >> level) & 1); } -static inline bool btree_node_intent_locked(struct btree_iter *iter, +static inline bool btree_node_intent_locked(struct btree_path *path, unsigned level) { - return btree_node_locked_type(iter, level) == BTREE_NODE_INTENT_LOCKED; + return btree_node_locked_type(path, level) == BTREE_NODE_INTENT_LOCKED; } -static inline bool btree_node_read_locked(struct btree_iter *iter, +static inline bool btree_node_read_locked(struct btree_path *path, unsigned level) { - return btree_node_locked_type(iter, level) == BTREE_NODE_READ_LOCKED; + return btree_node_locked_type(path, level) == BTREE_NODE_READ_LOCKED; } -static inline bool btree_node_locked(struct btree_iter *iter, unsigned level) +static inline bool btree_node_locked(struct btree_path *path, unsigned level) { - return iter->nodes_locked & (1 << level); + return path->nodes_locked & (1 << level); } -static inline void mark_btree_node_unlocked(struct btree_iter *iter, +static inline void mark_btree_node_unlocked(struct btree_path *path, unsigned level) { - iter->nodes_locked &= ~(1 << level); - iter->nodes_intent_locked &= ~(1 << level); + path->nodes_locked &= ~(1 << level); + path->nodes_intent_locked &= ~(1 << level); } -static inline void mark_btree_node_locked(struct btree_iter *iter, +static inline void mark_btree_node_locked(struct btree_trans *trans, + struct btree_path *path, unsigned level, enum six_lock_type type) { @@ -66,52 +67,62 @@ static inline void mark_btree_node_locked(struct btree_iter *iter, BUILD_BUG_ON(SIX_LOCK_read != 0); BUILD_BUG_ON(SIX_LOCK_intent != 1); - iter->nodes_locked |= 1 << level; - iter->nodes_intent_locked |= type << level; + path->nodes_locked |= 1 << level; + path->nodes_intent_locked |= type << level; +#ifdef CONFIG_BCACHEFS_DEBUG + path->ip_locked = _RET_IP_; + BUG_ON(trans->in_traverse_all && + trans->traverse_all_idx != U8_MAX && + path->sorted_idx > trans->paths[trans->traverse_all_idx].sorted_idx); +#endif } -static inline void mark_btree_node_intent_locked(struct btree_iter *iter, +static inline void mark_btree_node_intent_locked(struct btree_trans *trans, + struct btree_path *path, unsigned level) { - mark_btree_node_locked(iter, level, SIX_LOCK_intent); + mark_btree_node_locked(trans, path, level, SIX_LOCK_intent); } -static inline enum six_lock_type __btree_lock_want(struct btree_iter *iter, int level) +static inline enum six_lock_type __btree_lock_want(struct btree_path *path, int level) { - return level < iter->locks_want + return level < path->locks_want ? 
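
btree_node_locked_type() decodes lock state branch-free from two per-level bitmasks: with BTREE_NODE_UNLOCKED at -1 and the six-lock read/intent values at 0/1, summing the two bits yields exactly the right enum value. The trick in isolation (struct lockmasks here is a stand-in, not the real btree_path):

#include <assert.h>

enum locked_type {		/* mirrors enum btree_node_locked_type */
	NODE_UNLOCKED = -1,
	NODE_READ_LOCKED,	/* 0, matching SIX_LOCK_read */
	NODE_INTENT_LOCKED,	/* 1, matching SIX_LOCK_intent */
};

struct lockmasks {
	unsigned char nodes_locked;		/* bit per level: any lock held */
	unsigned char nodes_intent_locked;	/* bit per level: it's an intent lock */
};

/* -1 + 0 + 0 = unlocked, -1 + 1 + 0 = read, -1 + 1 + 1 = intent: */
static int locked_type(const struct lockmasks *p, unsigned level)
{
	return NODE_UNLOCKED +
		((p->nodes_locked >> level) & 1) +
		((p->nodes_intent_locked >> level) & 1);
}

static void mark_locked(struct lockmasks *p, unsigned level, int type)
{
	p->nodes_locked |= 1 << level;
	p->nodes_intent_locked |= type << level;	/* type is 0 or 1 here */
}

int main(void)
{
	struct lockmasks p = { 0, 0 };

	assert(locked_type(&p, 0) == NODE_UNLOCKED);
	mark_locked(&p, 0, NODE_READ_LOCKED);
	assert(locked_type(&p, 0) == NODE_READ_LOCKED);
	mark_locked(&p, 1, NODE_INTENT_LOCKED);
	assert(locked_type(&p, 1) == NODE_INTENT_LOCKED);
	return 0;
}
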
SIX_LOCK_intent : SIX_LOCK_read; } static inline enum btree_node_locked_type -btree_lock_want(struct btree_iter *iter, int level) +btree_lock_want(struct btree_path *path, int level) { - if (level < iter->level) + if (level < path->level) return BTREE_NODE_UNLOCKED; - if (level < iter->locks_want) + if (level < path->locks_want) return BTREE_NODE_INTENT_LOCKED; - if (level == iter->level) + if (level == path->level) return BTREE_NODE_READ_LOCKED; return BTREE_NODE_UNLOCKED; } -static inline void btree_node_unlock(struct btree_iter *iter, unsigned level) +static inline void btree_node_unlock(struct btree_path *path, unsigned level) { - int lock_type = btree_node_locked_type(iter, level); + int lock_type = btree_node_locked_type(path, level); EBUG_ON(level >= BTREE_MAX_DEPTH); if (lock_type != BTREE_NODE_UNLOCKED) - six_unlock_type(&iter->l[level].b->c.lock, lock_type); - mark_btree_node_unlocked(iter, level); + six_unlock_type(&path->l[level].b->c.lock, lock_type); + mark_btree_node_unlocked(path, level); } -static inline void __bch2_btree_iter_unlock(struct btree_iter *iter) +static inline void __bch2_btree_path_unlock(struct btree_path *path) { - btree_iter_set_dirty(iter, BTREE_ITER_NEED_RELOCK); + btree_path_set_dirty(path, BTREE_ITER_NEED_RELOCK); - while (iter->nodes_locked) - btree_node_unlock(iter, __ffs(iter->nodes_locked)); + while (path->nodes_locked) + btree_node_unlock(path, __ffs(path->nodes_locked)); +#ifdef CONFIG_BCACHEFS_DEBUG + path->ip_locked = 0; +#endif } static inline enum bch_time_stats lock_to_time_stat(enum six_lock_type type) @@ -155,11 +166,11 @@ static inline bool btree_node_lock_increment(struct btree_trans *trans, struct btree *b, unsigned level, enum btree_node_locked_type want) { - struct btree_iter *iter; + struct btree_path *path; - trans_for_each_iter(trans, iter) - if (iter->l[level].b == b && - btree_node_locked_type(iter, level) >= want) { + trans_for_each_path(trans, path) + if (path->l[level].b == b && + btree_node_locked_type(path, level) >= want) { six_lock_increment(&b->c.lock, want); return true; } @@ -167,40 +178,39 @@ static inline bool btree_node_lock_increment(struct btree_trans *trans, return false; } -bool __bch2_btree_node_lock(struct btree *, struct bpos, unsigned, - struct btree_iter *, enum six_lock_type, +bool __bch2_btree_node_lock(struct btree_trans *, struct btree_path *, + struct btree *, struct bpos, unsigned, + enum six_lock_type, six_lock_should_sleep_fn, void *, unsigned long); -static inline bool btree_node_lock(struct btree *b, - struct bpos pos, unsigned level, - struct btree_iter *iter, +static inline bool btree_node_lock(struct btree_trans *trans, + struct btree_path *path, + struct btree *b, struct bpos pos, unsigned level, enum six_lock_type type, six_lock_should_sleep_fn should_sleep_fn, void *p, unsigned long ip) { - struct btree_trans *trans = iter->trans; - EBUG_ON(level >= BTREE_MAX_DEPTH); - EBUG_ON(!(trans->iters_linked & (1ULL << iter->idx))); + EBUG_ON(!(trans->paths_allocated & (1ULL << path->idx))); return likely(six_trylock_type(&b->c.lock, type)) || btree_node_lock_increment(trans, b, level, type) || - __bch2_btree_node_lock(b, pos, level, iter, type, + __bch2_btree_node_lock(trans, path, b, pos, level, type, should_sleep_fn, p, ip); } -bool __bch2_btree_node_relock(struct btree_iter *, unsigned); +bool __bch2_btree_node_relock(struct btree_trans *, struct btree_path *, unsigned); -static inline bool bch2_btree_node_relock(struct btree_iter *iter, - unsigned level) +static inline bool 
bch2_btree_node_relock(struct btree_trans *trans, + struct btree_path *path, unsigned level) { - EBUG_ON(btree_node_locked(iter, level) && - btree_node_locked_type(iter, level) != - __btree_lock_want(iter, level)); + EBUG_ON(btree_node_locked(path, level) && + btree_node_locked_type(path, level) != + __btree_lock_want(path, level)); - return likely(btree_node_locked(iter, level)) || - __bch2_btree_node_relock(iter, level); + return likely(btree_node_locked(path, level)) || + __bch2_btree_node_relock(trans, path, level); } /* @@ -208,30 +218,35 @@ static inline bool bch2_btree_node_relock(struct btree_iter *iter, * succeed: */ static inline void -bch2_btree_node_unlock_write_inlined(struct btree *b, struct btree_iter *iter) +bch2_btree_node_unlock_write_inlined(struct btree_trans *trans, struct btree_path *path, + struct btree *b) { - struct btree_iter *linked; + struct btree_path *linked; - EBUG_ON(iter->l[b->c.level].b != b); - EBUG_ON(iter->l[b->c.level].lock_seq + 1 != b->c.lock.state.seq); + EBUG_ON(path->l[b->c.level].b != b); + EBUG_ON(path->l[b->c.level].lock_seq + 1 != b->c.lock.state.seq); - trans_for_each_iter_with_node(iter->trans, b, linked) + trans_for_each_path_with_node(trans, b, linked) linked->l[b->c.level].lock_seq += 2; six_unlock_write(&b->c.lock); } -void bch2_btree_node_unlock_write(struct btree *, struct btree_iter *); +void bch2_btree_node_unlock_write(struct btree_trans *, + struct btree_path *, struct btree *); -void __bch2_btree_node_lock_write(struct btree *, struct btree_iter *); +void __bch2_btree_node_lock_write(struct btree_trans *, struct btree *); -static inline void bch2_btree_node_lock_write(struct btree *b, struct btree_iter *iter) +static inline void bch2_btree_node_lock_write(struct btree_trans *trans, + struct btree_path *path, + struct btree *b) { - EBUG_ON(iter->l[b->c.level].b != b); - EBUG_ON(iter->l[b->c.level].lock_seq != b->c.lock.state.seq); + EBUG_ON(path->l[b->c.level].b != b); + EBUG_ON(path->l[b->c.level].lock_seq != b->c.lock.state.seq); + EBUG_ON(!btree_node_intent_locked(path, b->c.level)); if (unlikely(!six_trylock_write(&b->c.lock))) - __bch2_btree_node_lock_write(b, iter); + __bch2_btree_node_lock_write(trans, b); } #endif /* _BCACHEFS_BTREE_LOCKING_H */ diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h index a1e5deb..ccf91eb 100644 --- a/libbcachefs/btree_types.h +++ b/libbcachefs/btree_types.h @@ -176,52 +176,44 @@ struct btree_node_iter { } data[MAX_BSETS]; }; -enum btree_iter_type { - BTREE_ITER_KEYS, - BTREE_ITER_NODES, - BTREE_ITER_CACHED, -}; - -#define BTREE_ITER_TYPE ((1 << 2) - 1) - /* * Iterate over all possible positions, synthesizing deleted keys for holes: */ -#define BTREE_ITER_SLOTS (1 << 2) +#define BTREE_ITER_SLOTS (1 << 0) /* * Indicates that intent locks should be taken on leaf nodes, because we expect * to be doing updates: */ -#define BTREE_ITER_INTENT (1 << 3) +#define BTREE_ITER_INTENT (1 << 1) /* * Causes the btree iterator code to prefetch additional btree nodes from disk: */ -#define BTREE_ITER_PREFETCH (1 << 4) +#define BTREE_ITER_PREFETCH (1 << 2) /* * Indicates that this iterator should not be reused until transaction commit, * either because a pending update references it or because the update depends * on that particular key being locked (e.g. 
by the str_hash code, for hash * table consistency) */ -#define BTREE_ITER_KEEP_UNTIL_COMMIT (1 << 5) +#define BTREE_ITER_KEEP_UNTIL_COMMIT (1 << 3) /* * Used in bch2_btree_iter_traverse(), to indicate whether we're searching for * @pos or the first key strictly greater than @pos */ -#define BTREE_ITER_IS_EXTENTS (1 << 6) -#define BTREE_ITER_NOT_EXTENTS (1 << 7) -#define BTREE_ITER_ERROR (1 << 8) -#define BTREE_ITER_SET_POS_AFTER_COMMIT (1 << 9) -#define BTREE_ITER_CACHED_NOFILL (1 << 10) -#define BTREE_ITER_CACHED_NOCREATE (1 << 11) -#define BTREE_ITER_WITH_UPDATES (1 << 12) -#define BTREE_ITER_ALL_SNAPSHOTS (1 << 13) - -enum btree_iter_uptodate { +#define BTREE_ITER_IS_EXTENTS (1 << 4) +#define BTREE_ITER_NOT_EXTENTS (1 << 5) +#define BTREE_ITER_ERROR (1 << 6) +#define BTREE_ITER_CACHED (1 << 7) +#define BTREE_ITER_CACHED_NOFILL (1 << 8) +#define BTREE_ITER_CACHED_NOCREATE (1 << 9) +#define BTREE_ITER_WITH_UPDATES (1 << 10) +#define __BTREE_ITER_ALL_SNAPSHOTS (1 << 11) +#define BTREE_ITER_ALL_SNAPSHOTS (1 << 12) + +enum btree_path_uptodate { BTREE_ITER_UPTODATE = 0, - BTREE_ITER_NEED_PEEK = 1, - BTREE_ITER_NEED_RELOCK = 2, - BTREE_ITER_NEED_TRAVERSE = 3, + BTREE_ITER_NEED_RELOCK = 1, + BTREE_ITER_NEED_TRAVERSE = 2, }; #define BTREE_ITER_NO_NODE_GET_LOCKS ((struct btree *) 1) @@ -233,51 +225,67 @@ enum btree_iter_uptodate { #define BTREE_ITER_NO_NODE_ERROR ((struct btree *) 7) #define BTREE_ITER_NO_NODE_CACHED ((struct btree *) 8) -/* - * @pos - iterator's current position - * @level - current btree depth - * @locks_want - btree level below which we start taking intent locks - * @nodes_locked - bitmask indicating which nodes in @nodes are locked - * @nodes_intent_locked - bitmask indicating which locks are intent locks - */ -struct btree_iter { - struct btree_trans *trans; - unsigned long ip_allocated; - +struct btree_path { u8 idx; - u8 child_idx; u8 sorted_idx; + u8 ref; + u8 intent_ref; /* btree_iter_copy starts here: */ - u16 flags; - - /* When we're filtering by snapshot, the snapshot ID we're looking for: */ - unsigned snapshot; - struct bpos pos; - struct bpos real_pos; - struct bpos pos_after_commit; enum btree_id btree_id:4; - enum btree_iter_uptodate uptodate:3; + bool cached:1; + bool preserve:1; + enum btree_path_uptodate uptodate:2; /* - * True if we've returned a key (and thus are expected to keep it - * locked), false after set_pos - for avoiding spurious transaction - * restarts in bch2_trans_relock(): + * When true, failing to relock this path will cause the transaction to + * restart: */ bool should_be_locked:1; - unsigned level:4, - min_depth:4, + unsigned level:3, locks_want:4, nodes_locked:4, nodes_intent_locked:4; - struct btree_iter_level { + struct btree_path_level { struct btree *b; struct btree_node_iter iter; u32 lock_seq; } l[BTREE_MAX_DEPTH]; +#ifdef CONFIG_BCACHEFS_DEBUG + unsigned long ip_allocated; + unsigned long ip_locked; +#endif +}; +static inline struct btree_path_level *path_l(struct btree_path *path) +{ + return path->l + path->level; +} + +/* + * @pos - iterator's current position + * @level - current btree depth + * @locks_want - btree level below which we start taking intent locks + * @nodes_locked - bitmask indicating which nodes in @nodes are locked + * @nodes_intent_locked - bitmask indicating which locks are intent locks + */ +struct btree_iter { + struct btree_trans *trans; + struct btree_path *path; + + enum btree_id btree_id:4; + unsigned min_depth:4; + + /* btree_iter_copy starts here: */ + u16 flags; + + /* When we're filtering by 
snapshot, the snapshot ID we're looking for: */ + unsigned snapshot; + + struct bpos pos; + struct bpos pos_after_commit; /* * Current unpacked key - so that bch2_btree_iter_next()/ * bch2_btree_iter_next_slot() can correctly advance pos. @@ -285,22 +293,6 @@ struct btree_iter { struct bkey k; }; -static inline enum btree_iter_type -btree_iter_type(const struct btree_iter *iter) -{ - return iter->flags & BTREE_ITER_TYPE; -} - -static inline bool btree_iter_is_cached(const struct btree_iter *iter) -{ - return btree_iter_type(iter) == BTREE_ITER_CACHED; -} - -static inline struct btree_iter_level *iter_l(struct btree_iter *iter) -{ - return iter->l + iter->level; -} - struct btree_key_cache { struct mutex lock; struct rhashtable table; @@ -345,9 +337,11 @@ struct btree_insert_entry { u8 bkey_type; enum btree_id btree_id:8; u8 level; - unsigned trans_triggers_run:1; + bool cached:1; + bool trans_triggers_run:1; struct bkey_i *k; - struct btree_iter *iter; + struct btree_path *path; + unsigned long ip_allocated; }; #ifndef CONFIG_LOCKDEP @@ -371,10 +365,11 @@ struct btree_trans { #ifdef CONFIG_BCACHEFS_DEBUG struct list_head list; struct btree *locking; - unsigned locking_iter_idx; + unsigned locking_path_idx; struct bpos locking_pos; u8 locking_btree_id; u8 locking_level; + u8 traverse_all_idx; pid_t pid; #endif unsigned long ip; @@ -392,16 +387,14 @@ struct btree_trans { */ unsigned extra_journal_res; - u64 iters_linked; - u64 iters_live; - u64 iters_touched; + u64 paths_allocated; unsigned mem_top; unsigned mem_bytes; void *mem; - u8 *sorted; - struct btree_iter *iters; + u8 sorted[BTREE_ITER_MAX]; + struct btree_path *paths; struct btree_insert_entry *updates; /* update path: */ @@ -605,16 +598,6 @@ static inline bool btree_node_is_extents(struct btree *b) return btree_node_type_is_extents(btree_node_type(b)); } -static inline enum btree_node_type btree_iter_key_type(struct btree_iter *iter) -{ - return __btree_node_type(iter->level, iter->btree_id); -} - -static inline bool btree_iter_is_extents(struct btree_iter *iter) -{ - return btree_node_type_is_extents(btree_iter_key_type(iter)); -} - #define BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS \ ((1U << BKEY_TYPE_extents)| \ (1U << BKEY_TYPE_inodes)| \ diff --git a/libbcachefs/btree_update.h b/libbcachefs/btree_update.h index 217b52e..23b73d3 100644 --- a/libbcachefs/btree_update.h +++ b/libbcachefs/btree_update.h @@ -8,10 +8,11 @@ struct bch_fs; struct btree; -void bch2_btree_node_lock_for_insert(struct btree_trans *, struct btree_iter *, +void bch2_btree_node_lock_for_insert(struct btree_trans *, struct btree_path *, struct btree *); -bool bch2_btree_bset_insert_key(struct btree_iter *, struct btree *, - struct btree_node_iter *, struct bkey_i *); +bool bch2_btree_bset_insert_key(struct btree_trans *, struct btree_path *, + struct btree *, struct btree_node_iter *, + struct bkey_i *); void bch2_btree_add_journal_pin(struct bch_fs *, struct btree *, u64); enum btree_insert_flags { @@ -134,4 +135,21 @@ static inline int bch2_trans_commit(struct btree_trans *trans, (_i) < (_trans)->updates + (_trans)->nr_updates; \ (_i)++) +static inline struct bkey_i *btree_trans_peek_updates(struct btree_trans *trans, + enum btree_id btree_id, + struct bpos pos) +{ + struct btree_insert_entry *i; + + trans_for_each_update(trans, i) + if ((cmp_int(btree_id, i->btree_id) ?: + bpos_cmp(pos, i->k->k.p)) <= 0) { + if (btree_id == i->btree_id) + return i->k; + break; + } + + return NULL; +} + #endif /* _BCACHEFS_BTREE_UPDATE_H */ diff --git 
a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c index c8c3382..5a1420b 100644 --- a/libbcachefs/btree_update_interior.c +++ b/libbcachefs/btree_update_interior.c @@ -23,8 +23,9 @@ #include static void bch2_btree_insert_node(struct btree_update *, struct btree_trans *, - struct btree_iter *, struct btree *, + struct btree_path *, struct btree *, struct keylist *, unsigned); +static void bch2_btree_update_add_new_node(struct btree_update *, struct btree *); /* Debug code: */ @@ -152,38 +153,25 @@ static void __btree_node_free(struct bch_fs *c, struct btree *b) clear_btree_node_noevict(b); - bch2_btree_node_hash_remove(&c->btree_cache, b); - mutex_lock(&c->btree_cache.lock); list_move(&b->list, &c->btree_cache.freeable); mutex_unlock(&c->btree_cache.lock); } -void bch2_btree_node_free_never_inserted(struct bch_fs *c, struct btree *b) +static void bch2_btree_node_free_inmem(struct btree_trans *trans, + struct btree *b) { - struct open_buckets ob = b->ob; + struct bch_fs *c = trans->c; + struct btree_path *path; - b->ob.nr = 0; + trans_for_each_path(trans, path) + BUG_ON(path->l[b->c.level].b == b); - clear_btree_node_dirty(c, b); + six_lock_write(&b->c.lock, NULL, NULL); - btree_node_lock_type(c, b, SIX_LOCK_write); + bch2_btree_node_hash_remove(&c->btree_cache, b); __btree_node_free(c, b); - six_unlock_write(&b->c.lock); - bch2_open_buckets_put(c, &ob); -} - -void bch2_btree_node_free_inmem(struct bch_fs *c, struct btree *b, - struct btree_iter *iter) -{ - struct btree_iter *linked; - - trans_for_each_iter(iter->trans, linked) - BUG_ON(linked->l[b->c.level].b == b); - - six_lock_write(&b->c.lock, NULL, NULL); - __btree_node_free(c, b); six_unlock_write(&b->c.lock); six_unlock_intent(&b->c.lock); } @@ -773,7 +761,7 @@ static void btree_update_updated_root(struct btree_update *as, struct btree *b) * And it adds @b to the list of @as's new nodes, so that we can update sector * counts in bch2_btree_update_nodes_written: */ -void bch2_btree_update_add_new_node(struct btree_update *as, struct btree *b) +static void bch2_btree_update_add_new_node(struct btree_update *as, struct btree *b) { struct bch_fs *c = as->c; @@ -827,7 +815,7 @@ found: closure_put(&as->cl); } -void bch2_btree_update_get_open_buckets(struct btree_update *as, struct btree *b) +static void bch2_btree_update_get_open_buckets(struct btree_update *as, struct btree *b) { while (b->ob.nr) as->open_buckets[as->nr_open_buckets++] = @@ -839,7 +827,7 @@ void bch2_btree_update_get_open_buckets(struct btree_update *as, struct btree *b * nodes and thus outstanding btree_updates - redirect @b's * btree_updates to point to this btree_update: */ -void bch2_btree_interior_update_will_free_node(struct btree_update *as, +static void bch2_btree_interior_update_will_free_node(struct btree_update *as, struct btree *b) { struct bch_fs *c = as->c; @@ -911,7 +899,7 @@ void bch2_btree_interior_update_will_free_node(struct btree_update *as, as->nr_old_nodes++; } -void bch2_btree_update_done(struct btree_update *as) +static void bch2_btree_update_done(struct btree_update *as) { BUG_ON(as->mode == BTREE_INTERIOR_NO_UPDATE); @@ -925,11 +913,10 @@ void bch2_btree_update_done(struct btree_update *as) as->c->btree_interior_update_worker); } -struct btree_update * -bch2_btree_update_start(struct btree_iter *iter, unsigned level, - unsigned nr_nodes, unsigned flags) +static struct btree_update * +bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, + unsigned level, unsigned nr_nodes, unsigned flags) { - struct 
btree_trans *trans = iter->trans; struct bch_fs *c = trans->c; struct btree_update *as; struct closure cl; @@ -938,7 +925,7 @@ bch2_btree_update_start(struct btree_iter *iter, unsigned level, int journal_flags = 0; int ret = 0; - BUG_ON(!iter->should_be_locked); + BUG_ON(!path->should_be_locked); if (flags & BTREE_INSERT_JOURNAL_RESERVED) journal_flags |= JOURNAL_RES_GET_RESERVED; @@ -950,11 +937,11 @@ retry: * XXX: figure out how far we might need to split, * instead of locking/reserving all the way to the root: */ - if (!bch2_btree_iter_upgrade(iter, U8_MAX)) { + if (!bch2_btree_path_upgrade(trans, path, U8_MAX)) { trace_trans_restart_iter_upgrade(trans->ip, _RET_IP_, - iter->btree_id, - &iter->real_pos); - return ERR_PTR(-EINTR); + path->btree_id, &path->pos); + ret = btree_trans_restart(trans); + return ERR_PTR(ret); } if (flags & BTREE_INSERT_GC_LOCK_HELD) @@ -974,7 +961,7 @@ retry: as->c = c; as->mode = BTREE_INTERIOR_NO_UPDATE; as->took_gc_lock = !(flags & BTREE_INSERT_GC_LOCK_HELD); - as->btree_id = iter->btree_id; + as->btree_id = path->btree_id; INIT_LIST_HEAD(&as->list); INIT_LIST_HEAD(&as->unwritten_list); INIT_LIST_HEAD(&as->write_blocked_list); @@ -1092,8 +1079,10 @@ static void bch2_btree_set_root_inmem(struct bch_fs *c, struct btree *b) * is nothing new to be done. This just guarantees that there is a * journal write. */ -static void bch2_btree_set_root(struct btree_update *as, struct btree *b, - struct btree_iter *iter) +static void bch2_btree_set_root(struct btree_update *as, + struct btree_trans *trans, + struct btree_path *path, + struct btree *b) { struct bch_fs *c = as->c; struct btree *old; @@ -1108,7 +1097,7 @@ static void bch2_btree_set_root(struct btree_update *as, struct btree *b, * Ensure no one is using the old root while we switch to the * new root: */ - bch2_btree_node_lock_write(old, iter); + bch2_btree_node_lock_write(trans, path, old); bch2_btree_set_root_inmem(c, b); @@ -1121,15 +1110,17 @@ static void bch2_btree_set_root(struct btree_update *as, struct btree *b, * an intent lock on the new root, and any updates that would * depend on the new root would have to update the new root. 
*/ - bch2_btree_node_unlock_write(old, iter); + bch2_btree_node_unlock_write(trans, path, old); } /* Interior node updates: */ -static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b, - struct btree_iter *iter, - struct bkey_i *insert, - struct btree_node_iter *node_iter) +static void bch2_insert_fixup_btree_ptr(struct btree_update *as, + struct btree_trans *trans, + struct btree_path *path, + struct btree *b, + struct btree_node_iter *node_iter, + struct bkey_i *insert) { struct bch_fs *c = as->c; struct bkey_packed *k; @@ -1161,15 +1152,18 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b bkey_iter_pos_cmp(b, k, &insert->k.p) < 0) bch2_btree_node_iter_advance(node_iter, b); - bch2_btree_bset_insert_key(iter, b, node_iter, insert); + bch2_btree_bset_insert_key(trans, path, b, node_iter, insert); set_btree_node_dirty(c, b); set_btree_node_need_write(b); } static void -__bch2_btree_insert_keys_interior(struct btree_update *as, struct btree *b, - struct btree_iter *iter, struct keylist *keys, - struct btree_node_iter node_iter) +__bch2_btree_insert_keys_interior(struct btree_update *as, + struct btree_trans *trans, + struct btree_path *path, + struct btree *b, + struct btree_node_iter node_iter, + struct keylist *keys) { struct bkey_i *insert = bch2_keylist_front(keys); struct bkey_packed *k; @@ -1181,8 +1175,8 @@ __bch2_btree_insert_keys_interior(struct btree_update *as, struct btree *b, ; while (!bch2_keylist_empty(keys)) { - bch2_insert_fixup_btree_ptr(as, b, iter, - bch2_keylist_front(keys), &node_iter); + bch2_insert_fixup_btree_ptr(as, trans, path, b, + &node_iter, bch2_keylist_front(keys)); bch2_keylist_pop_front(keys); } } @@ -1192,8 +1186,7 @@ __bch2_btree_insert_keys_interior(struct btree_update *as, struct btree *b, * node) */ static struct btree *__btree_split_node(struct btree_update *as, - struct btree *n1, - struct btree_iter *iter) + struct btree *n1) { struct bkey_format_state s; size_t nr_packed = 0, nr_unpacked = 0; @@ -1308,8 +1301,10 @@ static struct btree *__btree_split_node(struct btree_update *as, * nodes that were coalesced, and thus in the middle of a child node post * coalescing: */ -static void btree_split_insert_keys(struct btree_update *as, struct btree *b, - struct btree_iter *iter, +static void btree_split_insert_keys(struct btree_update *as, + struct btree_trans *trans, + struct btree_path *path, + struct btree *b, struct keylist *keys) { struct btree_node_iter node_iter; @@ -1319,7 +1314,7 @@ static void btree_split_insert_keys(struct btree_update *as, struct btree *b, bch2_btree_node_iter_init(&node_iter, b, &k->k.p); - __bch2_btree_insert_keys_interior(as, b, iter, keys, node_iter); + __bch2_btree_insert_keys_interior(as, trans, path, b, node_iter, keys); /* * We can't tolerate whiteouts here - with whiteouts there can be @@ -1349,18 +1344,17 @@ static void btree_split_insert_keys(struct btree_update *as, struct btree *b, btree_node_interior_verify(as->c, b); } -static void btree_split(struct btree_update *as, - struct btree_trans *trans, struct btree_iter *iter, - struct btree *b, struct keylist *keys, - unsigned flags) +static void btree_split(struct btree_update *as, struct btree_trans *trans, + struct btree_path *path, struct btree *b, + struct keylist *keys, unsigned flags) { struct bch_fs *c = as->c; - struct btree *parent = btree_node_parent(iter, b); + struct btree *parent = btree_node_parent(path, b); struct btree *n1, *n2 = NULL, *n3 = NULL; u64 start_time = local_clock(); BUG_ON(!parent 
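/*
 * Rough shape of the split path being reworked here: keys destined for
 * the node go into the replacement n1; if n1 ends up too full it is
 * split into n1/n2, and when the root itself split, a new root n3 is
 * allocated above them.  Pointers to the new nodes then go to the
 * parent via bch2_btree_insert_node(), which may recurse and split
 * again.
 */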
&& (b != btree_node_root(c, b))); - BUG_ON(!btree_node_intent_locked(iter, btree_node_root(c, b)->c.level)); + BUG_ON(!btree_node_intent_locked(path, btree_node_root(c, b)->c.level)); bch2_btree_interior_update_will_free_node(as, b); @@ -1368,12 +1362,12 @@ static void btree_split(struct btree_update *as, bch2_btree_update_add_new_node(as, n1); if (keys) - btree_split_insert_keys(as, n1, iter, keys); + btree_split_insert_keys(as, trans, path, n1, keys); if (bset_u64s(&n1->set[0]) > BTREE_SPLIT_THRESHOLD(c)) { trace_btree_split(c, b); - n2 = __btree_split_node(as, n1, iter); + n2 = __btree_split_node(as, n1); bch2_btree_build_aux_trees(n2); bch2_btree_build_aux_trees(n1); @@ -1398,7 +1392,7 @@ static void btree_split(struct btree_update *as, n3->sib_u64s[0] = U16_MAX; n3->sib_u64s[1] = U16_MAX; - btree_split_insert_keys(as, n3, iter, &as->parent_keys); + btree_split_insert_keys(as, trans, path, n3, &as->parent_keys); bch2_btree_node_write(c, n3, SIX_LOCK_intent); } @@ -1418,12 +1412,12 @@ static void btree_split(struct btree_update *as, if (parent) { /* Split a non root node */ - bch2_btree_insert_node(as, trans, iter, parent, &as->parent_keys, flags); + bch2_btree_insert_node(as, trans, path, parent, &as->parent_keys, flags); } else if (n3) { - bch2_btree_set_root(as, n3, iter); + bch2_btree_set_root(as, trans, path, n3); } else { /* Root filled up but didn't need to be split */ - bch2_btree_set_root(as, n1, iter); + bch2_btree_set_root(as, trans, path, n1); } bch2_btree_update_get_open_buckets(as, n1); @@ -1432,15 +1426,14 @@ static void btree_split(struct btree_update *as, if (n3) bch2_btree_update_get_open_buckets(as, n3); - /* Successful split, update the iterator to point to the new nodes: */ + /* Successful split, update the path to point to the new nodes: */ six_lock_increment(&b->c.lock, SIX_LOCK_intent); - bch2_btree_iter_node_drop(iter, b); if (n3) - bch2_btree_iter_node_replace(iter, n3); + bch2_trans_node_add(trans, n3); if (n2) - bch2_btree_iter_node_replace(iter, n2); - bch2_btree_iter_node_replace(iter, n1); + bch2_trans_node_add(trans, n2); + bch2_trans_node_add(trans, n1); /* * The old node must be freed (in memory) _before_ unlocking the new @@ -1448,7 +1441,7 @@ static void btree_split(struct btree_update *as, * node after another thread has locked and updated the new node, thus * seeing stale data: */ - bch2_btree_node_free_inmem(c, b, iter); + bch2_btree_node_free_inmem(trans, b); if (n3) six_unlock_intent(&n3->c.lock); @@ -1456,26 +1449,30 @@ static void btree_split(struct btree_update *as, six_unlock_intent(&n2->c.lock); six_unlock_intent(&n1->c.lock); - bch2_btree_trans_verify_locks(trans); + bch2_trans_verify_locks(trans); bch2_time_stats_update(&c->times[BCH_TIME_btree_node_split], start_time); } static void -bch2_btree_insert_keys_interior(struct btree_update *as, struct btree *b, - struct btree_iter *iter, struct keylist *keys) +bch2_btree_insert_keys_interior(struct btree_update *as, + struct btree_trans *trans, + struct btree_path *path, + struct btree *b, + struct keylist *keys) { - struct btree_iter *linked; + struct btree_path *linked; - __bch2_btree_insert_keys_interior(as, b, iter, keys, iter->l[b->c.level].iter); + __bch2_btree_insert_keys_interior(as, trans, path, b, + path->l[b->c.level].iter, keys); btree_update_updated_node(as, b); - trans_for_each_iter_with_node(iter->trans, b, linked) + trans_for_each_path_with_node(trans, b, linked) bch2_btree_node_iter_peek(&linked->l[b->c.level].iter, b); - bch2_btree_trans_verify_iters(iter->trans, b); + 
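/*
 * Interior node iterators point into the node's bsets, so after the
 * insert above every path referencing this node re-peeks its node
 * iterator to resync it; verification now walks the transaction's
 * paths rather than its iterators:
 */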
bch2_trans_verify_paths(trans); } /** @@ -1490,10 +1487,9 @@ bch2_btree_insert_keys_interior(struct btree_update *as, struct btree *b, * If a split occurred, this function will return early. This can only happen * for leaf nodes -- inserts into interior nodes have to be atomic. */ -static void bch2_btree_insert_node(struct btree_update *as, - struct btree_trans *trans, struct btree_iter *iter, - struct btree *b, struct keylist *keys, - unsigned flags) +static void bch2_btree_insert_node(struct btree_update *as, struct btree_trans *trans, + struct btree_path *path, struct btree *b, + struct keylist *keys, unsigned flags) { struct bch_fs *c = as->c; int old_u64s = le16_to_cpu(btree_bset_last(b)->u64s); @@ -1501,21 +1497,21 @@ static void bch2_btree_insert_node(struct btree_update *as, int live_u64s_added, u64s_added; lockdep_assert_held(&c->gc_lock); - BUG_ON(!btree_node_intent_locked(iter, btree_node_root(c, b)->c.level)); + BUG_ON(!btree_node_intent_locked(path, btree_node_root(c, b)->c.level)); BUG_ON(!b->c.level); BUG_ON(!as || as->b); bch2_verify_keylist_sorted(keys); - bch2_btree_node_lock_for_insert(trans, iter, b); + bch2_btree_node_lock_for_insert(trans, path, b); if (!bch2_btree_node_insert_fits(c, b, bch2_keylist_u64s(keys))) { - bch2_btree_node_unlock_write(b, iter); + bch2_btree_node_unlock_write(trans, path, b); goto split; } btree_node_interior_verify(c, b); - bch2_btree_insert_keys_interior(as, b, iter, keys); + bch2_btree_insert_keys_interior(as, trans, path, b, keys); live_u64s_added = (int) b->nr.live_u64s - old_live_u64s; u64s_added = (int) le16_to_cpu(btree_bset_last(b)->u64s) - old_u64s; @@ -1527,48 +1523,48 @@ static void bch2_btree_insert_node(struct btree_update *as, if (u64s_added > live_u64s_added && bch2_maybe_compact_whiteouts(c, b)) - bch2_btree_iter_reinit_node(iter, b); + bch2_trans_node_reinit_iter(trans, b); - bch2_btree_node_unlock_write(b, iter); + bch2_btree_node_unlock_write(trans, path, b); btree_node_interior_verify(c, b); return; split: - btree_split(as, trans, iter, b, keys, flags); + btree_split(as, trans, path, b, keys, flags); } int bch2_btree_split_leaf(struct btree_trans *trans, - struct btree_iter *iter, + struct btree_path *path, unsigned flags) { struct bch_fs *c = trans->c; - struct btree *b = iter_l(iter)->b; + struct btree *b = path_l(path)->b; struct btree_update *as; unsigned l; int ret = 0; - as = bch2_btree_update_start(iter, iter->level, + as = bch2_btree_update_start(trans, path, path->level, btree_update_reserve_required(c, b), flags); if (IS_ERR(as)) return PTR_ERR(as); - btree_split(as, trans, iter, b, NULL, flags); + btree_split(as, trans, path, b, NULL, flags); bch2_btree_update_done(as); - for (l = iter->level + 1; btree_iter_node(iter, l) && !ret; l++) - ret = bch2_foreground_maybe_merge(trans, iter, l, flags); + for (l = path->level + 1; btree_path_node(path, l) && !ret; l++) + ret = bch2_foreground_maybe_merge(trans, path, l, flags); return ret; } int __bch2_foreground_maybe_merge(struct btree_trans *trans, - struct btree_iter *iter, + struct btree_path *path, unsigned level, unsigned flags, enum btree_node_sibling sib) { struct bch_fs *c = trans->c; - struct btree_iter *sib_iter = NULL; + struct btree_path *sib_path = NULL; struct btree_update *as; struct bkey_format_state new_s; struct bkey_format new_f; @@ -1576,39 +1572,35 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans, struct btree *b, *m, *n, *prev, *next, *parent; struct bpos sib_pos; size_t sib_u64s; - int ret = 0, ret2 = 0; - -retry: - ret = 
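/*
 * Note the retry loop and ret2 plumbing deleted here: with restarts
 * handled at the transaction level, a merge that races now just
 * returns -EINTR and lets the caller restart, instead of relocking
 * and retrying internally:
 */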
bch2_btree_iter_traverse(iter); - if (ret) - return ret; + int ret = 0; - BUG_ON(!iter->should_be_locked); - BUG_ON(!btree_node_locked(iter, level)); + BUG_ON(!path->should_be_locked); + BUG_ON(!btree_node_locked(path, level)); - b = iter->l[level].b; + b = path->l[level].b; if ((sib == btree_prev_sib && !bpos_cmp(b->data->min_key, POS_MIN)) || (sib == btree_next_sib && !bpos_cmp(b->data->max_key, SPOS_MAX))) { b->sib_u64s[sib] = U16_MAX; - goto out; + return 0; } sib_pos = sib == btree_prev_sib ? bpos_predecessor(b->data->min_key) : bpos_successor(b->data->max_key); - sib_iter = bch2_trans_get_node_iter(trans, iter->btree_id, - sib_pos, U8_MAX, level, - BTREE_ITER_INTENT); - ret = bch2_btree_iter_traverse(sib_iter); + sib_path = bch2_path_get(trans, false, path->btree_id, + sib_pos, U8_MAX, level, true); + ret = bch2_btree_path_traverse(trans, sib_path, false); if (ret) goto err; - m = sib_iter->l[level].b; + sib_path->should_be_locked = true; - if (btree_node_parent(iter, b) != - btree_node_parent(sib_iter, m)) { + m = sib_path->l[level].b; + + if (btree_node_parent(path, b) != + btree_node_parent(sib_path, m)) { b->sib_u64s[sib] = U16_MAX; goto out; } @@ -1659,8 +1651,8 @@ retry: if (b->sib_u64s[sib] > c->btree_foreground_merge_threshold) goto out; - parent = btree_node_parent(iter, b); - as = bch2_btree_update_start(iter, level, + parent = btree_node_parent(path, b); + as = bch2_btree_update_start(trans, path, level, btree_update_reserve_required(c, parent) + 1, flags| BTREE_INSERT_NOFAIL| @@ -1696,47 +1688,32 @@ retry: bch2_keylist_add(&as->parent_keys, &delete); bch2_keylist_add(&as->parent_keys, &n->key); - bch2_btree_insert_node(as, trans, iter, parent, &as->parent_keys, flags); + bch2_trans_verify_paths(trans); + + bch2_btree_insert_node(as, trans, path, parent, &as->parent_keys, flags); + + bch2_trans_verify_paths(trans); bch2_btree_update_get_open_buckets(as, n); six_lock_increment(&b->c.lock, SIX_LOCK_intent); six_lock_increment(&m->c.lock, SIX_LOCK_intent); - bch2_btree_iter_node_drop(iter, b); - bch2_btree_iter_node_drop(iter, m); - bch2_btree_iter_node_replace(iter, n); + bch2_trans_node_add(trans, n); - bch2_btree_trans_verify_iters(trans, n); + bch2_trans_verify_paths(trans); - bch2_btree_node_free_inmem(c, b, iter); - bch2_btree_node_free_inmem(c, m, iter); + bch2_btree_node_free_inmem(trans, b); + bch2_btree_node_free_inmem(trans, m); six_unlock_intent(&n->c.lock); bch2_btree_update_done(as); out: - bch2_btree_trans_verify_locks(trans); - bch2_trans_iter_free(trans, sib_iter); - - /* - * Don't downgrade locks here: we're called after successful insert, - * and the caller will downgrade locks after a successful insert - * anyways (in case e.g. a split was required first) - * - * And we're also called when inserting into interior nodes in the - * split path, and downgrading to read locks in there is potentially - * confusing: - */ - return ret ?: ret2; err: - bch2_trans_iter_put(trans, sib_iter); - sib_iter = NULL; - - if (ret == -EINTR && bch2_trans_relock(trans)) - goto retry; - - goto out; + bch2_path_put(trans, sib_path, true); + bch2_trans_verify_locks(trans); + return ret; } /** @@ -1761,8 +1738,8 @@ retry: if (!b || b->data->keys.seq != seq) goto out; - parent = btree_node_parent(iter, b); - as = bch2_btree_update_start(iter, b->c.level, + parent = btree_node_parent(iter->path, b); + as = bch2_btree_update_start(trans, iter->path, b->c.level, (parent ? 
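/*
 * (In the merge path above, the sibling is now found with
 * bch2_path_get()/bch2_btree_path_traverse() and released with
 * bch2_path_put(): paths are refcounted objects shared between
 * iterators, where the old code allocated a throwaway btree_iter
 * just for the sibling.)
 */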
btree_update_reserve_required(c, parent) : 0) + 1, @@ -1789,23 +1766,22 @@ retry: if (parent) { bch2_keylist_add(&as->parent_keys, &n->key); - bch2_btree_insert_node(as, trans, iter, parent, + bch2_btree_insert_node(as, trans, iter->path, parent, &as->parent_keys, flags); } else { - bch2_btree_set_root(as, n, iter); + bch2_btree_set_root(as, trans, iter->path, n); } bch2_btree_update_get_open_buckets(as, n); six_lock_increment(&b->c.lock, SIX_LOCK_intent); - bch2_btree_iter_node_drop(iter, b); - bch2_btree_iter_node_replace(iter, n); - bch2_btree_node_free_inmem(c, b, iter); + bch2_trans_node_add(trans, n); + bch2_btree_node_free_inmem(trans, b); six_unlock_intent(&n->c.lock); bch2_btree_update_done(as); out: - bch2_btree_iter_downgrade(iter); + bch2_btree_path_downgrade(iter->path); return ret; } @@ -1824,13 +1800,13 @@ void async_btree_node_rewrite_work(struct work_struct *work) container_of(work, struct async_btree_rewrite, work); struct bch_fs *c = a->c; struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; bch2_trans_init(&trans, c, 0, 0); - iter = bch2_trans_get_node_iter(&trans, a->btree_id, a->pos, + bch2_trans_node_iter_init(&trans, &iter, a->btree_id, a->pos, BTREE_MAX_DEPTH, a->level, 0); - bch2_btree_node_rewrite(&trans, iter, a->seq, 0); - bch2_trans_iter_put(&trans, iter); + bch2_btree_node_rewrite(&trans, &iter, a->seq, 0); + bch2_trans_iter_exit(&trans, &iter); bch2_trans_exit(&trans); percpu_ref_put(&c->writes); kfree(a); @@ -1869,7 +1845,7 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans, bool skip_triggers) { struct bch_fs *c = trans->c; - struct btree_iter *iter2 = NULL; + struct btree_iter iter2 = { NULL }; struct btree *parent; u64 journal_entries[BKEY_BTREE_PTR_U64s_MAX]; int ret; @@ -1897,19 +1873,22 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans, BUG_ON(ret); } - parent = btree_node_parent(iter, b); + parent = btree_node_parent(iter->path, b); if (parent) { - iter2 = bch2_trans_copy_iter(trans, iter); + bch2_trans_copy_iter(&iter2, iter); - BUG_ON(iter2->level != b->c.level); - BUG_ON(bpos_cmp(iter2->pos, new_key->k.p)); + iter2.path = bch2_btree_path_make_mut(trans, iter2.path, + iter2.flags & BTREE_ITER_INTENT); - btree_node_unlock(iter2, iter2->level); - iter2->l[iter2->level].b = BTREE_ITER_NO_NODE_UP; - iter2->level++; + BUG_ON(iter2.path->level != b->c.level); + BUG_ON(bpos_cmp(iter2.path->pos, new_key->k.p)); - ret = bch2_btree_iter_traverse(iter2) ?: - bch2_trans_update(trans, iter2, new_key, BTREE_TRIGGER_NORUN); + btree_node_unlock(iter2.path, iter2.path->level); + path_l(iter2.path)->b = BTREE_ITER_NO_NODE_UP; + iter2.path->level++; + + ret = bch2_btree_iter_traverse(&iter2) ?: + bch2_trans_update(trans, &iter2, new_key, BTREE_TRIGGER_NORUN); if (ret) goto err; } else { @@ -1931,7 +1910,7 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans, if (ret) goto err; - bch2_btree_node_lock_write(b, iter); + bch2_btree_node_lock_write(trans, iter->path, b); if (new_hash) { mutex_lock(&c->btree_cache.lock); @@ -1946,9 +1925,9 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans, bkey_copy(&b->key, new_key); } - bch2_btree_node_unlock_write(b, iter); + bch2_btree_node_unlock_write(trans, iter->path, b); out: - bch2_trans_iter_put(trans, iter2); + bch2_trans_iter_exit(trans, &iter2); return ret; err: if (new_hash) { @@ -2006,18 +1985,18 @@ int bch2_btree_node_update_key_get_iter(struct btree_trans *trans, struct btree *b, struct bkey_i *new_key, bool skip_triggers) { 
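/*
 * The conversion pattern used throughout this patch, sketched with the
 * names introduced here: iterators live on the stack and wrap a
 * refcounted btree_path, with paired init/exit calls.
 *
 *	struct btree_trans trans;
 *	struct btree_iter iter;
 *
 *	bch2_trans_init(&trans, c, 0, 0);
 *	bch2_trans_node_iter_init(&trans, &iter, btree_id, pos,
 *				  BTREE_MAX_DEPTH, level, 0);
 *	ret = bch2_btree_iter_traverse(&iter);
 *	...
 *	bch2_trans_iter_exit(&trans, &iter);
 *	bch2_trans_exit(&trans);
 *
 * bch2_trans_iter_exit() drops the iterator's reference on its path;
 * the path itself may be kept around for reuse within the transaction.
 */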
- struct btree_iter *iter; + struct btree_iter iter; int ret; - iter = bch2_trans_get_node_iter(trans, b->c.btree_id, b->key.k.p, - BTREE_MAX_DEPTH, b->c.level, - BTREE_ITER_INTENT); - ret = bch2_btree_iter_traverse(iter); + bch2_trans_node_iter_init(trans, &iter, b->c.btree_id, b->key.k.p, + BTREE_MAX_DEPTH, b->c.level, + BTREE_ITER_INTENT); + ret = bch2_btree_iter_traverse(&iter); if (ret) goto out; /* has node been freed? */ - if (iter->l[b->c.level].b != b) { + if (iter.path->l[b->c.level].b != b) { /* node has been freed: */ BUG_ON(!btree_node_dying(b)); goto out; @@ -2025,9 +2004,9 @@ int bch2_btree_node_update_key_get_iter(struct btree_trans *trans, BUG_ON(!btree_node_hashed(b)); - ret = bch2_btree_node_update_key(trans, iter, b, new_key, skip_triggers); + ret = bch2_btree_node_update_key(trans, &iter, b, new_key, skip_triggers); out: - bch2_trans_iter_put(trans, iter); + bch2_trans_iter_exit(trans, &iter); return ret; } diff --git a/libbcachefs/btree_update_interior.h b/libbcachefs/btree_update_interior.h index e88e737..8e03bd9 100644 --- a/libbcachefs/btree_update_interior.h +++ b/libbcachefs/btree_update_interior.h @@ -113,57 +113,39 @@ struct btree_update { u64 inline_keys[BKEY_BTREE_PTR_U64s_MAX * 3]; }; -void bch2_btree_node_free_inmem(struct bch_fs *, struct btree *, - struct btree_iter *); -void bch2_btree_node_free_never_inserted(struct bch_fs *, struct btree *); - -void bch2_btree_update_get_open_buckets(struct btree_update *, struct btree *); - struct btree *__bch2_btree_node_alloc_replacement(struct btree_update *, struct btree *, struct bkey_format); -void bch2_btree_update_done(struct btree_update *); -struct btree_update * -bch2_btree_update_start(struct btree_iter *, unsigned, unsigned, unsigned); - -void bch2_btree_interior_update_will_free_node(struct btree_update *, - struct btree *); -void bch2_btree_update_add_new_node(struct btree_update *, struct btree *); - -int bch2_btree_split_leaf(struct btree_trans *, struct btree_iter *, unsigned); +int bch2_btree_split_leaf(struct btree_trans *, struct btree_path *, unsigned); -int __bch2_foreground_maybe_merge(struct btree_trans *, struct btree_iter *, +int __bch2_foreground_maybe_merge(struct btree_trans *, struct btree_path *, unsigned, unsigned, enum btree_node_sibling); static inline int bch2_foreground_maybe_merge_sibling(struct btree_trans *trans, - struct btree_iter *iter, + struct btree_path *path, unsigned level, unsigned flags, enum btree_node_sibling sib) { struct btree *b; - if (iter->uptodate >= BTREE_ITER_NEED_TRAVERSE) - return 0; - - if (!bch2_btree_node_relock(iter, level)) - return 0; + EBUG_ON(!btree_node_locked(path, level)); - b = iter->l[level].b; + b = path->l[level].b; if (b->sib_u64s[sib] > trans->c->btree_foreground_merge_threshold) return 0; - return __bch2_foreground_maybe_merge(trans, iter, level, flags, sib); + return __bch2_foreground_maybe_merge(trans, path, level, flags, sib); } static inline int bch2_foreground_maybe_merge(struct btree_trans *trans, - struct btree_iter *iter, + struct btree_path *path, unsigned level, unsigned flags) { - return bch2_foreground_maybe_merge_sibling(trans, iter, level, flags, + return bch2_foreground_maybe_merge_sibling(trans, path, level, flags, btree_prev_sib) ?: - bch2_foreground_maybe_merge_sibling(trans, iter, level, flags, + bch2_foreground_maybe_merge_sibling(trans, path, level, flags, btree_next_sib); } diff --git a/libbcachefs/btree_update_leaf.c b/libbcachefs/btree_update_leaf.c index 7e9909e..9c8c5ca 100644 --- a/libbcachefs/btree_update_leaf.c 
+++ b/libbcachefs/btree_update_leaf.c @@ -29,40 +29,59 @@ static inline int btree_insert_entry_cmp(const struct btree_insert_entry *l, bpos_cmp(l->k->k.p, r->k->k.p); } +static inline struct btree_path_level *insert_l(struct btree_insert_entry *i) +{ + return i->path->l + i->level; +} + static inline bool same_leaf_as_prev(struct btree_trans *trans, struct btree_insert_entry *i) { return i != trans->updates && - iter_l(i[0].iter)->b == iter_l(i[-1].iter)->b; + insert_l(&i[0])->b == insert_l(&i[-1])->b; +} + +static inline bool same_leaf_as_next(struct btree_trans *trans, + struct btree_insert_entry *i) +{ + return i + 1 < trans->updates + trans->nr_updates && + insert_l(&i[0])->b == insert_l(&i[1])->b; } -inline void bch2_btree_node_lock_for_insert(struct btree_trans *trans, - struct btree_iter *iter, - struct btree *b) +static inline void bch2_btree_node_prep_for_write(struct btree_trans *trans, + struct btree_path *path, + struct btree *b) { struct bch_fs *c = trans->c; - bch2_btree_node_lock_write(b, iter); - - if (btree_iter_type(iter) == BTREE_ITER_CACHED) + if (path->cached) return; if (unlikely(btree_node_just_written(b)) && bch2_btree_post_write_cleanup(c, b)) - bch2_btree_iter_reinit_node(iter, b); + bch2_trans_node_reinit_iter(trans, b); /* * If the last bset has been written, or if it's gotten too big - start * a new bset to insert into: */ if (want_new_bset(c, b)) - bch2_btree_init_next(trans, iter, b); + bch2_btree_init_next(trans, b); +} + +void bch2_btree_node_lock_for_insert(struct btree_trans *trans, + struct btree_path *path, + struct btree *b) +{ + bch2_btree_node_lock_write(trans, path, b); + bch2_btree_node_prep_for_write(trans, path, b); } /* Inserting into a given leaf node (last stage of insert): */ /* Handle overwrites and do insert, for non extents: */ -bool bch2_btree_bset_insert_key(struct btree_iter *iter, +bool bch2_btree_bset_insert_key(struct btree_trans *trans, + struct btree_path *path, struct btree *b, struct btree_node_iter *node_iter, struct bkey_i *insert) @@ -76,8 +95,7 @@ bool bch2_btree_bset_insert_key(struct btree_iter *iter, EBUG_ON(bpos_cmp(insert->k.p, b->data->min_key) < 0); EBUG_ON(bpos_cmp(insert->k.p, b->data->max_key) > 0); EBUG_ON(insert->k.u64s > - bch_btree_keys_u64s_remaining(iter->trans->c, b)); - EBUG_ON(iter->flags & BTREE_ITER_IS_EXTENTS); + bch_btree_keys_u64s_remaining(trans->c, b)); k = bch2_btree_node_iter_peek_all(node_iter, b); if (k && bkey_cmp_left_packed(b, k, &insert->k.p)) @@ -96,7 +114,7 @@ bool bch2_btree_bset_insert_key(struct btree_iter *iter, k->type = KEY_TYPE_deleted; if (k->needs_whiteout) - push_whiteout(iter->trans->c, b, insert->k.p); + push_whiteout(trans->c, b, insert->k.p); k->needs_whiteout = false; if (k >= btree_bset_last(b)->start) { @@ -104,7 +122,7 @@ bool bch2_btree_bset_insert_key(struct btree_iter *iter, bch2_bset_delete(b, k, clobber_u64s); goto fix_iter; } else { - bch2_btree_iter_fix_key_modified(iter, b, k); + bch2_btree_path_fix_key_modified(trans, b, k); } return true; @@ -122,7 +140,7 @@ bool bch2_btree_bset_insert_key(struct btree_iter *iter, clobber_u64s = k->u64s; goto overwrite; } else { - bch2_btree_iter_fix_key_modified(iter, b, k); + bch2_btree_path_fix_key_modified(trans, b, k); } } @@ -132,7 +150,7 @@ overwrite: new_u64s = k->u64s; fix_iter: if (clobber_u64s != new_u64s) - bch2_btree_node_iter_fix(iter, b, node_iter, k, + bch2_btree_node_iter_fix(trans, path, b, node_iter, k, clobber_u64s, new_u64s); return true; } @@ -176,22 +194,21 @@ inline void bch2_btree_add_journal_pin(struct 
bch_fs *c, * btree_insert_key - insert a key one key into a leaf node */ static bool btree_insert_key_leaf(struct btree_trans *trans, - struct btree_iter *iter, - struct bkey_i *insert) + struct btree_insert_entry *insert) { struct bch_fs *c = trans->c; - struct btree *b = iter_l(iter)->b; + struct btree *b = insert_l(insert)->b; struct bset_tree *t = bset_tree_last(b); struct bset *i = bset(b, t); int old_u64s = bset_u64s(t); int old_live_u64s = b->nr.live_u64s; int live_u64s_added, u64s_added; - EBUG_ON(!iter->level && + EBUG_ON(!insert->level && !test_bit(BCH_FS_BTREE_INTERIOR_REPLAY_DONE, &c->flags)); - if (unlikely(!bch2_btree_bset_insert_key(iter, b, - &iter_l(iter)->iter, insert))) + if (unlikely(!bch2_btree_bset_insert_key(trans, insert->path, b, + &insert_l(insert)->iter, insert->k))) return false; i->journal_seq = cpu_to_le64(max(trans->journal_res.seq, @@ -212,9 +229,8 @@ static bool btree_insert_key_leaf(struct btree_trans *trans, if (u64s_added > live_u64s_added && bch2_maybe_compact_whiteouts(c, b)) - bch2_btree_iter_reinit_node(iter, b); + bch2_trans_node_reinit_iter(trans, b); - trace_btree_insert_key(c, b, insert); return true; } @@ -225,9 +241,10 @@ static bool btree_insert_key_leaf(struct btree_trans *trans, static inline void btree_insert_entry_checks(struct btree_trans *trans, struct btree_insert_entry *i) { - BUG_ON(bpos_cmp(i->k->k.p, i->iter->real_pos)); - BUG_ON(i->level != i->iter->level); - BUG_ON(i->btree_id != i->iter->btree_id); + BUG_ON(bpos_cmp(i->k->k.p, i->path->pos)); + BUG_ON(i->cached != i->path->cached); + BUG_ON(i->level != i->path->level); + BUG_ON(i->btree_id != i->path->btree_id); } static noinline int @@ -267,13 +284,12 @@ static inline int bch2_trans_journal_res_get(struct btree_trans *trans, return ret == -EAGAIN ? BTREE_INSERT_NEED_JOURNAL_RES : ret; } -static enum btree_insert_ret +static inline enum btree_insert_ret btree_key_can_insert(struct btree_trans *trans, - struct btree_iter *iter, + struct btree *b, unsigned u64s) { struct bch_fs *c = trans->c; - struct btree *b = iter_l(iter)->b; if (!bch2_btree_node_insert_fits(c, b, u64s)) return BTREE_INSERT_BTREE_NODE_FULL; @@ -283,14 +299,14 @@ btree_key_can_insert(struct btree_trans *trans, static enum btree_insert_ret btree_key_can_insert_cached(struct btree_trans *trans, - struct btree_iter *iter, + struct btree_path *path, unsigned u64s) { - struct bkey_cached *ck = (void *) iter->l[0].b; + struct bkey_cached *ck = (void *) path->l[0].b; unsigned new_u64s; struct bkey_i *new_k; - BUG_ON(iter->level); + EBUG_ON(path->level); if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags) && bch2_btree_key_cache_must_wait(trans->c) && @@ -328,9 +344,9 @@ static inline void do_btree_insert_one(struct btree_trans *trans, i->k->k.needs_whiteout = false; - did_work = (btree_iter_type(i->iter) != BTREE_ITER_CACHED) - ? btree_insert_key_leaf(trans, i->iter, i->k) - : bch2_btree_insert_key_cached(trans, i->iter, i->k); + did_work = !i->cached + ? 
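/*
 * do_btree_insert_one() now dispatches on i->cached instead of on an
 * iterator type: cached updates go through the btree key cache,
 * everything else through the leaf node that i->path holds locked:
 */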
btree_insert_key_leaf(trans, i) + : bch2_btree_insert_key_cached(trans, i->path, i->k); if (!did_work) return; @@ -356,11 +372,12 @@ static noinline void bch2_trans_mark_gc(struct btree_trans *trans) trans_for_each_update(trans, i) { /* * XXX: synchronization of cached update triggers with gc + * XXX: synchronization of interior node updates with gc */ - BUG_ON(btree_iter_type(i->iter) == BTREE_ITER_CACHED); + BUG_ON(i->cached || i->level); - if (gc_visited(c, gc_pos_btree_node(i->iter->l[0].b))) - bch2_mark_update(trans, i->iter, i->k, + if (gc_visited(c, gc_pos_btree_node(insert_l(i)->b))) + bch2_mark_update(trans, i->path, i->k, i->flags|BTREE_TRIGGER_GC); } } @@ -405,9 +422,9 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, u64s = 0; u64s += i->k->k.u64s; - ret = btree_iter_type(i->iter) != BTREE_ITER_CACHED - ? btree_key_can_insert(trans, i->iter, u64s) - : btree_key_can_insert_cached(trans, i->iter, u64s); + ret = !i->cached + ? btree_key_can_insert(trans, insert_l(i)->b, u64s) + : btree_key_can_insert_cached(trans, i->path, u64s); if (ret) { *stopped_at = i; return ret; @@ -466,8 +483,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, trans_for_each_update(trans, i) if (BTREE_NODE_TYPE_HAS_MEM_TRIGGERS & (1U << i->bkey_type)) - bch2_mark_update(trans, i->iter, i->k, - i->flags); + bch2_mark_update(trans, i->path, i->k, i->flags); if (marking && trans->fs_usage_deltas) bch2_trans_fs_usage_apply(trans, trans->fs_usage_deltas); @@ -485,42 +501,96 @@ err: return ret; } -static noinline int maybe_do_btree_merge(struct btree_trans *trans, struct btree_iter *iter) +static inline void upgrade_readers(struct btree_trans *trans, struct btree_path *path) { - struct btree_insert_entry *i; - struct btree *b = iter_l(iter)->b; - struct bkey_s_c old; - int u64s_delta = 0; - int ret; + struct btree *b = path_l(path)->b; - /* - * Inserting directly into interior nodes is an uncommon operation with - * various weird edge cases: also, a lot of things about - * BTREE_ITER_NODES iters need to be audited - */ - if (unlikely(btree_iter_type(iter) != BTREE_ITER_KEYS)) - return 0; + do { + if (path->nodes_locked && + path->nodes_locked != path->nodes_intent_locked) + BUG_ON(!bch2_btree_path_upgrade(trans, path, path->level + 1)); + } while ((path = prev_btree_path(trans, path)) && + path_l(path)->b == b); +} + +/* + * Check for nodes that we have both read and intent locks on, and upgrade the + * readers to intent: + */ +static inline void normalize_read_intent_locks(struct btree_trans *trans) +{ + struct btree_path *path; + unsigned i, nr_read = 0, nr_intent = 0; + + trans_for_each_path_inorder(trans, path, i) { + struct btree_path *next = i + 1 < trans->nr_sorted + ? 
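/*
 * Rationale, from the comment this patch deletes further down: we
 * can't take write locks while still holding read locks on the same
 * nodes - another thread could hold an intent lock plus its own read
 * lock, each thread then blocking the other.  So before taking write
 * locks, any node on which we hold both read and intent locks gets
 * its readers upgraded to intent:
 */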
trans->paths + trans->sorted[i + 1] + : NULL; + + if (path->nodes_locked) { + if (path->nodes_intent_locked) + nr_intent++; + else + nr_read++; + } + + if (!next || path_l(path)->b != path_l(next)->b) { + if (nr_read && nr_intent) + upgrade_readers(trans, path); + + nr_read = nr_intent = 0; + } + } + + bch2_trans_verify_locks(trans); +} + +static inline bool have_conflicting_read_lock(struct btree_trans *trans, struct btree_path *pos) +{ + struct btree_path *path; + unsigned i; - BUG_ON(iter->level); + trans_for_each_path_inorder(trans, path, i) { + //if (path == pos) + // break; + + if (path->nodes_locked != path->nodes_intent_locked) + return true; + } + + return false; +} + +static inline int trans_lock_write(struct btree_trans *trans) +{ + struct btree_insert_entry *i; trans_for_each_update(trans, i) { - if (iter_l(i->iter)->b != b) + if (same_leaf_as_prev(trans, i)) continue; - old = bch2_btree_iter_peek_slot(i->iter); - ret = bkey_err(old); - if (ret) - return ret; + if (!six_trylock_write(&insert_l(i)->b->c.lock)) { + if (have_conflicting_read_lock(trans, i->path)) + goto fail; - u64s_delta += !bkey_deleted(&i->k->k) ? i->k->k.u64s : 0; - u64s_delta -= !bkey_deleted(old.k) ? old.k->u64s : 0; + __btree_node_lock_type(trans->c, insert_l(i)->b, + SIX_LOCK_write); + } + + bch2_btree_node_prep_for_write(trans, i->path, insert_l(i)->b); } - if (u64s_delta > 0) - return 0; + return 0; +fail: + while (--i >= trans->updates) { + if (same_leaf_as_prev(trans, i)) + continue; + + bch2_btree_node_unlock_write_inlined(trans, i->path, insert_l(i)->b); + } - return bch2_foreground_maybe_merge(trans, iter, - iter->level, trans->flags); + trace_trans_restart_would_deadlock_write(trans->ip); + return btree_trans_restart(trans); } /* @@ -532,29 +602,55 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, { struct bch_fs *c = trans->c; struct btree_insert_entry *i; - struct btree_iter *iter; - int ret; + struct bkey_s_c old; + int ret, u64s_delta = 0; trans_for_each_update(trans, i) { - struct btree *b; + const char *invalid = bch2_bkey_invalid(c, + bkey_i_to_s_c(i->k), i->bkey_type); + if (invalid) { + char buf[200]; - BUG_ON(!btree_node_intent_locked(i->iter, i->level)); + bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(i->k)); + bch_err(c, "invalid bkey %s on insert from %ps -> %ps: %s\n", + buf, (void *) trans->ip, + (void *) i->ip_allocated, invalid); + bch2_fatal_error(c); + return -EINVAL; + } + btree_insert_entry_checks(trans, i); + } + + trans_for_each_update(trans, i) { + struct bkey u; - if (btree_iter_type(i->iter) == BTREE_ITER_CACHED) + /* + * peek_slot() doesn't yet work on iterators that point to + * interior nodes: + */ + if (i->cached || i->level) continue; - b = iter_l(i->iter)->b; - if (b->sib_u64s[0] < c->btree_foreground_merge_threshold || - b->sib_u64s[1] < c->btree_foreground_merge_threshold) { - ret = maybe_do_btree_merge(trans, i->iter); - if (unlikely(ret)) - return ret; + old = bch2_btree_path_peek_slot(i->path, &u); + ret = bkey_err(old); + if (unlikely(ret)) + return ret; + + u64s_delta += !bkey_deleted(&i->k->k) ? i->k->k.u64s : 0; + u64s_delta -= !bkey_deleted(old.k) ? 
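/*
 * u64s_delta tracks whether this group of updates grows or shrinks
 * the leaf: new key sizes are added, overwritten key sizes
 * subtracted.  A non-positive delta on a leaf makes it worth checking
 * for a merge before committing, replacing the old standalone
 * maybe_do_btree_merge() pass:
 */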
old.k->u64s : 0; + + if (!same_leaf_as_next(trans, i)) { + if (u64s_delta <= 0) { + ret = bch2_foreground_maybe_merge(trans, i->path, + i->level, trans->flags); + if (unlikely(ret)) + return ret; + } + + u64s_delta = 0; } } - trans_for_each_update(trans, i) - BUG_ON(!btree_node_intent_locked(i->iter, i->level)); - ret = bch2_journal_preres_get(&c->journal, &trans->journal_preres, trans->journal_preres_u64s, JOURNAL_RES_GET_NONBLOCK| @@ -566,52 +662,18 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, if (unlikely(ret)) return ret; - /* - * Can't be holding any read locks when we go to take write locks: - * another thread could be holding an intent lock on the same node we - * have a read lock on, and it'll block trying to take a write lock - * (because we hold a read lock) and it could be blocking us by holding - * its own read lock (while we're trying to to take write locks). - * - * note - this must be done after bch2_trans_journal_preres_get_cold() - * or anything else that might call bch2_trans_relock(), since that - * would just retake the read locks: - */ - trans_for_each_iter(trans, iter) - if (iter->nodes_locked != iter->nodes_intent_locked && - !bch2_btree_iter_upgrade(iter, 1)) { - trace_trans_restart_upgrade(trans->ip, trace_ip, - iter->btree_id, - &iter->real_pos); - trans->restarted = true; - return -EINTR; - } - - trans_for_each_update(trans, i) { - const char *invalid = bch2_bkey_invalid(c, - bkey_i_to_s_c(i->k), i->bkey_type); - if (invalid) { - char buf[200]; - - bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(i->k)); - bch_err(c, "invalid bkey %s on insert: %s\n", buf, invalid); - bch2_fatal_error(c); - } - btree_insert_entry_checks(trans, i); - } - bch2_btree_trans_verify_locks(trans); + normalize_read_intent_locks(trans); - trans_for_each_update(trans, i) - if (!same_leaf_as_prev(trans, i)) - bch2_btree_node_lock_for_insert(trans, i->iter, - iter_l(i->iter)->b); + ret = trans_lock_write(trans); + if (unlikely(ret)) + return ret; ret = bch2_trans_commit_write_locked(trans, stopped_at, trace_ip); trans_for_each_update(trans, i) if (!same_leaf_as_prev(trans, i)) - bch2_btree_node_unlock_write_inlined(iter_l(i->iter)->b, - i->iter); + bch2_btree_node_unlock_write_inlined(trans, i->path, + insert_l(i)->b); if (!ret && trans->journal_pin) bch2_journal_pin_add(&c->journal, trans->journal_res.seq, @@ -650,14 +712,13 @@ int bch2_trans_commit_error(struct btree_trans *trans, switch (ret) { case BTREE_INSERT_BTREE_NODE_FULL: - ret = bch2_btree_split_leaf(trans, i->iter, trans->flags); + ret = bch2_btree_split_leaf(trans, i->path, trans->flags); if (!ret) return 0; if (ret == -EINTR) trace_trans_restart_btree_node_split(trans->ip, trace_ip, - i->iter->btree_id, - &i->iter->real_pos); + i->btree_id, &i->path->pos); break; case BTREE_INSERT_NEED_MARK_REPLICAS: bch2_trans_unlock(trans); @@ -738,120 +799,9 @@ bch2_trans_commit_get_rw_cold(struct btree_trans *trans) return 0; } -static int extent_handle_overwrites(struct btree_trans *trans, - struct btree_insert_entry *i) -{ - struct bch_fs *c = trans->c; - struct btree_iter *iter, *update_iter; - struct bpos start = bkey_start_pos(&i->k->k); - struct bkey_i *update; - struct bkey_s_c k; - int ret = 0, compressed_sectors; - - iter = bch2_trans_get_iter(trans, i->btree_id, start, - BTREE_ITER_INTENT| - BTREE_ITER_WITH_UPDATES| - BTREE_ITER_NOT_EXTENTS); - k = bch2_btree_iter_peek(iter); - if (!k.k || (ret = bkey_err(k))) - goto out; - - if (bch2_bkey_maybe_mergable(k.k, &i->k->k)) { - update = 
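/*
 * The extent_handle_overwrites() code deleted here isn't lost: it
 * reappears below, reworked as bch2_trans_update_extent(), which now
 * handles extent updates end to end (queueing the final insert
 * itself) instead of preprocessing the entry for bch2_trans_update():
 */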
bch2_trans_kmalloc(trans, bkey_bytes(k.k)); - if ((ret = PTR_ERR_OR_ZERO(update))) - goto out; - - bkey_reassemble(update, k); - - if (bch2_bkey_merge(c, bkey_i_to_s(update), bkey_i_to_s_c(i->k))) { - update_iter = bch2_trans_copy_iter(trans, iter); - ret = bch2_btree_delete_at(trans, update_iter, i->flags); - bch2_trans_iter_put(trans, update_iter); - - if (ret) - goto out; - - i->k = update; - goto next; - } - } - - if (!bkey_cmp(k.k->p, bkey_start_pos(&i->k->k))) - goto next; - - while (bkey_cmp(i->k->k.p, bkey_start_pos(k.k)) > 0) { - /* - * If we're going to be splitting a compressed extent, note it - * so that __bch2_trans_commit() can increase our disk - * reservation: - */ - if (bkey_cmp(bkey_start_pos(k.k), start) < 0 && - bkey_cmp(k.k->p, i->k->k.p) > 0 && - (compressed_sectors = bch2_bkey_sectors_compressed(k))) - trans->extra_journal_res += compressed_sectors; - - if (bkey_cmp(bkey_start_pos(k.k), start) < 0) { - update = bch2_trans_kmalloc(trans, bkey_bytes(k.k)); - if ((ret = PTR_ERR_OR_ZERO(update))) - goto out; - - bkey_reassemble(update, k); - - bch2_cut_back(start, update); - - update_iter = bch2_trans_get_iter(trans, i->btree_id, update->k.p, - BTREE_ITER_NOT_EXTENTS| - BTREE_ITER_INTENT); - ret = bch2_btree_iter_traverse(update_iter); - if (ret) { - bch2_trans_iter_put(trans, update_iter); - goto out; - } - - bch2_trans_update(trans, update_iter, update, - BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE| - i->flags); - bch2_trans_iter_put(trans, update_iter); - } - - if (bkey_cmp(k.k->p, i->k->k.p) <= 0) { - update_iter = bch2_trans_copy_iter(trans, iter); - ret = bch2_btree_delete_at(trans, update_iter, - i->flags); - bch2_trans_iter_put(trans, update_iter); - - if (ret) - goto out; - } - - if (bkey_cmp(k.k->p, i->k->k.p) > 0) { - update = bch2_trans_kmalloc(trans, bkey_bytes(k.k)); - if ((ret = PTR_ERR_OR_ZERO(update))) - goto out; - - bkey_reassemble(update, k); - bch2_cut_front(i->k->k.p, update); - - bch2_trans_update(trans, iter, update, i->flags); - goto out; - } -next: - k = bch2_btree_iter_next(iter); - if (!k.k || (ret = bkey_err(k))) - goto out; - } - - bch2_bkey_merge(c, bkey_i_to_s(i->k), k); -out: - bch2_trans_iter_put(trans, iter); - - return ret; -} - int __bch2_trans_commit(struct btree_trans *trans) { struct btree_insert_entry *i = NULL; - struct btree_iter *iter; bool trans_trigger_run; unsigned u64s; int ret = 0; @@ -876,8 +826,12 @@ int __bch2_trans_commit(struct btree_trans *trans) } #ifdef CONFIG_BCACHEFS_DEBUG + /* + * if BTREE_TRIGGER_NORUN is set, it means we're probably being called + * from the key cache flush code: + */ trans_for_each_update(trans, i) - if (btree_iter_type(i->iter) != BTREE_ITER_CACHED && + if (!i->cached && !(i->flags & BTREE_TRIGGER_NORUN)) bch2_btree_key_cache_verify_clean(trans, i->btree_id, i->k->k.p); @@ -896,13 +850,12 @@ int __bch2_trans_commit(struct btree_trans *trans) i->trans_triggers_run = true; trans_trigger_run = true; - ret = bch2_trans_mark_update(trans, i->iter, + ret = bch2_trans_mark_update(trans, i->path, i->k, i->flags); if (unlikely(ret)) { if (ret == -EINTR) trace_trans_restart_mark(trans->ip, _RET_IP_, - i->iter->btree_id, - &i->iter->pos); + i->btree_id, &i->path->pos); goto out; } } @@ -910,21 +863,19 @@ int __bch2_trans_commit(struct btree_trans *trans) } while (trans_trigger_run); trans_for_each_update(trans, i) { - BUG_ON(!i->iter->should_be_locked); + BUG_ON(!i->path->should_be_locked); - if (unlikely(!bch2_btree_iter_upgrade(i->iter, i->level + 1))) { + if (unlikely(!bch2_btree_path_upgrade(trans, 
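/*
 * Restart bookkeeping is centralized in this patch: rather than each
 * caller setting trans->restarted and returning -EINTR by hand,
 * btree_trans_restart() does both, so the flag and the error code
 * can't drift apart:
 */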
i->path, i->level + 1))) { trace_trans_restart_upgrade(trans->ip, _RET_IP_, - i->iter->btree_id, - &i->iter->pos); - trans->restarted = true; - ret = -EINTR; + i->btree_id, &i->path->pos); + ret = btree_trans_restart(trans); goto out; } - BUG_ON(!btree_node_intent_locked(i->iter, i->level)); + BUG_ON(!btree_node_intent_locked(i->path, i->level)); u64s = jset_u64s(i->k->k.u64s); - if (btree_iter_type(i->iter) == BTREE_ITER_CACHED && + if (i->cached && likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))) trans->journal_preres_u64s += u64s; trans->journal_u64s += u64s; @@ -945,21 +896,19 @@ retry: ret = do_bch2_trans_commit(trans, &i, _RET_IP_); /* make sure we didn't drop or screw up locks: */ - bch2_btree_trans_verify_locks(trans); + bch2_trans_verify_locks(trans); if (ret) goto err; - - trans_for_each_iter(trans, iter) - if (btree_iter_live(trans, iter) && - (iter->flags & BTREE_ITER_SET_POS_AFTER_COMMIT)) - bch2_btree_iter_set_pos(iter, iter->pos_after_commit); out: bch2_journal_preres_put(&trans->c->journal, &trans->journal_preres); if (likely(!(trans->flags & BTREE_INSERT_NOCHECK_RW))) percpu_ref_put(&trans->c->writes); out_reset: + trans_for_each_update(trans, i) + bch2_path_put(trans, i->path, true); + trans->extra_journal_res = 0; trans->nr_updates = 0; trans->hooks = NULL; @@ -982,22 +931,154 @@ err: goto retry; } +static int bch2_trans_update_extent(struct btree_trans *trans, + struct btree_iter *orig_iter, + struct bkey_i *insert, + enum btree_update_flags flags) +{ + struct bch_fs *c = trans->c; + struct btree_iter iter, update_iter; + struct bpos start = bkey_start_pos(&insert->k); + struct bkey_i *update; + struct bkey_s_c k; + enum btree_id btree_id = orig_iter->btree_id; + int ret = 0, compressed_sectors; + + bch2_trans_iter_init(trans, &iter, btree_id, start, + BTREE_ITER_INTENT| + BTREE_ITER_WITH_UPDATES| + BTREE_ITER_NOT_EXTENTS); + k = bch2_btree_iter_peek(&iter); + if ((ret = bkey_err(k))) + goto err; + if (!k.k) + goto out; + + if (bch2_bkey_maybe_mergable(k.k, &insert->k)) { + update = bch2_trans_kmalloc(trans, bkey_bytes(k.k)); + if ((ret = PTR_ERR_OR_ZERO(update))) + goto err; + + bkey_reassemble(update, k); + + if (bch2_bkey_merge(c, bkey_i_to_s(update), bkey_i_to_s_c(insert))) { + ret = bch2_btree_delete_at(trans, &iter, flags); + if (ret) + goto err; + + insert = update; + goto next; + } + } + + if (!bkey_cmp(k.k->p, bkey_start_pos(&insert->k))) + goto next; + + while (bkey_cmp(insert->k.p, bkey_start_pos(k.k)) > 0) { + /* + * If we're going to be splitting a compressed extent, note it + * so that __bch2_trans_commit() can increase our disk + * reservation: + */ + if (bkey_cmp(bkey_start_pos(k.k), start) < 0 && + bkey_cmp(k.k->p, insert->k.p) > 0 && + (compressed_sectors = bch2_bkey_sectors_compressed(k))) + trans->extra_journal_res += compressed_sectors; + + if (bkey_cmp(bkey_start_pos(k.k), start) < 0) { + update = bch2_trans_kmalloc(trans, bkey_bytes(k.k)); + if ((ret = PTR_ERR_OR_ZERO(update))) + goto err; + + bkey_reassemble(update, k); + + bch2_cut_back(start, update); + + bch2_trans_iter_init(trans, &update_iter, btree_id, update->k.p, + BTREE_ITER_NOT_EXTENTS| + BTREE_ITER_INTENT); + ret = bch2_btree_iter_traverse(&update_iter) ?: + bch2_trans_update(trans, &update_iter, update, + BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE| + flags); + bch2_trans_iter_exit(trans, &update_iter); + if (ret) + goto err; + } + + if (bkey_cmp(k.k->p, insert->k.p) <= 0) { + ret = bch2_btree_delete_at(trans, &iter, flags); + if (ret) + goto err; + } + + if (bkey_cmp(k.k->p, 
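/*
 * A worked example for the overwrite cases in this loop, with
 * hypothetical extents (end position exclusive): inserting [50,150)
 * on top of an existing [0,100):
 *
 *	start(old) < start(insert): emit the head [0,50)   (cut_back)
 *	end(old)  <= end(insert):   delete the old key
 *	end(old)   > end(insert):   emit the tail          (cut_front)
 *
 * Here the first two cases fire; the third covers an insert contained
 * within the old extent, e.g. [50,150) into [0,200) leaves [150,200).
 */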
insert->k.p) > 0) { + update = bch2_trans_kmalloc(trans, bkey_bytes(k.k)); + if ((ret = PTR_ERR_OR_ZERO(update))) + goto err; + + bkey_reassemble(update, k); + bch2_cut_front(insert->k.p, update); + + ret = bch2_trans_update(trans, &iter, update, flags); + if (ret) + goto err; + + goto out; + } +next: + k = bch2_btree_iter_next(&iter); + if ((ret = bkey_err(k))) + goto err; + if (!k.k) + goto out; + } + + bch2_bkey_merge(c, bkey_i_to_s(insert), k); +out: + if (!bkey_deleted(&insert->k)) { + /* + * Rewinding iterators is expensive: get a new one and the one + * that points to the start of insert will be cloned from: + */ + bch2_trans_iter_exit(trans, &iter); + bch2_trans_iter_init(trans, &iter, btree_id, insert->k.p, + BTREE_ITER_NOT_EXTENTS| + BTREE_ITER_INTENT); + ret = bch2_btree_iter_traverse(&iter) ?: + bch2_trans_update(trans, &iter, insert, flags); + } +err: + bch2_trans_iter_exit(trans, &iter); + + return ret; +} + int bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter, struct bkey_i *k, enum btree_update_flags flags) { - struct btree_insert_entry *i, n = (struct btree_insert_entry) { + struct btree_insert_entry *i, n; + + BUG_ON(!iter->path->should_be_locked); + + if (iter->flags & BTREE_ITER_IS_EXTENTS) + return bch2_trans_update_extent(trans, iter, k, flags); + + BUG_ON(trans->nr_updates >= BTREE_ITER_MAX); + BUG_ON(bpos_cmp(k->k.p, iter->path->pos)); + + n = (struct btree_insert_entry) { .flags = flags, - .bkey_type = __btree_node_type(iter->level, iter->btree_id), + .bkey_type = __btree_node_type(iter->path->level, iter->btree_id), .btree_id = iter->btree_id, - .level = iter->level, - .iter = iter, - .k = k + .level = iter->path->level, + .cached = iter->flags & BTREE_ITER_CACHED, + .path = iter->path, + .k = k, + .ip_allocated = _RET_IP_, }; - bool is_extent = (iter->flags & BTREE_ITER_IS_EXTENTS) != 0; - int ret = 0; - BUG_ON(trans->nr_updates >= BTREE_ITER_MAX); - BUG_ON(!iter->should_be_locked); + __btree_path_get(n.path, true); #ifdef CONFIG_BCACHEFS_DEBUG trans_for_each_update(trans, i) @@ -1005,31 +1086,6 @@ int bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter, btree_insert_entry_cmp(i - 1, i) >= 0); #endif - if (is_extent) { - ret = extent_handle_overwrites(trans, &n); - if (ret) - return ret; - - iter->pos_after_commit = k->k.p; - iter->flags |= BTREE_ITER_SET_POS_AFTER_COMMIT; - - if (bkey_deleted(&n.k->k)) - return 0; - - n.iter = bch2_trans_get_iter(trans, n.btree_id, n.k->k.p, - BTREE_ITER_INTENT| - BTREE_ITER_NOT_EXTENTS); - ret = bch2_btree_iter_traverse(n.iter); - bch2_trans_iter_put(trans, n.iter); - - if (ret) - return ret; - } - - BUG_ON(n.iter->flags & BTREE_ITER_IS_EXTENTS); - - n.iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT; - /* * Pending updates are kept sorted: first, find position of new update, * then delete/trim any updates the new update overwrites: @@ -1047,11 +1103,13 @@ int bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter, * not the key cache, which helps with cache coherency issues in * other areas: */ - if (btree_iter_type(n.iter) == BTREE_ITER_CACHED && - btree_iter_type(i->iter) != BTREE_ITER_CACHED) { + if (n.cached && !i->cached) { i->k = n.k; i->flags = n.flags; + + __btree_path_get(n.path, false); } else { + bch2_path_put(trans, i->path, true); *i = n; } } else @@ -1071,15 +1129,14 @@ void bch2_trans_commit_hook(struct btree_trans *trans, int __bch2_btree_insert(struct btree_trans *trans, enum btree_id id, struct bkey_i *k) { - struct btree_iter *iter; + struct btree_iter iter; int 
ret; - iter = bch2_trans_get_iter(trans, id, bkey_start_pos(&k->k), - BTREE_ITER_INTENT); - - ret = bch2_btree_iter_traverse(iter) ?: - bch2_trans_update(trans, iter, k, 0); - bch2_trans_iter_put(trans, iter); + bch2_trans_iter_init(trans, &iter, id, bkey_start_pos(&k->k), + BTREE_ITER_INTENT); + ret = bch2_btree_iter_traverse(&iter) ?: + bch2_trans_update(trans, &iter, k, 0); + bch2_trans_iter_exit(trans, &iter); return ret; } @@ -1117,16 +1174,16 @@ int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id, struct bpos start, struct bpos end, u64 *journal_seq) { - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c k; int ret = 0; - iter = bch2_trans_get_iter(trans, id, start, BTREE_ITER_INTENT); + bch2_trans_iter_init(trans, &iter, id, start, BTREE_ITER_INTENT); retry: while ((bch2_trans_begin(trans), - (k = bch2_btree_iter_peek(iter)).k) && + (k = bch2_btree_iter_peek(&iter)).k) && !(ret = bkey_err(k)) && - bkey_cmp(iter->pos, end) < 0) { + bkey_cmp(iter.pos, end) < 0) { struct bkey_i delete; bkey_init(&delete.k); @@ -1145,9 +1202,9 @@ retry: * (bch2_btree_iter_peek() does guarantee that iter.pos >= * bkey_start_pos(k.k)). */ - delete.k.p = iter->pos; + delete.k.p = iter.pos; - if (btree_node_type_is_extents(iter->btree_id)) { + if (btree_node_type_is_extents(id)) { unsigned max_sectors = KEY_SIZE_MAX & (~0 << trans->c->block_bits); @@ -1155,12 +1212,12 @@ retry: bch2_key_resize(&delete.k, max_sectors); bch2_cut_back(end, &delete); - ret = bch2_extent_trim_atomic(&delete, iter); + ret = bch2_extent_trim_atomic(trans, &iter, &delete); if (ret) break; } - ret = bch2_trans_update(trans, iter, &delete, 0) ?: + ret = bch2_trans_update(trans, &iter, &delete, 0) ?: bch2_trans_commit(trans, NULL, journal_seq, BTREE_INSERT_NOFAIL); if (ret) @@ -1174,7 +1231,7 @@ retry: goto retry; } - bch2_trans_iter_free(trans, iter); + bch2_trans_iter_exit(trans, &iter); return ret; } diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c index 76945e5..df12416 100644 --- a/libbcachefs/buckets.c +++ b/libbcachefs/buckets.c @@ -662,8 +662,11 @@ void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca, static s64 ptr_disk_sectors(s64 sectors, struct extent_ptr_decoded p) { - return p.crc.compression_type - ? DIV_ROUND_UP(sectors * p.crc.compressed_size, + EBUG_ON(sectors < 0); + + return p.crc.compression_type && + p.crc.compression_type != BCH_COMPRESSION_TYPE_incompressible + ? 
DIV_ROUND_UP_ULL(sectors * p.crc.compressed_size, p.crc.uncompressed_size) : sectors; } @@ -925,9 +928,6 @@ static int bch2_mark_extent(struct bch_fs *c, BUG_ON((flags & (BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE)) == (BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE)); - if (flags & BTREE_TRIGGER_OVERWRITE) - sectors = -sectors; - r.e.data_type = data_type; r.e.nr_devs = 0; r.e.nr_required = 1; @@ -935,6 +935,9 @@ static int bch2_mark_extent(struct bch_fs *c, bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { s64 disk_sectors = ptr_disk_sectors(sectors, p); + if (flags & BTREE_TRIGGER_OVERWRITE) + disk_sectors = -disk_sectors; + ret = bch2_mark_pointer(c, k, p, disk_sectors, data_type, journal_seq, flags); if (ret < 0) @@ -1215,38 +1218,23 @@ int bch2_mark_key(struct bch_fs *c, struct bkey_s_c new, unsigned flags) return ret; } -int bch2_mark_update(struct btree_trans *trans, struct btree_iter *iter, +int bch2_mark_update(struct btree_trans *trans, struct btree_path *path, struct bkey_i *new, unsigned flags) { struct bch_fs *c = trans->c; struct bkey _deleted = KEY(0, 0, 0); struct bkey_s_c deleted = (struct bkey_s_c) { &_deleted, NULL }; struct bkey_s_c old; - int iter_flags, ret; + struct bkey unpacked; + int ret; if (unlikely(flags & BTREE_TRIGGER_NORUN)) return 0; - if (!btree_node_type_needs_gc(iter->btree_id)) + if (!btree_node_type_needs_gc(path->btree_id)) return 0; - if (likely(!(iter->flags & BTREE_ITER_CACHED_NOFILL))) { - iter_flags = iter->flags & BTREE_ITER_WITH_UPDATES; - iter->flags &= ~BTREE_ITER_WITH_UPDATES; - - old = bch2_btree_iter_peek_slot(iter); - iter->flags |= iter_flags; - - ret = bkey_err(old); - if (ret) - return ret; - } else { - /* - * If BTREE_ITER_CACHED_NOFILL was used, we better not be - * running triggers that do anything on removal (alloc btree): - */ - old = deleted; - } + old = bch2_btree_path_peek_slot(path, &unpacked); if (old.k->type == new->k.type && ((1U << old.k->type) & BTREE_TRIGGER_WANTS_OLD_AND_NEW)) { @@ -1283,23 +1271,14 @@ void fs_usage_apply_warn(struct btree_trans *trans, pr_err("%s", buf); pr_err("overlapping with"); - if (btree_iter_type(i->iter) != BTREE_ITER_CACHED) { - struct btree_iter *copy = bch2_trans_copy_iter(trans, i->iter); - struct bkey_s_c k; - int ret; - - for_each_btree_key_continue(copy, 0, k, ret) { - if (btree_node_type_is_extents(i->iter->btree_id) - ? bkey_cmp(i->k->k.p, bkey_start_pos(k.k)) <= 0 - : bkey_cmp(i->k->k.p, k.k->p)) - break; + if (!i->cached) { + struct bkey u; + struct bkey_s_c k = bch2_btree_path_peek_slot(i->path, &u); - bch2_bkey_val_to_text(&PBUF(buf), c, k); - pr_err("%s", buf); - } - bch2_trans_iter_put(trans, copy); + bch2_bkey_val_to_text(&PBUF(buf), c, k); + pr_err("%s", buf); } else { - struct bkey_cached *ck = (void *) i->iter->l[0].b; + struct bkey_cached *ck = (void *) i->path->l[0].b; if (ck->valid) { bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(ck->k)); @@ -1378,31 +1357,8 @@ void bch2_trans_fs_usage_apply(struct btree_trans *trans, /* trans_mark: */ -static struct btree_iter *trans_get_update(struct btree_trans *trans, - enum btree_id btree_id, struct bpos pos, - struct bkey_s_c *k) -{ - struct btree_insert_entry *i; - - trans_for_each_update(trans, i) - if (i->iter->btree_id == btree_id && - (btree_node_type_is_extents(btree_id) - ? bkey_cmp(pos, bkey_start_pos(&i->k->k)) >= 0 && - bkey_cmp(pos, i->k->k.p) < 0 - : !bkey_cmp(pos, i->iter->pos))) { - *k = bkey_i_to_s_c(i->k); - - /* ugly hack.. 
*/ - BUG_ON(btree_iter_live(trans, i->iter)); - trans->iters_live |= 1ULL << i->iter->idx; - return i->iter; - } - - return NULL; -} - static struct bkey_alloc_buf * -bch2_trans_start_alloc_update(struct btree_trans *trans, struct btree_iter **_iter, +bch2_trans_start_alloc_update(struct btree_trans *trans, struct btree_iter *iter, const struct bch_extent_ptr *ptr, struct bkey_alloc_unpacked *u) { @@ -1410,36 +1366,33 @@ bch2_trans_start_alloc_update(struct btree_trans *trans, struct btree_iter **_it struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); struct bpos pos = POS(ptr->dev, PTR_BUCKET_NR(ca, ptr)); struct bucket *g; - struct btree_iter *iter; - struct bkey_s_c k; struct bkey_alloc_buf *a; + struct bkey_i *update = btree_trans_peek_updates(trans, BTREE_ID_alloc, pos); int ret; a = bch2_trans_kmalloc(trans, sizeof(struct bkey_alloc_buf)); if (IS_ERR(a)) return a; - iter = trans_get_update(trans, BTREE_ID_alloc, pos, &k); - if (iter) { - *u = bch2_alloc_unpack(k); - } else { - iter = bch2_trans_get_iter(trans, BTREE_ID_alloc, pos, - BTREE_ITER_CACHED| - BTREE_ITER_CACHED_NOFILL| - BTREE_ITER_INTENT); - ret = bch2_btree_iter_traverse(iter); - if (ret) { - bch2_trans_iter_put(trans, iter); - return ERR_PTR(ret); - } + bch2_trans_iter_init(trans, iter, BTREE_ID_alloc, pos, + BTREE_ITER_CACHED| + BTREE_ITER_CACHED_NOFILL| + BTREE_ITER_INTENT); + ret = bch2_btree_iter_traverse(iter); + if (ret) { + bch2_trans_iter_exit(trans, iter); + return ERR_PTR(ret); + } + if (update && !bpos_cmp(update->k.p, pos)) { + *u = bch2_alloc_unpack(bkey_i_to_s_c(update)); + } else { percpu_down_read(&c->mark_lock); g = bucket(ca, pos.offset); *u = alloc_mem_to_key(iter, g, READ_ONCE(g->mark)); percpu_up_read(&c->mark_lock); } - *_iter = iter; return a; } @@ -1448,7 +1401,7 @@ static int bch2_trans_mark_pointer(struct btree_trans *trans, s64 sectors, enum bch_data_type data_type) { struct bch_fs *c = trans->c; - struct btree_iter *iter; + struct btree_iter iter; struct bkey_alloc_unpacked u; struct bkey_alloc_buf *a; int ret; @@ -1463,9 +1416,9 @@ static int bch2_trans_mark_pointer(struct btree_trans *trans, goto out; bch2_alloc_pack(c, a, u); - bch2_trans_update(trans, iter, &a->k, 0); + bch2_trans_update(trans, &iter, &a->k, 0); out: - bch2_trans_iter_put(trans, iter); + bch2_trans_iter_exit(trans, &iter); return ret; } @@ -1474,16 +1427,16 @@ static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans, s64 sectors, enum bch_data_type data_type) { struct bch_fs *c = trans->c; - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c k; struct bkey_i_stripe *s; struct bch_replicas_padded r; int ret = 0; - iter = bch2_trans_get_iter(trans, BTREE_ID_stripes, POS(0, p.ec.idx), - BTREE_ITER_INTENT| - BTREE_ITER_WITH_UPDATES); - k = bch2_btree_iter_peek_slot(iter); + bch2_trans_iter_init(trans, &iter, BTREE_ID_stripes, POS(0, p.ec.idx), + BTREE_ITER_INTENT| + BTREE_ITER_WITH_UPDATES); + k = bch2_btree_iter_peek_slot(&iter); ret = bkey_err(k); if (ret) goto err; @@ -1514,13 +1467,13 @@ static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans, stripe_blockcount_set(&s->v, p.ec.block, stripe_blockcount_get(&s->v, p.ec.block) + sectors); - bch2_trans_update(trans, iter, &s->k_i, 0); + bch2_trans_update(trans, &iter, &s->k_i, 0); bch2_bkey_to_replicas(&r.e, bkey_i_to_s_c(&s->k_i)); r.e.data_type = data_type; update_replicas_list(trans, &r.e, sectors); err: - bch2_trans_iter_put(trans, iter); + bch2_trans_iter_exit(trans, &iter); return ret; } @@ -1545,9 +1498,6 @@ static int 
bch2_trans_mark_extent(struct btree_trans *trans, BUG_ON((flags & (BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE)) == (BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE)); - if (flags & BTREE_TRIGGER_OVERWRITE) - sectors = -sectors; - r.e.data_type = data_type; r.e.nr_devs = 0; r.e.nr_required = 1; @@ -1555,6 +1505,9 @@ static int bch2_trans_mark_extent(struct btree_trans *trans, bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { s64 disk_sectors = ptr_disk_sectors(sectors, p); + if (flags & BTREE_TRIGGER_OVERWRITE) + disk_sectors = -disk_sectors; + ret = bch2_trans_mark_pointer(trans, k, p, disk_sectors, data_type); if (ret < 0) @@ -1592,7 +1545,7 @@ static int bch2_trans_mark_stripe_alloc_ref(struct btree_trans *trans, struct bch_fs *c = trans->c; const struct bch_extent_ptr *ptr = &s.v->ptrs[idx]; struct bkey_alloc_buf *a; - struct btree_iter *iter; + struct btree_iter iter; struct bkey_alloc_unpacked u; bool parity = idx >= s.v->nr_blocks - s.v->nr_redundant; int ret = 0; @@ -1616,7 +1569,7 @@ static int bch2_trans_mark_stripe_alloc_ref(struct btree_trans *trans, if (!deleting) { if (bch2_fs_inconsistent_on(u.stripe && u.stripe != s.k->p.offset, c, "bucket %llu:%llu gen %u: multiple stripes using same bucket (%u, %llu)", - iter->pos.inode, iter->pos.offset, u.gen, + iter.pos.inode, iter.pos.offset, u.gen, u.stripe, s.k->p.offset)) { ret = -EIO; goto err; @@ -1630,9 +1583,9 @@ static int bch2_trans_mark_stripe_alloc_ref(struct btree_trans *trans, } bch2_alloc_pack(c, a, u); - bch2_trans_update(trans, iter, &a->k, 0); + bch2_trans_update(trans, &iter, &a->k, 0); err: - bch2_trans_iter_put(trans, iter); + bch2_trans_iter_exit(trans, &iter); return ret; } @@ -1737,17 +1690,17 @@ static int __bch2_trans_mark_reflink_p(struct btree_trans *trans, u64 idx, unsigned flags) { struct bch_fs *c = trans->c; - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c k; struct bkey_i *n; __le64 *refcount; int add = !(flags & BTREE_TRIGGER_OVERWRITE) ? 
1 : -1; s64 ret; - iter = bch2_trans_get_iter(trans, BTREE_ID_reflink, POS(0, idx), - BTREE_ITER_INTENT| - BTREE_ITER_WITH_UPDATES); - k = bch2_btree_iter_peek_slot(iter); + bch2_trans_iter_init(trans, &iter, BTREE_ID_reflink, POS(0, idx), + BTREE_ITER_INTENT| + BTREE_ITER_WITH_UPDATES); + k = bch2_btree_iter_peek_slot(&iter); ret = bkey_err(k); if (ret) goto err; @@ -1777,14 +1730,14 @@ static int __bch2_trans_mark_reflink_p(struct btree_trans *trans, set_bkey_val_u64s(&n->k, 0); } - bch2_btree_iter_set_pos_to_extent_start(iter); - ret = bch2_trans_update(trans, iter, n, 0); + bch2_btree_iter_set_pos_to_extent_start(&iter); + ret = bch2_trans_update(trans, &iter, n, 0); if (ret) goto err; ret = k.k->p.offset - idx; err: - bch2_trans_iter_put(trans, iter); + bch2_trans_iter_exit(trans, &iter); return ret; } @@ -1836,39 +1789,23 @@ int bch2_trans_mark_key(struct btree_trans *trans, struct bkey_s_c old, } int bch2_trans_mark_update(struct btree_trans *trans, - struct btree_iter *iter, + struct btree_path *path, struct bkey_i *new, unsigned flags) { struct bkey _deleted = KEY(0, 0, 0); struct bkey_s_c deleted = (struct bkey_s_c) { &_deleted, NULL }; struct bkey_s_c old; - int iter_flags, ret; + struct bkey unpacked; + int ret; if (unlikely(flags & BTREE_TRIGGER_NORUN)) return 0; - if (!btree_node_type_needs_gc(iter->btree_id)) + if (!btree_node_type_needs_gc(path->btree_id)) return 0; - - if (likely(!(iter->flags & BTREE_ITER_CACHED_NOFILL))) { - iter_flags = iter->flags & BTREE_ITER_WITH_UPDATES; - iter->flags &= ~BTREE_ITER_WITH_UPDATES; - - old = bch2_btree_iter_peek_slot(iter); - iter->flags |= iter_flags; - - ret = bkey_err(old); - if (ret) - return ret; - } else { - /* - * If BTREE_ITER_CACHED_NOFILL was used, we better not be - * running triggers that do anything on removal (alloc btree): - */ - old = deleted; - } + old = bch2_btree_path_peek_slot(path, &unpacked); if (old.k->type == new->k.type && ((1U << old.k->type) & BTREE_TRIGGER_WANTS_OLD_AND_NEW)) { @@ -1890,7 +1827,7 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, unsigned sectors) { struct bch_fs *c = trans->c; - struct btree_iter *iter; + struct btree_iter iter; struct bkey_alloc_unpacked u; struct bkey_alloc_buf *a; struct bch_extent_ptr ptr = { @@ -1913,7 +1850,7 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, "bucket %llu:%llu gen %u different types of data in same bucket: %s, %s\n" "while marking %s", - iter->pos.inode, iter->pos.offset, u.gen, + iter.pos.inode, iter.pos.offset, u.gen, bch2_data_types[u.data_type], bch2_data_types[type], bch2_data_types[type]); @@ -1925,9 +1862,9 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, u.dirty_sectors = sectors; bch2_alloc_pack(c, a, u); - bch2_trans_update(trans, iter, &a->k, 0); + bch2_trans_update(trans, &iter, &a->k, 0); out: - bch2_trans_iter_put(trans, iter); + bch2_trans_iter_exit(trans, &iter); return ret; } diff --git a/libbcachefs/buckets.h b/libbcachefs/buckets.h index 0f544b6..61c2c0f 100644 --- a/libbcachefs/buckets.h +++ b/libbcachefs/buckets.h @@ -228,13 +228,13 @@ void bch2_mark_metadata_bucket(struct bch_fs *, struct bch_dev *, int bch2_mark_key(struct bch_fs *, struct bkey_s_c, unsigned); -int bch2_mark_update(struct btree_trans *, struct btree_iter *, +int bch2_mark_update(struct btree_trans *, struct btree_path *, struct bkey_i *, unsigned); int bch2_trans_mark_key(struct btree_trans *, struct bkey_s_c, struct bkey_s_c, unsigned); 
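(The buckets.h hunks here and below switch the trigger entry points from struct btree_iter to struct btree_path. A minimal sketch of the resulting calling convention, assuming the in-tree bcachefs headers — example_mark_update is a hypothetical name; the bch2_* calls are the ones this patch introduces. The old value is now read with bch2_btree_path_peek_slot() instead of re-peeking through the iterator with BTREE_ITER_WITH_UPDATES toggled off:)

static int example_mark_update(struct btree_trans *trans,
			       struct btree_path *path,
			       struct bkey_i *new, unsigned flags)
{
	struct bkey unpacked;
	struct bkey_s_c old;

	if (unlikely(flags & BTREE_TRIGGER_NORUN))
		return 0;
	if (!btree_node_type_needs_gc(path->btree_id))
		return 0;

	/* the old key is read directly off the (locked) btree_path: */
	old = bch2_btree_path_peek_slot(path, &unpacked);

	return bch2_trans_mark_key(trans, old, bkey_i_to_s_c(new), flags);
}

(Passing the path rather than the iterator also removes the BTREE_ITER_CACHED_NOFILL special case that the old code needed, as the deleted blocks in the trans_mark_update hunks above show.)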
-int bch2_trans_mark_update(struct btree_trans *, struct btree_iter *iter, - struct bkey_i *insert, unsigned); +int bch2_trans_mark_update(struct btree_trans *, struct btree_path *, + struct bkey_i *, unsigned); void bch2_trans_fs_usage_apply(struct btree_trans *, struct replicas_delta_list *); int bch2_trans_mark_metadata_bucket(struct btree_trans *, struct bch_dev *, diff --git a/libbcachefs/debug.c b/libbcachefs/debug.c index b0a8eb5..9f14bf4 100644 --- a/libbcachefs/debug.c +++ b/libbcachefs/debug.c @@ -243,7 +243,7 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf, { struct dump_iter *i = file->private_data; struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c k; int err; @@ -260,10 +260,10 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf, bch2_trans_init(&trans, i->c, 0, 0); - iter = bch2_trans_get_iter(&trans, i->id, i->from, - BTREE_ITER_PREFETCH| - BTREE_ITER_ALL_SNAPSHOTS); - k = bch2_btree_iter_peek(iter); + bch2_trans_iter_init(&trans, &iter, i->id, i->from, + BTREE_ITER_PREFETCH| + BTREE_ITER_ALL_SNAPSHOTS); + k = bch2_btree_iter_peek(&iter); while (k.k && !(err = bkey_err(k))) { bch2_bkey_val_to_text(&PBUF(i->buf), i->c, k); @@ -272,8 +272,8 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf, i->buf[i->bytes] = '\n'; i->bytes++; - k = bch2_btree_iter_next(iter); - i->from = iter->pos; + k = bch2_btree_iter_next(&iter); + i->from = iter.pos; err = flush_buf(i); if (err) @@ -282,7 +282,7 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf, if (!i->size) break; } - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); bch2_trans_exit(&trans); @@ -301,7 +301,7 @@ static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf, { struct dump_iter *i = file->private_data; struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct btree *b; int err; @@ -336,7 +336,7 @@ static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf, if (!i->size) break; } - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); bch2_trans_exit(&trans); @@ -355,7 +355,7 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf, { struct dump_iter *i = file->private_data; struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c k; struct btree *prev_node = NULL; int err; @@ -373,11 +373,11 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf, bch2_trans_init(&trans, i->c, 0, 0); - iter = bch2_trans_get_iter(&trans, i->id, i->from, BTREE_ITER_PREFETCH); + bch2_trans_iter_init(&trans, &iter, i->id, i->from, BTREE_ITER_PREFETCH); - while ((k = bch2_btree_iter_peek(iter)).k && + while ((k = bch2_btree_iter_peek(&iter)).k && !(err = bkey_err(k))) { - struct btree_iter_level *l = &iter->l[0]; + struct btree_path_level *l = &iter.path->l[0]; struct bkey_packed *_k = bch2_btree_node_iter_peek(&l->iter, l->b); @@ -396,8 +396,8 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf, if (err) break; - bch2_btree_iter_advance(iter); - i->from = iter->pos; + bch2_btree_iter_advance(&iter); + i->from = iter.pos; err = flush_buf(i); if (err) diff --git a/libbcachefs/dirent.c b/libbcachefs/dirent.c index 02b2968..1d510f7 100644 --- a/libbcachefs/dirent.c +++ b/libbcachefs/dirent.c @@ -183,7 +183,8 @@ int bch2_dirent_rename(struct btree_trans *trans, const struct qstr *dst_name, u64 *dst_inum, u64 *dst_offset, 
enum bch_rename_mode mode) { - struct btree_iter *src_iter = NULL, *dst_iter = NULL; + struct btree_iter src_iter = { NULL }; + struct btree_iter dst_iter = { NULL }; struct bkey_s_c old_src, old_dst; struct bkey_i_dirent *new_src = NULL, *new_dst = NULL; struct bpos dst_pos = @@ -199,17 +200,16 @@ int bch2_dirent_rename(struct btree_trans *trans, * the target already exists - we're relying on the VFS * to do that check for us for correctness: */ - dst_iter = mode == BCH_RENAME - ? bch2_hash_hole(trans, bch2_dirent_hash_desc, + ret = mode == BCH_RENAME + ? bch2_hash_hole(trans, &dst_iter, bch2_dirent_hash_desc, dst_hash, dst_dir, dst_name) - : bch2_hash_lookup(trans, bch2_dirent_hash_desc, + : bch2_hash_lookup(trans, &dst_iter, bch2_dirent_hash_desc, dst_hash, dst_dir, dst_name, BTREE_ITER_INTENT); - ret = PTR_ERR_OR_ZERO(dst_iter); if (ret) goto out; - old_dst = bch2_btree_iter_peek_slot(dst_iter); + old_dst = bch2_btree_iter_peek_slot(&dst_iter); ret = bkey_err(old_dst); if (ret) goto out; @@ -217,17 +217,16 @@ int bch2_dirent_rename(struct btree_trans *trans, if (mode != BCH_RENAME) *dst_inum = le64_to_cpu(bkey_s_c_to_dirent(old_dst).v->d_inum); if (mode != BCH_RENAME_EXCHANGE) - *src_offset = dst_iter->pos.offset; + *src_offset = dst_iter.pos.offset; /* Lookup src: */ - src_iter = bch2_hash_lookup(trans, bch2_dirent_hash_desc, - src_hash, src_dir, src_name, - BTREE_ITER_INTENT); - ret = PTR_ERR_OR_ZERO(src_iter); + ret = bch2_hash_lookup(trans, &src_iter, bch2_dirent_hash_desc, + src_hash, src_dir, src_name, + BTREE_ITER_INTENT); if (ret) goto out; - old_src = bch2_btree_iter_peek_slot(src_iter); + old_src = bch2_btree_iter_peek_slot(&src_iter); ret = bkey_err(old_src); if (ret) goto out; @@ -241,7 +240,7 @@ int bch2_dirent_rename(struct btree_trans *trans, goto out; dirent_copy_target(new_dst, bkey_s_c_to_dirent(old_src)); - new_dst->k.p = dst_iter->pos; + new_dst->k.p = dst_iter.pos; /* Create new src key: */ if (mode == BCH_RENAME_EXCHANGE) { @@ -251,7 +250,7 @@ int bch2_dirent_rename(struct btree_trans *trans, goto out; dirent_copy_target(new_src, bkey_s_c_to_dirent(old_dst)); - new_src->k.p = src_iter->pos; + new_src->k.p = src_iter.pos; } else { new_src = bch2_trans_kmalloc(trans, sizeof(struct bkey_i)); ret = PTR_ERR_OR_ZERO(new_src); @@ -259,10 +258,10 @@ int bch2_dirent_rename(struct btree_trans *trans, goto out; bkey_init(&new_src->k); - new_src->k.p = src_iter->pos; + new_src->k.p = src_iter.pos; - if (bkey_cmp(dst_pos, src_iter->pos) <= 0 && - bkey_cmp(src_iter->pos, dst_iter->pos) < 0) { + if (bkey_cmp(dst_pos, src_iter.pos) <= 0 && + bkey_cmp(src_iter.pos, dst_iter.pos) < 0) { /* * We have a hash collision for the new dst key, * and new_src - the key we're deleting - is between @@ -275,8 +274,8 @@ int bch2_dirent_rename(struct btree_trans *trans, * If we're not overwriting, we can just insert * new_dst at the src position: */ - new_dst->k.p = src_iter->pos; - bch2_trans_update(trans, src_iter, + new_dst->k.p = src_iter.pos; + bch2_trans_update(trans, &src_iter, &new_dst->k_i, 0); goto out_set_offset; } else { @@ -290,7 +289,7 @@ int bch2_dirent_rename(struct btree_trans *trans, } else { /* Check if we need a whiteout to delete src: */ ret = bch2_hash_needs_whiteout(trans, bch2_dirent_hash_desc, - src_hash, src_iter); + src_hash, &src_iter); if (ret < 0) goto out; @@ -299,15 +298,15 @@ int bch2_dirent_rename(struct btree_trans *trans, } } - bch2_trans_update(trans, src_iter, &new_src->k_i, 0); - bch2_trans_update(trans, dst_iter, &new_dst->k_i, 0); + 
bch2_trans_update(trans, &src_iter, &new_src->k_i, 0); + bch2_trans_update(trans, &dst_iter, &new_dst->k_i, 0); out_set_offset: if (mode == BCH_RENAME_EXCHANGE) *src_offset = new_src->k.p.offset; *dst_offset = new_dst->k.p.offset; out: - bch2_trans_iter_put(trans, src_iter); - bch2_trans_iter_put(trans, dst_iter); + bch2_trans_iter_exit(trans, &src_iter); + bch2_trans_iter_exit(trans, &dst_iter); return ret; } @@ -319,12 +318,13 @@ int bch2_dirent_delete_at(struct btree_trans *trans, hash_info, iter); } -struct btree_iter * -__bch2_dirent_lookup_trans(struct btree_trans *trans, u64 dir_inum, - const struct bch_hash_info *hash_info, - const struct qstr *name, unsigned flags) +int __bch2_dirent_lookup_trans(struct btree_trans *trans, + struct btree_iter *iter, + u64 dir_inum, + const struct bch_hash_info *hash_info, + const struct qstr *name, unsigned flags) { - return bch2_hash_lookup(trans, bch2_dirent_hash_desc, + return bch2_hash_lookup(trans, iter, bch2_dirent_hash_desc, hash_info, dir_inum, name, flags); } @@ -333,26 +333,25 @@ u64 bch2_dirent_lookup(struct bch_fs *c, u64 dir_inum, const struct qstr *name) { struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c k; u64 inum = 0; int ret = 0; bch2_trans_init(&trans, c, 0, 0); - iter = __bch2_dirent_lookup_trans(&trans, dir_inum, - hash_info, name, 0); - ret = PTR_ERR_OR_ZERO(iter); + ret = __bch2_dirent_lookup_trans(&trans, &iter, dir_inum, + hash_info, name, 0); if (ret) goto out; - k = bch2_btree_iter_peek_slot(iter); + k = bch2_btree_iter_peek_slot(&iter); ret = bkey_err(k); if (ret) goto out; inum = le64_to_cpu(bkey_s_c_to_dirent(k).v->d_inum); - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); out: BUG_ON(ret == -EINTR); bch2_trans_exit(&trans); @@ -361,7 +360,7 @@ out: int bch2_empty_dir_trans(struct btree_trans *trans, u64 dir_inum) { - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c k; int ret; @@ -375,7 +374,7 @@ int bch2_empty_dir_trans(struct btree_trans *trans, u64 dir_inum) break; } } - bch2_trans_iter_put(trans, iter); + bch2_trans_iter_exit(trans, &iter); return ret; } @@ -383,7 +382,7 @@ int bch2_empty_dir_trans(struct btree_trans *trans, u64 dir_inum) int bch2_readdir(struct bch_fs *c, u64 inum, struct dir_context *ctx) { struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c k; struct bkey_s_c_dirent dirent; int ret; @@ -412,7 +411,7 @@ int bch2_readdir(struct bch_fs *c, u64 inum, struct dir_context *ctx) break; ctx->pos = dirent.k->p.offset + 1; } - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); ret = bch2_trans_exit(&trans) ?: ret; diff --git a/libbcachefs/dirent.h b/libbcachefs/dirent.h index e1d8ce3..c14f602 100644 --- a/libbcachefs/dirent.h +++ b/libbcachefs/dirent.h @@ -50,8 +50,7 @@ int bch2_dirent_rename(struct btree_trans *, const struct qstr *, u64 *, u64 *, enum bch_rename_mode); -struct btree_iter * -__bch2_dirent_lookup_trans(struct btree_trans *, u64, +int __bch2_dirent_lookup_trans(struct btree_trans *, struct btree_iter *, u64, const struct bch_hash_info *, const struct qstr *, unsigned); u64 bch2_dirent_lookup(struct bch_fs *, u64, const struct bch_hash_info *, diff --git a/libbcachefs/ec.c b/libbcachefs/ec.c index 328e042..f0bdbdb 100644 --- a/libbcachefs/ec.c +++ b/libbcachefs/ec.c @@ -429,13 +429,14 @@ static void ec_block_io(struct bch_fs *c, struct ec_stripe_buf *buf, static int get_stripe_key(struct bch_fs *c, u64 idx, struct ec_stripe_buf *stripe) 
{ struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c k; int ret; bch2_trans_init(&trans, c, 0, 0); - iter = bch2_trans_get_iter(&trans, BTREE_ID_stripes, POS(0, idx), BTREE_ITER_SLOTS); - k = bch2_btree_iter_peek_slot(iter); + bch2_trans_iter_init(&trans, &iter, BTREE_ID_stripes, + POS(0, idx), BTREE_ITER_SLOTS); + k = bch2_btree_iter_peek_slot(&iter); ret = bkey_err(k); if (ret) goto err; @@ -445,6 +446,7 @@ static int get_stripe_key(struct bch_fs *c, u64 idx, struct ec_stripe_buf *strip } bkey_reassemble(&stripe->key.k_i, k); err: + bch2_trans_iter_exit(&trans, &iter); bch2_trans_exit(&trans); return ret; } @@ -552,19 +554,19 @@ static int __ec_stripe_mem_alloc(struct bch_fs *c, size_t idx, gfp_t gfp) return 0; } -static int ec_stripe_mem_alloc(struct bch_fs *c, +static int ec_stripe_mem_alloc(struct btree_trans *trans, struct btree_iter *iter) { size_t idx = iter->pos.offset; int ret = 0; - if (!__ec_stripe_mem_alloc(c, idx, GFP_NOWAIT|__GFP_NOWARN)) + if (!__ec_stripe_mem_alloc(trans->c, idx, GFP_NOWAIT|__GFP_NOWARN)) return ret; - bch2_trans_unlock(iter->trans); + bch2_trans_unlock(trans); ret = -EINTR; - if (!__ec_stripe_mem_alloc(c, idx, GFP_KERNEL)) + if (!__ec_stripe_mem_alloc(trans->c, idx, GFP_KERNEL)) return ret; return -ENOMEM; @@ -704,7 +706,7 @@ static int ec_stripe_bkey_insert(struct bch_fs *c, struct disk_reservation *res) { struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c k; struct bpos min_pos = POS(0, 1); struct bpos start_pos = bpos_max(min_pos, POS(0, c->ec_stripe_hint)); @@ -719,7 +721,7 @@ retry: if (bkey_cmp(k.k->p, POS(0, U32_MAX)) > 0) { if (start_pos.offset) { start_pos = min_pos; - bch2_btree_iter_set_pos(iter, start_pos); + bch2_btree_iter_set_pos(&iter, start_pos); continue; } @@ -733,19 +735,19 @@ retry: goto err; found_slot: - start_pos = iter->pos; + start_pos = iter.pos; - ret = ec_stripe_mem_alloc(c, iter); + ret = ec_stripe_mem_alloc(&trans, &iter); if (ret) goto err; - stripe->k.p = iter->pos; + stripe->k.p = iter.pos; - ret = bch2_trans_update(&trans, iter, &stripe->k_i, 0) ?: + ret = bch2_trans_update(&trans, &iter, &stripe->k_i, 0) ?: bch2_trans_commit(&trans, res, NULL, BTREE_INSERT_NOFAIL); err: - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); if (ret == -EINTR) goto retry; @@ -759,15 +761,15 @@ err: static int ec_stripe_bkey_update(struct btree_trans *trans, struct bkey_i_stripe *new) { - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c k; const struct bch_stripe *existing; unsigned i; int ret; - iter = bch2_trans_get_iter(trans, BTREE_ID_stripes, - new->k.p, BTREE_ITER_INTENT); - k = bch2_btree_iter_peek_slot(iter); + bch2_trans_iter_init(trans, &iter, BTREE_ID_stripes, + new->k.p, BTREE_ITER_INTENT); + k = bch2_btree_iter_peek_slot(&iter); ret = bkey_err(k); if (ret) goto err; @@ -790,9 +792,9 @@ static int ec_stripe_bkey_update(struct btree_trans *trans, stripe_blockcount_set(&new->v, i, stripe_blockcount_get(existing, i)); - ret = bch2_trans_update(trans, iter, &new->k_i, 0); + ret = bch2_trans_update(trans, &iter, &new->k_i, 0); err: - bch2_trans_iter_put(trans, iter); + bch2_trans_iter_exit(trans, &iter); return ret; } @@ -820,10 +822,11 @@ static int ec_stripe_update_ptrs(struct bch_fs *c, struct bkey *pos) { struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c k; struct bkey_s_extent e; struct bkey_buf sk; + struct bpos next_pos; int ret = 0, dev, block; 
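(The ec_stripe_update_ptrs() hunk below adds next_pos so the extent-update loop only advances the iterator after a successful commit. A minimal sketch of that retry-safe pattern, assuming the in-tree bcachefs APIs — example_update_one is a hypothetical helper name, and error handling is trimmed to the essentials:)

static int example_update_one(struct btree_trans *trans,
			      struct btree_iter *iter,
			      struct bkey_i *k)
{
	/* remember where this key ends before committing: */
	struct bpos next_pos = k->k.p;
	int ret;

	ret = bch2_btree_iter_traverse(iter) ?:
	      bch2_trans_update(trans, iter, k, 0) ?:
	      bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL);
	if (!ret)
		bch2_btree_iter_set_pos(iter, next_pos);

	/* -EINTR means transaction restart: the caller loops and retries */
	return ret == -EINTR ? 0 : ret;
}

(On -EINTR the iterator is deliberately left at the same key, so the restarted transaction re-reads and re-updates it instead of silently skipping ahead — the same reason the fcollapse/finsert hunk further down only calls bch2_btree_iter_set_pos(&src, next_pos) when ret is zero.)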
bch2_bkey_buf_init(&sk); @@ -831,23 +834,23 @@ static int ec_stripe_update_ptrs(struct bch_fs *c, /* XXX this doesn't support the reflink btree */ - iter = bch2_trans_get_iter(&trans, BTREE_ID_extents, - bkey_start_pos(pos), - BTREE_ITER_INTENT); + bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents, + bkey_start_pos(pos), + BTREE_ITER_INTENT); - while ((k = bch2_btree_iter_peek(iter)).k && + while ((k = bch2_btree_iter_peek(&iter)).k && !(ret = bkey_err(k)) && bkey_cmp(bkey_start_pos(k.k), pos->p) < 0) { struct bch_extent_ptr *ptr, *ec_ptr = NULL; if (extent_has_stripe_ptr(k, s->key.k.p.offset)) { - bch2_btree_iter_advance(iter); + bch2_btree_iter_advance(&iter); continue; } block = bkey_matches_stripe(&s->key.v, k); if (block < 0) { - bch2_btree_iter_advance(iter); + bch2_btree_iter_advance(&iter); continue; } @@ -862,17 +865,21 @@ static int ec_stripe_update_ptrs(struct bch_fs *c, extent_stripe_ptr_add(e, s, ec_ptr, block); - bch2_btree_iter_set_pos(iter, bkey_start_pos(&sk.k->k)); - ret = bch2_btree_iter_traverse(iter) ?: - bch2_trans_update(&trans, iter, sk.k, 0) ?: + bch2_btree_iter_set_pos(&iter, bkey_start_pos(&sk.k->k)); + next_pos = sk.k->k.p; + + ret = bch2_btree_iter_traverse(&iter) ?: + bch2_trans_update(&trans, &iter, sk.k, 0) ?: bch2_trans_commit(&trans, NULL, NULL, BTREE_INSERT_NOFAIL); + if (!ret) + bch2_btree_iter_set_pos(&iter, next_pos); if (ret == -EINTR) ret = 0; if (ret) break; } - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); bch2_trans_exit(&trans); bch2_bkey_buf_exit(&sk, c); @@ -1593,7 +1600,7 @@ write: int bch2_stripes_write(struct bch_fs *c, unsigned flags) { struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct genradix_iter giter; struct bkey_i_stripe *new_key; struct stripe *m; @@ -1604,8 +1611,8 @@ int bch2_stripes_write(struct bch_fs *c, unsigned flags) bch2_trans_init(&trans, c, 0, 0); - iter = bch2_trans_get_iter(&trans, BTREE_ID_stripes, POS_MIN, - BTREE_ITER_SLOTS|BTREE_ITER_INTENT); + bch2_trans_iter_init(&trans, &iter, BTREE_ID_stripes, POS_MIN, + BTREE_ITER_SLOTS|BTREE_ITER_INTENT); genradix_for_each(&c->stripes[0], giter, m) { if (!m->alive) @@ -1613,13 +1620,13 @@ int bch2_stripes_write(struct bch_fs *c, unsigned flags) ret = __bch2_trans_do(&trans, NULL, NULL, BTREE_INSERT_NOFAIL|flags, - __bch2_stripe_write_key(&trans, iter, m, + __bch2_stripe_write_key(&trans, &iter, m, giter.pos, new_key)); if (ret) break; } - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); bch2_trans_exit(&trans); @@ -1654,19 +1661,19 @@ int bch2_stripes_read(struct bch_fs *c) int bch2_ec_mem_alloc(struct bch_fs *c, bool gc) { struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c k; size_t i, idx = 0; int ret = 0; bch2_trans_init(&trans, c, 0, 0); - iter = bch2_trans_get_iter(&trans, BTREE_ID_stripes, POS(0, U64_MAX), 0); + bch2_trans_iter_init(&trans, &iter, BTREE_ID_stripes, POS(0, U64_MAX), 0); - k = bch2_btree_iter_prev(iter); + k = bch2_btree_iter_prev(&iter); if (!IS_ERR_OR_NULL(k.k)) idx = k.k->p.offset + 1; - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); ret = bch2_trans_exit(&trans); if (ret) return ret; diff --git a/libbcachefs/extent_update.c b/libbcachefs/extent_update.c index 4a8dd08..9d959b0 100644 --- a/libbcachefs/extent_update.c +++ b/libbcachefs/extent_update.c @@ -58,7 +58,7 @@ static int count_iters_for_insert(struct btree_trans *trans, u64 idx = le64_to_cpu(p.v->idx); unsigned sectors = bpos_min(*end, 
p.k->p).offset - bkey_start_offset(p.k); - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c r_k; for_each_btree_key(trans, iter, @@ -83,8 +83,8 @@ static int count_iters_for_insert(struct btree_trans *trans, break; } } + bch2_trans_iter_exit(trans, &iter); - bch2_trans_iter_put(trans, iter); break; } } @@ -94,12 +94,12 @@ static int count_iters_for_insert(struct btree_trans *trans, #define EXTENT_ITERS_MAX (BTREE_ITER_MAX / 3) -int bch2_extent_atomic_end(struct btree_iter *iter, +int bch2_extent_atomic_end(struct btree_trans *trans, + struct btree_iter *iter, struct bkey_i *insert, struct bpos *end) { - struct btree_trans *trans = iter->trans; - struct btree_iter *copy; + struct btree_iter copy; struct bkey_s_c k; unsigned nr_iters = 0; int ret; @@ -118,7 +118,7 @@ int bch2_extent_atomic_end(struct btree_iter *iter, if (ret < 0) return ret; - copy = bch2_trans_copy_iter(trans, iter); + bch2_trans_copy_iter(©, iter); for_each_btree_key_continue(copy, 0, k, ret) { unsigned offset = 0; @@ -149,31 +149,21 @@ int bch2_extent_atomic_end(struct btree_iter *iter, break; } - bch2_trans_iter_put(trans, copy); + bch2_trans_iter_exit(trans, ©); return ret < 0 ? ret : 0; } -int bch2_extent_trim_atomic(struct bkey_i *k, struct btree_iter *iter) +int bch2_extent_trim_atomic(struct btree_trans *trans, + struct btree_iter *iter, + struct bkey_i *k) { struct bpos end; int ret; - ret = bch2_extent_atomic_end(iter, k, &end); + ret = bch2_extent_atomic_end(trans, iter, k, &end); if (ret) return ret; bch2_cut_back(end, k); return 0; } - -int bch2_extent_is_atomic(struct bkey_i *k, struct btree_iter *iter) -{ - struct bpos end; - int ret; - - ret = bch2_extent_atomic_end(iter, k, &end); - if (ret) - return ret; - - return !bkey_cmp(end, k->k.p); -} diff --git a/libbcachefs/extent_update.h b/libbcachefs/extent_update.h index 2fa4602..6f5cf44 100644 --- a/libbcachefs/extent_update.h +++ b/libbcachefs/extent_update.h @@ -4,9 +4,9 @@ #include "bcachefs.h" -int bch2_extent_atomic_end(struct btree_iter *, struct bkey_i *, - struct bpos *); -int bch2_extent_trim_atomic(struct bkey_i *, struct btree_iter *); -int bch2_extent_is_atomic(struct bkey_i *, struct btree_iter *); +int bch2_extent_atomic_end(struct btree_trans *, struct btree_iter *, + struct bkey_i *, struct bpos *); +int bch2_extent_trim_atomic(struct btree_trans *, struct btree_iter *, + struct bkey_i *); #endif /* _BCACHEFS_EXTENT_UPDATE_H */ diff --git a/libbcachefs/extents.c b/libbcachefs/extents.c index 563e130..f66640c 100644 --- a/libbcachefs/extents.c +++ b/libbcachefs/extents.c @@ -616,7 +616,7 @@ bool bch2_check_range_allocated(struct bch_fs *c, struct bpos pos, u64 size, unsigned nr_replicas, bool compressed) { struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct bpos end = pos; struct bkey_s_c k; bool ret = true; @@ -637,7 +637,7 @@ bool bch2_check_range_allocated(struct bch_fs *c, struct bpos pos, u64 size, break; } } - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); bch2_trans_exit(&trans); diff --git a/libbcachefs/fs-common.c b/libbcachefs/fs-common.c index 2189a11..6bc8255 100644 --- a/libbcachefs/fs-common.c +++ b/libbcachefs/fs-common.c @@ -19,16 +19,15 @@ int bch2_create_trans(struct btree_trans *trans, u64 dir_inum, struct posix_acl *acl) { struct bch_fs *c = trans->c; - struct btree_iter *dir_iter = NULL; - struct btree_iter *inode_iter = NULL; + struct btree_iter dir_iter = { NULL }; + struct btree_iter inode_iter = { NULL }; struct bch_hash_info hash = 
bch2_hash_info_init(c, new_inode); u64 now = bch2_current_time(c); u64 cpu = raw_smp_processor_id(); u64 dir_offset = 0; int ret; - dir_iter = bch2_inode_peek(trans, dir_u, dir_inum, BTREE_ITER_INTENT); - ret = PTR_ERR_OR_ZERO(dir_iter); + ret = bch2_inode_peek(trans, &dir_iter, dir_u, dir_inum, BTREE_ITER_INTENT); if (ret) goto err; @@ -37,8 +36,7 @@ int bch2_create_trans(struct btree_trans *trans, u64 dir_inum, if (!name) new_inode->bi_flags |= BCH_INODE_UNLINKED; - inode_iter = bch2_inode_create(trans, new_inode, U32_MAX, cpu); - ret = PTR_ERR_OR_ZERO(inode_iter); + ret = bch2_inode_create(trans, &inode_iter, new_inode, U32_MAX, cpu); if (ret) goto err; @@ -63,7 +61,7 @@ int bch2_create_trans(struct btree_trans *trans, u64 dir_inum, if (S_ISDIR(new_inode->bi_mode)) dir_u->bi_nlink++; - ret = bch2_inode_write(trans, dir_iter, dir_u); + ret = bch2_inode_write(trans, &dir_iter, dir_u); if (ret) goto err; @@ -82,14 +80,14 @@ int bch2_create_trans(struct btree_trans *trans, u64 dir_inum, } /* XXX use bch2_btree_iter_set_snapshot() */ - inode_iter->snapshot = U32_MAX; - bch2_btree_iter_set_pos(inode_iter, SPOS(0, new_inode->bi_inum, U32_MAX)); + inode_iter.snapshot = U32_MAX; + bch2_btree_iter_set_pos(&inode_iter, SPOS(0, new_inode->bi_inum, U32_MAX)); - ret = bch2_btree_iter_traverse(inode_iter) ?: - bch2_inode_write(trans, inode_iter, new_inode); + ret = bch2_btree_iter_traverse(&inode_iter) ?: + bch2_inode_write(trans, &inode_iter, new_inode); err: - bch2_trans_iter_put(trans, inode_iter); - bch2_trans_iter_put(trans, dir_iter); + bch2_trans_iter_exit(trans, &inode_iter); + bch2_trans_iter_exit(trans, &dir_iter); return ret; } @@ -98,22 +96,21 @@ int bch2_link_trans(struct btree_trans *trans, u64 dir_inum, struct bch_inode_unpacked *inode_u, const struct qstr *name) { struct bch_fs *c = trans->c; - struct btree_iter *dir_iter = NULL, *inode_iter = NULL; + struct btree_iter dir_iter = { NULL }; + struct btree_iter inode_iter = { NULL }; struct bch_hash_info dir_hash; u64 now = bch2_current_time(c); u64 dir_offset = 0; int ret; - inode_iter = bch2_inode_peek(trans, inode_u, inum, BTREE_ITER_INTENT); - ret = PTR_ERR_OR_ZERO(inode_iter); + ret = bch2_inode_peek(trans, &inode_iter, inode_u, inum, BTREE_ITER_INTENT); if (ret) goto err; inode_u->bi_ctime = now; bch2_inode_nlink_inc(inode_u); - dir_iter = bch2_inode_peek(trans, dir_u, dir_inum, 0); - ret = PTR_ERR_OR_ZERO(dir_iter); + ret = bch2_inode_peek(trans, &dir_iter, dir_u, dir_inum, BTREE_ITER_INTENT); if (ret) goto err; @@ -133,11 +130,11 @@ int bch2_link_trans(struct btree_trans *trans, u64 dir_inum, inode_u->bi_dir_offset = dir_offset; } - ret = bch2_inode_write(trans, dir_iter, dir_u) ?: - bch2_inode_write(trans, inode_iter, inode_u); + ret = bch2_inode_write(trans, &dir_iter, dir_u) ?: + bch2_inode_write(trans, &inode_iter, inode_u); err: - bch2_trans_iter_put(trans, dir_iter); - bch2_trans_iter_put(trans, inode_iter); + bch2_trans_iter_exit(trans, &dir_iter); + bch2_trans_iter_exit(trans, &inode_iter); return ret; } @@ -147,35 +144,33 @@ int bch2_unlink_trans(struct btree_trans *trans, const struct qstr *name) { struct bch_fs *c = trans->c; - struct btree_iter *dir_iter = NULL, *dirent_iter = NULL, - *inode_iter = NULL; + struct btree_iter dir_iter = { NULL }; + struct btree_iter dirent_iter = { NULL }; + struct btree_iter inode_iter = { NULL }; struct bch_hash_info dir_hash; u64 inum, now = bch2_current_time(c); struct bkey_s_c k; int ret; - dir_iter = bch2_inode_peek(trans, dir_u, dir_inum, BTREE_ITER_INTENT); - ret = 
PTR_ERR_OR_ZERO(dir_iter); + ret = bch2_inode_peek(trans, &dir_iter, dir_u, dir_inum, BTREE_ITER_INTENT); if (ret) goto err; dir_hash = bch2_hash_info_init(c, dir_u); - dirent_iter = __bch2_dirent_lookup_trans(trans, dir_inum, &dir_hash, - name, BTREE_ITER_INTENT); - ret = PTR_ERR_OR_ZERO(dirent_iter); + ret = __bch2_dirent_lookup_trans(trans, &dirent_iter, dir_inum, &dir_hash, + name, BTREE_ITER_INTENT); if (ret) goto err; - k = bch2_btree_iter_peek_slot(dirent_iter); + k = bch2_btree_iter_peek_slot(&dirent_iter); ret = bkey_err(k); if (ret) goto err; inum = le64_to_cpu(bkey_s_c_to_dirent(k).v->d_inum); - inode_iter = bch2_inode_peek(trans, inode_u, inum, BTREE_ITER_INTENT); - ret = PTR_ERR_OR_ZERO(inode_iter); + ret = bch2_inode_peek(trans, &inode_iter, inode_u, inum, BTREE_ITER_INTENT); if (ret) goto err; @@ -192,13 +187,13 @@ int bch2_unlink_trans(struct btree_trans *trans, ret = (S_ISDIR(inode_u->bi_mode) ? bch2_empty_dir_trans(trans, inum) : 0) ?: - bch2_dirent_delete_at(trans, &dir_hash, dirent_iter) ?: - bch2_inode_write(trans, dir_iter, dir_u) ?: - bch2_inode_write(trans, inode_iter, inode_u); + bch2_dirent_delete_at(trans, &dir_hash, &dirent_iter) ?: + bch2_inode_write(trans, &dir_iter, dir_u) ?: + bch2_inode_write(trans, &inode_iter, inode_u); err: - bch2_trans_iter_put(trans, inode_iter); - bch2_trans_iter_put(trans, dirent_iter); - bch2_trans_iter_put(trans, dir_iter); + bch2_trans_iter_exit(trans, &inode_iter); + bch2_trans_iter_exit(trans, &dirent_iter); + bch2_trans_iter_exit(trans, &dir_iter); return ret; } @@ -236,25 +231,25 @@ int bch2_rename_trans(struct btree_trans *trans, enum bch_rename_mode mode) { struct bch_fs *c = trans->c; - struct btree_iter *src_dir_iter = NULL, *dst_dir_iter = NULL; - struct btree_iter *src_inode_iter = NULL, *dst_inode_iter = NULL; + struct btree_iter src_dir_iter = { NULL }; + struct btree_iter dst_dir_iter = { NULL }; + struct btree_iter src_inode_iter = { NULL }; + struct btree_iter dst_inode_iter = { NULL }; struct bch_hash_info src_hash, dst_hash; u64 src_inode, src_offset, dst_inode, dst_offset; u64 now = bch2_current_time(c); int ret; - src_dir_iter = bch2_inode_peek(trans, src_dir_u, src_dir, - BTREE_ITER_INTENT); - ret = PTR_ERR_OR_ZERO(src_dir_iter); + ret = bch2_inode_peek(trans, &src_dir_iter, src_dir_u, src_dir, + BTREE_ITER_INTENT); if (ret) goto err; src_hash = bch2_hash_info_init(c, src_dir_u); if (dst_dir != src_dir) { - dst_dir_iter = bch2_inode_peek(trans, dst_dir_u, dst_dir, - BTREE_ITER_INTENT); - ret = PTR_ERR_OR_ZERO(dst_dir_iter); + ret = bch2_inode_peek(trans, &dst_dir_iter, dst_dir_u, dst_dir, + BTREE_ITER_INTENT); if (ret) goto err; @@ -273,16 +268,14 @@ int bch2_rename_trans(struct btree_trans *trans, if (ret) goto err; - src_inode_iter = bch2_inode_peek(trans, src_inode_u, src_inode, - BTREE_ITER_INTENT); - ret = PTR_ERR_OR_ZERO(src_inode_iter); + ret = bch2_inode_peek(trans, &src_inode_iter, src_inode_u, src_inode, + BTREE_ITER_INTENT); if (ret) goto err; if (dst_inode) { - dst_inode_iter = bch2_inode_peek(trans, dst_inode_u, dst_inode, - BTREE_ITER_INTENT); - ret = PTR_ERR_OR_ZERO(dst_inode_iter); + ret = bch2_inode_peek(trans, &dst_inode_iter, dst_inode_u, dst_inode, + BTREE_ITER_INTENT); if (ret) goto err; } @@ -357,18 +350,18 @@ int bch2_rename_trans(struct btree_trans *trans, if (dst_inode) dst_inode_u->bi_ctime = now; - ret = bch2_inode_write(trans, src_dir_iter, src_dir_u) ?: + ret = bch2_inode_write(trans, &src_dir_iter, src_dir_u) ?: (src_dir != dst_dir - ? 
bch2_inode_write(trans, dst_dir_iter, dst_dir_u) + ? bch2_inode_write(trans, &dst_dir_iter, dst_dir_u) : 0 ) ?: - bch2_inode_write(trans, src_inode_iter, src_inode_u) ?: + bch2_inode_write(trans, &src_inode_iter, src_inode_u) ?: (dst_inode - ? bch2_inode_write(trans, dst_inode_iter, dst_inode_u) + ? bch2_inode_write(trans, &dst_inode_iter, dst_inode_u) : 0 ); err: - bch2_trans_iter_put(trans, dst_inode_iter); - bch2_trans_iter_put(trans, src_inode_iter); - bch2_trans_iter_put(trans, dst_dir_iter); - bch2_trans_iter_put(trans, src_dir_iter); + bch2_trans_iter_exit(trans, &dst_inode_iter); + bch2_trans_iter_exit(trans, &src_inode_iter); + bch2_trans_iter_exit(trans, &dst_dir_iter); + bch2_trans_iter_exit(trans, &src_dir_iter); return ret; } diff --git a/libbcachefs/fs-io.c b/libbcachefs/fs-io.c index 3333f61..2921037 100644 --- a/libbcachefs/fs-io.c +++ b/libbcachefs/fs-io.c @@ -884,7 +884,7 @@ void bch2_readahead(struct readahead_control *ractl) struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch_io_opts opts = io_opts(c, &inode->ei_inode); struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct page *page; struct readpages_iter readpages_iter; int ret; @@ -893,8 +893,8 @@ void bch2_readahead(struct readahead_control *ractl) BUG_ON(ret); bch2_trans_init(&trans, c, 0, 0); - iter = bch2_trans_get_iter(&trans, BTREE_ID_extents, POS_MIN, - BTREE_ITER_SLOTS); + bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents, POS_MIN, + BTREE_ITER_SLOTS); bch2_pagecache_add_get(&inode->ei_pagecache_lock); @@ -915,13 +915,13 @@ void bch2_readahead(struct readahead_control *ractl) rbio->bio.bi_end_io = bch2_readpages_end_io; BUG_ON(!bio_add_page(&rbio->bio, page, PAGE_SIZE, 0)); - bchfs_read(&trans, iter, rbio, inode->v.i_ino, + bchfs_read(&trans, &iter, rbio, inode->v.i_ino, &readpages_iter); } bch2_pagecache_add_put(&inode->ei_pagecache_lock); - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); bch2_trans_exit(&trans); kfree(readpages_iter.pages); } @@ -930,7 +930,7 @@ static void __bchfs_readpage(struct bch_fs *c, struct bch_read_bio *rbio, u64 inum, struct page *page) { struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; bch2_page_state_create(page, __GFP_NOFAIL); @@ -940,12 +940,12 @@ static void __bchfs_readpage(struct bch_fs *c, struct bch_read_bio *rbio, BUG_ON(!bio_add_page(&rbio->bio, page, PAGE_SIZE, 0)); bch2_trans_init(&trans, c, 0, 0); - iter = bch2_trans_get_iter(&trans, BTREE_ID_extents, POS_MIN, - BTREE_ITER_SLOTS); + bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents, POS_MIN, + BTREE_ITER_SLOTS); - bchfs_read(&trans, iter, rbio, inum, NULL); + bchfs_read(&trans, &iter, rbio, inum, NULL); - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); bch2_trans_exit(&trans); } @@ -2151,7 +2151,7 @@ static inline int range_has_data(struct bch_fs *c, struct bpos end) { struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c k; int ret = 0; @@ -2166,7 +2166,7 @@ static inline int range_has_data(struct bch_fs *c, break; } } - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); return bch2_trans_exit(&trans) ?: ret; } @@ -2476,7 +2476,7 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode, struct address_space *mapping = inode->v.i_mapping; struct bkey_buf copy; struct btree_trans trans; - struct btree_iter *src, *dst, *del; + struct btree_iter src, dst, del; loff_t shift, new_size; u64 src_start; int ret = 0; @@ -2541,11 
+2541,11 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode, bch2_bkey_buf_init(©); bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024); - src = bch2_trans_get_iter(&trans, BTREE_ID_extents, + bch2_trans_iter_init(&trans, &src, BTREE_ID_extents, POS(inode->v.i_ino, src_start >> 9), BTREE_ITER_INTENT); - dst = bch2_trans_copy_iter(&trans, src); - del = bch2_trans_copy_iter(&trans, src); + bch2_trans_copy_iter(&dst, &src); + bch2_trans_copy_iter(&del, &src); while (ret == 0 || ret == -EINTR) { struct disk_reservation disk_res = @@ -2560,8 +2560,8 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode, bch2_trans_begin(&trans); k = insert - ? bch2_btree_iter_peek_prev(src) - : bch2_btree_iter_peek(src); + ? bch2_btree_iter_peek_prev(&src) + : bch2_btree_iter_peek(&src); if ((ret = bkey_err(k))) continue; @@ -2579,9 +2579,9 @@ reassemble: bch2_cut_front(move_pos, copy.k); copy.k->k.p.offset += shift >> 9; - bch2_btree_iter_set_pos(dst, bkey_start_pos(©.k->k)); + bch2_btree_iter_set_pos(&dst, bkey_start_pos(©.k->k)); - ret = bch2_extent_atomic_end(dst, copy.k, &atomic_end); + ret = bch2_extent_atomic_end(&trans, &dst, copy.k, &atomic_end); if (ret) continue; @@ -2599,7 +2599,7 @@ reassemble: delete.k.p = copy.k->k.p; delete.k.size = copy.k->k.size; delete.k.p.offset -= shift >> 9; - bch2_btree_iter_set_pos(del, bkey_start_pos(&delete.k)); + bch2_btree_iter_set_pos(&del, bkey_start_pos(&delete.k)); next_pos = insert ? bkey_start_pos(&delete.k) : delete.k.p; @@ -2620,20 +2620,20 @@ reassemble: BUG_ON(ret); } - ret = bch2_btree_iter_traverse(del) ?: - bch2_trans_update(&trans, del, &delete, trigger_flags) ?: - bch2_trans_update(&trans, dst, copy.k, trigger_flags) ?: + ret = bch2_btree_iter_traverse(&del) ?: + bch2_trans_update(&trans, &del, &delete, trigger_flags) ?: + bch2_trans_update(&trans, &dst, copy.k, trigger_flags) ?: bch2_trans_commit(&trans, &disk_res, &inode->ei_journal_seq, BTREE_INSERT_NOFAIL); bch2_disk_reservation_put(c, &disk_res); if (!ret) - bch2_btree_iter_set_pos(src, next_pos); + bch2_btree_iter_set_pos(&src, next_pos); } - bch2_trans_iter_put(&trans, del); - bch2_trans_iter_put(&trans, dst); - bch2_trans_iter_put(&trans, src); + bch2_trans_iter_exit(&trans, &del); + bch2_trans_iter_exit(&trans, &dst); + bch2_trans_iter_exit(&trans, &src); bch2_trans_exit(&trans); bch2_bkey_buf_exit(©, c); @@ -2658,18 +2658,18 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode, { struct bch_fs *c = inode->v.i_sb->s_fs_info; struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct bpos end_pos = POS(inode->v.i_ino, end_sector); unsigned replicas = io_opts(c, &inode->ei_inode).data_replicas; int ret = 0; bch2_trans_init(&trans, c, BTREE_ITER_MAX, 512); - iter = bch2_trans_get_iter(&trans, BTREE_ID_extents, + bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents, POS(inode->v.i_ino, start_sector), BTREE_ITER_SLOTS|BTREE_ITER_INTENT); - while (!ret && bkey_cmp(iter->pos, end_pos) < 0) { + while (!ret && bkey_cmp(iter.pos, end_pos) < 0) { s64 i_sectors_delta = 0; struct disk_reservation disk_res = { 0 }; struct quota_res quota_res = { 0 }; @@ -2679,20 +2679,20 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode, bch2_trans_begin(&trans); - k = bch2_btree_iter_peek_slot(iter); + k = bch2_btree_iter_peek_slot(&iter); if ((ret = bkey_err(k))) goto bkey_err; /* already reserved */ if (k.k->type == KEY_TYPE_reservation && bkey_s_c_to_reservation(k).v->nr_replicas >= replicas) { - bch2_btree_iter_advance(iter); 
+ bch2_btree_iter_advance(&iter); continue; } if (bkey_extent_is_data(k.k) && !(mode & FALLOC_FL_ZERO_RANGE)) { - bch2_btree_iter_advance(iter); + bch2_btree_iter_advance(&iter); continue; } @@ -2701,7 +2701,7 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode, reservation.k.p = k.k->p; reservation.k.size = k.k->size; - bch2_cut_front(iter->pos, &reservation.k_i); + bch2_cut_front(iter.pos, &reservation.k_i); bch2_cut_back(end_pos, &reservation.k_i); sectors = reservation.k.size; @@ -2725,7 +2725,7 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode, reservation.v.nr_replicas = disk_res.nr_replicas; } - ret = bch2_extent_update(&trans, iter, &reservation.k_i, + ret = bch2_extent_update(&trans, &iter, &reservation.k_i, &disk_res, &inode->ei_journal_seq, 0, &i_sectors_delta, true); i_sectors_acct(c, inode, "a_res, i_sectors_delta); @@ -2735,7 +2735,7 @@ bkey_err: if (ret == -EINTR) ret = 0; } - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); bch2_trans_exit(&trans); return ret; } @@ -3017,7 +3017,7 @@ static loff_t bch2_seek_data(struct file *file, u64 offset) struct bch_inode_info *inode = file_bch_inode(file); struct bch_fs *c = inode->v.i_sb->s_fs_info; struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c k; u64 isize, next_data = MAX_LFS_FILESIZE; int ret; @@ -3038,7 +3038,7 @@ static loff_t bch2_seek_data(struct file *file, u64 offset) } else if (k.k->p.offset >> 9 > isize) break; } - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); ret = bch2_trans_exit(&trans) ?: ret; if (ret) @@ -3113,7 +3113,7 @@ static loff_t bch2_seek_hole(struct file *file, u64 offset) struct bch_inode_info *inode = file_bch_inode(file); struct bch_fs *c = inode->v.i_sb->s_fs_info; struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c k; u64 isize, next_hole = MAX_LFS_FILESIZE; int ret; @@ -3142,7 +3142,7 @@ static loff_t bch2_seek_hole(struct file *file, u64 offset) offset = max(offset, bkey_start_offset(k.k) << 9); } } - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); ret = bch2_trans_exit(&trans) ?: ret; if (ret) diff --git a/libbcachefs/fs.c b/libbcachefs/fs.c index 631fb87..6cc5687 100644 --- a/libbcachefs/fs.c +++ b/libbcachefs/fs.c @@ -141,7 +141,7 @@ int __must_check bch2_write_inode(struct bch_fs *c, void *p, unsigned fields) { struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter = { NULL }; struct bch_inode_unpacked inode_u; int ret; @@ -149,11 +149,10 @@ int __must_check bch2_write_inode(struct bch_fs *c, retry: bch2_trans_begin(&trans); - iter = bch2_inode_peek(&trans, &inode_u, inode->v.i_ino, - BTREE_ITER_INTENT); - ret = PTR_ERR_OR_ZERO(iter) ?: + ret = bch2_inode_peek(&trans, &iter, &inode_u, inode->v.i_ino, + BTREE_ITER_INTENT) ?: (set ? 
set(inode, &inode_u, p) : 0) ?: - bch2_inode_write(&trans, iter, &inode_u) ?: + bch2_inode_write(&trans, &iter, &inode_u) ?: bch2_trans_commit(&trans, NULL, &inode->ei_journal_seq, BTREE_INSERT_NOFAIL); @@ -165,7 +164,7 @@ retry: if (!ret) bch2_inode_update_after_write(c, inode, &inode_u, fields); - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); if (ret == -EINTR) goto retry; @@ -686,7 +685,7 @@ int bch2_setattr_nonsize(struct user_namespace *mnt_userns, struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch_qid qid; struct btree_trans trans; - struct btree_iter *inode_iter; + struct btree_iter inode_iter = { NULL }; struct bch_inode_unpacked inode_u; struct posix_acl *acl = NULL; int ret; @@ -712,9 +711,8 @@ retry: kfree(acl); acl = NULL; - inode_iter = bch2_inode_peek(&trans, &inode_u, inode->v.i_ino, - BTREE_ITER_INTENT); - ret = PTR_ERR_OR_ZERO(inode_iter); + ret = bch2_inode_peek(&trans, &inode_iter, &inode_u, inode->v.i_ino, + BTREE_ITER_INTENT); if (ret) goto btree_err; @@ -726,12 +724,12 @@ retry: goto btree_err; } - ret = bch2_inode_write(&trans, inode_iter, &inode_u) ?: + ret = bch2_inode_write(&trans, &inode_iter, &inode_u) ?: bch2_trans_commit(&trans, NULL, &inode->ei_journal_seq, BTREE_INSERT_NOFAIL); btree_err: - bch2_trans_iter_put(&trans, inode_iter); + bch2_trans_iter_exit(&trans, &inode_iter); if (ret == -EINTR) goto retry; @@ -881,7 +879,7 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, struct bch_fs *c = vinode->i_sb->s_fs_info; struct bch_inode_info *ei = to_bch_ei(vinode); struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c k; struct bkey_buf cur, prev; struct bpos end = POS(ei->v.i_ino, (start + len) >> 9); @@ -900,23 +898,23 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, bch2_bkey_buf_init(&prev); bch2_trans_init(&trans, c, 0, 0); - iter = bch2_trans_get_iter(&trans, BTREE_ID_extents, - POS(ei->v.i_ino, start >> 9), 0); + bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents, + POS(ei->v.i_ino, start >> 9), 0); retry: bch2_trans_begin(&trans); - while ((k = bch2_btree_iter_peek(iter)).k && + while ((k = bch2_btree_iter_peek(&iter)).k && !(ret = bkey_err(k)) && - bkey_cmp(iter->pos, end) < 0) { + bkey_cmp(iter.pos, end) < 0) { enum btree_id data_btree = BTREE_ID_extents; if (!bkey_extent_is_data(k.k) && k.k->type != KEY_TYPE_reservation) { - bch2_btree_iter_advance(iter); + bch2_btree_iter_advance(&iter); continue; } - offset_into_extent = iter->pos.offset - + offset_into_extent = iter.pos.offset - bkey_start_offset(k.k); sectors = k.k->size - offset_into_extent; @@ -937,7 +935,7 @@ retry: offset_into_extent), cur.k); bch2_key_resize(&cur.k->k, sectors); - cur.k->k.p = iter->pos; + cur.k->k.p = iter.pos; cur.k->k.p.offset += cur.k->k.size; if (have_extent) { @@ -950,8 +948,8 @@ retry: bkey_copy(prev.k, cur.k); have_extent = true; - bch2_btree_iter_set_pos(iter, - POS(iter->pos.inode, iter->pos.offset + sectors)); + bch2_btree_iter_set_pos(&iter, + POS(iter.pos.inode, iter.pos.offset + sectors)); } if (ret == -EINTR) @@ -961,7 +959,7 @@ retry: ret = bch2_fill_extent(c, info, bkey_i_to_s_c(prev.k), FIEMAP_EXTENT_LAST); - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); ret = bch2_trans_exit(&trans) ?: ret; bch2_bkey_buf_exit(&cur, c); bch2_bkey_buf_exit(&prev, c); diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c index 36eba46..eb979e7 100644 --- a/libbcachefs/fsck.c +++ b/libbcachefs/fsck.c @@ -19,7 +19,7 
@@ static s64 bch2_count_inode_sectors(struct btree_trans *trans, u64 inum) { - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c k; u64 sectors = 0; int ret; @@ -33,7 +33,7 @@ static s64 bch2_count_inode_sectors(struct btree_trans *trans, u64 inum) sectors += k.k->size; } - bch2_trans_iter_free(trans, iter); + bch2_trans_iter_exit(trans, &iter); return ret ?: sectors; } @@ -42,24 +42,24 @@ static int __lookup_inode(struct btree_trans *trans, u64 inode_nr, struct bch_inode_unpacked *inode, u32 *snapshot) { - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c k; int ret; - iter = bch2_trans_get_iter(trans, BTREE_ID_inodes, - POS(0, inode_nr), 0); - k = bch2_btree_iter_peek_slot(iter); + bch2_trans_iter_init(trans, &iter, BTREE_ID_inodes, + POS(0, inode_nr), 0); + k = bch2_btree_iter_peek_slot(&iter); ret = bkey_err(k); if (ret) goto err; if (snapshot) - *snapshot = iter->pos.snapshot; + *snapshot = iter.pos.snapshot; ret = k.k->type == KEY_TYPE_inode ? bch2_inode_unpack(bkey_s_c_to_inode(k), inode) : -ENOENT; err: - bch2_trans_iter_free(trans, iter); + bch2_trans_iter_exit(trans, &iter); return ret; } @@ -74,13 +74,16 @@ static int __write_inode(struct btree_trans *trans, struct bch_inode_unpacked *inode, u32 snapshot) { - struct btree_iter *inode_iter = - bch2_trans_get_iter(trans, BTREE_ID_inodes, - SPOS(0, inode->bi_inum, snapshot), - BTREE_ITER_INTENT); - int ret = bch2_btree_iter_traverse(inode_iter) ?: - bch2_inode_write(trans, inode_iter, inode); - bch2_trans_iter_put(trans, inode_iter); + struct btree_iter iter; + int ret; + + bch2_trans_iter_init(trans, &iter, BTREE_ID_inodes, + SPOS(0, inode->bi_inum, snapshot), + BTREE_ITER_INTENT); + + ret = bch2_btree_iter_traverse(&iter) ?: + bch2_inode_write(trans, &iter, inode); + bch2_trans_iter_exit(trans, &iter); return ret; } @@ -100,7 +103,7 @@ static int write_inode(struct btree_trans *trans, static int __remove_dirent(struct btree_trans *trans, struct bpos pos) { struct bch_fs *c = trans->c; - struct btree_iter *iter; + struct btree_iter iter; struct bch_inode_unpacked dir_inode; struct bch_hash_info dir_hash_info; int ret; @@ -111,11 +114,11 @@ static int __remove_dirent(struct btree_trans *trans, struct bpos pos) dir_hash_info = bch2_hash_info_init(c, &dir_inode); - iter = bch2_trans_get_iter(trans, BTREE_ID_dirents, pos, BTREE_ITER_INTENT); + bch2_trans_iter_init(trans, &iter, BTREE_ID_dirents, pos, BTREE_ITER_INTENT); ret = bch2_hash_delete_at(trans, bch2_dirent_hash_desc, - &dir_hash_info, iter); - bch2_trans_iter_put(trans, iter); + &dir_hash_info, &iter); + bch2_trans_iter_exit(trans, &iter); return ret; } @@ -230,13 +233,13 @@ static int reattach_inode(struct btree_trans *trans, static int remove_backpointer(struct btree_trans *trans, struct bch_inode_unpacked *inode) { - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c k; int ret; - iter = bch2_trans_get_iter(trans, BTREE_ID_dirents, - POS(inode->bi_dir, inode->bi_dir_offset), 0); - k = bch2_btree_iter_peek_slot(iter); + bch2_trans_iter_init(trans, &iter, BTREE_ID_dirents, + POS(inode->bi_dir, inode->bi_dir_offset), 0); + k = bch2_btree_iter_peek_slot(&iter); ret = bkey_err(k); if (ret) goto out; @@ -247,7 +250,7 @@ static int remove_backpointer(struct btree_trans *trans, ret = remove_dirent(trans, k.k->p); out: - bch2_trans_iter_put(trans, iter); + bch2_trans_iter_exit(trans, &iter); return ret; } @@ -343,7 +346,7 @@ static int hash_check_key(struct btree_trans *trans, struct btree_iter *k_iter, struct bkey_s_c hash_k) { 
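/*
 * (Editor's sketch, not part of the patch: the point-lookup shape the fsck
 * helpers above now share -- init the iterator, peek_slot(), check bkey_err(),
 * use the key, then exit the iterator on every path. example_inode_exists()
 * is a hypothetical name; the calls mirror __lookup_inode() above:
 *
 *	static int example_inode_exists(struct btree_trans *trans, u64 inode_nr)
 *	{
 *		struct btree_iter iter;
 *		struct bkey_s_c k;
 *		int ret;
 *
 *		bch2_trans_iter_init(trans, &iter, BTREE_ID_inodes,
 *				     POS(0, inode_nr), 0);
 *		k = bch2_btree_iter_peek_slot(&iter);
 *		ret = bkey_err(k) ?:
 *			(k.k->type == KEY_TYPE_inode ? 0 : -ENOENT);
 *		bch2_trans_iter_exit(trans, &iter);
 *		return ret;
 *	}
 *
 * The key k is only valid while the iterator is live, which is why the
 * result is computed before bch2_trans_iter_exit().)
 */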
struct bch_fs *c = trans->c; - struct btree_iter *iter = NULL; + struct btree_iter iter = { NULL }; char buf[200]; struct bkey_s_c k; u64 hash; @@ -378,12 +381,12 @@ static int hash_check_key(struct btree_trans *trans, } if (bkey_deleted(k.k)) { - bch2_trans_iter_free(trans, iter); + bch2_trans_iter_exit(trans, &iter); goto bad_hash; } } - bch2_trans_iter_free(trans, iter); + bch2_trans_iter_exit(trans, &iter); return ret; bad_hash: if (fsck_err(c, "hash table key at wrong offset: btree %u inode %llu offset %llu, " @@ -513,7 +516,7 @@ noinline_for_stack static int check_inodes(struct bch_fs *c, bool full) { struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c k; struct bkey_s_c_inode inode; int ret; @@ -532,12 +535,12 @@ static int check_inodes(struct bch_fs *c, bool full) (inode.v->bi_flags & (BCH_INODE_I_SIZE_DIRTY| BCH_INODE_I_SECTORS_DIRTY| BCH_INODE_UNLINKED))) { - ret = check_inode(&trans, iter, inode); + ret = check_inode(&trans, &iter, inode); if (ret) break; } } - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); BUG_ON(ret == -EINTR); @@ -547,7 +550,7 @@ static int check_inodes(struct bch_fs *c, bool full) static int fix_overlapping_extent(struct btree_trans *trans, struct bkey_s_c k, struct bpos cut_at) { - struct btree_iter *iter; + struct btree_iter iter; struct bkey_i *u; int ret; @@ -567,29 +570,29 @@ static int fix_overlapping_extent(struct btree_trans *trans, * assume things about extent overwrites - we should be running the * triggers manually here */ - iter = bch2_trans_get_iter(trans, BTREE_ID_extents, u->k.p, - BTREE_ITER_INTENT|BTREE_ITER_NOT_EXTENTS); + bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, u->k.p, + BTREE_ITER_INTENT|BTREE_ITER_NOT_EXTENTS); - BUG_ON(iter->flags & BTREE_ITER_IS_EXTENTS); - ret = bch2_btree_iter_traverse(iter) ?: - bch2_trans_update(trans, iter, u, BTREE_TRIGGER_NORUN) ?: + BUG_ON(iter.flags & BTREE_ITER_IS_EXTENTS); + ret = bch2_btree_iter_traverse(&iter) ?: + bch2_trans_update(trans, &iter, u, BTREE_TRIGGER_NORUN) ?: bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL| BTREE_INSERT_LAZY_RW); - bch2_trans_iter_put(trans, iter); + bch2_trans_iter_exit(trans, &iter); return ret; } static int inode_backpointer_exists(struct btree_trans *trans, struct bch_inode_unpacked *inode) { - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c k; int ret; - iter = bch2_trans_get_iter(trans, BTREE_ID_dirents, - POS(inode->bi_dir, inode->bi_dir_offset), 0); - k = bch2_btree_iter_peek_slot(iter); + bch2_trans_iter_init(trans, &iter, BTREE_ID_dirents, + POS(inode->bi_dir, inode->bi_dir_offset), 0); + k = bch2_btree_iter_peek_slot(&iter); ret = bkey_err(k); if (ret) goto out; @@ -598,7 +601,7 @@ static int inode_backpointer_exists(struct btree_trans *trans, ret = le64_to_cpu(bkey_s_c_to_dirent(k).v->d_inum) == inode->bi_inum; out: - bch2_trans_iter_free(trans, iter); + bch2_trans_iter_exit(trans, &iter); return ret; } @@ -618,7 +621,7 @@ static int check_extents(struct bch_fs *c) { struct inode_walker w = inode_walker_init(); struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c k; struct bkey_buf prev; u64 i_sectors = 0; @@ -630,12 +633,12 @@ static int check_extents(struct bch_fs *c) bch_verbose(c, "checking extents"); - iter = bch2_trans_get_iter(&trans, BTREE_ID_extents, - POS(BCACHEFS_ROOT_INO, 0), - BTREE_ITER_INTENT| - BTREE_ITER_PREFETCH); + bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents, + POS(BCACHEFS_ROOT_INO, 
0), + BTREE_ITER_INTENT| + BTREE_ITER_PREFETCH); retry: - while ((k = bch2_btree_iter_peek(iter)).k && + while ((k = bch2_btree_iter_peek(&iter)).k && !(ret = bkey_err(k))) { if (w.have_inode && w.cur_inum != k.k->p.inode && @@ -700,12 +703,12 @@ retry: i_sectors += k.k->size; bch2_bkey_buf_reassemble(&prev, c, k); - bch2_btree_iter_advance(iter); + bch2_btree_iter_advance(&iter); } fsck_err: if (ret == -EINTR) goto retry; - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); bch2_bkey_buf_exit(&prev, c); return bch2_trans_exit(&trans) ?: ret; } @@ -890,7 +893,7 @@ static int check_dirents(struct bch_fs *c) struct inode_walker w = inode_walker_init(); struct bch_hash_info hash_info; struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; unsigned nr_subdirs = 0; int ret = 0; @@ -898,18 +901,18 @@ static int check_dirents(struct bch_fs *c) bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); - iter = bch2_trans_get_iter(&trans, BTREE_ID_dirents, - POS(BCACHEFS_ROOT_INO, 0), - BTREE_ITER_INTENT| - BTREE_ITER_PREFETCH); + bch2_trans_iter_init(&trans, &iter, BTREE_ID_dirents, + POS(BCACHEFS_ROOT_INO, 0), + BTREE_ITER_INTENT| + BTREE_ITER_PREFETCH); do { ret = lockrestart_do(&trans, - check_dirent(&trans, iter, &hash_info, &w, &nr_subdirs)); + check_dirent(&trans, &iter, &hash_info, &w, &nr_subdirs)); if (ret) break; - } while (bch2_btree_iter_advance(iter)); - bch2_trans_iter_put(&trans, iter); + } while (bch2_btree_iter_advance(&iter)); + bch2_trans_iter_exit(&trans, &iter); return bch2_trans_exit(&trans) ?: ret; } @@ -923,7 +926,7 @@ static int check_xattrs(struct bch_fs *c) struct inode_walker w = inode_walker_init(); struct bch_hash_info hash_info; struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c k; int ret = 0; @@ -931,12 +934,12 @@ static int check_xattrs(struct bch_fs *c) bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); - iter = bch2_trans_get_iter(&trans, BTREE_ID_xattrs, - POS(BCACHEFS_ROOT_INO, 0), - BTREE_ITER_INTENT| - BTREE_ITER_PREFETCH); + bch2_trans_iter_init(&trans, &iter, BTREE_ID_xattrs, + POS(BCACHEFS_ROOT_INO, 0), + BTREE_ITER_INTENT| + BTREE_ITER_PREFETCH); retry: - while ((k = bch2_btree_iter_peek(iter)).k && + while ((k = bch2_btree_iter_peek(&iter)).k && !(ret = bkey_err(k))) { ret = walk_inode(&trans, &w, k.k->p.inode); if (ret) @@ -945,7 +948,7 @@ retry: if (fsck_err_on(!w.have_inode, c, "xattr for missing inode %llu", k.k->p.inode)) { - ret = bch2_btree_delete_at(&trans, iter, 0); + ret = bch2_btree_delete_at(&trans, &iter, 0); if (ret) break; continue; @@ -955,17 +958,17 @@ retry: hash_info = bch2_hash_info_init(c, &w.inode); ret = hash_check_key(&trans, bch2_xattr_hash_desc, - &hash_info, iter, k); + &hash_info, &iter, k); if (ret) break; - bch2_btree_iter_advance(iter); + bch2_btree_iter_advance(&iter); } fsck_err: if (ret == -EINTR) goto retry; - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); return bch2_trans_exit(&trans) ?: ret; } @@ -1114,7 +1117,7 @@ fsck_err: static int check_directory_structure(struct bch_fs *c) { struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c k; struct bch_inode_unpacked u; struct pathbuf path = { 0, 0, NULL }; @@ -1139,7 +1142,7 @@ static int check_directory_structure(struct bch_fs *c) if (ret) break; } - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); BUG_ON(ret == -EINTR); @@ -1215,7 +1218,7 @@ static int check_nlinks_find_hardlinks(struct bch_fs *c, u64 
start, u64 *end) { struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c k; struct bkey_s_c_inode inode; struct bch_inode_unpacked u; @@ -1253,7 +1256,7 @@ static int check_nlinks_find_hardlinks(struct bch_fs *c, } } - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); bch2_trans_exit(&trans); if (ret) @@ -1267,7 +1270,7 @@ static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links u64 range_start, u64 range_end) { struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c k; struct bkey_s_c_dirent d; int ret; @@ -1289,7 +1292,7 @@ static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links bch2_trans_cond_resched(&trans); } - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); ret = bch2_trans_exit(&trans) ?: ret; if (ret) @@ -1304,7 +1307,7 @@ static int check_nlinks_update_hardlinks(struct bch_fs *c, u64 range_start, u64 range_end) { struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c k; struct bkey_s_c_inode inode; struct bch_inode_unpacked u; @@ -1346,14 +1349,14 @@ static int check_nlinks_update_hardlinks(struct bch_fs *c, ret = __bch2_trans_do(&trans, NULL, NULL, BTREE_INSERT_NOFAIL| BTREE_INSERT_LAZY_RW, - bch2_btree_iter_traverse(iter) ?: - bch2_inode_write(&trans, iter, &u)); + bch2_btree_iter_traverse(&iter) ?: + bch2_inode_write(&trans, &iter, &u)); if (ret) bch_err(c, "error in fsck: error %i updating inode", ret); } } fsck_err: - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); bch2_trans_exit(&trans); if (ret) diff --git a/libbcachefs/inode.c b/libbcachefs/inode.c index 3b67108..14b0e8c 100644 --- a/libbcachefs/inode.c +++ b/libbcachefs/inode.c @@ -292,18 +292,18 @@ int bch2_inode_unpack(struct bkey_s_c_inode inode, return 0; } -struct btree_iter *bch2_inode_peek(struct btree_trans *trans, - struct bch_inode_unpacked *inode, - u64 inum, unsigned flags) +int bch2_inode_peek(struct btree_trans *trans, + struct btree_iter *iter, + struct bch_inode_unpacked *inode, + u64 inum, unsigned flags) { - struct btree_iter *iter; struct bkey_s_c k; int ret; if (trans->c->opts.inodes_use_key_cache) flags |= BTREE_ITER_CACHED; - iter = bch2_trans_get_iter(trans, BTREE_ID_inodes, POS(0, inum), flags); + bch2_trans_iter_init(trans, iter, BTREE_ID_inodes, POS(0, inum), flags); k = bch2_btree_iter_peek_slot(iter); ret = bkey_err(k); if (ret) @@ -317,10 +317,10 @@ struct btree_iter *bch2_inode_peek(struct btree_trans *trans, if (ret) goto err; - return iter; + return 0; err: - bch2_trans_iter_put(trans, iter); - return ERR_PTR(ret); + bch2_trans_iter_exit(trans, iter); + return ret; } int bch2_inode_write(struct btree_trans *trans, @@ -482,12 +482,12 @@ static inline u32 bkey_generation(struct bkey_s_c k) } } -struct btree_iter *bch2_inode_create(struct btree_trans *trans, - struct bch_inode_unpacked *inode_u, - u32 snapshot, u64 cpu) +int bch2_inode_create(struct btree_trans *trans, + struct btree_iter *iter, + struct bch_inode_unpacked *inode_u, + u32 snapshot, u64 cpu) { struct bch_fs *c = trans->c; - struct btree_iter *iter = NULL; struct bkey_s_c k; u64 min, max, start, pos, *hint; int ret = 0; @@ -513,9 +513,9 @@ struct btree_iter *bch2_inode_create(struct btree_trans *trans, start = min; pos = start; - iter = bch2_trans_get_iter(trans, BTREE_ID_inodes, POS(0, pos), - BTREE_ITER_ALL_SNAPSHOTS| - BTREE_ITER_INTENT); + bch2_trans_iter_init(trans, iter, 
BTREE_ID_inodes, POS(0, pos), + BTREE_ITER_ALL_SNAPSHOTS| + BTREE_ITER_INTENT); again: while ((k = bch2_btree_iter_peek(iter)).k && !(ret = bkey_err(k)) && @@ -553,8 +553,8 @@ again: ret = -ENOSPC; if (ret) { - bch2_trans_iter_put(trans, iter); - return ERR_PTR(ret); + bch2_trans_iter_exit(trans, iter); + return ret; } /* Retry from start */ @@ -566,8 +566,8 @@ found_slot: k = bch2_btree_iter_peek_slot(iter); ret = bkey_err(k); if (ret) { - bch2_trans_iter_put(trans, iter); - return ERR_PTR(ret); + bch2_trans_iter_exit(trans, iter); + return ret; } /* We may have raced while the iterator wasn't pointing at pos: */ @@ -578,13 +578,13 @@ found_slot: *hint = k.k->p.offset; inode_u->bi_inum = k.k->p.offset; inode_u->bi_generation = bkey_generation(k); - return iter; + return 0; } int bch2_inode_rm(struct bch_fs *c, u64 inode_nr, bool cached) { struct btree_trans trans; - struct btree_iter *iter = NULL; + struct btree_iter iter = { NULL }; struct bkey_i_inode_generation delete; struct bpos start = POS(inode_nr, 0); struct bpos end = POS(inode_nr + 1, 0); @@ -617,9 +617,9 @@ int bch2_inode_rm(struct bch_fs *c, u64 inode_nr, bool cached) retry: bch2_trans_begin(&trans); - iter = bch2_trans_get_iter(&trans, BTREE_ID_inodes, - POS(0, inode_nr), iter_flags); - k = bch2_btree_iter_peek_slot(iter); + bch2_trans_iter_init(&trans, &iter, BTREE_ID_inodes, + POS(0, inode_nr), iter_flags); + k = bch2_btree_iter_peek_slot(&iter); ret = bkey_err(k); if (ret) @@ -636,14 +636,14 @@ retry: bch2_inode_unpack(bkey_s_c_to_inode(k), &inode_u); bkey_inode_generation_init(&delete.k_i); - delete.k.p = iter->pos; + delete.k.p = iter.pos; delete.v.bi_generation = cpu_to_le32(inode_u.bi_generation + 1); - ret = bch2_trans_update(&trans, iter, &delete.k_i, 0) ?: + ret = bch2_trans_update(&trans, &iter, &delete.k_i, 0) ?: bch2_trans_commit(&trans, NULL, NULL, BTREE_INSERT_NOFAIL); err: - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); if (ret == -EINTR) goto retry; @@ -654,12 +654,11 @@ err: static int bch2_inode_find_by_inum_trans(struct btree_trans *trans, u64 inode_nr, struct bch_inode_unpacked *inode) { - struct btree_iter *iter; + struct btree_iter iter = { NULL }; int ret; - iter = bch2_inode_peek(trans, inode, inode_nr, 0); - ret = PTR_ERR_OR_ZERO(iter); - bch2_trans_iter_put(trans, iter); + ret = bch2_inode_peek(trans, &iter, inode, inode_nr, 0); + bch2_trans_iter_exit(trans, &iter); return ret; } diff --git a/libbcachefs/inode.h b/libbcachefs/inode.h index d67af4f..25bef10 100644 --- a/libbcachefs/inode.h +++ b/libbcachefs/inode.h @@ -57,8 +57,8 @@ int bch2_inode_unpack(struct bkey_s_c_inode, struct bch_inode_unpacked *); void bch2_inode_unpacked_to_text(struct printbuf *, struct bch_inode_unpacked *); -struct btree_iter *bch2_inode_peek(struct btree_trans *, - struct bch_inode_unpacked *, u64, unsigned); +int bch2_inode_peek(struct btree_trans *, struct btree_iter *, + struct bch_inode_unpacked *, u64, unsigned); int bch2_inode_write(struct btree_trans *, struct btree_iter *, struct bch_inode_unpacked *); @@ -71,8 +71,8 @@ void bch2_inode_init(struct bch_fs *, struct bch_inode_unpacked *, uid_t, gid_t, umode_t, dev_t, struct bch_inode_unpacked *); -struct btree_iter *bch2_inode_create(struct btree_trans *, - struct bch_inode_unpacked *, u32, u64); +int bch2_inode_create(struct btree_trans *, struct btree_iter *, + struct bch_inode_unpacked *, u32, u64); int bch2_inode_rm(struct bch_fs *, u64, bool); diff --git a/libbcachefs/io.c b/libbcachefs/io.c index 4585a40..ccde900 100644 --- 
a/libbcachefs/io.c +++ b/libbcachefs/io.c @@ -192,7 +192,7 @@ int bch2_sum_sector_overwrites(struct btree_trans *trans, s64 *disk_sectors_delta) { struct bch_fs *c = trans->c; - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c old; unsigned new_replicas = bch2_bkey_replicas(c, bkey_i_to_s_c(new)); bool new_compressed = bch2_bkey_sectors_compressed(bkey_i_to_s_c(new)); @@ -203,7 +203,7 @@ int bch2_sum_sector_overwrites(struct btree_trans *trans, *i_sectors_delta = 0; *disk_sectors_delta = 0; - iter = bch2_trans_copy_iter(trans, extent_iter); + bch2_trans_copy_iter(&iter, extent_iter); for_each_btree_key_continue(iter, BTREE_ITER_SLOTS, old, ret) { s64 sectors = min(new->k.p.offset, old.k->p.offset) - @@ -236,7 +236,7 @@ int bch2_sum_sector_overwrites(struct btree_trans *trans, * less: */ if (!bkey_cmp(old.k->p, new->k.p)) { - old = bch2_btree_iter_next(iter); + old = bch2_btree_iter_next(&iter); ret = bkey_err(old); if (ret) break; @@ -251,7 +251,7 @@ int bch2_sum_sector_overwrites(struct btree_trans *trans, } } - bch2_trans_iter_put(trans, iter); + bch2_trans_iter_exit(trans, &iter); return ret; } @@ -266,11 +266,22 @@ int bch2_extent_update(struct btree_trans *trans, { /* this must live until after bch2_trans_commit(): */ struct bkey_inode_buf inode_p; + struct bpos next_pos; bool extending = false, usage_increasing; s64 i_sectors_delta = 0, disk_sectors_delta = 0; int ret; - ret = bch2_extent_trim_atomic(k, iter); + /* + * This traverses us the iterator without changing iter->path->pos to + * search_key() (which is pos + 1 for extents): we want there to be a + * path already traversed at iter->pos because + * bch2_trans_extent_update() will use it to attempt extent merging + */ + ret = __bch2_btree_iter_traverse(iter); + if (ret) + return ret; + + ret = bch2_extent_trim_atomic(trans, iter, k); if (ret) return ret; @@ -300,12 +311,11 @@ int bch2_extent_update(struct btree_trans *trans, : 0; if (i_sectors_delta || new_i_size) { - struct btree_iter *inode_iter; + struct btree_iter inode_iter; struct bch_inode_unpacked inode_u; - inode_iter = bch2_inode_peek(trans, &inode_u, + ret = bch2_inode_peek(trans, &inode_iter, &inode_u, k->k.p.inode, BTREE_ITER_INTENT); - ret = PTR_ERR_OR_ZERO(inode_iter); if (ret) return ret; @@ -334,16 +344,18 @@ int bch2_extent_update(struct btree_trans *trans, inode_p.inode.k.p.snapshot = iter->snapshot; - ret = bch2_trans_update(trans, inode_iter, + ret = bch2_trans_update(trans, &inode_iter, &inode_p.inode.k_i, 0); } - bch2_trans_iter_put(trans, inode_iter); + bch2_trans_iter_exit(trans, &inode_iter); if (ret) return ret; } + next_pos = k->k.p; + ret = bch2_trans_update(trans, iter, k, 0) ?: bch2_trans_commit(trans, disk_res, journal_seq, BTREE_INSERT_NOCHECK_RW| @@ -352,6 +364,8 @@ int bch2_extent_update(struct btree_trans *trans, if (ret) return ret; + bch2_btree_iter_set_pos(iter, next_pos); + if (i_sectors_delta_total) *i_sectors_delta_total += i_sectors_delta; return 0; @@ -409,18 +423,18 @@ int bch2_fpunch(struct bch_fs *c, u64 inum, u64 start, u64 end, u64 *journal_seq, s64 *i_sectors_delta) { struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; int ret = 0; bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024); - iter = bch2_trans_get_iter(&trans, BTREE_ID_extents, + bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents, POS(inum, start), BTREE_ITER_INTENT); - ret = bch2_fpunch_at(&trans, iter, POS(inum, end), + ret = bch2_fpunch_at(&trans, &iter, POS(inum, end), journal_seq, i_sectors_delta); - 
bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); bch2_trans_exit(&trans); if (ret == -EINTR) @@ -436,28 +450,28 @@ int bch2_write_index_default(struct bch_write_op *op) struct keylist *keys = &op->insert_keys; struct bkey_i *k = bch2_keylist_front(keys); struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; int ret; bch2_bkey_buf_init(&sk); bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024); - iter = bch2_trans_get_iter(&trans, BTREE_ID_extents, - bkey_start_pos(&k->k), - BTREE_ITER_SLOTS|BTREE_ITER_INTENT); + bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents, + bkey_start_pos(&k->k), + BTREE_ITER_SLOTS|BTREE_ITER_INTENT); do { bch2_trans_begin(&trans); k = bch2_keylist_front(keys); - k->k.p.snapshot = iter->snapshot; + k->k.p.snapshot = iter.snapshot; bch2_bkey_buf_realloc(&sk, c, k->k.u64s); bkey_copy(sk.k, k); - bch2_cut_front(iter->pos, sk.k); + bch2_cut_front(iter.pos, sk.k); - ret = bch2_extent_update(&trans, iter, sk.k, + ret = bch2_extent_update(&trans, &iter, sk.k, &op->res, op_journal_seq(op), op->new_i_size, &op->i_sectors_delta, op->flags & BCH_WRITE_CHECK_ENOSPC); @@ -466,11 +480,11 @@ int bch2_write_index_default(struct bch_write_op *op) if (ret) break; - if (bkey_cmp(iter->pos, k->k.p) >= 0) + if (bkey_cmp(iter.pos, k->k.p) >= 0) bch2_keylist_pop_front(keys); } while (!bch2_keylist_empty(keys)); - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); bch2_trans_exit(&trans); bch2_bkey_buf_exit(&sk, c); @@ -1636,7 +1650,7 @@ static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio unsigned flags) { struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct bkey_buf sk; struct bkey_s_c k; int ret; @@ -1647,12 +1661,12 @@ static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio bch2_bkey_buf_init(&sk); bch2_trans_init(&trans, c, 0, 0); - iter = bch2_trans_get_iter(&trans, rbio->data_btree, - rbio->read_pos, BTREE_ITER_SLOTS); + bch2_trans_iter_init(&trans, &iter, rbio->data_btree, + rbio->read_pos, BTREE_ITER_SLOTS); retry: rbio->bio.bi_status = 0; - k = bch2_btree_iter_peek_slot(iter); + k = bch2_btree_iter_peek_slot(&iter); if (bkey_err(k)) goto err; @@ -1679,7 +1693,7 @@ retry: goto err; out: bch2_rbio_done(rbio); - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); bch2_trans_exit(&trans); bch2_bkey_buf_exit(&sk, c); return; @@ -1745,7 +1759,7 @@ static int __bch2_rbio_narrow_crcs(struct btree_trans *trans, struct bch_fs *c = rbio->c; u64 data_offset = rbio->data_pos.offset - rbio->pick.crc.offset; struct bch_extent_crc_unpacked new_crc; - struct btree_iter *iter = NULL; + struct btree_iter iter; struct bkey_i *new; struct bkey_s_c k; int ret = 0; @@ -1753,9 +1767,9 @@ static int __bch2_rbio_narrow_crcs(struct btree_trans *trans, if (crc_is_compressed(rbio->pick.crc)) return 0; - iter = bch2_trans_get_iter(trans, rbio->data_btree, rbio->data_pos, - BTREE_ITER_SLOTS|BTREE_ITER_INTENT); - k = bch2_btree_iter_peek_slot(iter); + bch2_trans_iter_init(trans, &iter, rbio->data_btree, rbio->data_pos, + BTREE_ITER_SLOTS|BTREE_ITER_INTENT); + k = bch2_btree_iter_peek_slot(&iter); if ((ret = bkey_err(k))) goto out; @@ -1790,9 +1804,9 @@ static int __bch2_rbio_narrow_crcs(struct btree_trans *trans, if (!bch2_bkey_narrow_crcs(new, new_crc)) goto out; - ret = bch2_trans_update(trans, iter, new, 0); + ret = bch2_trans_update(trans, &iter, new, 0); out: - bch2_trans_iter_put(trans, iter); + bch2_trans_iter_exit(trans, 
&iter); return ret; } @@ -1963,7 +1977,7 @@ int __bch2_read_indirect_extent(struct btree_trans *trans, unsigned *offset_into_extent, struct bkey_buf *orig_k) { - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c k; u64 reflink_offset; int ret; @@ -1971,10 +1985,10 @@ int __bch2_read_indirect_extent(struct btree_trans *trans, reflink_offset = le64_to_cpu(bkey_i_to_reflink_p(orig_k->k)->v.idx) + *offset_into_extent; - iter = bch2_trans_get_iter(trans, BTREE_ID_reflink, - POS(0, reflink_offset), - BTREE_ITER_SLOTS); - k = bch2_btree_iter_peek_slot(iter); + bch2_trans_iter_init(trans, &iter, BTREE_ID_reflink, + POS(0, reflink_offset), + BTREE_ITER_SLOTS); + k = bch2_btree_iter_peek_slot(&iter); ret = bkey_err(k); if (ret) goto err; @@ -1991,10 +2005,10 @@ int __bch2_read_indirect_extent(struct btree_trans *trans, goto err; } - *offset_into_extent = iter->pos.offset - bkey_start_offset(k.k); + *offset_into_extent = iter.pos.offset - bkey_start_offset(k.k); bch2_bkey_buf_reassemble(orig_k, trans->c, k); err: - bch2_trans_iter_put(trans, iter); + bch2_trans_iter_exit(trans, &iter); return ret; } @@ -2264,7 +2278,7 @@ void __bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, struct bch_io_failures *failed, unsigned flags) { struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct bkey_buf sk; struct bkey_s_c k; int ret; @@ -2273,10 +2287,9 @@ void __bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, bch2_bkey_buf_init(&sk); bch2_trans_init(&trans, c, 0, 0); - - iter = bch2_trans_get_iter(&trans, BTREE_ID_extents, - POS(inode, bvec_iter.bi_sector), - BTREE_ITER_SLOTS); + bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents, + POS(inode, bvec_iter.bi_sector), + BTREE_ITER_SLOTS); retry: bch2_trans_begin(&trans); @@ -2293,15 +2306,15 @@ retry: break; } - bch2_btree_iter_set_pos(iter, + bch2_btree_iter_set_pos(&iter, POS(inode, bvec_iter.bi_sector)); - k = bch2_btree_iter_peek_slot(iter); + k = bch2_btree_iter_peek_slot(&iter); ret = bkey_err(k); if (ret) break; - offset_into_extent = iter->pos.offset - + offset_into_extent = iter.pos.offset - bkey_start_offset(k.k); sectors = k.k->size - offset_into_extent; @@ -2332,7 +2345,7 @@ retry: if (bvec_iter.bi_size == bytes) flags |= BCH_READ_LAST_FRAGMENT; - ret = __bch2_read_extent(&trans, rbio, bvec_iter, iter->pos, + ret = __bch2_read_extent(&trans, rbio, bvec_iter, iter.pos, data_btree, k, offset_into_extent, failed, flags); if (ret) @@ -2348,7 +2361,7 @@ retry: if (ret == -EINTR || ret == READ_RETRY || ret == READ_RETRY_AVOID) goto retry; - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); bch2_trans_exit(&trans); bch2_bkey_buf_exit(&sk, c); diff --git a/libbcachefs/journal_seq_blacklist.c b/libbcachefs/journal_seq_blacklist.c index f2060f9..68fb2eb 100644 --- a/libbcachefs/journal_seq_blacklist.c +++ b/libbcachefs/journal_seq_blacklist.c @@ -250,7 +250,7 @@ void bch2_blacklist_entries_gc(struct work_struct *work) bch2_trans_init(&trans, c, 0, 0); for (i = 0; i < BTREE_ID_NR; i++) { - struct btree_iter *iter; + struct btree_iter iter; struct btree *b; for_each_btree_node(&trans, iter, i, POS_MIN, @@ -259,7 +259,7 @@ void bch2_blacklist_entries_gc(struct work_struct *work) bch2_trans_exit(&trans); return; } - bch2_trans_iter_free(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); } ret = bch2_trans_exit(&trans); diff --git a/libbcachefs/migrate.c b/libbcachefs/migrate.c index 1f65eca..1899326 100644 --- a/libbcachefs/migrate.c +++ b/libbcachefs/migrate.c @@ -39,7 +39,7 @@ 
static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags enum btree_id btree_id) { struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c k; struct bkey_buf sk; int ret = 0; @@ -47,13 +47,13 @@ static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags bch2_bkey_buf_init(&sk); bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); - iter = bch2_trans_get_iter(&trans, btree_id, POS_MIN, - BTREE_ITER_PREFETCH); + bch2_trans_iter_init(&trans, &iter, btree_id, POS_MIN, + BTREE_ITER_PREFETCH); - while ((k = bch2_btree_iter_peek(iter)).k && + while ((k = bch2_btree_iter_peek(&iter)).k && !(ret = bkey_err(k))) { if (!bch2_bkey_has_device(k, dev_idx)) { - bch2_btree_iter_advance(iter); + bch2_btree_iter_advance(&iter); continue; } @@ -71,10 +71,10 @@ static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags */ bch2_extent_normalize(c, bkey_i_to_s(sk.k)); - bch2_btree_iter_set_pos(iter, bkey_start_pos(&sk.k->k)); + bch2_btree_iter_set_pos(&iter, bkey_start_pos(&sk.k->k)); - ret = bch2_btree_iter_traverse(iter) ?: - bch2_trans_update(&trans, iter, sk.k, 0) ?: + ret = bch2_btree_iter_traverse(&iter) ?: + bch2_trans_update(&trans, &iter, sk.k, 0) ?: bch2_trans_commit(&trans, NULL, NULL, BTREE_INSERT_NOFAIL); @@ -88,7 +88,7 @@ static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags if (ret) break; } - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); ret = bch2_trans_exit(&trans) ?: ret; bch2_bkey_buf_exit(&sk, c); @@ -107,7 +107,7 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags) static int bch2_dev_metadata_drop(struct bch_fs *c, unsigned dev_idx, int flags) { struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct closure cl; struct btree *b; struct bkey_buf k; @@ -139,9 +139,9 @@ retry: break; } - ret = bch2_btree_node_update_key(&trans, iter, b, k.k, false); + ret = bch2_btree_node_update_key(&trans, &iter, b, k.k, false); if (ret == -EINTR) { - b = bch2_btree_iter_peek_node(iter); + b = bch2_btree_iter_peek_node(&iter); ret = 0; goto retry; } @@ -150,7 +150,7 @@ retry: break; } } - bch2_trans_iter_free(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); if (ret) goto err; diff --git a/libbcachefs/move.c b/libbcachefs/move.c index ee0f155..fb7c0ab 100644 --- a/libbcachefs/move.c +++ b/libbcachefs/move.c @@ -57,7 +57,7 @@ static int bch2_migrate_index_update(struct bch_write_op *op) { struct bch_fs *c = op->c; struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct migrate_write *m = container_of(op, struct migrate_write, op); struct keylist *keys = &op->insert_keys; @@ -70,9 +70,9 @@ static int bch2_migrate_index_update(struct bch_write_op *op) bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024); - iter = bch2_trans_get_iter(&trans, m->btree_id, - bkey_start_pos(&bch2_keylist_front(keys)->k), - BTREE_ITER_SLOTS|BTREE_ITER_INTENT); + bch2_trans_iter_init(&trans, &iter, m->btree_id, + bkey_start_pos(&bch2_keylist_front(keys)->k), + BTREE_ITER_SLOTS|BTREE_ITER_INTENT); while (1) { struct bkey_s_c k; @@ -80,13 +80,14 @@ static int bch2_migrate_index_update(struct bch_write_op *op) struct bkey_i_extent *new; const union bch_extent_entry *entry; struct extent_ptr_decoded p; + struct bpos next_pos; bool did_work = false; bool extending = false, should_check_enospc; s64 i_sectors_delta = 0, disk_sectors_delta = 0; bch2_trans_begin(&trans); - k = 
bch2_btree_iter_peek_slot(iter); + k = bch2_btree_iter_peek_slot(&iter); ret = bkey_err(k); if (ret) goto err; @@ -102,9 +103,9 @@ static int bch2_migrate_index_update(struct bch_write_op *op) bch2_bkey_buf_copy(&_new, c, bch2_keylist_front(keys)); new = bkey_i_to_extent(_new.k); - bch2_cut_front(iter->pos, &new->k_i); + bch2_cut_front(iter.pos, &new->k_i); - bch2_cut_front(iter->pos, insert); + bch2_cut_front(iter.pos, insert); bch2_cut_back(new->k.p, insert); bch2_cut_back(insert->k.p, &new->k_i); @@ -146,7 +147,7 @@ static int bch2_migrate_index_update(struct bch_write_op *op) op->opts.background_target, op->opts.data_replicas); - ret = bch2_sum_sector_overwrites(&trans, iter, insert, + ret = bch2_sum_sector_overwrites(&trans, &iter, insert, &extending, &should_check_enospc, &i_sectors_delta, @@ -163,20 +164,24 @@ static int bch2_migrate_index_update(struct bch_write_op *op) goto out; } - ret = bch2_trans_update(&trans, iter, insert, 0) ?: + next_pos = insert->k.p; + + ret = bch2_trans_update(&trans, &iter, insert, 0) ?: bch2_trans_commit(&trans, &op->res, op_journal_seq(op), BTREE_INSERT_NOFAIL| m->data_opts.btree_insert_flags); -err: - if (!ret) + if (!ret) { + bch2_btree_iter_set_pos(&iter, next_pos); atomic_long_inc(&c->extent_migrate_done); + } +err: if (ret == -EINTR) ret = 0; if (ret) break; next: - while (bkey_cmp(iter->pos, bch2_keylist_front(keys)->k.p) >= 0) { + while (bkey_cmp(iter.pos, bch2_keylist_front(keys)->k.p) >= 0) { bch2_keylist_pop_front(keys); if (bch2_keylist_empty(keys)) goto out; @@ -184,18 +189,18 @@ next: continue; nomatch: if (m->ctxt) { - BUG_ON(k.k->p.offset <= iter->pos.offset); + BUG_ON(k.k->p.offset <= iter.pos.offset); atomic64_inc(&m->ctxt->stats->keys_raced); - atomic64_add(k.k->p.offset - iter->pos.offset, + atomic64_add(k.k->p.offset - iter.pos.offset, &m->ctxt->stats->sectors_raced); } atomic_long_inc(&c->extent_migrate_raced); trace_move_race(&new->k); - bch2_btree_iter_advance(iter); + bch2_btree_iter_advance(&iter); goto next; } out: - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); bch2_trans_exit(&trans); bch2_bkey_buf_exit(&_insert, c); bch2_bkey_buf_exit(&_new, c); @@ -216,11 +221,6 @@ void bch2_migrate_read_done(struct migrate_write *m, struct bch_read_bio *rbio) m->op.crc = rbio->pick.crc; m->op.wbio.bio.bi_iter.bi_size = m->op.crc.compressed_size << 9; - if (bch2_csum_type_is_encryption(m->op.crc.csum_type)) { - m->op.nonce = m->op.crc.nonce + m->op.crc.offset; - m->op.csum_type = m->op.crc.csum_type; - } - if (m->data_cmd == DATA_REWRITE) bch2_dev_list_drop_dev(&m->op.devs_have, m->data_opts.rewrite_dev); } @@ -235,6 +235,7 @@ int bch2_migrate_write_init(struct bch_fs *c, struct migrate_write *m, { struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const union bch_extent_entry *entry; + struct bch_extent_crc_unpacked crc; struct extent_ptr_decoded p; int ret; @@ -255,6 +256,18 @@ int bch2_migrate_write_init(struct bch_fs *c, struct migrate_write *m, m->op.target = data_opts.target, m->op.write_point = wp; + /* + * op->csum_type is normally initialized from the fs/file's current + * options - but if an extent is encrypted, we require that it stays + * encrypted: + */ + bkey_for_each_crc(k.k, ptrs, crc, entry) + if (bch2_csum_type_is_encryption(crc.csum_type)) { + m->op.nonce = crc.nonce + m->op.crc.offset; + m->op.csum_type = crc.csum_type; + break; + } + if (m->data_opts.btree_insert_flags & BTREE_INSERT_USE_RESERVE) { m->op.alloc_reserve = RESERVE_MOVINGGC; m->op.flags |= BCH_WRITE_ALLOC_NOWAIT; @@ -511,13 +524,13 
@@ err: static int lookup_inode(struct btree_trans *trans, struct bpos pos, struct bch_inode_unpacked *inode) { - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c k; int ret; - iter = bch2_trans_get_iter(trans, BTREE_ID_inodes, pos, - BTREE_ITER_ALL_SNAPSHOTS); - k = bch2_btree_iter_peek(iter); + bch2_trans_iter_init(trans, &iter, BTREE_ID_inodes, pos, + BTREE_ITER_ALL_SNAPSHOTS); + k = bch2_btree_iter_peek(&iter); ret = bkey_err(k); if (ret) goto err; @@ -535,7 +548,7 @@ static int lookup_inode(struct btree_trans *trans, struct bpos pos, if (ret) goto err; err: - bch2_trans_iter_put(trans, iter); + bch2_trans_iter_exit(trans, &iter); return ret; } @@ -553,7 +566,7 @@ static int __bch2_move_data(struct bch_fs *c, struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts); struct bkey_buf sk; struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c k; struct data_opts data_opts; enum data_cmd data_cmd; @@ -567,8 +580,8 @@ static int __bch2_move_data(struct bch_fs *c, stats->btree_id = btree_id; stats->pos = start; - iter = bch2_trans_get_iter(&trans, btree_id, start, - BTREE_ITER_PREFETCH); + bch2_trans_iter_init(&trans, &iter, btree_id, start, + BTREE_ITER_PREFETCH); if (rate) bch2_ratelimit_reset(rate); @@ -599,9 +612,9 @@ static int __bch2_move_data(struct bch_fs *c, bch2_trans_begin(&trans); - k = bch2_btree_iter_peek(iter); + k = bch2_btree_iter_peek(&iter); - stats->pos = iter->pos; + stats->pos = iter.pos; if (!k.k) break; @@ -674,18 +687,42 @@ next: atomic64_add(k.k->size * bch2_bkey_nr_ptrs_allocated(k), &stats->sectors_seen); next_nondata: - bch2_btree_iter_advance(iter); + bch2_btree_iter_advance(&iter); bch2_trans_cond_resched(&trans); } out: - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); ret = bch2_trans_exit(&trans) ?: ret; bch2_bkey_buf_exit(&sk, c); return ret; } +inline void bch_move_stats_init(struct bch_move_stats *stats, char *name) +{ + memset(stats, 0, sizeof(*stats)); + + scnprintf(stats->name, sizeof(stats->name), + "%s", name); +} + +static inline void progress_list_add(struct bch_fs *c, + struct bch_move_stats *stats) +{ + mutex_lock(&c->data_progress_lock); + list_add(&stats->list, &c->data_progress_list); + mutex_unlock(&c->data_progress_lock); +} + +static inline void progress_list_del(struct bch_fs *c, + struct bch_move_stats *stats) +{ + mutex_lock(&c->data_progress_lock); + list_del(&stats->list); + mutex_unlock(&c->data_progress_lock); +} + int bch2_move_data(struct bch_fs *c, enum btree_id start_btree_id, struct bpos start_pos, enum btree_id end_btree_id, struct bpos end_pos, @@ -698,6 +735,7 @@ int bch2_move_data(struct bch_fs *c, enum btree_id id; int ret; + progress_list_add(c, stats); closure_init_stack(&ctxt.cl); INIT_LIST_HEAD(&ctxt.reads); init_waitqueue_head(&ctxt.wait); @@ -731,6 +769,7 @@ int bch2_move_data(struct bch_fs *c, atomic64_read(&stats->sectors_moved), atomic64_read(&stats->keys_moved)); + progress_list_del(c, stats); return ret; } @@ -747,7 +786,7 @@ static int bch2_move_btree(struct bch_fs *c, bool kthread = (current->flags & PF_KTHREAD) != 0; struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts); struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct btree *b; enum btree_id id; struct data_opts data_opts; @@ -755,6 +794,7 @@ static int bch2_move_btree(struct bch_fs *c, int ret = 0; bch2_trans_init(&trans, c, 0, 0); + progress_list_add(c, stats); stats->data_type = BCH_DATA_btree; @@ -773,7 +813,7 @@ static int 
bch2_move_btree(struct bch_fs *c, bpos_cmp(b->key.k.p, end_pos)) > 0) break; - stats->pos = iter->pos; + stats->pos = iter.pos; switch ((cmd = pred(c, arg, b, &io_opts, &data_opts))) { case DATA_SKIP: @@ -787,13 +827,13 @@ static int bch2_move_btree(struct bch_fs *c, BUG(); } - ret = bch2_btree_node_rewrite(&trans, iter, + ret = bch2_btree_node_rewrite(&trans, &iter, b->data->keys.seq, 0) ?: ret; next: bch2_trans_cond_resched(&trans); } + bch2_trans_iter_exit(&trans, &iter); - ret = bch2_trans_iter_free(&trans, iter) ?: ret; if (kthread && kthread_should_stop()) break; } @@ -803,6 +843,7 @@ next: if (ret) bch_err(c, "error %i in bch2_move_btree", ret); + progress_list_del(c, stats); return ret; } @@ -944,6 +985,7 @@ int bch2_data_job(struct bch_fs *c, switch (op.op) { case BCH_DATA_OP_REREPLICATE: + bch_move_stats_init(stats, "rereplicate"); stats->data_type = BCH_DATA_journal; ret = bch2_journal_flush_device_pins(&c->journal, -1); @@ -968,6 +1010,7 @@ int bch2_data_job(struct bch_fs *c, if (op.migrate.dev >= c->sb.nr_devices) return -EINVAL; + bch_move_stats_init(stats, "migrate"); stats->data_type = BCH_DATA_journal; ret = bch2_journal_flush_device_pins(&c->journal, op.migrate.dev); @@ -985,6 +1028,7 @@ int bch2_data_job(struct bch_fs *c, ret = bch2_replicas_gc2(c) ?: ret; break; case BCH_DATA_OP_REWRITE_OLD_NODES: + bch_move_stats_init(stats, "rewrite_old_nodes"); ret = bch2_scan_old_btree_nodes(c, stats); break; default: diff --git a/libbcachefs/move.h b/libbcachefs/move.h index 5076153..2a789a1 100644 --- a/libbcachefs/move.h +++ b/libbcachefs/move.h @@ -66,4 +66,8 @@ int bch2_data_job(struct bch_fs *, struct bch_move_stats *, struct bch_ioctl_data); +inline void bch_move_stats_init(struct bch_move_stats *stats, + char *name); + + #endif /* _BCACHEFS_MOVE_H */ diff --git a/libbcachefs/move_types.h b/libbcachefs/move_types.h index fc0de16..9df6d18 100644 --- a/libbcachefs/move_types.h +++ b/libbcachefs/move_types.h @@ -6,6 +6,8 @@ struct bch_move_stats { enum bch_data_type data_type; enum btree_id btree_id; struct bpos pos; + struct list_head list; + char name[32]; atomic64_t keys_moved; atomic64_t keys_raced; diff --git a/libbcachefs/movinggc.c b/libbcachefs/movinggc.c index 2acca0d..5c9eafc 100644 --- a/libbcachefs/movinggc.c +++ b/libbcachefs/movinggc.c @@ -85,6 +85,7 @@ static enum data_cmd copygc_pred(struct bch_fs *c, void *arg, BUG_ON(i != j); #endif if (i >= 0 && + p.ptr.dev == h->data[i].dev && p.ptr.offset < h->data[i].offset + ca->mi.bucket_size && p.ptr.gen == h->data[i].gen) { /* @@ -146,7 +147,8 @@ static int bch2_copygc(struct bch_fs *c) size_t b, heap_size = 0; int ret; - memset(&move_stats, 0, sizeof(move_stats)); + bch_move_stats_init(&move_stats, "copygc"); + /* * Find buckets with lowest sector counts, skipping completely * empty buckets, by building a maxheap sorted by sector count, diff --git a/libbcachefs/opts.h b/libbcachefs/opts.h index 003c00f..147b402 100644 --- a/libbcachefs/opts.h +++ b/libbcachefs/opts.h @@ -171,7 +171,7 @@ enum opt_type { x(shard_inode_numbers, u8, \ OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ OPT_BOOL(), \ - BCH_SB_SHARD_INUMS, false, \ + BCH_SB_SHARD_INUMS, true, \ NULL, "Shard new inode numbers by CPU id") \ x(inodes_use_key_cache, u8, \ OPT_FORMAT|OPT_MOUNT, \ diff --git a/libbcachefs/quota.c b/libbcachefs/quota.c index 7861781..9b0f4d3 100644 --- a/libbcachefs/quota.c +++ b/libbcachefs/quota.c @@ -357,7 +357,7 @@ static int __bch2_quota_set(struct bch_fs *c, struct bkey_s_c k) static int bch2_quota_init_type(struct bch_fs *c, enum 
quota_types type) { struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c k; int ret = 0; @@ -372,7 +372,7 @@ static int bch2_quota_init_type(struct bch_fs *c, enum quota_types type) if (ret) break; } - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); return bch2_trans_exit(&trans) ?: ret; } @@ -419,7 +419,7 @@ int bch2_fs_quota_read(struct bch_fs *c) unsigned i, qtypes = enabled_qtypes(c); struct bch_memquota_type *q; struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct bch_inode_unpacked u; struct bkey_s_c k; int ret; @@ -450,7 +450,7 @@ int bch2_fs_quota_read(struct bch_fs *c) KEY_TYPE_QUOTA_NOCHECK); } } - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); return bch2_trans_exit(&trans) ?: ret; } @@ -717,13 +717,13 @@ static int bch2_set_quota_trans(struct btree_trans *trans, struct bkey_i_quota *new_quota, struct qc_dqblk *qdq) { - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c k; int ret; - iter = bch2_trans_get_iter(trans, BTREE_ID_quotas, new_quota->k.p, - BTREE_ITER_SLOTS|BTREE_ITER_INTENT); - k = bch2_btree_iter_peek_slot(iter); + bch2_trans_iter_init(trans, &iter, BTREE_ID_quotas, new_quota->k.p, + BTREE_ITER_SLOTS|BTREE_ITER_INTENT); + k = bch2_btree_iter_peek_slot(&iter); ret = bkey_err(k); if (unlikely(ret)) @@ -742,8 +742,8 @@ static int bch2_set_quota_trans(struct btree_trans *trans, if (qdq->d_fieldmask & QC_INO_HARD) new_quota->v.c[Q_INO].hardlimit = cpu_to_le64(qdq->d_ino_hardlimit); - ret = bch2_trans_update(trans, iter, &new_quota->k_i, 0); - bch2_trans_iter_put(trans, iter); + ret = bch2_trans_update(trans, &iter, &new_quota->k_i, 0); + bch2_trans_iter_exit(trans, &iter); return ret; } diff --git a/libbcachefs/rebalance.c b/libbcachefs/rebalance.c index a0dbf41..a573fed 100644 --- a/libbcachefs/rebalance.c +++ b/libbcachefs/rebalance.c @@ -166,6 +166,7 @@ static int bch2_rebalance_thread(void *arg) struct bch_fs_rebalance *r = &c->rebalance; struct io_clock *clock = &c->io_clock[WRITE]; struct rebalance_work w, p; + struct bch_move_stats move_stats; unsigned long start, prev_start; unsigned long prev_run_time, prev_run_cputime; unsigned long cputime, prev_cputime; @@ -179,6 +180,7 @@ static int bch2_rebalance_thread(void *arg) prev_start = jiffies; prev_cputime = curr_cputime(); + bch_move_stats_init(&move_stats, "rebalance"); while (!kthread_wait_freezable(r->enabled)) { cond_resched(); @@ -235,7 +237,7 @@ static int bch2_rebalance_thread(void *arg) prev_cputime = cputime; r->state = REBALANCE_RUNNING; - memset(&r->move_stats, 0, sizeof(r->move_stats)); + memset(&move_stats, 0, sizeof(move_stats)); rebalance_work_reset(c); bch2_move_data(c, @@ -245,7 +247,7 @@ static int bch2_rebalance_thread(void *arg) NULL, /* &r->pd.rate, */ writepoint_ptr(&c->rebalance_write_point), rebalance_pred, NULL, - &r->move_stats); + &move_stats); } return 0; @@ -281,10 +283,7 @@ void bch2_rebalance_work_to_text(struct printbuf *out, struct bch_fs *c) h1); break; case REBALANCE_RUNNING: - pr_buf(out, "running\n" - "pos "); - bch2_bpos_to_text(out, r->move_stats.pos); - pr_buf(out, "\n"); + pr_buf(out, "running\n"); break; } } diff --git a/libbcachefs/rebalance_types.h b/libbcachefs/rebalance_types.h index 2f62a64..7462a92 100644 --- a/libbcachefs/rebalance_types.h +++ b/libbcachefs/rebalance_types.h @@ -19,7 +19,6 @@ struct bch_fs_rebalance { enum rebalance_state state; u64 throttled_until_iotime; unsigned long throttled_until_cputime; - struct 
bch_move_stats move_stats; unsigned enabled:1; }; diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c index afb7264..11208e8 100644 --- a/libbcachefs/recovery.c +++ b/libbcachefs/recovery.c @@ -326,7 +326,7 @@ static void btree_and_journal_iter_prefetch(struct bch_fs *c, struct btree *b, (k = bch2_btree_and_journal_iter_peek(&iter)).k) { bch2_bkey_buf_reassemble(&tmp, c, k); - bch2_btree_node_prefetch(c, NULL, tmp.k, + bch2_btree_node_prefetch(c, NULL, NULL, tmp.k, b->c.btree_id, b->c.level - 1); bch2_btree_and_journal_iter_advance(&iter); @@ -518,16 +518,16 @@ static int __bch2_journal_replay_key(struct btree_trans *trans, enum btree_id id, unsigned level, struct bkey_i *k) { - struct btree_iter *iter; + struct btree_iter iter; int ret; - iter = bch2_trans_get_node_iter(trans, id, k->k.p, - BTREE_MAX_DEPTH, level, - BTREE_ITER_INTENT| - BTREE_ITER_NOT_EXTENTS); - ret = bch2_btree_iter_traverse(iter) ?: - bch2_trans_update(trans, iter, k, BTREE_TRIGGER_NORUN); - bch2_trans_iter_put(trans, iter); + bch2_trans_node_iter_init(trans, &iter, id, k->k.p, + BTREE_MAX_DEPTH, level, + BTREE_ITER_INTENT| + BTREE_ITER_NOT_EXTENTS); + ret = bch2_btree_iter_traverse(&iter) ?: + bch2_trans_update(trans, &iter, k, BTREE_TRIGGER_NORUN); + bch2_trans_iter_exit(trans, &iter); return ret; } @@ -545,16 +545,16 @@ static int bch2_journal_replay_key(struct bch_fs *c, struct journal_key *k) static int __bch2_alloc_replay_key(struct btree_trans *trans, struct bkey_i *k) { - struct btree_iter *iter; + struct btree_iter iter; int ret; - iter = bch2_trans_get_iter(trans, BTREE_ID_alloc, k->k.p, - BTREE_ITER_CACHED| - BTREE_ITER_CACHED_NOFILL| - BTREE_ITER_INTENT); - ret = bch2_btree_iter_traverse(iter) ?: - bch2_trans_update(trans, iter, k, BTREE_TRIGGER_NORUN); - bch2_trans_iter_put(trans, iter); + bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, k->k.p, + BTREE_ITER_CACHED| + BTREE_ITER_CACHED_NOFILL| + BTREE_ITER_INTENT); + ret = bch2_btree_iter_traverse(&iter) ?: + bch2_trans_update(trans, &iter, k, BTREE_TRIGGER_NORUN); + bch2_trans_iter_exit(trans, &iter); return ret; } @@ -1216,7 +1216,9 @@ use_clean: if (!(c->sb.compat & (1ULL << BCH_COMPAT_extents_above_btree_updates_done)) || !(c->sb.compat & (1ULL << BCH_COMPAT_bformat_overflow_done))) { - struct bch_move_stats stats = { 0 }; + struct bch_move_stats stats; + + bch_move_stats_init(&stats, "recovery"); bch_info(c, "scanning for old btree nodes"); ret = bch2_fs_read_write(c); diff --git a/libbcachefs/reflink.c b/libbcachefs/reflink.c index 3d9c5c5..576cfbc 100644 --- a/libbcachefs/reflink.c +++ b/libbcachefs/reflink.c @@ -116,7 +116,7 @@ static int bch2_make_extent_indirect(struct btree_trans *trans, struct bkey_i *orig) { struct bch_fs *c = trans->c; - struct btree_iter *reflink_iter; + struct btree_iter reflink_iter = { NULL }; struct bkey_s_c k; struct bkey_i *r_v; struct bkey_i_reflink_p *r_p; @@ -129,8 +129,8 @@ static int bch2_make_extent_indirect(struct btree_trans *trans, for_each_btree_key(trans, reflink_iter, BTREE_ID_reflink, POS(0, c->reflink_hint), BTREE_ITER_INTENT|BTREE_ITER_SLOTS, k, ret) { - if (reflink_iter->pos.inode) { - bch2_btree_iter_set_pos(reflink_iter, POS_MIN); + if (reflink_iter.pos.inode) { + bch2_btree_iter_set_pos(&reflink_iter, POS_MIN); continue; } @@ -142,7 +142,7 @@ static int bch2_make_extent_indirect(struct btree_trans *trans, goto err; /* rewind iter to start of hole, if necessary: */ - bch2_btree_iter_set_pos_to_extent_start(reflink_iter); + bch2_btree_iter_set_pos_to_extent_start(&reflink_iter); r_v = 
bch2_trans_kmalloc(trans, sizeof(__le64) + bkey_bytes(&orig->k)); ret = PTR_ERR_OR_ZERO(r_v); @@ -151,7 +151,7 @@ static int bch2_make_extent_indirect(struct btree_trans *trans, bkey_init(&r_v->k); r_v->k.type = bkey_type_to_indirect(&orig->k); - r_v->k.p = reflink_iter->pos; + r_v->k.p = reflink_iter.pos; bch2_key_resize(&r_v->k, orig->k.size); r_v->k.version = orig->k.version; @@ -161,7 +161,7 @@ static int bch2_make_extent_indirect(struct btree_trans *trans, *refcount = 0; memcpy(refcount + 1, &orig->v, bkey_val_bytes(&orig->k)); - ret = bch2_trans_update(trans, reflink_iter, r_v, 0); + ret = bch2_trans_update(trans, &reflink_iter, r_v, 0); if (ret) goto err; @@ -172,9 +172,8 @@ static int bch2_make_extent_indirect(struct btree_trans *trans, ret = bch2_trans_update(trans, extent_iter, &r_p->k_i, 0); err: - if (!IS_ERR(reflink_iter)) - c->reflink_hint = reflink_iter->pos.offset; - bch2_trans_iter_put(trans, reflink_iter); + c->reflink_hint = reflink_iter.pos.offset; + bch2_trans_iter_exit(trans, &reflink_iter); return ret; } @@ -184,7 +183,7 @@ static struct bkey_s_c get_next_src(struct btree_iter *iter, struct bpos end) struct bkey_s_c k; int ret; - for_each_btree_key_continue(iter, 0, k, ret) { + for_each_btree_key_continue(*iter, 0, k, ret) { if (bkey_cmp(iter->pos, end) >= 0) break; @@ -203,7 +202,7 @@ s64 bch2_remap_range(struct bch_fs *c, u64 new_i_size, s64 *i_sectors_delta) { struct btree_trans trans; - struct btree_iter *dst_iter, *src_iter; + struct btree_iter dst_iter, src_iter; struct bkey_s_c src_k; struct bkey_buf new_dst, new_src; struct bpos dst_end = dst_start, src_end = src_start; @@ -223,13 +222,13 @@ s64 bch2_remap_range(struct bch_fs *c, bch2_bkey_buf_init(&new_src); bch2_trans_init(&trans, c, BTREE_ITER_MAX, 4096); - src_iter = bch2_trans_get_iter(&trans, BTREE_ID_extents, src_start, - BTREE_ITER_INTENT); - dst_iter = bch2_trans_get_iter(&trans, BTREE_ID_extents, dst_start, - BTREE_ITER_INTENT); + bch2_trans_iter_init(&trans, &src_iter, BTREE_ID_extents, src_start, + BTREE_ITER_INTENT); + bch2_trans_iter_init(&trans, &dst_iter, BTREE_ID_extents, dst_start, + BTREE_ITER_INTENT); while ((ret == 0 || ret == -EINTR) && - bkey_cmp(dst_iter->pos, dst_end) < 0) { + bkey_cmp(dst_iter.pos, dst_end) < 0) { struct disk_reservation disk_res = { 0 }; bch2_trans_begin(&trans); @@ -239,31 +238,31 @@ s64 bch2_remap_range(struct bch_fs *c, break; } - dst_done = dst_iter->pos.offset - dst_start.offset; + dst_done = dst_iter.pos.offset - dst_start.offset; src_want = POS(src_start.inode, src_start.offset + dst_done); - bch2_btree_iter_set_pos(src_iter, src_want); + bch2_btree_iter_set_pos(&src_iter, src_want); - src_k = get_next_src(src_iter, src_end); + src_k = get_next_src(&src_iter, src_end); ret = bkey_err(src_k); if (ret) continue; - if (bkey_cmp(src_want, src_iter->pos) < 0) { - ret = bch2_fpunch_at(&trans, dst_iter, + if (bkey_cmp(src_want, src_iter.pos) < 0) { + ret = bch2_fpunch_at(&trans, &dst_iter, bpos_min(dst_end, - POS(dst_iter->pos.inode, dst_iter->pos.offset + - src_iter->pos.offset - src_want.offset)), + POS(dst_iter.pos.inode, dst_iter.pos.offset + + src_iter.pos.offset - src_want.offset)), journal_seq, i_sectors_delta); continue; } if (src_k.k->type != KEY_TYPE_reflink_p) { - bch2_btree_iter_set_pos_to_extent_start(src_iter); + bch2_btree_iter_set_pos_to_extent_start(&src_iter); bch2_bkey_buf_reassemble(&new_src, c, src_k); src_k = bkey_i_to_s_c(new_src.k); - ret = bch2_make_extent_indirect(&trans, src_iter, + ret = bch2_make_extent_indirect(&trans, &src_iter, 
new_src.k); if (ret) continue; @@ -286,43 +285,42 @@ s64 bch2_remap_range(struct bch_fs *c, BUG(); } - new_dst.k->k.p = dst_iter->pos; + new_dst.k->k.p = dst_iter.pos; bch2_key_resize(&new_dst.k->k, min(src_k.k->p.offset - src_want.offset, - dst_end.offset - dst_iter->pos.offset)); - ret = bch2_extent_update(&trans, dst_iter, new_dst.k, + dst_end.offset - dst_iter.pos.offset)); + ret = bch2_extent_update(&trans, &dst_iter, new_dst.k, &disk_res, journal_seq, new_i_size, i_sectors_delta, true); bch2_disk_reservation_put(c, &disk_res); } - bch2_trans_iter_put(&trans, dst_iter); - bch2_trans_iter_put(&trans, src_iter); + bch2_trans_iter_exit(&trans, &dst_iter); + bch2_trans_iter_exit(&trans, &src_iter); - BUG_ON(!ret && bkey_cmp(dst_iter->pos, dst_end)); - BUG_ON(bkey_cmp(dst_iter->pos, dst_end) > 0); + BUG_ON(!ret && bkey_cmp(dst_iter.pos, dst_end)); + BUG_ON(bkey_cmp(dst_iter.pos, dst_end) > 0); - dst_done = dst_iter->pos.offset - dst_start.offset; - new_i_size = min(dst_iter->pos.offset << 9, new_i_size); + dst_done = dst_iter.pos.offset - dst_start.offset; + new_i_size = min(dst_iter.pos.offset << 9, new_i_size); do { struct bch_inode_unpacked inode_u; - struct btree_iter *inode_iter; + struct btree_iter inode_iter = { NULL }; bch2_trans_begin(&trans); - inode_iter = bch2_inode_peek(&trans, &inode_u, + ret2 = bch2_inode_peek(&trans, &inode_iter, &inode_u, dst_start.inode, BTREE_ITER_INTENT); - ret2 = PTR_ERR_OR_ZERO(inode_iter); if (!ret2 && inode_u.bi_size < new_i_size) { inode_u.bi_size = new_i_size; - ret2 = bch2_inode_write(&trans, inode_iter, &inode_u) ?: + ret2 = bch2_inode_write(&trans, &inode_iter, &inode_u) ?: bch2_trans_commit(&trans, NULL, journal_seq, 0); } - bch2_trans_iter_put(&trans, inode_iter); + bch2_trans_iter_exit(&trans, &inode_iter); } while (ret2 == -EINTR); ret = bch2_trans_exit(&trans) ?: ret; diff --git a/libbcachefs/str_hash.h b/libbcachefs/str_hash.h index 2360234..c6a132b 100644 --- a/libbcachefs/str_hash.h +++ b/libbcachefs/str_hash.h @@ -139,18 +139,18 @@ struct bch_hash_desc { bool (*cmp_bkey)(struct bkey_s_c, struct bkey_s_c); }; -static __always_inline struct btree_iter * +static __always_inline int bch2_hash_lookup(struct btree_trans *trans, + struct btree_iter *iter, const struct bch_hash_desc desc, const struct bch_hash_info *info, u64 inode, const void *key, unsigned flags) { - struct btree_iter *iter; struct bkey_s_c k; int ret; - for_each_btree_key(trans, iter, desc.btree_id, + for_each_btree_key(trans, *iter, desc.btree_id, POS(inode, desc.hash_key(info, key)), BTREE_ITER_SLOTS|flags, k, ret) { if (iter->pos.inode != inode) @@ -158,7 +158,7 @@ bch2_hash_lookup(struct btree_trans *trans, if (k.k->type == desc.key_type) { if (!desc.cmp_key(k, key)) - return iter; + return 0; } else if (k.k->type == KEY_TYPE_hash_whiteout) { ; } else { @@ -166,35 +166,33 @@ bch2_hash_lookup(struct btree_trans *trans, break; } } - bch2_trans_iter_put(trans, iter); + bch2_trans_iter_exit(trans, iter); - return ERR_PTR(ret ?: -ENOENT); + return ret ?: -ENOENT; } -static __always_inline struct btree_iter * +static __always_inline int bch2_hash_hole(struct btree_trans *trans, + struct btree_iter *iter, const struct bch_hash_desc desc, const struct bch_hash_info *info, u64 inode, const void *key) { - struct btree_iter *iter; struct bkey_s_c k; int ret; - for_each_btree_key(trans, iter, desc.btree_id, + for_each_btree_key(trans, *iter, desc.btree_id, POS(inode, desc.hash_key(info, key)), BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) { if (iter->pos.inode != inode) break; if 
diff --git a/libbcachefs/str_hash.h b/libbcachefs/str_hash.h
index 2360234..c6a132b 100644
--- a/libbcachefs/str_hash.h
+++ b/libbcachefs/str_hash.h
@@ -139,18 +139,18 @@ struct bch_hash_desc {
 	bool (*cmp_bkey)(struct bkey_s_c, struct bkey_s_c);
 };
 
-static __always_inline struct btree_iter *
+static __always_inline int
 bch2_hash_lookup(struct btree_trans *trans,
+		 struct btree_iter *iter,
 		 const struct bch_hash_desc desc,
 		 const struct bch_hash_info *info,
 		 u64 inode, const void *key,
 		 unsigned flags)
 {
-	struct btree_iter *iter;
 	struct bkey_s_c k;
 	int ret;
 
-	for_each_btree_key(trans, iter, desc.btree_id,
+	for_each_btree_key(trans, *iter, desc.btree_id,
 			   POS(inode, desc.hash_key(info, key)),
 			   BTREE_ITER_SLOTS|flags, k, ret) {
 		if (iter->pos.inode != inode)
@@ -158,7 +158,7 @@ bch2_hash_lookup(struct btree_trans *trans,
 
 		if (k.k->type == desc.key_type) {
 			if (!desc.cmp_key(k, key))
-				return iter;
+				return 0;
 		} else if (k.k->type == KEY_TYPE_hash_whiteout) {
 			;
 		} else {
@@ -166,35 +166,33 @@ bch2_hash_lookup(struct btree_trans *trans,
 			break;
 		}
 	}
-	bch2_trans_iter_put(trans, iter);
+	bch2_trans_iter_exit(trans, iter);
 
-	return ERR_PTR(ret ?: -ENOENT);
+	return ret ?: -ENOENT;
 }
 
-static __always_inline struct btree_iter *
+static __always_inline int
 bch2_hash_hole(struct btree_trans *trans,
+	       struct btree_iter *iter,
 	       const struct bch_hash_desc desc,
 	       const struct bch_hash_info *info,
 	       u64 inode, const void *key)
 {
-	struct btree_iter *iter;
 	struct bkey_s_c k;
 	int ret;
 
-	for_each_btree_key(trans, iter, desc.btree_id,
+	for_each_btree_key(trans, *iter, desc.btree_id,
 			   POS(inode, desc.hash_key(info, key)),
 			   BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) {
 		if (iter->pos.inode != inode)
 			break;
 
 		if (k.k->type != desc.key_type)
-			return iter;
+			return 0;
 	}
+	bch2_trans_iter_exit(trans, iter);
 
-	iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT;
-	bch2_trans_iter_put(trans, iter);
-
-	return ERR_PTR(ret ?: -ENOSPC);
+	return ret ?: -ENOSPC;
 }
 
 static __always_inline
@@ -203,13 +201,13 @@ int bch2_hash_needs_whiteout(struct btree_trans *trans,
 			     const struct bch_hash_info *info,
 			     struct btree_iter *start)
 {
-	struct btree_iter *iter;
+	struct btree_iter iter;
 	struct bkey_s_c k;
 	int ret;
 
-	iter = bch2_trans_copy_iter(trans, start);
+	bch2_trans_copy_iter(&iter, start);
 
-	bch2_btree_iter_advance(iter);
+	bch2_btree_iter_advance(&iter);
 
 	for_each_btree_key_continue(iter, BTREE_ITER_SLOTS, k, ret) {
 		if (k.k->type != desc.key_type &&
@@ -218,13 +216,12 @@ int bch2_hash_needs_whiteout(struct btree_trans *trans,
 
 		if (k.k->type == desc.key_type &&
 		    desc.hash_bkey(info, k) <= start->pos.offset) {
-			iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT;
 			ret = 1;
 			break;
 		}
 	}
 
-	bch2_trans_iter_put(trans, iter);
+	bch2_trans_iter_exit(trans, &iter);
 	return ret;
 }
 
@@ -234,7 +231,7 @@ int bch2_hash_set(struct btree_trans *trans,
 		  const struct bch_hash_info *info,
 		  u64 inode, struct bkey_i *insert, int flags)
 {
-	struct btree_iter *iter, *slot = NULL;
+	struct btree_iter iter, slot = { NULL };
 	struct bkey_s_c k;
 	bool found = false;
 	int ret;
@@ -242,7 +239,7 @@ int bch2_hash_set(struct btree_trans *trans,
 	for_each_btree_key(trans, iter, desc.btree_id,
 			   POS(inode, desc.hash_bkey(info, bkey_i_to_s_c(insert))),
 			   BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) {
-		if (iter->pos.inode != inode)
+		if (iter.pos.inode != inode)
 			break;
 
 		if (k.k->type == desc.key_type) {
@@ -253,9 +250,9 @@ int bch2_hash_set(struct btree_trans *trans,
 			continue;
 		}
 
-		if (!slot &&
+		if (!slot.path &&
 		    !(flags & BCH_HASH_SET_MUST_REPLACE))
-			slot = bch2_trans_copy_iter(trans, iter);
+			bch2_trans_copy_iter(&slot, &iter);
 
 		if (k.k->type != KEY_TYPE_hash_whiteout)
 			goto not_found;
@@ -264,8 +261,8 @@ int bch2_hash_set(struct btree_trans *trans,
 	if (!ret)
 		ret = -ENOSPC;
 out:
-	bch2_trans_iter_put(trans, slot);
-	bch2_trans_iter_put(trans, iter);
+	bch2_trans_iter_exit(trans, &slot);
+	bch2_trans_iter_exit(trans, &iter);
 
 	return ret;
 found:
@@ -277,11 +274,11 @@ not_found:
 	} else if (found && (flags & BCH_HASH_SET_MUST_CREATE)) {
 		ret = -EEXIST;
 	} else {
-		if (!found && slot)
+		if (!found && slot.path)
 			swap(iter, slot);
 
-		insert->k.p = iter->pos;
-		ret = bch2_trans_update(trans, iter, insert, 0);
+		insert->k.p = iter.pos;
+		ret = bch2_trans_update(trans, &iter, insert, 0);
 	}
 
 	goto out;
@@ -318,16 +315,16 @@ int bch2_hash_delete(struct btree_trans *trans,
 		     const struct bch_hash_info *info,
 		     u64 inode, const void *key)
 {
-	struct btree_iter *iter;
+	struct btree_iter iter;
 	int ret;
 
-	iter = bch2_hash_lookup(trans, desc, info, inode, key,
+	ret = bch2_hash_lookup(trans, &iter, desc, info, inode, key,
 				BTREE_ITER_INTENT);
-	if (IS_ERR(iter))
-		return PTR_ERR(iter);
+	if (ret)
+		return ret;
 
-	ret = bch2_hash_delete_at(trans, desc, info, iter);
-	bch2_trans_iter_put(trans, iter);
+	ret = bch2_hash_delete_at(trans, desc, info, &iter);
+	bch2_trans_iter_exit(trans, &iter);
 	return ret;
 }
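With bch2_hash_lookup() and bch2_hash_hole() now returning an int and taking the iterator as an out-parameter, callers no longer decode an ERR_PTR. A sketch of the new calling convention, mirroring the xattr.c hunk later in this patch (the function name and the hash/inum/search parameters are stand-ins for caller state):

	/* Sketch only: hash, inum and search stand in for the caller's values. */
	static int example_xattr_peek(struct btree_trans *trans,
				      const struct bch_hash_info *hash,
				      u64 inum,
				      const struct xattr_search_key *search)
	{
		struct btree_iter iter;
		struct bkey_s_c k;
		int ret;

		ret = bch2_hash_lookup(trans, &iter, bch2_xattr_hash_desc, hash,
				       inum, search, 0);
		if (ret)
			return ret;	/* lookup already exited the iter on failure */

		/* on success the iter is live and points at the match: */
		k = bch2_btree_iter_peek_slot(&iter);
		ret = bkey_err(k);

		bch2_trans_iter_exit(trans, &iter);
		return ret;
	}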
diff --git a/libbcachefs/super.c b/libbcachefs/super.c
index ce8e5d4..8f84766 100644
--- a/libbcachefs/super.c
+++ b/libbcachefs/super.c
@@ -486,12 +486,12 @@ static void __bch2_fs_free(struct bch_fs *c)
 	bch2_journal_entries_free(&c->journal_entries);
 	percpu_free_rwsem(&c->mark_lock);
 
-	if (c->btree_iters_bufs)
+	if (c->btree_paths_bufs)
 		for_each_possible_cpu(cpu)
-			kfree(per_cpu_ptr(c->btree_iters_bufs, cpu)->iter);
+			kfree(per_cpu_ptr(c->btree_paths_bufs, cpu)->path);
 
 	free_percpu(c->online_reserved);
-	free_percpu(c->btree_iters_bufs);
+	free_percpu(c->btree_paths_bufs);
 	free_percpu(c->pcpu);
 	mempool_exit(&c->large_bkey_pool);
 	mempool_exit(&c->btree_bounce_pool);
@@ -704,6 +704,9 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
 	INIT_LIST_HEAD(&c->ec_stripe_new_list);
 	mutex_init(&c->ec_stripe_new_lock);
 
+	INIT_LIST_HEAD(&c->data_progress_list);
+	mutex_init(&c->data_progress_lock);
+
 	spin_lock_init(&c->ec_stripes_heap_lock);
 
 	seqcount_init(&c->gc_pos_lock);
@@ -771,7 +774,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
 			offsetof(struct btree_write_bio, wbio.bio)),
 			BIOSET_NEED_BVECS) ||
 	    !(c->pcpu = alloc_percpu(struct bch_fs_pcpu)) ||
-	    !(c->btree_iters_bufs = alloc_percpu(struct btree_iter_buf)) ||
+	    !(c->btree_paths_bufs = alloc_percpu(struct btree_path_buf)) ||
 	    !(c->online_reserved = alloc_percpu(u64)) ||
 	    mempool_init_kvpmalloc_pool(&c->btree_bounce_pool, 1,
 					btree_bytes(c)) ||
diff --git a/libbcachefs/sysfs.c b/libbcachefs/sysfs.c
index 9b1ffbf..92e58f5 100644
--- a/libbcachefs/sysfs.c
+++ b/libbcachefs/sysfs.c
@@ -203,6 +203,8 @@ read_attribute(new_stripes);
 read_attribute(io_timers_read);
 read_attribute(io_timers_write);
 
+read_attribute(data_op_data_progress);
+
 #ifdef CONFIG_BCACHEFS_TESTS
 write_attribute(perf_test);
 #endif /* CONFIG_BCACHEFS_TESTS */
@@ -239,6 +241,37 @@ static size_t bch2_btree_avg_write_size(struct bch_fs *c)
 	return nr ? div64_u64(sectors, nr) : 0;
 }
 
+static long stats_to_text(struct printbuf *out, struct bch_fs *c,
+			  struct bch_move_stats *stats)
+{
+	pr_buf(out, "%s: data type %s btree_id %s position: ",
+	       stats->name,
+	       bch2_data_types[stats->data_type],
+	       bch2_btree_ids[stats->btree_id]);
+	bch2_bpos_to_text(out, stats->pos);
+	pr_buf(out, "%s", "\n");
+
+	return 0;
+}
+
+static long data_progress_to_text(struct printbuf *out, struct bch_fs *c)
+{
+	long ret = 0;
+	struct bch_move_stats *iter;
+
+	mutex_lock(&c->data_progress_lock);
+
+	if (list_empty(&c->data_progress_list))
+		pr_buf(out, "%s", "no progress to report\n");
+	else
+		list_for_each_entry(iter, &c->data_progress_list, list) {
+			stats_to_text(out, c, iter);
+		}
+
+	mutex_unlock(&c->data_progress_lock);
+	return ret;
+}
+
 static int fs_alloc_debug_to_text(struct printbuf *out, struct bch_fs *c)
 {
 	struct bch_fs_usage_online *fs_usage = bch2_fs_usage_read(c);
@@ -257,7 +290,7 @@ static int fs_alloc_debug_to_text(struct printbuf *out, struct bch_fs *c)
 static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c)
 {
 	struct btree_trans trans;
-	struct btree_iter *iter;
+	struct btree_iter iter;
 	struct bkey_s_c k;
 	u64 nr_uncompressed_extents = 0,
 	    uncompressed_sectors = 0,
 	    nr_compressed_extents = 0,
@@ -292,6 +325,7 @@ static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c
 			break;
 		}
 	}
+	bch2_trans_iter_exit(&trans, &iter);
 
 	ret = bch2_trans_exit(&trans) ?: ret;
 	if (ret)
@@ -434,6 +468,11 @@ SHOW(bch2_fs)
 		return out.pos - buf;
 	}
 
+	if (attr == &sysfs_data_op_data_progress) {
+		data_progress_to_text(&out, c);
+		return out.pos - buf;
+	}
+
 	return 0;
 }
 
@@ -596,6 +635,8 @@ struct attribute *bch2_fs_internal_files[] = {
 	&sysfs_io_timers_read,
 	&sysfs_io_timers_write,
 
+	&sysfs_data_op_data_progress,
+
 	&sysfs_internal_uuid,
 	NULL
 };
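The new data_op_data_progress attribute only reads the list; the producers are the data-move paths (move.c in the diffstat, not shown in this excerpt), which keep a bch_move_stats on c->data_progress_list for the duration of an operation. A plausible sketch of that registration under the new lock, hypothetical helpers only, not code from this patch:

	/* Hypothetical helpers: how a data op would publish its progress
	 * for data_progress_to_text() to find. */
	static void data_progress_add(struct bch_fs *c, struct bch_move_stats *stats)
	{
		mutex_lock(&c->data_progress_lock);
		list_add(&stats->list, &c->data_progress_list);
		mutex_unlock(&c->data_progress_lock);
	}

	static void data_progress_del(struct bch_fs *c, struct bch_move_stats *stats)
	{
		mutex_lock(&c->data_progress_lock);
		list_del(&stats->list);
		mutex_unlock(&c->data_progress_lock);
	}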
diff --git a/libbcachefs/tests.c b/libbcachefs/tests.c
index 4d8d50f..d5a74f4 100644
--- a/libbcachefs/tests.c
+++ b/libbcachefs/tests.c
@@ -29,7 +29,7 @@ static void delete_test_keys(struct bch_fs *c)
 static int test_delete(struct bch_fs *c, u64 nr)
 {
 	struct btree_trans trans;
-	struct btree_iter *iter;
+	struct btree_iter iter;
 	struct bkey_i_cookie k;
 	int ret;
 
@@ -37,13 +37,12 @@ static int test_delete(struct bch_fs *c, u64 nr)
 	k.k.p.snapshot = U32_MAX;
 
 	bch2_trans_init(&trans, c, 0, 0);
-
-	iter = bch2_trans_get_iter(&trans, BTREE_ID_xattrs, k.k.p,
-				   BTREE_ITER_INTENT);
+	bch2_trans_iter_init(&trans, &iter, BTREE_ID_xattrs, k.k.p,
+			     BTREE_ITER_INTENT);
 
 	ret = __bch2_trans_do(&trans, NULL, NULL, 0,
-		bch2_btree_iter_traverse(iter) ?:
-		bch2_trans_update(&trans, iter, &k.k_i, 0));
+		bch2_btree_iter_traverse(&iter) ?:
+		bch2_trans_update(&trans, &iter, &k.k_i, 0));
 	if (ret) {
 		bch_err(c, "update error in test_delete: %i", ret);
 		goto err;
@@ -51,8 +50,8 @@ static int test_delete(struct bch_fs *c, u64 nr)
 
 	pr_info("deleting once");
 	ret = __bch2_trans_do(&trans, NULL, NULL, 0,
-		bch2_btree_iter_traverse(iter) ?:
-		bch2_btree_delete_at(&trans, iter, 0));
+		bch2_btree_iter_traverse(&iter) ?:
+		bch2_btree_delete_at(&trans, &iter, 0));
 	if (ret) {
 		bch_err(c, "delete error (first) in test_delete: %i", ret);
 		goto err;
@@ -60,14 +59,14 @@ static int test_delete(struct bch_fs *c, u64 nr)
 
 	pr_info("deleting twice");
 	ret = __bch2_trans_do(&trans, NULL, NULL, 0,
-		bch2_btree_iter_traverse(iter) ?:
-		bch2_btree_delete_at(&trans, iter, 0));
+		bch2_btree_iter_traverse(&iter) ?:
+		bch2_btree_delete_at(&trans, &iter, 0));
 	if (ret) {
 		bch_err(c, "delete error (second) in test_delete: %i", ret);
 		goto err;
 	}
 err:
-	bch2_trans_iter_put(&trans, iter);
+	bch2_trans_iter_exit(&trans, &iter);
 	bch2_trans_exit(&trans);
 	return ret;
 }
@@ -75,7 +74,7 @@ err:
 static int test_delete_written(struct bch_fs *c, u64 nr)
 {
 	struct btree_trans trans;
-	struct btree_iter *iter;
+	struct btree_iter iter;
 	struct bkey_i_cookie k;
 	int ret;
 
@@ -84,12 +83,12 @@ static int test_delete_written(struct bch_fs *c, u64 nr)
 
 	bch2_trans_init(&trans, c, 0, 0);
 
-	iter = bch2_trans_get_iter(&trans, BTREE_ID_xattrs, k.k.p,
-				   BTREE_ITER_INTENT);
+	bch2_trans_iter_init(&trans, &iter, BTREE_ID_xattrs, k.k.p,
+			     BTREE_ITER_INTENT);
 
 	ret = __bch2_trans_do(&trans, NULL, NULL, 0,
-		bch2_btree_iter_traverse(iter) ?:
-		bch2_trans_update(&trans, iter, &k.k_i, 0));
+		bch2_btree_iter_traverse(&iter) ?:
+		bch2_trans_update(&trans, &iter, &k.k_i, 0));
 	if (ret) {
 		bch_err(c, "update error in test_delete_written: %i", ret);
 		goto err;
@@ -99,14 +98,14 @@ static int test_delete_written(struct bch_fs *c, u64 nr)
 	bch2_journal_flush_all_pins(&c->journal);
 
 	ret = __bch2_trans_do(&trans, NULL, NULL, 0,
-		bch2_btree_iter_traverse(iter) ?:
-		bch2_btree_delete_at(&trans, iter, 0));
+		bch2_btree_iter_traverse(&iter) ?:
+		bch2_btree_delete_at(&trans, &iter, 0));
 	if (ret) {
 		bch_err(c, "delete error in test_delete_written: %i", ret);
 		goto err;
 	}
 err:
-	bch2_trans_iter_put(&trans, iter);
+	bch2_trans_iter_exit(&trans, &iter);
 	bch2_trans_exit(&trans);
 	return ret;
 }
@@ -114,7 +113,7 @@ err:
 static int test_iterate(struct bch_fs *c, u64 nr)
 {
 	struct btree_trans trans;
-	struct btree_iter *iter = NULL;
+	struct btree_iter iter = { NULL };
 	struct bkey_s_c k;
 	u64 i;
 	int ret = 0;
@@ -156,12 +155,12 @@ static int test_iterate(struct bch_fs *c, u64 nr)
 
 	pr_info("iterating backwards");
 
-	while (!IS_ERR_OR_NULL((k = bch2_btree_iter_prev(iter)).k))
+	while (!IS_ERR_OR_NULL((k = bch2_btree_iter_prev(&iter)).k))
 		BUG_ON(k.k->p.offset != --i);
 
 	BUG_ON(i);
 err:
-	bch2_trans_iter_put(&trans, iter);
+	bch2_trans_iter_exit(&trans, &iter);
 	bch2_trans_exit(&trans);
 	return ret;
 }
@@ -169,7 +168,7 @@ err:
 static int test_iterate_extents(struct bch_fs *c, u64 nr)
 {
 	struct btree_trans trans;
-	struct btree_iter *iter = NULL;
+	struct btree_iter iter = { NULL };
 	struct bkey_s_c k;
 	u64 i;
 	int ret = 0;
@@ -210,14 +209,14 @@ static int test_iterate_extents(struct bch_fs *c, u64 nr)
 
 	pr_info("iterating backwards");
 
-	while (!IS_ERR_OR_NULL((k = bch2_btree_iter_prev(iter)).k)) {
+	while (!IS_ERR_OR_NULL((k = bch2_btree_iter_prev(&iter)).k)) {
 		BUG_ON(k.k->p.offset != i);
 		i = bkey_start_offset(k.k);
 	}
 
 	BUG_ON(i);
 err:
-	bch2_trans_iter_put(&trans, iter);
+	bch2_trans_iter_exit(&trans, &iter);
 	bch2_trans_exit(&trans);
 	return ret;
 }
@@ -225,7 +224,7 @@ err:
 static int test_iterate_slots(struct bch_fs *c, u64 nr)
 {
 	struct btree_trans trans;
-	struct btree_iter *iter;
+	struct btree_iter iter = { NULL };
 	struct bkey_s_c k;
 	u64 i;
 	int ret = 0;
@@ -263,7 +262,7 @@ static int test_iterate_slots(struct bch_fs *c, u64 nr)
 		BUG_ON(k.k->p.offset != i);
 		i += 2;
 	}
-	bch2_trans_iter_put(&trans, iter);
+	bch2_trans_iter_exit(&trans, &iter);
 
 	BUG_ON(i != nr * 2);
 
@@ -280,7 +279,7 @@ static int test_iterate_slots(struct bch_fs *c, u64 nr)
 		if (i == nr * 2)
 			break;
 	}
-	bch2_trans_iter_put(&trans, iter);
+	bch2_trans_iter_exit(&trans, &iter);
 err:
 	bch2_trans_exit(&trans);
 	return ret;
@@ -289,7 +288,7 @@ err:
 static int test_iterate_slots_extents(struct bch_fs *c, u64 nr)
 {
 	struct btree_trans trans;
-	struct btree_iter *iter;
+	struct btree_iter iter = { NULL };
 	struct bkey_s_c k;
 	u64 i;
 	int ret = 0;
@@ -326,7 +325,7 @@ static int test_iterate_slots_extents(struct bch_fs *c, u64 nr)
 		BUG_ON(k.k->size != 8);
 		i += 16;
 	}
-	bch2_trans_iter_put(&trans, iter);
+	bch2_trans_iter_exit(&trans, &iter);
 
 	BUG_ON(i != nr);
 
@@ -345,7 +344,7 @@ static int test_iterate_slots_extents(struct bch_fs *c, u64 nr)
 		if (i == nr)
 			break;
 	}
-	bch2_trans_iter_put(&trans, iter);
+	bch2_trans_iter_exit(&trans, &iter);
 err:
 	bch2_trans_exit(&trans);
 	return 0;
@@ -358,21 +357,19 @@ err:
 static int test_peek_end(struct bch_fs *c, u64 nr)
 {
 	struct btree_trans trans;
-	struct btree_iter *iter;
+	struct btree_iter iter;
 	struct bkey_s_c k;
 
 	bch2_trans_init(&trans, c, 0, 0);
+	bch2_trans_iter_init(&trans, &iter, BTREE_ID_xattrs, POS_MIN, 0);
 
-	iter = bch2_trans_get_iter(&trans, BTREE_ID_xattrs, POS_MIN, 0);
-
-	k = bch2_btree_iter_peek(iter);
+	k = bch2_btree_iter_peek(&iter);
 	BUG_ON(k.k);
 
-	k = bch2_btree_iter_peek(iter);
+	k = bch2_btree_iter_peek(&iter);
 	BUG_ON(k.k);
 
-	bch2_trans_iter_put(&trans, iter);
-
+	bch2_trans_iter_exit(&trans, &iter);
 	bch2_trans_exit(&trans);
 	return 0;
 }
@@ -380,21 +377,19 @@ static int test_peek_end(struct bch_fs *c, u64 nr)
 static int test_peek_end_extents(struct bch_fs *c, u64 nr)
 {
 	struct btree_trans trans;
-	struct btree_iter *iter;
+	struct btree_iter iter;
 	struct bkey_s_c k;
 
 	bch2_trans_init(&trans, c, 0, 0);
+	bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents, POS_MIN, 0);
 
-	iter = bch2_trans_get_iter(&trans, BTREE_ID_extents, POS_MIN, 0);
-
-	k = bch2_btree_iter_peek(iter);
+	k = bch2_btree_iter_peek(&iter);
 	BUG_ON(k.k);
 
-	k = bch2_btree_iter_peek(iter);
+	k = bch2_btree_iter_peek(&iter);
 	BUG_ON(k.k);
 
-	bch2_trans_iter_put(&trans, iter);
-
+	bch2_trans_iter_exit(&trans, &iter);
 	bch2_trans_exit(&trans);
 	return 0;
 }
@@ -540,18 +535,18 @@ static int rand_insert_multi(struct bch_fs *c, u64 nr)
 static int rand_lookup(struct bch_fs *c, u64 nr)
 {
 	struct btree_trans trans;
-	struct btree_iter *iter;
+	struct btree_iter iter;
 	struct bkey_s_c k;
 	int ret = 0;
 	u64 i;
 
 	bch2_trans_init(&trans, c, 0, 0);
-	iter = bch2_trans_get_iter(&trans, BTREE_ID_xattrs, POS_MIN, 0);
+	bch2_trans_iter_init(&trans, &iter, BTREE_ID_xattrs, POS_MIN, 0);
 
 	for (i = 0; i < nr; i++) {
-		bch2_btree_iter_set_pos(iter, POS(0, test_rand()));
+		bch2_btree_iter_set_pos(&iter, POS(0, test_rand()));
 
-		k = bch2_btree_iter_peek(iter);
+		k = bch2_btree_iter_peek(&iter);
 		ret = bkey_err(k);
 		if (ret) {
 			bch_err(c, "error in rand_lookup: %i", ret);
@@ -559,63 +554,73 @@ static int rand_lookup(struct bch_fs *c, u64 nr)
 		}
 	}
 
-	bch2_trans_iter_put(&trans, iter);
+	bch2_trans_iter_exit(&trans, &iter);
 	bch2_trans_exit(&trans);
 	return ret;
 }
 
+static int rand_mixed_trans(struct btree_trans *trans,
+			    struct btree_iter *iter,
+			    struct bkey_i_cookie *cookie,
+			    u64 i, u64 pos)
+{
+	struct bkey_s_c k;
+	int ret;
+
+	bch2_btree_iter_set_pos(iter, POS(0, pos));
+
+	k = bch2_btree_iter_peek(iter);
+	ret = bkey_err(k);
+	if (ret && ret != -EINTR)
+		bch_err(trans->c, "lookup error in rand_mixed: %i", ret);
+	if (ret)
+		return ret;
+
+	if (!(i & 3) && k.k) {
+		bkey_cookie_init(&cookie->k_i);
+		cookie->k.p = iter->pos;
+		bch2_trans_update(trans, iter, &cookie->k_i, 0);
+	}
+
+	return 0;
+}
+
 static int rand_mixed(struct bch_fs *c, u64 nr)
 {
 	struct btree_trans trans;
-	struct btree_iter *iter;
-	struct bkey_s_c k;
+	struct btree_iter iter;
+	struct bkey_i_cookie cookie;
 	int ret = 0;
-	u64 i;
+	u64 i, rand;
 
 	bch2_trans_init(&trans, c, 0, 0);
-	iter = bch2_trans_get_iter(&trans, BTREE_ID_xattrs, POS_MIN, 0);
+	bch2_trans_iter_init(&trans, &iter, BTREE_ID_xattrs, POS_MIN, 0);
 
 	for (i = 0; i < nr; i++) {
-		bch2_btree_iter_set_pos(iter, POS(0, test_rand()));
-
-		k = bch2_btree_iter_peek(iter);
-		ret = bkey_err(k);
+		rand = test_rand();
+		ret = __bch2_trans_do(&trans, NULL, NULL, 0,
+			rand_mixed_trans(&trans, &iter, &cookie, i, rand));
 		if (ret) {
-			bch_err(c, "lookup error in rand_mixed: %i", ret);
+			bch_err(c, "update error in rand_mixed: %i", ret);
 			break;
 		}
-
-		if (!(i & 3) && k.k) {
-			struct bkey_i_cookie k;
-
-			bkey_cookie_init(&k.k_i);
-			k.k.p = iter->pos;
-
-			ret = __bch2_trans_do(&trans, NULL, NULL, 0,
-				bch2_btree_iter_traverse(iter) ?:
-				bch2_trans_update(&trans, iter, &k.k_i, 0));
-			if (ret) {
-				bch_err(c, "update error in rand_mixed: %i", ret);
-				break;
-			}
-		}
 	}
 
-	bch2_trans_iter_put(&trans, iter);
+	bch2_trans_iter_exit(&trans, &iter);
 	bch2_trans_exit(&trans);
 	return ret;
 }
 
 static int __do_delete(struct btree_trans *trans, struct bpos pos)
 {
-	struct btree_iter *iter;
+	struct btree_iter iter;
 	struct bkey_i delete;
 	struct bkey_s_c k;
 	int ret = 0;
 
-	iter = bch2_trans_get_iter(trans, BTREE_ID_xattrs, pos,
-				   BTREE_ITER_INTENT);
-	k = bch2_btree_iter_peek(iter);
+	bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs, pos,
+			     BTREE_ITER_INTENT);
+	k = bch2_btree_iter_peek(&iter);
 	ret = bkey_err(k);
 	if (ret)
 		goto err;
@@ -626,9 +631,9 @@ static int __do_delete(struct btree_trans *trans, struct bpos pos)
 	bkey_init(&delete.k);
 	delete.k.p = k.k->p;
 
-	ret = bch2_trans_update(trans, iter, &delete, 0);
+	ret = bch2_trans_update(trans, &iter, &delete, 0);
 err:
-	bch2_trans_iter_put(trans, iter);
+	bch2_trans_iter_exit(trans, &iter);
 	return ret;
 }
 
@@ -658,7 +663,7 @@ static int rand_delete(struct bch_fs *c, u64 nr)
 static int seq_insert(struct bch_fs *c, u64 nr)
 {
 	struct btree_trans trans;
-	struct btree_iter *iter;
+	struct btree_iter iter;
 	struct bkey_s_c k;
 	struct bkey_i_cookie insert;
 	int ret = 0;
@@ -670,11 +675,11 @@ static int seq_insert(struct bch_fs *c, u64 nr)
 
 	for_each_btree_key(&trans, iter, BTREE_ID_xattrs, POS_MIN,
 			   BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) {
-		insert.k.p = iter->pos;
+		insert.k.p = iter.pos;
 
 		ret = __bch2_trans_do(&trans, NULL, NULL, 0,
-			bch2_btree_iter_traverse(iter) ?:
-			bch2_trans_update(&trans, iter, &insert.k_i, 0));
+			bch2_btree_iter_traverse(&iter) ?:
+			bch2_trans_update(&trans, &iter, &insert.k_i, 0));
 		if (ret) {
 			bch_err(c, "error in seq_insert: %i", ret);
 			break;
@@ -683,7 +688,7 @@ static int seq_insert(struct bch_fs *c, u64 nr)
 		if (++i == nr)
 			break;
 	}
-	bch2_trans_iter_put(&trans, iter);
+	bch2_trans_iter_exit(&trans, &iter);
 	bch2_trans_exit(&trans);
 
 	return ret;
@@ -692,7 +697,7 @@ static int seq_insert(struct bch_fs *c, u64 nr)
 static int seq_lookup(struct bch_fs *c, u64 nr)
 {
 	struct btree_trans trans;
-	struct btree_iter *iter;
+	struct btree_iter iter;
 	struct bkey_s_c k;
 	int ret = 0;
 
@@ -700,7 +705,7 @@ static int seq_lookup(struct bch_fs *c, u64 nr)
 
 	for_each_btree_key(&trans, iter, BTREE_ID_xattrs, POS_MIN, 0, k, ret)
 		;
-	bch2_trans_iter_put(&trans, iter);
+	bch2_trans_iter_exit(&trans, &iter);
 	bch2_trans_exit(&trans);
 
 	return ret;
@@ -709,7 +714,7 @@ static int seq_lookup(struct bch_fs *c, u64 nr)
 static int seq_overwrite(struct bch_fs *c, u64 nr)
 {
 	struct btree_trans trans;
-	struct btree_iter *iter;
+	struct btree_iter iter;
 	struct bkey_s_c k;
 	int ret = 0;
 
@@ -722,14 +727,14 @@ static int seq_overwrite(struct bch_fs *c, u64 nr)
 		bkey_reassemble(&u.k_i, k);
 
 		ret = __bch2_trans_do(&trans, NULL, NULL, 0,
-			bch2_btree_iter_traverse(iter) ?:
-			bch2_trans_update(&trans, iter, &u.k_i, 0));
+			bch2_btree_iter_traverse(&iter) ?:
+			bch2_trans_update(&trans, &iter, &u.k_i, 0));
 		if (ret) {
 			bch_err(c, "error in seq_overwrite: %i", ret);
 			break;
 		}
 	}
-	bch2_trans_iter_put(&trans, iter);
+	bch2_trans_iter_exit(&trans, &iter);
 	bch2_trans_exit(&trans);
 
 	return ret;
@@ -778,7 +783,7 @@ static int btree_perf_test_thread(void *data)
 		wait_event(j->ready_wait, !atomic_read(&j->ready));
 	}
 
-	ret = j->fn(j->c, j->nr / j->nr_threads);
+	ret = j->fn(j->c, div64_u64(j->nr, j->nr_threads));
 	if (ret)
 		j->ret = ret;
 
@@ -854,11 +859,11 @@ int bch2_btree_perf_test(struct bch_fs *c, const char *testname,
 	scnprintf(name_buf, sizeof(name_buf), "%s:", testname);
 	bch2_hprint(&PBUF(nr_buf), nr);
-	bch2_hprint(&PBUF(per_sec_buf), nr * NSEC_PER_SEC / time);
+	bch2_hprint(&PBUF(per_sec_buf), div64_u64(nr * NSEC_PER_SEC, time));
 	printk(KERN_INFO "%-12s %s with %u threads in %5llu sec, %5llu nsec per iter, %5s per sec\n",
 		name_buf, nr_buf, nr_threads,
-		time / NSEC_PER_SEC,
-		time * nr_threads / nr,
+		div_u64(time, NSEC_PER_SEC),
+		div_u64(time * nr_threads, nr),
 		per_sec_buf);
 	return j.ret;
 }
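The perf-test hunks above also replace bare 64-bit divisions with the kernel's division helpers: on 32-bit targets a u64 / u64 expression compiles to a libgcc call the kernel does not link against, so div64_u64() (64-bit divisor) and div_u64() (32-bit divisor) from linux/math64.h are required. A small illustration with made-up numbers:

	#include <linux/math64.h>

	u64 time = 3 * NSEC_PER_SEC;	/* elapsed nanoseconds */
	u64 nr   = 1000000;		/* iterations completed */

	/* u64 / u64: needs div64_u64() to build on 32-bit: */
	u64 per_sec = div64_u64(nr * NSEC_PER_SEC, time);

	/* u64 / u32 constant: div_u64() is the cheaper helper: */
	u64 secs = div_u64(time, NSEC_PER_SEC);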
diff --git a/libbcachefs/varint.c b/libbcachefs/varint.c
index e6a0415..752179b 100644
--- a/libbcachefs/varint.c
+++ b/libbcachefs/varint.c
@@ -96,7 +96,7 @@ int bch2_varint_encode_fast(u8 *out, u64 v)
 int bch2_varint_decode_fast(const u8 *in, const u8 *end, u64 *out)
 {
 	u64 v = get_unaligned_le64(in);
-	unsigned bytes = ffz(v & 255) + 1;
+	unsigned bytes = ffz(*in) + 1;
 
 	if (unlikely(in + bytes > end))
 		return -1;
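In this decode path the length comes from the first byte alone: the count of trailing one-bits, recovered with ffz() (find first zero bit), gives the number of bytes beyond the first. ffz(v & 255) and ffz(*in) yield the same value; reading the byte directly keeps the bit arithmetic off the full 64-bit load. A worked example (illustrative values, same formula as the code above):

	/* first byte 0x07 = 0b00000111: three trailing ones,
	 * so ffz() returns 3 and the varint occupies 4 bytes total: */
	unsigned bytes = ffz(0x07) + 1;		/* == 4 */

	/* first byte 0xff: ffz() == 8 -> the maximum 9-byte encoding: */
	unsigned max_bytes = ffz(0xff) + 1;	/* == 9 */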
diff --git a/libbcachefs/xattr.c b/libbcachefs/xattr.c
index e4d400b..ef6ae97 100644
--- a/libbcachefs/xattr.c
+++ b/libbcachefs/xattr.c
@@ -122,23 +122,22 @@ static int bch2_xattr_get_trans(struct btree_trans *trans, struct bch_inode_info
 			       const char *name, void *buffer, size_t size, int type)
 {
 	struct bch_hash_info hash = bch2_hash_info_init(trans->c, &inode->ei_inode);
-	struct btree_iter *iter;
+	struct btree_iter iter;
 	struct bkey_s_c_xattr xattr;
 	struct bkey_s_c k;
 	int ret;
 
-	iter = bch2_hash_lookup(trans, bch2_xattr_hash_desc, &hash,
-				inode->v.i_ino,
-				&X_SEARCH(type, name, strlen(name)),
-				0);
-	ret = PTR_ERR_OR_ZERO(iter);
+	ret = bch2_hash_lookup(trans, &iter, bch2_xattr_hash_desc, &hash,
+			       inode->v.i_ino,
+			       &X_SEARCH(type, name, strlen(name)),
+			       0);
 	if (ret)
-		goto err;
+		goto err1;
 
-	k = bch2_btree_iter_peek_slot(iter);
+	k = bch2_btree_iter_peek_slot(&iter);
 	ret = bkey_err(k);
 	if (ret)
-		goto err;
+		goto err2;
 
 	xattr = bkey_s_c_to_xattr(k);
 	ret = le16_to_cpu(xattr.v->x_val_len);
@@ -148,8 +147,9 @@ static int bch2_xattr_get_trans(struct btree_trans *trans, struct bch_inode_info
 		else
 			memcpy(buffer, xattr_val(xattr.v), ret);
 	}
-	bch2_trans_iter_put(trans, iter);
-err:
+err2:
+	bch2_trans_iter_exit(trans, &iter);
+err1:
 	return ret == -ENOENT ? -ENODATA : ret;
 }
 
@@ -279,7 +279,7 @@ ssize_t bch2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
 	struct bch_fs *c = dentry->d_sb->s_fs_info;
 	struct bch_inode_info *inode = to_bch_ei(dentry->d_inode);
 	struct btree_trans trans;
-	struct btree_iter *iter;
+	struct btree_iter iter;
 	struct bkey_s_c k;
 	struct xattr_buf buf = { .buf = buffer, .len = buffer_size };
 	u64 inum = dentry->d_inode->i_ino;
@@ -301,7 +301,7 @@ ssize_t bch2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
 		if (ret)
 			break;
 	}
-	bch2_trans_iter_put(&trans, iter);
+	bch2_trans_iter_exit(&trans, &iter);
 
 	ret = bch2_trans_exit(&trans) ?: ret;
-- 
2.39.2