git.sesse.net Git - bcachefs-tools-debian/commitdiff
Update bcachefs sources to d868a87c67 bcachefs: fix initial gc
author Kent Overstreet <kent.overstreet@gmail.com>
Thu, 28 Mar 2019 09:21:24 +0000 (05:21 -0400)
committer Kent Overstreet <kent.overstreet@gmail.com>
Thu, 28 Mar 2019 09:28:38 +0000 (05:28 -0400)
40 files changed:
.bcachefs_revision
cmd_debug.c
include/linux/bitops.h
include/linux/kernel.h
include/linux/printk.h
libbcachefs/alloc_background.c
libbcachefs/bcachefs.h
libbcachefs/bset.c
libbcachefs/btree_cache.c
libbcachefs/btree_gc.c
libbcachefs/btree_io.c
libbcachefs/btree_iter.c
libbcachefs/btree_iter.h
libbcachefs/btree_locking.h
libbcachefs/btree_types.h
libbcachefs/btree_update.h
libbcachefs/btree_update_interior.c
libbcachefs/btree_update_leaf.c
libbcachefs/chardev.c
libbcachefs/debug.c
libbcachefs/dirent.c
libbcachefs/ec.c
libbcachefs/error.c
libbcachefs/extents.c
libbcachefs/fs-io.c
libbcachefs/fs.c
libbcachefs/fsck.c
libbcachefs/fsck.h
libbcachefs/inode.c
libbcachefs/io.c
libbcachefs/journal_seq_blacklist.c
libbcachefs/migrate.c
libbcachefs/move.c
libbcachefs/move_types.h
libbcachefs/quota.c
libbcachefs/rebalance.c
libbcachefs/str_hash.h
libbcachefs/sysfs.c
libbcachefs/tests.c
libbcachefs/xattr.c

index 9fff2db3aa6298c3aca14585da6175caa678f7e7..e42a96d6705095f27ed1c7067fc58cb12b431824 100644 (file)
@@ -1 +1 @@
-ffe09df1065dd1b326913b21381ed1ad35ab8ef9
+d868a87c678935c89df9bca63d708d616529b0d2
index 72eccd8048853ef7bdd727ac45179d269243848a..5da97daa4ac139031af4ecc8516a7fbf9efe08af 100644 (file)
@@ -59,10 +59,13 @@ static void dump_one_device(struct bch_fs *c, struct bch_dev *ca, int fd)
        /* Btree: */
        for (i = 0; i < BTREE_ID_NR; i++) {
                const struct bch_extent_ptr *ptr;
-               struct btree_iter iter;
+               struct btree_trans trans;
+               struct btree_iter *iter;
                struct btree *b;
 
-               for_each_btree_node(&iter, c, i, POS_MIN, 0, b) {
+               bch2_trans_init(&trans, c);
+
+               for_each_btree_node(&trans, iter, i, POS_MIN, 0, b) {
                        struct bkey_s_c_extent e = bkey_i_to_s_c_extent(&b->key);
 
                        extent_for_each_ptr(e, ptr)
@@ -71,7 +74,7 @@ static void dump_one_device(struct bch_fs *c, struct bch_dev *ca, int fd)
                                                  ptr->offset << 9,
                                                  b->written << 9);
                }
-               bch2_btree_iter_unlock(&iter);
+               bch2_trans_exit(&trans);
        }
 
        qcow2_write_image(ca->disk_sb.bdev->bd_fd, fd, &data,
@@ -151,11 +154,14 @@ int cmd_dump(int argc, char *argv[])
 static void list_keys(struct bch_fs *c, enum btree_id btree_id,
                      struct bpos start, struct bpos end)
 {
-       struct btree_iter iter;
+       struct btree_trans trans;
+       struct btree_iter *iter;
        struct bkey_s_c k;
        char buf[512];
 
-       for_each_btree_key(&iter, c, btree_id, start,
+       bch2_trans_init(&trans, c);
+
+       for_each_btree_key(&trans, iter, btree_id, start,
                           BTREE_ITER_PREFETCH, k) {
                if (bkey_cmp(k.k->p, end) > 0)
                        break;
@@ -163,37 +169,43 @@ static void list_keys(struct bch_fs *c, enum btree_id btree_id,
                bch2_bkey_val_to_text(&PBUF(buf), c, k);
                puts(buf);
        }
-       bch2_btree_iter_unlock(&iter);
+       bch2_trans_exit(&trans);
 }
 
 static void list_btree_formats(struct bch_fs *c, enum btree_id btree_id,
                               struct bpos start, struct bpos end)
 {
-       struct btree_iter iter;
+       struct btree_trans trans;
+       struct btree_iter *iter;
        struct btree *b;
        char buf[4096];
 
-       for_each_btree_node(&iter, c, btree_id, start, 0, b) {
+       bch2_trans_init(&trans, c);
+
+       for_each_btree_node(&trans, iter, btree_id, start, 0, b) {
                if (bkey_cmp(b->key.k.p, end) > 0)
                        break;
 
                bch2_btree_node_to_text(&PBUF(buf), c, b);
                puts(buf);
        }
-       bch2_btree_iter_unlock(&iter);
+       bch2_trans_exit(&trans);
 }
 
 static void list_nodes_keys(struct bch_fs *c, enum btree_id btree_id,
                            struct bpos start, struct bpos end)
 {
-       struct btree_iter iter;
+       struct btree_trans trans;
+       struct btree_iter *iter;
        struct btree_node_iter node_iter;
        struct bkey unpacked;
        struct bkey_s_c k;
        struct btree *b;
        char buf[4096];
 
-       for_each_btree_node(&iter, c, btree_id, start, 0, b) {
+       bch2_trans_init(&trans, c);
+
+       for_each_btree_node(&trans, iter, btree_id, start, 0, b) {
                if (bkey_cmp(b->key.k.p, end) > 0)
                        break;
 
@@ -206,7 +218,7 @@ static void list_nodes_keys(struct bch_fs *c, enum btree_id btree_id,
                        puts(buf);
                }
        }
-       bch2_btree_iter_unlock(&iter);
+       bch2_trans_exit(&trans);
 }
 
 static struct bpos parse_pos(char *buf)
index dc2927b3c1ea3c74625802b83c89ef6b7bb6598a..f2183d5430ba49affa395b20b0a6acf3c444b2c4 100644 (file)
@@ -120,6 +120,12 @@ static inline unsigned long hweight_long(unsigned long w)
        return __builtin_popcountl(w);
 }
 
+static inline unsigned long hweight64(u64 w)
+{
+       return __builtin_popcount((u32) w) +
+              __builtin_popcount(w >> 32);
+}
+
 static inline unsigned long hweight8(unsigned long w)
 {
        return __builtin_popcountl(w);
index 9dc0fef1b89c9412b3b03d93048e90242fb46581..10d94c5eca5f453303ae368f78f9f7622ffaa490 100644 (file)
@@ -217,4 +217,6 @@ struct qstr {
 
 #define QSTR_INIT(n,l) { { { .len = l } }, .name = n }
 
+#define POISON_FREE 0x6b
+
 #endif
index 8f8dd6b9352caec26d9b43b7c779272c6599e6c6..bc1619f7cbf765e184d4d80a2cd83a876c4b68e1 100644 (file)
@@ -45,6 +45,7 @@ static inline int scnprintf(char * buf, size_t size, const char * fmt, ...)
 }
 
 #define printk(...)    printf(__VA_ARGS__)
+#define vprintk(...)   vprintf(__VA_ARGS__)
 
 #define no_printk(fmt, ...)                            \
 ({                                                     \
index 1a40ac21891c0e355419c10979d8d47ce0f6eff6..18afef2e4a810cb2153b5ee81a888b2f309a39b7 100644 (file)
@@ -263,18 +263,21 @@ static void bch2_alloc_read_key(struct bch_fs *c, struct bkey_s_c k)
 int bch2_alloc_read(struct bch_fs *c, struct list_head *journal_replay_list)
 {
        struct journal_replay *r;
-       struct btree_iter iter;
+       struct btree_trans trans;
+       struct btree_iter *iter;
        struct bkey_s_c k;
        struct bch_dev *ca;
        unsigned i;
        int ret;
 
-       for_each_btree_key(&iter, c, BTREE_ID_ALLOC, POS_MIN, 0, k) {
+       bch2_trans_init(&trans, c);
+
+       for_each_btree_key(&trans, iter, BTREE_ID_ALLOC, POS_MIN, 0, k) {
                bch2_alloc_read_key(c, k);
-               bch2_btree_iter_cond_resched(&iter);
+               bch2_trans_cond_resched(&trans);
        }
 
-       ret = bch2_btree_iter_unlock(&iter);
+       ret = bch2_trans_exit(&trans);
        if (ret)
                return ret;
 
@@ -390,8 +393,6 @@ static int __bch2_alloc_write_key(struct btree_trans *trans, struct bch_dev *ca,
        __alloc_write_key(a, g, m);
        percpu_up_read_preempt_enable(&c->mark_lock);
 
-       bch2_btree_iter_cond_resched(iter);
-
        bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, &a->k_i));
 
        ret = bch2_trans_commit(trans, NULL, journal_seq,
@@ -449,6 +450,7 @@ int bch2_alloc_write(struct bch_fs *c, bool nowait, bool *wrote)
                        if (ret)
                                break;
 
+                       bch2_trans_cond_resched(&trans);
                        *wrote = true;
                }
                up_read(&ca->bucket_lock);
@@ -937,14 +939,12 @@ static int bch2_invalidate_one_bucket2(struct btree_trans *trans,
        spin_unlock(&c->freelist_lock);
        percpu_up_read_preempt_enable(&c->mark_lock);
 
-       bch2_btree_iter_cond_resched(iter);
-
        BUG_ON(BKEY_ALLOC_VAL_U64s_MAX > 8);
 
        bch2_btree_iter_set_pos(iter, POS(ca->dev_idx, b));
 retry:
        k = bch2_btree_iter_peek_slot(iter);
-       ret = btree_iter_err(k);
+       ret = bkey_err(k);
        if (ret)
                return ret;
 
index ac90d8aa0131f2345b38d3ca0a3ec7a39a9fc3ae..9ee06e585073500f2e2392b5a6d4f9a1dece6f5c 100644 (file)
@@ -254,6 +254,8 @@ do {                                                                        \
        BCH_DEBUG_PARAM(expensive_debug_checks,                         \
                "Enables various runtime debugging checks that "        \
                "significantly affect performance")                     \
+       BCH_DEBUG_PARAM(debug_check_iterators,                          \
+               "Enables extra verification for btree iterators")       \
        BCH_DEBUG_PARAM(debug_check_bkeys,                              \
                "Run bkey_debugcheck (primarily checking GC/allocation "\
                "information) when iterating over keys")                \
index 7e572f5ffd706c7697a87d3d261fef1ff1fdffba..4d182518abe68a2eed5f90a89a62dbe439a893d6 100644 (file)
@@ -1040,7 +1040,7 @@ struct bkey_packed *bch2_bkey_prev_filter(struct btree *b,
                k = p;
        }
 
-       if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) {
+       if (btree_keys_expensive_checks(b)) {
                BUG_ON(ret >= orig_k);
 
                for (i = ret ? bkey_next(ret) : btree_bkey_first(b, t);
@@ -1661,10 +1661,11 @@ static inline void __bch2_btree_node_iter_advance(struct btree_node_iter *iter,
 void bch2_btree_node_iter_advance(struct btree_node_iter *iter,
                                  struct btree *b)
 {
-#ifdef CONFIG_BCACHEFS_DEBUG
-       bch2_btree_node_iter_verify(iter, b);
-       bch2_btree_node_iter_next_check(iter, b);
-#endif
+       if (btree_keys_expensive_checks(b)) {
+               bch2_btree_node_iter_verify(iter, b);
+               bch2_btree_node_iter_next_check(iter, b);
+       }
+
        __bch2_btree_node_iter_advance(iter, b);
 }
 
@@ -1727,7 +1728,7 @@ found:
        iter->data[0].k = __btree_node_key_to_offset(b, prev);
        iter->data[0].end = end;
 out:
-       if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) {
+       if (btree_keys_expensive_checks(b)) {
                struct btree_node_iter iter2 = *iter;
 
                if (prev)
index f77dc20d9fea5a71fb4f2a99fa2e7430a7479113..074ea6f16c9dc42432f65e65f89ec1257e20cf89 100644 (file)
@@ -812,7 +812,7 @@ struct btree *bch2_btree_node_get_sibling(struct bch_fs *c,
                 * We might have got -EINTR because trylock failed, and we're
                 * holding other locks that would cause us to deadlock:
                 */
-               for_each_linked_btree_iter(iter, linked)
+               trans_for_each_iter(iter->trans, linked)
                        if (btree_iter_cmp(iter, linked) < 0)
                                __bch2_btree_iter_unlock(linked);
 
@@ -837,13 +837,13 @@ struct btree *bch2_btree_node_get_sibling(struct bch_fs *c,
                        }
                }
 
-               bch2_btree_iter_relock(iter);
+               bch2_btree_trans_relock(iter->trans);
        }
 out:
        if (btree_lock_want(iter, level + 1) == BTREE_NODE_UNLOCKED)
                btree_node_unlock(iter, level + 1);
 
-       bch2_btree_iter_verify_locks(iter);
+       bch2_btree_trans_verify_locks(iter->trans);
 
        BUG_ON((!may_drop_locks || !IS_ERR(ret)) &&
               (iter->uptodate >= BTREE_ITER_NEED_RELOCK ||
index af75878c06bd72a938f2e8612357494565e345f4..cb0e2449cd599ff0a0674b38e62dfbdc1060c7a1 100644 (file)
@@ -204,13 +204,16 @@ static int btree_gc_mark_node(struct bch_fs *c, struct btree *b,
 static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id,
                         bool initial)
 {
-       struct btree_iter iter;
+       struct btree_trans trans;
+       struct btree_iter *iter;
        struct btree *b;
        struct range_checks r;
        unsigned depth = btree_node_type_needs_gc(btree_id) ? 0 : 1;
        u8 max_stale;
        int ret = 0;
 
+       bch2_trans_init(&trans, c);
+
        gc_pos_set(c, gc_pos_btree(btree_id, POS_MIN, 0));
 
        /*
@@ -224,7 +227,7 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id,
 
        btree_node_range_checks_init(&r, depth);
 
-       __for_each_btree_node(&iter, c, btree_id, POS_MIN,
+       __for_each_btree_node(&trans, iter, btree_id, POS_MIN,
                              0, depth, BTREE_ITER_PREFETCH, b) {
                btree_node_range_checks(c, b, &r);
 
@@ -238,22 +241,22 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id,
 
                if (!initial) {
                        if (max_stale > 64)
-                               bch2_btree_node_rewrite(c, &iter,
+                               bch2_btree_node_rewrite(c, iter,
                                                b->data->keys.seq,
                                                BTREE_INSERT_USE_RESERVE|
                                                BTREE_INSERT_NOWAIT|
                                                BTREE_INSERT_GC_LOCK_HELD);
                        else if (!btree_gc_rewrite_disabled(c) &&
                                 (btree_gc_always_rewrite(c) || max_stale > 16))
-                               bch2_btree_node_rewrite(c, &iter,
+                               bch2_btree_node_rewrite(c, iter,
                                                b->data->keys.seq,
                                                BTREE_INSERT_NOWAIT|
                                                BTREE_INSERT_GC_LOCK_HELD);
                }
 
-               bch2_btree_iter_cond_resched(&iter);
+               bch2_trans_cond_resched(&trans);
        }
-       ret = bch2_btree_iter_unlock(&iter) ?: ret;
+       ret = bch2_trans_exit(&trans) ?: ret;
        if (ret)
                return ret;
 
@@ -474,12 +477,8 @@ static void bch2_gc_free(struct bch_fs *c)
                ca->usage[1] = NULL;
        }
 
-       percpu_down_write(&c->mark_lock);
-
        free_percpu(c->usage[1]);
        c->usage[1] = NULL;
-
-       percpu_up_write(&c->mark_lock);
 }
 
 static void bch2_gc_done(struct bch_fs *c, bool initial)
@@ -520,8 +519,6 @@ static void bch2_gc_done(struct bch_fs *c, bool initial)
 #define copy_fs_field(_f, _msg, ...)                                   \
        copy_field(_f, "fs has wrong " _msg, ##__VA_ARGS__)
 
-       percpu_down_write(&c->mark_lock);
-
        {
                struct genradix_iter dst_iter = genradix_iter_init(&c->stripes[0], 0);
                struct genradix_iter src_iter = genradix_iter_init(&c->stripes[1], 0);
@@ -559,12 +556,6 @@ static void bch2_gc_done(struct bch_fs *c, bool initial)
                struct bucket_array *src = __bucket_array(ca, 1);
                size_t b;
 
-               if (initial) {
-                       memcpy(dst, src,
-                              sizeof(struct bucket_array) +
-                              sizeof(struct bucket) * dst->nbuckets);
-               }
-
                for (b = 0; b < src->nbuckets; b++) {
                        copy_bucket_field(gen);
                        copy_bucket_field(data_type);
@@ -629,8 +620,6 @@ static void bch2_gc_done(struct bch_fs *c, bool initial)
                }
        }
 
-       percpu_up_write(&c->mark_lock);
-
 #undef copy_fs_field
 #undef copy_dev_field
 #undef copy_bucket_field
@@ -643,8 +632,6 @@ static int bch2_gc_start(struct bch_fs *c)
        struct bch_dev *ca;
        unsigned i;
 
-       percpu_down_write(&c->mark_lock);
-
        /*
         * indicate to stripe code that we need to allocate for the gc stripes
         * radix tree, too
@@ -655,8 +642,6 @@ static int bch2_gc_start(struct bch_fs *c)
 
        c->usage[1] = __alloc_percpu_gfp(fs_usage_u64s(c) * sizeof(u64),
                                         sizeof(u64), GFP_KERNEL);
-       percpu_up_write(&c->mark_lock);
-
        if (!c->usage[1])
                return -ENOMEM;
 
@@ -679,8 +664,6 @@ static int bch2_gc_start(struct bch_fs *c)
                }
        }
 
-       percpu_down_write(&c->mark_lock);
-
        for_each_member_device(ca, c, i) {
                struct bucket_array *dst = __bucket_array(ca, 1);
                struct bucket_array *src = __bucket_array(ca, 0);
@@ -697,8 +680,6 @@ static int bch2_gc_start(struct bch_fs *c)
                }
        };
 
-       percpu_up_write(&c->mark_lock);
-
        return bch2_ec_mem_alloc(c, true);
 }
 
@@ -731,7 +712,10 @@ int bch2_gc(struct bch_fs *c, struct list_head *journal, bool initial)
 
        down_write(&c->gc_lock);
 again:
+       percpu_down_write(&c->mark_lock);
        ret = bch2_gc_start(c);
+       percpu_up_write(&c->mark_lock);
+
        if (ret)
                goto out;
 
@@ -756,7 +740,11 @@ out:
                        bch_info(c, "Fixed gens, restarting mark and sweep:");
                        clear_bit(BCH_FS_FIXED_GENS, &c->flags);
                        __gc_pos_set(c, gc_phase(GC_PHASE_NOT_RUNNING));
+
+                       percpu_down_write(&c->mark_lock);
                        bch2_gc_free(c);
+                       percpu_up_write(&c->mark_lock);
+
                        goto again;
                }
 
@@ -764,6 +752,8 @@ out:
                ret = -EINVAL;
        }
 
+       percpu_down_write(&c->mark_lock);
+
        if (!ret)
                bch2_gc_done(c, initial);
 
@@ -771,6 +761,8 @@ out:
        __gc_pos_set(c, gc_phase(GC_PHASE_NOT_RUNNING));
 
        bch2_gc_free(c);
+       percpu_up_write(&c->mark_lock);
+
        up_write(&c->gc_lock);
 
        trace_gc_end(c);
@@ -1027,7 +1019,8 @@ next:
 
 static int bch2_coalesce_btree(struct bch_fs *c, enum btree_id btree_id)
 {
-       struct btree_iter iter;
+       struct btree_trans trans;
+       struct btree_iter *iter;
        struct btree *b;
        bool kthread = (current->flags & PF_KTHREAD) != 0;
        unsigned i;
@@ -1036,6 +1029,8 @@ static int bch2_coalesce_btree(struct bch_fs *c, enum btree_id btree_id)
        struct btree *merge[GC_MERGE_NODES];
        u32 lock_seq[GC_MERGE_NODES];
 
+       bch2_trans_init(&trans, c);
+
        /*
         * XXX: We don't have a good way of positively matching on sibling nodes
         * that have the same parent - this code works by handling the cases
@@ -1045,7 +1040,7 @@ static int bch2_coalesce_btree(struct bch_fs *c, enum btree_id btree_id)
         */
        memset(merge, 0, sizeof(merge));
 
-       __for_each_btree_node(&iter, c, btree_id, POS_MIN,
+       __for_each_btree_node(&trans, iter, btree_id, POS_MIN,
                              BTREE_MAX_DEPTH, 0,
                              BTREE_ITER_PREFETCH, b) {
                memmove(merge + 1, merge,
@@ -1067,7 +1062,7 @@ static int bch2_coalesce_btree(struct bch_fs *c, enum btree_id btree_id)
                }
                memset(merge + i, 0, (GC_MERGE_NODES - i) * sizeof(merge[0]));
 
-               bch2_coalesce_nodes(c, &iter, merge);
+               bch2_coalesce_nodes(c, iter, merge);
 
                for (i = 1; i < GC_MERGE_NODES && merge[i]; i++) {
                        lock_seq[i] = merge[i]->lock.state.seq;
@@ -1077,23 +1072,23 @@ static int bch2_coalesce_btree(struct bch_fs *c, enum btree_id btree_id)
                lock_seq[0] = merge[0]->lock.state.seq;
 
                if (kthread && kthread_should_stop()) {
-                       bch2_btree_iter_unlock(&iter);
+                       bch2_trans_exit(&trans);
                        return -ESHUTDOWN;
                }
 
-               bch2_btree_iter_cond_resched(&iter);
+               bch2_trans_cond_resched(&trans);
 
                /*
                 * If the parent node wasn't relocked, it might have been split
                 * and the nodes in our sliding window might not have the same
                 * parent anymore - blow away the sliding window:
                 */
-               if (btree_iter_node(&iter, iter.level + 1) &&
-                   !btree_node_intent_locked(&iter, iter.level + 1))
+               if (btree_iter_node(iter, iter->level + 1) &&
+                   !btree_node_intent_locked(iter, iter->level + 1))
                        memset(merge + 1, 0,
                               (GC_MERGE_NODES - 1) * sizeof(merge[0]));
        }
-       return bch2_btree_iter_unlock(&iter);
+       return bch2_trans_exit(&trans);
 }
 
 /**
index f2107cf7ca5b6839c0fd741d1fa5862598da8aac..0b99e7d217a8178d7712ef8facf1711f71da66d6 100644 (file)
@@ -1153,19 +1153,21 @@ static void bch2_btree_node_write_error(struct bch_fs *c,
        struct bkey_i_btree_ptr *new_key;
        struct bkey_s_btree_ptr bp;
        struct bch_extent_ptr *ptr;
-       struct btree_iter iter;
+       struct btree_trans trans;
+       struct btree_iter *iter;
        int ret;
 
-       __bch2_btree_iter_init(&iter, c, b->btree_id, b->key.k.p,
-                              BTREE_MAX_DEPTH,
-                              b->level, BTREE_ITER_NODES);
+       bch2_trans_init(&trans, c);
+
+       iter = bch2_trans_get_node_iter(&trans, b->btree_id, b->key.k.p,
+                                       BTREE_MAX_DEPTH, b->level, 0);
 retry:
-       ret = bch2_btree_iter_traverse(&iter);
+       ret = bch2_btree_iter_traverse(iter);
        if (ret)
                goto err;
 
        /* has node been freed? */
-       if (iter.l[b->level].b != b) {
+       if (iter->l[b->level].b != b) {
                /* node has been freed: */
                BUG_ON(!btree_node_dying(b));
                goto out;
@@ -1184,13 +1186,13 @@ retry:
        if (!bch2_bkey_nr_ptrs(bp.s_c))
                goto err;
 
-       ret = bch2_btree_node_update_key(c, &iter, b, new_key);
+       ret = bch2_btree_node_update_key(c, iter, b, new_key);
        if (ret == -EINTR)
                goto retry;
        if (ret)
                goto err;
 out:
-       bch2_btree_iter_unlock(&iter);
+       bch2_trans_exit(&trans);
        bio_put(&wbio->wbio.bio);
        btree_node_write_done(c, b);
        return;
index 94b86ad6c7e68600183fab6d8cb220afd53a9536..49ad6df8125f4d0784038a881e479a49a7749d1f 100644 (file)
@@ -69,7 +69,7 @@ void bch2_btree_node_unlock_write(struct btree *b, struct btree_iter *iter)
        EBUG_ON(iter->l[b->level].b != b);
        EBUG_ON(iter->l[b->level].lock_seq + 1 != b->lock.state.seq);
 
-       for_each_btree_iter_with_node(iter, b, linked)
+       trans_for_each_iter_with_node(iter->trans, b, linked)
                linked->l[b->level].lock_seq += 2;
 
        six_unlock_write(&b->lock);
@@ -77,13 +77,12 @@ void bch2_btree_node_unlock_write(struct btree *b, struct btree_iter *iter)
 
 void __bch2_btree_node_lock_write(struct btree *b, struct btree_iter *iter)
 {
-       struct bch_fs *c = iter->c;
        struct btree_iter *linked;
        unsigned readers = 0;
 
        EBUG_ON(btree_node_read_locked(iter, b->level));
 
-       for_each_linked_btree_iter(iter, linked)
+       trans_for_each_iter(iter->trans, linked)
                if (linked->l[b->level].b == b &&
                    btree_node_read_locked(linked, b->level))
                        readers++;
@@ -96,7 +95,7 @@ void __bch2_btree_node_lock_write(struct btree *b, struct btree_iter *iter)
         */
        atomic64_sub(__SIX_VAL(read_lock, readers),
                     &b->lock.state.counter);
-       btree_node_lock_type(c, b, SIX_LOCK_write);
+       btree_node_lock_type(iter->trans->c, b, SIX_LOCK_write);
        atomic64_add(__SIX_VAL(read_lock, readers),
                     &b->lock.state.counter);
 }
@@ -187,7 +186,8 @@ static inline bool btree_iter_get_locks(struct btree_iter *iter,
        if (iter->uptodate == BTREE_ITER_NEED_RELOCK)
                iter->uptodate = BTREE_ITER_NEED_PEEK;
 
-       bch2_btree_iter_verify_locks(iter);
+       bch2_btree_trans_verify_locks(iter->trans);
+
        return iter->uptodate < BTREE_ITER_NEED_RELOCK;
 }
 
@@ -198,12 +198,11 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
                           enum six_lock_type type,
                           bool may_drop_locks)
 {
-       struct bch_fs *c = iter->c;
        struct btree_iter *linked;
        bool ret = true;
 
        /* Check if it's safe to block: */
-       for_each_btree_iter(iter, linked) {
+       trans_for_each_iter(iter->trans, linked) {
                if (!linked->nodes_locked)
                        continue;
 
@@ -253,7 +252,7 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
        }
 
        if (ret)
-               __btree_node_lock_type(c, b, type);
+               __btree_node_lock_type(iter->trans->c, b, type);
        else
                trans_restart();
 
@@ -263,7 +262,7 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
 /* Btree iterator locking: */
 
 #ifdef CONFIG_BCACHEFS_DEBUG
-void __bch2_btree_iter_verify_locks(struct btree_iter *iter)
+void bch2_btree_iter_verify_locks(struct btree_iter *iter)
 {
        unsigned l;
 
@@ -280,35 +279,23 @@ void __bch2_btree_iter_verify_locks(struct btree_iter *iter)
        }
 }
 
-void bch2_btree_iter_verify_locks(struct btree_iter *iter)
+void bch2_btree_trans_verify_locks(struct btree_trans *trans)
 {
-       struct btree_iter *linked;
-
-       for_each_btree_iter(iter, linked)
-               __bch2_btree_iter_verify_locks(linked);
+       struct btree_iter *iter;
 
+       trans_for_each_iter(trans, iter)
+               bch2_btree_iter_verify_locks(iter);
 }
 #endif
 
 __flatten
-static bool __bch2_btree_iter_relock(struct btree_iter *iter)
+static bool bch2_btree_iter_relock(struct btree_iter *iter)
 {
        return iter->uptodate >= BTREE_ITER_NEED_RELOCK
                ? btree_iter_get_locks(iter, false)
                : true;
 }
 
-bool bch2_btree_iter_relock(struct btree_iter *iter)
-{
-       struct btree_iter *linked;
-       bool ret = true;
-
-       for_each_btree_iter(iter, linked)
-               ret &= __bch2_btree_iter_relock(linked);
-
-       return ret;
-}
-
 bool __bch2_btree_iter_upgrade(struct btree_iter *iter,
                               unsigned new_locks_want)
 {
@@ -326,8 +313,9 @@ bool __bch2_btree_iter_upgrade(struct btree_iter *iter,
         * on iterators that might lock ancestors before us to avoid getting
         * -EINTR later:
         */
-       for_each_linked_btree_iter(iter, linked)
-               if (linked->btree_id == iter->btree_id &&
+       trans_for_each_iter(iter->trans, linked)
+               if (linked != iter &&
+                   linked->btree_id == iter->btree_id &&
                    btree_iter_cmp(linked, iter) <= 0 &&
                    linked->locks_want < new_locks_want) {
                        linked->locks_want = new_locks_want;
@@ -372,7 +360,7 @@ void __bch2_btree_iter_downgrade(struct btree_iter *iter,
         * might have had to modify locks_want on linked iterators due to lock
         * ordering:
         */
-       for_each_btree_iter(iter, linked) {
+       trans_for_each_iter(iter->trans, linked) {
                unsigned new_locks_want = downgrade_to ?:
                        (linked->flags & BTREE_ITER_INTENT ? 1 : 0);
 
@@ -395,19 +383,40 @@ void __bch2_btree_iter_downgrade(struct btree_iter *iter,
                }
        }
 
-       bch2_btree_iter_verify_locks(iter);
+       bch2_btree_trans_verify_locks(iter->trans);
 }
 
 int bch2_btree_iter_unlock(struct btree_iter *iter)
 {
        struct btree_iter *linked;
 
-       for_each_btree_iter(iter, linked)
+       trans_for_each_iter(iter->trans, linked)
                __bch2_btree_iter_unlock(linked);
 
-       return iter->flags & BTREE_ITER_ERROR ? -EIO : 0;
+       return btree_iter_err(iter);
+}
+
+bool bch2_btree_trans_relock(struct btree_trans *trans)
+{
+       struct btree_iter *iter;
+       bool ret = true;
+
+       trans_for_each_iter(trans, iter)
+               ret &= bch2_btree_iter_relock(iter);
+
+       return ret;
+}
+
+void bch2_btree_trans_unlock(struct btree_trans *trans)
+{
+       struct btree_iter *iter;
+
+       trans_for_each_iter(trans, iter)
+               __bch2_btree_iter_unlock(iter);
 }
 
+/* Btree transaction locking: */
+
 /* Btree iterator: */
 
 #ifdef CONFIG_BCACHEFS_DEBUG
@@ -419,6 +428,9 @@ static void __bch2_btree_iter_verify(struct btree_iter *iter,
        struct btree_node_iter tmp = l->iter;
        struct bkey_packed *k;
 
+       if (!debug_check_iterators(iter->trans->c))
+               return;
+
        if (iter->uptodate > BTREE_ITER_NEED_PEEK)
                return;
 
@@ -465,7 +477,10 @@ void bch2_btree_iter_verify(struct btree_iter *iter, struct btree *b)
 {
        struct btree_iter *linked;
 
-       for_each_btree_iter_with_node(iter, b, linked)
+       if (!debug_check_iterators(iter->trans->c))
+               return;
+
+       trans_for_each_iter_with_node(iter->trans, b, linked)
                __bch2_btree_iter_verify(linked, b);
 }
 
@@ -619,7 +634,7 @@ void bch2_btree_node_iter_fix(struct btree_iter *iter,
                __bch2_btree_node_iter_fix(iter, b, node_iter, t,
                                          where, clobber_u64s, new_u64s);
 
-       for_each_btree_iter_with_node(iter, b, linked)
+       trans_for_each_iter_with_node(iter->trans, b, linked)
                __bch2_btree_node_iter_fix(linked, b,
                                          &linked->l[b->level].iter, t,
                                          where, clobber_u64s, new_u64s);
@@ -643,8 +658,8 @@ static inline struct bkey_s_c __btree_iter_unpack(struct btree_iter *iter,
 
        ret = bkey_disassemble(l->b, k, u);
 
-       if (debug_check_bkeys(iter->c))
-               bch2_bkey_debugcheck(iter->c, l->b, ret);
+       if (debug_check_bkeys(iter->trans->c))
+               bch2_bkey_debugcheck(iter->trans->c, l->b, ret);
 
        return ret;
 }
@@ -777,7 +792,7 @@ void bch2_btree_iter_node_replace(struct btree_iter *iter, struct btree *b)
        enum btree_node_locked_type t;
        struct btree_iter *linked;
 
-       for_each_btree_iter(iter, linked)
+       trans_for_each_iter(iter->trans, linked)
                if (btree_iter_pos_in_node(linked, b)) {
                        /*
                         * bch2_btree_iter_node_drop() has already been called -
@@ -811,7 +826,7 @@ void bch2_btree_iter_node_drop(struct btree_iter *iter, struct btree *b)
        iter->l[level].b = BTREE_ITER_NOT_END;
        mark_btree_node_unlocked(iter, level);
 
-       for_each_btree_iter(iter, linked)
+       trans_for_each_iter(iter->trans, linked)
                if (linked->l[level].b == b) {
                        __btree_node_unlock(linked, level);
                        linked->l[level].b = BTREE_ITER_NOT_END;
@@ -826,14 +841,14 @@ void bch2_btree_iter_reinit_node(struct btree_iter *iter, struct btree *b)
 {
        struct btree_iter *linked;
 
-       for_each_btree_iter_with_node(iter, b, linked)
+       trans_for_each_iter_with_node(iter->trans, b, linked)
                __btree_iter_init(linked, b->level);
 }
 
 static inline int btree_iter_lock_root(struct btree_iter *iter,
                                       unsigned depth_want)
 {
-       struct bch_fs *c = iter->c;
+       struct bch_fs *c = iter->trans->c;
        struct btree *b;
        enum six_lock_type lock_type;
        unsigned i;
@@ -881,11 +896,12 @@ static inline int btree_iter_lock_root(struct btree_iter *iter,
 noinline
 static void btree_iter_prefetch(struct btree_iter *iter)
 {
+       struct bch_fs *c = iter->trans->c;
        struct btree_iter_level *l = &iter->l[iter->level];
        struct btree_node_iter node_iter = l->iter;
        struct bkey_packed *k;
        BKEY_PADDED(k) tmp;
-       unsigned nr = test_bit(BCH_FS_STARTED, &iter->c->flags)
+       unsigned nr = test_bit(BCH_FS_STARTED, &c->flags)
                ? (iter->level > 1 ? 0 :  2)
                : (iter->level > 1 ? 1 : 16);
        bool was_locked = btree_node_locked(iter, iter->level);
@@ -900,8 +916,7 @@ static void btree_iter_prefetch(struct btree_iter *iter)
                        break;
 
                bch2_bkey_unpack(l->b, &tmp.k, k);
-               bch2_btree_node_prefetch(iter->c, iter, &tmp.k,
-                                        iter->level - 1);
+               bch2_btree_node_prefetch(c, iter, &tmp.k, iter->level - 1);
        }
 
        if (!was_locked)
@@ -910,6 +925,7 @@ static void btree_iter_prefetch(struct btree_iter *iter)
 
 static inline int btree_iter_down(struct btree_iter *iter)
 {
+       struct bch_fs *c = iter->trans->c;
        struct btree_iter_level *l = &iter->l[iter->level];
        struct btree *b;
        unsigned level = iter->level - 1;
@@ -921,7 +937,7 @@ static inline int btree_iter_down(struct btree_iter *iter)
        bch2_bkey_unpack(l->b, &tmp.k,
                         bch2_btree_node_iter_peek(&l->iter, l->b));
 
-       b = bch2_btree_node_get(iter->c, iter, &tmp.k, level, lock_type, true);
+       b = bch2_btree_node_get(c, iter, &tmp.k, level, lock_type, true);
        if (unlikely(IS_ERR(b)))
                return PTR_ERR(b);
 
@@ -943,17 +959,26 @@ static void btree_iter_up(struct btree_iter *iter)
 
 int __must_check __bch2_btree_iter_traverse(struct btree_iter *);
 
-static int btree_iter_traverse_error(struct btree_iter *iter, int ret)
+static int __btree_iter_traverse_all(struct btree_trans *trans,
+                                    struct btree_iter *iter, int ret)
 {
-       struct bch_fs *c = iter->c;
-       struct btree_iter *linked, *sorted_iters, **i;
-retry_all:
-       bch2_btree_iter_unlock(iter);
+       struct bch_fs *c = trans->c;
+       u8 sorted[BTREE_ITER_MAX];
+       unsigned i, nr_sorted = 0;
+
+       trans_for_each_iter(trans, iter)
+               sorted[nr_sorted++] = iter - trans->iters;
 
-       if (ret != -ENOMEM && ret != -EINTR)
-               goto io_error;
+#define btree_iter_cmp_by_idx(_l, _r)                          \
+               btree_iter_cmp(&trans->iters[_l], &trans->iters[_r])
 
-       if (ret == -ENOMEM) {
+       bubble_sort(sorted, nr_sorted, btree_iter_cmp_by_idx);
+#undef btree_iter_cmp_by_idx
+
+retry_all:
+       bch2_btree_trans_unlock(trans);
+
+       if (unlikely(ret == -ENOMEM)) {
                struct closure cl;
 
                closure_init_stack(&cl);
@@ -964,57 +989,35 @@ retry_all:
                } while (ret);
        }
 
-       /*
-        * Linked iters are normally a circular singly linked list - break cycle
-        * while we sort them:
-        */
-       linked = iter->next;
-       iter->next = NULL;
-       sorted_iters = NULL;
-
-       while (linked) {
-               iter = linked;
-               linked = linked->next;
-
-               i = &sorted_iters;
-               while (*i && btree_iter_cmp(iter, *i) > 0)
-                       i = &(*i)->next;
-
-               iter->next = *i;
-               *i = iter;
+       if (unlikely(ret == -EIO)) {
+               iter->flags |= BTREE_ITER_ERROR;
+               iter->l[iter->level].b = BTREE_ITER_NOT_END;
+               goto out;
        }
 
-       /* Make list circular again: */
-       iter = sorted_iters;
-       while (iter->next)
-               iter = iter->next;
-       iter->next = sorted_iters;
+       BUG_ON(ret && ret != -EINTR);
 
        /* Now, redo traversals in correct order: */
+       for (i = 0; i < nr_sorted; i++) {
+               iter = &trans->iters[sorted[i]];
 
-       iter = sorted_iters;
-       do {
-retry:
-               ret = __bch2_btree_iter_traverse(iter);
-               if (unlikely(ret)) {
-                       if (ret == -EINTR)
-                               goto retry;
-                       goto retry_all;
-               }
+               do {
+                       ret = __bch2_btree_iter_traverse(iter);
+               } while (ret == -EINTR);
 
-               iter = iter->next;
-       } while (iter != sorted_iters);
+               if (ret)
+                       goto retry_all;
+       }
 
-       ret = btree_iter_linked(iter) ? -EINTR : 0;
+       ret = btree_trans_has_multiple_iters(trans) ? -EINTR : 0;
 out:
        bch2_btree_cache_cannibalize_unlock(c);
        return ret;
-io_error:
-       BUG_ON(ret != -EIO);
+}
 
-       iter->flags |= BTREE_ITER_ERROR;
-       iter->l[iter->level].b = BTREE_ITER_NOT_END;
-       goto out;
+int bch2_btree_iter_traverse_all(struct btree_trans *trans)
+{
+       return __btree_iter_traverse_all(trans, NULL, 0);
 }
 
 static unsigned btree_iter_up_until_locked(struct btree_iter *iter,
@@ -1051,7 +1054,7 @@ int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
        if (unlikely(iter->level >= BTREE_MAX_DEPTH))
                return 0;
 
-       if (__bch2_btree_iter_relock(iter))
+       if (bch2_btree_iter_relock(iter))
                return 0;
 
        /*
@@ -1091,7 +1094,7 @@ int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
 
        iter->uptodate = BTREE_ITER_NEED_PEEK;
 
-       bch2_btree_iter_verify_locks(iter);
+       bch2_btree_trans_verify_locks(iter->trans);
        __bch2_btree_iter_verify(iter, iter->l[iter->level].b);
        return 0;
 }
@@ -1102,9 +1105,9 @@ int __must_check bch2_btree_iter_traverse(struct btree_iter *iter)
 
        ret = __bch2_btree_iter_traverse(iter);
        if (unlikely(ret))
-               ret = btree_iter_traverse_error(iter, ret);
+               ret = __btree_iter_traverse_all(iter->trans, iter, ret);
 
-       BUG_ON(ret == -EINTR && !btree_iter_linked(iter));
+       BUG_ON(ret == -EINTR && !btree_trans_has_multiple_iters(iter->trans));
 
        return ret;
 }
@@ -1117,7 +1120,7 @@ static inline void bch2_btree_iter_checks(struct btree_iter *iter,
                (iter->btree_id == BTREE_ID_EXTENTS &&
                 type != BTREE_ITER_NODES));
 
-       bch2_btree_iter_verify_locks(iter);
+       bch2_btree_trans_verify_locks(iter->trans);
 }
 
 /* Iterate across nodes (leaf and interior nodes) */
@@ -1274,9 +1277,9 @@ static inline struct bkey_s_c btree_iter_peek_uptodate(struct btree_iter *iter)
                        __bch2_btree_node_iter_peek_all(&l->iter, l->b));
        }
 
-       if (debug_check_bkeys(iter->c) &&
+       if (debug_check_bkeys(iter->trans->c) &&
            !bkey_deleted(ret.k))
-               bch2_bkey_debugcheck(iter->c, l->b, ret);
+               bch2_bkey_debugcheck(iter->trans->c, l->b, ret);
        return ret;
 }
 
@@ -1581,124 +1584,79 @@ struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *iter)
        return __bch2_btree_iter_peek_slot(iter);
 }
 
-void __bch2_btree_iter_init(struct btree_iter *iter, struct bch_fs *c,
-                           enum btree_id btree_id, struct bpos pos,
-                           unsigned locks_want, unsigned depth,
-                           unsigned flags)
+static inline void bch2_btree_iter_init(struct btree_trans *trans,
+                       struct btree_iter *iter, enum btree_id btree_id,
+                       struct bpos pos, unsigned flags)
 {
+       struct bch_fs *c = trans->c;
        unsigned i;
 
-       EBUG_ON(depth >= BTREE_MAX_DEPTH);
-       EBUG_ON(locks_want > BTREE_MAX_DEPTH);
+       if (btree_id == BTREE_ID_EXTENTS &&
+           !(flags & BTREE_ITER_NODES))
+               flags |= BTREE_ITER_IS_EXTENTS;
 
-       iter->c                         = c;
+       iter->trans                     = trans;
        iter->pos                       = pos;
        bkey_init(&iter->k);
        iter->k.p                       = pos;
        iter->flags                     = flags;
        iter->uptodate                  = BTREE_ITER_NEED_TRAVERSE;
        iter->btree_id                  = btree_id;
-       iter->level                     = depth;
-       iter->locks_want                = locks_want;
+       iter->level                     = 0;
+       iter->locks_want                = flags & BTREE_ITER_INTENT ? 1 : 0;
        iter->nodes_locked              = 0;
        iter->nodes_intent_locked       = 0;
        for (i = 0; i < ARRAY_SIZE(iter->l); i++)
                iter->l[i].b            = NULL;
        iter->l[iter->level].b          = BTREE_ITER_NOT_END;
-       iter->next                      = iter;
 
        prefetch(c->btree_roots[btree_id].b);
 }
 
-static void bch2_btree_iter_unlink(struct btree_iter *iter)
-{
-       struct btree_iter *linked;
-
-       __bch2_btree_iter_unlock(iter);
-
-       if (!btree_iter_linked(iter))
-               return;
-
-       for_each_linked_btree_iter(iter, linked)
-               if (linked->next == iter) {
-                       linked->next = iter->next;
-                       iter->next = iter;
-                       return;
-               }
-
-       BUG();
-}
-
-static void bch2_btree_iter_link(struct btree_iter *iter, struct btree_iter *new)
-{
-       BUG_ON(btree_iter_linked(new));
-
-       new->next = iter->next;
-       iter->next = new;
-}
-
-void bch2_btree_iter_copy(struct btree_iter *dst, struct btree_iter *src)
-{
-       unsigned i;
-
-       __bch2_btree_iter_unlock(dst);
-       memcpy(dst, src, offsetof(struct btree_iter, next));
-
-       for (i = 0; i < BTREE_MAX_DEPTH; i++)
-               if (btree_node_locked(dst, i))
-                       six_lock_increment(&dst->l[i].b->lock,
-                                          __btree_lock_want(dst, i));
-}
-
 /* new transactional stuff: */
 
-static void btree_trans_verify(struct btree_trans *trans)
+int bch2_trans_iter_put(struct btree_trans *trans,
+                       struct btree_iter *iter)
 {
-       unsigned i;
-
-       for (i = 0; i < trans->nr_iters; i++) {
-               struct btree_iter *iter = &trans->iters[i];
+       int ret = btree_iter_err(iter);
 
-               BUG_ON(btree_iter_linked(iter) !=
-                      ((trans->iters_linked & (1 << i)) &&
-                       !is_power_of_2(trans->iters_linked)));
-       }
+       trans->iters_live       &= ~(1ULL << iter->idx);
+       return ret;
 }
 
-static inline unsigned btree_trans_iter_idx(struct btree_trans *trans,
-                                           struct btree_iter *iter)
+static inline void __bch2_trans_iter_free(struct btree_trans *trans,
+                                         unsigned idx)
 {
-       ssize_t idx = iter - trans->iters;
-
-       BUG_ON(idx < 0 || idx >= trans->nr_iters);
-       BUG_ON(!(trans->iters_live & (1ULL << idx)));
-
-       return idx;
+       __bch2_btree_iter_unlock(&trans->iters[idx]);
+       trans->iters_linked             &= ~(1ULL << idx);
+       trans->iters_live               &= ~(1ULL << idx);
+       trans->iters_touched            &= ~(1ULL << idx);
+       trans->iters_unlink_on_restart  &= ~(1ULL << idx);
+       trans->iters_unlink_on_commit   &= ~(1ULL << idx);
 }
 
-void bch2_trans_iter_put(struct btree_trans *trans,
+int bch2_trans_iter_free(struct btree_trans *trans,
                         struct btree_iter *iter)
 {
-       ssize_t idx = btree_trans_iter_idx(trans, iter);
+       int ret = btree_iter_err(iter);
 
-       trans->iters_live       &= ~(1ULL << idx);
+       __bch2_trans_iter_free(trans, iter->idx);
+       return ret;
 }
 
-void bch2_trans_iter_free(struct btree_trans *trans,
-                         struct btree_iter *iter)
+int bch2_trans_iter_free_on_commit(struct btree_trans *trans,
+                                  struct btree_iter *iter)
 {
-       ssize_t idx = btree_trans_iter_idx(trans, iter);
+       int ret = btree_iter_err(iter);
 
-       trans->iters_live       &= ~(1ULL << idx);
-       trans->iters_linked     &= ~(1ULL << idx);
-       bch2_btree_iter_unlink(iter);
+       trans->iters_unlink_on_commit |= 1ULL << iter->idx;
+       return ret;
 }
 
 static int btree_trans_realloc_iters(struct btree_trans *trans,
                                     unsigned new_size)
 {
        void *new_iters, *new_updates;
-       unsigned i;
 
        BUG_ON(new_size > BTREE_ITER_MAX);
 
@@ -1727,6 +1685,11 @@ success:
        memcpy(new_updates, trans->updates,
               sizeof(struct btree_insert_entry) * trans->nr_updates);
 
+       if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG))
+               memset(trans->iters, POISON_FREE,
+                      sizeof(struct btree_iter) * trans->nr_iters +
+                      sizeof(struct btree_insert_entry) * trans->nr_iters);
+
        if (trans->iters != trans->iters_onstack)
                kfree(trans->iters);
 
@@ -1734,20 +1697,6 @@ success:
        trans->updates  = new_updates;
        trans->size     = new_size;
 
-       for (i = 0; i < trans->nr_iters; i++)
-               trans->iters[i].next = &trans->iters[i];
-
-       if (trans->iters_linked) {
-               unsigned first_linked = __ffs(trans->iters_linked);
-
-               for (i = first_linked + 1; i < trans->nr_iters; i++)
-                       if (trans->iters_linked & (1 << i))
-                               bch2_btree_iter_link(&trans->iters[first_linked],
-                                                    &trans->iters[i]);
-       }
-
-       btree_trans_verify(trans);
-
        if (trans->iters_live) {
                trans_restart();
                return -EINTR;
@@ -1761,8 +1710,31 @@ void bch2_trans_preload_iters(struct btree_trans *trans)
        btree_trans_realloc_iters(trans, BTREE_ITER_MAX);
 }
 
+static int btree_trans_iter_alloc(struct btree_trans *trans)
+{
+       unsigned idx = ffz(trans->iters_linked);
+
+       if (idx < trans->nr_iters)
+               goto got_slot;
+
+       if (trans->nr_iters == trans->size) {
+               int ret = btree_trans_realloc_iters(trans, trans->size * 2);
+               if (ret)
+                       return ret;
+       }
+
+       idx = trans->nr_iters++;
+       BUG_ON(trans->nr_iters > trans->size);
+
+       trans->iters[idx].idx = idx;
+got_slot:
+       BUG_ON(trans->iters_linked & (1ULL << idx));
+       trans->iters_linked |= 1ULL << idx;
+       return idx;
+}
+
 static struct btree_iter *__btree_trans_get_iter(struct btree_trans *trans,
-                                                unsigned btree_id,
+                                                unsigned btree_id, struct bpos pos,
                                                 unsigned flags, u64 iter_id)
 {
        struct btree_iter *iter;
@@ -1770,32 +1742,28 @@ static struct btree_iter *__btree_trans_get_iter(struct btree_trans *trans,
 
        BUG_ON(trans->nr_iters > BTREE_ITER_MAX);
 
-       for (idx = 0; idx < trans->nr_iters; idx++)
-               if (trans->iters[idx].id == iter_id)
+       for (idx = 0; idx < trans->nr_iters; idx++) {
+               if (!(trans->iters_linked & (1ULL << idx)))
+                       continue;
+
+               iter = &trans->iters[idx];
+               if (iter_id
+                   ? iter->id == iter_id
+                   : (iter->btree_id == btree_id &&
+                      !bkey_cmp(iter->pos, pos)))
                        goto found;
+       }
        idx = -1;
 found:
        if (idx < 0) {
-               idx = ffz(trans->iters_linked);
-               if (idx < trans->nr_iters)
-                       goto got_slot;
+               idx = btree_trans_iter_alloc(trans);
+               if (idx < 0)
+                       return ERR_PTR(idx);
 
-               BUG_ON(trans->nr_iters > trans->size);
-
-               if (trans->nr_iters == trans->size) {
-                       int ret = btree_trans_realloc_iters(trans,
-                                                       trans->size * 2);
-                       if (ret)
-                               return ERR_PTR(ret);
-               }
-
-               idx = trans->nr_iters++;
-               BUG_ON(trans->nr_iters > trans->size);
-got_slot:
                iter = &trans->iters[idx];
                iter->id = iter_id;
 
-               bch2_btree_iter_init(iter, trans->c, btree_id, POS_MIN, flags);
+               bch2_btree_iter_init(trans, iter, btree_id, pos, flags);
        } else {
                iter = &trans->iters[idx];
 
@@ -1803,17 +1771,10 @@ got_slot:
                iter->flags |= flags & (BTREE_ITER_INTENT|BTREE_ITER_PREFETCH);
        }
 
+       BUG_ON(iter->btree_id != btree_id);
        BUG_ON(trans->iters_live & (1ULL << idx));
-       trans->iters_live |= 1ULL << idx;
-
-       if (trans->iters_linked &&
-           !(trans->iters_linked & (1 << idx)))
-               bch2_btree_iter_link(&trans->iters[__ffs(trans->iters_linked)],
-                                    iter);
-
-       trans->iters_linked |= 1ULL << idx;
-
-       btree_trans_verify(trans);
+       trans->iters_live       |= 1ULL << idx;
+       trans->iters_touched    |= 1ULL << idx;
 
        BUG_ON(iter->btree_id != btree_id);
        BUG_ON((iter->flags ^ flags) & BTREE_ITER_TYPE);
@@ -1827,26 +1788,66 @@ struct btree_iter *__bch2_trans_get_iter(struct btree_trans *trans,
                                         u64 iter_id)
 {
        struct btree_iter *iter =
-               __btree_trans_get_iter(trans, btree_id, flags, iter_id);
+               __btree_trans_get_iter(trans, btree_id, pos, flags, iter_id);
 
        if (!IS_ERR(iter))
                bch2_btree_iter_set_pos(iter, pos);
        return iter;
 }
 
-struct btree_iter *__bch2_trans_copy_iter(struct btree_trans *trans,
-                                         struct btree_iter *src,
-                                         u64 iter_id)
+struct btree_iter *bch2_trans_get_node_iter(struct btree_trans *trans,
+                                           enum btree_id btree_id,
+                                           struct bpos pos,
+                                           unsigned locks_want,
+                                           unsigned depth,
+                                           unsigned flags)
 {
        struct btree_iter *iter =
-               __btree_trans_get_iter(trans, src->btree_id,
-                                      src->flags, iter_id);
+               __btree_trans_get_iter(trans, btree_id, pos,
+                                      flags|BTREE_ITER_NODES, 0);
+       unsigned i;
+
+       BUG_ON(IS_ERR(iter));
+       BUG_ON(bkey_cmp(iter->pos, pos));
+
+       iter->locks_want = locks_want;
+       iter->level     = depth;
+
+       for (i = 0; i < ARRAY_SIZE(iter->l); i++)
+               iter->l[i].b            = NULL;
+       iter->l[iter->level].b          = BTREE_ITER_NOT_END;
 
-       if (!IS_ERR(iter))
-               bch2_btree_iter_copy(iter, src);
        return iter;
 }
 
+struct btree_iter *bch2_trans_copy_iter(struct btree_trans *trans,
+                                       struct btree_iter *src)
+{
+       struct btree_iter *iter;
+       int i, idx;
+
+       idx = btree_trans_iter_alloc(trans);
+       if (idx < 0)
+               return ERR_PTR(idx);
+
+       trans->iters_live               |= 1ULL << idx;
+       trans->iters_touched            |= 1ULL << idx;
+       trans->iters_unlink_on_restart  |= 1ULL << idx;
+
+       iter = &trans->iters[idx];
+
+       memcpy(&iter->trans,
+              &src->trans,
+              (void *) &iter[1] - (void *) &iter->trans);
+
+       for (i = 0; i < BTREE_MAX_DEPTH; i++)
+               if (btree_node_locked(iter, i))
+                       six_lock_increment(&iter->l[i].b->lock,
+                                          __btree_lock_want(iter, i));
+
+       return &trans->iters[idx];
+}
+
 void *bch2_trans_kmalloc(struct btree_trans *trans,
                         size_t size)
 {
@@ -1883,8 +1884,7 @@ int bch2_trans_unlock(struct btree_trans *trans)
                unsigned idx = __ffs(iters);
                struct btree_iter *iter = &trans->iters[idx];
 
-               if (iter->flags & BTREE_ITER_ERROR)
-                       ret = -EIO;
+               ret = ret ?: btree_iter_err(iter);
 
                __bch2_btree_iter_unlock(iter);
                iters ^= 1 << idx;
@@ -1893,12 +1893,22 @@ int bch2_trans_unlock(struct btree_trans *trans)
        return ret;
 }
 
-void __bch2_trans_begin(struct btree_trans *trans)
+inline void bch2_trans_unlink_iters(struct btree_trans *trans, u64 iters)
 {
-       u64 linked_not_live;
-       unsigned idx;
+       iters &= trans->iters_linked;
+       iters &= ~trans->iters_live;
+
+       while (iters) {
+               unsigned idx = __ffs64(iters);
+
+               iters &= ~(1ULL << idx);
+               __bch2_trans_iter_free(trans, idx);
+       }
+}
 
-       btree_trans_verify(trans);
+void __bch2_trans_begin(struct btree_trans *trans)
+{
+       u64 iters_to_unlink;
 
        /*
         * On transaction restart, the transaction isn't required to allocate
@@ -1908,24 +1918,23 @@ void __bch2_trans_begin(struct btree_trans *trans)
         * further (allocated an iter with a higher idx) than where the iter
         * was originally allocated:
         */
-       while (1) {
-               linked_not_live = trans->iters_linked & ~trans->iters_live;
-               if (!linked_not_live)
-                       break;
+       iters_to_unlink = ~trans->iters_live &
+               ((1ULL << fls64(trans->iters_live)) - 1);
 
-               idx = __ffs64(linked_not_live);
-               if (1ULL << idx > trans->iters_live)
-                       break;
+       iters_to_unlink |= trans->iters_unlink_on_restart;
+       iters_to_unlink |= trans->iters_unlink_on_commit;
 
-               trans->iters_linked ^= 1 << idx;
-               bch2_btree_iter_unlink(&trans->iters[idx]);
-       }
+       trans->iters_live               = 0;
+
+       bch2_trans_unlink_iters(trans, iters_to_unlink);
 
-       trans->iters_live       = 0;
-       trans->nr_updates       = 0;
-       trans->mem_top          = 0;
+       trans->iters_touched            = 0;
+       trans->iters_unlink_on_restart  = 0;
+       trans->iters_unlink_on_commit   = 0;
+       trans->nr_updates               = 0;
+       trans->mem_top                  = 0;
 
-       btree_trans_verify(trans);
+       bch2_btree_iter_traverse_all(trans);
 }
 
 void bch2_trans_init(struct btree_trans *trans, struct bch_fs *c)
index 7c49a661277a53da0238d97ee023bfa44a5b0b15..c05b2dac1726d157cfdcb74eb8779ca02c8c36c4 100644 (file)
@@ -24,11 +24,35 @@ static inline struct btree *btree_node_parent(struct btree_iter *iter,
        return btree_iter_node(iter, b->level + 1);
 }
 
-static inline bool btree_iter_linked(const struct btree_iter *iter)
+static inline bool btree_trans_has_multiple_iters(const struct btree_trans *trans)
 {
-       return iter->next != iter;
+       return hweight64(trans->iters_linked) > 1;
 }
 
+static inline int btree_iter_err(const struct btree_iter *iter)
+{
+       return iter->flags & BTREE_ITER_ERROR ? -EIO : 0;
+}
+
+/* Iterate over iters within a transaction: */
+
+static inline struct btree_iter *
+__trans_next_iter(struct btree_trans *trans, unsigned idx)
+{
+       EBUG_ON(idx < trans->nr_iters && trans->iters[idx].idx != idx);
+
+       for (; idx < trans->nr_iters; idx++)
+               if (trans->iters_linked & (1ULL << idx))
+                       return &trans->iters[idx];
+
+       return NULL;
+}
+
+#define trans_for_each_iter(_trans, _iter)                             \
+       for (_iter = __trans_next_iter((_trans), 0);                    \
+            (_iter);                                                   \
+            _iter = __trans_next_iter((_trans), (_iter)->idx + 1))
+
 static inline bool __iter_has_node(const struct btree_iter *iter,
                                   const struct btree *b)
 {
@@ -45,59 +69,32 @@ static inline bool __iter_has_node(const struct btree_iter *iter,
 }
 
 static inline struct btree_iter *
-__next_linked_iter(struct btree_iter *iter, struct btree_iter *linked)
+__trans_next_iter_with_node(struct btree_trans *trans, struct btree *b,
+                           unsigned idx)
 {
-       return linked->next != iter ? linked->next : NULL;
-}
+       EBUG_ON(idx < trans->nr_iters && trans->iters[idx].idx != idx);
 
-static inline struct btree_iter *
-__next_iter_with_node(struct btree_iter *iter, struct btree *b,
-                     struct btree_iter *linked)
-{
-       while (linked && !__iter_has_node(linked, b))
-               linked = __next_linked_iter(iter, linked);
+       for (; idx < trans->nr_iters; idx++)
+               if ((trans->iters_linked & (1ULL << idx)) &&
+                   __iter_has_node(&trans->iters[idx], b))
+                       return &trans->iters[idx];
 
-       return linked;
+       return NULL;
 }
 
-/**
- * for_each_btree_iter - iterate over all iterators linked with @_iter,
- * including @_iter
- */
-#define for_each_btree_iter(_iter, _linked)                            \
-       for ((_linked) = (_iter); (_linked);                            \
-            (_linked) = __next_linked_iter(_iter, _linked))
-
-/**
- * for_each_btree_iter_with_node - iterate over all iterators linked with @_iter
- * that also point to @_b
- *
- * @_b is assumed to be locked by @_iter
- *
- * Filters out iterators that don't have a valid btree_node iterator for @_b -
- * i.e. iterators for which bch2_btree_node_relock() would not succeed.
- */
-#define for_each_btree_iter_with_node(_iter, _b, _linked)              \
-       for ((_linked) = (_iter);                                       \
-            ((_linked) = __next_iter_with_node(_iter, _b, _linked));   \
-            (_linked) = __next_linked_iter(_iter, _linked))
-
-/**
- * for_each_linked_btree_iter - iterate over all iterators linked with @_iter,
- * _not_ including @_iter
- */
-#define for_each_linked_btree_iter(_iter, _linked)                     \
-       for ((_linked) = (_iter)->next;                                 \
-            (_linked) != (_iter);                                      \
-            (_linked) = (_linked)->next)
+#define trans_for_each_iter_with_node(_trans, _b, _iter)               \
+       for (_iter = __trans_next_iter_with_node((_trans), (_b), 0);    \
+            (_iter);                                                   \
+            _iter = __trans_next_iter_with_node((_trans), (_b),        \
+                                                (_iter)->idx + 1))
 
 #ifdef CONFIG_BCACHEFS_DEBUG
 void bch2_btree_iter_verify(struct btree_iter *, struct btree *);
-void bch2_btree_iter_verify_locks(struct btree_iter *);
+void bch2_btree_trans_verify_locks(struct btree_trans *);
 #else
 static inline void bch2_btree_iter_verify(struct btree_iter *iter,
                                          struct btree *b) {}
-static inline void bch2_btree_iter_verify_locks(struct btree_iter *iter) {}
+static inline void bch2_btree_trans_verify_locks(struct btree_trans *iter) {}
 #endif
 
 void bch2_btree_node_iter_fix(struct btree_iter *, struct btree *,
@@ -105,7 +102,9 @@ void bch2_btree_node_iter_fix(struct btree_iter *, struct btree *,
                              unsigned, unsigned);
 
 int bch2_btree_iter_unlock(struct btree_iter *);
-bool bch2_btree_iter_relock(struct btree_iter *);
+
+bool bch2_btree_trans_relock(struct btree_trans *);
+void bch2_btree_trans_unlock(struct btree_trans *);
 
 bool __bch2_btree_iter_upgrade(struct btree_iter *, unsigned);
 bool __bch2_btree_iter_upgrade_nounlock(struct btree_iter *, unsigned);
@@ -137,6 +136,7 @@ void bch2_btree_iter_node_drop(struct btree_iter *, struct btree *);
 void bch2_btree_iter_reinit_node(struct btree_iter *, struct btree *);
 
 int __must_check bch2_btree_iter_traverse(struct btree_iter *);
+int bch2_btree_iter_traverse_all(struct btree_trans *);
 
 struct btree *bch2_btree_iter_peek_node(struct btree_iter *);
 struct btree *bch2_btree_iter_next_node(struct btree_iter *, unsigned);
@@ -151,22 +151,6 @@ struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *);
 void bch2_btree_iter_set_pos_same_leaf(struct btree_iter *, struct bpos);
 void bch2_btree_iter_set_pos(struct btree_iter *, struct bpos);
 
-void __bch2_btree_iter_init(struct btree_iter *, struct bch_fs *,
-                          enum btree_id, struct bpos,
-                          unsigned , unsigned, unsigned);
-
-static inline void bch2_btree_iter_init(struct btree_iter *iter,
-                       struct bch_fs *c, enum btree_id btree_id,
-                       struct bpos pos, unsigned flags)
-{
-       __bch2_btree_iter_init(iter, c, btree_id, pos,
-                              flags & BTREE_ITER_INTENT ? 1 : 0, 0,
-                              (btree_id == BTREE_ID_EXTENTS
-                               ?  BTREE_ITER_IS_EXTENTS : 0)|flags);
-}
-
-void bch2_btree_iter_copy(struct btree_iter *, struct btree_iter *);
-
 static inline struct bpos btree_type_successor(enum btree_id id,
                                               struct bpos pos)
 {
@@ -208,31 +192,34 @@ static inline int btree_iter_cmp(const struct btree_iter *l,
        return __btree_iter_cmp(l->btree_id, l->pos, r);
 }
 
+int bch2_trans_unlock(struct btree_trans *);
+
 /*
  * Unlocks before scheduling
  * Note: does not revalidate iterator
  */
-static inline void bch2_btree_iter_cond_resched(struct btree_iter *iter)
+static inline void bch2_trans_cond_resched(struct btree_trans *trans)
 {
        if (need_resched()) {
-               bch2_btree_iter_unlock(iter);
+               bch2_trans_unlock(trans);
                schedule();
        } else if (race_fault()) {
-               bch2_btree_iter_unlock(iter);
+               bch2_trans_unlock(trans);
        }
 }
 
-#define __for_each_btree_node(_iter, _c, _btree_id, _start,            \
+#define __for_each_btree_node(_trans, _iter, _btree_id, _start,        \
                              _locks_want, _depth, _flags, _b)          \
-       for (__bch2_btree_iter_init((_iter), (_c), (_btree_id), _start, \
-                                   _locks_want, _depth,                \
-                                   _flags|BTREE_ITER_NODES),           \
+       for (iter = bch2_trans_get_node_iter((_trans), (_btree_id),     \
+                               _start, _locks_want, _depth, _flags),   \
             _b = bch2_btree_iter_peek_node(_iter);                     \
             (_b);                                                      \
             (_b) = bch2_btree_iter_next_node(_iter, _depth))
 
-#define for_each_btree_node(_iter, _c, _btree_id, _start, _flags, _b)  \
-       __for_each_btree_node(_iter, _c, _btree_id, _start, 0, 0, _flags, _b)
+#define for_each_btree_node(_trans, _iter, _btree_id, _start,          \
+                           _flags, _b)                                 \
+       __for_each_btree_node(_trans, _iter, _btree_id, _start,         \
+                             0, 0, _flags, _b)
 
 static inline struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter,
                                                     unsigned flags)
@@ -245,16 +232,16 @@ static inline struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter,
 static inline struct bkey_s_c __bch2_btree_iter_next(struct btree_iter *iter,
                                                     unsigned flags)
 {
-       bch2_btree_iter_cond_resched(iter);
+       bch2_trans_cond_resched(iter->trans);
 
        return flags & BTREE_ITER_SLOTS
                ? bch2_btree_iter_next_slot(iter)
                : bch2_btree_iter_next(iter);
 }
 
-#define for_each_btree_key(_iter, _c, _btree_id,  _start, _flags, _k)  \
-       for (bch2_btree_iter_init((_iter), (_c), (_btree_id),           \
-                                 (_start), (_flags)),                  \
+#define for_each_btree_key(_trans, _iter, _btree_id,  _start, _flags, _k)\
+       for (iter = bch2_trans_get_iter((_trans), (_btree_id),          \
+                                       (_start), (_flags)),            \
             (_k) = __bch2_btree_iter_peek(_iter, _flags);              \
             !IS_ERR_OR_NULL((_k).k);                                   \
             (_k) = __bch2_btree_iter_next(_iter, _flags))
@@ -264,7 +251,7 @@ static inline struct bkey_s_c __bch2_btree_iter_next(struct btree_iter *iter,
             !IS_ERR_OR_NULL((_k).k);                                   \
             (_k) = __bch2_btree_iter_next(_iter, _flags))
 
-static inline int btree_iter_err(struct bkey_s_c k)
+static inline int bkey_err(struct bkey_s_c k)
 {
        return PTR_ERR_OR_ZERO(k.k);
 }
@@ -272,13 +259,16 @@ static inline int btree_iter_err(struct bkey_s_c k)
 /* new multiple iterator interface: */
 
 void bch2_trans_preload_iters(struct btree_trans *);
-void bch2_trans_iter_put(struct btree_trans *, struct btree_iter *);
-void bch2_trans_iter_free(struct btree_trans *, struct btree_iter *);
+int bch2_trans_iter_put(struct btree_trans *, struct btree_iter *);
+int bch2_trans_iter_free(struct btree_trans *, struct btree_iter *);
+int bch2_trans_iter_free_on_commit(struct btree_trans *, struct btree_iter *);
+
+void bch2_trans_unlink_iters(struct btree_trans *, u64);
 
 struct btree_iter *__bch2_trans_get_iter(struct btree_trans *, enum btree_id,
                                         struct bpos, unsigned, u64);
-struct btree_iter *__bch2_trans_copy_iter(struct btree_trans *,
-                                         struct btree_iter *, u64);
+struct btree_iter *bch2_trans_copy_iter(struct btree_trans *,
+                                       struct btree_iter *);
 
 static __always_inline u64 __btree_iter_id(void)
 {
@@ -299,12 +289,9 @@ bch2_trans_get_iter(struct btree_trans *trans, enum btree_id btree_id,
                                     __btree_iter_id());
 }
 
-static __always_inline struct btree_iter *
-bch2_trans_copy_iter(struct btree_trans *trans, struct btree_iter *src)
-{
-
-       return __bch2_trans_copy_iter(trans, src, __btree_iter_id());
-}
+struct btree_iter *bch2_trans_get_node_iter(struct btree_trans *,
+                               enum btree_id, struct bpos,
+                               unsigned, unsigned, unsigned);
 
 void __bch2_trans_begin(struct btree_trans *);
 
@@ -314,7 +301,6 @@ static inline void bch2_trans_begin_updates(struct btree_trans *trans)
 }
 
 void *bch2_trans_kmalloc(struct btree_trans *, size_t);
-int bch2_trans_unlock(struct btree_trans *);
 void bch2_trans_init(struct btree_trans *, struct bch_fs *);
 int bch2_trans_exit(struct btree_trans *);
 
index f565fa367bf144eaba406a45beddc6b19e522024..e9686197a90817aa4473f90f371a2e2aafeb2ac7 100644 (file)
@@ -163,8 +163,9 @@ static inline bool btree_node_lock_increment(struct btree_iter *iter,
 {
        struct btree_iter *linked;
 
-       for_each_linked_btree_iter(iter, linked)
-               if (linked->l[level].b == b &&
+       trans_for_each_iter(iter->trans, linked)
+               if (linked != iter &&
+                   linked->l[level].b == b &&
                    btree_node_locked_type(linked, level) >= want) {
                        six_lock_increment(&b->lock, want);
                        return true;
index d566722aa482716c08784c01723d630877bb99a6..3d869dd80c267ee536b482de5b19c1858ee3a019 100644 (file)
@@ -10,6 +10,7 @@
 
 struct open_bucket;
 struct btree_update;
+struct btree_trans;
 
 #define MAX_BSETS              3U
 
@@ -208,7 +209,9 @@ enum btree_iter_uptodate {
  * @nodes_intent_locked        - bitmask indicating which locks are intent locks
  */
 struct btree_iter {
-       struct bch_fs           *c;
+       u8                      idx;
+
+       struct btree_trans      *trans;
        struct bpos             pos;
 
        u8                      flags;
@@ -232,15 +235,6 @@ struct btree_iter {
        struct bkey             k;
 
        u64                     id;
-
-       /*
-        * Circular linked list of linked iterators: linked iterators share
-        * locks (e.g. two linked iterators may have the same node intent
-        * locked, or read and write locked, at the same time), and insertions
-        * through one iterator won't invalidate the other linked iterators.
-        */
-       /* Must come last: */
-       struct btree_iter       *next;
 };
 
 struct deferred_update {
@@ -275,8 +269,11 @@ struct btree_trans {
        size_t                  nr_restarts;
        u64                     commit_start;
 
-       u64                     iters_live;
        u64                     iters_linked;
+       u64                     iters_live;
+       u64                     iters_touched;
+       u64                     iters_unlink_on_restart;
+       u64                     iters_unlink_on_commit;
 
        u8                      nr_iters;
        u8                      nr_updates;
index ce5fa6b25cf67a872e97725b58e33b75ce6bb8b6..944b6c24305d0c543eb6991ffc1fb17501cbf243 100644 (file)
@@ -98,19 +98,13 @@ int bch2_btree_node_rewrite(struct bch_fs *c, struct btree_iter *,
 int bch2_btree_node_update_key(struct bch_fs *, struct btree_iter *,
                               struct btree *, struct bkey_i_btree_ptr *);
 
-static inline void
-bch2_trans_update(struct btree_trans *trans,
-                 struct btree_insert_entry entry)
-{
-       BUG_ON(trans->nr_updates >= trans->nr_iters + 4);
-
-       trans->updates[trans->nr_updates++] = entry;
-}
-
 int bch2_trans_commit(struct btree_trans *,
                      struct disk_reservation *,
                      u64 *, unsigned);
 
+struct btree_insert_entry *bch2_trans_update(struct btree_trans *,
+                                            struct btree_insert_entry);
+
 #define bch2_trans_do(_c, _journal_seq, _flags, _do)                   \
 ({                                                                     \
        struct btree_trans trans;                                       \
index 47196c14096b7ed8f184f1363d836a6391de1fea..620217276e8519a7f5b4b6e2118eae70a6fdd79b 100644 (file)
@@ -245,7 +245,7 @@ void bch2_btree_node_free_inmem(struct bch_fs *c, struct btree *b,
 {
        struct btree_iter *linked;
 
-       for_each_btree_iter(iter, linked)
+       trans_for_each_iter(iter->trans, linked)
                BUG_ON(linked->l[b->level].b == b);
 
        /*
@@ -1437,7 +1437,7 @@ static void btree_split(struct btree_update *as, struct btree *b,
 
        bch2_btree_node_free_inmem(c, b, iter);
 
-       bch2_btree_iter_verify_locks(iter);
+       bch2_btree_trans_verify_locks(iter->trans);
 
        bch2_time_stats_update(&c->times[BCH_TIME_btree_node_split],
                               start_time);
@@ -1473,7 +1473,7 @@ bch2_btree_insert_keys_interior(struct btree_update *as, struct btree *b,
 
        btree_update_updated_node(as, b);
 
-       for_each_btree_iter_with_node(iter, b, linked)
+       trans_for_each_iter_with_node(iter->trans, b, linked)
                bch2_btree_node_iter_peek(&linked->l[b->level].iter, b);
 
        bch2_btree_iter_verify(iter, b);
@@ -1558,7 +1558,7 @@ int bch2_btree_split_leaf(struct bch_fs *c, struct btree_iter *iter,
         * We already have a disk reservation and open buckets pinned; this
         * allocation must not block:
         */
-       for_each_btree_iter(iter, linked)
+       trans_for_each_iter(iter->trans, linked)
                if (linked->btree_id == BTREE_ID_EXTENTS)
                        flags |= BTREE_INSERT_USE_RESERVE;
 
@@ -1570,10 +1570,10 @@ int bch2_btree_split_leaf(struct bch_fs *c, struct btree_iter *iter,
                if (flags & BTREE_INSERT_NOUNLOCK)
                        return -EINTR;
 
-               bch2_btree_iter_unlock(iter);
+               bch2_btree_trans_unlock(iter->trans);
                down_read(&c->gc_lock);
 
-               if (btree_iter_linked(iter))
+               if (!bch2_btree_trans_relock(iter->trans))
                        ret = -EINTR;
        }
 
@@ -1752,7 +1752,7 @@ retry:
        if (!(flags & BTREE_INSERT_GC_LOCK_HELD))
                up_read(&c->gc_lock);
 out:
-       bch2_btree_iter_verify_locks(iter);
+       bch2_btree_trans_verify_locks(iter->trans);
 
        /*
         * Don't downgrade locks here: we're called after successful insert,
@@ -2035,10 +2035,10 @@ int bch2_btree_node_update_key(struct bch_fs *c, struct btree_iter *iter,
                return -EINTR;
 
        if (!down_read_trylock(&c->gc_lock)) {
-               bch2_btree_iter_unlock(iter);
+               bch2_btree_trans_unlock(iter->trans);
                down_read(&c->gc_lock);
 
-               if (!bch2_btree_iter_relock(iter)) {
+               if (!bch2_btree_trans_relock(iter->trans)) {
                        ret = -EINTR;
                        goto err;
                }
@@ -2049,15 +2049,15 @@ int bch2_btree_node_update_key(struct bch_fs *c, struct btree_iter *iter,
                /* bch2_btree_reserve_get will unlock */
                ret = bch2_btree_cache_cannibalize_lock(c, &cl);
                if (ret) {
-                       ret = -EINTR;
-
-                       bch2_btree_iter_unlock(iter);
+                       bch2_btree_trans_unlock(iter->trans);
                        up_read(&c->gc_lock);
                        closure_sync(&cl);
                        down_read(&c->gc_lock);
 
-                       if (!bch2_btree_iter_relock(iter))
+                       if (!bch2_btree_trans_relock(iter->trans)) {
+                               ret = -EINTR;
                                goto err;
+                       }
                }
 
                new_hash = bch2_btree_node_mem_alloc(c);
@@ -2078,12 +2078,12 @@ int bch2_btree_node_update_key(struct bch_fs *c, struct btree_iter *iter,
                if (ret != -EINTR)
                        goto err;
 
-               bch2_btree_iter_unlock(iter);
+               bch2_btree_trans_unlock(iter->trans);
                up_read(&c->gc_lock);
                closure_sync(&cl);
                down_read(&c->gc_lock);
 
-               if (!bch2_btree_iter_relock(iter))
+               if (!bch2_btree_trans_relock(iter->trans))
                        goto err;
        }
 
index e207b0997056b999f7a61869bb16523701d20a2a..142230cf03f28f8b92a4849b5666c6488b13707f 100644 (file)
@@ -50,25 +50,6 @@ static void btree_trans_unlock_write(struct btree_trans *trans)
                bch2_btree_node_unlock_write(i->iter->l[0].b, i->iter);
 }
 
-static bool btree_trans_relock(struct btree_trans *trans)
-{
-       struct btree_insert_entry *i;
-
-       trans_for_each_update_iter(trans, i)
-               return bch2_btree_iter_relock(i->iter);
-       return true;
-}
-
-static void btree_trans_unlock(struct btree_trans *trans)
-{
-       struct btree_insert_entry *i;
-
-       trans_for_each_update_iter(trans, i) {
-               bch2_btree_iter_unlock(i->iter);
-               break;
-       }
-}
-
 static inline int btree_trans_cmp(struct btree_insert_entry l,
                                  struct btree_insert_entry r)
 {
@@ -421,8 +402,6 @@ static inline void btree_insert_entry_checks(struct btree_trans *trans,
 
                EBUG_ON((i->iter->flags & BTREE_ITER_IS_EXTENTS) &&
                        !(trans->flags & BTREE_INSERT_ATOMIC));
-
-               bch2_btree_iter_verify_locks(i->iter);
        }
 
        BUG_ON(debug_check_bkeys(c) &&
@@ -450,14 +429,14 @@ static int bch2_trans_journal_preres_get(struct btree_trans *trans)
        if (ret != -EAGAIN)
                return ret;
 
-       btree_trans_unlock(trans);
+       bch2_btree_trans_unlock(trans);
 
        ret = bch2_journal_preres_get(&c->journal,
                        &trans->journal_preres, u64s, 0);
        if (ret)
                return ret;
 
-       if (!btree_trans_relock(trans)) {
+       if (!bch2_btree_trans_relock(trans)) {
                trans_restart(" (iter relock after journal preres get blocked)");
                return -EINTR;
        }
@@ -616,12 +595,9 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
                 * have been traversed/locked, depending on what the caller was
                 * doing:
                 */
-               trans_for_each_update_iter(trans, i) {
-                       for_each_btree_iter(i->iter, linked)
-                               if (linked->uptodate < BTREE_ITER_NEED_RELOCK)
-                                       linked->flags |= BTREE_ITER_NOUNLOCK;
-                       break;
-               }
+               trans_for_each_iter(trans, linked)
+                       if (linked->uptodate < BTREE_ITER_NEED_RELOCK)
+                               linked->flags |= BTREE_ITER_NOUNLOCK;
        }
 
        trans_for_each_update_iter(trans, i)
@@ -706,20 +682,20 @@ int bch2_trans_commit_error(struct btree_trans *trans,
                                return ret;
                }
 
-               if (btree_trans_relock(trans))
+               if (bch2_btree_trans_relock(trans))
                        return 0;
 
                trans_restart(" (iter relock after marking replicas)");
                ret = -EINTR;
                break;
        case BTREE_INSERT_NEED_JOURNAL_RES:
-               btree_trans_unlock(trans);
+               bch2_btree_trans_unlock(trans);
 
                ret = bch2_trans_journal_res_get(trans, JOURNAL_RES_GET_CHECK);
                if (ret)
                        return ret;
 
-               if (btree_trans_relock(trans))
+               if (bch2_btree_trans_relock(trans))
                        return 0;
 
                trans_restart(" (iter relock after journal res get blocked)");
@@ -731,14 +707,11 @@ int bch2_trans_commit_error(struct btree_trans *trans,
        }
 
        if (ret == -EINTR) {
-               trans_for_each_update_iter(trans, i) {
-                       int ret2 = bch2_btree_iter_traverse(i->iter);
-                       if (ret2) {
-                               trans_restart(" (traverse)");
-                               return ret2;
-                       }
+               int ret2 = bch2_btree_iter_traverse_all(trans);
 
-                       BUG_ON(i->iter->uptodate > BTREE_ITER_NEED_PEEK);
+               if (ret2) {
+                       trans_restart(" (traverse)");
+                       return ret2;
                }
 
                /*
@@ -784,10 +757,9 @@ static int __bch2_trans_commit(struct btree_trans *trans,
                        goto err;
                }
 
-               if (i->iter->flags & BTREE_ITER_ERROR) {
-                       ret = -EIO;
+               ret = btree_iter_err(i->iter);
+               if (ret)
                        goto err;
-               }
        }
 
        ret = do_btree_insert_at(trans, stopped_at);
@@ -801,16 +773,10 @@ static int __bch2_trans_commit(struct btree_trans *trans,
                bch2_btree_iter_downgrade(i->iter);
 err:
        /* make sure we didn't drop or screw up locks: */
-       trans_for_each_update_iter(trans, i) {
-               bch2_btree_iter_verify_locks(i->iter);
-               break;
-       }
+       bch2_btree_trans_verify_locks(trans);
 
-       trans_for_each_update_iter(trans, i) {
-               for_each_btree_iter(i->iter, linked)
-                       linked->flags &= ~BTREE_ITER_NOUNLOCK;
-               break;
-       }
+       trans_for_each_iter(trans, linked)
+               linked->flags &= ~BTREE_ITER_NOUNLOCK;
 
        return ret;
 }
@@ -842,17 +808,16 @@ int bch2_trans_commit(struct btree_trans *trans,
        trans->journal_seq      = journal_seq;
        trans->flags            = flags;
 
-       bubble_sort(trans->updates, trans->nr_updates, btree_trans_cmp);
-
        trans_for_each_update(trans, i)
                btree_insert_entry_checks(trans, i);
+       bch2_btree_trans_verify_locks(trans);
 
        if (unlikely(!(trans->flags & BTREE_INSERT_NOCHECK_RW) &&
                     !percpu_ref_tryget(&c->writes))) {
                if (likely(!(trans->flags & BTREE_INSERT_LAZY_RW)))
                        return -EROFS;
 
-               btree_trans_unlock(trans);
+               bch2_btree_trans_unlock(trans);
 
                ret = bch2_fs_read_write_early(c);
                if (ret)
@@ -860,7 +825,7 @@ int bch2_trans_commit(struct btree_trans *trans,
 
                percpu_ref_get(&c->writes);
 
-               if (!btree_trans_relock(trans)) {
+               if (!bch2_btree_trans_relock(trans)) {
                        ret = -EINTR;
                        goto err;
                }
@@ -885,10 +850,15 @@ out_noupdates:
                trans->commit_start = 0;
        }
 
-       trans->nr_updates = 0;
-
        BUG_ON(!(trans->flags & BTREE_INSERT_ATOMIC) && ret == -EINTR);
 
+       bch2_trans_unlink_iters(trans, trans->iters_unlink_on_commit);
+       if (!ret) {
+               bch2_trans_unlink_iters(trans, ~trans->iters_touched);
+               trans->iters_touched = 0;
+       }
+       trans->nr_updates = 0;
+
        return ret;
 err:
        ret = bch2_trans_commit_error(trans, i, ret);
@@ -898,6 +868,26 @@ err:
        goto out;
 }
 
+struct btree_insert_entry *bch2_trans_update(struct btree_trans *trans,
+                                            struct btree_insert_entry entry)
+{
+       struct btree_insert_entry *i;
+
+       BUG_ON(trans->nr_updates >= trans->nr_iters + 4);
+
+       for (i = trans->updates;
+            i < trans->updates + trans->nr_updates;
+            i++)
+               if (btree_trans_cmp(entry, *i) < 0)
+                       break;
+
+       memmove(&i[1], &i[0],
+               (void *) &trans->updates[trans->nr_updates] - (void *) i);
+       trans->nr_updates++;
+       *i = entry;
+       return i;
+}
+
 int bch2_btree_delete_at(struct btree_trans *trans,
                         struct btree_iter *iter, unsigned flags)
 {
@@ -960,7 +950,7 @@ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id,
        iter = bch2_trans_get_iter(&trans, id, start, BTREE_ITER_INTENT);
 
        while ((k = bch2_btree_iter_peek(iter)).k &&
-              !(ret = btree_iter_err(k)) &&
+              !(ret = bkey_err(k)) &&
               bkey_cmp(iter->pos, end) < 0) {
                unsigned max_sectors = KEY_SIZE_MAX & (~0 << c->block_bits);
                /* really shouldn't be using a bare, unpadded bkey_i */
@@ -997,7 +987,7 @@ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id,
                if (ret)
                        break;
 
-               bch2_btree_iter_cond_resched(iter);
+               bch2_trans_cond_resched(&trans);
        }
 
        bch2_trans_exit(&trans);
index 4e33e7b8187228ede85e76793a2e88d97dd13060..82d90cdea0032c36cd85a9af8c4eb5766d05e45b 100644 (file)
@@ -302,8 +302,8 @@ static ssize_t bch2_data_job_read(struct file *file, char __user *buf,
        struct bch_ioctl_data_event e = {
                .type                   = BCH_DATA_EVENT_PROGRESS,
                .p.data_type            = ctx->stats.data_type,
-               .p.btree_id             = ctx->stats.iter.btree_id,
-               .p.pos                  = ctx->stats.iter.pos,
+               .p.btree_id             = ctx->stats.btree_id,
+               .p.pos                  = ctx->stats.pos,
                .p.sectors_done         = atomic64_read(&ctx->stats.sectors_seen),
                .p.sectors_total        = bch2_fs_usage_read_short(c).used,
        };
index e8a671a1b8985e4ed600058e9c6a7e4e779976ee..a22ac8d60bb012f67df4c25d1bbac77d2a748205 100644 (file)
@@ -204,7 +204,8 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf,
                               size_t size, loff_t *ppos)
 {
        struct dump_iter *i = file->private_data;
-       struct btree_iter iter;
+       struct btree_trans trans;
+       struct btree_iter *iter;
        struct bkey_s_c k;
        int err;
 
@@ -219,18 +220,20 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf,
        if (!i->size)
                return i->ret;
 
-       bch2_btree_iter_init(&iter, i->c, i->id, i->from, BTREE_ITER_PREFETCH);
-       k = bch2_btree_iter_peek(&iter);
+       bch2_trans_init(&trans, i->c);
 
-       while (k.k && !(err = btree_iter_err(k))) {
+       iter = bch2_trans_get_iter(&trans, i->id, i->from, BTREE_ITER_PREFETCH);
+       k = bch2_btree_iter_peek(iter);
+
+       while (k.k && !(err = bkey_err(k))) {
                bch2_bkey_val_to_text(&PBUF(i->buf), i->c, k);
                i->bytes = strlen(i->buf);
                BUG_ON(i->bytes >= PAGE_SIZE);
                i->buf[i->bytes] = '\n';
                i->bytes++;
 
-               k = bch2_btree_iter_next(&iter);
-               i->from = iter.pos;
+               k = bch2_btree_iter_next(iter);
+               i->from = iter->pos;
 
                err = flush_buf(i);
                if (err)
@@ -239,7 +242,7 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf,
                if (!i->size)
                        break;
        }
-       bch2_btree_iter_unlock(&iter);
+       bch2_trans_exit(&trans);
 
        return err < 0 ? err : i->ret;
 }
@@ -255,7 +258,8 @@ static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf,
                                       size_t size, loff_t *ppos)
 {
        struct dump_iter *i = file->private_data;
-       struct btree_iter iter;
+       struct btree_trans trans;
+       struct btree_iter *iter;
        struct btree *b;
        int err;
 
@@ -270,7 +274,9 @@ static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf,
        if (!i->size || !bkey_cmp(POS_MAX, i->from))
                return i->ret;
 
-       for_each_btree_node(&iter, i->c, i->id, i->from, 0, b) {
+       bch2_trans_init(&trans, i->c);
+
+       for_each_btree_node(&trans, iter, i->id, i->from, 0, b) {
                bch2_btree_node_to_text(&PBUF(i->buf), i->c, b);
                i->bytes = strlen(i->buf);
                err = flush_buf(i);
@@ -288,7 +294,7 @@ static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf,
                if (!i->size)
                        break;
        }
-       bch2_btree_iter_unlock(&iter);
+       bch2_trans_exit(&trans);
 
        return err < 0 ? err : i->ret;
 }
@@ -304,7 +310,8 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf,
                                       size_t size, loff_t *ppos)
 {
        struct dump_iter *i = file->private_data;
-       struct btree_iter iter;
+       struct btree_trans trans;
+       struct btree_iter *iter;
        struct bkey_s_c k;
        struct btree *prev_node = NULL;
        int err;
@@ -320,11 +327,13 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf,
        if (!i->size)
                return i->ret;
 
-       bch2_btree_iter_init(&iter, i->c, i->id, i->from, BTREE_ITER_PREFETCH);
+       bch2_trans_init(&trans, i->c);
+
+       iter = bch2_trans_get_iter(&trans, i->id, i->from, BTREE_ITER_PREFETCH);
 
-       while ((k = bch2_btree_iter_peek(&iter)).k &&
-              !(err = btree_iter_err(k))) {
-               struct btree_iter_level *l = &iter.l[0];
+       while ((k = bch2_btree_iter_peek(iter)).k &&
+              !(err = bkey_err(k))) {
+               struct btree_iter_level *l = &iter->l[0];
                struct bkey_packed *_k =
                        bch2_btree_node_iter_peek(&l->iter, l->b);
 
@@ -343,8 +352,8 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf,
                if (err)
                        break;
 
-               bch2_btree_iter_next(&iter);
-               i->from = iter.pos;
+               bch2_btree_iter_next(iter);
+               i->from = iter->pos;
 
                err = flush_buf(i);
                if (err)
@@ -353,7 +362,7 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf,
                if (!i->size)
                        break;
        }
-       bch2_btree_iter_unlock(&iter);
+       bch2_trans_exit(&trans);
 
        return err < 0 ? err : i->ret;
 }
index 550561e67ad7c3dc5544a14fcb21625484a5753f..58289fccb8dc88a7949bbf49c475c1243397e190 100644 (file)
@@ -330,11 +330,15 @@ out:
 
 int bch2_empty_dir(struct bch_fs *c, u64 dir_inum)
 {
-       struct btree_iter iter;
+       struct btree_trans trans;
+       struct btree_iter *iter;
        struct bkey_s_c k;
        int ret = 0;
 
-       for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, POS(dir_inum, 0), 0, k) {
+       bch2_trans_init(&trans, c);
+
+       for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS,
+                          POS(dir_inum, 0), 0, k) {
                if (k.k->p.inode > dir_inum)
                        break;
 
@@ -343,7 +347,7 @@ int bch2_empty_dir(struct bch_fs *c, u64 dir_inum)
                        break;
                }
        }
-       bch2_btree_iter_unlock(&iter);
+       bch2_trans_exit(&trans);
 
        return ret;
 }
@@ -352,7 +356,8 @@ int bch2_readdir(struct bch_fs *c, struct file *file,
                 struct dir_context *ctx)
 {
        struct bch_inode_info *inode = file_bch_inode(file);
-       struct btree_iter iter;
+       struct btree_trans trans;
+       struct btree_iter *iter;
        struct bkey_s_c k;
        struct bkey_s_c_dirent dirent;
        unsigned len;
@@ -360,7 +365,9 @@ int bch2_readdir(struct bch_fs *c, struct file *file,
        if (!dir_emit_dots(file, ctx))
                return 0;
 
-       for_each_btree_key(&iter, c, BTREE_ID_DIRENTS,
+       bch2_trans_init(&trans, c);
+
+       for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS,
                           POS(inode->v.i_ino, ctx->pos), 0, k) {
                if (k.k->type != KEY_TYPE_dirent)
                        continue;
@@ -386,7 +393,7 @@ int bch2_readdir(struct bch_fs *c, struct file *file,
 
                ctx->pos = k.k->p.offset + 1;
        }
-       bch2_btree_iter_unlock(&iter);
+       bch2_trans_exit(&trans);
 
        return 0;
 }
index ea6f4867ae81defa3448c8f3caa56187d9a1a926..e5df91495ba1e64b5adef593c82c9d5b48c7223a 100644 (file)
@@ -397,7 +397,8 @@ static void ec_block_io(struct bch_fs *c, struct ec_stripe_buf *buf,
 /* recovery read path: */
 int bch2_ec_read_extent(struct bch_fs *c, struct bch_read_bio *rbio)
 {
-       struct btree_iter iter;
+       struct btree_trans trans;
+       struct btree_iter *iter;
        struct ec_stripe_buf *buf;
        struct closure cl;
        struct bkey_s_c k;
@@ -418,19 +419,21 @@ int bch2_ec_read_extent(struct bch_fs *c, struct bch_read_bio *rbio)
        if (!buf)
                return -ENOMEM;
 
-       bch2_btree_iter_init(&iter, c, BTREE_ID_EC,
-                            POS(0, stripe_idx),
-                            BTREE_ITER_SLOTS);
-       k = bch2_btree_iter_peek_slot(&iter);
-       if (btree_iter_err(k) || k.k->type != KEY_TYPE_stripe) {
+       bch2_trans_init(&trans, c);
+
+       iter = bch2_trans_get_iter(&trans, BTREE_ID_EC,
+                                  POS(0, stripe_idx),
+                                  BTREE_ITER_SLOTS);
+       k = bch2_btree_iter_peek_slot(iter);
+       if (bkey_err(k) || k.k->type != KEY_TYPE_stripe) {
                __bcache_io_error(c,
                        "error doing reconstruct read: stripe not found");
                kfree(buf);
-               return bch2_btree_iter_unlock(&iter) ?: -EIO;
+               return bch2_trans_exit(&trans) ?: -EIO;
        }
 
        bkey_reassemble(&buf->key.k_i, k);
-       bch2_btree_iter_unlock(&iter);
+       bch2_trans_exit(&trans);
 
        v = &buf->key.v;
 
@@ -537,7 +540,7 @@ static int ec_stripe_mem_alloc(struct bch_fs *c,
        if (!__ec_stripe_mem_alloc(c, idx, GFP_NOWAIT))
                return 0;
 
-       bch2_btree_iter_unlock(iter);
+       bch2_btree_trans_unlock(iter->trans);
 
        if (!__ec_stripe_mem_alloc(c, idx, GFP_KERNEL))
                return -EINTR;
@@ -746,7 +749,7 @@ static int ec_stripe_update_ptrs(struct bch_fs *c,
                                   BTREE_ITER_INTENT);
 
        while ((k = bch2_btree_iter_peek(iter)).k &&
-              !(ret = btree_iter_err(k)) &&
+              !(ret = bkey_err(k)) &&
               bkey_cmp(bkey_start_pos(k.k), pos->p) < 0) {
                idx = extent_matches_stripe(c, &s->key.v, k);
                if (idx < 0) {
@@ -1166,7 +1169,7 @@ static int __bch2_stripe_write_key(struct btree_trans *trans,
        bch2_btree_iter_set_pos(iter, POS(0, idx));
 
        k = bch2_btree_iter_peek_slot(iter);
-       ret = btree_iter_err(k);
+       ret = bkey_err(k);
        if (ret)
                return ret;
 
@@ -1237,7 +1240,8 @@ static void bch2_stripe_read_key(struct bch_fs *c, struct bkey_s_c k)
 int bch2_stripes_read(struct bch_fs *c, struct list_head *journal_replay_list)
 {
        struct journal_replay *r;
-       struct btree_iter iter;
+       struct btree_trans trans;
+       struct btree_iter *iter;
        struct bkey_s_c k;
        int ret;
 
@@ -1245,12 +1249,14 @@ int bch2_stripes_read(struct bch_fs *c, struct list_head *journal_replay_list)
        if (ret)
                return ret;
 
-       for_each_btree_key(&iter, c, BTREE_ID_EC, POS_MIN, 0, k) {
+       bch2_trans_init(&trans, c);
+
+       for_each_btree_key(&trans, iter, BTREE_ID_EC, POS_MIN, 0, k) {
                bch2_stripe_read_key(c, k);
-               bch2_btree_iter_cond_resched(&iter);
+               bch2_trans_cond_resched(&trans);
        }
 
-       ret = bch2_btree_iter_unlock(&iter);
+       ret = bch2_trans_exit(&trans);
        if (ret)
                return ret;
 
@@ -1268,17 +1274,20 @@ int bch2_stripes_read(struct bch_fs *c, struct list_head *journal_replay_list)
 
 int bch2_ec_mem_alloc(struct bch_fs *c, bool gc)
 {
-       struct btree_iter iter;
+       struct btree_trans trans;
+       struct btree_iter *iter;
        struct bkey_s_c k;
        size_t i, idx = 0;
        int ret = 0;
 
-       bch2_btree_iter_init(&iter, c, BTREE_ID_EC, POS(0, U64_MAX), 0);
+       bch2_trans_init(&trans, c);
+
+       iter = bch2_trans_get_iter(&trans, BTREE_ID_EC, POS(0, U64_MAX), 0);
 
-       k = bch2_btree_iter_prev(&iter);
+       k = bch2_btree_iter_prev(iter);
        if (!IS_ERR_OR_NULL(k.k))
                idx = k.k->p.offset + 1;
-       ret = bch2_btree_iter_unlock(&iter);
+       ret = bch2_trans_exit(&trans);
        if (ret)
                return ret;
 
index 9505b6e6d3752d50ed29703ccef53b75429dc3aa..e3747781c4b9219c29af312870cfb1054ea79b2a 100644 (file)
@@ -66,10 +66,20 @@ enum fsck_err_ret bch2_fsck_err(struct bch_fs *c, unsigned flags,
        bool fix = false, print = true, suppressing = false;
        char _buf[sizeof(s->buf)], *buf = _buf;
 
-       mutex_lock(&c->fsck_error_lock);
+       if (test_bit(BCH_FS_FSCK_DONE, &c->flags)) {
+               va_start(args, fmt);
+               vprintk(fmt, args);
+               va_end(args);
 
-       if (test_bit(BCH_FS_FSCK_DONE, &c->flags))
-               goto print;
+               if (c->opts.errors == BCH_ON_ERROR_CONTINUE &&
+                   flags & FSCK_CAN_FIX)
+                       return FSCK_ERR_FIX;
+
+               bch2_inconsistent_error(c);
+               return FSCK_ERR_EXIT;
+       }
+
+       mutex_lock(&c->fsck_error_lock);
 
        list_for_each_entry(s, &c->fsck_errors, list)
                if (s->fmt == fmt)
index 1ab951c97a890e9f09b285dccf2a3836c3a57747..aa2fc7793b17f8d1f1874b0d2cf9e8c030fb0ccb 100644 (file)
@@ -788,7 +788,8 @@ static bool bch2_extent_merge_inline(struct bch_fs *,
                                     struct bkey_packed *,
                                     bool);
 
-static void verify_extent_nonoverlapping(struct btree *b,
+static void verify_extent_nonoverlapping(struct bch_fs *c,
+                                        struct btree *b,
                                         struct btree_node_iter *_iter,
                                         struct bkey_i *insert)
 {
@@ -797,6 +798,9 @@ static void verify_extent_nonoverlapping(struct btree *b,
        struct bkey_packed *k;
        struct bkey uk;
 
+       if (!expensive_debug_checks(c))
+               return;
+
        iter = *_iter;
        k = bch2_btree_node_iter_prev_filter(&iter, b, KEY_TYPE_discard);
        BUG_ON(k &&
@@ -847,7 +851,7 @@ static void extent_bset_insert(struct bch_fs *c, struct btree_iter *iter,
        BUG_ON(insert->k.u64s > bch_btree_keys_u64s_remaining(c, l->b));
 
        EBUG_ON(bkey_deleted(&insert->k) || !insert->k.size);
-       verify_extent_nonoverlapping(l->b, &l->iter, insert);
+       verify_extent_nonoverlapping(c, l->b, &l->iter, insert);
 
        node_iter = l->iter;
        k = bch2_btree_node_iter_prev_filter(&node_iter, l->b, KEY_TYPE_discard);
@@ -1618,15 +1622,18 @@ static bool bch2_extent_merge_inline(struct bch_fs *c,
 bool bch2_check_range_allocated(struct bch_fs *c, struct bpos pos, u64 size,
                               unsigned nr_replicas)
 {
-       struct btree_iter iter;
+       struct btree_trans trans;
+       struct btree_iter *iter;
        struct bpos end = pos;
        struct bkey_s_c k;
        bool ret = true;
 
        end.offset += size;
 
-       for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, pos,
-                            BTREE_ITER_SLOTS, k) {
+       bch2_trans_init(&trans, c);
+
+       for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, pos,
+                          BTREE_ITER_SLOTS, k) {
                if (bkey_cmp(bkey_start_pos(k.k), end) >= 0)
                        break;
 
@@ -1635,7 +1642,7 @@ bool bch2_check_range_allocated(struct bch_fs *c, struct bpos pos, u64 size,
                        break;
                }
        }
-       bch2_btree_iter_unlock(&iter);
+       bch2_trans_exit(&trans);
 
        return ret;
 }
index d3a03641cbfd057d30e5ec29855032f5467261e8..ef658ad0c075de39107b30a44a5f626cf1181fb2 100644 (file)
@@ -251,7 +251,7 @@ static int sum_sector_overwrites(struct btree_trans *trans,
                 * carefully not advancing past @new and thus whatever leaf node
                 * @_iter currently points to:
                 */
-               BUG_ON(btree_iter_err(old));
+               BUG_ON(bkey_err(old));
 
                if (allocating &&
                    !*allocating &&
@@ -322,10 +322,10 @@ static int bch2_extent_update(struct btree_trans *trans,
        if (i_sectors_delta ||
            new_i_size > inode->ei_inode.bi_size) {
                if (c->opts.new_inode_updates) {
-                       bch2_btree_iter_unlock(extent_iter);
+                       bch2_btree_trans_unlock(trans);
                        mutex_lock(&inode->ei_update_lock);
 
-                       if (!bch2_btree_iter_relock(extent_iter)) {
+                       if (!bch2_btree_trans_relock(trans)) {
                                mutex_unlock(&inode->ei_update_lock);
                                return -EINTR;
                        }
@@ -967,10 +967,11 @@ static void readpage_bio_extend(struct readpages_iter *iter,
        }
 }
 
-static void bchfs_read(struct bch_fs *c, struct btree_iter *iter,
+static void bchfs_read(struct btree_trans *trans, struct btree_iter *iter,
                       struct bch_read_bio *rbio, u64 inum,
                       struct readpages_iter *readpages_iter)
 {
+       struct bch_fs *c = trans->c;
        struct bio *bio = &rbio->bio;
        int flags = BCH_READ_RETRY_IF_STALE|
                BCH_READ_MAY_PROMOTE;
@@ -989,7 +990,7 @@ static void bchfs_read(struct bch_fs *c, struct btree_iter *iter,
                BUG_ON(!k.k);
 
                if (IS_ERR(k.k)) {
-                       int ret = bch2_btree_iter_unlock(iter);
+                       int ret = btree_iter_err(iter);
                        BUG_ON(!ret);
                        bcache_io_error(c, bio, "btree IO error %i", ret);
                        bio_endio(bio);
@@ -997,7 +998,7 @@ static void bchfs_read(struct bch_fs *c, struct btree_iter *iter,
                }
 
                bkey_reassemble(&tmp.k, k);
-               bch2_btree_iter_unlock(iter);
+               bch2_btree_trans_unlock(trans);
                k = bkey_i_to_s_c(&tmp.k);
 
                if (readpages_iter) {
@@ -1044,7 +1045,8 @@ int bch2_readpages(struct file *file, struct address_space *mapping,
        struct bch_inode_info *inode = to_bch_ei(mapping->host);
        struct bch_fs *c = inode->v.i_sb->s_fs_info;
        struct bch_io_opts opts = io_opts(c, inode);
-       struct btree_iter iter;
+       struct btree_trans trans;
+       struct btree_iter *iter;
        struct page *page;
        struct readpages_iter readpages_iter;
        int ret;
@@ -1052,8 +1054,10 @@ int bch2_readpages(struct file *file, struct address_space *mapping,
        ret = readpages_iter_init(&readpages_iter, mapping, pages, nr_pages);
        BUG_ON(ret);
 
-       bch2_btree_iter_init(&iter, c, BTREE_ID_EXTENTS, POS_MIN,
-                            BTREE_ITER_SLOTS);
+       bch2_trans_init(&trans, c);
+
+       iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, POS_MIN,
+                                  BTREE_ITER_SLOTS);
 
        if (current->pagecache_lock != &mapping->add_lock)
                pagecache_add_get(&mapping->add_lock);
@@ -1075,12 +1079,14 @@ int bch2_readpages(struct file *file, struct address_space *mapping,
                rbio->bio.bi_end_io = bch2_readpages_end_io;
                __bio_add_page(&rbio->bio, page, PAGE_SIZE, 0);
 
-               bchfs_read(c, &iter, rbio, inode->v.i_ino, &readpages_iter);
+               bchfs_read(&trans, iter, rbio, inode->v.i_ino,
+                          &readpages_iter);
        }
 
        if (current->pagecache_lock != &mapping->add_lock)
                pagecache_add_put(&mapping->add_lock);
 
+       bch2_trans_exit(&trans);
        kfree(readpages_iter.pages);
 
        return 0;
@@ -1089,16 +1095,21 @@ int bch2_readpages(struct file *file, struct address_space *mapping,
 static void __bchfs_readpage(struct bch_fs *c, struct bch_read_bio *rbio,
                             u64 inum, struct page *page)
 {
-       struct btree_iter iter;
+       struct btree_trans trans;
+       struct btree_iter *iter;
 
        page_state_init_for_read(page);
 
        bio_set_op_attrs(&rbio->bio, REQ_OP_READ, REQ_SYNC);
        bio_add_page_contig(&rbio->bio, page);
 
-       bch2_btree_iter_init(&iter, c, BTREE_ID_EXTENTS, POS_MIN,
-                            BTREE_ITER_SLOTS);
-       bchfs_read(c, &iter, rbio, inum, NULL);
+       bch2_trans_init(&trans, c);
+       iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, POS_MIN,
+                                  BTREE_ITER_SLOTS);
+
+       bchfs_read(&trans, iter, rbio, inum, NULL);
+
+       bch2_trans_exit(&trans);
 }
 
 int bch2_readpage(struct file *file, struct page *page)
@@ -2097,7 +2108,7 @@ static int __bch2_fpunch(struct bch_fs *c, struct bch_inode_info *inode,
                                   BTREE_ITER_INTENT);
 
        while ((k = bch2_btree_iter_peek(iter)).k &&
-              !(ret = btree_iter_err(k)) &&
+              !(ret = bkey_err(k)) &&
               bkey_cmp(iter->pos, end) < 0) {
                struct disk_reservation disk_res =
                        bch2_disk_reservation_init(c, 0);
@@ -2120,7 +2131,7 @@ static int __bch2_fpunch(struct bch_fs *c, struct bch_inode_info *inode,
                if (ret)
                        break;
 
-               bch2_btree_iter_cond_resched(iter);
+               bch2_trans_cond_resched(&trans);
        }
 
        bch2_trans_exit(&trans);
@@ -2132,13 +2143,14 @@ static inline int range_has_data(struct bch_fs *c,
                                  struct bpos start,
                                  struct bpos end)
 {
-
-       struct btree_iter iter;
+       struct btree_trans trans;
+       struct btree_iter *iter;
        struct bkey_s_c k;
        int ret = 0;
 
-       for_each_btree_key(&iter, c, BTREE_ID_EXTENTS,
-                          start, 0, k) {
+       bch2_trans_init(&trans, c);
+
+       for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, start, 0, k) {
                if (bkey_cmp(bkey_start_pos(k.k), end) >= 0)
                        break;
 
@@ -2148,7 +2160,7 @@ static inline int range_has_data(struct bch_fs *c,
                }
        }
 
-       return bch2_btree_iter_unlock(&iter) ?: ret;
+       return bch2_trans_exit(&trans) ?: ret;
 }
 
 static int __bch2_truncate_page(struct bch_inode_info *inode,
@@ -2434,14 +2446,14 @@ static long bch2_fcollapse(struct bch_inode_info *inode,
 
                ret = bch2_btree_iter_traverse(dst);
                if (ret)
-                       goto btree_iter_err;
+                       goto bkey_err;
 
                bch2_btree_iter_set_pos(src,
                        POS(dst->pos.inode, dst->pos.offset + (len >> 9)));
 
                k = bch2_btree_iter_peek_slot(src);
-               if ((ret = btree_iter_err(k)))
-                       goto btree_iter_err;
+               if ((ret = bkey_err(k)))
+                       goto bkey_err;
 
                bkey_reassemble(&copy.k, k);
 
@@ -2462,7 +2474,7 @@ static long bch2_fcollapse(struct bch_inode_info *inode,
                                dst, &copy.k,
                                0, true, true, NULL);
                bch2_disk_reservation_put(c, &disk_res);
-btree_iter_err:
+bkey_err:
                if (ret == -EINTR)
                        ret = 0;
                if (ret)
@@ -2472,7 +2484,7 @@ btree_iter_err:
                 * pointers... which isn't a _super_ serious problem...
                 */
 
-               bch2_btree_iter_cond_resched(src);
+               bch2_trans_cond_resched(&trans);
        }
        bch2_trans_unlock(&trans);
 
@@ -2556,8 +2568,8 @@ static long bch2_fallocate(struct bch_inode_info *inode, int mode,
                struct bkey_s_c k;
 
                k = bch2_btree_iter_peek_slot(iter);
-               if ((ret = btree_iter_err(k)))
-                       goto btree_iter_err;
+               if ((ret = bkey_err(k)))
+                       goto bkey_err;
 
                /* already reserved */
                if (k.k->type == KEY_TYPE_reservation &&
@@ -2588,7 +2600,7 @@ static long bch2_fallocate(struct bch_inode_info *inode, int mode,
                                        &quota_res,
                                        sectors, true);
                        if (unlikely(ret))
-                               goto btree_iter_err;
+                               goto bkey_err;
                }
 
                if (reservation.v.nr_replicas < replicas ||
@@ -2596,7 +2608,7 @@ static long bch2_fallocate(struct bch_inode_info *inode, int mode,
                        ret = bch2_disk_reservation_get(c, &disk_res, sectors,
                                                        replicas, 0);
                        if (unlikely(ret))
-                               goto btree_iter_err;
+                               goto bkey_err;
 
                        reservation.v.nr_replicas = disk_res.nr_replicas;
                }
@@ -2605,7 +2617,7 @@ static long bch2_fallocate(struct bch_inode_info *inode, int mode,
                                &disk_res, &quota_res,
                                iter, &reservation.k_i,
                                0, true, true, NULL);
-btree_iter_err:
+bkey_err:
                bch2_quota_reservation_put(c, inode, &quota_res);
                bch2_disk_reservation_put(c, &disk_res);
                if (ret == -EINTR)
@@ -2710,7 +2722,8 @@ static loff_t bch2_seek_data(struct file *file, u64 offset)
 {
        struct bch_inode_info *inode = file_bch_inode(file);
        struct bch_fs *c = inode->v.i_sb->s_fs_info;
-       struct btree_iter iter;
+       struct btree_trans trans;
+       struct btree_iter *iter;
        struct bkey_s_c k;
        u64 isize, next_data = MAX_LFS_FILESIZE;
        int ret;
@@ -2719,7 +2732,9 @@ static loff_t bch2_seek_data(struct file *file, u64 offset)
        if (offset >= isize)
                return -ENXIO;
 
-       for_each_btree_key(&iter, c, BTREE_ID_EXTENTS,
+       bch2_trans_init(&trans, c);
+
+       for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS,
                           POS(inode->v.i_ino, offset >> 9), 0, k) {
                if (k.k->p.inode != inode->v.i_ino) {
                        break;
@@ -2730,7 +2745,7 @@ static loff_t bch2_seek_data(struct file *file, u64 offset)
                        break;
        }
 
-       ret = bch2_btree_iter_unlock(&iter);
+       ret = bch2_trans_exit(&trans);
        if (ret)
                return ret;
 
@@ -2780,7 +2795,8 @@ static loff_t bch2_seek_hole(struct file *file, u64 offset)
 {
        struct bch_inode_info *inode = file_bch_inode(file);
        struct bch_fs *c = inode->v.i_sb->s_fs_info;
-       struct btree_iter iter;
+       struct btree_trans trans;
+       struct btree_iter *iter;
        struct bkey_s_c k;
        u64 isize, next_hole = MAX_LFS_FILESIZE;
        int ret;
@@ -2789,7 +2805,9 @@ static loff_t bch2_seek_hole(struct file *file, u64 offset)
        if (offset >= isize)
                return -ENXIO;
 
-       for_each_btree_key(&iter, c, BTREE_ID_EXTENTS,
+       bch2_trans_init(&trans, c);
+
+       for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS,
                           POS(inode->v.i_ino, offset >> 9),
                           BTREE_ITER_SLOTS, k) {
                if (k.k->p.inode != inode->v.i_ino) {
@@ -2808,7 +2826,7 @@ static loff_t bch2_seek_hole(struct file *file, u64 offset)
                }
        }
 
-       ret = bch2_btree_iter_unlock(&iter);
+       ret = bch2_trans_exit(&trans);
        if (ret)
                return ret;
 
index f05606753dd63b44a56d1abf1410c2fe92806e2c..cc91af0ad64805ec2a57b3b2dcf0581f70f165ac 100644 (file)
@@ -106,7 +106,7 @@ int __must_check bch2_write_inode_trans(struct btree_trans *trans,
                                void *p)
 {
        struct bch_fs *c = trans->c;
-       struct btree_iter *iter;
+       struct btree_iter *iter = NULL;
        struct bkey_inode_buf *inode_p;
        int ret;
 
@@ -1113,7 +1113,8 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
 {
        struct bch_fs *c = vinode->i_sb->s_fs_info;
        struct bch_inode_info *ei = to_bch_ei(vinode);
-       struct btree_iter iter;
+       struct btree_trans trans;
+       struct btree_iter *iter;
        struct bkey_s_c k;
        BKEY_PADDED(k) tmp;
        bool have_extent = false;
@@ -1122,7 +1123,9 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
        if (start + len < start)
                return -EINVAL;
 
-       for_each_btree_key(&iter, c, BTREE_ID_EXTENTS,
+       bch2_trans_init(&trans, c);
+
+       for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS,
                           POS(ei->v.i_ino, start >> 9), 0, k)
                if (bkey_extent_is_data(k.k) ||
                    k.k->type == KEY_TYPE_reservation) {
@@ -1143,7 +1146,7 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
        if (have_extent)
                ret = bch2_fill_extent(info, &tmp.k, FIEMAP_EXTENT_LAST);
 out:
-       bch2_btree_iter_unlock(&iter);
+       bch2_trans_exit(&trans);
        return ret < 0 ? ret : 0;
 }
 
index fb0cb9a454d6ed3988589c6336c2673ac652701a..5c2329d91bcca83d576f0b2194d0f42721895073 100644 (file)
 
 #define QSTR(n) { { { .len = strlen(n) } }, .name = n }
 
-static int remove_dirent(struct bch_fs *c, struct btree_iter *iter,
+static s64 bch2_count_inode_sectors(struct btree_trans *trans, u64 inum)
+{
+       struct btree_iter *iter;
+       struct bkey_s_c k;
+       u64 sectors = 0;
+
+       for_each_btree_key(trans, iter, BTREE_ID_EXTENTS, POS(inum, 0), 0, k) {
+               if (k.k->p.inode != inum)
+                       break;
+
+               if (bkey_extent_is_allocation(k.k))
+                       sectors += k.k->size;
+       }
+
+       return bch2_trans_iter_free(trans, iter) ?: sectors;
+}
+
+static int remove_dirent(struct btree_trans *trans,
                         struct bkey_s_c_dirent dirent)
 {
+       struct bch_fs *c = trans->c;
        struct qstr name;
        struct bch_inode_unpacked dir_inode;
        struct bch_hash_info dir_hash_info;
@@ -34,8 +52,8 @@ static int remove_dirent(struct bch_fs *c, struct btree_iter *iter,
        buf[name.len] = '\0';
        name.name = buf;
 
-       /* Unlock iter so we don't deadlock, after copying name: */
-       bch2_btree_iter_unlock(iter);
+       /* Unlock so we don't deadlock, after copying name: */
+       bch2_btree_trans_unlock(trans);
 
        ret = bch2_inode_find_by_inum(c, dir_inum, &dir_inode);
        if (ret) {
@@ -125,29 +143,33 @@ static int walk_inode(struct bch_fs *c, struct inode_walker *w, u64 inum)
 
 struct hash_check {
        struct bch_hash_info    info;
-       struct btree_trans      *trans;
 
        /* start of current chain of hash collisions: */
        struct btree_iter       *chain;
 
        /* next offset in current chain of hash collisions: */
-       u64                     next;
+       u64                     chain_end;
 };
 
-static void hash_check_init(const struct bch_hash_desc desc,
-                           struct btree_trans *trans,
+static void hash_check_init(struct hash_check *h)
+{
+       h->chain = NULL;
+}
+
+static void hash_stop_chain(struct btree_trans *trans,
                            struct hash_check *h)
 {
-       h->trans = trans;
-       h->chain = bch2_trans_get_iter(trans, desc.btree_id, POS_MIN, 0);
-       h->next = -1;
+       if (h->chain)
+               bch2_trans_iter_free(trans, h->chain);
+       h->chain = NULL;
 }
 
-static void hash_check_set_inode(struct hash_check *h, struct bch_fs *c,
+static void hash_check_set_inode(struct btree_trans *trans,
+                                struct hash_check *h,
                                 const struct bch_inode_unpacked *bi)
 {
-       h->info = bch2_hash_info_init(c, bi);
-       h->next = -1;
+       h->info = bch2_hash_info_init(trans->c, bi);
+       hash_stop_chain(trans, h);
 }
 
 static int hash_redo_key(const struct bch_hash_desc desc,
@@ -168,8 +190,6 @@ static int hash_redo_key(const struct bch_hash_desc desc,
        if (ret)
                goto err;
 
-       bch2_btree_iter_unlock(k_iter);
-
        bch2_hash_set(trans, desc, &h->info, k_iter->pos.inode,
                      tmp, BCH_HASH_SET_MUST_CREATE);
        ret = bch2_trans_commit(trans, NULL, NULL,
@@ -180,44 +200,32 @@ err:
        return ret;
 }
 
-/* fsck hasn't been converted to new transactions yet: */
-static int fsck_hash_delete_at(const struct bch_hash_desc desc,
+static int fsck_hash_delete_at(struct btree_trans *trans,
+                              const struct bch_hash_desc desc,
                               struct bch_hash_info *info,
-                              struct btree_iter *orig_iter)
+                              struct btree_iter *iter)
 {
-       struct btree_trans trans;
-       struct btree_iter *iter;
        int ret;
-
-       bch2_btree_iter_unlock(orig_iter);
-
-       bch2_trans_init(&trans, orig_iter->c);
 retry:
-       bch2_trans_begin(&trans);
-
-       iter = bch2_trans_copy_iter(&trans, orig_iter);
-       if (IS_ERR(iter)) {
-               ret = PTR_ERR(iter);
-               goto err;
-       }
-
-       ret   = bch2_hash_delete_at(&trans, desc, info, iter) ?:
-               bch2_trans_commit(&trans, NULL, NULL,
+       ret   = bch2_hash_delete_at(trans, desc, info, iter) ?:
+               bch2_trans_commit(trans, NULL, NULL,
                                  BTREE_INSERT_ATOMIC|
                                  BTREE_INSERT_NOFAIL|
                                  BTREE_INSERT_LAZY_RW);
-err:
-       if (ret == -EINTR)
-               goto retry;
+       if (ret == -EINTR) {
+               ret = bch2_btree_iter_traverse(iter);
+               if (!ret)
+                       goto retry;
+       }
 
-       bch2_trans_exit(&trans);
        return ret;
 }
 
-static int hash_check_duplicates(const struct bch_hash_desc desc,
-                                struct hash_check *h, struct bch_fs *c,
-                                struct btree_iter *k_iter, struct bkey_s_c k)
+static int hash_check_duplicates(struct btree_trans *trans,
+                       const struct bch_hash_desc desc, struct hash_check *h,
+                       struct btree_iter *k_iter, struct bkey_s_c k)
 {
+       struct bch_fs *c = trans->c;
        struct btree_iter *iter;
        struct bkey_s_c k2;
        char buf[200];
@@ -226,7 +234,7 @@ static int hash_check_duplicates(const struct bch_hash_desc desc,
        if (!bkey_cmp(h->chain->pos, k_iter->pos))
                return 0;
 
-       iter = bch2_trans_copy_iter(h->trans, h->chain);
+       iter = bch2_trans_copy_iter(trans, h->chain);
        BUG_ON(IS_ERR(iter));
 
        for_each_btree_key_continue(iter, 0, k2) {
@@ -238,7 +246,7 @@ static int hash_check_duplicates(const struct bch_hash_desc desc,
                                "duplicate hash table keys:\n%s",
                                (bch2_bkey_val_to_text(&PBUF(buf), c,
                                                       k), buf))) {
-                       ret = fsck_hash_delete_at(desc, &h->info, k_iter);
+                       ret = fsck_hash_delete_at(trans, desc, &h->info, k_iter);
                        if (ret)
                                return ret;
                        ret = 1;
@@ -246,23 +254,39 @@ static int hash_check_duplicates(const struct bch_hash_desc desc,
                }
        }
 fsck_err:
-       bch2_trans_iter_free(h->trans, iter);
+       bch2_trans_iter_free(trans, iter);
        return ret;
 }
 
-static bool key_has_correct_hash(const struct bch_hash_desc desc,
-                                struct hash_check *h, struct bch_fs *c,
-                                struct btree_iter *k_iter, struct bkey_s_c k)
+static void hash_set_chain_start(struct btree_trans *trans,
+                       const struct bch_hash_desc desc,
+                       struct hash_check *h,
+                       struct btree_iter *k_iter, struct bkey_s_c k)
 {
-       u64 hash;
+       bool hole = (k.k->type != KEY_TYPE_whiteout &&
+                    k.k->type != desc.key_type);
 
-       if (k.k->type != KEY_TYPE_whiteout &&
-           k.k->type != desc.key_type)
-               return true;
+       if (hole || k.k->p.offset > h->chain_end + 1)
+               hash_stop_chain(trans, h);
+
+       if (!hole) {
+               if (!h->chain) {
+                       h->chain = bch2_trans_copy_iter(trans, k_iter);
+                       BUG_ON(IS_ERR(h->chain));
+               }
+
+               h->chain_end = k.k->p.offset;
+       }
+}
+
+static bool key_has_correct_hash(struct btree_trans *trans,
+                       const struct bch_hash_desc desc,
+                       struct hash_check *h,
+                       struct btree_iter *k_iter, struct bkey_s_c k)
+{
+       u64 hash;
 
-       if (k.k->p.offset != h->next)
-               bch2_btree_iter_copy(h->chain, k_iter);
-       h->next = k.k->p.offset + 1;
+       hash_set_chain_start(trans, desc, h, k_iter, k);
 
        if (k.k->type != desc.key_type)
                return true;
@@ -273,22 +297,16 @@ static bool key_has_correct_hash(const struct bch_hash_desc desc,
                hash <= k.k->p.offset;
 }
 
-static int hash_check_key(const struct bch_hash_desc desc,
-                         struct btree_trans *trans, struct hash_check *h,
-                         struct btree_iter *k_iter, struct bkey_s_c k)
+static int hash_check_key(struct btree_trans *trans,
+                       const struct bch_hash_desc desc, struct hash_check *h,
+                       struct btree_iter *k_iter, struct bkey_s_c k)
 {
        struct bch_fs *c = trans->c;
        char buf[200];
        u64 hashed;
        int ret = 0;
 
-       if (k.k->type != KEY_TYPE_whiteout &&
-           k.k->type != desc.key_type)
-               return 0;
-
-       if (k.k->p.offset != h->next)
-               bch2_btree_iter_copy(h->chain, k_iter);
-       h->next = k.k->p.offset + 1;
+       hash_set_chain_start(trans, desc, h, k_iter, k);
 
        if (k.k->type != desc.key_type)
                return 0;
@@ -311,7 +329,7 @@ static int hash_check_key(const struct bch_hash_desc desc,
                return 1;
        }
 
-       ret = hash_check_duplicates(desc, h, c, k_iter, k);
+       ret = hash_check_duplicates(trans, desc, h, k_iter, k);
 fsck_err:
        return ret;
 }
@@ -326,7 +344,7 @@ static int check_dirent_hash(struct btree_trans *trans, struct hash_check *h,
        unsigned len;
        u64 hash;
 
-       if (key_has_correct_hash(bch2_dirent_hash_desc, h, c, iter, *k))
+       if (key_has_correct_hash(trans, bch2_dirent_hash_desc, h, iter, *k))
                return 0;
 
        len = bch2_dirent_name_bytes(bkey_s_c_to_dirent(*k));
@@ -416,14 +434,17 @@ noinline_for_stack
 static int check_extents(struct bch_fs *c)
 {
        struct inode_walker w = inode_walker_init();
-       struct btree_iter iter;
+       struct btree_trans trans;
+       struct btree_iter *iter;
        struct bkey_s_c k;
        u64 i_sectors;
        int ret = 0;
 
+       bch2_trans_init(&trans, c);
+
        bch_verbose(c, "checking extents");
 
-       for_each_btree_key(&iter, c, BTREE_ID_EXTENTS,
+       for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS,
                           POS(BCACHEFS_ROOT_INO, 0), 0, k) {
                ret = walk_inode(c, &w, k.k->p.inode);
                if (ret)
@@ -436,7 +457,7 @@ static int check_extents(struct bch_fs *c)
                        !S_ISREG(w.inode.bi_mode) && !S_ISLNK(w.inode.bi_mode), c,
                        "extent type %u for non regular file, inode %llu mode %o",
                        k.k->type, k.k->p.inode, w.inode.bi_mode)) {
-                       bch2_btree_iter_unlock(&iter);
+                       bch2_trans_unlock(&trans);
 
                        ret = bch2_inode_truncate(c, k.k->p.inode, 0);
                        if (ret)
@@ -448,14 +469,14 @@ static int check_extents(struct bch_fs *c)
                        w.have_inode &&
                        !(w.inode.bi_flags & BCH_INODE_I_SECTORS_DIRTY) &&
                        w.inode.bi_sectors !=
-                       (i_sectors = bch2_count_inode_sectors(c, w.cur_inum)),
+                       (i_sectors = bch2_count_inode_sectors(&trans, w.cur_inum)),
                        c, "i_sectors wrong: got %llu, should be %llu",
                        w.inode.bi_sectors, i_sectors)) {
                        struct bkey_inode_buf p;
 
                        w.inode.bi_sectors = i_sectors;
 
-                       bch2_btree_iter_unlock(&iter);
+                       bch2_trans_unlock(&trans);
 
                        bch2_inode_pack(&p, &w.inode);
 
@@ -469,7 +490,7 @@ static int check_extents(struct bch_fs *c)
                        }
 
                        /* revalidate iterator: */
-                       k = bch2_btree_iter_peek(&iter);
+                       k = bch2_btree_iter_peek(iter);
                }
 
                if (fsck_err_on(w.have_inode &&
@@ -478,7 +499,7 @@ static int check_extents(struct bch_fs *c)
                        k.k->p.offset > round_up(w.inode.bi_size, PAGE_SIZE) >> 9, c,
                        "extent type %u offset %llu past end of inode %llu, i_size %llu",
                        k.k->type, k.k->p.offset, k.k->p.inode, w.inode.bi_size)) {
-                       bch2_btree_iter_unlock(&iter);
+                       bch2_trans_unlock(&trans);
 
                        ret = bch2_inode_truncate(c, k.k->p.inode,
                                                  w.inode.bi_size);
@@ -489,7 +510,7 @@ static int check_extents(struct bch_fs *c)
        }
 err:
 fsck_err:
-       return bch2_btree_iter_unlock(&iter) ?: ret;
+       return bch2_trans_exit(&trans) ?: ret;
 }
 
 /*
@@ -517,7 +538,7 @@ static int check_dirents(struct bch_fs *c)
        iter = bch2_trans_get_iter(&trans, BTREE_ID_DIRENTS,
                                   POS(BCACHEFS_ROOT_INO, 0), 0);
 
-       hash_check_init(bch2_dirent_hash_desc, &trans, &h);
+       hash_check_init(&h);
 
        for_each_btree_key_continue(iter, 0, k) {
                struct bkey_s_c_dirent d;
@@ -545,7 +566,7 @@ static int check_dirents(struct bch_fs *c)
                }
 
                if (w.first_this_inode && w.have_inode)
-                       hash_check_set_inode(&h, c, &w.inode);
+                       hash_check_set_inode(&trans, &h, &w.inode);
 
                ret = check_dirent_hash(&trans, &h, iter, &k);
                if (ret > 0) {
@@ -578,7 +599,7 @@ static int check_dirents(struct bch_fs *c)
                                ".. dirent") ||
                    fsck_err_on(memchr(d.v->d_name, '/', name_len), c,
                                "dirent name has invalid chars")) {
-                       ret = remove_dirent(c, iter, d);
+                       ret = remove_dirent(&trans, d);
                        if (ret)
                                goto err;
                        continue;
@@ -588,7 +609,7 @@ static int check_dirents(struct bch_fs *c)
                                "dirent points to own directory:\n%s",
                                (bch2_bkey_val_to_text(&PBUF(buf), c,
                                                       k), buf))) {
-                       ret = remove_dirent(c, iter, d);
+                       ret = remove_dirent(&trans, d);
                        if (ret)
                                goto err;
                        continue;
@@ -605,7 +626,7 @@ static int check_dirents(struct bch_fs *c)
                                "dirent points to missing inode:\n%s",
                                (bch2_bkey_val_to_text(&PBUF(buf), c,
                                                       k), buf))) {
-                       ret = remove_dirent(c, iter, d);
+                       ret = remove_dirent(&trans, d);
                        if (ret)
                                goto err;
                        continue;
@@ -641,6 +662,8 @@ static int check_dirents(struct bch_fs *c)
 
                }
        }
+
+       hash_stop_chain(&trans, &h);
 err:
 fsck_err:
        return bch2_trans_exit(&trans) ?: ret;
@@ -668,7 +691,7 @@ static int check_xattrs(struct bch_fs *c)
        iter = bch2_trans_get_iter(&trans, BTREE_ID_XATTRS,
                                   POS(BCACHEFS_ROOT_INO, 0), 0);
 
-       hash_check_init(bch2_xattr_hash_desc, &trans, &h);
+       hash_check_init(&h);
 
        for_each_btree_key_continue(iter, 0, k) {
                ret = walk_inode(c, &w, k.k->p.inode);
@@ -685,9 +708,10 @@ static int check_xattrs(struct bch_fs *c)
                }
 
                if (w.first_this_inode && w.have_inode)
-                       hash_check_set_inode(&h, c, &w.inode);
+                       hash_check_set_inode(&trans, &h, &w.inode);
 
-               ret = hash_check_key(bch2_xattr_hash_desc, &trans, &h, iter, k);
+               ret = hash_check_key(&trans, bch2_xattr_hash_desc,
+                                    &h, iter, k);
                if (ret)
                        goto fsck_err;
        }
@@ -862,13 +886,16 @@ static int check_directory_structure(struct bch_fs *c,
        struct inode_bitmap dirs_done = { NULL, 0 };
        struct pathbuf path = { 0, 0, NULL };
        struct pathbuf_entry *e;
-       struct btree_iter iter;
+       struct btree_trans trans;
+       struct btree_iter *iter;
        struct bkey_s_c k;
        struct bkey_s_c_dirent dirent;
        bool had_unreachable;
        u64 d_inum;
        int ret = 0;
 
+       bch2_trans_init(&trans, c);
+
        bch_verbose(c, "checking directory structure");
 
        /* DFS: */
@@ -893,7 +920,7 @@ next:
                if (e->offset == U64_MAX)
                        goto up;
 
-               for_each_btree_key(&iter, c, BTREE_ID_DIRENTS,
+               for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS,
                                   POS(e->inum, e->offset + 1), 0, k) {
                        if (k.k->p.inode != e->inum)
                                break;
@@ -913,7 +940,7 @@ next:
                        if (fsck_err_on(inode_bitmap_test(&dirs_done, d_inum), c,
                                        "directory %llu has multiple hardlinks",
                                        d_inum)) {
-                               ret = remove_dirent(c, &iter, dirent);
+                               ret = remove_dirent(&trans, dirent);
                                if (ret)
                                        goto err;
                                continue;
@@ -930,10 +957,14 @@ next:
                                goto err;
                        }
 
-                       bch2_btree_iter_unlock(&iter);
+                       ret = bch2_trans_iter_free(&trans, iter);
+                       if (ret) {
+                               bch_err(c, "btree error %i in fsck", ret);
+                               goto err;
+                       }
                        goto next;
                }
-               ret = bch2_btree_iter_unlock(&iter);
+               ret = bch2_trans_iter_free(&trans, iter);
                if (ret) {
                        bch_err(c, "btree error %i in fsck", ret);
                        goto err;
@@ -942,7 +973,7 @@ up:
                path.nr--;
        }
 
-       for_each_btree_key(&iter, c, BTREE_ID_INODES, POS_MIN, 0, k) {
+       for_each_btree_key(&trans, iter, BTREE_ID_INODES, POS_MIN, 0, k) {
                if (k.k->type != KEY_TYPE_inode)
                        continue;
 
@@ -955,7 +986,7 @@ up:
                if (fsck_err_on(!inode_bitmap_test(&dirs_done, k.k->p.inode), c,
                                "unreachable directory found (inum %llu)",
                                k.k->p.inode)) {
-                       bch2_btree_iter_unlock(&iter);
+                       bch2_btree_trans_unlock(&trans);
 
                        ret = reattach_inode(c, lostfound_inode, k.k->p.inode);
                        if (ret) {
@@ -965,7 +996,7 @@ up:
                        had_unreachable = true;
                }
        }
-       ret = bch2_btree_iter_unlock(&iter);
+       ret = bch2_trans_iter_free(&trans, iter);
        if (ret)
                goto err;
 
@@ -984,7 +1015,7 @@ out:
        return ret;
 err:
 fsck_err:
-       ret = bch2_btree_iter_unlock(&iter) ?: ret;
+       ret = bch2_trans_exit(&trans) ?: ret;
        goto out;
 }
 
@@ -1021,15 +1052,18 @@ noinline_for_stack
 static int bch2_gc_walk_dirents(struct bch_fs *c, nlink_table *links,
                               u64 range_start, u64 *range_end)
 {
-       struct btree_iter iter;
+       struct btree_trans trans;
+       struct btree_iter *iter;
        struct bkey_s_c k;
        struct bkey_s_c_dirent d;
        u64 d_inum;
        int ret;
 
+       bch2_trans_init(&trans, c);
+
        inc_link(c, links, range_start, range_end, BCACHEFS_ROOT_INO, false);
 
-       for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, POS_MIN, 0, k) {
+       for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, POS_MIN, 0, k) {
                switch (k.k->type) {
                case KEY_TYPE_dirent:
                        d = bkey_s_c_to_dirent(k);
@@ -1045,32 +1079,15 @@ static int bch2_gc_walk_dirents(struct bch_fs *c, nlink_table *links,
                        break;
                }
 
-               bch2_btree_iter_cond_resched(&iter);
+               bch2_trans_cond_resched(&trans);
        }
-       ret = bch2_btree_iter_unlock(&iter);
+       ret = bch2_trans_exit(&trans);
        if (ret)
                bch_err(c, "error in fs gc: btree error %i while walking dirents", ret);
 
        return ret;
 }
 
-s64 bch2_count_inode_sectors(struct bch_fs *c, u64 inum)
-{
-       struct btree_iter iter;
-       struct bkey_s_c k;
-       u64 sectors = 0;
-
-       for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS(inum, 0), 0, k) {
-               if (k.k->p.inode != inum)
-                       break;
-
-               if (bkey_extent_is_allocation(k.k))
-                       sectors += k.k->size;
-       }
-
-       return bch2_btree_iter_unlock(&iter) ?: sectors;
-}
-
 static int check_inode_nlink(struct bch_fs *c,
                             struct bch_inode_unpacked *lostfound_inode,
                             struct bch_inode_unpacked *u,
@@ -1184,6 +1201,9 @@ static int check_inode(struct btree_trans *trans,
        int ret = 0;
 
        ret = bch2_inode_unpack(inode, &u);
+
+       bch2_btree_trans_unlock(trans);
+
        if (bch2_fs_inconsistent_on(ret, c,
                         "error unpacking inode %llu in fsck",
                         inode.k->p.inode))
@@ -1252,7 +1272,7 @@ static int check_inode(struct btree_trans *trans,
                bch_verbose(c, "recounting sectors for inode %llu",
                            u.bi_inum);
 
-               sectors = bch2_count_inode_sectors(c, u.bi_inum);
+               sectors = bch2_count_inode_sectors(trans, u.bi_inum);
                if (sectors < 0) {
                        bch_err(c, "error in fs gc: error %i "
                                "recounting inode sectors",
@@ -1303,7 +1323,7 @@ static int bch2_gc_walk_inodes(struct bch_fs *c,
        nlinks_iter = genradix_iter_init(links, 0);
 
        while ((k = bch2_btree_iter_peek(iter)).k &&
-              !(ret2 = btree_iter_err(k))) {
+              !(ret2 = bkey_err(k))) {
 peek_nlinks:   link = genradix_iter_peek(&nlinks_iter, links);
 
                if (!link && (!k.k || iter->pos.inode >= range_end))
@@ -1323,12 +1343,6 @@ peek_nlinks:     link = genradix_iter_peek(&nlinks_iter, links);
                        link = &zero_links;
 
                if (k.k && k.k->type == KEY_TYPE_inode) {
-                       /*
-                        * Avoid potential deadlocks with iter for
-                        * truncate/rm/etc.:
-                        */
-                       bch2_btree_iter_unlock(iter);
-
                        ret = check_inode(&trans, lostfound_inode, iter,
                                          bkey_s_c_to_inode(k), link);
                        BUG_ON(ret == -EINTR);
@@ -1345,7 +1359,7 @@ peek_nlinks:      link = genradix_iter_peek(&nlinks_iter, links);
                        genradix_iter_advance(&nlinks_iter, links);
 
                bch2_btree_iter_next(iter);
-               bch2_btree_iter_cond_resched(iter);
+               bch2_trans_cond_resched(&trans);
        }
 fsck_err:
        bch2_trans_exit(&trans);
@@ -1399,7 +1413,7 @@ static int check_inodes_fast(struct bch_fs *c)
        struct btree_iter *iter;
        struct bkey_s_c k;
        struct bkey_s_c_inode inode;
-       int ret = 0;
+       int ret = 0, ret2;
 
        bch2_trans_init(&trans, c);
 
@@ -1423,12 +1437,9 @@ static int check_inodes_fast(struct bch_fs *c)
                }
        }
 
-       if (!ret)
-               ret = bch2_btree_iter_unlock(iter);
-
-       bch2_trans_exit(&trans);
+       ret2 = bch2_trans_exit(&trans);
 
-       return ret;
+       return ret ?: ret2;
 }
 
 /*
index bc9caaf237978353f75d4aabb0d3670e07f997de..dc7ce68769fc0e1cb936f84212ad265259a3174c 100644 (file)
@@ -1,7 +1,6 @@
 #ifndef _BCACHEFS_FSCK_H
 #define _BCACHEFS_FSCK_H
 
-s64 bch2_count_inode_sectors(struct bch_fs *, u64);
 int bch2_fsck(struct bch_fs *);
 
 #endif /* _BCACHEFS_FSCK_H */
index a555a8afd27ed78f7fd1ebcd83b4ed7d1269ea75..58d58cc4785e4142f4ecfd7f87224d5d66e72669 100644 (file)
@@ -324,7 +324,7 @@ again:
        while (1) {
                struct bkey_s_c k = bch2_btree_iter_peek_slot(iter);
 
-               ret = btree_iter_err(k);
+               ret = bkey_err(k);
                if (ret)
                        return ret;
 
@@ -400,7 +400,7 @@ int bch2_inode_rm(struct bch_fs *c, u64 inode_nr)
                struct bkey_s_c k = bch2_btree_iter_peek_slot(iter);
                u32 bi_generation = 0;
 
-               ret = btree_iter_err(k);
+               ret = bkey_err(k);
                if (ret)
                        break;
 
@@ -448,13 +448,15 @@ int bch2_inode_rm(struct bch_fs *c, u64 inode_nr)
 int bch2_inode_find_by_inum(struct bch_fs *c, u64 inode_nr,
                            struct bch_inode_unpacked *inode)
 {
-       struct btree_iter iter;
+       struct btree_trans trans;
+       struct btree_iter *iter;
        struct bkey_s_c k;
        int ret = -ENOENT;
 
-       for_each_btree_key(&iter, c, BTREE_ID_INODES,
-                          POS(inode_nr, 0),
-                          BTREE_ITER_SLOTS, k) {
+       bch2_trans_init(&trans, c);
+
+       for_each_btree_key(&trans, iter, BTREE_ID_INODES,
+                          POS(inode_nr, 0), BTREE_ITER_SLOTS, k) {
                switch (k.k->type) {
                case KEY_TYPE_inode:
                        ret = bch2_inode_unpack(bkey_s_c_to_inode(k), inode);
@@ -467,7 +469,7 @@ int bch2_inode_find_by_inum(struct bch_fs *c, u64 inode_nr,
                break;
        }
 
-       return bch2_btree_iter_unlock(&iter) ?: ret;
+       return bch2_trans_exit(&trans) ?: ret;
 }
 
 #ifdef CONFIG_BCACHEFS_DEBUG
index 11b927e64a5ddb7e475702d168730913a28745fe..cc8a3c51a4bb42daa0adc7b277ca9ae57e078a8e 100644 (file)
@@ -1245,27 +1245,28 @@ static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio
                                     struct bch_io_failures *failed,
                                     unsigned flags)
 {
-       struct btree_iter iter;
+       struct btree_trans trans;
+       struct btree_iter *iter;
        BKEY_PADDED(k) tmp;
        struct bkey_s_c k;
        int ret;
 
        flags &= ~BCH_READ_LAST_FRAGMENT;
 
-       bch2_btree_iter_init(&iter, c, BTREE_ID_EXTENTS,
-                            rbio->pos, BTREE_ITER_SLOTS);
+       bch2_trans_init(&trans, c);
+
+       iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
+                                  rbio->pos, BTREE_ITER_SLOTS);
 retry:
        rbio->bio.bi_status = 0;
 
-       k = bch2_btree_iter_peek_slot(&iter);
-       if (btree_iter_err(k)) {
-               bch2_btree_iter_unlock(&iter);
+       k = bch2_btree_iter_peek_slot(iter);
+       if (bkey_err(k))
                goto err;
-       }
 
        bkey_reassemble(&tmp.k, k);
        k = bkey_i_to_s_c(&tmp.k);
-       bch2_btree_iter_unlock(&iter);
+       bch2_trans_unlock(&trans);
 
        if (!bkey_extent_is_data(k.k) ||
            !bch2_extent_matches_ptr(c, bkey_i_to_s_c_extent(&tmp.k),
@@ -1282,25 +1283,30 @@ retry:
                goto retry;
        if (ret)
                goto err;
-       goto out;
-err:
-       rbio->bio.bi_status = BLK_STS_IOERR;
 out:
        bch2_rbio_done(rbio);
+       bch2_trans_exit(&trans);
+       return;
+err:
+       rbio->bio.bi_status = BLK_STS_IOERR;
+       goto out;
 }
 
 static void bch2_read_retry(struct bch_fs *c, struct bch_read_bio *rbio,
                            struct bvec_iter bvec_iter, u64 inode,
                            struct bch_io_failures *failed, unsigned flags)
 {
-       struct btree_iter iter;
+       struct btree_trans trans;
+       struct btree_iter *iter;
        struct bkey_s_c k;
        int ret;
 
+       bch2_trans_init(&trans, c);
+
        flags &= ~BCH_READ_LAST_FRAGMENT;
        flags |= BCH_READ_MUST_CLONE;
 retry:
-       for_each_btree_key(&iter, c, BTREE_ID_EXTENTS,
+       for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS,
                           POS(inode, bvec_iter.bi_sector),
                           BTREE_ITER_SLOTS, k) {
                BKEY_PADDED(k) tmp;
@@ -1308,7 +1314,7 @@ retry:
 
                bkey_reassemble(&tmp.k, k);
                k = bkey_i_to_s_c(&tmp.k);
-               bch2_btree_iter_unlock(&iter);
+               bch2_btree_trans_unlock(&trans);
 
                bytes = min_t(unsigned, bvec_iter.bi_size,
                              (k.k->p.offset - bvec_iter.bi_sector) << 9);
@@ -1333,12 +1339,12 @@ retry:
         * If we get here, it better have been because there was an error
         * reading a btree node
         */
-       ret = bch2_btree_iter_unlock(&iter);
-       BUG_ON(!ret);
-       __bcache_io_error(c, "btree IO error %i", ret);
+       BUG_ON(!btree_iter_err(iter));
+       __bcache_io_error(c, "btree IO error");
 err:
        rbio->bio.bi_status = BLK_STS_IOERR;
 out:
+       bch2_trans_exit(&trans);
        bch2_rbio_done(rbio);
 }
 
@@ -1834,12 +1840,14 @@ out_read_done:
 
 void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode)
 {
-       struct btree_iter iter;
+       struct btree_trans trans;
+       struct btree_iter *iter;
        struct bkey_s_c k;
        unsigned flags = BCH_READ_RETRY_IF_STALE|
                BCH_READ_MAY_PROMOTE|
                BCH_READ_USER_MAPPED;
-       int ret;
+
+       bch2_trans_init(&trans, c);
 
        BUG_ON(rbio->_state);
        BUG_ON(flags & BCH_READ_NODECODE);
@@ -1848,7 +1856,7 @@ void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode)
        rbio->c = c;
        rbio->start_time = local_clock();
 
-       for_each_btree_key(&iter, c, BTREE_ID_EXTENTS,
+       for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS,
                           POS(inode, rbio->bio.bi_iter.bi_sector),
                           BTREE_ITER_SLOTS, k) {
                BKEY_PADDED(k) tmp;
@@ -1860,7 +1868,7 @@ void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode)
                 */
                bkey_reassemble(&tmp.k, k);
                k = bkey_i_to_s_c(&tmp.k);
-               bch2_btree_iter_unlock(&iter);
+               bch2_btree_trans_unlock(&trans);
 
                bytes = min_t(unsigned, rbio->bio.bi_iter.bi_size,
                              (k.k->p.offset - rbio->bio.bi_iter.bi_sector) << 9);
@@ -1882,9 +1890,10 @@ void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode)
         * If we get here, it better have been because there was an error
         * reading a btree node
         */
-       ret = bch2_btree_iter_unlock(&iter);
-       BUG_ON(!ret);
-       bcache_io_error(c, &rbio->bio, "btree IO error %i", ret);
+       BUG_ON(!btree_iter_err(iter));
+       bcache_io_error(c, &rbio->bio, "btree IO error");
+
+       bch2_trans_exit(&trans);
        bch2_rbio_done(rbio);
 }
 
index dd0e8d2fce991d88876459cfeab1930ec128c30b..5bac41cf437c0afd520948b53f0c60275b9e7883 100644 (file)
@@ -61,9 +61,12 @@ static void journal_seq_blacklist_flush(struct journal *j,
        closure_init_stack(&cl);
 
        for (i = 0;; i++) {
-               struct btree_iter iter;
+               struct btree_trans trans;
+               struct btree_iter *iter;
                struct btree *b;
 
+               bch2_trans_init(&trans, c);
+
                mutex_lock(&j->blacklist_lock);
                if (i >= bl->nr_entries) {
                        mutex_unlock(&j->blacklist_lock);
@@ -72,17 +75,17 @@ static void journal_seq_blacklist_flush(struct journal *j,
                n = bl->entries[i];
                mutex_unlock(&j->blacklist_lock);
 
-               __bch2_btree_iter_init(&iter, c, n.btree_id, n.pos,
-                                      0, 0, BTREE_ITER_NODES);
+               iter = bch2_trans_get_node_iter(&trans, n.btree_id, n.pos,
+                                               0, 0, 0);
 
-               b = bch2_btree_iter_peek_node(&iter);
+               b = bch2_btree_iter_peek_node(iter);
 
                /* The node might have already been rewritten: */
 
                if (b->data->keys.seq == n.seq) {
-                       ret = bch2_btree_node_rewrite(c, &iter, n.seq, 0);
+                       ret = bch2_btree_node_rewrite(c, iter, n.seq, 0);
                        if (ret) {
-                               bch2_btree_iter_unlock(&iter);
+                               bch2_trans_exit(&trans);
                                bch2_fs_fatal_error(c,
                                        "error %i rewriting btree node with blacklisted journal seq",
                                        ret);
@@ -91,7 +94,7 @@ static void journal_seq_blacklist_flush(struct journal *j,
                        }
                }
 
-               bch2_btree_iter_unlock(&iter);
+               bch2_trans_exit(&trans);
        }
 
        for (i = 0;; i++) {
index 58d7d3a30cd539e5eeeda9d8fd871613c2c447c6..88761d34dc65d4216b577df11446c4c729065bcc 100644 (file)
@@ -51,7 +51,7 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
 
 
        while ((k = bch2_btree_iter_peek(iter)).k &&
-              !(ret = btree_iter_err(k))) {
+              !(ret = bkey_err(k))) {
                if (!bkey_extent_is_data(k.k) ||
                    !bch2_extent_has_device(bkey_s_c_to_extent(k), dev_idx)) {
                        ret = bch2_mark_bkey_replicas(c, k);
@@ -105,7 +105,8 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
 
 static int bch2_dev_metadata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
 {
-       struct btree_iter iter;
+       struct btree_trans trans;
+       struct btree_iter *iter;
        struct closure cl;
        struct btree *b;
        unsigned id;
@@ -115,13 +116,15 @@ static int bch2_dev_metadata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
        if (flags & BCH_FORCE_IF_METADATA_LOST)
                return -EINVAL;
 
+       bch2_trans_init(&trans, c);
        closure_init_stack(&cl);
 
        mutex_lock(&c->replicas_gc_lock);
        bch2_replicas_gc_start(c, 1 << BCH_DATA_BTREE);
 
        for (id = 0; id < BTREE_ID_NR; id++) {
-               for_each_btree_node(&iter, c, id, POS_MIN, BTREE_ITER_PREFETCH, b) {
+               for_each_btree_node(&trans, iter, id, POS_MIN,
+                                   BTREE_ITER_PREFETCH, b) {
                        __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX) tmp;
                        struct bkey_i_btree_ptr *new_key;
 retry:
@@ -133,7 +136,7 @@ retry:
                                 * but got -EINTR after upgrading the iter, but
                                 * then raced and the node is now gone:
                                 */
-                               bch2_btree_iter_downgrade(&iter);
+                               bch2_btree_iter_downgrade(iter);
 
                                ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(&b->key));
                                if (ret)
@@ -147,16 +150,16 @@ retry:
                                if (ret)
                                        goto err;
 
-                               ret = bch2_btree_node_update_key(c, &iter, b, new_key);
+                               ret = bch2_btree_node_update_key(c, iter, b, new_key);
                                if (ret == -EINTR) {
-                                       b = bch2_btree_iter_peek_node(&iter);
+                                       b = bch2_btree_iter_peek_node(iter);
                                        goto retry;
                                }
                                if (ret)
                                        goto err;
                        }
                }
-               bch2_btree_iter_unlock(&iter);
+               bch2_trans_iter_free(&trans, iter);
        }
 
        /* flush relevant btree updates */
@@ -170,14 +173,13 @@ retry:
        }
 
        ret = 0;
-out:
+err:
+       bch2_trans_exit(&trans);
+
        ret = bch2_replicas_gc_end(c, ret);
        mutex_unlock(&c->replicas_gc_lock);
 
        return ret;
-err:
-       bch2_btree_iter_unlock(&iter);
-       goto out;
 }
 
 int bch2_dev_data_drop(struct bch_fs *c, unsigned dev_idx, int flags)
index 3315bedcabfd40841efd38f190f6392082f5e15c..1e7448ba7529cb322a199a31b7e800593c6b2abf 100644 (file)
@@ -77,7 +77,7 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
                bool did_work = false;
                int nr;
 
-               ret = btree_iter_err(k);
+               ret = bkey_err(k);
                if (ret)
                        break;
 
@@ -486,6 +486,8 @@ int bch2_move_data(struct bch_fs *c,
        struct moving_context ctxt = { .stats = stats };
        struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
        BKEY_PADDED(k) tmp;
+       struct btree_trans trans;
+       struct btree_iter *iter;
        struct bkey_s_c k;
        struct data_opts data_opts;
        enum data_cmd data_cmd;
@@ -496,9 +498,14 @@ int bch2_move_data(struct bch_fs *c,
        INIT_LIST_HEAD(&ctxt.reads);
        init_waitqueue_head(&ctxt.wait);
 
+       bch2_trans_init(&trans, c);
+
        stats->data_type = BCH_DATA_USER;
-       bch2_btree_iter_init(&stats->iter, c, BTREE_ID_EXTENTS, start,
-                            BTREE_ITER_PREFETCH);
+       stats->btree_id = BTREE_ID_EXTENTS;
+       stats->pos      = POS_MIN;
+
+       iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, start,
+                                  BTREE_ITER_PREFETCH);
 
        if (rate)
                bch2_ratelimit_reset(rate);
@@ -508,7 +515,7 @@ int bch2_move_data(struct bch_fs *c,
                        delay = rate ? bch2_ratelimit_delay(rate) : 0;
 
                        if (delay) {
-                               bch2_btree_iter_unlock(&stats->iter);
+                               bch2_trans_unlock(&trans);
                                set_current_state(TASK_INTERRUPTIBLE);
                        }
 
@@ -521,16 +528,19 @@ int bch2_move_data(struct bch_fs *c,
                                schedule_timeout(delay);
 
                        if (unlikely(freezing(current))) {
-                               bch2_btree_iter_unlock(&stats->iter);
+                               bch2_trans_unlock(&trans);
                                move_ctxt_wait_event(&ctxt, list_empty(&ctxt.reads));
                                try_to_freeze();
                        }
                } while (delay);
 peek:
-               k = bch2_btree_iter_peek(&stats->iter);
+               k = bch2_btree_iter_peek(iter);
+
+               stats->pos = iter->pos;
+
                if (!k.k)
                        break;
-               ret = btree_iter_err(k);
+               ret = bkey_err(k);
                if (ret)
                        break;
                if (bkey_cmp(bkey_start_pos(k.k), end) >= 0)
@@ -543,7 +553,7 @@ peek:
                        struct bch_inode_unpacked inode;
 
                        /* don't hold btree locks while looking up inode: */
-                       bch2_btree_iter_unlock(&stats->iter);
+                       bch2_trans_unlock(&trans);
 
                        io_opts = bch2_opts_to_inode_opts(c->opts);
                        if (!bch2_inode_find_by_inum(c, k.k->p.inode, &inode))
@@ -568,7 +578,7 @@ peek:
                /* unlock before doing IO: */
                bkey_reassemble(&tmp.k, k);
                k = bkey_i_to_s_c(&tmp.k);
-               bch2_btree_iter_unlock(&stats->iter);
+               bch2_trans_unlock(&trans);
 
                ret2 = bch2_move_extent(c, &ctxt, wp, io_opts,
                                        bkey_s_c_to_extent(k),
@@ -590,11 +600,11 @@ next:
                atomic64_add(k.k->size * bch2_bkey_nr_dirty_ptrs(k),
                             &stats->sectors_seen);
 next_nondata:
-               bch2_btree_iter_next(&stats->iter);
-               bch2_btree_iter_cond_resched(&stats->iter);
+               bch2_btree_iter_next(iter);
+               bch2_trans_cond_resched(&trans);
        }
 out:
-       bch2_btree_iter_unlock(&stats->iter);
+       bch2_trans_exit(&trans);
 
        move_ctxt_wait_event(&ctxt, list_empty(&ctxt.reads));
        closure_sync(&ctxt.cl);
@@ -610,20 +620,23 @@ out:
 
 static int bch2_gc_data_replicas(struct bch_fs *c)
 {
-       struct btree_iter iter;
+       struct btree_trans trans;
+       struct btree_iter *iter;
        struct bkey_s_c k;
        int ret;
 
+       bch2_trans_init(&trans, c);
+
        mutex_lock(&c->replicas_gc_lock);
        bch2_replicas_gc_start(c, (1 << BCH_DATA_USER)|(1 << BCH_DATA_CACHED));
 
-       for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS_MIN,
+       for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS_MIN,
                           BTREE_ITER_PREFETCH, k) {
                ret = bch2_mark_bkey_replicas(c, k);
                if (ret)
                        break;
        }
-       ret = bch2_btree_iter_unlock(&iter) ?: ret;
+       ret = bch2_trans_exit(&trans) ?: ret;
 
        bch2_replicas_gc_end(c, ret);
        mutex_unlock(&c->replicas_gc_lock);
@@ -633,24 +646,30 @@ static int bch2_gc_data_replicas(struct bch_fs *c)
 
 static int bch2_gc_btree_replicas(struct bch_fs *c)
 {
-       struct btree_iter iter;
+       struct btree_trans trans;
+       struct btree_iter *iter;
        struct btree *b;
        unsigned id;
        int ret = 0;
 
+       bch2_trans_init(&trans, c);
+
        mutex_lock(&c->replicas_gc_lock);
        bch2_replicas_gc_start(c, 1 << BCH_DATA_BTREE);
 
        for (id = 0; id < BTREE_ID_NR; id++) {
-               for_each_btree_node(&iter, c, id, POS_MIN, BTREE_ITER_PREFETCH, b) {
+               for_each_btree_node(&trans, iter, id, POS_MIN,
+                                   BTREE_ITER_PREFETCH, b) {
                        ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(&b->key));
 
-                       bch2_btree_iter_cond_resched(&iter);
+                       bch2_trans_cond_resched(&trans);
                }
 
-               ret = bch2_btree_iter_unlock(&iter) ?: ret;
+               ret = bch2_trans_iter_free(&trans, iter) ?: ret;
        }
 
+       bch2_trans_exit(&trans);
+
        bch2_replicas_gc_end(c, ret);
        mutex_unlock(&c->replicas_gc_lock);
 
@@ -663,16 +682,25 @@ static int bch2_move_btree(struct bch_fs *c,
                           struct bch_move_stats *stats)
 {
        struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
+       struct btree_trans trans;
+       struct btree_iter *iter;
        struct btree *b;
        unsigned id;
        struct data_opts data_opts;
        enum data_cmd cmd;
        int ret = 0;
 
+       bch2_trans_init(&trans, c);
+
        stats->data_type = BCH_DATA_BTREE;
 
        for (id = 0; id < BTREE_ID_NR; id++) {
-               for_each_btree_node(&stats->iter, c, id, POS_MIN, BTREE_ITER_PREFETCH, b) {
+               stats->btree_id = id;
+
+               for_each_btree_node(&trans, iter, id, POS_MIN,
+                                   BTREE_ITER_PREFETCH, b) {
+                       stats->pos = iter->pos;
+
                        switch ((cmd = pred(c, arg,
                                            bkey_i_to_s_c(&b->key),
                                            &io_opts, &data_opts))) {
@@ -687,15 +715,17 @@ static int bch2_move_btree(struct bch_fs *c,
                                BUG();
                        }
 
-                       ret = bch2_btree_node_rewrite(c, &stats->iter,
+                       ret = bch2_btree_node_rewrite(c, iter,
                                        b->data->keys.seq, 0) ?: ret;
 next:
-                       bch2_btree_iter_cond_resched(&stats->iter);
+                       bch2_trans_cond_resched(&trans);
                }
 
-               ret = bch2_btree_iter_unlock(&stats->iter) ?: ret;
+               ret = bch2_trans_iter_free(&trans, iter) ?: ret;
        }
 
+       bch2_trans_exit(&trans);
+
        return ret;
 }
 
index 832542a879ab4292950ff39041e1b0038b6fe800..7703ce43dce932dd4bd9f426d13cdc6328b1df45 100644 (file)
@@ -3,7 +3,8 @@
 
 struct bch_move_stats {
        enum bch_data_type      data_type;
-       struct btree_iter       iter;
+       enum btree_id           btree_id;
+       struct bpos             pos;
 
        atomic64_t              keys_moved;
        atomic64_t              sectors_moved;
index 6606e85cc8dec57048bc885f63778c5bf862f9ae..b988a56599b7f1dccaf5c9ed1ef1d730e2570f18 100644 (file)
@@ -355,11 +355,14 @@ static int __bch2_quota_set(struct bch_fs *c, struct bkey_s_c k)
 
 static int bch2_quota_init_type(struct bch_fs *c, enum quota_types type)
 {
-       struct btree_iter iter;
+       struct btree_trans trans;
+       struct btree_iter *iter;
        struct bkey_s_c k;
        int ret = 0;
 
-       for_each_btree_key(&iter, c, BTREE_ID_QUOTAS, POS(type, 0),
+       bch2_trans_init(&trans, c);
+
+       for_each_btree_key(&trans, iter, BTREE_ID_QUOTAS, POS(type, 0),
                           BTREE_ITER_PREFETCH, k) {
                if (k.k->p.inode != type)
                        break;
@@ -369,7 +372,7 @@ static int bch2_quota_init_type(struct bch_fs *c, enum quota_types type)
                        break;
        }
 
-       return bch2_btree_iter_unlock(&iter) ?: ret;
+       return bch2_trans_exit(&trans) ?: ret;
 }
 
 void bch2_fs_quota_exit(struct bch_fs *c)
@@ -413,7 +416,8 @@ int bch2_fs_quota_read(struct bch_fs *c)
 {
        unsigned i, qtypes = enabled_qtypes(c);
        struct bch_memquota_type *q;
-       struct btree_iter iter;
+       struct btree_trans trans;
+       struct btree_iter *iter;
        struct bch_inode_unpacked u;
        struct bkey_s_c k;
        int ret;
@@ -428,7 +432,9 @@ int bch2_fs_quota_read(struct bch_fs *c)
                        return ret;
        }
 
-       for_each_btree_key(&iter, c, BTREE_ID_INODES, POS_MIN,
+       bch2_trans_init(&trans, c);
+
+       for_each_btree_key(&trans, iter, BTREE_ID_INODES, POS_MIN,
                           BTREE_ITER_PREFETCH, k) {
                switch (k.k->type) {
                case KEY_TYPE_inode:
@@ -442,7 +448,7 @@ int bch2_fs_quota_read(struct bch_fs *c)
                                        KEY_TYPE_QUOTA_NOCHECK);
                }
        }
-       return bch2_btree_iter_unlock(&iter) ?: ret;
+       return bch2_trans_exit(&trans) ?: ret;
 }
 
 /* Enable/disable/delete quotas for an entire filesystem: */
@@ -725,7 +731,7 @@ static int bch2_set_quota(struct super_block *sb, struct kqid qid,
                                   BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
        k = bch2_btree_iter_peek_slot(iter);
 
-       ret = btree_iter_err(k);
+       ret = bkey_err(k);
        if (unlikely(ret))
                return ret;
 
index 768c02845daf8311b5a7eeb7492087d394201d17..d7698451f1ae9d23def458b425efdd03fe77f3ac 100644 (file)
@@ -288,8 +288,8 @@ ssize_t bch2_rebalance_work_show(struct bch_fs *c, char *buf)
        case REBALANCE_RUNNING:
                pr_buf(&out, "running\n");
                pr_buf(&out, "pos %llu:%llu\n",
-                      r->move_stats.iter.pos.inode,
-                      r->move_stats.iter.pos.offset);
+                      r->move_stats.pos.inode,
+                      r->move_stats.pos.offset);
                break;
        }
 
index f78f07bd520212b688b905fbc66f46b98411f468..f928ca99104328b5c836181d6eeb786696d07c6d 100644 (file)
@@ -203,13 +203,16 @@ int bch2_hash_needs_whiteout(struct btree_trans *trans,
        for_each_btree_key_continue(iter, BTREE_ITER_SLOTS, k) {
                if (k.k->type != desc.key_type &&
                    k.k->type != KEY_TYPE_whiteout)
-                       return false;
+                       break;
 
                if (k.k->type == desc.key_type &&
-                   desc.hash_bkey(info, k) <= start->pos.offset)
-                       return true;
+                   desc.hash_bkey(info, k) <= start->pos.offset) {
+                       bch2_trans_iter_free_on_commit(trans, iter);
+                       return 1;
+               }
        }
-       return btree_iter_err(k);
+
+       return bch2_trans_iter_free(trans, iter);
 }
 
 static __always_inline
@@ -220,6 +223,8 @@ int bch2_hash_set(struct btree_trans *trans,
 {
        struct btree_iter *iter, *slot = NULL;
        struct bkey_s_c k;
+       bool found = false;
+       int ret = 0;
 
        iter = bch2_trans_get_iter(trans, desc.btree_id,
                        POS(inode, desc.hash_bkey(info, bkey_i_to_s_c(insert))),
@@ -250,21 +255,30 @@ int bch2_hash_set(struct btree_trans *trans,
                        goto not_found;
        }
 
-       return btree_iter_err(k) ?: -ENOSPC;
-not_found:
-       if (flags & BCH_HASH_SET_MUST_REPLACE)
-               return -ENOENT;
+       if (slot)
+               bch2_trans_iter_free(trans, iter);
 
-       insert->k.p = slot->pos;
-       bch2_trans_update(trans, BTREE_INSERT_ENTRY(slot, insert));
-       return 0;
+       return bch2_trans_iter_free(trans, iter) ?: -ENOSPC;
 found:
-       if (flags & BCH_HASH_SET_MUST_CREATE)
-               return -EEXIST;
+       found = true;
+not_found:
 
-       insert->k.p = iter->pos;
-       bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, insert));
-       return 0;
+       if (!found && (flags & BCH_HASH_SET_MUST_REPLACE)) {
+               ret = -ENOENT;
+       } else if (found && (flags & BCH_HASH_SET_MUST_CREATE)) {
+               ret = -EEXIST;
+       } else {
+               if (!found && slot) {
+                       bch2_trans_iter_free(trans, iter);
+                       iter = slot;
+               }
+
+               insert->k.p = iter->pos;
+               bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, insert));
+               bch2_trans_iter_free_on_commit(trans, iter);
+       }
+
+       return ret;
 }
 
 static __always_inline
index a6d70ce524e91042ca3d426ca99c97f1f50dc1c2..f97315139c713eb656c0ae900ac87d8f22e6d89f 100644 (file)
@@ -281,7 +281,8 @@ static ssize_t show_fs_alloc_debug(struct bch_fs *c, char *buf)
 
 static ssize_t bch2_compression_stats(struct bch_fs *c, char *buf)
 {
-       struct btree_iter iter;
+       struct btree_trans trans;
+       struct btree_iter *iter;
        struct bkey_s_c k;
        u64 nr_uncompressed_extents = 0, uncompressed_sectors = 0,
            nr_compressed_extents = 0,
@@ -291,7 +292,9 @@ static ssize_t bch2_compression_stats(struct bch_fs *c, char *buf)
        if (!test_bit(BCH_FS_STARTED, &c->flags))
                return -EPERM;
 
-       for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS_MIN, 0, k)
+       bch2_trans_init(&trans, c);
+
+       for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS_MIN, 0, k)
                if (k.k->type == KEY_TYPE_extent) {
                        struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
                        const union bch_extent_entry *entry;
@@ -313,7 +316,7 @@ static ssize_t bch2_compression_stats(struct bch_fs *c, char *buf)
                                break;
                        }
                }
-       bch2_btree_iter_unlock(&iter);
+       bch2_trans_exit(&trans);
 
        return scnprintf(buf, PAGE_SIZE,
                        "uncompressed data:\n"
index c9362af5651184c14deebb9134aa54540d4c9b8f..a7b6fef21aef4effd37de2f636dca4569f26f6ff 100644 (file)
@@ -88,11 +88,14 @@ static void test_delete_written(struct bch_fs *c, u64 nr)
 
 static void test_iterate(struct bch_fs *c, u64 nr)
 {
-       struct btree_iter iter;
+       struct btree_trans trans;
+       struct btree_iter *iter;
        struct bkey_s_c k;
        u64 i;
        int ret;
 
+       bch2_trans_init(&trans, c);
+
        delete_test_keys(c);
 
        pr_info("inserting test keys");
@@ -112,28 +115,31 @@ static void test_iterate(struct bch_fs *c, u64 nr)
 
        i = 0;
 
-       for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, POS(0, 0), 0, k)
+       for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, POS(0, 0), 0, k)
                BUG_ON(k.k->p.offset != i++);
-       bch2_btree_iter_unlock(&iter);
 
        BUG_ON(i != nr);
 
        pr_info("iterating backwards");
 
-       while (!IS_ERR_OR_NULL((k = bch2_btree_iter_prev(&iter)).k))
+       while (!IS_ERR_OR_NULL((k = bch2_btree_iter_prev(iter)).k))
                BUG_ON(k.k->p.offset != --i);
-       bch2_btree_iter_unlock(&iter);
 
        BUG_ON(i);
+
+       bch2_trans_exit(&trans);
 }
 
 static void test_iterate_extents(struct bch_fs *c, u64 nr)
 {
-       struct btree_iter iter;
+       struct btree_trans trans;
+       struct btree_iter *iter;
        struct bkey_s_c k;
        u64 i;
        int ret;
 
+       bch2_trans_init(&trans, c);
+
        delete_test_keys(c);
 
        pr_info("inserting test extents");
@@ -154,32 +160,35 @@ static void test_iterate_extents(struct bch_fs *c, u64 nr)
 
        i = 0;
 
-       for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS(0, 0), 0, k) {
+       for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS(0, 0), 0, k) {
                BUG_ON(bkey_start_offset(k.k) != i);
                i = k.k->p.offset;
        }
-       bch2_btree_iter_unlock(&iter);
 
        BUG_ON(i != nr);
 
        pr_info("iterating backwards");
 
-       while (!IS_ERR_OR_NULL((k = bch2_btree_iter_prev(&iter)).k)) {
+       while (!IS_ERR_OR_NULL((k = bch2_btree_iter_prev(iter)).k)) {
                BUG_ON(k.k->p.offset != i);
                i = bkey_start_offset(k.k);
        }
-       bch2_btree_iter_unlock(&iter);
 
        BUG_ON(i);
+
+       bch2_trans_exit(&trans);
 }
 
 static void test_iterate_slots(struct bch_fs *c, u64 nr)
 {
-       struct btree_iter iter;
+       struct btree_trans trans;
+       struct btree_iter *iter;
        struct bkey_s_c k;
        u64 i;
        int ret;
 
+       bch2_trans_init(&trans, c);
+
        delete_test_keys(c);
 
        pr_info("inserting test keys");
@@ -199,11 +208,11 @@ static void test_iterate_slots(struct bch_fs *c, u64 nr)
 
        i = 0;
 
-       for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, POS(0, 0), 0, k) {
+       for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, POS(0, 0), 0, k) {
                BUG_ON(k.k->p.offset != i);
                i += 2;
        }
-       bch2_btree_iter_unlock(&iter);
+       bch2_trans_iter_free(&trans, iter);
 
        BUG_ON(i != nr * 2);
 
@@ -211,7 +220,7 @@ static void test_iterate_slots(struct bch_fs *c, u64 nr)
 
        i = 0;
 
-       for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, POS(0, 0),
+       for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, POS(0, 0),
                           BTREE_ITER_SLOTS, k) {
                BUG_ON(bkey_deleted(k.k) != (i & 1));
                BUG_ON(k.k->p.offset != i++);
@@ -219,16 +228,20 @@ static void test_iterate_slots(struct bch_fs *c, u64 nr)
                if (i == nr * 2)
                        break;
        }
-       bch2_btree_iter_unlock(&iter);
+
+       bch2_trans_exit(&trans);
 }
 
 static void test_iterate_slots_extents(struct bch_fs *c, u64 nr)
 {
-       struct btree_iter iter;
+       struct btree_trans trans;
+       struct btree_iter *iter;
        struct bkey_s_c k;
        u64 i;
        int ret;
 
+       bch2_trans_init(&trans, c);
+
        delete_test_keys(c);
 
        pr_info("inserting test keys");
@@ -249,12 +262,12 @@ static void test_iterate_slots_extents(struct bch_fs *c, u64 nr)
 
        i = 0;
 
-       for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS(0, 0), 0, k) {
+       for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS(0, 0), 0, k) {
                BUG_ON(bkey_start_offset(k.k) != i + 8);
                BUG_ON(k.k->size != 8);
                i += 16;
        }
-       bch2_btree_iter_unlock(&iter);
+       bch2_trans_iter_free(&trans, iter);
 
        BUG_ON(i != nr);
 
@@ -262,7 +275,7 @@ static void test_iterate_slots_extents(struct bch_fs *c, u64 nr)
 
        i = 0;
 
-       for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS(0, 0),
+       for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS(0, 0),
                           BTREE_ITER_SLOTS, k) {
                BUG_ON(bkey_deleted(k.k) != !(i % 16));
 
@@ -273,7 +286,8 @@ static void test_iterate_slots_extents(struct bch_fs *c, u64 nr)
                if (i == nr)
                        break;
        }
-       bch2_btree_iter_unlock(&iter);
+
+       bch2_trans_exit(&trans);
 }
 
 /*
@@ -282,34 +296,40 @@ static void test_iterate_slots_extents(struct bch_fs *c, u64 nr)
  */
 static void test_peek_end(struct bch_fs *c, u64 nr)
 {
-       struct btree_iter iter;
+       struct btree_trans trans;
+       struct btree_iter *iter;
        struct bkey_s_c k;
 
-       bch2_btree_iter_init(&iter, c, BTREE_ID_DIRENTS, POS_MIN, 0);
+       bch2_trans_init(&trans, c);
+
+       iter = bch2_trans_get_iter(&trans, BTREE_ID_DIRENTS, POS_MIN, 0);
 
-       k = bch2_btree_iter_peek(&iter);
+       k = bch2_btree_iter_peek(iter);
        BUG_ON(k.k);
 
-       k = bch2_btree_iter_peek(&iter);
+       k = bch2_btree_iter_peek(iter);
        BUG_ON(k.k);
 
-       bch2_btree_iter_unlock(&iter);
+       bch2_trans_exit(&trans);
 }
 
 static void test_peek_end_extents(struct bch_fs *c, u64 nr)
 {
-       struct btree_iter iter;
+       struct btree_trans trans;
+       struct btree_iter *iter;
        struct bkey_s_c k;
 
-       bch2_btree_iter_init(&iter, c, BTREE_ID_EXTENTS, POS_MIN, 0);
+       bch2_trans_init(&trans, c);
+
+       iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, POS_MIN, 0);
 
-       k = bch2_btree_iter_peek(&iter);
+       k = bch2_btree_iter_peek(iter);
        BUG_ON(k.k);
 
-       k = bch2_btree_iter_peek(&iter);
+       k = bch2_btree_iter_peek(iter);
        BUG_ON(k.k);
 
-       bch2_btree_iter_unlock(&iter);
+       bch2_trans_exit(&trans);
 }
 
 /* extent unit tests */
@@ -400,32 +420,35 @@ static void rand_insert(struct bch_fs *c, u64 nr)
 
 static void rand_lookup(struct bch_fs *c, u64 nr)
 {
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       struct bkey_s_c k;
        u64 i;
 
-       for (i = 0; i < nr; i++) {
-               struct btree_iter iter;
-               struct bkey_s_c k;
+       bch2_trans_init(&trans, c);
 
-               bch2_btree_iter_init(&iter, c, BTREE_ID_DIRENTS,
-                                    POS(0, test_rand()), 0);
+       for (i = 0; i < nr; i++) {
+               iter = bch2_trans_get_iter(&trans, BTREE_ID_DIRENTS,
+                                          POS(0, test_rand()), 0);
 
-               k = bch2_btree_iter_peek(&iter);
-               bch2_btree_iter_unlock(&iter);
+               k = bch2_btree_iter_peek(iter);
+               bch2_trans_iter_free(&trans, iter);
        }
+
+       bch2_trans_exit(&trans);
 }
 
 static void rand_mixed(struct bch_fs *c, u64 nr)
 {
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       struct bkey_s_c k;
        int ret;
        u64 i;
 
-       for (i = 0; i < nr; i++) {
-               struct btree_trans trans;
-               struct btree_iter *iter;
-               struct bkey_s_c k;
-
-               bch2_trans_init(&trans, c);
+       bch2_trans_init(&trans, c);
 
+       for (i = 0; i < nr; i++) {
                iter = bch2_trans_get_iter(&trans, BTREE_ID_DIRENTS,
                                           POS(0, test_rand()), 0);
 
@@ -442,9 +465,10 @@ static void rand_mixed(struct bch_fs *c, u64 nr)
                        BUG_ON(ret);
                }
 
-               bch2_trans_exit(&trans);
+               bch2_trans_iter_free(&trans, iter);
        }
 
+       bch2_trans_exit(&trans);
 }
 
 static void rand_delete(struct bch_fs *c, u64 nr)
@@ -494,12 +518,15 @@ static void seq_insert(struct bch_fs *c, u64 nr)
 
 static void seq_lookup(struct bch_fs *c, u64 nr)
 {
-       struct btree_iter iter;
+       struct btree_trans trans;
+       struct btree_iter *iter;
        struct bkey_s_c k;
 
-       for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, POS_MIN, 0, k)
+       bch2_trans_init(&trans, c);
+
+       for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, POS_MIN, 0, k)
                ;
-       bch2_btree_iter_unlock(&iter);
+       bch2_trans_exit(&trans);
 }
 
 static void seq_overwrite(struct bch_fs *c, u64 nr)
index b204b53bb9d6a3b970a9ca2156ee4f52088815be..5ba52a3ff0b29719208008d780aa94d7c374f768 100644 (file)
@@ -270,12 +270,16 @@ ssize_t bch2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
 {
        struct bch_fs *c = dentry->d_sb->s_fs_info;
        struct bch_inode_info *inode = to_bch_ei(dentry->d_inode);
-       struct btree_iter iter;
+       struct btree_trans trans;
+       struct btree_iter *iter;
        struct bkey_s_c k;
        u64 inum = dentry->d_inode->i_ino;
        ssize_t ret = 0;
 
-       for_each_btree_key(&iter, c, BTREE_ID_XATTRS, POS(inum, 0), 0, k) {
+       bch2_trans_init(&trans, c);
+
+       for_each_btree_key(&trans, iter, BTREE_ID_XATTRS,
+                          POS(inum, 0), 0, k) {
                BUG_ON(k.k->p.inode < inum);
 
                if (k.k->p.inode > inum)
@@ -289,7 +293,7 @@ ssize_t bch2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
                if (ret < 0)
                        break;
        }
-       bch2_btree_iter_unlock(&iter);
+       bch2_trans_exit(&trans);
 
        if (ret < 0)
                return ret;