]> git.sesse.net Git - bcachefs-tools-debian/blobdiff - libbcachefs/btree_iter.c
Update bcachefs sources to b964c6cba8 bcachefs: Change lockrestart_do() to always...
[bcachefs-tools-debian] / libbcachefs / btree_iter.c
index c747398108d6a189597c951c7bfb6dcb9d56ebb6..3af00e24a84c14856add9a336534e251c73b68a9 100644 (file)
@@ -189,6 +189,7 @@ static inline bool btree_iter_get_locks(struct btree_iter *iter, bool upgrade,
                        (upgrade
                         ? trace_node_upgrade_fail
                         : trace_node_relock_fail)(iter->trans->ip, trace_ip,
+                                       btree_iter_type(iter) == BTREE_ITER_CACHED,
                                        iter->btree_id, &iter->real_pos,
                                        l, iter->l[l].lock_seq,
                                        is_btree_node(iter, l)
@@ -197,7 +198,6 @@ static inline bool btree_iter_get_locks(struct btree_iter *iter, bool upgrade,
                                        is_btree_node(iter, l)
                                        ? iter->l[l].b->c.lock.state.seq
                                        : 0);
-
                        fail_idx = l;
                        btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
                }
@@ -309,7 +309,7 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
        }
 
        if (unlikely(deadlock_iter)) {
-               trace_trans_restart_would_deadlock(iter->trans->ip, ip,
+               trace_trans_restart_would_deadlock(trans->ip, ip,
                                trans->in_traverse_all, reason,
                                deadlock_iter->btree_id,
                                btree_iter_type(deadlock_iter),
@@ -317,6 +317,7 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
                                iter->btree_id,
                                btree_iter_type(iter),
                                &pos);
+               btree_trans_restart(trans);
                return false;
        }
 
@@ -375,10 +376,44 @@ void bch2_btree_trans_verify_locks(struct btree_trans *trans)
 static inline void bch2_btree_iter_verify_locks(struct btree_iter *iter) {}
 #endif
 
+/*
+ * Only for btree_cache.c - only relocks intent locks
+ */
+bool bch2_btree_iter_relock_intent(struct btree_iter *iter)
+{
+       unsigned l;
+
+       for (l = iter->level;
+            l < iter->locks_want && btree_iter_node(iter, l);
+            l++) {
+               if (!bch2_btree_node_relock(iter, l)) {
+                       trace_node_relock_fail(iter->trans->ip, _RET_IP_,
+                                       btree_iter_type(iter) == BTREE_ITER_CACHED,
+                                       iter->btree_id, &iter->real_pos,
+                                       l, iter->l[l].lock_seq,
+                                       is_btree_node(iter, l)
+                                       ? 0
+                                       : (unsigned long) iter->l[l].b,
+                                       is_btree_node(iter, l)
+                                       ? iter->l[l].b->c.lock.state.seq
+                                       : 0);
+                       btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
+                       btree_trans_restart(iter->trans);
+                       return false;
+               }
+       }
+
+       return true;
+}
+
 __flatten
 bool bch2_btree_iter_relock(struct btree_iter *iter, unsigned long trace_ip)
 {
-       return btree_iter_get_locks(iter, false, trace_ip);
+       bool ret = btree_iter_get_locks(iter, false, trace_ip);
+
+       if (!ret)
+               btree_trans_restart(iter->trans);
+       return ret;
 }
 
 bool __bch2_btree_iter_upgrade(struct btree_iter *iter,
@@ -421,6 +456,8 @@ bool __bch2_btree_iter_upgrade(struct btree_iter *iter,
                        btree_iter_get_locks(linked, true, _THIS_IP_);
                }
 
+       if (iter->should_be_locked)
+               btree_trans_restart(iter->trans);
        return false;
 }
 
@@ -459,8 +496,7 @@ void bch2_trans_downgrade(struct btree_trans *trans)
 
 /* Btree transaction locking: */
 
-static inline bool btree_iter_should_be_locked(struct btree_trans *trans,
-                                              struct btree_iter *iter)
+static inline bool btree_iter_should_be_locked(struct btree_iter *iter)
 {
        return (iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT) ||
                iter->should_be_locked;
@@ -470,11 +506,15 @@ bool bch2_trans_relock(struct btree_trans *trans)
 {
        struct btree_iter *iter;
 
+       if (unlikely(trans->restarted))
+               return false;
+
        trans_for_each_iter(trans, iter)
-               if (!bch2_btree_iter_relock(iter, _RET_IP_) &&
-                   btree_iter_should_be_locked(trans, iter)) {
+               if (btree_iter_should_be_locked(iter) &&
+                   !bch2_btree_iter_relock(iter, _RET_IP_)) {
                        trace_trans_restart_relock(trans->ip, _RET_IP_,
                                        iter->btree_id, &iter->real_pos);
+                       BUG_ON(!trans->restarted);
                        return false;
                }
        return true;
@@ -1055,11 +1095,12 @@ static int lock_root_check_fn(struct six_lock *lock, void *p)
        return b == *rootp ? 0 : -1;
 }
 
-static inline int btree_iter_lock_root(struct btree_iter *iter,
+static inline int btree_iter_lock_root(struct btree_trans *trans,
+                                      struct btree_iter *iter,
                                       unsigned depth_want,
                                       unsigned long trace_ip)
 {
-       struct bch_fs *c = iter->trans->c;
+       struct bch_fs *c = trans->c;
        struct btree *b, **rootp = &c->btree_roots[iter->btree_id].b;
        enum six_lock_type lock_type;
        unsigned i;
@@ -1087,8 +1128,11 @@ static inline int btree_iter_lock_root(struct btree_iter *iter,
                if (unlikely(!btree_node_lock(b, SPOS_MAX, iter->level,
                                              iter, lock_type,
                                              lock_root_check_fn, rootp,
-                                             trace_ip)))
-                       return -EINTR;
+                                             trace_ip))) {
+                       if (trans->restarted)
+                               return -EINTR;
+                       continue;
+               }
 
                if (likely(b == READ_ONCE(*rootp) &&
                           b->c.level == iter->level &&
@@ -1109,7 +1153,7 @@ static inline int btree_iter_lock_root(struct btree_iter *iter,
 }
 
 noinline
-static void btree_iter_prefetch(struct btree_iter *iter)
+static int btree_iter_prefetch(struct btree_iter *iter)
 {
        struct bch_fs *c = iter->trans->c;
        struct btree_iter_level *l = &iter->l[iter->level];
@@ -1120,10 +1164,11 @@ static void btree_iter_prefetch(struct btree_iter *iter)
                ? (iter->level > 1 ? 0 :  2)
                : (iter->level > 1 ? 1 : 16);
        bool was_locked = btree_node_locked(iter, iter->level);
+       int ret = 0;
 
        bch2_bkey_buf_init(&tmp);
 
-       while (nr) {
+       while (nr && !ret) {
                if (!bch2_btree_node_relock(iter, iter->level))
                        break;
 
@@ -1133,14 +1178,15 @@ static void btree_iter_prefetch(struct btree_iter *iter)
                        break;
 
                bch2_bkey_buf_unpack(&tmp, c, l->b, k);
-               bch2_btree_node_prefetch(c, iter, tmp.k, iter->btree_id,
-                                        iter->level - 1);
+               ret = bch2_btree_node_prefetch(c, iter, tmp.k, iter->btree_id,
+                                              iter->level - 1);
        }
 
        if (!was_locked)
                btree_node_unlock(iter, iter->level);
 
        bch2_bkey_buf_exit(&tmp, c);
+       return ret;
 }
 
 static noinline void btree_node_mem_ptr_set(struct btree_iter *iter,
@@ -1164,10 +1210,11 @@ static noinline void btree_node_mem_ptr_set(struct btree_iter *iter,
                btree_node_unlock(iter, plevel);
 }
 
-static __always_inline int btree_iter_down(struct btree_iter *iter,
+static __always_inline int btree_iter_down(struct btree_trans *trans,
+                                          struct btree_iter *iter,
                                           unsigned long trace_ip)
 {
-       struct bch_fs *c = iter->trans->c;
+       struct bch_fs *c = trans->c;
        struct btree_iter_level *l = &iter->l[iter->level];
        struct btree *b;
        unsigned level = iter->level - 1;
@@ -1181,7 +1228,7 @@ static __always_inline int btree_iter_down(struct btree_iter *iter,
        bch2_bkey_buf_unpack(&tmp, c, l->b,
                         bch2_btree_node_iter_peek(&l->iter, l->b));
 
-       b = bch2_btree_node_get(c, iter, tmp.k, level, lock_type, trace_ip);
+       b = bch2_btree_node_get(trans, iter, tmp.k, level, lock_type, trace_ip);
        ret = PTR_ERR_OR_ZERO(b);
        if (unlikely(ret))
                goto err;
@@ -1194,7 +1241,7 @@ static __always_inline int btree_iter_down(struct btree_iter *iter,
                btree_node_mem_ptr_set(iter, level + 1, b);
 
        if (iter->flags & BTREE_ITER_PREFETCH)
-               btree_iter_prefetch(iter);
+               ret = btree_iter_prefetch(iter);
 
        if (btree_node_read_locked(iter, level + 1))
                btree_node_unlock(iter, level + 1);
@@ -1215,25 +1262,19 @@ static int __btree_iter_traverse_all(struct btree_trans *trans, int ret,
        struct btree_iter *iter;
        u8 sorted[BTREE_ITER_MAX];
        int i, nr_sorted = 0;
-       bool relock_fail;
 
        if (trans->in_traverse_all)
                return -EINTR;
 
        trans->in_traverse_all = true;
 retry_all:
+       trans->restarted = false;
+
        nr_sorted = 0;
-       relock_fail = false;
 
        trans_for_each_iter(trans, iter) {
-               if (!bch2_btree_iter_relock(iter, _THIS_IP_))
-                       relock_fail = true;
                sorted[nr_sorted++] = iter->idx;
-       }
-
-       if (!relock_fail) {
-               trans->in_traverse_all = false;
-               return 0;
+               iter->should_be_locked = false;
        }
 
 #define btree_iter_cmp_by_idx(_l, _r)                          \
@@ -1289,15 +1330,6 @@ retry_all:
                if (ret)
                        goto retry_all;
        }
-
-       if (hweight64(trans->iters_live) > 1)
-               ret = -EINTR;
-       else
-               trans_for_each_iter(trans, iter)
-                       if (iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT) {
-                               ret = -EINTR;
-                               break;
-                       }
 out:
        bch2_btree_cache_cannibalize_unlock(c);
 
@@ -1307,7 +1339,7 @@ out:
        return ret;
 }
 
-int bch2_btree_iter_traverse_all(struct btree_trans *trans)
+static int bch2_btree_iter_traverse_all(struct btree_trans *trans)
 {
        return __btree_iter_traverse_all(trans, 0, _RET_IP_);
 }
@@ -1353,9 +1385,19 @@ static inline unsigned btree_iter_up_until_good_node(struct btree_iter *iter,
 static int btree_iter_traverse_one(struct btree_iter *iter,
                                   unsigned long trace_ip)
 {
+       struct btree_trans *trans = iter->trans;
        unsigned l, depth_want = iter->level;
        int ret = 0;
 
+       /*
+        * Ensure we obey iter->should_be_locked: if it's set, we can't unlock
+        * and re-traverse the iterator without a transaction restart:
+        */
+       if (iter->should_be_locked) {
+               ret = bch2_btree_iter_relock(iter, trace_ip) ? 0 : -EINTR;
+               goto out;
+       }
+
        if (btree_iter_type(iter) == BTREE_ITER_CACHED) {
                ret = bch2_btree_iter_traverse_cached(iter);
                goto out;
@@ -1385,8 +1427,8 @@ static int btree_iter_traverse_one(struct btree_iter *iter,
         */
        while (iter->level > depth_want) {
                ret = btree_iter_node(iter, iter->level)
-                       ? btree_iter_down(iter, trace_ip)
-                       : btree_iter_lock_root(iter, depth_want, trace_ip);
+                       ? btree_iter_down(trans, iter, trace_ip)
+                       : btree_iter_lock_root(trans, iter, depth_want, trace_ip);
                if (unlikely(ret)) {
                        if (ret == 1) {
                                /*
@@ -1414,7 +1456,9 @@ static int btree_iter_traverse_one(struct btree_iter *iter,
 
        iter->uptodate = BTREE_ITER_NEED_PEEK;
 out:
-       trace_iter_traverse(iter->trans->ip, trace_ip,
+       BUG_ON((ret == -EINTR) != !!trans->restarted);
+       trace_iter_traverse(trans->ip, trace_ip,
+                           btree_iter_type(iter) == BTREE_ITER_CACHED,
                            iter->btree_id, &iter->real_pos, ret);
        bch2_btree_iter_verify(iter);
        return ret;
@@ -1427,8 +1471,10 @@ static int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
 
        ret =   bch2_trans_cond_resched(trans) ?:
                btree_iter_traverse_one(iter, _RET_IP_);
-       if (unlikely(ret))
+       if (unlikely(ret) && hweight64(trans->iters_linked) == 1) {
                ret = __btree_iter_traverse_all(trans, ret, _RET_IP_);
+               BUG_ON(ret == -EINTR);
+       }
 
        return ret;
 }
@@ -1559,6 +1605,8 @@ static void btree_iter_set_search_pos(struct btree_iter *iter, struct bpos new_p
        int cmp = bpos_cmp(new_pos, iter->real_pos);
        unsigned l = iter->level;
 
+       EBUG_ON(iter->trans->restarted);
+
        if (!cmp)
                goto out;
 
@@ -2118,6 +2166,8 @@ struct btree_iter *__bch2_trans_get_iter(struct btree_trans *trans,
        struct btree_iter *iter, *best = NULL;
        struct bpos real_pos, pos_min = POS_MIN;
 
+       EBUG_ON(trans->restarted);
+
        if ((flags & BTREE_ITER_TYPE) != BTREE_ITER_NODES &&
            btree_node_type_is_extents(btree_id) &&
            !(flags & BTREE_ITER_NOT_EXTENTS) &&
@@ -2282,6 +2332,7 @@ void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
 
                if (old_bytes) {
                        trace_trans_restart_mem_realloced(trans->ip, _RET_IP_, new_bytes);
+                       btree_trans_restart(trans);
                        return ERR_PTR(-EINTR);
                }
        }
@@ -2307,33 +2358,26 @@ inline void bch2_trans_unlink_iters(struct btree_trans *trans)
 }
 
 /**
- * bch2_trans_reset() - reset a transaction after a interrupted attempt
+ * bch2_trans_begin() - reset a transaction after a interrupted attempt
  * @trans: transaction to reset
- * @flags: transaction reset flags.
  *
  * While iterating over nodes or updating nodes a attempt to lock a btree
  * node may return EINTR when the trylock fails. When this occurs
- * bch2_trans_reset() or bch2_trans_begin() should be called and the
- * transaction retried.
- *
- * Transaction reset flags include:
- *
- *  - TRANS_RESET_NOUNLOCK   - Do not attempt to unlock and reschedule the
- *                            transaction.
- *  - TRANS_RESET_NOTRAVERSE - Do not traverse all linked iters.
+ * bch2_trans_begin() should be called and the transaction retried.
  */
-void bch2_trans_reset(struct btree_trans *trans, unsigned flags)
+void bch2_trans_begin(struct btree_trans *trans)
 {
        struct btree_iter *iter;
 
-       trans_for_each_iter(trans, iter) {
+       trans_for_each_iter(trans, iter)
                iter->flags &= ~(BTREE_ITER_KEEP_UNTIL_COMMIT|
                                 BTREE_ITER_SET_POS_AFTER_COMMIT);
-               iter->should_be_locked = false;
-       }
 
+       /*
+        * XXX: we shouldn't be doing this if the transaction was restarted, but
+        * currently we still overflow transaction iterators if we do that
+        * */
        bch2_trans_unlink_iters(trans);
-
        trans->iters_touched &= trans->iters_live;
 
        trans->extra_journal_res        = 0;
@@ -2351,12 +2395,12 @@ void bch2_trans_reset(struct btree_trans *trans, unsigned flags)
                       (void *) &trans->fs_usage_deltas->memset_start);
        }
 
-       if (!(flags & TRANS_RESET_NOUNLOCK))
-               bch2_trans_cond_resched(trans);
+       bch2_trans_cond_resched(trans);
 
-       if (!(flags & TRANS_RESET_NOTRAVERSE) &&
-           trans->iters_linked)
+       if (trans->restarted)
                bch2_btree_iter_traverse_all(trans);
+
+       trans->restarted = false;
 }
 
 static void bch2_trans_alloc_iters(struct btree_trans *trans, struct bch_fs *c)