Update bcachefs sources to 1e3ca87f7b bcachefs: bcachefs_metadata_version_major_minor
diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c
index 3977bb1fd8345bb8b9c0b970e56f15e3f9fcb853..e292c5a2a895e5db8ce42af10813a95dc8710661 100644
--- a/libbcachefs/btree_iter.c
+++ b/libbcachefs/btree_iter.c
 #include "recovery.h"
 #include "replicas.h"
 #include "subvolume.h"
+#include "trace.h"
 
-#include <linux/prandom.h>
+#include <linux/random.h>
 #include <linux/prefetch.h>
-#include <trace/events/bcachefs.h>
 
 static inline void btree_path_list_remove(struct btree_trans *, struct btree_path *);
 static inline void btree_path_list_add(struct btree_trans *, struct btree_path *,
@@ -41,13 +41,10 @@ static struct btree_path *btree_path_alloc(struct btree_trans *, struct btree_pa
  */
 static inline int bch2_trans_cond_resched(struct btree_trans *trans)
 {
-       if (need_resched() || race_fault()) {
-               bch2_trans_unlock(trans);
-               schedule();
-               return bch2_trans_relock(trans);
-       } else {
+       if (need_resched() || race_fault())
+               return drop_locks_do(trans, (schedule(), 0));
+       else
                return 0;
-       }
 }
 
 static inline int __btree_path_cmp(const struct btree_path *l,
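Note: the rewritten bch2_trans_cond_resched() leans on the drop_locks_do() helper, which unlocks the transaction, evaluates the expression, then relocks, returning a transaction-restart error if the relock fails. In this tree the macro (btree_locking.h) is approximately:

	#define drop_locks_do(_trans, _do)				\
	({								\
		bch2_trans_unlock(_trans);				\
		_do ?: bch2_trans_relock(_trans);			\
	})

Passing (schedule(), 0) as _do means the expression itself contributes no error, so the result is whatever bch2_trans_relock() returns.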
@@ -241,7 +238,7 @@ static void bch2_btree_path_verify(struct btree_trans *trans,
        for (i = 0; i < (!path->cached ? BTREE_MAX_DEPTH : 1); i++) {
                if (!path->l[i].b) {
                        BUG_ON(!path->cached &&
-                              c->btree_roots[path->btree_id].b->c.level > i);
+                              bch2_btree_id_root(c, path->btree_id)->b->c.level > i);
                        break;
                }
 
@@ -652,9 +649,8 @@ void bch2_btree_path_level_init(struct btree_trans *trans,
        BUG_ON(path->cached);
 
        EBUG_ON(!btree_path_pos_in_node(path, b));
-       EBUG_ON(b->c.lock.state.seq & 1);
 
-       path->l[b->c.level].lock_seq = b->c.lock.state.seq;
+       path->l[b->c.level].lock_seq = six_lock_seq(&b->c.lock);
        path->l[b->c.level].b = b;
        __btree_path_level_init(path, b->c.level);
 }
@@ -704,8 +700,8 @@ void bch2_trans_node_add(struct btree_trans *trans, struct btree *b)
 
                        if (t != BTREE_NODE_UNLOCKED) {
                                btree_node_unlock(trans, path, b->c.level);
-                               six_lock_increment(&b->c.lock, t);
-                               mark_btree_node_locked(trans, path, b->c.level, t);
+                               six_lock_increment(&b->c.lock, (enum six_lock_type) t);
+                               mark_btree_node_locked(trans, path, b->c.level, (enum six_lock_type) t);
                        }
 
                        bch2_btree_path_level_init(trans, path, b);
@@ -736,7 +732,7 @@ static inline int btree_path_lock_root(struct btree_trans *trans,
                                       unsigned long trace_ip)
 {
        struct bch_fs *c = trans->c;
-       struct btree *b, **rootp = &c->btree_roots[path->btree_id].b;
+       struct btree *b, **rootp = &bch2_btree_id_root(c, path->btree_id)->b;
        enum six_lock_type lock_type;
        unsigned i;
        int ret;
@@ -815,7 +811,7 @@ static int btree_path_prefetch(struct btree_trans *trans, struct btree_path *pat
                        break;
 
                bch2_bkey_buf_unpack(&tmp, c, l->b, k);
-               ret = bch2_btree_node_prefetch(c, trans, path, tmp.k, path->btree_id,
+               ret = bch2_btree_node_prefetch(trans, path, tmp.k, path->btree_id,
                                               path->level - 1);
        }
 
@@ -850,7 +846,7 @@ static int btree_path_prefetch_j(struct btree_trans *trans, struct btree_path *p
                        break;
 
                bch2_bkey_buf_reassemble(&tmp, c, k);
-               ret = bch2_btree_node_prefetch(c, trans, path, tmp.k, path->btree_id,
+               ret = bch2_btree_node_prefetch(trans, path, tmp.k, path->btree_id,
                                               path->level - 1);
        }
 
@@ -976,6 +972,7 @@ static int bch2_btree_path_traverse_all(struct btree_trans *trans)
        trans->in_traverse_all = true;
 retry_all:
        trans->restarted = 0;
+       trans->last_restarted_ip = 0;
 
        trans_for_each_path(trans, path)
                path->should_be_locked = false;
@@ -1011,7 +1008,7 @@ retry_all:
                        __btree_path_put(path, false);
 
                        if (bch2_err_matches(ret, BCH_ERR_transaction_restart) ||
-                           ret == -ENOMEM)
+                           bch2_err_matches(ret, ENOMEM))
                                goto retry_all;
                        if (ret)
                                goto err;
@@ -1174,17 +1171,10 @@ int bch2_btree_path_traverse_one(struct btree_trans *trans,
 
        path->uptodate = BTREE_ITER_UPTODATE;
 out:
-       if (bch2_err_matches(ret, BCH_ERR_transaction_restart) != !!trans->restarted) {
-               struct printbuf buf = PRINTBUF;
-
-               prt_printf(&buf, "ret %s (%i) trans->restarted %s (%i)\n",
-                          bch2_err_str(ret), ret,
-                          bch2_err_str(trans->restarted), trans->restarted);
-#ifdef CONFIG_BCACHEFS_DEBUG
-               bch2_prt_backtrace(&buf, &trans->last_restarted);
-#endif
-               panic("%s", buf.buf);
-       }
+       if (bch2_err_matches(ret, BCH_ERR_transaction_restart) != !!trans->restarted)
+               panic("ret %s (%i) trans->restarted %s (%i)\n",
+                     bch2_err_str(ret), ret,
+                     bch2_err_str(trans->restarted), trans->restarted);
        bch2_btree_path_verify(trans, path);
        return ret;
 }
@@ -1374,7 +1364,7 @@ void bch2_trans_in_restart_error(struct btree_trans *trans)
 {
        panic("in transaction restart: %s, last restarted by %pS\n",
              bch2_err_str(trans->restarted),
-             (void *) trans->last_begin_ip);
+             (void *) trans->last_restarted_ip);
 }
 
 noinline __cold
@@ -1448,7 +1438,7 @@ void bch2_btree_path_to_text(struct printbuf *out, struct btree_path *path)
        prt_newline(out);
 }
 
-noinline __cold
+static noinline __cold
 void __bch2_trans_paths_to_text(struct printbuf *out, struct btree_trans *trans,
                                bool nosort)
 {
@@ -1468,7 +1458,7 @@ void bch2_trans_paths_to_text(struct printbuf *out, struct btree_trans *trans)
        __bch2_trans_paths_to_text(out, trans, false);
 }
 
-noinline __cold
+static noinline __cold
 void __bch2_dump_trans_paths_updates(struct btree_trans *trans, bool nosort)
 {
        struct printbuf buf = PRINTBUF;
@@ -1729,6 +1719,17 @@ err:
        goto out;
 }
 
+struct btree *bch2_btree_iter_peek_node_and_restart(struct btree_iter *iter)
+{
+       struct btree *b;
+
+       while (b = bch2_btree_iter_peek_node(iter),
+              bch2_err_matches(PTR_ERR_OR_ZERO(b), BCH_ERR_transaction_restart))
+               bch2_trans_begin(iter->trans);
+
+       return b;
+}
+
 struct btree *bch2_btree_iter_next_node(struct btree_iter *iter)
 {
        struct btree_trans *trans = iter->trans;
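The new bch2_btree_iter_peek_node_and_restart() lets node iteration transparently retry across transaction restarts. Its expected consumer is a for_each_btree_node()-style loop; a sketch based on the btree_iter.h macro of this era:

	#define for_each_btree_node(_trans, _iter, _btree_id, _start,	\
				    _flags, _b, _ret)			\
		for (bch2_trans_node_iter_init((_trans), &(_iter),	\
				(_btree_id), _start, 0, 0, _flags);	\
		     (_b) = bch2_btree_iter_peek_node_and_restart(&(_iter)),\
		     !((_ret) = PTR_ERR_OR_ZERO(_b)) && (_b);		\
		     (_b) = bch2_btree_iter_next_node(&(_iter)))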
@@ -1866,9 +1867,9 @@ static inline struct bkey_i *btree_trans_peek_updates(struct btree_iter *iter)
                : NULL;
 }
 
-struct bkey_i *bch2_btree_journal_peek(struct btree_trans *trans,
-                                      struct btree_iter *iter,
-                                      struct bpos end_pos)
+static struct bkey_i *bch2_btree_journal_peek(struct btree_trans *trans,
+                                             struct btree_iter *iter,
+                                             struct bpos end_pos)
 {
        struct bkey_i *k;
 
@@ -2574,6 +2575,18 @@ struct bkey_s_c bch2_btree_iter_prev_slot(struct btree_iter *iter)
        return bch2_btree_iter_peek_slot(iter);
 }
 
+struct bkey_s_c bch2_btree_iter_peek_and_restart_outlined(struct btree_iter *iter)
+{
+       struct bkey_s_c k;
+
+       while (btree_trans_too_many_iters(iter->trans) ||
+              (k = bch2_btree_iter_peek_type(iter, iter->flags),
+               bch2_err_matches(bkey_err(k), BCH_ERR_transaction_restart)))
+               bch2_trans_begin(iter->trans);
+
+       return k;
+}
+
 /* new transactional stuff: */
 
 #ifdef CONFIG_BCACHEFS_DEBUG
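The "outlined" suffix marks this as an out-of-line copy of the peek-and-restart idiom, keeping the code-size cost out of callers that don't need it inlined. Minus the btree_trans_too_many_iters() check, the loop is equivalent to this sketch:

	struct bkey_s_c k;

	while (1) {
		k = bch2_btree_iter_peek(&iter);
		if (!bch2_err_matches(bkey_err(k), BCH_ERR_transaction_restart))
			break;
		bch2_trans_begin(trans);	/* restart: retry the peek */
	}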
@@ -2705,12 +2718,12 @@ static inline void btree_path_list_add(struct btree_trans *trans,
 
 void bch2_trans_iter_exit(struct btree_trans *trans, struct btree_iter *iter)
 {
-       if (iter->path)
-               bch2_path_put(trans, iter->path,
-                             iter->flags & BTREE_ITER_INTENT);
        if (iter->update_path)
                bch2_path_put_nokeep(trans, iter->update_path,
                              iter->flags & BTREE_ITER_INTENT);
+       if (iter->path)
+               bch2_path_put(trans, iter->path,
+                             iter->flags & BTREE_ITER_INTENT);
        if (iter->key_cache_path)
                bch2_path_put(trans, iter->key_cache_path,
                              iter->flags & BTREE_ITER_INTENT);
@@ -2777,6 +2790,7 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
        unsigned new_top = trans->mem_top + size;
        size_t old_bytes = trans->mem_bytes;
        size_t new_bytes = roundup_pow_of_two(new_top);
+       int ret;
        void *new_mem;
        void *p;
 
@@ -2784,15 +2798,27 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
 
        WARN_ON_ONCE(new_bytes > BTREE_TRANS_MEM_MAX);
 
-       new_mem = krealloc(trans->mem, new_bytes, GFP_NOFS);
-       if (!new_mem && new_bytes <= BTREE_TRANS_MEM_MAX) {
-               new_mem = mempool_alloc(&trans->c->btree_trans_mem_pool, GFP_KERNEL);
-               new_bytes = BTREE_TRANS_MEM_MAX;
-               kfree(trans->mem);
-       }
+       new_mem = krealloc(trans->mem, new_bytes, GFP_NOWAIT|__GFP_NOWARN);
+       if (unlikely(!new_mem)) {
+               bch2_trans_unlock(trans);
+
+               new_mem = krealloc(trans->mem, new_bytes, GFP_KERNEL);
+               if (!new_mem && new_bytes <= BTREE_TRANS_MEM_MAX) {
+                       new_mem = mempool_alloc(&trans->c->btree_trans_mem_pool, GFP_KERNEL);
+                       new_bytes = BTREE_TRANS_MEM_MAX;
+                       kfree(trans->mem);
+               }
+
+               if (!new_mem)
+                       return ERR_PTR(-BCH_ERR_ENOMEM_trans_kmalloc);
 
-       if (!new_mem)
-               return ERR_PTR(-ENOMEM);
+               trans->mem = new_mem;
+               trans->mem_bytes = new_bytes;
+
+               ret = bch2_trans_relock(trans);
+               if (ret)
+                       return ERR_PTR(ret);
+       }
 
        trans->mem = new_mem;
        trans->mem_bytes = new_bytes;
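__bch2_trans_kmalloc() now attempts a nonblocking GFP_NOWAIT allocation first, so the fast path never drops locks; only on failure does it unlock, retry with GFP_KERNEL (falling back to the mempool), and relock. Since bch2_trans_relock() can fail with a transaction-restart error, callers must treat the returned pointer as possibly carrying a restart, e.g. (hypothetical caller):

	struct bkey_i *update = bch2_trans_kmalloc(trans, sizeof(*update));
	int ret = PTR_ERR_OR_ZERO(update);

	if (ret)	/* ENOMEM_trans_kmalloc or a transaction restart */
		return ret;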
@@ -2833,6 +2859,7 @@ static noinline void bch2_trans_reset_srcu_lock(struct btree_trans *trans)
 u32 bch2_trans_begin(struct btree_trans *trans)
 {
        struct btree_path *path;
+       u64 now;
 
        bch2_trans_reset_updates(trans);
 
@@ -2861,13 +2888,14 @@ u32 bch2_trans_begin(struct btree_trans *trans)
                        path->preserve = false;
        }
 
+       now = local_clock();
        if (!trans->restarted &&
            (need_resched() ||
-            local_clock() - trans->last_begin_time > BTREE_TRANS_MAX_LOCK_HOLD_TIME_NS)) {
-               bch2_trans_unlock(trans);
-               cond_resched();
-               bch2_trans_relock(trans);
+            now - trans->last_begin_time > BTREE_TRANS_MAX_LOCK_HOLD_TIME_NS)) {
+               drop_locks_do(trans, (cond_resched(), 0));
+               now = local_clock();
        }
+       trans->last_begin_time = now;
 
        if (unlikely(time_after(jiffies, trans->srcu_lock_time + msecs_to_jiffies(10))))
                bch2_trans_reset_srcu_lock(trans);
@@ -2878,7 +2906,6 @@ u32 bch2_trans_begin(struct btree_trans *trans)
                trans->notrace_relock_fail = false;
        }
 
-       trans->last_begin_time = local_clock();
        return trans->restart_count;
 }
 
@@ -2895,6 +2922,10 @@ static void bch2_trans_alloc_paths(struct btree_trans *trans, struct bch_fs *c)
 #endif
        if (!p)
                p = mempool_alloc(&trans->c->btree_paths_pool, GFP_NOFS);
+       /*
+        * paths need to be zeroed, bch2_check_for_deadlock looks at paths in
+        * other threads
+        */
 
        trans->paths            = p; p += paths_bytes;
        trans->updates          = p; p += updates_bytes;
@@ -2922,7 +2953,7 @@ void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, unsigned fn_
 {
        struct btree_transaction_stats *s;
 
-       BUG_ON(lock_class_is_held(&bch2_btree_node_lock_key));
+       bch2_assert_btree_nodes_not_locked();
 
        memset(trans, 0, sizeof(*trans));
        trans->c                = c;
@@ -2962,8 +2993,17 @@ void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, unsigned fn_
        if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG_TRANSACTIONS)) {
                struct btree_trans *pos;
 
-               mutex_lock(&c->btree_trans_lock);
+               seqmutex_lock(&c->btree_trans_lock);
                list_for_each_entry(pos, &c->btree_trans_list, list) {
+                       /*
+                        * We'd much prefer to be stricter here and completely
+                        * disallow multiple btree_trans in the same thread -
+                        * but the data move path calls bch2_write when we
+                        * already have a btree_trans initialized.
+                        */
+                       BUG_ON(trans->locking_wait.task->pid == pos->locking_wait.task->pid &&
+                              bch2_trans_locked(pos));
+
                        if (trans->locking_wait.task->pid < pos->locking_wait.task->pid) {
                                list_add_tail(&trans->list, &pos->list);
                                goto list_add_done;
@@ -2971,7 +3011,7 @@ void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, unsigned fn_
                }
                list_add_tail(&trans->list, &c->btree_trans_list);
 list_add_done:
-               mutex_unlock(&c->btree_trans_lock);
+               seqmutex_unlock(&c->btree_trans_lock);
        }
 }
 
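btree_trans_lock becomes a seqmutex so that debug code iterating btree_trans_list can drop the lock and later detect whether the list changed underneath it. A seqmutex is just a mutex plus a sequence number bumped on unlock; roughly (sketch of this tree's seqmutex.h):

	struct seqmutex {
		struct mutex	lock;
		u32		seq;
	};

	static inline void seqmutex_unlock(struct seqmutex *lock)
	{
		lock->seq++;
		mutex_unlock(&lock->lock);
	}

	static inline bool seqmutex_relock(struct seqmutex *lock, u32 seq)
	{
		if (lock->seq != seq || !mutex_trylock(&lock->lock))
			return false;

		if (lock->seq != seq) {	/* re-check after acquiring */
			mutex_unlock(&lock->lock);
			return false;
		}
		return true;
	}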
@@ -3006,6 +3046,12 @@ void bch2_trans_exit(struct btree_trans *trans)
 
        bch2_trans_unlock(trans);
 
+       if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG_TRANSACTIONS)) {
+               seqmutex_lock(&c->btree_trans_lock);
+               list_del(&trans->list);
+               seqmutex_unlock(&c->btree_trans_lock);
+       }
+
        closure_sync(&trans->ref);
 
        if (s)
@@ -3017,12 +3063,6 @@ void bch2_trans_exit(struct btree_trans *trans)
 
        check_btree_paths_leaked(trans);
 
-       if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG_TRANSACTIONS)) {
-               mutex_lock(&c->btree_trans_lock);
-               list_del(&trans->list);
-               mutex_unlock(&c->btree_trans_lock);
-       }
-
        srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx);
 
        bch2_journal_preres_put(&c->journal, &trans->journal_preres);
@@ -3053,10 +3093,6 @@ void bch2_trans_exit(struct btree_trans *trans)
        if (trans->paths)
                mempool_free(trans->paths, &c->btree_paths_pool);
 
-#ifdef CONFIG_BCACHEFS_DEBUG
-       darray_exit(&trans->last_restarted);
-#endif
-
        trans->mem      = (void *) 0x1;
        trans->paths    = (void *) 0x1;
 }
@@ -3089,7 +3125,7 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct btree_trans *trans)
        struct btree_path *path;
        struct btree_bkey_cached_common *b;
        static char lock_types[] = { 'r', 'i', 'w' };
-       unsigned l;
+       unsigned l, idx;
 
        if (!out->nr_tabstops) {
                printbuf_tabstop_push(out, 16);
@@ -3098,7 +3134,7 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct btree_trans *trans)
 
        prt_printf(out, "%i %s\n", trans->locking_wait.task->pid, trans->fn);
 
-       trans_for_each_path(trans, path) {
+       trans_for_each_path_safe(trans, path, idx) {
                if (!path->nodes_locked)
                        continue;
 
@@ -3164,7 +3200,7 @@ int bch2_fs_btree_iter_init(struct bch_fs *c)
        }
 
        INIT_LIST_HEAD(&c->btree_trans_list);
-       mutex_init(&c->btree_trans_lock);
+       seqmutex_init(&c->btree_trans_lock);
 
        ret   = mempool_init_kmalloc_pool(&c->btree_paths_pool, 1,
                        sizeof(struct btree_path) * nr +