Update bcachefs sources to 9a555a741e80 bcachefs: omit alignment attribute on big...
index 47d203cdb7b22a143d073054b6e92560102e24e8..20b32c71b20af93626fe42da5b202751c237a979 100644
@@ -891,13 +891,14 @@ static noinline int btree_node_iter_and_journal_peek(struct btree_trans *trans,
        struct bkey_s_c k;
        int ret = 0;
 
-       __bch2_btree_and_journal_iter_init_node_iter(&jiter, c, l->b, l->iter, path->pos);
+       __bch2_btree_and_journal_iter_init_node_iter(trans, &jiter, l->b, l->iter, path->pos);
 
        k = bch2_btree_and_journal_iter_peek(&jiter);
 
        bch2_bkey_buf_reassemble(out, c, k);
 
-       if (flags & BTREE_ITER_PREFETCH)
+       if ((flags & BTREE_ITER_PREFETCH) &&
+           c->opts.btree_node_prefetch)
                ret = btree_path_prefetch_j(trans, path, &jiter);
 
        bch2_btree_and_journal_iter_exit(&jiter);
@@ -929,7 +930,8 @@ static __always_inline int btree_path_down(struct btree_trans *trans,
                bch2_bkey_buf_unpack(&tmp, c, l->b,
                                 bch2_btree_node_iter_peek(&l->iter, l->b));
 
-               if (flags & BTREE_ITER_PREFETCH) {
+               if ((flags & BTREE_ITER_PREFETCH) &&
+                   c->opts.btree_node_prefetch) {
                        ret = btree_path_prefetch(trans, path);
                        if (ret)
                                goto err;
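
Both hunks above apply the same gating change: node prefetch now requires the per-filesystem btree_node_prefetch option in addition to the per-iterator BTREE_ITER_PREFETCH flag, giving a global switch to turn prefetching off. A minimal standalone sketch of the pattern, with simplified stand-in types (the real bch_fs and options structs are far larger):

#include <stdbool.h>
#include <stdio.h>

#define BTREE_ITER_PREFETCH	(1 << 0)

struct opts { bool btree_node_prefetch; };
struct fs   { struct opts opts; };

static bool should_prefetch(const struct fs *c, unsigned flags)
{
	/* previously only the flag was checked; the option adds a global off switch */
	return (flags & BTREE_ITER_PREFETCH) && c->opts.btree_node_prefetch;
}

int main(void)
{
	struct fs c = { .opts = { .btree_node_prefetch = false } };

	/* flag set, but the filesystem option disables prefetch: */
	printf("%d\n", should_prefetch(&c, BTREE_ITER_PREFETCH));	/* 0 */

	c.opts.btree_node_prefetch = true;
	printf("%d\n", should_prefetch(&c, BTREE_ITER_PREFETCH));	/* 1 */
	return 0;
}
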
@@ -1144,7 +1146,7 @@ int bch2_btree_path_traverse_one(struct btree_trans *trans,
        path = &trans->paths[path_idx];
 
        if (unlikely(path->level >= BTREE_MAX_DEPTH))
-               goto out;
+               goto out_uptodate;
 
        path->level = btree_path_up_until_good_node(trans, path, 0);
 
@@ -1177,7 +1179,7 @@ int bch2_btree_path_traverse_one(struct btree_trans *trans,
                        goto out;
                }
        }
-
+out_uptodate:
        path->uptodate = BTREE_ITER_UPTODATE;
 out:
        if (bch2_err_matches(ret, BCH_ERR_transaction_restart) != !!trans->restarted)
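
The retargeted goto above fixes the early bail-out for path->level >= BTREE_MAX_DEPTH: jumping straight to out skipped the store that marks the path up to date, while the new out_uptodate label lets that branch fall through it. A simplified model of the control flow (stand-in types and constants; locking and restart handling elided):

#include <stdio.h>

#define BTREE_MAX_DEPTH			4
#define BTREE_ITER_UPTODATE		0
#define BTREE_ITER_NEED_TRAVERSE	2

struct path { unsigned level, uptodate; };

static int traverse_one(struct path *path, int simulate_err)
{
	int ret = 0;

	if (path->level >= BTREE_MAX_DEPTH)
		goto out_uptodate;	/* was "goto out", which skipped the store below */

	/* ... btree node locking and traversal elided ... */
	if (simulate_err) {
		ret = -1;
		goto out;		/* genuine error paths still skip it */
	}

out_uptodate:
	path->uptodate = BTREE_ITER_UPTODATE;
out:
	/* restart-count sanity checks run here on every exit */
	return ret;
}

int main(void)
{
	struct path p = { .level = BTREE_MAX_DEPTH, .uptodate = BTREE_ITER_NEED_TRAVERSE };

	traverse_one(&p, 0);
	printf("uptodate=%u\n", p.uptodate);	/* 0 with the fix applied */
	return 0;
}
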
@@ -1209,7 +1211,6 @@ static btree_path_idx_t btree_path_clone(struct btree_trans *trans, btree_path_i
                                         bool intent)
 {
        btree_path_idx_t new = btree_path_alloc(trans, src);
-
        btree_path_copy(trans, trans->paths + new, trans->paths + src);
        __btree_path_get(trans->paths + new, intent);
        return new;
@@ -1336,7 +1337,7 @@ void bch2_path_put(struct btree_trans *trans, btree_path_idx_t path_idx, bool in
 
        if (path->should_be_locked &&
            !trans->restarted &&
-           (!dup || !bch2_btree_path_relock_norestart(trans, dup, _THIS_IP_)))
+           (!dup || !bch2_btree_path_relock_norestart(trans, dup)))
                return;
 
        if (dup) {
@@ -1477,9 +1478,6 @@ static void bch2_trans_update_max_paths(struct btree_trans *trans)
        struct printbuf buf = PRINTBUF;
        size_t nr = bitmap_weight(trans->paths_allocated, trans->nr_paths);
 
-       if (!s)
-               return;
-
        bch2_trans_paths_to_text(&buf, trans);
 
        if (!buf.allocation_failure) {
@@ -1512,42 +1510,50 @@ int __bch2_btree_trans_too_many_iters(struct btree_trans *trans)
        return btree_trans_restart(trans, BCH_ERR_transaction_restart_too_many_iters);
 }
 
+static noinline void btree_path_overflow(struct btree_trans *trans)
+{
+       bch2_dump_trans_paths_updates(trans);
+       bch_err(trans->c, "trans path overflow");
+}
+
 static noinline void btree_paths_realloc(struct btree_trans *trans)
 {
        unsigned nr = trans->nr_paths * 2;
 
-       void *p = kzalloc(BITS_TO_LONGS(nr) * sizeof(unsigned long) +
-                         nr + 8 +
+       void *p = kvzalloc(BITS_TO_LONGS(nr) * sizeof(unsigned long) +
                          sizeof(struct btree_trans_paths) +
                          nr * sizeof(struct btree_path) +
+                         nr * sizeof(btree_path_idx_t) + 8 +
                          nr * sizeof(struct btree_insert_entry), GFP_KERNEL|__GFP_NOFAIL);
 
        unsigned long *paths_allocated = p;
+       memcpy(paths_allocated, trans->paths_allocated, BITS_TO_LONGS(trans->nr_paths) * sizeof(unsigned long));
        p += BITS_TO_LONGS(nr) * sizeof(unsigned long);
+
+       p += sizeof(struct btree_trans_paths);
        struct btree_path *paths = p;
+       *trans_paths_nr(paths) = nr;
+       memcpy(paths, trans->paths, trans->nr_paths * sizeof(struct btree_path));
        p += nr * sizeof(struct btree_path);
-       u8 *sorted = p;
-       p += nr + 8;
-       struct btree_insert_entry *updates = p;
 
-       *trans_paths_nr(paths) = nr;
+       btree_path_idx_t *sorted = p;
+       memcpy(sorted, trans->sorted, trans->nr_sorted * sizeof(btree_path_idx_t));
+       p += nr * sizeof(btree_path_idx_t) + 8;
 
-       memcpy(paths_allocated, trans->paths_allocated, BITS_TO_LONGS(trans->nr_paths) * sizeof(unsigned long));
-       memcpy(sorted, trans->sorted, trans->nr_sorted);
-       memcpy(paths, trans->paths, trans->nr_paths * sizeof(struct btree_path));
-       memcpy(updates, trans->updates, trans->nr_paths * sizeof(struct btree_path));
+       struct btree_insert_entry *updates = p;
+       memcpy(updates, trans->updates, trans->nr_paths * sizeof(struct btree_insert_entry));
 
        unsigned long *old = trans->paths_allocated;
 
        rcu_assign_pointer(trans->paths_allocated,      paths_allocated);
-       rcu_assign_pointer(trans->sorted,               sorted);
        rcu_assign_pointer(trans->paths,                paths);
+       rcu_assign_pointer(trans->sorted,               sorted);
        rcu_assign_pointer(trans->updates,              updates);
 
        trans->nr_paths         = nr;
 
        if (old != trans->_paths_allocated)
-               kfree_rcu_mightsleep(trans->paths_allocated);
+               kfree_rcu_mightsleep(old);
 }
 
 static inline btree_path_idx_t btree_path_alloc(struct btree_trans *trans,
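
The rewritten btree_paths_realloc() carves a single kvzalloc() (which can fall back to vmalloc for large transactions) into the allocation bitmap, the paths header and array, the sorted index array (widened from u8 to btree_path_idx_t, with 8 bytes of slack for the u64-granular memmoves further down), and the updates array. It also fixes two bugs visible in the removed lines: the updates memcpy used sizeof(struct btree_path) rather than sizeof(struct btree_insert_entry), and the old kfree_rcu_mightsleep() freed the freshly assigned pointer instead of the saved old one. A userspace model of the carve-and-publish scheme, with calloc and plain stores standing in for kvzalloc and rcu_assign_pointer():

#include <stdlib.h>
#include <string.h>

typedef unsigned short path_idx_t;	/* stand-in for btree_path_idx_t */
struct path  { int dummy; };
struct entry { int dummy; };

struct trans {
	unsigned long	*paths_allocated;	/* bitmap, one bit per path */
	struct path	*paths;
	path_idx_t	*sorted;
	struct entry	*updates;
	unsigned	nr_paths, nr_sorted;
};

#define BITS_TO_LONGS(n) (((n) + 8 * sizeof(long) - 1) / (8 * sizeof(long)))

static void paths_realloc(struct trans *trans)
{
	unsigned nr = trans->nr_paths * 2;

	/* one zeroed allocation, carved below (GNU-C void * arithmetic, as in the kernel code) */
	void *p = calloc(1, BITS_TO_LONGS(nr) * sizeof(unsigned long) +
			    nr * sizeof(struct path) +
			    nr * sizeof(path_idx_t) + 8 +	/* +8: memmove_u64s slack */
			    nr * sizeof(struct entry));

	unsigned long *bitmap = p;
	memcpy(bitmap, trans->paths_allocated,
	       BITS_TO_LONGS(trans->nr_paths) * sizeof(unsigned long));
	p += BITS_TO_LONGS(nr) * sizeof(unsigned long);

	struct path *paths = p;
	memcpy(paths, trans->paths, trans->nr_paths * sizeof(struct path));
	p += nr * sizeof(struct path);

	path_idx_t *sorted = p;
	memcpy(sorted, trans->sorted, trans->nr_sorted * sizeof(path_idx_t));
	p += nr * sizeof(path_idx_t) + 8;

	struct entry *updates = p;
	memcpy(updates, trans->updates, trans->nr_paths * sizeof(struct entry));

	void *old = trans->paths_allocated;	/* save before reassigning: the old code freed the new pointer */

	/* the kernel code publishes these with rcu_assign_pointer() */
	trans->paths_allocated	= bitmap;
	trans->paths		= paths;
	trans->sorted		= sorted;
	trans->updates		= updates;
	trans->nr_paths		= nr;

	free(old);	/* kfree_rcu_mightsleep(old) in the real code */
}
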
@@ -1555,8 +1561,14 @@ static inline btree_path_idx_t btree_path_alloc(struct btree_trans *trans,
 {
        btree_path_idx_t idx = find_first_zero_bit(trans->paths_allocated, trans->nr_paths);
 
-       if (unlikely(idx == trans->nr_paths))
+       if (unlikely(idx == trans->nr_paths)) {
+               if (trans->nr_paths == BTREE_ITER_MAX) {
+                       btree_path_overflow(trans);
+                       return 0;
+               }
+
                btree_paths_realloc(trans);
+       }
 
        /*
         * Do this before marking the new path as allocated, since it won't be
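
btree_path_alloc() now distinguishes "bitmap full but still growable" from "already at the BTREE_ITER_MAX hard cap": the former doubles the table via btree_paths_realloc(), the latter dumps the transaction's paths and returns index 0 (which is reserved at init, see paths_allocated[0] = 1 below) instead of growing without bound. In sketch form, with a linear scan standing in for find_first_zero_bit() and an illustrative cap value:

#include <stdbool.h>

#define BTREE_ITER_MAX	64	/* hard cap; the real value lives in the headers */

struct trans {
	bool		paths_allocated[BTREE_ITER_MAX];
	unsigned	nr_paths;	/* current (growable) table size */
};

/* returns a free slot index, or 0 on overflow (slot 0 is reserved at init) */
static unsigned path_alloc(struct trans *trans)
{
	for (;;) {
		for (unsigned i = 0; i < trans->nr_paths; i++)
			if (!trans->paths_allocated[i]) {
				trans->paths_allocated[i] = true;
				return i;
			}

		if (trans->nr_paths == BTREE_ITER_MAX)
			return 0;	/* overflow: dump state, fail gracefully */

		trans->nr_paths *= 2;	/* btree_paths_realloc() in the real code */
	}
}
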
@@ -2135,18 +2147,16 @@ struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos e
                        goto out_no_locked;
 
                /*
-                * iter->pos should be mononotically increasing, and always be
-                * equal to the key we just returned - except extents can
-                * straddle iter->pos:
+                * We need to check against @end before FILTER_SNAPSHOTS because
+                * if we get to a different inode than requested we might be
+                * seeing keys for a different snapshot tree that will all be
+                * filtered out.
+                *
+                * But we can't do the full check here, because bkey_start_pos()
+                * isn't monotonically increasing before FILTER_SNAPSHOTS, and
+                * that's what we check against in extents mode:
                 */
-               if (!(iter->flags & BTREE_ITER_IS_EXTENTS))
-                       iter_pos = k.k->p;
-               else
-                       iter_pos = bkey_max(iter->pos, bkey_start_pos(k.k));
-
-               if (unlikely(!(iter->flags & BTREE_ITER_IS_EXTENTS)
-                            ? bkey_gt(iter_pos, end)
-                            : bkey_ge(iter_pos, end)))
+               if (k.k->p.inode > end.inode)
                        goto end;
 
                if (iter->update_path &&
@@ -2205,6 +2215,21 @@ struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos e
                        continue;
                }
 
+               /*
+                * iter->pos should be monotonically increasing, and always be
+                * equal to the key we just returned - except extents can
+                * straddle iter->pos:
+                */
+               if (!(iter->flags & BTREE_ITER_IS_EXTENTS))
+                       iter_pos = k.k->p;
+               else
+                       iter_pos = bkey_max(iter->pos, bkey_start_pos(k.k));
+
+               if (unlikely(!(iter->flags & BTREE_ITER_IS_EXTENTS)
+                            ? bkey_gt(iter_pos, end)
+                            : bkey_ge(iter_pos, end)))
+                       goto end;
+
                break;
        }
 
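The two hunks above split the end-of-range test in bch2_btree_iter_peek_upto(). A cheap inode-only comparison runs before snapshot filtering, since inode numbers are monotone even there, letting the iterator stop early rather than walk another inode's snapshot keys that would all be filtered out; the exact, extents-aware comparison moves after the filter, where positions are monotone again. A rough sketch of the two tests; the helper names here are illustration only:

#include <stdbool.h>
#include <stdint.h>

struct bpos { uint64_t inode, offset; };

static bool bpos_gt(struct bpos a, struct bpos b)
{
	return a.inode != b.inode ? a.inode > b.inode : a.offset > b.offset;
}

/*
 * Cheap pre-filter test: the inode component alone is monotone even
 * before FILTER_SNAPSHOTS, so it is safe to bail out on it early.
 */
static bool past_end_cheap(struct bpos k, struct bpos end)
{
	return k.inode > end.inode;
}

/*
 * Exact end test, run only on keys that survive snapshot filtering:
 * in extents mode a position that reaches @end is already out of
 * range (bkey_ge), otherwise only a position strictly past @end is
 * (bkey_gt).
 */
static bool past_end(struct bpos iter_pos, struct bpos end, bool extents)
{
	return extents ? !bpos_gt(end, iter_pos)	/* iter_pos >= end */
		       : bpos_gt(iter_pos, end);	/* iter_pos >  end */
}
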
@@ -2278,7 +2303,7 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter)
                btree_iter_path(trans, iter)->level);
 
        if (iter->flags & BTREE_ITER_WITH_JOURNAL)
-               return bkey_s_c_err(-EIO);
+               return bkey_s_c_err(-BCH_ERR_btree_iter_with_journal_not_supported);
 
        bch2_btree_iter_verify(iter);
        bch2_btree_iter_verify_entry_exit(iter);
@@ -2476,6 +2501,7 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
                        k = bch2_btree_iter_peek_upto(&iter2, end);
 
                        if (k.k && !bkey_err(k)) {
+                               swap(iter->key_cache_path, iter2.key_cache_path);
                                iter->k = iter2.k;
                                k.k = &iter->k;
                        }
@@ -2640,21 +2666,18 @@ out:
 static inline void btree_path_list_remove(struct btree_trans *trans,
                                          struct btree_path *path)
 {
-       unsigned i;
-
        EBUG_ON(path->sorted_idx >= trans->nr_sorted);
 #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
        trans->nr_sorted--;
        memmove_u64s_down_small(trans->sorted + path->sorted_idx,
                                trans->sorted + path->sorted_idx + 1,
-                               DIV_ROUND_UP(trans->nr_sorted - path->sorted_idx, 8));
+                               DIV_ROUND_UP(trans->nr_sorted - path->sorted_idx,
+                                            sizeof(u64) / sizeof(btree_path_idx_t)));
 #else
        array_remove_item(trans->sorted, trans->nr_sorted, path->sorted_idx);
 #endif
-       for (i = path->sorted_idx; i < trans->nr_sorted; i++)
+       for (unsigned i = path->sorted_idx; i < trans->nr_sorted; i++)
                trans->paths[trans->sorted[i]].sorted_idx = i;
-
-       path->sorted_idx = U8_MAX;
 }
 
 static inline void btree_path_list_add(struct btree_trans *trans,
@@ -2662,21 +2685,21 @@ static inline void btree_path_list_add(struct btree_trans *trans,
                                       btree_path_idx_t path_idx)
 {
        struct btree_path *path = trans->paths + path_idx;
-       unsigned i;
 
        path->sorted_idx = pos ? trans->paths[pos].sorted_idx + 1 : trans->nr_sorted;
 
 #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
        memmove_u64s_up_small(trans->sorted + path->sorted_idx + 1,
                              trans->sorted + path->sorted_idx,
-                             DIV_ROUND_UP(trans->nr_sorted - path->sorted_idx, 8));
+                             DIV_ROUND_UP(trans->nr_sorted - path->sorted_idx,
+                                          sizeof(u64) / sizeof(btree_path_idx_t)));
        trans->nr_sorted++;
        trans->sorted[path->sorted_idx] = path_idx;
 #else
        array_insert_item(trans->sorted, trans->nr_sorted, path->sorted_idx, path_idx);
 #endif
 
-       for (i = path->sorted_idx; i < trans->nr_sorted; i++)
+       for (unsigned i = path->sorted_idx; i < trans->nr_sorted; i++)
                trans->paths[trans->sorted[i]].sorted_idx = i;
 
        btree_trans_verify_sorted_refs(trans);
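
In both list helpers above, the sorted[] array now holds btree_path_idx_t entries rather than u8, so the count passed to the u64-granular memmove helpers must be recomputed: DIV_ROUND_UP(n, sizeof(u64) / sizeof(btree_path_idx_t)) words, where the old hardcoded divisor of 8 was only correct for one-byte entries. The arithmetic in isolation, assuming a 2-byte btree_path_idx_t:

#include <stdint.h>
#include <stdio.h>

typedef uint16_t path_idx_t;	/* stand-in for btree_path_idx_t */

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	unsigned n = 5;	/* elements to move */

	unsigned old_words = DIV_ROUND_UP(n, 8);	/* u8 era: 1 word */
	unsigned new_words = DIV_ROUND_UP(n, sizeof(uint64_t) / sizeof(path_idx_t));	/* 2 words */

	/* with 2-byte entries the old math would have moved half the data */
	printf("old=%u new=%u\n", old_words, new_words);
	return 0;
}
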
@@ -2738,6 +2761,9 @@ void bch2_trans_copy_iter(struct btree_iter *dst, struct btree_iter *src)
        struct btree_trans *trans = src->trans;
 
        *dst = *src;
+#ifdef TRACK_PATH_ALLOCATED
+       dst->ip_allocated = _RET_IP_;
+#endif
        if (src->path)
                __btree_path_get(trans->paths + src->path, src->flags & BTREE_ITER_INTENT);
        if (src->update_path)
@@ -2758,8 +2784,7 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
        WARN_ON_ONCE(new_bytes > BTREE_TRANS_MEM_MAX);
 
        struct btree_transaction_stats *s = btree_trans_stats(trans);
-       if (s)
-               s->max_mem = max(s->max_mem, new_bytes);
+       s->max_mem = max(s->max_mem, new_bytes);
 
        new_mem = krealloc(trans->mem, new_bytes, GFP_NOWAIT|__GFP_NOWARN);
        if (unlikely(!new_mem)) {
@@ -2875,9 +2900,15 @@ u32 bch2_trans_begin(struct btree_trans *trans)
        }
 
        now = local_clock();
+
+       if (!IS_ENABLED(CONFIG_BCACHEFS_NO_LATENCY_ACCT) &&
+           time_after64(now, trans->last_begin_time + 10))
+               __time_stats_update(&btree_trans_stats(trans)->duration,
+                                        trans->last_begin_time, now);
+
        if (!trans->restarted &&
            (need_resched() ||
-            now - trans->last_begin_time > BTREE_TRANS_MAX_LOCK_HOLD_TIME_NS)) {
+            time_after64(now, trans->last_begin_time + BTREE_TRANS_MAX_LOCK_HOLD_TIME_NS))) {
                drop_locks_do(trans, (cond_resched(), 0));
                now = local_clock();
        }
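
Two timing changes above: begin-to-begin transaction durations now feed the per-fn time stats (when latency accounting is compiled in, and only past a 10 ns floor), and both comparisons switch to time_after64(), which stays correct if the 64-bit clock wraps, unlike the raw subtraction it replaces. The macro's trick, restated standalone:

#include <stdint.h>
#include <stdio.h>

/* true if a is after b, correct even across u64 wraparound */
static int time_after64(uint64_t a, uint64_t b)
{
	return (int64_t)(b - a) < 0;
}

int main(void)
{
	uint64_t last = UINT64_MAX - 5;	/* clock about to wrap */
	uint64_t now  = 10;		/* ...and it wrapped */

	printf("%d\n", time_after64(now, last + 16));	/* 0: not yet past that deadline... */
	printf("%d\n", time_after64(now, last + 10));	/* 1: ...but past this one */
	return 0;
}
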
@@ -2896,13 +2927,11 @@ u32 bch2_trans_begin(struct btree_trans *trans)
        return trans->restart_count;
 }
 
-const char *bch2_btree_transaction_fns[BCH_TRANSACTIONS_NR];
+const char *bch2_btree_transaction_fns[BCH_TRANSACTIONS_NR] = { "(unknown)" };
 
 unsigned bch2_trans_get_fn_idx(const char *fn)
 {
-       unsigned i;
-
-       for (i = 0; i < ARRAY_SIZE(bch2_btree_transaction_fns); i++)
+       for (unsigned i = 0; i < ARRAY_SIZE(bch2_btree_transaction_fns); i++)
                if (!bch2_btree_transaction_fns[i] ||
                    bch2_btree_transaction_fns[i] == fn) {
                        bch2_btree_transaction_fns[i] = fn;
@@ -2910,7 +2939,7 @@ unsigned bch2_trans_get_fn_idx(const char *fn)
                }
 
        pr_warn_once("BCH_TRANSACTIONS_NR not big enough!");
-       return i;
+       return 0;
 }
 
 struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx)
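
bch2_btree_transaction_fns now seeds slot 0 with "(unknown)", and on table overflow bch2_trans_get_fn_idx() returns 0 rather than the old i, which equals ARRAY_SIZE after the loop and would index out of bounds in every consumer. A miniature version with a deliberately tiny table:

#include <stdio.h>

#define NR 4
static const char *fns[NR] = { "(unknown)" };

static unsigned get_fn_idx(const char *fn)
{
	/* fn names are compared by pointer, as in the kernel code */
	for (unsigned i = 0; i < NR; i++)
		if (!fns[i] || fns[i] == fn) {
			fns[i] = fn;
			return i;
		}

	/* table full: map to the "(unknown)" slot instead of returning NR */
	return 0;
}

int main(void)
{
	printf("%u\n", get_fn_idx("bch2_trans_example"));	/* 1: slot 0 is taken */
	return 0;
}
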
@@ -2972,7 +3001,7 @@ got_trans:
        trans->paths            = trans->_paths;
        trans->updates          = trans->_updates;
 
-       *trans_paths_nr(trans->paths) = BTREE_ITER_MAX;
+       *trans_paths_nr(trans->paths) = BTREE_ITER_INITIAL;
 
        trans->paths_allocated[0] = 1;
 
@@ -3058,7 +3087,7 @@ void bch2_trans_put(struct btree_trans *trans)
        trans->paths            = NULL;
 
        if (paths_allocated != trans->_paths_allocated)
-               kfree_rcu_mightsleep(paths_allocated);
+               kvfree_rcu_mightsleep(paths_allocated);
 
        if (trans->mem_bytes == BTREE_TRANS_MEM_MAX)
                mempool_free(trans->mem, &c->btree_trans_mem_pool);
@@ -3199,7 +3228,7 @@ void bch2_fs_btree_iter_exit(struct bch_fs *c)
             s < c->btree_transaction_stats + ARRAY_SIZE(c->btree_transaction_stats);
             s++) {
                kfree(s->max_paths_text);
-               bch2_time_stats_exit(&s->lock_hold_times);
+               time_stats_exit(&s->lock_hold_times);
        }
 
        if (c->btree_trans_barrier_initialized)
@@ -3215,7 +3244,8 @@ void bch2_fs_btree_iter_init_early(struct bch_fs *c)
        for (s = c->btree_transaction_stats;
             s < c->btree_transaction_stats + ARRAY_SIZE(c->btree_transaction_stats);
             s++) {
-               bch2_time_stats_init(&s->lock_hold_times);
+               time_stats_init(&s->duration);
+               time_stats_init(&s->lock_hold_times);
                mutex_init(&s->lock);
        }