git.sesse.net Git - bcachefs-tools-debian/commitdiff
Update bcachefs sources to e3a7cee503 bcachefs: Don't mark superblocks past end of...
author Kent Overstreet <kent.overstreet@gmail.com>
Mon, 7 Jun 2021 17:30:40 +0000 (13:30 -0400)
committer Kent Overstreet <kent.overstreet@gmail.com>
Wed, 9 Jun 2021 02:55:14 +0000 (22:55 -0400)
27 files changed:
.bcachefs_revision
include/trace/events/bcachefs.h
libbcachefs/acl.c
libbcachefs/alloc_background.c
libbcachefs/btree_cache.c
libbcachefs/btree_gc.c
libbcachefs/btree_iter.c
libbcachefs/btree_iter.h
libbcachefs/btree_types.h
libbcachefs/btree_update_interior.c
libbcachefs/btree_update_leaf.c
libbcachefs/buckets.c
libbcachefs/ec.c
libbcachefs/fs-io.c
libbcachefs/fs.c
libbcachefs/fsck.c
libbcachefs/inode.c
libbcachefs/io.c
libbcachefs/journal.c
libbcachefs/migrate.c
libbcachefs/move.c
libbcachefs/replicas.c
libbcachefs/str_hash.h
libbcachefs/super.c
libbcachefs/tests.c
libbcachefs/xattr.c
linux/six.c

diff --git a/.bcachefs_revision b/.bcachefs_revision
index 93876ae2f84bf15ced7a00a878e9d6fd63adc2d9..45b79dea3e133ad4d465453a05141cab8bae81ac 100644 (file)
@@ -1 +1 @@
-3913e0cac34e0993ab6dde67a2dec1ea485a2e28
+e3a7cee5034f0f218f593a0a970e8ccd8bf99565
diff --git a/include/trace/events/bcachefs.h b/include/trace/events/bcachefs.h
index 7c90ba01510a756d0a0744d78492ccb515b4174a..b5fcda9e65d8f41710b1054b52370e91aa7206a0 100644 (file)
@@ -541,59 +541,66 @@ TRACE_EVENT(copygc_wait,
 );
 
 TRACE_EVENT(trans_get_iter,
-       TP_PROTO(unsigned long caller, unsigned long ip,
+       TP_PROTO(unsigned long trans_ip,
+                unsigned long caller_ip,
                 enum btree_id btree_id,
-                struct bpos *pos_want,
-                unsigned locks_want,
-                struct bpos *pos_found,
-                unsigned locks_found,
-                unsigned uptodate),
-       TP_ARGS(caller, ip, btree_id,
-               pos_want, locks_want,
-               pos_found, locks_found,
-               uptodate),
+                struct bpos *got_pos,
+                unsigned got_locks,
+                unsigned got_uptodate,
+                struct bpos *src_pos,
+                unsigned src_locks,
+                unsigned src_uptodate),
+       TP_ARGS(trans_ip, caller_ip, btree_id,
+               got_pos, got_locks, got_uptodate,
+               src_pos, src_locks, src_uptodate),
 
        TP_STRUCT__entry(
-               __field(unsigned long,  caller                  )
-               __field(unsigned long,  ip                      )
-               __field(u8,             btree_id                )
-               __field(u8,             uptodate                )
-               __field(u8,             locks_want              )
-               __field(u8,             locks_found             )
-               __field(u64,            pos_want_inode          )
-               __field(u64,            pos_want_offset         )
-               __field(u32,            pos_want_snapshot       )
-               __field(u64,            pos_found_inode         )
-               __field(u64,            pos_found_offset        )
-               __field(u32,            pos_found_snapshot      )
+               __field(unsigned long,          trans_ip                )
+               __field(unsigned long,          caller_ip               )
+               __field(u8,                     btree_id                )
+               __field(u64,                    got_pos_inode           )
+               __field(u64,                    got_pos_offset          )
+               __field(u32,                    got_pos_snapshot        )
+               __field(u8,                     got_locks               )
+               __field(u8,                     got_uptodate            )
+               __field(u64,                    src_pos_inode           )
+               __field(u64,                    src_pos_offset          )
+               __field(u32,                    src_pos_snapshot        )
+               __field(u8,                     src_locks               )
+               __field(u8,                     src_uptodate            )
        ),
 
        TP_fast_assign(
-               __entry->caller                 = caller;
-               __entry->ip                     = ip;
+               __entry->trans_ip               = trans_ip;
+               __entry->caller_ip              = caller_ip;
                __entry->btree_id               = btree_id;
-               __entry->uptodate               = uptodate;
-               __entry->pos_want_inode         = pos_want->inode;
-               __entry->pos_want_offset        = pos_want->offset;
-               __entry->pos_want_snapshot      = pos_want->snapshot;
-               __entry->pos_found_inode        = pos_found->inode;
-               __entry->pos_found_offset       = pos_found->offset;
-               __entry->pos_found_snapshot     = pos_found->snapshot;
-       ),
-
-       TP_printk("%ps %pS btree %u uptodate %u want %llu:%llu:%u locks %u found %llu:%llu:%u locks %u",
-                 (void *) __entry->caller,
-                 (void *) __entry->ip,
+               __entry->got_pos_inode          = got_pos->inode;
+               __entry->got_pos_offset         = got_pos->offset;
+               __entry->got_pos_snapshot       = got_pos->snapshot;
+               __entry->got_locks              = got_locks;
+               __entry->got_uptodate           = got_uptodate;
+               __entry->src_pos_inode          = src_pos->inode;
+               __entry->src_pos_offset         = src_pos->offset;
+               __entry->src_pos_snapshot       = src_pos->snapshot;
+               __entry->src_locks              = src_locks;
+               __entry->src_uptodate           = src_uptodate;
+       ),
+
+       TP_printk("%ps %pS btree %u got %llu:%llu:%u l %u u %u "
+                 "src %llu:%llu:%u l %u u %u",
+                 (void *) __entry->trans_ip,
+                 (void *) __entry->caller_ip,
                  __entry->btree_id,
-                 __entry->uptodate,
-                 __entry->pos_want_inode,
-                 __entry->pos_want_offset,
-                 __entry->pos_want_snapshot,
-                 __entry->locks_want,
-                 __entry->pos_found_inode,
-                 __entry->pos_found_offset,
-                 __entry->pos_found_snapshot,
-                 __entry->locks_found)
+                 __entry->got_pos_inode,
+                 __entry->got_pos_offset,
+                 __entry->got_pos_snapshot,
+                 __entry->got_locks,
+                 __entry->got_uptodate,
+                 __entry->src_pos_inode,
+                 __entry->src_pos_offset,
+                 __entry->src_pos_snapshot,
+                 __entry->src_locks,
+                 __entry->src_uptodate)
 );
 
 TRACE_EVENT(transaction_restart_ip,
@@ -614,28 +621,241 @@ TRACE_EVENT(transaction_restart_ip,
 );
 
 DECLARE_EVENT_CLASS(transaction_restart,
-       TP_PROTO(unsigned long ip),
-       TP_ARGS(ip),
+       TP_PROTO(unsigned long trans_ip,
+                unsigned long caller_ip),
+       TP_ARGS(trans_ip, caller_ip),
 
        TP_STRUCT__entry(
-               __field(unsigned long,          ip      )
+               __field(unsigned long,          trans_ip        )
+               __field(unsigned long,          caller_ip       )
        ),
 
        TP_fast_assign(
-               __entry->ip = ip;
+               __entry->trans_ip               = trans_ip;
+               __entry->caller_ip              = caller_ip;
        ),
 
-       TP_printk("%ps", (void *) __entry->ip)
+       TP_printk("%ps %pS",
+                 (void *) __entry->trans_ip,
+                 (void *) __entry->caller_ip)
 );
 
-DEFINE_EVENT(transaction_restart,      trans_restart_btree_node_reused,
-       TP_PROTO(unsigned long ip),
-       TP_ARGS(ip)
+DEFINE_EVENT(transaction_restart,      trans_blocked_journal_reclaim,
+       TP_PROTO(unsigned long trans_ip,
+                unsigned long caller_ip),
+       TP_ARGS(trans_ip, caller_ip)
 );
 
-DEFINE_EVENT(transaction_restart,      trans_blocked_journal_reclaim,
-       TP_PROTO(unsigned long ip),
-       TP_ARGS(ip)
+DEFINE_EVENT(transaction_restart,      trans_restart_journal_res_get,
+       TP_PROTO(unsigned long trans_ip,
+                unsigned long caller_ip),
+       TP_ARGS(trans_ip, caller_ip)
+);
+
+DEFINE_EVENT(transaction_restart,      trans_restart_journal_preres_get,
+       TP_PROTO(unsigned long trans_ip,
+                unsigned long caller_ip),
+       TP_ARGS(trans_ip, caller_ip)
+);
+
+DEFINE_EVENT(transaction_restart,      trans_restart_journal_reclaim,
+       TP_PROTO(unsigned long trans_ip,
+                unsigned long caller_ip),
+       TP_ARGS(trans_ip, caller_ip)
+);
+
+DEFINE_EVENT(transaction_restart,      trans_restart_fault_inject,
+       TP_PROTO(unsigned long trans_ip,
+                unsigned long caller_ip),
+       TP_ARGS(trans_ip, caller_ip)
+);
+
+DEFINE_EVENT(transaction_restart,      trans_traverse_all,
+       TP_PROTO(unsigned long trans_ip,
+                unsigned long caller_ip),
+       TP_ARGS(trans_ip, caller_ip)
+);
+
+DEFINE_EVENT(transaction_restart,      trans_restart_mark_replicas,
+       TP_PROTO(unsigned long trans_ip,
+                unsigned long caller_ip),
+       TP_ARGS(trans_ip, caller_ip)
+);
+
+DECLARE_EVENT_CLASS(transaction_restart_iter,
+       TP_PROTO(unsigned long trans_ip,
+                unsigned long caller_ip,
+                enum btree_id btree_id,
+                struct bpos *pos),
+       TP_ARGS(trans_ip, caller_ip, btree_id, pos),
+
+       TP_STRUCT__entry(
+               __field(unsigned long,          trans_ip        )
+               __field(unsigned long,          caller_ip       )
+               __field(u8,                     btree_id        )
+               __field(u64,                    pos_inode       )
+               __field(u64,                    pos_offset      )
+               __field(u32,                    pos_snapshot    )
+       ),
+
+       TP_fast_assign(
+               __entry->trans_ip               = trans_ip;
+               __entry->caller_ip              = caller_ip;
+               __entry->btree_id               = btree_id;
+               __entry->pos_inode              = pos->inode;
+               __entry->pos_offset             = pos->offset;
+               __entry->pos_snapshot           = pos->snapshot;
+       ),
+
+       TP_printk("%ps %pS btree %u pos %llu:%llu:%u",
+                 (void *) __entry->trans_ip,
+                 (void *) __entry->caller_ip,
+                 __entry->btree_id,
+                 __entry->pos_inode,
+                 __entry->pos_offset,
+                 __entry->pos_snapshot)
+);
+
+DEFINE_EVENT(transaction_restart_iter, trans_restart_btree_node_reused,
+       TP_PROTO(unsigned long trans_ip,
+                unsigned long caller_ip,
+                enum btree_id btree_id,
+                struct bpos *pos),
+       TP_ARGS(trans_ip, caller_ip, btree_id, pos)
+);
+
+DEFINE_EVENT(transaction_restart_iter, trans_restart_btree_node_split,
+       TP_PROTO(unsigned long trans_ip,
+                unsigned long caller_ip,
+                enum btree_id btree_id,
+                struct bpos *pos),
+       TP_ARGS(trans_ip, caller_ip, btree_id, pos)
+);
+
+DEFINE_EVENT(transaction_restart_iter, trans_restart_mark,
+       TP_PROTO(unsigned long trans_ip,
+                unsigned long caller_ip,
+                enum btree_id btree_id,
+                struct bpos *pos),
+       TP_ARGS(trans_ip, caller_ip, btree_id, pos)
+);
+
+DEFINE_EVENT(transaction_restart_iter, trans_restart_upgrade,
+       TP_PROTO(unsigned long trans_ip,
+                unsigned long caller_ip,
+                enum btree_id btree_id,
+                struct bpos *pos),
+       TP_ARGS(trans_ip, caller_ip, btree_id, pos)
+);
+
+DEFINE_EVENT(transaction_restart_iter, trans_restart_iter_upgrade,
+       TP_PROTO(unsigned long trans_ip,
+                unsigned long caller_ip,
+                enum btree_id btree_id,
+                struct bpos *pos),
+       TP_ARGS(trans_ip, caller_ip, btree_id, pos)
+);
+
+DEFINE_EVENT(transaction_restart_iter, trans_restart_relock,
+       TP_PROTO(unsigned long trans_ip,
+                unsigned long caller_ip,
+                enum btree_id btree_id,
+                struct bpos *pos),
+       TP_ARGS(trans_ip, caller_ip, btree_id, pos)
+);
+
+DEFINE_EVENT(transaction_restart_iter, trans_restart_traverse,
+       TP_PROTO(unsigned long trans_ip,
+                unsigned long caller_ip,
+                enum btree_id btree_id,
+                struct bpos *pos),
+       TP_ARGS(trans_ip, caller_ip, btree_id, pos)
+);
+
+TRACE_EVENT(iter_traverse,      /* records trans/caller ip, btree id, position and ret */
+       TP_PROTO(unsigned long  trans_ip,
+                unsigned long  caller_ip,
+                enum btree_id  btree_id,
+                struct bpos    *pos,
+                int ret),
+       TP_ARGS(trans_ip, caller_ip, btree_id, pos, ret),
+
+       TP_STRUCT__entry(
+               __field(unsigned long,          trans_ip        )
+               __field(unsigned long,          caller_ip       )
+               __field(u8,                     btree_id        )
+               __field(u64,                    pos_inode       )
+               __field(u64,                    pos_offset      )
+               __field(u32,                    pos_snapshot    )
+               __field(s32,                    ret             )
+       ),
+
+       TP_fast_assign(
+               __entry->trans_ip               = trans_ip;
+               __entry->caller_ip              = caller_ip;
+               __entry->btree_id               = btree_id;
+               __entry->pos_inode              = pos->inode;   /* bpos is copied field by field */
+               __entry->pos_offset             = pos->offset;
+               __entry->pos_snapshot           = pos->snapshot;
+               __entry->ret                    = ret;
+       ),
+
+       TP_printk("%ps %pS btree %u pos %llu:%llu:%u ret %i",
+                 (void *) __entry->trans_ip,
+                 (void *) __entry->caller_ip,
+                 __entry->btree_id,    /* labelled "btree", same layout as the other iterator events */
+                 __entry->pos_inode,
+                 __entry->pos_offset,
+                 __entry->pos_snapshot,
+                 __entry->ret)
+);
+
+TRACE_EVENT(iter_set_search_pos,
+       TP_PROTO(unsigned long  trans_ip,
+                unsigned long  caller_ip,
+                enum btree_id  btree_id,
+                struct bpos    *old_pos,
+                struct bpos    *new_pos,
+                unsigned       good_level),
+       TP_ARGS(trans_ip, caller_ip, btree_id, old_pos, new_pos, good_level),
+
+       TP_STRUCT__entry(
+               __field(unsigned long,          trans_ip                )
+               __field(unsigned long,          caller_ip               )
+               __field(u8,                     btree_id                )
+               __field(u64,                    old_pos_inode           )
+               __field(u64,                    old_pos_offset          )
+               __field(u32,                    old_pos_snapshot        )
+               __field(u64,                    new_pos_inode           )
+               __field(u64,                    new_pos_offset          )
+               __field(u32,                    new_pos_snapshot        )
+               __field(u8,                     good_level              )
+       ),
+
+       TP_fast_assign(
+               __entry->trans_ip               = trans_ip;
+               __entry->caller_ip              = caller_ip;
+               __entry->btree_id               = btree_id;
+               __entry->old_pos_inode          = old_pos->inode;
+               __entry->old_pos_offset         = old_pos->offset;
+               __entry->old_pos_snapshot       = old_pos->snapshot;
+               __entry->new_pos_inode          = new_pos->inode;
+               __entry->new_pos_offset         = new_pos->offset;
+               __entry->new_pos_snapshot       = new_pos->snapshot;
+               __entry->good_level             = good_level;
+       ),
+
+       TP_printk("%ps %pS btree %u old pos %llu:%llu:%u new pos %llu:%llu:%u l %u",
+                 (void *) __entry->trans_ip,
+                 (void *) __entry->caller_ip,
+                 __entry->btree_id,
+                 __entry->old_pos_inode,
+                 __entry->old_pos_offset,
+                 __entry->old_pos_snapshot,
+                 __entry->new_pos_inode,
+                 __entry->new_pos_offset,
+                 __entry->new_pos_snapshot,
+                 __entry->good_level)
 );
 
 TRACE_EVENT(trans_restart_would_deadlock,
@@ -730,97 +950,70 @@ TRACE_EVENT(trans_restart_mem_realloced,
                  __entry->bytes)
 );
 
-DEFINE_EVENT(transaction_restart,      trans_restart_journal_res_get,
-       TP_PROTO(unsigned long ip),
-       TP_ARGS(ip)
-);
-
-DEFINE_EVENT(transaction_restart,      trans_restart_journal_preres_get,
-       TP_PROTO(unsigned long ip),
-       TP_ARGS(ip)
-);
-
-DEFINE_EVENT(transaction_restart,      trans_restart_journal_reclaim,
-       TP_PROTO(unsigned long ip),
-       TP_ARGS(ip)
-);
-
-DEFINE_EVENT(transaction_restart,      trans_restart_mark_replicas,
-       TP_PROTO(unsigned long ip),
-       TP_ARGS(ip)
-);
-
-DEFINE_EVENT(transaction_restart,      trans_restart_fault_inject,
-       TP_PROTO(unsigned long ip),
-       TP_ARGS(ip)
-);
-
-DEFINE_EVENT(transaction_restart,      trans_restart_btree_node_split,
-       TP_PROTO(unsigned long ip),
-       TP_ARGS(ip)
-);
-
-DEFINE_EVENT(transaction_restart,      trans_restart_mark,
-       TP_PROTO(unsigned long ip),
-       TP_ARGS(ip)
-);
-
-DEFINE_EVENT(transaction_restart,      trans_restart_upgrade,
-       TP_PROTO(unsigned long ip),
-       TP_ARGS(ip)
-);
-
-DEFINE_EVENT(transaction_restart,      trans_restart_iter_upgrade,
-       TP_PROTO(unsigned long ip),
-       TP_ARGS(ip)
-);
-
-DEFINE_EVENT(transaction_restart,      trans_restart_relock,
-       TP_PROTO(unsigned long ip),
-       TP_ARGS(ip)
-);
-
-DEFINE_EVENT(transaction_restart,      trans_restart_traverse,
-       TP_PROTO(unsigned long ip),
-       TP_ARGS(ip)
-);
-
-DEFINE_EVENT(transaction_restart,      trans_traverse_all,
-       TP_PROTO(unsigned long ip),
-       TP_ARGS(ip)
-);
-
 DECLARE_EVENT_CLASS(node_lock_fail,
-       TP_PROTO(unsigned level, u32 iter_seq, unsigned node, u32 node_seq),
-       TP_ARGS(level, iter_seq, node, node_seq),
+       TP_PROTO(unsigned long trans_ip,
+                unsigned long caller_ip,
+                enum btree_id btree_id,
+                struct bpos *pos,
+                unsigned level, u32 iter_seq, unsigned node, u32 node_seq),
+       TP_ARGS(trans_ip, caller_ip, btree_id, pos,
+               level, iter_seq, node, node_seq),
 
        TP_STRUCT__entry(
-               __field(u32,            level)
-               __field(u32,            iter_seq)
-               __field(u32,            node)
-               __field(u32,            node_seq)
+               __field(unsigned long,          trans_ip        )
+               __field(unsigned long,          caller_ip       )
+               __field(u8,                     btree_id        )
+               __field(u64,                    pos_inode       )
+               __field(u64,                    pos_offset      )
+               __field(u32,                    pos_snapshot    )
+               __field(u32,                    level           )
+               __field(u32,                    iter_seq        )
+               __field(u32,                    node            )
+               __field(u32,                    node_seq        )
        ),
 
        TP_fast_assign(
-               __entry->level          = level;
-               __entry->iter_seq       = iter_seq;
-               __entry->node           = node;
-               __entry->node_seq       = node_seq;
+               __entry->trans_ip               = trans_ip;
+               __entry->caller_ip              = caller_ip;
+               __entry->btree_id               = btree_id;
+               __entry->pos_inode              = pos->inode;
+               __entry->pos_offset             = pos->offset;
+               __entry->pos_snapshot           = pos->snapshot;
+               __entry->level                  = level;
+               __entry->iter_seq               = iter_seq;
+               __entry->node                   = node;
+               __entry->node_seq               = node_seq;
        ),
 
-       TP_printk("level %u iter seq %u node %u node seq %u",
+       TP_printk("%ps %pS btree %u pos %llu:%llu:%u level %u iter seq %u node %u node seq %u",
+                 (void *) __entry->trans_ip,
+                 (void *) __entry->caller_ip,
+                 __entry->btree_id,
+                 __entry->pos_inode,
+                 __entry->pos_offset,
+                 __entry->pos_snapshot,
                  __entry->level, __entry->iter_seq,
                  __entry->node, __entry->node_seq)
 );
 
 DEFINE_EVENT(node_lock_fail, node_upgrade_fail,
-       TP_PROTO(unsigned level, u32 iter_seq, unsigned node, u32 node_seq),
-       TP_ARGS(level, iter_seq, node, node_seq)
+       TP_PROTO(unsigned long trans_ip,
+                unsigned long caller_ip,
+                enum btree_id btree_id,
+                struct bpos *pos,
+                unsigned level, u32 iter_seq, unsigned node, u32 node_seq),
+       TP_ARGS(trans_ip, caller_ip, btree_id, pos,
+               level, iter_seq, node, node_seq)
 );
 
 DEFINE_EVENT(node_lock_fail, node_relock_fail,
-       TP_PROTO(unsigned level, u32 iter_seq, unsigned node, u32 node_seq),
-       TP_ARGS(level, iter_seq, node, node_seq)
+       TP_PROTO(unsigned long trans_ip,
+                unsigned long caller_ip,
+                enum btree_id btree_id,
+                struct bpos *pos,
+                unsigned level, u32 iter_seq, unsigned node, u32 node_seq),
+       TP_ARGS(trans_ip, caller_ip, btree_id, pos,
+               level, iter_seq, node, node_seq)
 );
 
 #endif /* _TRACE_BCACHE_H */
diff --git a/libbcachefs/acl.c b/libbcachefs/acl.c
index 0f2d7437c740344e3191ee83041e3bc62d6c2be1..594e1f1a12915477c83db5c242b787a6f3a90913 100644 (file)
@@ -383,7 +383,7 @@ int bch2_acl_chmod(struct btree_trans *trans,
        }
 
        new->k.p = iter->pos;
-       bch2_trans_update(trans, iter, &new->k_i, 0);
+       ret = bch2_trans_update(trans, iter, &new->k_i, 0);
        *new_acl = acl;
        acl = NULL;
 err:
diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c
index da6347390c4841e8297fbb1a2b5847c8c85d4101..2324b81c09cec79f56bb54231457948131d9b4a4 100644 (file)
@@ -340,9 +340,9 @@ retry:
                return 0;
 
        bch2_alloc_pack(c, &a, new_u);
-       bch2_trans_update(trans, iter, &a.k,
-                         BTREE_TRIGGER_NORUN);
-       ret = bch2_trans_commit(trans, NULL, NULL,
+       ret   = bch2_trans_update(trans, iter, &a.k,
+                                 BTREE_TRIGGER_NORUN) ?:
+               bch2_trans_commit(trans, NULL, NULL,
                                BTREE_INSERT_NOFAIL|flags);
 err:
        if (ret == -EINTR)
@@ -726,7 +726,8 @@ static int bucket_invalidate_btree(struct btree_trans *trans,
        u.write_time    = atomic64_read(&c->io_clock[WRITE].now);
 
        bch2_alloc_pack(c, a, u);
-       bch2_trans_update(trans, iter, &a->k, BTREE_TRIGGER_BUCKET_INVALIDATE);
+       ret = bch2_trans_update(trans, iter, &a->k,
+                               BTREE_TRIGGER_BUCKET_INVALIDATE);
 err:
        bch2_trans_iter_put(trans, iter);
        return ret;
@@ -836,6 +837,11 @@ static int bch2_invalidate_buckets(struct bch_fs *c, struct bch_dev *ca)
        while (!ret &&
               !fifo_full(&ca->free_inc) &&
               ca->alloc_heap.used) {
+               if (kthread_should_stop()) {
+                       ret = 1;
+                       break;
+               }
+
                ret = bch2_invalidate_one_bucket(c, ca, &journal_seq,
                                (!fifo_empty(&ca->free_inc)
                                 ? BTREE_INSERT_NOWAIT : 0));
diff --git a/libbcachefs/btree_cache.c b/libbcachefs/btree_cache.c
index f6adbe8955d78ed7baffde7c8c07fe3aea67f591..013cf0b53857db8b543679e61d8f0a79e48a844c 100644 (file)
@@ -807,7 +807,10 @@ lock_node:
                        if (bch2_btree_node_relock(iter, level + 1))
                                goto retry;
 
-                       trace_trans_restart_btree_node_reused(iter->trans->ip);
+                       trace_trans_restart_btree_node_reused(iter->trans->ip,
+                                                             trace_ip,
+                                                             iter->btree_id,
+                                                             &iter->real_pos);
                        return ERR_PTR(-EINTR);
                }
        }
diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c
index b03432c13fbb1bd0ab4b3ff5c7b73dc13369c2a8..ba560fbd5f36c2e2a4848ee21108a8c753e18b36 100644 (file)
@@ -1665,9 +1665,10 @@ static int bch2_gc_btree_gens(struct bch_fs *c, enum btree_id btree_id)
                        bch2_bkey_buf_reassemble(&sk, c, k);
                        bch2_extent_normalize(c, bkey_i_to_s(sk.k));
 
-                       bch2_trans_update(&trans, iter, sk.k, 0);
 
-                       commit_err = bch2_trans_commit(&trans, NULL, NULL,
+                       commit_err =
+                               bch2_trans_update(&trans, iter, sk.k, 0) ?:
+                               bch2_trans_commit(&trans, NULL, NULL,
                                                       BTREE_INSERT_NOWAIT|
                                                       BTREE_INSERT_NOFAIL);
                        if (commit_err == -EINTR) {
diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c
index bdb068e9d2636fa2b05f70deddff34d47a49c96f..cd714dc2df3cb6391bc17c237211cdb165ba0eee 100644 (file)
@@ -170,8 +170,8 @@ success:
        return true;
 }
 
-static inline bool btree_iter_get_locks(struct btree_iter *iter,
-                                       bool upgrade, bool trace)
+static inline bool btree_iter_get_locks(struct btree_iter *iter, bool upgrade,
+                                       unsigned long trace_ip)
 {
        unsigned l = iter->level;
        int fail_idx = -1;
@@ -183,16 +183,17 @@ static inline bool btree_iter_get_locks(struct btree_iter *iter,
                if (!(upgrade
                      ? bch2_btree_node_upgrade(iter, l)
                      : bch2_btree_node_relock(iter, l))) {
-                       if (trace)
-                               (upgrade
-                                ? trace_node_upgrade_fail
-                                : trace_node_relock_fail)(l, iter->l[l].lock_seq,
-                                               is_btree_node(iter, l)
-                                               ? 0
-                                               : (unsigned long) iter->l[l].b,
-                                               is_btree_node(iter, l)
-                                               ? iter->l[l].b->c.lock.state.seq
-                                               : 0);
+                       (upgrade
+                        ? trace_node_upgrade_fail
+                        : trace_node_relock_fail)(iter->trans->ip, trace_ip,
+                                       iter->btree_id, &iter->real_pos,
+                                       l, iter->l[l].lock_seq,
+                                       is_btree_node(iter, l)
+                                       ? 0
+                                       : (unsigned long) iter->l[l].b,
+                                       is_btree_node(iter, l)
+                                       ? iter->l[l].b->c.lock.state.seq
+                                       : 0);
 
                        fail_idx = l;
                        btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
@@ -372,9 +373,9 @@ static inline void bch2_btree_iter_verify_locks(struct btree_iter *iter) {}
 #endif
 
 __flatten
-bool bch2_btree_iter_relock(struct btree_iter *iter, bool trace)
+static bool bch2_btree_iter_relock(struct btree_iter *iter, unsigned long trace_ip)
 {
-       return btree_iter_get_locks(iter, false, trace);
+       return btree_iter_get_locks(iter, false, trace_ip);
 }
 
 bool __bch2_btree_iter_upgrade(struct btree_iter *iter,
@@ -386,7 +387,7 @@ bool __bch2_btree_iter_upgrade(struct btree_iter *iter,
 
        iter->locks_want = new_locks_want;
 
-       if (btree_iter_get_locks(iter, true, true))
+       if (btree_iter_get_locks(iter, true, _THIS_IP_))
                return true;
 
        /*
@@ -414,7 +415,7 @@ bool __bch2_btree_iter_upgrade(struct btree_iter *iter,
                    linked->btree_id == iter->btree_id &&
                    linked->locks_want < new_locks_want) {
                        linked->locks_want = new_locks_want;
-                       btree_iter_get_locks(linked, true, false);
+                       btree_iter_get_locks(linked, true, _THIS_IP_);
                }
 
        return false;
@@ -455,13 +456,22 @@ void bch2_trans_downgrade(struct btree_trans *trans)
 
 /* Btree transaction locking: */
 
+static inline bool btree_iter_should_be_locked(struct btree_trans *trans,
+                                              struct btree_iter *iter)
+{
+       return (iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT) ||
+               iter->should_be_locked;
+}
+
 bool bch2_trans_relock(struct btree_trans *trans)
 {
        struct btree_iter *iter;
 
        trans_for_each_iter(trans, iter)
-               if (!bch2_btree_iter_relock(iter, true)) {
-                       trace_trans_restart_relock(trans->ip);
+               if (!bch2_btree_iter_relock(iter, _RET_IP_) &&
+                   btree_iter_should_be_locked(trans, iter)) {
+                       trace_trans_restart_relock(trans->ip, _RET_IP_,
+                                       iter->btree_id, &iter->real_pos);
                        return false;
                }
        return true;
@@ -829,7 +839,14 @@ static inline struct bkey_s_c __btree_iter_unpack(struct btree_iter *iter,
 
        ret = bkey_disassemble(l->b, k, u);
 
-       if (bch2_debug_check_bkeys)
+       /*
+        * XXX: bch2_btree_bset_insert_key() generates invalid keys when we
+        * overwrite extents - it sets k->type = KEY_TYPE_deleted on the key
+        * being overwritten but doesn't change k->size. But this is ok, because
+        * those keys are never written out, we just have to avoid a spurious
+        * assertion here:
+        */
+       if (bch2_debug_check_bkeys && !bkey_deleted(ret.k))
                bch2_bkey_debugcheck(iter->trans->c, l->b, ret);
 
        return ret;
@@ -1175,7 +1192,8 @@ err:
 
 static int btree_iter_traverse_one(struct btree_iter *, unsigned long);
 
-static int __btree_iter_traverse_all(struct btree_trans *trans, int ret)
+static int __btree_iter_traverse_all(struct btree_trans *trans, int ret,
+                                    unsigned long trace_ip)
 {
        struct bch_fs *c = trans->c;
        struct btree_iter *iter;
@@ -1192,7 +1210,7 @@ retry_all:
        relock_fail = false;
 
        trans_for_each_iter(trans, iter) {
-               if (!bch2_btree_iter_relock(iter, true))
+               if (!bch2_btree_iter_relock(iter, _THIS_IP_))
                        relock_fail = true;
                sorted[nr_sorted++] = iter->idx;
        }
@@ -1269,13 +1287,13 @@ out:
 
        trans->in_traverse_all = false;
 
-       trace_trans_traverse_all(trans->ip);
+       trace_trans_traverse_all(trans->ip, trace_ip);
        return ret;
 }
 
 int bch2_btree_iter_traverse_all(struct btree_trans *trans)
 {
-       return __btree_iter_traverse_all(trans, 0);
+       return __btree_iter_traverse_all(trans, 0, _RET_IP_);
 }
 
 static inline bool btree_iter_good_node(struct btree_iter *iter,
@@ -1320,6 +1338,7 @@ static int btree_iter_traverse_one(struct btree_iter *iter,
                                   unsigned long trace_ip)
 {
        unsigned depth_want = iter->level;
+       int ret = 0;
 
        /*
         * if we need interior nodes locked, call btree_iter_relock() to make
@@ -1327,16 +1346,18 @@ static int btree_iter_traverse_one(struct btree_iter *iter,
         */
        if (iter->uptodate == BTREE_ITER_NEED_RELOCK ||
            iter->locks_want > 1)
-               bch2_btree_iter_relock(iter, false);
+               bch2_btree_iter_relock(iter, _THIS_IP_);
 
-       if (btree_iter_type(iter) == BTREE_ITER_CACHED)
-               return bch2_btree_iter_traverse_cached(iter);
+       if (btree_iter_type(iter) == BTREE_ITER_CACHED) {
+               ret = bch2_btree_iter_traverse_cached(iter);
+               goto out;
+       }
 
        if (iter->uptodate < BTREE_ITER_NEED_RELOCK)
-               return 0;
+               goto out;
 
        if (unlikely(iter->level >= BTREE_MAX_DEPTH))
-               return 0;
+               goto out;
 
        iter->level = btree_iter_up_until_good_node(iter, 0);
 
@@ -1347,12 +1368,18 @@ static int btree_iter_traverse_one(struct btree_iter *iter,
         * btree_iter_lock_root() comes next and that it can't fail
         */
        while (iter->level > depth_want) {
-               int ret = btree_iter_node(iter, iter->level)
+               ret = btree_iter_node(iter, iter->level)
                        ? btree_iter_down(iter, trace_ip)
                        : btree_iter_lock_root(iter, depth_want, trace_ip);
                if (unlikely(ret)) {
-                       if (ret == 1)
-                               return 0;
+                       if (ret == 1) {
+                               /*
+                                * Got to the end of the btree (in
+                                * BTREE_ITER_NODES mode)
+                                */
+                               ret = 0;
+                               goto out;
+                       }
 
                        iter->level = depth_want;
 
@@ -1364,14 +1391,16 @@ static int btree_iter_traverse_one(struct btree_iter *iter,
                                iter->l[iter->level].b =
                                        BTREE_ITER_NO_NODE_DOWN;
                        }
-                       return ret;
+                       goto out;
                }
        }
 
        iter->uptodate = BTREE_ITER_NEED_PEEK;
-
+out:
+       trace_iter_traverse(iter->trans->ip, trace_ip,
+                           iter->btree_id, &iter->real_pos, ret);
        bch2_btree_iter_verify(iter);
-       return 0;
+       return ret;
 }
 
 static int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
@@ -1382,7 +1411,7 @@ static int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
        ret =   bch2_trans_cond_resched(trans) ?:
                btree_iter_traverse_one(iter, _RET_IP_);
        if (unlikely(ret))
-               ret = __btree_iter_traverse_all(trans, ret);
+               ret = __btree_iter_traverse_all(trans, ret, _RET_IP_);
 
        return ret;
 }
@@ -1406,9 +1435,16 @@ btree_iter_traverse(struct btree_iter *iter)
 int __must_check
 bch2_btree_iter_traverse(struct btree_iter *iter)
 {
+       int ret;
+
        btree_iter_set_search_pos(iter, btree_iter_search_key(iter));
 
-       return btree_iter_traverse(iter);
+       ret = btree_iter_traverse(iter);
+       if (ret)
+               return ret;
+
+       iter->should_be_locked = true;
+       return 0;
 }
 
 /* Iterate across nodes (leaf and interior nodes) */
@@ -1434,6 +1470,7 @@ struct btree *bch2_btree_iter_peek_node(struct btree_iter *iter)
        iter->pos = iter->real_pos = b->key.k.p;
 
        bch2_btree_iter_verify(iter);
+       iter->should_be_locked = true;
 
        return b;
 }
@@ -1490,6 +1527,7 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter)
        iter->pos = iter->real_pos = b->key.k.p;
 
        bch2_btree_iter_verify(iter);
+       iter->should_be_locked = true;
 
        return b;
 }
@@ -1498,6 +1536,7 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter)
 
 static void btree_iter_set_search_pos(struct btree_iter *iter, struct bpos new_pos)
 {
+       struct bpos old_pos = iter->real_pos;
        int cmp = bpos_cmp(new_pos, iter->real_pos);
        unsigned l = iter->level;
 
@@ -1505,10 +1544,11 @@ static void btree_iter_set_search_pos(struct btree_iter *iter, struct bpos new_p
                goto out;
 
        iter->real_pos = new_pos;
+       iter->should_be_locked = false;
 
        if (unlikely(btree_iter_type(iter) == BTREE_ITER_CACHED)) {
                btree_node_unlock(iter, 0);
-               iter->l[0].b = BTREE_ITER_NO_NODE_UP;
+               iter->l[0].b = BTREE_ITER_NO_NODE_CACHED;
                btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
                return;
        }
@@ -1537,6 +1577,11 @@ out:
                btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK);
 
        bch2_btree_iter_verify(iter);
+#ifdef CONFIG_BCACHEFS_DEBUG
+       trace_iter_set_search_pos(iter->trans->ip, _RET_IP_,
+                                 iter->btree_id,
+                                 &old_pos, &new_pos, l);
+#endif
 }
 
 inline bool bch2_btree_iter_advance(struct btree_iter *iter)
@@ -1659,6 +1704,7 @@ start:
 
        bch2_btree_iter_verify_entry_exit(iter);
        bch2_btree_iter_verify(iter);
+       iter->should_be_locked = true;
        return k;
 }
 
@@ -1743,6 +1789,7 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter)
 out:
        bch2_btree_iter_verify_entry_exit(iter);
        bch2_btree_iter_verify(iter);
+       iter->should_be_locked = true;
        return k;
 no_key:
        /*
@@ -1842,6 +1889,8 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
 
        bch2_btree_iter_verify_entry_exit(iter);
        bch2_btree_iter_verify(iter);
+       iter->should_be_locked = true;
+
        return k;
 }
 
@@ -1879,6 +1928,8 @@ struct bkey_s_c bch2_btree_iter_peek_cached(struct btree_iter *iter)
                bkey_cmp(iter->pos, ck->key.pos));
        BUG_ON(!ck->valid);
 
+       iter->should_be_locked = true;
+
        return bkey_i_to_s_c(ck->k);
 }
 
@@ -2055,13 +2106,6 @@ struct btree_iter *__bch2_trans_get_iter(struct btree_trans *trans,
                best = iter;
        }
 
-       trace_trans_get_iter(_RET_IP_, trans->ip,
-                            btree_id,
-                            &real_pos, locks_want,
-                            best ? &best->real_pos : &pos_min,
-                            best ? best->locks_want : 0,
-                            best ? best->uptodate : BTREE_ITER_NEED_TRAVERSE);
-
        if (!best) {
                iter = btree_trans_iter_alloc(trans);
                bch2_btree_iter_init(trans, iter, btree_id);
@@ -2090,7 +2134,7 @@ struct btree_iter *__bch2_trans_get_iter(struct btree_trans *trans,
        locks_want = min(locks_want, BTREE_MAX_DEPTH);
        if (locks_want > iter->locks_want) {
                iter->locks_want = locks_want;
-               btree_iter_get_locks(iter, true, false);
+               btree_iter_get_locks(iter, true, _THIS_IP_);
        }
 
        while (iter->level != depth) {
@@ -2108,6 +2152,13 @@ struct btree_iter *__bch2_trans_get_iter(struct btree_trans *trans,
        bch2_btree_iter_set_pos(iter, pos);
        btree_iter_set_search_pos(iter, real_pos);
 
+       trace_trans_get_iter(_RET_IP_, trans->ip,
+                            btree_id,
+                            &real_pos, locks_want, iter->uptodate,
+                            best ? &best->real_pos     : &pos_min,
+                            best ? best->locks_want    : U8_MAX,
+                            best ? best->uptodate      : U8_MAX);
+
        return iter;
 }
 
index 2f63adb9e4205b8ee6e2b8ade7194c7504c45569..a2ce711fd61f0b981df5c940cfb3eb0c01420c7e 100644 (file)
@@ -111,7 +111,6 @@ void bch2_btree_node_iter_fix(struct btree_iter *, struct btree *,
                              struct btree_node_iter *, struct bkey_packed *,
                              unsigned, unsigned);
 
-bool bch2_btree_iter_relock(struct btree_iter *, bool);
 bool bch2_trans_relock(struct btree_trans *);
 void bch2_trans_unlock(struct btree_trans *);
 
@@ -179,6 +178,7 @@ static inline void bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos
        iter->k.p.offset        = iter->pos.offset      = new_pos.offset;
        iter->k.p.snapshot      = iter->pos.snapshot    = new_pos.snapshot;
        iter->k.size = 0;
+       iter->should_be_locked = false;
 }
 
 /* Sort order for locking btree iterators: */
index 06a2c412db7a2bedad619f6498dd10656683925c..bc0f482b53d2a1dd2b6d9f11e76911ba3086f878 100644 (file)
@@ -230,6 +230,7 @@ enum btree_iter_uptodate {
 #define BTREE_ITER_NO_NODE_DOWN                ((struct btree *) 5)
 #define BTREE_ITER_NO_NODE_INIT                ((struct btree *) 6)
 #define BTREE_ITER_NO_NODE_ERROR       ((struct btree *) 7)
+#define BTREE_ITER_NO_NODE_CACHED      ((struct btree *) 8)
 
 /*
  * @pos                        - iterator's current position
@@ -251,7 +252,13 @@ struct btree_iter {
        u8                      idx;
 
        enum btree_id           btree_id:4;
-       enum btree_iter_uptodate uptodate:4;
+       enum btree_iter_uptodate uptodate:3;
+       /*
+        * True if we've returned a key (and thus are expected to keep it
+        * locked), false after set_pos - for avoiding spurious transaction
+        * restarts in bch2_trans_relock():
+        */
+       bool                    should_be_locked:1;
        unsigned                level:4,
                                min_depth:4,
                                locks_want:4,
index b0484c7acb79b8f7c0b19aca16732411328ef562..2d8093d1bf00b1dddf67bfd09098ab9b0061d994 100644 (file)
@@ -555,15 +555,15 @@ static void btree_update_nodes_written(struct btree_update *as)
         * on disk:
         */
        for (i = 0; i < as->nr_old_nodes; i++) {
-               struct btree_node *bn = READ_ONCE(as->old_nodes[i]->data);
+               struct btree *old = as->old_nodes[i];
+               __le64 seq;
 
-               /*
-                * This is technically a use after free, but it's just a read -
-                * but it might cause problems in userspace where freeing the
-                * buffer may unmap it:
-                */
-               if (bn && bn->keys.seq == as->old_nodes_seq[i])
-                       btree_node_wait_on_io(as->old_nodes[i]);
+               six_lock_read(&old->c.lock, NULL, NULL);
+               seq = old->data ? old->data->keys.seq : 0;
+               six_unlock_read(&old->c.lock);
+
+               if (seq == as->old_nodes_seq[i])
+                       btree_node_wait_on_io(old);
        }
 
        /*
@@ -955,7 +955,9 @@ retry:
         * instead of locking/reserving all the way to the root:
         */
        if (!bch2_btree_iter_upgrade(iter, U8_MAX)) {
-               trace_trans_restart_iter_upgrade(trans->ip);
+               trace_trans_restart_iter_upgrade(trans->ip, _RET_IP_,
+                                                iter->btree_id,
+                                                &iter->real_pos);
                return ERR_PTR(-EINTR);
        }
 
@@ -996,7 +998,7 @@ retry:
                 * closure argument
                 */
                if (flags & BTREE_INSERT_NOUNLOCK) {
-                       trace_trans_restart_journal_preres_get(trans->ip);
+                       trace_trans_restart_journal_preres_get(trans->ip, _RET_IP_);
                        ret = -EINTR;
                        goto err;
                }
@@ -1012,7 +1014,7 @@ retry:
                                BTREE_UPDATE_JOURNAL_RES,
                                journal_flags);
                if (ret) {
-                       trace_trans_restart_journal_preres_get(trans->ip);
+                       trace_trans_restart_journal_preres_get(trans->ip, _RET_IP_);
                        goto err;
                }
 
index 70d2186e509f230fa9321c7053d077961608804d..0d566be7455e1c3cb8b459d761b762c0968ce5be 100644 (file)
@@ -228,7 +228,8 @@ static inline void btree_insert_entry_checks(struct btree_trans *trans,
 }
 
 static noinline int
-bch2_trans_journal_preres_get_cold(struct btree_trans *trans, unsigned u64s)
+bch2_trans_journal_preres_get_cold(struct btree_trans *trans, unsigned u64s,
+                                  unsigned long trace_ip)
 {
        struct bch_fs *c = trans->c;
        int ret;
@@ -241,7 +242,7 @@ bch2_trans_journal_preres_get_cold(struct btree_trans *trans, unsigned u64s)
                return ret;
 
        if (!bch2_trans_relock(trans)) {
-               trace_trans_restart_journal_preres_get(trans->ip);
+               trace_trans_restart_journal_preres_get(trans->ip, trace_ip);
                return -EINTR;
        }
 
@@ -368,7 +369,8 @@ static noinline void bch2_trans_mark_gc(struct btree_trans *trans)
 
 static inline int
 bch2_trans_commit_write_locked(struct btree_trans *trans,
-                              struct btree_insert_entry **stopped_at)
+                              struct btree_insert_entry **stopped_at,
+                              unsigned long trace_ip)
 {
        struct bch_fs *c = trans->c;
        struct btree_insert_entry *i;
@@ -378,7 +380,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,
        int ret;
 
        if (race_fault()) {
-               trace_trans_restart_fault_inject(trans->ip);
+               trace_trans_restart_fault_inject(trans->ip, trace_ip);
                return -EINTR;
        }
 
@@ -525,7 +527,8 @@ static noinline int maybe_do_btree_merge(struct btree_trans *trans, struct btree
  * Get journal reservation, take write locks, and attempt to do btree update(s):
  */
 static inline int do_bch2_trans_commit(struct btree_trans *trans,
-                                      struct btree_insert_entry **stopped_at)
+                                      struct btree_insert_entry **stopped_at,
+                                      unsigned long trace_ip)
 {
        struct bch_fs *c = trans->c;
        struct btree_insert_entry *i;
@@ -559,7 +562,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans,
                         ? JOURNAL_RES_GET_RESERVED : 0));
        if (unlikely(ret == -EAGAIN))
                ret = bch2_trans_journal_preres_get_cold(trans,
-                                               trans->journal_preres_u64s);
+                                               trans->journal_preres_u64s, trace_ip);
        if (unlikely(ret))
                return ret;
 
@@ -578,7 +581,9 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans,
                if (iter->nodes_locked != iter->nodes_intent_locked) {
                        if (btree_iter_keep(trans, iter)) {
                                if (!bch2_btree_iter_upgrade(iter, 1)) {
-                                       trace_trans_restart_upgrade(trans->ip);
+                                       trace_trans_restart_upgrade(trans->ip, trace_ip,
+                                                                   iter->btree_id,
+                                                                   &iter->real_pos);
                                        return -EINTR;
                                }
                        } else {
@@ -606,7 +611,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans,
                        bch2_btree_node_lock_for_insert(c,
                                        iter_l(i->iter)->b, i->iter);
 
-       ret = bch2_trans_commit_write_locked(trans, stopped_at);
+       ret = bch2_trans_commit_write_locked(trans, stopped_at, trace_ip);
 
        trans_for_each_update2(trans, i)
                if (!same_leaf_as_prev(trans, i))
@@ -644,7 +649,7 @@ static int journal_reclaim_wait_done(struct bch_fs *c)
 static noinline
 int bch2_trans_commit_error(struct btree_trans *trans,
                            struct btree_insert_entry *i,
-                           int ret)
+                           int ret, unsigned long trace_ip)
 {
        struct bch_fs *c = trans->c;
        unsigned flags = trans->flags;
@@ -685,7 +690,9 @@ int bch2_trans_commit_error(struct btree_trans *trans,
                if (!ret ||
                    ret == -EINTR ||
                    (flags & BTREE_INSERT_NOUNLOCK)) {
-                       trace_trans_restart_btree_node_split(trans->ip);
+                       trace_trans_restart_btree_node_split(trans->ip, trace_ip,
+                                                            i->iter->btree_id,
+                                                            &i->iter->real_pos);
                        ret = -EINTR;
                }
                break;
@@ -703,7 +710,7 @@ int bch2_trans_commit_error(struct btree_trans *trans,
                if (bch2_trans_relock(trans))
                        return 0;
 
-               trace_trans_restart_mark_replicas(trans->ip);
+               trace_trans_restart_mark_replicas(trans->ip, trace_ip);
                ret = -EINTR;
                break;
        case BTREE_INSERT_NEED_JOURNAL_RES:
@@ -720,13 +727,13 @@ int bch2_trans_commit_error(struct btree_trans *trans,
                if (bch2_trans_relock(trans))
                        return 0;
 
-               trace_trans_restart_journal_res_get(trans->ip);
+               trace_trans_restart_journal_res_get(trans->ip, trace_ip);
                ret = -EINTR;
                break;
        case BTREE_INSERT_NEED_JOURNAL_RECLAIM:
                bch2_trans_unlock(trans);
 
-               trace_trans_blocked_journal_reclaim(trans->ip);
+               trace_trans_blocked_journal_reclaim(trans->ip, trace_ip);
 
                wait_event_freezable(c->journal.reclaim_wait,
                                     (ret = journal_reclaim_wait_done(c)));
@@ -736,7 +743,7 @@ int bch2_trans_commit_error(struct btree_trans *trans,
                if (bch2_trans_relock(trans))
                        return 0;
 
-               trace_trans_restart_journal_reclaim(trans->ip);
+               trace_trans_restart_journal_reclaim(trans->ip, trace_ip);
                ret = -EINTR;
                break;
        default:
@@ -950,7 +957,9 @@ int __bch2_trans_commit(struct btree_trans *trans)
                                                             i->trigger_flags);
                                if (unlikely(ret)) {
                                        if (ret == -EINTR)
-                                               trace_trans_restart_mark(trans->ip);
+                                               trace_trans_restart_mark(trans->ip, _RET_IP_,
+                                                                        i->iter->btree_id,
+                                                                        &i->iter->pos);
                                        goto out;
                                }
                        }
@@ -976,12 +985,16 @@ int __bch2_trans_commit(struct btree_trans *trans)
        trans_for_each_update2(trans, i) {
                ret = bch2_btree_iter_traverse(i->iter);
                if (unlikely(ret)) {
-                       trace_trans_restart_traverse(trans->ip);
+                       trace_trans_restart_traverse(trans->ip, _RET_IP_,
+                                                    i->iter->btree_id,
+                                                    &i->iter->pos);
                        goto out;
                }
 
                if (unlikely(!bch2_btree_iter_upgrade(i->iter, i->level + 1))) {
-                       trace_trans_restart_upgrade(trans->ip);
+                       trace_trans_restart_upgrade(trans->ip, _RET_IP_,
+                                                   i->iter->btree_id,
+                                                   &i->iter->pos);
                        ret = -EINTR;
                        goto out;
                }
@@ -997,7 +1010,7 @@ int __bch2_trans_commit(struct btree_trans *trans)
 retry:
        memset(&trans->journal_res, 0, sizeof(trans->journal_res));
 
-       ret = do_bch2_trans_commit(trans, &i);
+       ret = do_bch2_trans_commit(trans, &i, _RET_IP_);
 
        /* make sure we didn't drop or screw up locks: */
        bch2_btree_trans_verify_locks(trans);
@@ -1023,7 +1036,7 @@ out_reset:
 
        return ret;
 err:
-       ret = bch2_trans_commit_error(trans, i, ret);
+       ret = bch2_trans_commit_error(trans, i, ret, _RET_IP_);
        if (ret)
                goto out;
 
@@ -1198,9 +1211,9 @@ int bch2_btree_delete_at(struct btree_trans *trans,
        bkey_init(&k.k);
        k.k.p = iter->pos;
 
-       bch2_trans_update(trans, iter, &k, 0);
-       return bch2_trans_commit(trans, NULL, NULL,
-                                BTREE_INSERT_NOFAIL|flags);
+       return  bch2_trans_update(trans, iter, &k, 0) ?:
+               bch2_trans_commit(trans, NULL, NULL,
+                                 BTREE_INSERT_NOFAIL|flags);
 }
 
 int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id,
@@ -1251,8 +1264,8 @@ retry:
                                break;
                }
 
-               bch2_trans_update(trans, iter, &delete, 0);
-               ret = bch2_trans_commit(trans, NULL, journal_seq,
+               ret   = bch2_trans_update(trans, iter, &delete, 0) ?:
+                       bch2_trans_commit(trans, NULL, journal_seq,
                                        BTREE_INSERT_NOFAIL);
                if (ret)
                        break;
index d07085a2fd1b23bfe1af4d1b4534d25e7cf3191d..76d15a5dc62fba066cbe9ba83c99c231e8e8b3ad 100644 (file)
@@ -631,6 +631,12 @@ void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
        BUG_ON(type != BCH_DATA_sb &&
               type != BCH_DATA_journal);
 
+       /*
+        * Backup superblock might be past the end of our normal usable space:
+        */
+       if (b >= ca->mi.nbuckets)
+               return;
+
        preempt_disable();
 
        if (likely(c)) {
@@ -1873,7 +1879,9 @@ static int __bch2_trans_mark_reflink_p(struct btree_trans *trans,
        }
 
        bch2_btree_iter_set_pos(iter, bkey_start_pos(k.k));
-       bch2_trans_update(trans, iter, n, 0);
+       ret = bch2_trans_update(trans, iter, n, 0);
+       if (ret)
+               goto err;
 out:
        ret = sectors;
 err:
@@ -2082,6 +2090,12 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans,
        };
        int ret = 0;
 
+       /*
+        * Backup superblock might be past the end of our normal usable space:
+        */
+       if (b >= ca->mi.nbuckets)
+               return 0;
+
        a = bch2_trans_start_alloc_update(trans, &iter, &ptr, &u);
        if (IS_ERR(a))
                return PTR_ERR(a);
index fa7450d2b2adced23c1a5771c1465cae3396b64a..db6e4f6cac371254607d14362b0398c86bd9f778 100644 (file)
@@ -741,9 +741,8 @@ found_slot:
 
        stripe->k.p = iter->pos;
 
-       bch2_trans_update(&trans, iter, &stripe->k_i, 0);
-
-       ret = bch2_trans_commit(&trans, res, NULL,
+       ret   = bch2_trans_update(&trans, iter, &stripe->k_i, 0) ?:
+               bch2_trans_commit(&trans, res, NULL,
                                BTREE_INSERT_NOFAIL);
 err:
        bch2_trans_iter_put(&trans, iter);
@@ -791,7 +790,7 @@ static int ec_stripe_bkey_update(struct btree_trans *trans,
                stripe_blockcount_set(&new->v, i,
                        stripe_blockcount_get(existing, i));
 
-       bch2_trans_update(trans, iter, &new->k_i, 0);
+       ret = bch2_trans_update(trans, iter, &new->k_i, 0);
 err:
        bch2_trans_iter_put(trans, iter);
        return ret;
@@ -864,9 +863,8 @@ static int ec_stripe_update_ptrs(struct bch_fs *c,
                extent_stripe_ptr_add(e, s, ec_ptr, block);
 
                bch2_btree_iter_set_pos(iter, bkey_start_pos(&sk.k->k));
-               bch2_trans_update(&trans, iter, sk.k, 0);
-
-               ret = bch2_trans_commit(&trans, NULL, NULL,
+               ret   = bch2_trans_update(&trans, iter, sk.k, 0) ?:
+                       bch2_trans_commit(&trans, NULL, NULL,
                                        BTREE_INSERT_NOFAIL);
                if (ret == -EINTR)
                        ret = 0;
@@ -1588,8 +1586,7 @@ write:
                stripe_blockcount_set(&new_key->v, i,
                                      m->block_sectors[i]);
 
-       bch2_trans_update(trans, iter, &new_key->k_i, 0);
-       return 0;
+       return bch2_trans_update(trans, iter, &new_key->k_i, 0);
 }
 
 int bch2_stripes_write(struct bch_fs *c, unsigned flags)
index dc7916c8b610e047979f9e11a9f7e4110aa50e1a..ef28995525c0cc955114536d65b61010acec4883 100644 (file)
@@ -2532,7 +2532,7 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode,
        }
 
        bch2_bkey_buf_init(&copy);
-       bch2_trans_init(&trans, c, BTREE_ITER_MAX, 256);
+       bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024);
        src = bch2_trans_get_iter(&trans, BTREE_ID_extents,
                        POS(inode->v.i_ino, src_start >> 9),
                        BTREE_ITER_INTENT);
@@ -2652,7 +2652,7 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
        unsigned replicas = io_opts(c, &inode->ei_inode).data_replicas;
        int ret = 0;
 
-       bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
+       bch2_trans_init(&trans, c, BTREE_ITER_MAX, 512);
 
        iter = bch2_trans_get_iter(&trans, BTREE_ID_extents,
                        POS(inode->v.i_ino, start_sector),
index e8a329c9561fd514cf185abb0e80744e99e1afdd..c567e17694720a13a409b026163d0ed1a1b1df3d 100644 (file)
@@ -144,7 +144,7 @@ int __must_check bch2_write_inode(struct bch_fs *c,
        struct bch_inode_unpacked inode_u;
        int ret;
 
-       bch2_trans_init(&trans, c, 0, 256);
+       bch2_trans_init(&trans, c, 0, 512);
 retry:
        bch2_trans_begin(&trans);
 
index a40459d2b0f06b369d1baae25aad6cd59d33485e..89a130d9c5373c5f668245a395b3226c8388073f 100644 (file)
@@ -305,9 +305,8 @@ static int hash_redo_key(struct btree_trans *trans,
 
        bkey_init(&delete->k);
        delete->k.p = k_iter->pos;
-       bch2_trans_update(trans, k_iter, delete, 0);
-
-       return bch2_hash_set(trans, desc, hash_info, k_iter->pos.inode, tmp, 0);
+       return  bch2_trans_update(trans, k_iter, delete, 0) ?:
+               bch2_hash_set(trans, desc, hash_info, k_iter->pos.inode, tmp, 0);
 }
 
 static int fsck_hash_delete_at(struct btree_trans *trans,
@@ -563,12 +562,12 @@ static int fix_overlapping_extent(struct btree_trans *trans,
                                   BTREE_ITER_INTENT|BTREE_ITER_NOT_EXTENTS);
 
        BUG_ON(iter->flags & BTREE_ITER_IS_EXTENTS);
-       bch2_trans_update(trans, iter, u, BTREE_TRIGGER_NORUN);
+       ret   = bch2_trans_update(trans, iter, u, BTREE_TRIGGER_NORUN) ?:
+               bch2_trans_commit(trans, NULL, NULL,
+                                 BTREE_INSERT_NOFAIL|
+                                 BTREE_INSERT_LAZY_RW);
        bch2_trans_iter_put(trans, iter);
-
-       return bch2_trans_commit(trans, NULL, NULL,
-                                BTREE_INSERT_NOFAIL|
-                                BTREE_INSERT_LAZY_RW);
+       return ret;
 }
 
 static int inode_backpointer_exists(struct btree_trans *trans,
@@ -887,7 +886,7 @@ retry:
                        ret = __bch2_trans_do(&trans, NULL, NULL,
                                              BTREE_INSERT_NOFAIL|
                                              BTREE_INSERT_LAZY_RW,
-                               (bch2_trans_update(&trans, iter, &n->k_i, 0), 0));
+                               bch2_trans_update(&trans, iter, &n->k_i, 0));
                        kfree(n);
                        if (ret)
                                goto err;
index 6b43a9716cf0bc867601471e3be7e6d7a371ae0b..17d8eb5223cd3c418f61d4d8f5e1e9ea5aaee4cd 100644 (file)
@@ -333,8 +333,7 @@ int bch2_inode_write(struct btree_trans *trans,
 
        bch2_inode_pack(trans->c, inode_p, inode);
        inode_p->inode.k.p.snapshot = iter->snapshot;
-       bch2_trans_update(trans, iter, &inode_p->inode.k_i, 0);
-       return 0;
+       return bch2_trans_update(trans, iter, &inode_p->inode.k_i, 0);
 }
 
 const char *bch2_inode_invalid(const struct bch_fs *c, struct bkey_s_c k)
@@ -580,7 +579,7 @@ int bch2_inode_rm(struct bch_fs *c, u64 inode_nr, bool cached)
        struct bkey_s_c k;
        int ret;
 
-       bch2_trans_init(&trans, c, 0, 0);
+       bch2_trans_init(&trans, c, 0, 1024);
 
        /*
         * If this was a directory, there shouldn't be any real dirents left -
@@ -629,9 +628,8 @@ retry:
        delete.k.p = iter->pos;
        delete.v.bi_generation = cpu_to_le32(inode_u.bi_generation + 1);
 
-       bch2_trans_update(&trans, iter, &delete.k_i, 0);
-
-       ret = bch2_trans_commit(&trans, NULL, NULL,
+       ret   = bch2_trans_update(&trans, iter, &delete.k_i, 0) ?:
+               bch2_trans_commit(&trans, NULL, NULL,
                                BTREE_INSERT_NOFAIL);
 err:
        bch2_trans_iter_put(&trans, iter);
index 157b2a0fc58240a8ada5e0b2c15bc052ee736f25..ab37eff0c778f2bc96193546123f23cdf4b6b34f 100644 (file)
@@ -301,8 +301,9 @@ int bch2_extent_update(struct btree_trans *trans,
 
                inode_iter = bch2_inode_peek(trans, &inode_u,
                                k->k.p.inode, BTREE_ITER_INTENT);
-               if (IS_ERR(inode_iter))
-                       return PTR_ERR(inode_iter);
+               ret = PTR_ERR_OR_ZERO(inode_iter);
+               if (ret)
+                       return ret;
 
                /*
                 * XXX:
@@ -329,11 +330,14 @@ int bch2_extent_update(struct btree_trans *trans,
 
                        inode_p.inode.k.p.snapshot = iter->snapshot;
 
-                       bch2_trans_update(trans, inode_iter,
+                       ret = bch2_trans_update(trans, inode_iter,
                                          &inode_p.inode.k_i, 0);
                }
 
                bch2_trans_iter_put(trans, inode_iter);
+
+               if (ret)
+                       return ret;
        }
 
        ret =   bch2_trans_update(trans, iter, k, 0) ?:
@@ -1783,7 +1787,7 @@ static int __bch2_rbio_narrow_crcs(struct btree_trans *trans,
        if (!bch2_bkey_narrow_crcs(new, new_crc))
                goto out;
 
-       bch2_trans_update(trans, iter, new, 0);
+       ret = bch2_trans_update(trans, iter, new, 0);
 out:
        bch2_trans_iter_put(trans, iter);
        return ret;
index af5386d959c879cdc49e86f9d12e41c56a661588..d714779a28d0461bb79970237a47ca103e68581d 100644 (file)
@@ -807,8 +807,11 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
                long b;
 
                if (new_fs) {
+                       if (c)
+                               percpu_down_read(&c->mark_lock);
                        b = bch2_bucket_alloc_new_fs(ca);
                        if (b < 0) {
+                               percpu_up_read(&c->mark_lock);
                                ret = -ENOSPC;
                                goto err;
                        }
@@ -861,6 +864,8 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
                                                  ca->mi.bucket_size,
                                                  gc_phase(GC_PHASE_SB),
                                                  0);
+                       if (c)
+                               percpu_up_read(&c->mark_lock);
                } else {
                        ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_NOFAIL,
                                bch2_trans_mark_metadata_bucket(&trans, ca,
index ef69a19f494a1583d609a04632da248f599515e4..6ebe49ba224803b1731f2381f573182fb7ff546c 100644 (file)
@@ -73,9 +73,8 @@ static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags
 
                bch2_btree_iter_set_pos(iter, bkey_start_pos(&sk.k->k));
 
-               bch2_trans_update(&trans, iter, sk.k, 0);
-
-               ret = bch2_trans_commit(&trans, NULL, NULL,
+               ret   = bch2_trans_update(&trans, iter, sk.k, 0) ?:
+                       bch2_trans_commit(&trans, NULL, NULL,
                                        BTREE_INSERT_NOFAIL);
 
                /*
index 2fa763e35392027aa49a7221ccad620828234281..91be50812a383f3a0aa6cd8b6aab4670449b9ea1 100644 (file)
@@ -163,9 +163,8 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
                                goto out;
                }
 
-               bch2_trans_update(&trans, iter, insert, 0);
-
-               ret = bch2_trans_commit(&trans, &op->res,
+               ret   = bch2_trans_update(&trans, iter, insert, 0) ?:
+                       bch2_trans_commit(&trans, &op->res,
                                op_journal_seq(op),
                                BTREE_INSERT_NOFAIL|
                                m->data_opts.btree_insert_flags);
index 8e6cccd3938373bd2cb5d531931d71c04cff398a..dbbbcc6dcec6eadee454c9f1e0f24d07b4676425 100644 (file)
@@ -435,6 +435,8 @@ static int __bch2_mark_bkey_replicas(struct bch_fs *c, struct bkey_s_c k,
        unsigned i;
        int ret;
 
+       memset(&search, 0, sizeof(search));
+
        for (i = 0; i < cached.nr; i++) {
                bch2_replicas_entry_cached(&search.e, cached.devs[i]);
 
index eab669af7032089a674de63735f5a9630009c1a0..2ff8e5bd2744b8839a1448a93055182ffc7f9478 100644 (file)
@@ -281,7 +281,7 @@ not_found:
                        swap(iter, slot);
 
                insert->k.p = iter->pos;
-               bch2_trans_update(trans, iter, insert, 0);
+               ret = bch2_trans_update(trans, iter, insert, 0);
        }
 
        goto out;
@@ -296,20 +296,20 @@ int bch2_hash_delete_at(struct btree_trans *trans,
        struct bkey_i *delete;
        int ret;
 
+       delete = bch2_trans_kmalloc(trans, sizeof(*delete));
+       ret = PTR_ERR_OR_ZERO(delete);
+       if (ret)
+               return ret;
+
        ret = bch2_hash_needs_whiteout(trans, desc, info, iter);
        if (ret < 0)
                return ret;
 
-       delete = bch2_trans_kmalloc(trans, sizeof(*delete));
-       if (IS_ERR(delete))
-               return PTR_ERR(delete);
-
        bkey_init(&delete->k);
        delete->k.p = iter->pos;
        delete->k.type = ret ? KEY_TYPE_hash_whiteout : KEY_TYPE_deleted;
 
-       bch2_trans_update(trans, iter, delete, 0);
-       return 0;
+       return bch2_trans_update(trans, iter, delete, 0);
 }
 
 static __always_inline
index 4c6793639fc4a542518d56329e63c091ac579ffa..2a570eb0763ee32385acba222ae0c6ee7ac93443 100644 (file)
@@ -1826,6 +1826,11 @@ int bch2_dev_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
                goto err;
        }
 
+       ret = bch2_trans_mark_dev_sb(c, ca);
+       if (ret) {
+               goto err;
+       }
+
        mutex_lock(&c->sb_lock);
        mi = &bch2_sb_get_members(c->disk_sb.sb)->members[ca->dev_idx];
        mi->nbuckets = cpu_to_le64(nbuckets);
index 254e3b3142045a2763b5eb148a831ec27ddca869..63f4a83ad1de8844a1736c1d7bd748313bd6912c 100644 (file)
@@ -621,7 +621,7 @@ static int __do_delete(struct btree_trans *trans, struct bpos pos)
        bkey_init(&delete.k);
        delete.k.p = k.k->p;
 
-       bch2_trans_update(trans, iter, &delete, 0);
+       ret = bch2_trans_update(trans, iter, &delete, 0);
 err:
        bch2_trans_iter_put(trans, iter);
        return ret;
index 858aa87660533ef1498e73b3ff27b290c3e21646..e7b40b3ca4aa551bd8e5710672e632419fb95338 100644 (file)
@@ -560,8 +560,10 @@ static const struct xattr_handler bch_xattr_bcachefs_effective_handler = {
 
 const struct xattr_handler *bch2_xattr_handlers[] = {
        &bch_xattr_user_handler,
+#ifdef CONFIG_BCACHEFS_POSIX_ACL
        &posix_acl_access_xattr_handler,
        &posix_acl_default_xattr_handler,
+#endif
        &bch_xattr_trusted_handler,
        &bch_xattr_security_handler,
 #ifndef NO_BCACHEFS_FS
index 68295914f34249094be085cf01483447954d9134..fca1208720b67dfd7e96915679572737dd626dba 100644 (file)
@@ -139,7 +139,7 @@ static __always_inline bool do_six_trylock_type(struct six_lock *lock,
                                                bool try)
 {
        const struct six_lock_vals l[] = LOCK_VALS;
-       union six_lock_state old = {0}, new;
+       union six_lock_state old, new;
        bool ret;
        u64 v;