]> git.sesse.net Git - bcachefs-tools-debian/commitdiff
Update bcachefs sources to 90a9c61e2b bcachefs: Switch bch2_btree_delete_range()...
authorKent Overstreet <kent.overstreet@gmail.com>
Thu, 11 Aug 2022 00:28:55 +0000 (20:28 -0400)
committerKent Overstreet <kent.overstreet@gmail.com>
Thu, 11 Aug 2022 21:34:34 +0000 (17:34 -0400)
65 files changed:
.bcachefs_revision
include/linux/bio.h
include/linux/blkdev.h
include/linux/errname.h [new file with mode: 0644]
include/linux/prandom.h [new file with mode: 0644]
include/linux/random.h
include/linux/six.h
include/trace/events/bcachefs.h
libbcachefs/acl.c
libbcachefs/alloc_background.c
libbcachefs/alloc_background.h
libbcachefs/alloc_foreground.c
libbcachefs/backpointers.c
libbcachefs/bcachefs.h
libbcachefs/btree_cache.c
libbcachefs/btree_gc.c
libbcachefs/btree_io.c
libbcachefs/btree_iter.c
libbcachefs/btree_iter.h
libbcachefs/btree_key_cache.c
libbcachefs/btree_locking.h
libbcachefs/btree_types.h
libbcachefs/btree_update.h
libbcachefs/btree_update_interior.c
libbcachefs/btree_update_leaf.c
libbcachefs/buckets.c
libbcachefs/checksum.c
libbcachefs/data_update.c
libbcachefs/debug.c
libbcachefs/dirent.c
libbcachefs/ec.c
libbcachefs/errcode.c [new file with mode: 0644]
libbcachefs/errcode.h
libbcachefs/error.c
libbcachefs/error.h
libbcachefs/fs-io.c
libbcachefs/fs-io.h
libbcachefs/fs.c
libbcachefs/fsck.c
libbcachefs/inode.c
libbcachefs/io.c
libbcachefs/journal.c
libbcachefs/journal_io.c
libbcachefs/journal_reclaim.c
libbcachefs/journal_seq_blacklist.c
libbcachefs/lru.c
libbcachefs/migrate.c
libbcachefs/move.c
libbcachefs/movinggc.c
libbcachefs/movinggc.h
libbcachefs/opts.h
libbcachefs/quota.c
libbcachefs/rebalance.c
libbcachefs/recovery.c
libbcachefs/reflink.c
libbcachefs/subvolume.c
libbcachefs/super-io.c
libbcachefs/super.c
libbcachefs/tests.c
libbcachefs/trace.c
libbcachefs/util.c
libbcachefs/xattr.c
linux/bio.c
linux/blkdev.c
linux/six.c

index 91d229ac45c27e5dfc01cb24e4f2401402d69594..7f12b42887436adc2ec043c9d1fc3ae50c0fff8e 100644 (file)
@@ -1 +1 @@
-1bda24d7cc91cb84fe5bcbc40c871e9d00542bd2
+90a9c61e2bcf20935aebda1c0c8078ad0fff2475
index 7d596df6c47ee7df3afeaf7ed446cf2209ea5fbe..0ad5a87dc8e893037fafd4b216cbc58518c2a390 100644 (file)
@@ -242,7 +242,7 @@ int bio_add_page(struct bio *, struct page *, unsigned, unsigned);
 struct bio *bio_alloc_clone(struct block_device *, struct bio *,
                            gfp_t, struct bio_set *);
 
-struct bio *bio_kmalloc(gfp_t, unsigned int);
+struct bio *bio_kmalloc(unsigned int, gfp_t);
 
 extern void bio_endio(struct bio *);
 
index 4ce43b5cc629b6e4150b12eb7b61f3f281834816..01b3d4adda15f92c9b1484b7057ba2f4b9509278 100644 (file)
@@ -69,8 +69,7 @@ static inline void submit_bio(struct bio *bio)
        generic_make_request(bio);
 }
 
-int blkdev_issue_discard(struct block_device *, sector_t,
-                        sector_t, gfp_t, unsigned long);
+int blkdev_issue_discard(struct block_device *, sector_t, sector_t, gfp_t);
 
 #define bdev_get_queue(bdev)           (&((bdev)->queue))
 
@@ -85,7 +84,7 @@ int blkdev_issue_discard(struct block_device *, sector_t,
 #define PAGE_SECTORS           (1 << PAGE_SECTORS_SHIFT)
 #define SECTOR_MASK            (PAGE_SECTORS - 1)
 
-#define blk_queue_discard(q)           ((void) (q), 0)
+#define bdev_max_discard_sectors(bdev) ((void) (bdev), 0)
 #define blk_queue_nonrot(q)            ((void) (q), 0)
 
 unsigned bdev_logical_block_size(struct block_device *bdev);
diff --git a/include/linux/errname.h b/include/linux/errname.h
new file mode 100644 (file)
index 0000000..443d504
--- /dev/null
@@ -0,0 +1,11 @@
+#ifndef _LINUX_ERRNAME_H
+#define _LINUX_ERRNAME_H
+
+#include <string.h>
+
+static inline const char *errname(int err)
+{
+       return strerror(abs(err));
+}
+
+#endif /* _LINUX_ERRNAME_H */
diff --git a/include/linux/prandom.h b/include/linux/prandom.h
new file mode 100644 (file)
index 0000000..6f177cd
--- /dev/null
@@ -0,0 +1,27 @@
+#ifndef _LINUX_PRANDOM_H
+#define _LINUX_PRANDOM_H
+
+#include <linux/random.h>
+
+static inline void prandom_bytes(void *buf, int nbytes)
+{
+       return get_random_bytes(buf, nbytes);
+}
+
+#define prandom_type(type)                             \
+static inline type prandom_##type(void)                        \
+{                                                      \
+       type v;                                         \
+                                                       \
+       prandom_bytes(&v, sizeof(v));                   \
+       return v;                                       \
+}
+
+prandom_type(int);
+prandom_type(long);
+prandom_type(u32);
+prandom_type(u64);
+#undef prandom_type
+
+#endif /* _LINUX_PRANDOM_H */
+
index 28c595a0c0cfe3429ff69c19fa83edb424d7d38c..ea101d53acd6003572ebcddf88db95a1deb1ba7c 100644 (file)
@@ -29,11 +29,6 @@ static inline void get_random_bytes(void *buf, int nbytes)
        BUG_ON(getrandom(buf, nbytes, 0) != nbytes);
 }
 
-static inline void prandom_bytes(void *buf, int nbytes)
-{
-       return get_random_bytes(buf, nbytes);
-}
-
 #define get_random_type(type)                          \
 static inline type get_random_##type(void)             \
 {                                                      \
index 477c33eb00d7dca36ad07cde09cd5681455cf6b2..41ddf63b7470e4286322c72d2a390636dd50ac2d 100644 (file)
@@ -200,4 +200,11 @@ void six_lock_pcpu_free_rcu(struct six_lock *);
 void six_lock_pcpu_free(struct six_lock *);
 void six_lock_pcpu_alloc(struct six_lock *);
 
+struct six_lock_count {
+       unsigned read;
+       unsigned intent;
+};
+
+struct six_lock_count six_lock_counts(struct six_lock *);
+
 #endif /* _LINUX_SIX_H */
index 66ad356e95bc795bd781686743fa1ac24be2da5e..2c9807426930e238540cf553b2ce4cb0043e2900 100644 (file)
@@ -7,21 +7,29 @@
 
 #include <linux/tracepoint.h>
 
+#define TRACE_BPOS_entries(name)                               \
+       __field(u64,                    name##_inode    )       \
+       __field(u64,                    name##_offset   )       \
+       __field(u32,                    name##_snapshot )
+
+#define TRACE_BPOS_assign(dst, src)                            \
+       __entry->dst##_inode            = (src).inode;          \
+       __entry->dst##_offset           = (src).offset;         \
+       __entry->dst##_snapshot         = (src).snapshot
+
 DECLARE_EVENT_CLASS(bpos,
        TP_PROTO(struct bpos *p),
        TP_ARGS(p),
 
        TP_STRUCT__entry(
-               __field(u64,    inode                           )
-               __field(u64,    offset                          )
+               TRACE_BPOS_entries(p)
        ),
 
        TP_fast_assign(
-               __entry->inode  = p->inode;
-               __entry->offset = p->offset;
+               TRACE_BPOS_assign(p, *p);
        ),
 
-       TP_printk("%llu:%llu", __entry->inode, __entry->offset)
+       TP_printk("%llu:%llu:%u", __entry->p_inode, __entry->p_offset, __entry->p_snapshot)
 );
 
 DECLARE_EVENT_CLASS(bkey,
@@ -230,23 +238,22 @@ DECLARE_EVENT_CLASS(btree_node,
        TP_STRUCT__entry(
                __field(dev_t,          dev                     )
                __field(u8,             level                   )
-               __field(u8,             id                      )
-               __field(u64,            inode                   )
-               __field(u64,            offset                  )
+               __field(u8,             btree_id                )
+               TRACE_BPOS_entries(pos)
        ),
 
        TP_fast_assign(
                __entry->dev            = c->dev;
                __entry->level          = b->c.level;
-               __entry->id             = b->c.btree_id;
-               __entry->inode          = b->key.k.p.inode;
-               __entry->offset         = b->key.k.p.offset;
+               __entry->btree_id       = b->c.btree_id;
+               TRACE_BPOS_assign(pos, b->key.k.p);
        ),
 
-       TP_printk("%d,%d  %u id %u %llu:%llu",
+       TP_printk("%d,%d %u %s %llu:%llu:%u",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->level, __entry->id,
-                 __entry->inode, __entry->offset)
+                 __entry->level,
+                 bch2_btree_ids[__entry->btree_id],
+                 __entry->pos_inode, __entry->pos_offset, __entry->pos_snapshot)
 );
 
 DEFINE_EVENT(btree_node, btree_read,
@@ -376,43 +383,36 @@ TRACE_EVENT(btree_cache_scan,
 );
 
 TRACE_EVENT(btree_node_relock_fail,
-       TP_PROTO(const char *trans_fn,
+       TP_PROTO(struct btree_trans *trans,
                 unsigned long caller_ip,
-                enum btree_id btree_id,
-                struct bpos *pos,
-                unsigned long node,
-                u32 iter_lock_seq,
-                u32 node_lock_seq),
-       TP_ARGS(trans_fn, caller_ip, btree_id, pos, node, iter_lock_seq, node_lock_seq),
+                struct btree_path *path,
+                unsigned level),
+       TP_ARGS(trans, caller_ip, path, level),
 
        TP_STRUCT__entry(
                __array(char,                   trans_fn, 24    )
                __field(unsigned long,          caller_ip       )
                __field(u8,                     btree_id        )
-               __field(u64,                    pos_inode       )
-               __field(u64,                    pos_offset      )
-               __field(u32,                    pos_snapshot    )
+               TRACE_BPOS_entries(pos)
                __field(unsigned long,          node            )
                __field(u32,                    iter_lock_seq   )
                __field(u32,                    node_lock_seq   )
        ),
 
        TP_fast_assign(
-               strncpy(__entry->trans_fn, trans_fn, sizeof(__entry->trans_fn));
+               strlcpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn));
                __entry->caller_ip              = caller_ip;
-               __entry->btree_id               = btree_id;
-               __entry->pos_inode              = pos->inode;
-               __entry->pos_offset             = pos->offset;
-               __entry->pos_snapshot           = pos->snapshot;
-               __entry->node                   = node;
-               __entry->iter_lock_seq          = iter_lock_seq;
-               __entry->node_lock_seq          = node_lock_seq;
+               __entry->btree_id               = path->btree_id;
+               TRACE_BPOS_assign(pos, path->pos);
+               __entry->node                   = (unsigned long) btree_path_node(path, level);
+               __entry->iter_lock_seq          = path->l[level].lock_seq;
+               __entry->node_lock_seq          = is_btree_node(path, level) ? path->l[level].b->c.lock.state.seq : 0;
        ),
 
-       TP_printk("%s %pS btree %u pos %llu:%llu:%u, node %lu iter seq %u lock seq %u",
+       TP_printk("%s %pS btree %s pos %llu:%llu:%u, node %lu iter seq %u lock seq %u",
                  __entry->trans_fn,
                  (void *) __entry->caller_ip,
-                 __entry->btree_id,
+                 bch2_btree_ids[__entry->btree_id],
                  __entry->pos_inode,
                  __entry->pos_offset,
                  __entry->pos_snapshot,
@@ -421,6 +421,56 @@ TRACE_EVENT(btree_node_relock_fail,
                  __entry->node_lock_seq)
 );
 
+TRACE_EVENT(btree_node_upgrade_fail,
+       TP_PROTO(struct btree_trans *trans,
+                unsigned long caller_ip,
+                struct btree_path *path,
+                unsigned level),
+       TP_ARGS(trans, caller_ip, path, level),
+
+       TP_STRUCT__entry(
+               __array(char,                   trans_fn, 24    )
+               __field(unsigned long,          caller_ip       )
+               __field(u8,                     btree_id        )
+               TRACE_BPOS_entries(pos)
+               __field(u8,                     locked          )
+               __field(u8,                     self_read_count )
+               __field(u8,                     self_intent_count)
+               __field(u8,                     read_count      )
+               __field(u8,                     intent_count    )
+       ),
+
+       TP_fast_assign(
+               struct six_lock_count c;
+
+               strlcpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn));
+               __entry->caller_ip              = caller_ip;
+               __entry->btree_id               = path->btree_id;
+               TRACE_BPOS_assign(pos, path->pos);
+               __entry->locked                 = btree_node_locked(path, level);
+
+               c = bch2_btree_node_lock_counts(trans, NULL, path->l[level].b, level),
+               __entry->self_read_count        = c.read;
+               __entry->self_intent_count      = c.intent;
+               c = six_lock_counts(&path->l[level].b->c.lock);
+               __entry->read_count             = c.read;
+               __entry->intent_count           = c.intent;
+       ),
+
+       TP_printk("%s %pS btree %s pos %llu:%llu:%u, locked %u held %u:%u lock count %u:%u",
+                 __entry->trans_fn,
+                 (void *) __entry->caller_ip,
+                 bch2_btree_ids[__entry->btree_id],
+                 __entry->pos_inode,
+                 __entry->pos_offset,
+                 __entry->pos_snapshot,
+                 __entry->locked,
+                 __entry->self_read_count,
+                 __entry->self_intent_count,
+                 __entry->read_count,
+                 __entry->intent_count)
+);
+
 /* Garbage collection */
 
 DEFINE_EVENT(bch_fs, gc_gens_start,
@@ -456,55 +506,68 @@ TRACE_EVENT(bucket_alloc,
 
 TRACE_EVENT(bucket_alloc_fail,
        TP_PROTO(struct bch_dev *ca, const char *alloc_reserve,
+                u64 free,
                 u64 avail,
+                u64 copygc_wait_amount,
+                s64 copygc_waiting_for,
                 u64 seen,
                 u64 open,
                 u64 need_journal_commit,
                 u64 nouse,
                 bool nonblocking,
-                int ret),
-       TP_ARGS(ca, alloc_reserve, avail, seen, open, need_journal_commit, nouse, nonblocking, ret),
+                const char *err),
+       TP_ARGS(ca, alloc_reserve, free, avail, copygc_wait_amount, copygc_waiting_for,
+               seen, open, need_journal_commit, nouse, nonblocking, err),
 
        TP_STRUCT__entry(
                __field(dev_t,                  dev                     )
                __array(char,   reserve,        16                      )
+               __field(u64,                    free                    )
                __field(u64,                    avail                   )
+               __field(u64,                    copygc_wait_amount      )
+               __field(s64,                    copygc_waiting_for      )
                __field(u64,                    seen                    )
                __field(u64,                    open                    )
                __field(u64,                    need_journal_commit     )
                __field(u64,                    nouse                   )
                __field(bool,                   nonblocking             )
-               __field(int,                    ret                     )
+               __array(char,                   err,    16              )
        ),
 
        TP_fast_assign(
                __entry->dev            = ca->dev;
                strlcpy(__entry->reserve, alloc_reserve, sizeof(__entry->reserve));
+               __entry->free           = free;
                __entry->avail          = avail;
+               __entry->copygc_wait_amount     = copygc_wait_amount;
+               __entry->copygc_waiting_for     = copygc_waiting_for;
                __entry->seen           = seen;
                __entry->open           = open;
                __entry->need_journal_commit = need_journal_commit;
                __entry->nouse          = nouse;
                __entry->nonblocking    = nonblocking;
-               __entry->ret            = ret;
+               strlcpy(__entry->err, err, sizeof(__entry->err));
        ),
 
-       TP_printk("%d,%d reserve %s avail %llu seen %llu open %llu need_journal_commit %llu nouse %llu nonblocking %u ret %i",
+       TP_printk("%d,%d reserve %s free %llu avail %llu copygc_wait %llu/%lli seen %llu open %llu need_journal_commit %llu nouse %llu nonblocking %u err %s",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  __entry->reserve,
+                 __entry->free,
                  __entry->avail,
+                 __entry->copygc_wait_amount,
+                 __entry->copygc_waiting_for,
                  __entry->seen,
                  __entry->open,
                  __entry->need_journal_commit,
                  __entry->nouse,
                  __entry->nonblocking,
-                 __entry->ret)
+                 __entry->err)
 );
 
 TRACE_EVENT(discard_buckets,
        TP_PROTO(struct bch_fs *c, u64 seen, u64 open,
-                u64 need_journal_commit, u64 discarded, int ret),
-       TP_ARGS(c, seen, open, need_journal_commit, discarded, ret),
+                u64 need_journal_commit, u64 discarded, const char *err),
+       TP_ARGS(c, seen, open, need_journal_commit, discarded, err),
 
        TP_STRUCT__entry(
                __field(dev_t,          dev                     )
@@ -512,7 +575,7 @@ TRACE_EVENT(discard_buckets,
                __field(u64,            open                    )
                __field(u64,            need_journal_commit     )
                __field(u64,            discarded               )
-               __field(int,            ret                     )
+               __array(char,           err,    16              )
        ),
 
        TP_fast_assign(
@@ -521,16 +584,16 @@ TRACE_EVENT(discard_buckets,
                __entry->open                   = open;
                __entry->need_journal_commit    = need_journal_commit;
                __entry->discarded              = discarded;
-               __entry->ret                    = ret;
+               strlcpy(__entry->err, err, sizeof(__entry->err));
        ),
 
-       TP_printk("%d%d seen %llu open %llu need_journal_commit %llu discarded %llu ret %i",
+       TP_printk("%d%d seen %llu open %llu need_journal_commit %llu discarded %llu err %s",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  __entry->seen,
                  __entry->open,
                  __entry->need_journal_commit,
                  __entry->discarded,
-                 __entry->ret)
+                 __entry->err)
 );
 
 TRACE_EVENT(invalidate_bucket,
@@ -649,9 +712,9 @@ TRACE_EVENT(copygc_wait,
 );
 
 DECLARE_EVENT_CLASS(transaction_event,
-       TP_PROTO(const char *trans_fn,
+       TP_PROTO(struct btree_trans *trans,
                 unsigned long caller_ip),
-       TP_ARGS(trans_fn, caller_ip),
+       TP_ARGS(trans, caller_ip),
 
        TP_STRUCT__entry(
                __array(char,                   trans_fn, 24    )
@@ -659,7 +722,7 @@ DECLARE_EVENT_CLASS(transaction_event,
        ),
 
        TP_fast_assign(
-               strncpy(__entry->trans_fn, trans_fn, sizeof(__entry->trans_fn));
+               strlcpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn));
                __entry->caller_ip              = caller_ip;
        ),
 
@@ -667,217 +730,206 @@ DECLARE_EVENT_CLASS(transaction_event,
 );
 
 DEFINE_EVENT(transaction_event,        transaction_commit,
-       TP_PROTO(const char *trans_fn,
+       TP_PROTO(struct btree_trans *trans,
                 unsigned long caller_ip),
-       TP_ARGS(trans_fn, caller_ip)
+       TP_ARGS(trans, caller_ip)
 );
 
-DEFINE_EVENT(transaction_event,        transaction_restart_ip,
-       TP_PROTO(const char *trans_fn,
+DEFINE_EVENT(transaction_event,        transaction_restart_injected,
+       TP_PROTO(struct btree_trans *trans,
                 unsigned long caller_ip),
-       TP_ARGS(trans_fn, caller_ip)
+       TP_ARGS(trans, caller_ip)
 );
 
 DEFINE_EVENT(transaction_event,        trans_blocked_journal_reclaim,
-       TP_PROTO(const char *trans_fn,
+       TP_PROTO(struct btree_trans *trans,
                 unsigned long caller_ip),
-       TP_ARGS(trans_fn, caller_ip)
+       TP_ARGS(trans, caller_ip)
 );
 
 DEFINE_EVENT(transaction_event,        trans_restart_journal_res_get,
-       TP_PROTO(const char *trans_fn,
+       TP_PROTO(struct btree_trans *trans,
                 unsigned long caller_ip),
-       TP_ARGS(trans_fn, caller_ip)
+       TP_ARGS(trans, caller_ip)
 );
 
 DEFINE_EVENT(transaction_event,        trans_restart_journal_preres_get,
-       TP_PROTO(const char *trans_fn,
+       TP_PROTO(struct btree_trans *trans,
                 unsigned long caller_ip),
-       TP_ARGS(trans_fn, caller_ip)
+       TP_ARGS(trans, caller_ip)
 );
 
 DEFINE_EVENT(transaction_event,        trans_restart_journal_reclaim,
-       TP_PROTO(const char *trans_fn,
+       TP_PROTO(struct btree_trans *trans,
                 unsigned long caller_ip),
-       TP_ARGS(trans_fn, caller_ip)
+       TP_ARGS(trans, caller_ip)
 );
 
 DEFINE_EVENT(transaction_event,        trans_restart_fault_inject,
-       TP_PROTO(const char *trans_fn,
+       TP_PROTO(struct btree_trans *trans,
                 unsigned long caller_ip),
-       TP_ARGS(trans_fn, caller_ip)
+       TP_ARGS(trans, caller_ip)
 );
 
 DEFINE_EVENT(transaction_event,        trans_traverse_all,
-       TP_PROTO(const char *trans_fn,
+       TP_PROTO(struct btree_trans *trans,
                 unsigned long caller_ip),
-       TP_ARGS(trans_fn, caller_ip)
+       TP_ARGS(trans, caller_ip)
 );
 
 DEFINE_EVENT(transaction_event,        trans_restart_mark_replicas,
-       TP_PROTO(const char *trans_fn,
+       TP_PROTO(struct btree_trans *trans,
                 unsigned long caller_ip),
-       TP_ARGS(trans_fn, caller_ip)
+       TP_ARGS(trans, caller_ip)
 );
 
 DEFINE_EVENT(transaction_event,        trans_restart_key_cache_raced,
-       TP_PROTO(const char *trans_fn,
+       TP_PROTO(struct btree_trans *trans,
                 unsigned long caller_ip),
-       TP_ARGS(trans_fn, caller_ip)
+       TP_ARGS(trans, caller_ip)
+);
+
+DEFINE_EVENT(transaction_event,        trans_restart_too_many_iters,
+       TP_PROTO(struct btree_trans *trans,
+                unsigned long caller_ip),
+       TP_ARGS(trans, caller_ip)
 );
 
 DECLARE_EVENT_CLASS(transaction_restart_iter,
-       TP_PROTO(const char *trans_fn,
+       TP_PROTO(struct btree_trans *trans,
                 unsigned long caller_ip,
-                enum btree_id btree_id,
-                struct bpos *pos),
-       TP_ARGS(trans_fn, caller_ip, btree_id, pos),
+                struct btree_path *path),
+       TP_ARGS(trans, caller_ip, path),
 
        TP_STRUCT__entry(
                __array(char,                   trans_fn, 24    )
                __field(unsigned long,          caller_ip       )
                __field(u8,                     btree_id        )
-               __field(u64,                    pos_inode       )
-               __field(u64,                    pos_offset      )
-               __field(u32,                    pos_snapshot    )
+               TRACE_BPOS_entries(pos)
        ),
 
        TP_fast_assign(
-               strncpy(__entry->trans_fn, trans_fn, sizeof(__entry->trans_fn));
+               strlcpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn));
                __entry->caller_ip              = caller_ip;
-               __entry->btree_id               = btree_id;
-               __entry->pos_inode              = pos->inode;
-               __entry->pos_offset             = pos->offset;
-               __entry->pos_snapshot           = pos->snapshot;
+               __entry->btree_id               = path->btree_id;
+               TRACE_BPOS_assign(pos, path->pos)
        ),
 
-       TP_printk("%s %pS btree %u pos %llu:%llu:%u",
+       TP_printk("%s %pS btree %s pos %llu:%llu:%u",
                  __entry->trans_fn,
                  (void *) __entry->caller_ip,
-                 __entry->btree_id,
+                 bch2_btree_ids[__entry->btree_id],
                  __entry->pos_inode,
                  __entry->pos_offset,
                  __entry->pos_snapshot)
 );
 
 DEFINE_EVENT(transaction_restart_iter, trans_restart_btree_node_reused,
-       TP_PROTO(const char *trans_fn,
+       TP_PROTO(struct btree_trans *trans,
                 unsigned long caller_ip,
-                enum btree_id btree_id,
-                struct bpos *pos),
-       TP_ARGS(trans_fn, caller_ip, btree_id, pos)
+                struct btree_path *path),
+       TP_ARGS(trans, caller_ip, path)
 );
 
 DEFINE_EVENT(transaction_restart_iter, trans_restart_btree_node_split,
-       TP_PROTO(const char *trans_fn,
+       TP_PROTO(struct btree_trans *trans,
                 unsigned long caller_ip,
-                enum btree_id btree_id,
-                struct bpos *pos),
-       TP_ARGS(trans_fn, caller_ip, btree_id, pos)
+                struct btree_path *path),
+       TP_ARGS(trans, caller_ip, path)
 );
 
 DEFINE_EVENT(transaction_restart_iter, trans_restart_upgrade,
-       TP_PROTO(const char *trans_fn,
+       TP_PROTO(struct btree_trans *trans,
                 unsigned long caller_ip,
-                enum btree_id btree_id,
-                struct bpos *pos),
-       TP_ARGS(trans_fn, caller_ip, btree_id, pos)
+                struct btree_path *path),
+       TP_ARGS(trans, caller_ip, path)
 );
 
 DEFINE_EVENT(transaction_restart_iter, trans_restart_iter_upgrade,
-       TP_PROTO(const char *trans_fn,
+       TP_PROTO(struct btree_trans *trans,
                 unsigned long caller_ip,
-                enum btree_id btree_id,
-                struct bpos *pos),
-       TP_ARGS(trans_fn, caller_ip, btree_id, pos)
+                struct btree_path *path),
+       TP_ARGS(trans, caller_ip, path)
 );
 
 DEFINE_EVENT(transaction_restart_iter, trans_restart_relock,
-       TP_PROTO(const char *trans_fn,
+       TP_PROTO(struct btree_trans *trans,
                 unsigned long caller_ip,
-                enum btree_id btree_id,
-                struct bpos *pos),
-       TP_ARGS(trans_fn, caller_ip, btree_id, pos)
+                struct btree_path *path),
+       TP_ARGS(trans, caller_ip, path)
 );
 
 DEFINE_EVENT(transaction_restart_iter, trans_restart_relock_next_node,
-       TP_PROTO(const char *trans_fn,
+       TP_PROTO(struct btree_trans *trans,
                 unsigned long caller_ip,
-                enum btree_id btree_id,
-                struct bpos *pos),
-       TP_ARGS(trans_fn, caller_ip, btree_id, pos)
+                struct btree_path *path),
+       TP_ARGS(trans, caller_ip, path)
 );
 
 DEFINE_EVENT(transaction_restart_iter, trans_restart_relock_parent_for_fill,
-       TP_PROTO(const char *trans_fn,
+       TP_PROTO(struct btree_trans *trans,
                 unsigned long caller_ip,
-                enum btree_id btree_id,
-                struct bpos *pos),
-       TP_ARGS(trans_fn, caller_ip, btree_id, pos)
+                struct btree_path *path),
+       TP_ARGS(trans, caller_ip, path)
 );
 
 DEFINE_EVENT(transaction_restart_iter, trans_restart_relock_after_fill,
-       TP_PROTO(const char *trans_fn,
+       TP_PROTO(struct btree_trans *trans,
                 unsigned long caller_ip,
-                enum btree_id btree_id,
-                struct bpos *pos),
-       TP_ARGS(trans_fn, caller_ip, btree_id, pos)
+                struct btree_path *path),
+       TP_ARGS(trans, caller_ip, path)
+);
+
+DEFINE_EVENT(transaction_event,        transaction_restart_key_cache_upgrade,
+       TP_PROTO(struct btree_trans *trans,
+                unsigned long caller_ip),
+       TP_ARGS(trans, caller_ip)
 );
 
 DEFINE_EVENT(transaction_restart_iter, trans_restart_relock_key_cache_fill,
-       TP_PROTO(const char *trans_fn,
+       TP_PROTO(struct btree_trans *trans,
                 unsigned long caller_ip,
-                enum btree_id btree_id,
-                struct bpos *pos),
-       TP_ARGS(trans_fn, caller_ip, btree_id, pos)
+                struct btree_path *path),
+       TP_ARGS(trans, caller_ip, path)
 );
 
 DEFINE_EVENT(transaction_restart_iter, trans_restart_relock_path,
-       TP_PROTO(const char *trans_fn,
+       TP_PROTO(struct btree_trans *trans,
                 unsigned long caller_ip,
-                enum btree_id btree_id,
-                struct bpos *pos),
-       TP_ARGS(trans_fn, caller_ip, btree_id, pos)
+                struct btree_path *path),
+       TP_ARGS(trans, caller_ip, path)
 );
 
 DEFINE_EVENT(transaction_restart_iter, trans_restart_relock_path_intent,
-       TP_PROTO(const char *trans_fn,
+       TP_PROTO(struct btree_trans *trans,
                 unsigned long caller_ip,
-                enum btree_id btree_id,
-                struct bpos *pos),
-       TP_ARGS(trans_fn, caller_ip, btree_id, pos)
+                struct btree_path *path),
+       TP_ARGS(trans, caller_ip, path)
 );
 
 DEFINE_EVENT(transaction_restart_iter, trans_restart_traverse,
-       TP_PROTO(const char *trans_fn,
+       TP_PROTO(struct btree_trans *trans,
                 unsigned long caller_ip,
-                enum btree_id btree_id,
-                struct bpos *pos),
-       TP_ARGS(trans_fn, caller_ip, btree_id, pos)
+                struct btree_path *path),
+       TP_ARGS(trans, caller_ip, path)
 );
 
 DEFINE_EVENT(transaction_restart_iter, trans_restart_memory_allocation_failure,
-       TP_PROTO(const char *trans_fn,
+       TP_PROTO(struct btree_trans *trans,
                 unsigned long caller_ip,
-                enum btree_id btree_id,
-                struct bpos *pos),
-       TP_ARGS(trans_fn, caller_ip, btree_id, pos)
+                struct btree_path *path),
+       TP_ARGS(trans, caller_ip, path)
 );
 
 TRACE_EVENT(trans_restart_would_deadlock,
-       TP_PROTO(const char *trans_fn,
+       TP_PROTO(struct btree_trans *trans,
                 unsigned long  caller_ip,
-                bool           in_traverse_all,
                 unsigned       reason,
-                enum btree_id  have_btree_id,
-                unsigned       have_iter_type,
-                struct bpos    *have_pos,
-                enum btree_id  want_btree_id,
-                unsigned       want_iter_type,
+                struct btree_path *have,
+                struct btree_path *want,
                 struct bpos    *want_pos),
-       TP_ARGS(trans_fn, caller_ip, in_traverse_all, reason,
-               have_btree_id, have_iter_type, have_pos,
-               want_btree_id, want_iter_type, want_pos),
+       TP_ARGS(trans, caller_ip, reason,
+               have, want, want_pos),
 
        TP_STRUCT__entry(
                __array(char,                   trans_fn, 24    )
@@ -885,35 +937,24 @@ TRACE_EVENT(trans_restart_would_deadlock,
                __field(u8,                     in_traverse_all )
                __field(u8,                     reason          )
                __field(u8,                     have_btree_id   )
-               __field(u8,                     have_iter_type  )
+               __field(u8,                     have_type       )
                __field(u8,                     want_btree_id   )
-               __field(u8,                     want_iter_type  )
-
-               __field(u64,                    have_pos_inode  )
-               __field(u64,                    have_pos_offset )
-               __field(u32,                    have_pos_snapshot)
-               __field(u32,                    want_pos_snapshot)
-               __field(u64,                    want_pos_inode  )
-               __field(u64,                    want_pos_offset )
+               __field(u8,                     want_type       )
+               TRACE_BPOS_entries(have_pos)
+               TRACE_BPOS_entries(want_pos)
        ),
 
        TP_fast_assign(
-               strncpy(__entry->trans_fn, trans_fn, sizeof(__entry->trans_fn));
+               strlcpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn));
                __entry->caller_ip              = caller_ip;
-               __entry->in_traverse_all        = in_traverse_all;
+               __entry->in_traverse_all        = trans->in_traverse_all;
                __entry->reason                 = reason;
-               __entry->have_btree_id          = have_btree_id;
-               __entry->have_iter_type         = have_iter_type;
-               __entry->want_btree_id          = want_btree_id;
-               __entry->want_iter_type         = want_iter_type;
-
-               __entry->have_pos_inode         = have_pos->inode;
-               __entry->have_pos_offset        = have_pos->offset;
-               __entry->have_pos_snapshot      = have_pos->snapshot;
-
-               __entry->want_pos_inode         = want_pos->inode;
-               __entry->want_pos_offset        = want_pos->offset;
-               __entry->want_pos_snapshot      = want_pos->snapshot;
+               __entry->have_btree_id          = have->btree_id;
+               __entry->have_type              = have->cached;
+               __entry->want_btree_id          = want->btree_id;
+               __entry->want_type              = want->cached;
+               TRACE_BPOS_assign(have_pos, have->pos);
+               TRACE_BPOS_assign(want_pos, *want_pos);
        ),
 
        TP_printk("%s %pS traverse_all %u because %u have %u:%u %llu:%llu:%u want %u:%u %llu:%llu:%u",
@@ -922,37 +963,37 @@ TRACE_EVENT(trans_restart_would_deadlock,
                  __entry->in_traverse_all,
                  __entry->reason,
                  __entry->have_btree_id,
-                 __entry->have_iter_type,
+                 __entry->have_type,
                  __entry->have_pos_inode,
                  __entry->have_pos_offset,
                  __entry->have_pos_snapshot,
                  __entry->want_btree_id,
-                 __entry->want_iter_type,
+                 __entry->want_type,
                  __entry->want_pos_inode,
                  __entry->want_pos_offset,
                  __entry->want_pos_snapshot)
 );
 
 TRACE_EVENT(trans_restart_would_deadlock_write,
-       TP_PROTO(const char *trans_fn),
-       TP_ARGS(trans_fn),
+       TP_PROTO(struct btree_trans *trans),
+       TP_ARGS(trans),
 
        TP_STRUCT__entry(
                __array(char,                   trans_fn, 24    )
        ),
 
        TP_fast_assign(
-               strncpy(__entry->trans_fn, trans_fn, sizeof(__entry->trans_fn));
+               strlcpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn));
        ),
 
        TP_printk("%s", __entry->trans_fn)
 );
 
 TRACE_EVENT(trans_restart_mem_realloced,
-       TP_PROTO(const char *trans_fn,
+       TP_PROTO(struct btree_trans *trans,
                 unsigned long caller_ip,
                 unsigned long bytes),
-       TP_ARGS(trans_fn, caller_ip, bytes),
+       TP_ARGS(trans, caller_ip, bytes),
 
        TP_STRUCT__entry(
                __array(char,                   trans_fn, 24    )
@@ -961,7 +1002,7 @@ TRACE_EVENT(trans_restart_mem_realloced,
        ),
 
        TP_fast_assign(
-               strncpy(__entry->trans_fn, trans_fn, sizeof(__entry->trans_fn));
+               strlcpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn));
                __entry->caller_ip      = caller_ip;
                __entry->bytes          = bytes;
        ),
@@ -973,32 +1014,28 @@ TRACE_EVENT(trans_restart_mem_realloced,
 );
 
 TRACE_EVENT(trans_restart_key_cache_key_realloced,
-       TP_PROTO(const char *trans_fn,
+       TP_PROTO(struct btree_trans *trans,
                 unsigned long caller_ip,
-                enum btree_id btree_id,
-                struct bpos *pos,
+                struct btree_path *path,
                 unsigned old_u64s,
                 unsigned new_u64s),
-       TP_ARGS(trans_fn, caller_ip, btree_id, pos, old_u64s, new_u64s),
+       TP_ARGS(trans, caller_ip, path, old_u64s, new_u64s),
 
        TP_STRUCT__entry(
                __array(char,                   trans_fn, 24    )
                __field(unsigned long,          caller_ip       )
                __field(enum btree_id,          btree_id        )
-               __field(u64,                    inode           )
-               __field(u64,                    offset          )
-               __field(u32,                    snapshot        )
+               TRACE_BPOS_entries(pos)
                __field(u32,                    old_u64s        )
                __field(u32,                    new_u64s        )
        ),
 
        TP_fast_assign(
-               strncpy(__entry->trans_fn, trans_fn, sizeof(__entry->trans_fn));
-               __entry->caller_ip      = caller_ip;
-               __entry->btree_id       = btree_id;
-               __entry->inode          = pos->inode;
-               __entry->offset         = pos->offset;
-               __entry->snapshot       = pos->snapshot;
+               strlcpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn));
+               __entry->caller_ip              = caller_ip;
+
+               __entry->btree_id       = path->btree_id;
+               TRACE_BPOS_assign(pos, path->pos);
                __entry->old_u64s       = old_u64s;
                __entry->new_u64s       = new_u64s;
        ),
@@ -1007,9 +1044,9 @@ TRACE_EVENT(trans_restart_key_cache_key_realloced,
                  __entry->trans_fn,
                  (void *) __entry->caller_ip,
                  bch2_btree_ids[__entry->btree_id],
-                 __entry->inode,
-                 __entry->offset,
-                 __entry->snapshot,
+                 __entry->pos_inode,
+                 __entry->pos_offset,
+                 __entry->pos_snapshot,
                  __entry->old_u64s,
                  __entry->new_u64s)
 );
index 5070caf8f349adbec532ae966346fe22e6fda142..5c6ccf68509404713564277388737371b3c46705 100644 (file)
@@ -236,7 +236,7 @@ retry:
                        &X_SEARCH(acl_to_xattr_type(type), "", 0),
                        0);
        if (ret) {
-               if (ret == -EINTR)
+               if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
                        goto retry;
                if (ret != -ENOENT)
                        acl = ERR_PTR(ret);
@@ -335,7 +335,7 @@ retry:
 btree_err:
        bch2_trans_iter_exit(&trans, &inode_iter);
 
-       if (ret == -EINTR)
+       if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
                goto retry;
        if (unlikely(ret))
                goto err;
index ca1f45cc80b7f24de2be1cf86886e7cb796ecbfb..2281b8d45982a3cf8870219d014f8d4524324bc4 100644 (file)
@@ -543,7 +543,7 @@ int bch2_alloc_read(struct bch_fs *c)
        bch2_trans_exit(&trans);
 
        if (ret)
-               bch_err(c, "error reading alloc info: %i", ret);
+               bch_err(c, "error reading alloc info: %s", bch2_err_str(ret));
 
        return ret;
 }
@@ -794,7 +794,7 @@ static int bch2_check_discard_freespace_key(struct btree_trans *trans,
 {
        struct bch_fs *c = trans->c;
        struct btree_iter alloc_iter;
-       struct bkey_s_c k, freespace_k;
+       struct bkey_s_c alloc_k;
        struct bch_alloc_v4 a;
        u64 genbits;
        struct bpos pos;
@@ -804,14 +804,6 @@ static int bch2_check_discard_freespace_key(struct btree_trans *trans,
        struct printbuf buf = PRINTBUF;
        int ret;
 
-       freespace_k = bch2_btree_iter_peek(iter);
-       if (!freespace_k.k)
-               return 1;
-
-       ret = bkey_err(freespace_k);
-       if (ret)
-               return ret;
-
        pos = iter->pos;
        pos.offset &= ~(~0ULL << 56);
        genbits = iter->pos.offset & (~0ULL << 56);
@@ -823,18 +815,18 @@ static int bch2_check_discard_freespace_key(struct btree_trans *trans,
                        bch2_btree_ids[iter->btree_id], pos.inode, pos.offset))
                goto delete;
 
-       k = bch2_btree_iter_peek_slot(&alloc_iter);
-       ret = bkey_err(k);
+       alloc_k = bch2_btree_iter_peek_slot(&alloc_iter);
+       ret = bkey_err(alloc_k);
        if (ret)
                goto err;
 
-       bch2_alloc_to_v4(k, &a);
+       bch2_alloc_to_v4(alloc_k, &a);
 
        if (fsck_err_on(a.data_type != state ||
                        (state == BCH_DATA_free &&
                         genbits != alloc_freespace_genbits(a)), c,
                        "%s\n  incorrectly set in %s index (free %u, genbits %llu should be %llu)",
-                       (bch2_bkey_val_to_text(&buf, c, k), buf.buf),
+                       (bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf),
                        bch2_btree_ids[iter->btree_id],
                        a.data_type == state,
                        genbits >> 56, alloc_freespace_genbits(a) >> 56))
@@ -855,6 +847,7 @@ int bch2_check_alloc_info(struct bch_fs *c)
 {
        struct btree_trans trans;
        struct btree_iter iter, discard_iter, freespace_iter;
+       struct bkey_s_c k;
        int ret = 0;
 
        bch2_trans_init(&trans, c, 0, 0);
@@ -884,36 +877,16 @@ int bch2_check_alloc_info(struct bch_fs *c)
        if (ret < 0)
                goto err;
 
-       bch2_trans_iter_init(&trans, &iter, BTREE_ID_need_discard, POS_MIN,
-                            BTREE_ITER_PREFETCH);
-       while (1) {
-               ret = commit_do(&trans, NULL, NULL,
-                                     BTREE_INSERT_NOFAIL|
-                                     BTREE_INSERT_LAZY_RW,
-                       bch2_check_discard_freespace_key(&trans, &iter));
-               if (ret)
-                       break;
-
-               bch2_btree_iter_advance(&iter);
-       }
-       bch2_trans_iter_exit(&trans, &iter);
-
-       if (ret < 0)
-               goto err;
-
-       bch2_trans_iter_init(&trans, &iter, BTREE_ID_freespace, POS_MIN,
-                            BTREE_ITER_PREFETCH);
-       while (1) {
-               ret = commit_do(&trans, NULL, NULL,
-                                     BTREE_INSERT_NOFAIL|
-                                     BTREE_INSERT_LAZY_RW,
-                       bch2_check_discard_freespace_key(&trans, &iter));
-               if (ret)
-                       break;
-
-               bch2_btree_iter_advance(&iter);
-       }
-       bch2_trans_iter_exit(&trans, &iter);
+       ret = for_each_btree_key_commit(&trans, iter,
+                       BTREE_ID_need_discard, POS_MIN,
+                       BTREE_ITER_PREFETCH, k,
+                       NULL, NULL, BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW,
+               bch2_check_discard_freespace_key(&trans, &iter)) ?:
+             for_each_btree_key_commit(&trans, iter,
+                       BTREE_ID_freespace, POS_MIN,
+                       BTREE_ITER_PREFETCH, k,
+                       NULL, NULL, BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW,
+               bch2_check_discard_freespace_key(&trans, &iter));
 err:
        bch2_trans_exit(&trans);
        return ret < 0 ? ret : 0;
@@ -1016,17 +989,44 @@ int bch2_check_alloc_to_lru_refs(struct bch_fs *c)
        return ret < 0 ? ret : 0;
 }
 
-static int bch2_clear_need_discard(struct btree_trans *trans, struct bpos pos,
-                                  struct bch_dev *ca, bool *discard_done)
+static int bch2_discard_one_bucket(struct btree_trans *trans,
+                                  struct btree_iter *need_discard_iter,
+                                  struct bpos *discard_pos_done,
+                                  u64 *seen,
+                                  u64 *open,
+                                  u64 *need_journal_commit,
+                                  u64 *discarded)
 {
        struct bch_fs *c = trans->c;
-       struct btree_iter iter;
+       struct bpos pos = need_discard_iter->pos;
+       struct btree_iter iter = { NULL };
        struct bkey_s_c k;
+       struct bch_dev *ca;
        struct bkey_i_alloc_v4 *a;
        struct printbuf buf = PRINTBUF;
-       int ret;
+       bool did_discard = false;
+       int ret = 0;
+
+       ca = bch_dev_bkey_exists(c, pos.inode);
+       if (!percpu_ref_tryget(&ca->io_ref)) {
+               bch2_btree_iter_set_pos(need_discard_iter, POS(pos.inode + 1, 0));
+               return 0;
+       }
+
+       if (bch2_bucket_is_open_safe(c, pos.inode, pos.offset)) {
+               (*open)++;
+               goto out;
+       }
 
-       bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, pos,
+       if (bch2_bucket_needs_journal_commit(&c->buckets_waiting_for_journal,
+                       c->journal.flushed_seq_ondisk,
+                       pos.inode, pos.offset)) {
+               (*need_journal_commit)++;
+               goto out;
+       }
+
+       bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc,
+                            need_discard_iter->pos,
                             BTREE_ITER_CACHED);
        k = bch2_btree_iter_peek_slot(&iter);
        ret = bkey_err(k);
@@ -1062,7 +1062,8 @@ static int bch2_clear_need_discard(struct btree_trans *trans, struct bpos pos,
                goto out;
        }
 
-       if (!*discard_done && ca->mi.discard && !c->opts.nochanges) {
+       if (bkey_cmp(*discard_pos_done, iter.pos) &&
+           ca->mi.discard && !c->opts.nochanges) {
                /*
                 * This works without any other locks because this is the only
                 * thread that removes items from the need_discard tree
@@ -1071,20 +1072,32 @@ static int bch2_clear_need_discard(struct btree_trans *trans, struct bpos pos,
                blkdev_issue_discard(ca->disk_sb.bdev,
                                     k.k->p.offset * ca->mi.bucket_size,
                                     ca->mi.bucket_size,
-                                    GFP_KERNEL, 0);
-               *discard_done = true;
+                                    GFP_KERNEL);
 
-               ret = bch2_trans_relock(trans) ? 0 : -EINTR;
+               ret = bch2_trans_relock(trans);
                if (ret)
                        goto out;
        }
 
+       *discard_pos_done = iter.pos;
+       did_discard = true;
+
        SET_BCH_ALLOC_V4_NEED_DISCARD(&a->v, false);
        a->v.data_type = alloc_data_type(a->v, a->v.data_type);
 write:
-       ret = bch2_trans_update(trans, &iter, &a->k_i, 0);
+       ret =   bch2_trans_update(trans, &iter, &a->k_i, 0) ?:
+               bch2_trans_commit(trans, NULL, NULL,
+                                 BTREE_INSERT_USE_RESERVE|BTREE_INSERT_NOFAIL);
+       if (ret)
+               goto out;
+
+       if (did_discard) {
+               this_cpu_inc(c->counters[BCH_COUNTER_bucket_discard]);
+               (*discarded)++;
+       }
 out:
        bch2_trans_iter_exit(trans, &iter);
+       percpu_ref_put(&ca->io_ref);
        printbuf_exit(&buf);
        return ret;
 }
@@ -1092,61 +1105,27 @@ out:
 static void bch2_do_discards_work(struct work_struct *work)
 {
        struct bch_fs *c = container_of(work, struct bch_fs, discard_work);
-       struct bch_dev *ca = NULL;
        struct btree_trans trans;
        struct btree_iter iter;
        struct bkey_s_c k;
        u64 seen = 0, open = 0, need_journal_commit = 0, discarded = 0;
+       struct bpos discard_pos_done = POS_MAX;
        int ret;
 
        bch2_trans_init(&trans, c, 0, 0);
 
-       for_each_btree_key(&trans, iter, BTREE_ID_need_discard,
-                          POS_MIN, 0, k, ret) {
-               bool discard_done = false;
-
-               if (ca && k.k->p.inode != ca->dev_idx) {
-                       percpu_ref_put(&ca->io_ref);
-                       ca = NULL;
-               }
-
-               if (!ca) {
-                       ca = bch_dev_bkey_exists(c, k.k->p.inode);
-                       if (!percpu_ref_tryget(&ca->io_ref)) {
-                               ca = NULL;
-                               bch2_btree_iter_set_pos(&iter, POS(k.k->p.inode + 1, 0));
-                               continue;
-                       }
-               }
-
-               seen++;
-
-               if (bch2_bucket_is_open_safe(c, k.k->p.inode, k.k->p.offset)) {
-                       open++;
-                       continue;
-               }
-
-               if (bch2_bucket_needs_journal_commit(&c->buckets_waiting_for_journal,
-                               c->journal.flushed_seq_ondisk,
-                               k.k->p.inode, k.k->p.offset)) {
-                       need_journal_commit++;
-                       continue;
-               }
-
-               ret = commit_do(&trans, NULL, NULL,
-                                     BTREE_INSERT_USE_RESERVE|
-                                     BTREE_INSERT_NOFAIL,
-                               bch2_clear_need_discard(&trans, k.k->p, ca, &discard_done));
-               if (ret)
-                       break;
-
-               this_cpu_inc(c->counters[BCH_COUNTER_bucket_discard]);
-               discarded++;
-       }
-       bch2_trans_iter_exit(&trans, &iter);
-
-       if (ca)
-               percpu_ref_put(&ca->io_ref);
+       /*
+        * We're doing the commit in bch2_discard_one_bucket instead of using
+        * for_each_btree_key_commit() so that we can increment counters after
+        * successful commit:
+        */
+       ret = for_each_btree_key2(&trans, iter,
+                       BTREE_ID_need_discard, POS_MIN, 0, k,
+               bch2_discard_one_bucket(&trans, &iter, &discard_pos_done,
+                                       &seen,
+                                       &open,
+                                       &need_journal_commit,
+                                       &discarded));
 
        bch2_trans_exit(&trans);
 
@@ -1155,7 +1134,8 @@ static void bch2_do_discards_work(struct work_struct *work)
 
        percpu_ref_put(&c->writes);
 
-       trace_discard_buckets(c, seen, open, need_journal_commit, discarded, ret);
+       trace_discard_buckets(c, seen, open, need_journal_commit, discarded,
+                             bch2_err_str(ret));
 }
 
 void bch2_do_discards(struct bch_fs *c)
@@ -1165,29 +1145,20 @@ void bch2_do_discards(struct bch_fs *c)
                percpu_ref_put(&c->writes);
 }
 
-static int invalidate_one_bucket(struct btree_trans *trans, struct bch_dev *ca,
-                                struct bpos *bucket_pos, unsigned *cached_sectors)
+static int invalidate_one_bucket(struct btree_trans *trans,
+                                struct btree_iter *lru_iter, struct bkey_s_c k,
+                                unsigned dev_idx, s64 *nr_to_invalidate)
 {
        struct bch_fs *c = trans->c;
-       struct btree_iter lru_iter, alloc_iter = { NULL };
-       struct bkey_s_c k;
+       struct btree_iter alloc_iter = { NULL };
        struct bkey_i_alloc_v4 *a;
-       u64 bucket, idx;
+       struct bpos bucket;
        struct printbuf buf = PRINTBUF;
-       int ret;
-
-       bch2_trans_iter_init(trans, &lru_iter, BTREE_ID_lru,
-                            POS(ca->dev_idx, 0), 0);
-next_lru:
-       k = bch2_btree_iter_peek(&lru_iter);
-       ret = bkey_err(k);
-       if (ret)
-               goto out;
+       unsigned cached_sectors;
+       int ret = 0;
 
-       if (!k.k || k.k->p.inode != ca->dev_idx) {
-               ret = 1;
-               goto out;
-       }
+       if (*nr_to_invalidate <= 0 || k.k->p.inode != dev_idx)
+               return 1;
 
        if (k.k->type != KEY_TYPE_lru) {
                prt_printf(&buf, "non lru key in lru btree:\n  ");
@@ -1195,26 +1166,22 @@ next_lru:
 
                if (!test_bit(BCH_FS_CHECK_LRUS_DONE, &c->flags)) {
                        bch_err(c, "%s", buf.buf);
-                       bch2_btree_iter_advance(&lru_iter);
-                       goto next_lru;
                } else {
                        bch2_trans_inconsistent(trans, "%s", buf.buf);
                        ret = -EINVAL;
-                       goto out;
                }
-       }
 
-       idx     = k.k->p.offset;
-       bucket  = le64_to_cpu(bkey_s_c_to_lru(k).v->idx);
+               goto out;
+       }
 
-       *bucket_pos = POS(ca->dev_idx, bucket);
+       bucket = POS(dev_idx, le64_to_cpu(bkey_s_c_to_lru(k).v->idx));
 
-       a = bch2_trans_start_alloc_update(trans, &alloc_iter, *bucket_pos);
+       a = bch2_trans_start_alloc_update(trans, &alloc_iter, bucket);
        ret = PTR_ERR_OR_ZERO(a);
        if (ret)
                goto out;
 
-       if (idx != alloc_lru_idx(a->v)) {
+       if (k.k->p.offset != alloc_lru_idx(a->v)) {
                prt_printf(&buf, "alloc key does not point back to lru entry when invalidating bucket:\n  ");
                bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&a->k_i));
                prt_printf(&buf, "\n  ");
@@ -1222,19 +1189,18 @@ next_lru:
 
                if (!test_bit(BCH_FS_CHECK_LRUS_DONE, &c->flags)) {
                        bch_err(c, "%s", buf.buf);
-                       bch2_btree_iter_advance(&lru_iter);
-                       goto next_lru;
                } else {
                        bch2_trans_inconsistent(trans, "%s", buf.buf);
                        ret = -EINVAL;
-                       goto out;
                }
+
+               goto out;
        }
 
        if (!a->v.cached_sectors)
                bch_err(c, "invalidating empty bucket, confused");
 
-       *cached_sectors = a->v.cached_sectors;
+       cached_sectors = a->v.cached_sectors;
 
        SET_BCH_ALLOC_V4_NEED_INC_GEN(&a->v, false);
        a->v.gen++;
@@ -1244,13 +1210,18 @@ next_lru:
        a->v.io_time[READ]      = atomic64_read(&c->io_clock[READ].now);
        a->v.io_time[WRITE]     = atomic64_read(&c->io_clock[WRITE].now);
 
-       ret = bch2_trans_update(trans, &alloc_iter, &a->k_i,
-                               BTREE_TRIGGER_BUCKET_INVALIDATE);
+       ret =   bch2_trans_update(trans, &alloc_iter, &a->k_i,
+                               BTREE_TRIGGER_BUCKET_INVALIDATE) ?:
+               bch2_trans_commit(trans, NULL, NULL,
+                                 BTREE_INSERT_USE_RESERVE|BTREE_INSERT_NOFAIL);
        if (ret)
                goto out;
+
+       trace_invalidate_bucket(c, bucket.inode, bucket.offset, cached_sectors);
+       this_cpu_inc(c->counters[BCH_COUNTER_bucket_invalidate]);
+       --*nr_to_invalidate;
 out:
        bch2_trans_iter_exit(trans, &alloc_iter);
-       bch2_trans_iter_exit(trans, &lru_iter);
        printbuf_exit(&buf);
        return ret;
 }
@@ -1260,8 +1231,9 @@ static void bch2_do_invalidates_work(struct work_struct *work)
        struct bch_fs *c = container_of(work, struct bch_fs, invalidate_work);
        struct bch_dev *ca;
        struct btree_trans trans;
-       struct bpos bucket;
-       unsigned i, sectors;
+       struct btree_iter iter;
+       struct bkey_s_c k;
+       unsigned i;
        int ret = 0;
 
        bch2_trans_init(&trans, c, 0, 0);
@@ -1270,17 +1242,13 @@ static void bch2_do_invalidates_work(struct work_struct *work)
                s64 nr_to_invalidate =
                        should_invalidate_buckets(ca, bch2_dev_usage_read(ca));
 
-               while (nr_to_invalidate-- >= 0) {
-                       ret = commit_do(&trans, NULL, NULL,
-                                             BTREE_INSERT_USE_RESERVE|
-                                             BTREE_INSERT_NOFAIL,
-                                       invalidate_one_bucket(&trans, ca, &bucket,
-                                                             &sectors));
-                       if (ret)
-                               break;
+               ret = for_each_btree_key2(&trans, iter, BTREE_ID_lru,
+                               POS(ca->dev_idx, 0), BTREE_ITER_INTENT, k,
+                       invalidate_one_bucket(&trans, &iter, k, ca->dev_idx, &nr_to_invalidate));
 
-                       trace_invalidate_bucket(c, bucket.inode, bucket.offset, sectors);
-                       this_cpu_inc(c->counters[BCH_COUNTER_bucket_invalidate]);
+               if (ret < 0) {
+                       percpu_ref_put(&ca->ref);
+                       break;
                }
        }
 
@@ -1295,16 +1263,13 @@ void bch2_do_invalidates(struct bch_fs *c)
                percpu_ref_put(&c->writes);
 }
 
-static int bucket_freespace_init(struct btree_trans *trans, struct btree_iter *iter)
+static int bucket_freespace_init(struct btree_trans *trans, struct btree_iter *iter,
+                                struct bkey_s_c k, struct bch_dev *ca)
 {
        struct bch_alloc_v4 a;
-       struct bkey_s_c k;
-       int ret;
 
-       k = bch2_btree_iter_peek_slot(iter);
-       ret = bkey_err(k);
-       if (ret)
-               return ret;
+       if (iter->pos.offset >= ca->mi.nbuckets)
+               return 1;
 
        bch2_alloc_to_v4(k, &a);
        return bch2_bucket_do_index(trans, k, &a, true);
@@ -1320,25 +1285,16 @@ static int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca)
 
        bch2_trans_init(&trans, c, 0, 0);
 
-       for_each_btree_key(&trans, iter, BTREE_ID_alloc,
-                          POS(ca->dev_idx, ca->mi.first_bucket),
-                          BTREE_ITER_SLOTS|
-                          BTREE_ITER_PREFETCH, k, ret) {
-               if (iter.pos.offset >= ca->mi.nbuckets)
-                       break;
-
-               ret = commit_do(&trans, NULL, NULL,
-                                     BTREE_INSERT_LAZY_RW,
-                                bucket_freespace_init(&trans, &iter));
-               if (ret)
-                       break;
-       }
-       bch2_trans_iter_exit(&trans, &iter);
+       ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_alloc,
+                       POS(ca->dev_idx, ca->mi.first_bucket),
+                       BTREE_ITER_SLOTS|BTREE_ITER_PREFETCH, k,
+                       NULL, NULL, BTREE_INSERT_LAZY_RW,
+               bucket_freespace_init(&trans, &iter, k, ca));
 
        bch2_trans_exit(&trans);
 
-       if (ret) {
-               bch_err(ca, "error initializing free space: %i", ret);
+       if (ret < 0) {
+               bch_err(ca, "error initializing free space: %s", bch2_err_str(ret));
                return ret;
        }
 
@@ -1347,7 +1303,7 @@ static int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca)
        SET_BCH_MEMBER_FREESPACE_INITIALIZED(m, true);
        mutex_unlock(&c->sb_lock);
 
-       return ret;
+       return 0;
 }
 
 int bch2_fs_freespace_init(struct bch_fs *c)
index 2ac6b5046c67b66e27533d1b93f71bd5d0332a78..044bc72992d4186d551da21ba63defa09334da5e 100644 (file)
@@ -150,11 +150,13 @@ void bch2_do_discards(struct bch_fs *);
 static inline u64 should_invalidate_buckets(struct bch_dev *ca,
                                            struct bch_dev_usage u)
 {
-       u64 free = u.d[BCH_DATA_free].buckets +
-               u.d[BCH_DATA_need_discard].buckets;
+       u64 want_free = ca->mi.nbuckets >> 7;
+       u64 free = max_t(s64, 0,
+                          u.d[BCH_DATA_free].buckets
+                        + u.d[BCH_DATA_need_discard].buckets
+                        - bch2_dev_buckets_reserved(ca, RESERVE_none));
 
-       return clamp_t(s64, (ca->mi.nbuckets >> 7) - free,
-                      0, u.d[BCH_DATA_cached].buckets);
+       return clamp_t(s64, want_free - free, 0, u.d[BCH_DATA_cached].buckets);
 }
 
 void bch2_do_invalidates(struct bch_fs *);
index 7a878a6906abfe32a9f0aa05cff3b533fc5fe4da..0a9f1313414b7eb6a681fd745ce6d22e11e48532 100644 (file)
@@ -26,6 +26,7 @@
 #include "error.h"
 #include "io.h"
 #include "journal.h"
+#include "movinggc.h"
 
 #include <linux/math64.h>
 #include <linux/rculist.h>
@@ -226,7 +227,7 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev *
                        c->blocked_allocate_open_bucket = local_clock();
 
                spin_unlock(&c->freelist_lock);
-               return ERR_PTR(-OPEN_BUCKETS_EMPTY);
+               return ERR_PTR(-BCH_ERR_open_buckets_empty);
        }
 
        /* Recheck under lock: */
@@ -339,6 +340,7 @@ static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bc
                                skipped_nouse,
                                cl);
 err:
+       set_btree_iter_dontneed(&iter);
        bch2_trans_iter_exit(trans, &iter);
        printbuf_exit(&buf);
        return ob;
@@ -395,7 +397,7 @@ bch2_bucket_alloc_trans_early(struct btree_trans *trans,
        *cur_bucket = max_t(u64, *cur_bucket, ca->mi.first_bucket);
        *cur_bucket = max_t(u64, *cur_bucket, ca->new_fs_bucket_idx);
 
-       for_each_btree_key(trans, iter, BTREE_ID_alloc, POS(ca->dev_idx, *cur_bucket),
+       for_each_btree_key_norestart(trans, iter, BTREE_ID_alloc, POS(ca->dev_idx, *cur_bucket),
                           BTREE_ITER_SLOTS, k, ret) {
                struct bch_alloc_v4 a;
 
@@ -425,7 +427,7 @@ bch2_bucket_alloc_trans_early(struct btree_trans *trans,
 
        *cur_bucket = iter.pos.offset;
 
-       return ob ?: ERR_PTR(ret ?: -FREELIST_EMPTY);
+       return ob ?: ERR_PTR(ret ?: -BCH_ERR_no_buckets_found);
 }
 
 static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans,
@@ -454,6 +456,11 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans,
 
        BUG_ON(ca->new_fs_bucket_idx);
 
+       /*
+        * XXX:
+        * On transaction restart, we'd like to restart from the bucket we were
+        * at previously
+        */
        for_each_btree_key_norestart(trans, iter, BTREE_ID_freespace,
                                     POS(ca->dev_idx, *cur_bucket), 0, k, ret) {
                if (k.k->p.inode != ca->dev_idx)
@@ -462,10 +469,9 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans,
                for (*cur_bucket = max(*cur_bucket, bkey_start_offset(k.k));
                     *cur_bucket < k.k->p.offset && !ob;
                     (*cur_bucket)++) {
-                       if (btree_trans_too_many_iters(trans)) {
-                               ob = ERR_PTR(-EINTR);
+                       ret = btree_trans_too_many_iters(trans);
+                       if (ret)
                                break;
-                       }
 
                        (*buckets_seen)++;
 
@@ -476,7 +482,8 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans,
                                              skipped_nouse,
                                              k, cl);
                }
-               if (ob)
+
+               if (ob || ret)
                        break;
        }
        bch2_trans_iter_exit(trans, &iter);
@@ -496,8 +503,10 @@ struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
 {
        struct open_bucket *ob = NULL;
        struct bch_dev_usage usage;
+       bool freespace_initialized = READ_ONCE(ca->mi.freespace_initialized);
+       u64 start = freespace_initialized ? 0 : ca->bucket_alloc_trans_early_cursor;
        u64 avail;
-       u64 cur_bucket = 0;
+       u64 cur_bucket = start;
        u64 buckets_seen = 0;
        u64 skipped_open = 0;
        u64 skipped_need_journal_commit = 0;
@@ -506,7 +515,7 @@ struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
        int ret;
 again:
        usage = bch2_dev_usage_read(ca);
-       avail = dev_buckets_free(ca, usage,reserve);
+       avail = dev_buckets_free(ca, usage, reserve);
 
        if (usage.d[BCH_DATA_need_discard].buckets > avail)
                bch2_do_discards(c);
@@ -527,7 +536,7 @@ again:
                if (!c->blocked_allocate)
                        c->blocked_allocate = local_clock();
 
-               ob = ERR_PTR(-FREELIST_EMPTY);
+               ob = ERR_PTR(-BCH_ERR_freelist_empty);
                goto err;
        }
 
@@ -551,17 +560,30 @@ again:
 
        if (skipped_need_journal_commit * 2 > avail)
                bch2_journal_flush_async(&c->journal, NULL);
+
+       if (!ob && !ret && !freespace_initialized && start) {
+               start = cur_bucket = 0;
+               goto again;
+       }
+
+       if (!freespace_initialized)
+               ca->bucket_alloc_trans_early_cursor = cur_bucket;
 err:
        if (!ob)
-               ob = ERR_PTR(ret ?: -FREELIST_EMPTY);
+               ob = ERR_PTR(ret ?: -BCH_ERR_no_buckets_found);
 
        if (IS_ERR(ob)) {
-               trace_bucket_alloc_fail(ca, bch2_alloc_reserves[reserve], avail,
+               trace_bucket_alloc_fail(ca, bch2_alloc_reserves[reserve],
+                                       usage.d[BCH_DATA_free].buckets,
+                                       avail,
+                                       bch2_copygc_wait_amount(c),
+                                       c->copygc_wait - atomic64_read(&c->io_clock[WRITE].now),
                                        buckets_seen,
                                        skipped_open,
                                        skipped_need_journal_commit,
                                        skipped_nouse,
-                                       cl == NULL, PTR_ERR(ob));
+                                       cl == NULL,
+                                       bch2_err_str(PTR_ERR(ob)));
                atomic_long_inc(&c->bucket_alloc_fail);
        }
 
@@ -648,7 +670,7 @@ int bch2_bucket_alloc_set(struct bch_fs *c,
                bch2_dev_alloc_list(c, stripe, devs_may_alloc);
        unsigned dev;
        struct bch_dev *ca;
-       int ret = -INSUFFICIENT_DEVICES;
+       int ret = -BCH_ERR_insufficient_devices;
        unsigned i;
 
        BUG_ON(*nr_effective >= nr_replicas);
@@ -846,8 +868,8 @@ static int open_bucket_add_buckets(struct bch_fs *c,
                                                 target, erasure_code,
                                                 nr_replicas, nr_effective,
                                                 have_cache, flags, _cl);
-                       if (ret == -FREELIST_EMPTY ||
-                           ret == -OPEN_BUCKETS_EMPTY)
+                       if (bch2_err_matches(ret, BCH_ERR_freelist_empty) ||
+                           bch2_err_matches(ret, BCH_ERR_open_buckets_empty))
                                return ret;
                        if (*nr_effective >= nr_replicas)
                                return 0;
@@ -868,7 +890,9 @@ retry_blocking:
        ret = bch2_bucket_alloc_set(c, ptrs, &wp->stripe, &devs,
                                nr_replicas, nr_effective, have_cache,
                                reserve, flags, cl);
-       if (ret && ret != -INSUFFICIENT_DEVICES && !cl && _cl) {
+       if (ret &&
+           !bch2_err_matches(ret, BCH_ERR_insufficient_devices) &&
+           !cl && _cl) {
                cl = _cl;
                goto retry_blocking;
        }
@@ -1111,7 +1135,7 @@ alloc_done:
        if (erasure_code && !ec_open_bucket(c, &ptrs))
                pr_debug("failed to get ec bucket: ret %u", ret);
 
-       if (ret == -INSUFFICIENT_DEVICES &&
+       if (ret == -BCH_ERR_insufficient_devices &&
            nr_effective >= nr_replicas_required)
                ret = 0;
 
@@ -1142,19 +1166,18 @@ err:
 
        mutex_unlock(&wp->lock);
 
-       if (ret == -FREELIST_EMPTY &&
+       if (bch2_err_matches(ret, BCH_ERR_freelist_empty) &&
            try_decrease_writepoints(c, write_points_nr))
                goto retry;
 
-       switch (ret) {
-       case -OPEN_BUCKETS_EMPTY:
-       case -FREELIST_EMPTY:
+       if (bch2_err_matches(ret, BCH_ERR_open_buckets_empty) ||
+           bch2_err_matches(ret, BCH_ERR_freelist_empty))
                return cl ? ERR_PTR(-EAGAIN) : ERR_PTR(-ENOSPC);
-       case -INSUFFICIENT_DEVICES:
+
+       if (bch2_err_matches(ret, BCH_ERR_insufficient_devices))
                return ERR_PTR(-EROFS);
-       default:
-               return ERR_PTR(ret);
-       }
+
+       return ERR_PTR(ret);
 }
 
 struct bch_extent_ptr bch2_ob_ptr(struct bch_fs *c, struct open_bucket *ob)
index 08d6795860f34c7d8c6ecadf55458ee98af4f897..5a46b25b0587e9257df68427e6bd148411518e40 100644 (file)
@@ -443,8 +443,8 @@ int bch2_get_next_backpointer(struct btree_trans *trans,
                goto out;
        }
 
-       for_each_btree_key(trans, bp_iter, BTREE_ID_backpointers,
-                          bp_pos, 0, k, ret) {
+       for_each_btree_key_norestart(trans, bp_iter, BTREE_ID_backpointers,
+                                    bp_pos, 0, k, ret) {
                if (bpos_cmp(k.k->p, bp_end_pos) >= 0)
                        break;
 
@@ -569,22 +569,16 @@ struct btree *bch2_backpointer_get_node(struct btree_trans *trans,
        return NULL;
 }
 
-static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_iter *bp_iter)
+static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_iter *bp_iter,
+                                       struct bkey_s_c k)
 {
        struct bch_fs *c = trans->c;
        struct btree_iter alloc_iter = { NULL };
        struct bch_dev *ca;
-       struct bkey_s_c k, alloc_k;
+       struct bkey_s_c alloc_k;
        struct printbuf buf = PRINTBUF;
        int ret = 0;
 
-       k = bch2_btree_iter_peek(bp_iter);
-       ret = bkey_err(k);
-       if (ret)
-               return ret;
-       if (!k.k)
-               return 0;
-
        if (fsck_err_on(!bch2_dev_exists2(c, k.k->p.inode), c,
                        "backpointer for mising device:\n%s",
                        (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
@@ -619,25 +613,14 @@ fsck_err:
 /* verify that every backpointer has a corresponding alloc key */
 int bch2_check_btree_backpointers(struct bch_fs *c)
 {
-       struct btree_trans trans;
        struct btree_iter iter;
-       int ret = 0;
-
-       bch2_trans_init(&trans, c, 0, 0);
-       bch2_trans_iter_init(&trans, &iter, BTREE_ID_backpointers, POS_MIN, 0);
-
-       do {
-               ret = commit_do(&trans, NULL, NULL,
-                                     BTREE_INSERT_LAZY_RW|
-                                     BTREE_INSERT_NOFAIL,
-                                     bch2_check_btree_backpointer(&trans, &iter));
-               if (ret)
-                       break;
-       } while (bch2_btree_iter_advance(&iter));
+       struct bkey_s_c k;
 
-       bch2_trans_iter_exit(&trans, &iter);
-       bch2_trans_exit(&trans);
-       return ret;
+       return bch2_trans_run(c,
+               for_each_btree_key_commit(&trans, iter,
+                       BTREE_ID_backpointers, POS_MIN, 0, k,
+                       NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
+                 bch2_check_btree_backpointer(&trans, &iter, k)));
 }
 
 static int check_bp_exists(struct btree_trans *trans,
index 31e387b103ee32bc0d3c6418d9d34fe9b486d8c1..8ffdb4dee47aef18d40f3de255cb80df0054073d 100644 (file)
@@ -319,6 +319,8 @@ BCH_DEBUG_PARAMS_DEBUG()
 #undef BCH_DEBUG_PARAM
 #endif
 
+#define BCH_LOCK_TIME_NR 128
+
 #define BCH_TIME_STATS()                       \
        x(btree_node_mem_alloc)                 \
        x(btree_node_split)                     \
@@ -463,6 +465,7 @@ struct bch_dev {
 
        /* Allocator: */
        u64                     new_fs_bucket_idx;
+       u64                     bucket_alloc_trans_early_cursor;
 
        unsigned                nr_open_buckets;
        unsigned                nr_btree_reserve;
@@ -528,6 +531,11 @@ struct btree_debug {
        unsigned                id;
 };
 
+struct lock_held_stats {
+       struct time_stats       times[BCH_LOCK_TIME_NR];
+       const char              *names[BCH_LOCK_TIME_NR];
+};
+
 struct bch_fs_pcpu {
        u64                     sectors_available;
 };
@@ -921,6 +929,8 @@ struct bch_fs {
        bool                    promote_whole_extents;
 
        struct time_stats       times[BCH_TIME_STAT_NR];
+
+       struct lock_held_stats lock_held_stats;
 };
 
 static inline void bch2_set_ra_pages(struct bch_fs *c, unsigned ra_pages)
index 4d032ae3b7f460e6c55e38bf8ff0521925252a6e..5a6c93d1aab70eb726258ab5ed0aab4031bb590e 100644 (file)
@@ -7,6 +7,7 @@
 #include "btree_iter.h"
 #include "btree_locking.h"
 #include "debug.h"
+#include "errcode.h"
 #include "error.h"
 
 #include <linux/prefetch.h>
@@ -700,20 +701,16 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c,
         * been freed:
         */
        if (trans && !bch2_btree_node_relock(trans, path, level + 1)) {
-               trace_trans_restart_relock_parent_for_fill(trans->fn,
-                                       _THIS_IP_, btree_id, &path->pos);
-               btree_trans_restart(trans);
-               return ERR_PTR(-EINTR);
+               trace_trans_restart_relock_parent_for_fill(trans, _THIS_IP_, path);
+               return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_fill_relock));
        }
 
        b = bch2_btree_node_mem_alloc(c, level != 0);
 
        if (trans && b == ERR_PTR(-ENOMEM)) {
                trans->memory_allocation_failure = true;
-               trace_trans_restart_memory_allocation_failure(trans->fn,
-                               _THIS_IP_, btree_id, &path->pos);
-               btree_trans_restart(trans);
-               return ERR_PTR(-EINTR);
+               trace_trans_restart_memory_allocation_failure(trans, _THIS_IP_, path);
+               return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_fill_mem_alloc_fail));
        }
 
        if (IS_ERR(b))
@@ -750,18 +747,19 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c,
        if (!sync)
                return NULL;
 
-       if (trans &&
-           (!bch2_trans_relock(trans) ||
-            !bch2_btree_path_relock_intent(trans, path))) {
-               BUG_ON(!trans->restarted);
-               return ERR_PTR(-EINTR);
+       if (trans) {
+               int ret = bch2_trans_relock(trans) ?:
+                       bch2_btree_path_relock_intent(trans, path);
+               if (ret) {
+                       BUG_ON(!trans->restarted);
+                       return ERR_PTR(ret);
+               }
        }
 
        if (!six_relock_type(&b->c.lock, lock_type, seq)) {
-               trace_trans_restart_relock_after_fill(trans->fn, _THIS_IP_,
-                                          btree_id, &path->pos);
-               btree_trans_restart(trans);
-               return ERR_PTR(-EINTR);
+               if (trans)
+                       trace_trans_restart_relock_after_fill(trans, _THIS_IP_, path);
+               return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_relock_after_fill));
        }
 
        return b;
@@ -772,7 +770,9 @@ static int lock_node_check_fn(struct six_lock *lock, void *p)
        struct btree *b = container_of(lock, struct btree, c.lock);
        const struct bkey_i *k = p;
 
-       return b->hash_val == btree_ptr_hash_val(k) ? 0 : -1;
+       if (b->hash_val != btree_ptr_hash_val(k))
+               return BCH_ERR_lock_fail_node_reused;
+       return 0;
 }
 
 static noinline void btree_bad_header(struct bch_fs *c, struct btree *b)
@@ -831,6 +831,7 @@ struct btree *bch2_btree_node_get(struct btree_trans *trans, struct btree_path *
        struct btree_cache *bc = &c->btree_cache;
        struct btree *b;
        struct bset_tree *t;
+       int ret;
 
        EBUG_ON(level >= BTREE_MAX_DEPTH);
 
@@ -893,13 +894,16 @@ lock_node:
                 * was removed - and we'll bail out:
                 */
                if (btree_node_read_locked(path, level + 1))
-                       btree_node_unlock(path, level + 1);
+                       btree_node_unlock(trans, path, level + 1);
 
-               if (!btree_node_lock(trans, path, b, k->k.p, level, lock_type,
-                                    lock_node_check_fn, (void *) k, trace_ip)) {
-                       if (!trans->restarted)
+               ret = btree_node_lock(trans, path, b, k->k.p, level, lock_type,
+                                     lock_node_check_fn, (void *) k, trace_ip);
+               if (unlikely(ret)) {
+                       if (bch2_err_matches(ret, BCH_ERR_lock_fail_node_reused))
                                goto retry;
-                       return ERR_PTR(-EINTR);
+                       if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+                               return ERR_PTR(ret);
+                       BUG();
                }
 
                if (unlikely(b->hash_val != btree_ptr_hash_val(k) ||
@@ -909,12 +913,8 @@ lock_node:
                        if (bch2_btree_node_relock(trans, path, level + 1))
                                goto retry;
 
-                       trace_trans_restart_btree_node_reused(trans->fn,
-                                                             trace_ip,
-                                                             path->btree_id,
-                                                             &path->pos);
-                       btree_trans_restart(trans);
-                       return ERR_PTR(-EINTR);
+                       trace_trans_restart_btree_node_reused(trans, trace_ip, path);
+                       return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_lock_node_reused));
                }
        }
 
@@ -930,11 +930,13 @@ lock_node:
                 * should_be_locked is not set on this path yet, so we need to
                 * relock it specifically:
                 */
-               if (trans &&
-                   (!bch2_trans_relock(trans) ||
-                    !bch2_btree_path_relock_intent(trans, path))) {
-                       BUG_ON(!trans->restarted);
-                       return ERR_PTR(-EINTR);
+               if (trans) {
+                       int ret = bch2_trans_relock(trans) ?:
+                               bch2_btree_path_relock_intent(trans, path);
+                       if (ret) {
+                               BUG_ON(!trans->restarted);
+                               return ERR_PTR(ret);
+                       }
                }
 
                if (!six_relock_type(&b->c.lock, lock_type, seq))
index 214529b613f97c40f7d8733269ce3a928fadddc0..2f563365ea4cb8d4de2691f51a4ef1dc5c60e2f4 100644 (file)
@@ -98,7 +98,7 @@ static int bch2_gc_check_topology(struct bch_fs *c,
                                  buf1.buf, buf2.buf) &&
                            !test_bit(BCH_FS_TOPOLOGY_REPAIR_DONE, &c->flags)) {
                                bch_info(c, "Halting mark and sweep to start topology repair pass");
-                               ret = FSCK_ERR_START_TOPOLOGY_REPAIR;
+                               ret = -BCH_ERR_need_topology_repair;
                                goto err;
                        } else {
                                set_bit(BCH_FS_INITIAL_GC_UNFIXED, &c->flags);
@@ -126,7 +126,7 @@ static int bch2_gc_check_topology(struct bch_fs *c,
                          buf1.buf, buf2.buf) &&
                    !test_bit(BCH_FS_TOPOLOGY_REPAIR_DONE, &c->flags)) {
                        bch_info(c, "Halting mark and sweep to start topology repair pass");
-                       ret = FSCK_ERR_START_TOPOLOGY_REPAIR;
+                       ret = -BCH_ERR_need_topology_repair;
                        goto err;
                } else {
                        set_bit(BCH_FS_INITIAL_GC_UNFIXED, &c->flags);
@@ -402,8 +402,8 @@ again:
                }
 
                if (ret) {
-                       bch_err(c, "%s: error %i getting btree node",
-                               __func__, ret);
+                       bch_err(c, "%s: error getting btree node: %s",
+                               __func__, bch2_err_str(ret));
                        break;
                }
 
@@ -471,8 +471,8 @@ again:
                ret = PTR_ERR_OR_ZERO(cur);
 
                if (ret) {
-                       bch_err(c, "%s: error %i getting btree node",
-                               __func__, ret);
+                       bch_err(c, "%s: error getting btree node: %s",
+                               __func__, bch2_err_str(ret));
                        goto err;
                }
 
@@ -537,7 +537,7 @@ static int bch2_repair_topology(struct bch_fs *c)
 
                if (ret == DROP_THIS_NODE) {
                        bch_err(c, "empty btree root - repair unimplemented");
-                       ret = FSCK_ERR_EXIT;
+                       ret = -BCH_ERR_fsck_repair_unimplemented;
                }
        }
 
@@ -804,7 +804,7 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id,
 fsck_err:
 err:
        if (ret)
-               bch_err(c, "%s: ret %i", __func__, ret);
+               bch_err(c, "error from %s(): %s", __func__, bch2_err_str(ret));
        return ret;
 }
 
@@ -910,7 +910,8 @@ static int bch2_gc_btree_init_recurse(struct btree_trans *trans, struct btree *b
                ret = bch2_gc_mark_key(trans, b->c.btree_id, b->c.level,
                                       false, &k, true);
                if (ret) {
-                       bch_err(c, "%s: error %i from bch2_gc_mark_key", __func__, ret);
+                       bch_err(c, "%s: error from bch2_gc_mark_key: %s",
+                               __func__, bch2_err_str(ret));
                        goto fsck_err;
                }
 
@@ -959,7 +960,7 @@ static int bch2_gc_btree_init_recurse(struct btree_trans *trans, struct btree *b
                                          (printbuf_reset(&buf),
                                           bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(cur.k)), buf.buf)) &&
                                    !test_bit(BCH_FS_TOPOLOGY_REPAIR_DONE, &c->flags)) {
-                                       ret = FSCK_ERR_START_TOPOLOGY_REPAIR;
+                                       ret = -BCH_ERR_need_topology_repair;
                                        bch_info(c, "Halting mark and sweep to start topology repair pass");
                                        goto fsck_err;
                                } else {
@@ -970,8 +971,8 @@ static int bch2_gc_btree_init_recurse(struct btree_trans *trans, struct btree *b
                                        continue;
                                }
                        } else if (ret) {
-                               bch_err(c, "%s: error %i getting btree node",
-                                       __func__, ret);
+                               bch_err(c, "%s: error getting btree node: %s",
+                                       __func__, bch2_err_str(ret));
                                break;
                        }
 
@@ -1012,7 +1013,7 @@ static int bch2_gc_btree_init(struct btree_trans *trans,
        if (mustfix_fsck_err_on(bpos_cmp(b->data->min_key, POS_MIN), c,
                        "btree root with incorrect min_key: %s", buf.buf)) {
                bch_err(c, "repair unimplemented");
-               ret = FSCK_ERR_EXIT;
+               ret = -BCH_ERR_fsck_repair_unimplemented;
                goto fsck_err;
        }
 
@@ -1021,7 +1022,7 @@ static int bch2_gc_btree_init(struct btree_trans *trans,
        if (mustfix_fsck_err_on(bpos_cmp(b->data->max_key, SPOS_MAX), c,
                        "btree root with incorrect max_key: %s", buf.buf)) {
                bch_err(c, "repair unimplemented");
-               ret = FSCK_ERR_EXIT;
+               ret = -BCH_ERR_fsck_repair_unimplemented;
                goto fsck_err;
        }
 
@@ -1038,7 +1039,7 @@ fsck_err:
        six_unlock_read(&b->c.lock);
 
        if (ret < 0)
-               bch_err(c, "%s: ret %i", __func__, ret);
+               bch_err(c, "error from %s(): %s", __func__, bch2_err_str(ret));
        printbuf_exit(&buf);
        return ret;
 }
@@ -1071,7 +1072,7 @@ static int bch2_gc_btrees(struct bch_fs *c, bool initial, bool metadata_only)
                        : bch2_gc_btree(&trans, ids[i], initial, metadata_only);
 
        if (ret < 0)
-               bch_err(c, "%s: ret %i", __func__, ret);
+               bch_err(c, "error from %s(): %s", __func__, bch2_err_str(ret));
 
        bch2_trans_exit(&trans);
        return ret;
@@ -1269,7 +1270,7 @@ fsck_err:
        if (ca)
                percpu_ref_put(&ca->ref);
        if (ret)
-               bch_err(c, "%s: ret %i", __func__, ret);
+               bch_err(c, "error from %s(): %s", __func__, bch2_err_str(ret));
 
        percpu_up_write(&c->mark_lock);
        printbuf_exit(&buf);
@@ -1324,21 +1325,19 @@ static inline bool bch2_alloc_v4_cmp(struct bch_alloc_v4 l,
 
 static int bch2_alloc_write_key(struct btree_trans *trans,
                                struct btree_iter *iter,
+                               struct bkey_s_c k,
                                bool metadata_only)
 {
        struct bch_fs *c = trans->c;
        struct bch_dev *ca = bch_dev_bkey_exists(c, iter->pos.inode);
        struct bucket gc, *b;
-       struct bkey_s_c k;
        struct bkey_i_alloc_v4 *a;
        struct bch_alloc_v4 old, new;
        enum bch_data_type type;
        int ret;
 
-       k = bch2_btree_iter_peek_slot(iter);
-       ret = bkey_err(k);
-       if (ret)
-               return ret;
+       if (bkey_cmp(iter->pos, POS(ca->dev_idx, ca->mi.nbuckets)) >= 0)
+               return 1;
 
        bch2_alloc_to_v4(k, &old);
        new = old;
@@ -1431,31 +1430,21 @@ static int bch2_gc_alloc_done(struct bch_fs *c, bool metadata_only)
        bch2_trans_init(&trans, c, 0, 0);
 
        for_each_member_device(ca, c, i) {
-               for_each_btree_key(&trans, iter, BTREE_ID_alloc,
-                                  POS(ca->dev_idx, ca->mi.first_bucket),
-                                  BTREE_ITER_SLOTS|
-                                  BTREE_ITER_PREFETCH, k, ret) {
-                       if (bkey_cmp(iter.pos, POS(ca->dev_idx, ca->mi.nbuckets)) >= 0)
-                               break;
-
-                       ret = commit_do(&trans, NULL, NULL,
-                                             BTREE_INSERT_LAZY_RW,
-                                       bch2_alloc_write_key(&trans, &iter,
-                                                            metadata_only));
-                       if (ret)
-                               break;
-               }
-               bch2_trans_iter_exit(&trans, &iter);
-
-               if (ret) {
-                       bch_err(c, "error writing alloc info: %i", ret);
+               ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_alloc,
+                               POS(ca->dev_idx, ca->mi.first_bucket),
+                               BTREE_ITER_SLOTS|BTREE_ITER_PREFETCH, k,
+                               NULL, NULL, BTREE_INSERT_LAZY_RW,
+                       bch2_alloc_write_key(&trans, &iter, k, metadata_only));
+
+               if (ret < 0) {
+                       bch_err(c, "error writing alloc info: %s", bch2_err_str(ret));
                        percpu_ref_put(&ca->ref);
                        break;
                }
        }
 
        bch2_trans_exit(&trans);
-       return ret;
+       return ret < 0 ? ret : 0;
 }
 
 static int bch2_gc_alloc_start(struct bch_fs *c, bool metadata_only)
@@ -1512,7 +1501,7 @@ static int bch2_gc_alloc_start(struct bch_fs *c, bool metadata_only)
        bch2_trans_exit(&trans);
 
        if (ret)
-               bch_err(c, "error reading alloc info at gc start: %i", ret);
+               bch_err(c, "error reading alloc info at gc start: %s", bch2_err_str(ret));
 
        return ret;
 }
@@ -1539,72 +1528,79 @@ static void bch2_gc_alloc_reset(struct bch_fs *c, bool metadata_only)
        };
 }
 
-static int bch2_gc_reflink_done(struct bch_fs *c, bool metadata_only)
+static int bch2_gc_write_reflink_key(struct btree_trans *trans,
+                                    struct btree_iter *iter,
+                                    struct bkey_s_c k,
+                                    size_t *idx)
 {
-       struct btree_trans trans;
-       struct btree_iter iter;
-       struct bkey_s_c k;
-       struct reflink_gc *r;
-       size_t idx = 0;
+       struct bch_fs *c = trans->c;
+       const __le64 *refcount = bkey_refcount_c(k);
        struct printbuf buf = PRINTBUF;
+       struct reflink_gc *r;
        int ret = 0;
 
-       if (metadata_only)
+       if (!refcount)
                return 0;
 
-       bch2_trans_init(&trans, c, 0, 0);
+       while ((r = genradix_ptr(&c->reflink_gc_table, *idx)) &&
+              r->offset < k.k->p.offset)
+               ++*idx;
 
-       for_each_btree_key(&trans, iter, BTREE_ID_reflink, POS_MIN,
-                          BTREE_ITER_PREFETCH, k, ret) {
-               const __le64 *refcount = bkey_refcount_c(k);
+       if (!r ||
+           r->offset != k.k->p.offset ||
+           r->size != k.k->size) {
+               bch_err(c, "unexpected inconsistency walking reflink table at gc finish");
+               return -EINVAL;
+       }
 
-               if (!refcount)
-                       continue;
+       if (fsck_err_on(r->refcount != le64_to_cpu(*refcount), c,
+                       "reflink key has wrong refcount:\n"
+                       "  %s\n"
+                       "  should be %u",
+                       (bch2_bkey_val_to_text(&buf, c, k), buf.buf),
+                       r->refcount)) {
+               struct bkey_i *new;
 
-               r = genradix_ptr(&c->reflink_gc_table, idx++);
-               if (!r ||
-                   r->offset != k.k->p.offset ||
-                   r->size != k.k->size) {
-                       bch_err(c, "unexpected inconsistency walking reflink table at gc finish");
-                       ret = -EINVAL;
-                       break;
-               }
+               new = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
+               ret = PTR_ERR_OR_ZERO(new);
+               if (ret)
+                       return ret;
 
-               if (fsck_err_on(r->refcount != le64_to_cpu(*refcount), c,
-                               "reflink key has wrong refcount:\n"
-                               "  %s\n"
-                               "  should be %u",
-                               (printbuf_reset(&buf),
-                                bch2_bkey_val_to_text(&buf, c, k), buf.buf),
-                               r->refcount)) {
-                       struct bkey_i *new;
+               bkey_reassemble(new, k);
 
-                       new = kmalloc(bkey_bytes(k.k), GFP_KERNEL);
-                       if (!new) {
-                               ret = -ENOMEM;
-                               break;
-                       }
+               if (!r->refcount)
+                       new->k.type = KEY_TYPE_deleted;
+               else
+                       *bkey_refcount(new) = cpu_to_le64(r->refcount);
 
-                       bkey_reassemble(new, k);
+               ret = bch2_trans_update(trans, iter, new, 0);
+       }
+fsck_err:
+       printbuf_exit(&buf);
+       return ret;
+}
 
-                       if (!r->refcount)
-                               new->k.type = KEY_TYPE_deleted;
-                       else
-                               *bkey_refcount(new) = cpu_to_le64(r->refcount);
+static int bch2_gc_reflink_done(struct bch_fs *c, bool metadata_only)
+{
+       struct btree_trans trans;
+       struct btree_iter iter;
+       struct bkey_s_c k;
+       size_t idx = 0;
+       int ret = 0;
 
-                       ret = commit_do(&trans, NULL, NULL, 0,
-                               __bch2_btree_insert(&trans, BTREE_ID_reflink, new));
-                       kfree(new);
+       if (metadata_only)
+               return 0;
+
+       bch2_trans_init(&trans, c, 0, 0);
+
+       ret = for_each_btree_key_commit(&trans, iter,
+                       BTREE_ID_reflink, POS_MIN,
+                       BTREE_ITER_PREFETCH, k,
+                       NULL, NULL, BTREE_INSERT_NOFAIL,
+               bch2_gc_write_reflink_key(&trans, &iter, k, &idx));
 
-                       if (ret)
-                               break;
-               }
-       }
-fsck_err:
-       bch2_trans_iter_exit(&trans, &iter);
        c->reflink_gc_nr = 0;
        bch2_trans_exit(&trans);
-       printbuf_exit(&buf);
        return ret;
 }
 
@@ -1656,66 +1652,73 @@ static void bch2_gc_reflink_reset(struct bch_fs *c, bool metadata_only)
                r->refcount = 0;
 }
 
-static int bch2_gc_stripes_done(struct bch_fs *c, bool metadata_only)
+static int bch2_gc_write_stripes_key(struct btree_trans *trans,
+                                    struct btree_iter *iter,
+                                    struct bkey_s_c k)
 {
-       struct btree_trans trans;
-       struct btree_iter iter;
-       struct bkey_s_c k;
-       struct gc_stripe *m;
-       const struct bch_stripe *s;
+       struct bch_fs *c = trans->c;
        struct printbuf buf = PRINTBUF;
+       const struct bch_stripe *s;
+       struct gc_stripe *m;
        unsigned i;
        int ret = 0;
 
-       if (metadata_only)
+       if (k.k->type != KEY_TYPE_stripe)
                return 0;
 
-       bch2_trans_init(&trans, c, 0, 0);
+       s = bkey_s_c_to_stripe(k).v;
+       m = genradix_ptr(&c->gc_stripes, k.k->p.offset);
 
-       for_each_btree_key(&trans, iter, BTREE_ID_stripes, POS_MIN,
-                          BTREE_ITER_PREFETCH, k, ret) {
-               if (k.k->type != KEY_TYPE_stripe)
-                       continue;
-
-               s = bkey_s_c_to_stripe(k).v;
-               m = genradix_ptr(&c->gc_stripes, k.k->p.offset);
-
-               for (i = 0; i < s->nr_blocks; i++)
-                       if (stripe_blockcount_get(s, i) != (m ? m->block_sectors[i] : 0))
-                               goto inconsistent;
-               continue;
+       for (i = 0; i < s->nr_blocks; i++)
+               if (stripe_blockcount_get(s, i) != (m ? m->block_sectors[i] : 0))
+                       goto inconsistent;
+       return 0;
 inconsistent:
-               if (fsck_err_on(true, c,
-                               "stripe has wrong block sector count %u:\n"
-                               "  %s\n"
-                               "  should be %u", i,
-                               (printbuf_reset(&buf),
-                                bch2_bkey_val_to_text(&buf, c, k), buf.buf),
-                               m ? m->block_sectors[i] : 0)) {
-                       struct bkey_i_stripe *new;
-
-                       new = kmalloc(bkey_bytes(k.k), GFP_KERNEL);
-                       if (!new) {
-                               ret = -ENOMEM;
-                               break;
-                       }
+       if (fsck_err_on(true, c,
+                       "stripe has wrong block sector count %u:\n"
+                       "  %s\n"
+                       "  should be %u", i,
+                       (printbuf_reset(&buf),
+                        bch2_bkey_val_to_text(&buf, c, k), buf.buf),
+                       m ? m->block_sectors[i] : 0)) {
+               struct bkey_i_stripe *new;
+
+               new = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
+               ret = PTR_ERR_OR_ZERO(new);
+               if (ret)
+                       return ret;
 
-                       bkey_reassemble(&new->k_i, k);
+               bkey_reassemble(&new->k_i, k);
 
-                       for (i = 0; i < new->v.nr_blocks; i++)
-                               stripe_blockcount_set(&new->v, i, m ? m->block_sectors[i] : 0);
+               for (i = 0; i < new->v.nr_blocks; i++)
+                       stripe_blockcount_set(&new->v, i, m ? m->block_sectors[i] : 0);
 
-                       ret = commit_do(&trans, NULL, NULL, 0,
-                               __bch2_btree_insert(&trans, BTREE_ID_reflink, &new->k_i));
-                       kfree(new);
-               }
+               ret = bch2_trans_update(trans, iter, &new->k_i, 0);
        }
 fsck_err:
-       bch2_trans_iter_exit(&trans, &iter);
+       printbuf_exit(&buf);
+       return ret;
+}
 
-       bch2_trans_exit(&trans);
+static int bch2_gc_stripes_done(struct bch_fs *c, bool metadata_only)
+{
+       struct btree_trans trans;
+       struct btree_iter iter;
+       struct bkey_s_c k;
+       int ret = 0;
 
-       printbuf_exit(&buf);
+       if (metadata_only)
+               return 0;
+
+       bch2_trans_init(&trans, c, 0, 0);
+
+       ret = for_each_btree_key_commit(&trans, iter,
+                       BTREE_ID_stripes, POS_MIN,
+                       BTREE_ITER_PREFETCH, k,
+                       NULL, NULL, BTREE_INSERT_NOFAIL,
+               bch2_gc_write_stripes_key(&trans, &iter, k));
+
+       bch2_trans_exit(&trans);
        return ret;
 }
 
@@ -1777,7 +1780,7 @@ again:
 
        ret = bch2_gc_btrees(c, initial, metadata_only);
 
-       if (ret == FSCK_ERR_START_TOPOLOGY_REPAIR &&
+       if (ret == -BCH_ERR_need_topology_repair &&
            !test_bit(BCH_FS_TOPOLOGY_REPAIR_DONE, &c->flags) &&
            !test_bit(BCH_FS_INITIAL_GC_DONE, &c->flags)) {
                set_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags);
@@ -1785,8 +1788,8 @@ again:
                ret = 0;
        }
 
-       if (ret == FSCK_ERR_START_TOPOLOGY_REPAIR)
-               ret = FSCK_ERR_EXIT;
+       if (ret == -BCH_ERR_need_topology_repair)
+               ret = -BCH_ERR_fsck_errors_not_fixed;
 
        if (ret)
                goto out;
@@ -1969,7 +1972,7 @@ int bch2_gc_gens(struct bch_fs *c)
                                        BTREE_INSERT_NOFAIL,
                                gc_btree_gens_key(&trans, &iter, k));
                        if (ret) {
-                               bch_err(c, "error recalculating oldest_gen: %i", ret);
+                               bch_err(c, "error recalculating oldest_gen: %s", bch2_err_str(ret));
                                goto err;
                        }
                }
@@ -1982,7 +1985,7 @@ int bch2_gc_gens(struct bch_fs *c)
                        BTREE_INSERT_NOFAIL,
                bch2_alloc_write_oldest_gen(&trans, &iter, k));
        if (ret) {
-               bch_err(c, "error writing oldest_gen: %i", ret);
+               bch_err(c, "error writing oldest_gen: %s", bch2_err_str(ret));
                goto err;
        }
 
@@ -2054,7 +2057,7 @@ static int bch2_gc_thread(void *arg)
                ret = bch2_gc_gens(c);
 #endif
                if (ret < 0)
-                       bch_err(c, "btree gc failed: %i", ret);
+                       bch_err(c, "btree gc failed: %s", bch2_err_str(ret));
 
                debug_check_no_locks_held();
        }
@@ -2084,7 +2087,7 @@ int bch2_gc_thread_start(struct bch_fs *c)
 
        p = kthread_create(bch2_gc_thread, c, "bch-gc/%s", c->name);
        if (IS_ERR(p)) {
-               bch_err(c, "error creating gc thread: %li", PTR_ERR(p));
+               bch_err(c, "error creating gc thread: %s", bch2_err_str(PTR_ERR(p)));
                return PTR_ERR(p);
        }
 
index 9bf3f77bcae614427a8297b5d040eeab0dbeaad0..ae731b3a390840f4d61019ac44d258a9767dae56 100644 (file)
@@ -543,7 +543,7 @@ enum btree_validate_ret {
        struct printbuf out = PRINTBUF;                                 \
                                                                        \
        btree_err_msg(&out, c, ca, b, i, b->written, write);            \
-       prt_printf(&out, ": " msg, ##__VA_ARGS__);                              \
+       prt_printf(&out, ": " msg, ##__VA_ARGS__);                      \
                                                                        \
        if (type == BTREE_ERR_FIXABLE &&                                \
            write == READ &&                                            \
@@ -558,7 +558,7 @@ enum btree_validate_ret {
                                                                        \
                switch (type) {                                         \
                case BTREE_ERR_FIXABLE:                                 \
-                       ret = BCH_FSCK_ERRORS_NOT_FIXED;                \
+                       ret = -BCH_ERR_fsck_errors_not_fixed;           \
                        goto fsck_err;                                  \
                case BTREE_ERR_WANT_RETRY:                              \
                        if (have_retry) {                               \
@@ -570,7 +570,7 @@ enum btree_validate_ret {
                        ret = BTREE_RETRY_READ;                         \
                        goto fsck_err;                                  \
                case BTREE_ERR_FATAL:                                   \
-                       ret = BCH_FSCK_ERRORS_NOT_FIXED;                \
+                       ret = -BCH_ERR_fsck_errors_not_fixed;           \
                        goto fsck_err;                                  \
                }                                                       \
                break;                                                  \
@@ -578,7 +578,7 @@ enum btree_validate_ret {
                bch_err(c, "corrupt metadata before write: %s", out.buf);\
                                                                        \
                if (bch2_fs_inconsistent(c)) {                          \
-                       ret = BCH_FSCK_ERRORS_NOT_FIXED;                \
+                       ret = -BCH_ERR_fsck_errors_not_fixed;           \
                        goto fsck_err;                                  \
                }                                                       \
                break;                                                  \
index 923381d87cc62591a773d7d473e3bba211030d00..946c462e84aa15282bd3d48ec1550c8febca42bf 100644 (file)
@@ -16,6 +16,7 @@
 #include "replicas.h"
 #include "subvolume.h"
 
+#include <linux/prandom.h>
 #include <linux/prefetch.h>
 #include <trace/events/bcachefs.h>
 
@@ -46,7 +47,7 @@ static inline int bch2_trans_cond_resched(struct btree_trans *trans)
        if (need_resched() || race_fault()) {
                bch2_trans_unlock(trans);
                schedule();
-               return bch2_trans_relock(trans) ? 0 : -EINTR;
+               return bch2_trans_relock(trans);
        } else {
                return 0;
        }
@@ -99,12 +100,6 @@ static inline struct bpos bkey_predecessor(struct btree_iter *iter, struct bpos
        return p;
 }
 
-static inline bool is_btree_node(struct btree_path *path, unsigned l)
-{
-       return l < BTREE_MAX_DEPTH &&
-               (unsigned long) path->l[l].b >= 128;
-}
-
 static inline struct bpos btree_iter_search_key(struct btree_iter *iter)
 {
        struct bpos pos = iter->pos;
@@ -143,15 +138,37 @@ void bch2_btree_node_unlock_write(struct btree_trans *trans,
        bch2_btree_node_unlock_write_inlined(trans, path, b);
 }
 
-void __bch2_btree_node_lock_write(struct btree_trans *trans, struct btree *b)
+struct six_lock_count bch2_btree_node_lock_counts(struct btree_trans *trans,
+                                                 struct btree_path *skip,
+                                                 struct btree *b,
+                                                 unsigned level)
 {
-       struct btree_path *linked;
-       unsigned readers = 0;
+       struct btree_path *path;
+       struct six_lock_count ret = { 0, 0 };
+
+       if (IS_ERR_OR_NULL(b))
+               return ret;
+
+       trans_for_each_path(trans, path)
+               if (path != skip && path->l[level].b == b) {
+                       ret.read += btree_node_read_locked(path, level);
+                       ret.intent += btree_node_intent_locked(path, level);
+               }
+
+       return ret;
+}
 
-       trans_for_each_path(trans, linked)
-               if (linked->l[b->c.level].b == b &&
-                   btree_node_read_locked(linked, b->c.level))
-                       readers++;
+static inline void six_lock_readers_add(struct six_lock *lock, int nr)
+{
+       if (!lock->readers)
+               atomic64_add(__SIX_VAL(read_lock, nr), &lock->state.counter);
+       else
+               this_cpu_add(*lock->readers, nr);
+}
+
+void __bch2_btree_node_lock_write(struct btree_trans *trans, struct btree *b)
+{
+       int readers = bch2_btree_node_lock_counts(trans, NULL, b, b->c.level).read;
 
        /*
         * Must drop our read locks before calling six_lock_write() -
@@ -159,19 +176,9 @@ void __bch2_btree_node_lock_write(struct btree_trans *trans, struct btree *b)
         * goes to 0, and it's safe because we have the node intent
         * locked:
         */
-       if (!b->c.lock.readers)
-               atomic64_sub(__SIX_VAL(read_lock, readers),
-                            &b->c.lock.state.counter);
-       else
-               this_cpu_sub(*b->c.lock.readers, readers);
-
+       six_lock_readers_add(&b->c.lock, -readers);
        six_lock_write(&b->c.lock, NULL, NULL);
-
-       if (!b->c.lock.readers)
-               atomic64_add(__SIX_VAL(read_lock, readers),
-                            &b->c.lock.state.counter);
-       else
-               this_cpu_add(*b->c.lock.readers, readers);
+       six_lock_readers_add(&b->c.lock, readers);
 }
 
 bool __bch2_btree_node_relock(struct btree_trans *trans,
@@ -193,14 +200,9 @@ bool __bch2_btree_node_relock(struct btree_trans *trans,
                return true;
        }
 fail:
-       if (b != BTREE_ITER_NO_NODE_CACHED &&
-           b != BTREE_ITER_NO_NODE_INIT)
-               trace_btree_node_relock_fail(trans->fn, _RET_IP_,
-                                            path->btree_id,
-                                            &path->pos,
-                                            (unsigned long) b,
-                                            path->l[level].lock_seq,
-                                            is_btree_node(path, level) ? b->c.lock.state.seq : 0);
+       if (b != ERR_PTR(-BCH_ERR_no_btree_node_cached) &&
+           b != ERR_PTR(-BCH_ERR_no_btree_node_init))
+               trace_btree_node_relock_fail(trans, _RET_IP_, path, level);
        return false;
 }
 
@@ -236,10 +238,11 @@ bool bch2_btree_node_upgrade(struct btree_trans *trans,
 
        if (btree_node_lock_seq_matches(path, b, level) &&
            btree_node_lock_increment(trans, b, level, BTREE_NODE_INTENT_LOCKED)) {
-               btree_node_unlock(path, level);
+               btree_node_unlock(trans, path, level);
                goto success;
        }
 
+       trace_btree_node_upgrade_fail(trans, _RET_IP_, path, level);
        return false;
 success:
        mark_btree_node_intent_locked(trans, path, level);
@@ -271,11 +274,13 @@ static inline bool btree_path_get_locks(struct btree_trans *trans,
         * the node that we failed to relock:
         */
        if (fail_idx >= 0) {
-               __bch2_btree_path_unlock(path);
+               __bch2_btree_path_unlock(trans, path);
                btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
 
                do {
-                       path->l[fail_idx].b = BTREE_ITER_NO_NODE_GET_LOCKS;
+                       path->l[fail_idx].b = upgrade
+                               ? ERR_PTR(-BCH_ERR_no_btree_node_upgrade)
+                               : ERR_PTR(-BCH_ERR_no_btree_node_relock);
                        --fail_idx;
                } while (fail_idx >= 0);
        }
@@ -297,13 +302,13 @@ static struct bpos btree_node_pos(struct btree_bkey_cached_common *_b,
 }
 
 /* Slowpath: */
-bool __bch2_btree_node_lock(struct btree_trans *trans,
-                           struct btree_path *path,
-                           struct btree *b,
-                           struct bpos pos, unsigned level,
-                           enum six_lock_type type,
-                           six_lock_should_sleep_fn should_sleep_fn, void *p,
-                           unsigned long ip)
+int __bch2_btree_node_lock(struct btree_trans *trans,
+                          struct btree_path *path,
+                          struct btree *b,
+                          struct bpos pos, unsigned level,
+                          enum six_lock_type type,
+                          six_lock_should_sleep_fn should_sleep_fn, void *p,
+                          unsigned long ip)
 {
        struct btree_path *linked;
        unsigned reason;
@@ -373,16 +378,8 @@ bool __bch2_btree_node_lock(struct btree_trans *trans,
        return btree_node_lock_type(trans, path, b, pos, level,
                                    type, should_sleep_fn, p);
 deadlock:
-       trace_trans_restart_would_deadlock(trans->fn, ip,
-                       trans->in_traverse_all, reason,
-                       linked->btree_id,
-                       linked->cached,
-                       &linked->pos,
-                       path->btree_id,
-                       path->cached,
-                       &pos);
-       btree_trans_restart(trans);
-       return false;
+       trace_trans_restart_would_deadlock(trans, ip, reason, linked, path, &pos);
+       return btree_trans_restart(trans, BCH_ERR_transaction_restart_would_deadlock);
 }
 
 /* Btree iterator locking: */
@@ -420,8 +417,8 @@ static inline void bch2_btree_path_verify_locks(struct btree_path *path) {}
 /*
  * Only for btree_cache.c - only relocks intent locks
  */
-bool bch2_btree_path_relock_intent(struct btree_trans *trans,
-                                  struct btree_path *path)
+int bch2_btree_path_relock_intent(struct btree_trans *trans,
+                                 struct btree_path *path)
 {
        unsigned l;
 
@@ -429,30 +426,32 @@ bool bch2_btree_path_relock_intent(struct btree_trans *trans,
             l < path->locks_want && btree_path_node(path, l);
             l++) {
                if (!bch2_btree_node_relock(trans, path, l)) {
-                       __bch2_btree_path_unlock(path);
+                       __bch2_btree_path_unlock(trans, path);
                        btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
-                       trace_trans_restart_relock_path_intent(trans->fn, _RET_IP_,
-                                                  path->btree_id, &path->pos);
-                       btree_trans_restart(trans);
-                       return false;
+                       trace_trans_restart_relock_path_intent(trans, _RET_IP_, path);
+                       return btree_trans_restart(trans, BCH_ERR_transaction_restart_relock_path_intent);
                }
        }
 
-       return true;
+       return 0;
 }
 
 __flatten
-static bool bch2_btree_path_relock(struct btree_trans *trans,
+static bool bch2_btree_path_relock_norestart(struct btree_trans *trans,
                        struct btree_path *path, unsigned long trace_ip)
 {
-       bool ret = btree_path_get_locks(trans, path, false);
+       return btree_path_get_locks(trans, path, false);
+}
 
-       if (!ret) {
-               trace_trans_restart_relock_path(trans->fn, trace_ip,
-                                               path->btree_id, &path->pos);
-               btree_trans_restart(trans);
+static int bch2_btree_path_relock(struct btree_trans *trans,
+                       struct btree_path *path, unsigned long trace_ip)
+{
+       if (!bch2_btree_path_relock_norestart(trans, path, trace_ip)) {
+               trace_trans_restart_relock_path(trans, trace_ip, path);
+               return btree_trans_restart(trans, BCH_ERR_transaction_restart_relock_path);
        }
-       return ret;
+
+       return 0;
 }
 
 bool __bch2_btree_path_upgrade(struct btree_trans *trans,
@@ -500,7 +499,8 @@ bool __bch2_btree_path_upgrade(struct btree_trans *trans,
        return false;
 }
 
-void __bch2_btree_path_downgrade(struct btree_path *path,
+void __bch2_btree_path_downgrade(struct btree_trans *trans,
+                                struct btree_path *path,
                                 unsigned new_locks_want)
 {
        unsigned l;
@@ -512,7 +512,7 @@ void __bch2_btree_path_downgrade(struct btree_path *path,
        while (path->nodes_locked &&
               (l = __fls(path->nodes_locked)) >= path->locks_want) {
                if (l > path->level) {
-                       btree_node_unlock(path, l);
+                       btree_node_unlock(trans, path, l);
                } else {
                        if (btree_node_intent_locked(path, l)) {
                                six_lock_downgrade(&path->l[l].b->c.lock);
@@ -530,27 +530,26 @@ void bch2_trans_downgrade(struct btree_trans *trans)
        struct btree_path *path;
 
        trans_for_each_path(trans, path)
-               bch2_btree_path_downgrade(path);
+               bch2_btree_path_downgrade(trans, path);
 }
 
 /* Btree transaction locking: */
 
-bool bch2_trans_relock(struct btree_trans *trans)
+int bch2_trans_relock(struct btree_trans *trans)
 {
        struct btree_path *path;
 
        if (unlikely(trans->restarted))
-               return false;
+               return -BCH_ERR_transaction_restart_relock;
 
        trans_for_each_path(trans, path)
                if (path->should_be_locked &&
-                   !bch2_btree_path_relock(trans, path, _RET_IP_)) {
-                       trace_trans_restart_relock(trans->fn, _RET_IP_,
-                                       path->btree_id, &path->pos);
+                   bch2_btree_path_relock(trans, path, _RET_IP_)) {
+                       trace_trans_restart_relock(trans, _RET_IP_, path);
                        BUG_ON(!trans->restarted);
-                       return false;
+                       return -BCH_ERR_transaction_restart_relock;
                }
-       return true;
+       return 0;
 }
 
 void bch2_trans_unlock(struct btree_trans *trans)
@@ -558,7 +557,7 @@ void bch2_trans_unlock(struct btree_trans *trans)
        struct btree_path *path;
 
        trans_for_each_path(trans, path)
-               __bch2_btree_path_unlock(path);
+               __bch2_btree_path_unlock(trans, path);
 
        /*
         * bch2_gc_btree_init_recurse() doesn't use btree iterators for walking
@@ -586,7 +585,7 @@ static void bch2_btree_path_verify_cached(struct btree_trans *trans,
               bkey_cmp(ck->key.pos, path->pos));
 
        if (!locked)
-               btree_node_unlock(path, 0);
+               btree_node_unlock(trans, path, 0);
 }
 
 static void bch2_btree_path_verify_level(struct btree_trans *trans,
@@ -643,7 +642,7 @@ static void bch2_btree_path_verify_level(struct btree_trans *trans,
        }
 
        if (!locked)
-               btree_node_unlock(path, level);
+               btree_node_unlock(trans, path, level);
        return;
 err:
        bch2_bpos_to_text(&buf1, path->pos);
@@ -1020,27 +1019,29 @@ static inline struct bkey_s_c btree_path_level_peek_all(struct bch_fs *c,
                        bch2_btree_node_iter_peek_all(&l->iter, l->b));
 }
 
-static inline struct bkey_s_c btree_path_level_peek(struct bch_fs *c,
+static inline struct bkey_s_c btree_path_level_peek(struct btree_trans *trans,
                                                    struct btree_path *path,
                                                    struct btree_path_level *l,
                                                    struct bkey *u)
 {
-       struct bkey_s_c k = __btree_iter_unpack(c, l, u,
+       struct bkey_s_c k = __btree_iter_unpack(trans->c, l, u,
                        bch2_btree_node_iter_peek(&l->iter, l->b));
 
        path->pos = k.k ? k.k->p : l->b->key.k.p;
+       bch2_btree_path_verify_level(trans, path, l - path->l);
        return k;
 }
 
-static inline struct bkey_s_c btree_path_level_prev(struct bch_fs *c,
+static inline struct bkey_s_c btree_path_level_prev(struct btree_trans *trans,
                                                    struct btree_path *path,
                                                    struct btree_path_level *l,
                                                    struct bkey *u)
 {
-       struct bkey_s_c k = __btree_iter_unpack(c, l, u,
+       struct bkey_s_c k = __btree_iter_unpack(trans->c, l, u,
                        bch2_btree_node_iter_prev(&l->iter, l->b));
 
        path->pos = k.k ? k.k->p : l->b->data->min_key;
+       bch2_btree_path_verify_level(trans, path, l - path->l);
        return k;
 }
 
@@ -1115,7 +1116,7 @@ static void btree_path_verify_new_node(struct btree_trans *trans,
        }
 
        if (!parent_locked)
-               btree_node_unlock(path, plevel);
+               btree_node_unlock(trans, path, plevel);
 }
 
 static inline void __btree_path_level_init(struct btree_path *path,
@@ -1167,7 +1168,7 @@ void bch2_trans_node_add(struct btree_trans *trans, struct btree *b)
 
                        if (path->nodes_locked &&
                            t != BTREE_NODE_UNLOCKED) {
-                               btree_node_unlock(path, b->c.level);
+                               btree_node_unlock(trans, path, b->c.level);
                                six_lock_increment(&b->c.lock, t);
                                mark_btree_node_locked(trans, path, b->c.level, t);
                        }
@@ -1195,7 +1196,9 @@ static int lock_root_check_fn(struct six_lock *lock, void *p)
        struct btree *b = container_of(lock, struct btree, c.lock);
        struct btree **rootp = p;
 
-       return b == *rootp ? 0 : -1;
+       if (b != *rootp)
+               return BCH_ERR_lock_fail_root_changed;
+       return 0;
 }
 
 static inline int btree_path_lock_root(struct btree_trans *trans,
@@ -1207,6 +1210,7 @@ static inline int btree_path_lock_root(struct btree_trans *trans,
        struct btree *b, **rootp = &c->btree_roots[path->btree_id].b;
        enum six_lock_type lock_type;
        unsigned i;
+       int ret;
 
        EBUG_ON(path->nodes_locked);
 
@@ -1228,20 +1232,23 @@ static inline int btree_path_lock_root(struct btree_trans *trans,
                }
 
                lock_type = __btree_lock_want(path, path->level);
-               if (unlikely(!btree_node_lock(trans, path, b, SPOS_MAX,
-                                             path->level, lock_type,
-                                             lock_root_check_fn, rootp,
-                                             trace_ip))) {
-                       if (trans->restarted)
-                               return -EINTR;
-                       continue;
+               ret = btree_node_lock(trans, path, b, SPOS_MAX,
+                                     path->level, lock_type,
+                                     lock_root_check_fn, rootp,
+                                     trace_ip);
+               if (unlikely(ret)) {
+                       if (bch2_err_matches(ret, BCH_ERR_lock_fail_root_changed))
+                               continue;
+                       if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+                               return ret;
+                       BUG();
                }
 
                if (likely(b == READ_ONCE(*rootp) &&
                           b->c.level == path->level &&
                           !race_fault())) {
                        for (i = 0; i < path->level; i++)
-                               path->l[i].b = BTREE_ITER_NO_NODE_LOCK_ROOT;
+                               path->l[i].b = ERR_PTR(-BCH_ERR_no_btree_node_lock_root);
                        path->l[path->level].b = b;
                        for (i = path->level + 1; i < BTREE_MAX_DEPTH; i++)
                                path->l[i].b = NULL;
@@ -1286,7 +1293,7 @@ static int btree_path_prefetch(struct btree_trans *trans, struct btree_path *pat
        }
 
        if (!was_locked)
-               btree_node_unlock(path, path->level);
+               btree_node_unlock(trans, path, path->level);
 
        bch2_bkey_buf_exit(&tmp, c);
        return ret;
@@ -1321,7 +1328,7 @@ static int btree_path_prefetch_j(struct btree_trans *trans, struct btree_path *p
        }
 
        if (!was_locked)
-               btree_node_unlock(path, path->level);
+               btree_node_unlock(trans, path, path->level);
 
        bch2_bkey_buf_exit(&tmp, c);
        return ret;
@@ -1346,7 +1353,7 @@ static noinline void btree_node_mem_ptr_set(struct btree_trans *trans,
        bp->mem_ptr = (unsigned long)b;
 
        if (!locked)
-               btree_node_unlock(path, plevel);
+               btree_node_unlock(trans, path, plevel);
 }
 
 static noinline int btree_node_iter_and_journal_peek(struct btree_trans *trans,
@@ -1419,7 +1426,7 @@ static __always_inline int btree_path_down(struct btree_trans *trans,
                btree_node_mem_ptr_set(trans, path, level + 1, b);
 
        if (btree_node_read_locked(path, level + 1))
-               btree_node_unlock(path, level + 1);
+               btree_node_unlock(trans, path, level + 1);
        path->level = level;
 
        bch2_btree_path_verify_locks(path);
@@ -1439,11 +1446,11 @@ static int bch2_btree_path_traverse_all(struct btree_trans *trans)
        int i, ret = 0;
 
        if (trans->in_traverse_all)
-               return -EINTR;
+               return -BCH_ERR_transaction_restart_in_traverse_all;
 
        trans->in_traverse_all = true;
 retry_all:
-       trans->restarted = false;
+       trans->restarted = 0;
        trans->traverse_all_idx = U8_MAX;
 
        trans_for_each_path(trans, path)
@@ -1487,7 +1494,8 @@ retry_all:
                 */
                if (path->uptodate) {
                        ret = btree_path_traverse_one(trans, path, 0, _THIS_IP_);
-                       if (ret == -EINTR || ret == -ENOMEM)
+                       if (bch2_err_matches(ret, BCH_ERR_transaction_restart) ||
+                           ret == -ENOMEM)
                                goto retry_all;
                        if (ret)
                                goto err;
@@ -1509,7 +1517,7 @@ err:
 
        trans->in_traverse_all = false;
 
-       trace_trans_traverse_all(trans->fn, trace_ip);
+       trace_trans_traverse_all(trans, trace_ip);
        return ret;
 }
 
@@ -1528,14 +1536,6 @@ static inline bool btree_path_good_node(struct btree_trans *trans,
        return true;
 }
 
-static void btree_path_set_level_up(struct btree_path *path)
-{
-       btree_node_unlock(path, path->level);
-       path->l[path->level].b = BTREE_ITER_NO_NODE_UP;
-       path->level++;
-       btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
-}
-
 static void btree_path_set_level_down(struct btree_trans *trans,
                                      struct btree_path *path,
                                      unsigned new_level)
@@ -1546,7 +1546,7 @@ static void btree_path_set_level_down(struct btree_trans *trans,
 
        for (l = path->level + 1; l < BTREE_MAX_DEPTH; l++)
                if (btree_lock_want(path, l) == BTREE_NODE_UNLOCKED)
-                       btree_node_unlock(path, l);
+                       btree_node_unlock(trans, path, l);
 
        btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
        bch2_btree_path_verify(trans, path);
@@ -1559,22 +1559,16 @@ static inline unsigned btree_path_up_until_good_node(struct btree_trans *trans,
        unsigned i, l = path->level;
 
        while (btree_path_node(path, l) &&
-              !btree_path_good_node(trans, path, l, check_pos)) {
-               btree_node_unlock(path, l);
-               path->l[l].b = BTREE_ITER_NO_NODE_UP;
-               l++;
-       }
+              !btree_path_good_node(trans, path, l, check_pos))
+               __btree_path_set_level_up(trans, path, l++);
 
        /* If we need intent locks, take them too: */
        for (i = l + 1;
             i < path->locks_want && btree_path_node(path, i);
             i++)
                if (!bch2_btree_node_relock(trans, path, i))
-                       while (l <= i) {
-                               btree_node_unlock(path, l);
-                               path->l[l].b = BTREE_ITER_NO_NODE_UP;
-                               l++;
-                       }
+                       while (l <= i)
+                               __btree_path_set_level_up(trans, path, l++);
 
        return l;
 }
@@ -1594,19 +1588,17 @@ static int btree_path_traverse_one(struct btree_trans *trans,
                                   unsigned long trace_ip)
 {
        unsigned depth_want = path->level;
-       int ret = 0;
+       int ret = trans->restarted;
 
-       if (unlikely(trans->restarted)) {
-               ret = -EINTR;
+       if (unlikely(ret))
                goto out;
-       }
 
        /*
         * Ensure we obey path->should_be_locked: if it's set, we can't unlock
         * and re-traverse the path without a transaction restart:
         */
        if (path->should_be_locked) {
-               ret = bch2_btree_path_relock(trans, path, trace_ip) ? 0 : -EINTR;
+               ret = bch2_btree_path_relock(trans, path, trace_ip);
                goto out;
        }
 
@@ -1640,22 +1632,16 @@ static int btree_path_traverse_one(struct btree_trans *trans,
                                goto out;
                        }
 
-                       __bch2_btree_path_unlock(path);
+                       __bch2_btree_path_unlock(trans, path);
                        path->level = depth_want;
-
-                       if (ret == -EIO)
-                               path->l[path->level].b =
-                                       BTREE_ITER_NO_NODE_ERROR;
-                       else
-                               path->l[path->level].b =
-                                       BTREE_ITER_NO_NODE_DOWN;
+                       path->l[path->level].b = ERR_PTR(ret);
                        goto out;
                }
        }
 
        path->uptodate = BTREE_ITER_UPTODATE;
 out:
-       BUG_ON((ret == -EINTR) != !!trans->restarted);
+       BUG_ON(bch2_err_matches(ret, BCH_ERR_transaction_restart) != !!trans->restarted);
        bch2_btree_path_verify(trans, path);
        return ret;
 }
@@ -1663,6 +1649,16 @@ out:
 int __must_check bch2_btree_path_traverse(struct btree_trans *trans,
                                          struct btree_path *path, unsigned flags)
 {
+       if (0 && IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) {
+               unsigned restart_probability_bits = 4 << min(trans->restart_count, 32U);
+               u64 mask = ~(~0ULL << restart_probability_bits);
+
+               if ((prandom_u32() & mask) == mask) {
+                       trace_transaction_restart_injected(trans, _RET_IP_);
+                       return btree_trans_restart(trans, BCH_ERR_transaction_restart_fault_inject);
+               }
+       }
+
        if (path->uptodate < BTREE_ITER_NEED_RELOCK)
                return 0;
 
@@ -1737,8 +1733,8 @@ bch2_btree_path_set_pos(struct btree_trans *trans,
        bch2_btree_path_check_sort(trans, path, cmp);
 
        if (unlikely(path->cached)) {
-               btree_node_unlock(path, 0);
-               path->l[0].b = BTREE_ITER_NO_NODE_CACHED;
+               btree_node_unlock(trans, path, 0);
+               path->l[0].b = ERR_PTR(-BCH_ERR_no_btree_node_up);
                btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
                goto out;
        }
@@ -1760,7 +1756,7 @@ bch2_btree_path_set_pos(struct btree_trans *trans,
 
        if (l != path->level) {
                btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
-               __bch2_btree_path_unlock(path);
+               __bch2_btree_path_unlock(trans, path);
        }
 out:
        bch2_btree_path_verify(trans, path);
@@ -1771,37 +1767,37 @@ out:
 
 static struct btree_path *have_path_at_pos(struct btree_trans *trans, struct btree_path *path)
 {
-       struct btree_path *next;
+       struct btree_path *sib;
 
-       next = prev_btree_path(trans, path);
-       if (next && !btree_path_cmp(next, path))
-               return next;
+       sib = prev_btree_path(trans, path);
+       if (sib && !btree_path_cmp(sib, path))
+               return sib;
 
-       next = next_btree_path(trans, path);
-       if (next && !btree_path_cmp(next, path))
-               return next;
+       sib = next_btree_path(trans, path);
+       if (sib && !btree_path_cmp(sib, path))
+               return sib;
 
        return NULL;
 }
 
 static struct btree_path *have_node_at_pos(struct btree_trans *trans, struct btree_path *path)
 {
-       struct btree_path *next;
+       struct btree_path *sib;
 
-       next = prev_btree_path(trans, path);
-       if (next && next->level == path->level && path_l(next)->b == path_l(path)->b)
-               return next;
+       sib = prev_btree_path(trans, path);
+       if (sib && sib->level == path->level && path_l(sib)->b == path_l(path)->b)
+               return sib;
 
-       next = next_btree_path(trans, path);
-       if (next && next->level == path->level && path_l(next)->b == path_l(path)->b)
-               return next;
+       sib = next_btree_path(trans, path);
+       if (sib && sib->level == path->level && path_l(sib)->b == path_l(path)->b)
+               return sib;
 
        return NULL;
 }
 
 static inline void __bch2_path_free(struct btree_trans *trans, struct btree_path *path)
 {
-       __bch2_btree_path_unlock(path);
+       __bch2_btree_path_unlock(trans, path);
        btree_path_list_remove(trans, path);
        trans->paths_allocated &= ~(1ULL << path->idx);
 }
@@ -1816,26 +1812,23 @@ void bch2_path_put(struct btree_trans *trans, struct btree_path *path, bool inte
        if (!__btree_path_put(path, intent))
                return;
 
-       /*
-        * Perhaps instead we should check for duplicate paths in traverse_all:
-        */
-       if (path->preserve &&
-           (dup = have_path_at_pos(trans, path))) {
-               dup->preserve = true;
-               path->preserve = false;
-               goto free;
-       }
+       dup = path->preserve
+               ? have_path_at_pos(trans, path)
+               : have_node_at_pos(trans, path);
+
+       if (!dup && !(!path->preserve && !is_btree_node(path, path->level)))
+               return;
 
-       if (!path->preserve &&
-           (dup = have_node_at_pos(trans, path)))
-               goto free;
-       return;
-free:
        if (path->should_be_locked &&
-           !btree_node_locked(dup, path->level))
+           !trans->restarted &&
+           (!dup || !bch2_btree_path_relock_norestart(trans, dup, _THIS_IP_)))
                return;
 
-       dup->should_be_locked |= path->should_be_locked;
+       if (dup) {
+               dup->preserve           |= path->preserve;
+               dup->should_be_locked   |= path->should_be_locked;
+       }
+
        __bch2_path_free(trans, path);
 }
 
@@ -1891,10 +1884,10 @@ void bch2_dump_trans_paths_updates(struct btree_trans *trans)
 
                bch2_bpos_to_text(&buf, path->pos);
 
-               printk(KERN_ERR "path: idx %u ref %u:%u%s%s btree=%s l=%u pos %s locks %u %pS\n",
+               printk(KERN_ERR "path: idx %2u ref %u:%u %c %c btree=%s l=%u pos %s locks %u %pS\n",
                       path->idx, path->ref, path->intent_ref,
-                      path->should_be_locked ? " S" : "",
-                      path->preserve ? " P" : "",
+                      path->preserve ? 'P' : ' ',
+                      path->should_be_locked ? 'S' : ' ',
                       bch2_btree_ids[path->btree_id],
                       path->level,
                       buf.buf,
@@ -1947,6 +1940,7 @@ struct btree_path *bch2_path_get(struct btree_trans *trans,
        struct btree_path *path, *path_pos = NULL;
        bool cached = flags & BTREE_ITER_CACHED;
        bool intent = flags & BTREE_ITER_INTENT;
+       bool have_dup = false;
        int i;
 
        BUG_ON(trans->restarted);
@@ -1954,14 +1948,24 @@ struct btree_path *bch2_path_get(struct btree_trans *trans,
        bch2_trans_verify_locks(trans);
 
        trans_for_each_path_inorder(trans, path, i) {
-               if (__btree_path_cmp(path,
-                                    btree_id,
-                                    cached,
-                                    pos,
-                                    level) > 0)
+               int cmp = __btree_path_cmp(path,
+                                          btree_id,
+                                          cached,
+                                          pos,
+                                          level);
+               if (cmp > 0)
                        break;
 
                path_pos = path;
+
+               if (cmp == 0) {
+                       if (path->ref || path->preserve) {
+                               path->preserve = true;
+                               have_dup = true;
+                       } else {
+                               break;
+                       }
+               }
        }
 
        if (path_pos &&
@@ -1985,14 +1989,14 @@ struct btree_path *bch2_path_get(struct btree_trans *trans,
                path->nodes_locked              = 0;
                path->nodes_intent_locked       = 0;
                for (i = 0; i < ARRAY_SIZE(path->l); i++)
-                       path->l[i].b            = BTREE_ITER_NO_NODE_INIT;
+                       path->l[i].b            = ERR_PTR(-BCH_ERR_no_btree_node_init);
 #ifdef CONFIG_BCACHEFS_DEBUG
                path->ip_allocated              = ip;
 #endif
                btree_trans_verify_sorted(trans);
        }
 
-       if (!(flags & BTREE_ITER_NOPRESERVE))
+       if (!(flags & BTREE_ITER_NOPRESERVE) && !have_dup)
                path->preserve = true;
 
        if (path->intent_ref)
@@ -2039,11 +2043,7 @@ inline struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *path, struct
                EBUG_ON(ck &&
                        (path->btree_id != ck->key.btree_id ||
                         bkey_cmp(path->pos, ck->key.pos)));
-
-               /* BTREE_ITER_CACHED_NOFILL|BTREE_ITER_CACHED_NOCREATE? */
-               if (unlikely(!ck || !ck->valid))
-                       return bkey_s_c_null;
-
+               EBUG_ON(!ck || !ck->valid);
                EBUG_ON(path->uptodate != BTREE_ITER_UPTODATE);
 
                *u = ck->k->k;
@@ -2079,7 +2079,7 @@ bch2_btree_iter_traverse(struct btree_iter *iter)
        if (ret)
                return ret;
 
-       iter->path->should_be_locked = true;
+       btree_path_set_should_be_locked(iter->path);
        return 0;
 }
 
@@ -2110,8 +2110,7 @@ struct btree *bch2_btree_iter_peek_node(struct btree_iter *iter)
        iter->path = bch2_btree_path_set_pos(trans, iter->path, b->key.k.p,
                                        iter->flags & BTREE_ITER_INTENT,
                                        btree_iter_ip_allocated(iter));
-       iter->path->should_be_locked = true;
-       BUG_ON(iter->path->uptodate);
+       btree_path_set_should_be_locked(iter->path);
 out:
        bch2_btree_iter_verify_entry_exit(iter);
        bch2_btree_iter_verify(iter);
@@ -2139,28 +2138,24 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter)
 
        /* got to end? */
        if (!btree_path_node(path, path->level + 1)) {
-               btree_path_set_level_up(path);
+               btree_path_set_level_up(trans, path);
                return NULL;
        }
 
        if (!bch2_btree_node_relock(trans, path, path->level + 1)) {
-               __bch2_btree_path_unlock(path);
-               path->l[path->level].b = BTREE_ITER_NO_NODE_GET_LOCKS;
-               path->l[path->level + 1].b = BTREE_ITER_NO_NODE_GET_LOCKS;
+               __bch2_btree_path_unlock(trans, path);
+               path->l[path->level].b          = ERR_PTR(-BCH_ERR_no_btree_node_relock);
+               path->l[path->level + 1].b      = ERR_PTR(-BCH_ERR_no_btree_node_relock);
                btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
-               trace_trans_restart_relock_next_node(trans->fn, _THIS_IP_,
-                                          path->btree_id, &path->pos);
-               btree_trans_restart(trans);
-               ret = -EINTR;
+               trace_trans_restart_relock_next_node(trans, _THIS_IP_, path);
+               ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_relock);
                goto err;
        }
 
        b = btree_path_node(path, path->level + 1);
 
        if (!bpos_cmp(iter->pos, b->key.k.p)) {
-               btree_node_unlock(path, path->level);
-               path->l[path->level].b = BTREE_ITER_NO_NODE_UP;
-               path->level++;
+               __btree_path_set_level_up(trans, path, path->level++);
        } else {
                /*
                 * Haven't gotten to the end of the parent node: go back down to
@@ -2186,7 +2181,7 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter)
        iter->path = bch2_btree_path_set_pos(trans, iter->path, b->key.k.p,
                                        iter->flags & BTREE_ITER_INTENT,
                                        btree_iter_ip_allocated(iter));
-       iter->path->should_be_locked = true;
+       btree_path_set_should_be_locked(iter->path);
        BUG_ON(iter->path->uptodate);
 out:
        bch2_btree_iter_verify_entry_exit(iter);
@@ -2328,7 +2323,7 @@ struct bkey_s_c btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos
        if (unlikely(ret))
                return bkey_s_c_err(ret);
 
-       iter->key_cache_path->should_be_locked = true;
+       btree_path_set_should_be_locked(iter->key_cache_path);
 
        return bch2_btree_path_peek_slot(iter->key_cache_path, &u);
 }
@@ -2356,7 +2351,7 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp
                        goto out;
                }
 
-               iter->path->should_be_locked = true;
+               btree_path_set_should_be_locked(iter->path);
 
                k = btree_path_level_peek_all(trans->c, &iter->path->l[0], &iter->k);
 
@@ -2444,7 +2439,7 @@ struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos e
        while (1) {
                k = __bch2_btree_iter_peek(iter, search_key);
                if (!k.k || bkey_err(k))
-                       goto out;
+                       goto out_no_locked;
 
                /*
                 * iter->pos should be mononotically increasing, and always be
@@ -2461,7 +2456,7 @@ struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos e
                if (bkey_cmp(iter_pos, end) > 0) {
                        bch2_btree_iter_set_pos(iter, end);
                        k = bkey_s_c_null;
-                       goto out;
+                       goto out_no_locked;
                }
 
                if (iter->update_path &&
@@ -2523,18 +2518,16 @@ struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos e
        iter->path = bch2_btree_path_set_pos(trans, iter->path, k.k->p,
                                iter->flags & BTREE_ITER_INTENT,
                                btree_iter_ip_allocated(iter));
-       BUG_ON(!iter->path->nodes_locked);
-out:
+
+       btree_path_set_should_be_locked(iter->path);
+out_no_locked:
        if (iter->update_path) {
                if (iter->update_path->uptodate &&
-                   !bch2_btree_path_relock(trans, iter->update_path, _THIS_IP_)) {
-                       k = bkey_s_c_err(-EINTR);
-               } else {
-                       BUG_ON(!(iter->update_path->nodes_locked & 1));
-                       iter->update_path->should_be_locked = true;
-               }
+                   (ret = bch2_btree_path_relock(trans, iter->update_path, _THIS_IP_)))
+                       k = bkey_s_c_err(ret);
+               else
+                       btree_path_set_should_be_locked(iter->update_path);
        }
-       iter->path->should_be_locked = true;
 
        if (!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS))
                iter->pos.snapshot = iter->snapshot;
@@ -2578,13 +2571,13 @@ struct bkey_s_c bch2_btree_iter_peek_all_levels(struct btree_iter *iter)
                        /* ensure that iter->k is consistent with iter->pos: */
                        bch2_btree_iter_set_pos(iter, iter->pos);
                        k = bkey_s_c_err(ret);
-                       goto out;
+                       goto out_no_locked;
                }
 
                /* Already at end? */
                if (!btree_path_node(iter->path, iter->path->level)) {
                        k = bkey_s_c_null;
-                       goto out;
+                       goto out_no_locked;
                }
 
                k = btree_path_level_peek_all(trans->c,
@@ -2595,7 +2588,7 @@ struct bkey_s_c bch2_btree_iter_peek_all_levels(struct btree_iter *iter)
                    (iter->advanced &&
                     !bpos_cmp(path_l(iter->path)->b->key.k.p, iter->pos))) {
                        iter->pos = path_l(iter->path)->b->key.k.p;
-                       btree_path_set_level_up(iter->path);
+                       btree_path_set_level_up(trans, iter->path);
                        iter->advanced = false;
                        continue;
                }
@@ -2637,8 +2630,8 @@ struct bkey_s_c bch2_btree_iter_peek_all_levels(struct btree_iter *iter)
        }
 
        iter->pos = k.k->p;
-out:
-       iter->path->should_be_locked = true;
+       btree_path_set_should_be_locked(iter->path);
+out_no_locked:
        bch2_btree_iter_verify(iter);
 
        return k;
@@ -2692,16 +2685,16 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter)
                        /* ensure that iter->k is consistent with iter->pos: */
                        bch2_btree_iter_set_pos(iter, iter->pos);
                        k = bkey_s_c_err(ret);
-                       goto out;
+                       goto out_no_locked;
                }
 
-               k = btree_path_level_peek(trans->c, iter->path,
+               k = btree_path_level_peek(trans, iter->path,
                                          &iter->path->l[0], &iter->k);
                if (!k.k ||
                    ((iter->flags & BTREE_ITER_IS_EXTENTS)
                     ? bpos_cmp(bkey_start_pos(k.k), search_key) >= 0
                     : bpos_cmp(k.k->p, search_key) > 0))
-                       k = btree_path_level_prev(trans->c, iter->path,
+                       k = btree_path_level_prev(trans, iter->path,
                                                  &iter->path->l[0], &iter->k);
 
                bch2_btree_path_check_sort(trans, iter->path, 0);
@@ -2758,7 +2751,7 @@ got_key:
                        /* Start of btree: */
                        bch2_btree_iter_set_pos(iter, POS_MIN);
                        k = bkey_s_c_null;
-                       goto out;
+                       goto out_no_locked;
                }
        }
 
@@ -2770,10 +2763,11 @@ got_key:
 
        if (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS)
                iter->pos.snapshot = iter->snapshot;
-out:
+
+       btree_path_set_should_be_locked(iter->path);
+out_no_locked:
        if (saved_path)
                bch2_path_put(trans, saved_path, iter->flags & BTREE_ITER_INTENT);
-       iter->path->should_be_locked = true;
 
        bch2_btree_iter_verify_entry_exit(iter);
        bch2_btree_iter_verify(iter);
@@ -2846,9 +2840,12 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
 
                if (unlikely(iter->flags & BTREE_ITER_WITH_KEY_CACHE) &&
                    (k = btree_trans_peek_key_cache(iter, iter->pos)).k) {
-                       if (!bkey_err(k))
+                       if (bkey_err(k)) {
+                               goto out_no_locked;
+                       } else {
                                iter->k = *k.k;
-                       goto out;
+                               goto out;
+                       }
                }
 
                k = bch2_btree_path_peek_slot(iter->path, &iter->k);
@@ -2902,8 +2899,8 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
                }
        }
 out:
-       iter->path->should_be_locked = true;
-
+       btree_path_set_should_be_locked(iter->path);
+out_no_locked:
        bch2_btree_iter_verify_entry_exit(iter);
        bch2_btree_iter_verify(iter);
        ret = bch2_btree_iter_verify_ret(iter, k);
@@ -3184,9 +3181,8 @@ void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
                trans->mem_bytes = new_bytes;
 
                if (old_bytes) {
-                       trace_trans_restart_mem_realloced(trans->fn, _RET_IP_, new_bytes);
-                       btree_trans_restart(trans);
-                       return ERR_PTR(-EINTR);
+                       trace_trans_restart_mem_realloced(trans, _RET_IP_, new_bytes);
+                       return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_mem_realloced));
                }
        }
 
@@ -3200,11 +3196,11 @@ void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
  * bch2_trans_begin() - reset a transaction after a interrupted attempt
  * @trans: transaction to reset
  *
- * While iterating over nodes or updating nodes a attempt to lock a btree
- * node may return EINTR when the trylock fails. When this occurs
- * bch2_trans_begin() should be called and the transaction retried.
+ * While iterating over nodes or updating nodes a attempt to lock a btree node
+ * may return BCH_ERR_transaction_restart when the trylock fails. When this
+ * occurs bch2_trans_begin() should be called and the transaction retried.
  */
-void bch2_trans_begin(struct btree_trans *trans)
+u32 bch2_trans_begin(struct btree_trans *trans)
 {
        struct btree_path *path;
 
@@ -3250,11 +3246,20 @@ void bch2_trans_begin(struct btree_trans *trans)
                bch2_trans_relock(trans);
        }
 
+       trans->last_restarted_ip = _RET_IP_;
        if (trans->restarted)
                bch2_btree_path_traverse_all(trans);
 
-       trans->restarted = false;
        trans->last_begin_time = ktime_get_ns();
+       return trans->restart_count;
+}
+
+void bch2_trans_verify_not_restarted(struct btree_trans *trans, u32 restart_count)
+{
+       bch2_trans_inconsistent_on(trans_was_restarted(trans, restart_count), trans,
+               "trans->restart_count %u, should be %u, last restarted by %ps\n",
+               trans->restart_count, restart_count,
+               (void *) trans->last_restarted_ip);
 }
 
 static void bch2_trans_alloc_paths(struct btree_trans *trans, struct bch_fs *c)
@@ -3291,6 +3296,15 @@ void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c,
        trans->last_begin_time  = ktime_get_ns();
        trans->task             = current;
 
+       while (c->lock_held_stats.names[trans->lock_name_idx] != fn
+              && c->lock_held_stats.names[trans->lock_name_idx] != 0)
+               trans->lock_name_idx++;
+
+       if (trans->lock_name_idx >= BCH_LOCK_TIME_NR)
+               pr_warn_once("lock_times array not big enough!");
+       else
+               c->lock_held_stats.names[trans->lock_name_idx] = fn;
+
        bch2_trans_alloc_paths(trans, c);
 
        if (expected_mem_bytes) {
@@ -3393,18 +3407,18 @@ void bch2_trans_exit(struct btree_trans *trans)
 
 static void __maybe_unused
 bch2_btree_path_node_to_text(struct printbuf *out,
-                            struct btree_bkey_cached_common *_b,
+                            struct btree_bkey_cached_common *b,
                             bool cached)
 {
        prt_printf(out, "    l=%u %s:",
-              _b->level, bch2_btree_ids[_b->btree_id]);
-       bch2_bpos_to_text(out, btree_node_pos(_b, cached));
+              b->level, bch2_btree_ids[b->btree_id]);
+       bch2_bpos_to_text(out, btree_node_pos(b, cached));
 }
 
 void bch2_btree_trans_to_text(struct printbuf *out, struct btree_trans *trans)
 {
        struct btree_path *path;
-       struct btree *b;
+       struct btree_bkey_cached_common *b;
        static char lock_types[] = { 'r', 'i', 'w' };
        unsigned l;
 
@@ -3423,12 +3437,11 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct btree_trans *trans)
                prt_printf(out, "\n");
 
                for (l = 0; l < BTREE_MAX_DEPTH; l++) {
-                       if (btree_node_locked(path, l)) {
+                       if (btree_node_locked(path, l) &&
+                           !IS_ERR_OR_NULL(b = (void *) READ_ONCE(path->l[l].b))) {
                                prt_printf(out, "    %s l=%u ",
                                       btree_node_intent_locked(path, l) ? "i" : "r", l);
-                               bch2_btree_path_node_to_text(out,
-                                               (void *) path->l[l].b,
-                                               path->cached);
+                               bch2_btree_path_node_to_text(out, b, path->cached);
                                prt_printf(out, "\n");
                        }
                }
@@ -3446,8 +3459,7 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct btree_trans *trans)
                bch2_bpos_to_text(out, trans->locking_pos);
 
                prt_printf(out, " node ");
-               bch2_btree_path_node_to_text(out,
-                               (void *) b, path->cached);
+               bch2_btree_path_node_to_text(out, b, path->cached);
                prt_printf(out, "\n");
        }
 }
index 4b9d03b875ef4740dd7524b441abb6801d11c0c6..f38fd25b49929484e54763dbae8215232d069646 100644 (file)
@@ -5,6 +5,8 @@
 #include "bset.h"
 #include "btree_types.h"
 
+#include <trace/events/bcachefs.h>
+
 static inline void __btree_path_get(struct btree_path *path, bool intent)
 {
        path->ref++;
@@ -159,19 +161,36 @@ void bch2_btree_node_iter_fix(struct btree_trans *trans, struct btree_path *,
                              struct btree *, struct btree_node_iter *,
                              struct bkey_packed *, unsigned, unsigned);
 
-bool bch2_btree_path_relock_intent(struct btree_trans *, struct btree_path *);
+int bch2_btree_path_relock_intent(struct btree_trans *, struct btree_path *);
 
 void bch2_path_put(struct btree_trans *, struct btree_path *, bool);
 
-bool bch2_trans_relock(struct btree_trans *);
+int bch2_trans_relock(struct btree_trans *);
 void bch2_trans_unlock(struct btree_trans *);
 
+static inline bool trans_was_restarted(struct btree_trans *trans, u32 restart_count)
+{
+       return restart_count != trans->restart_count;
+}
+
+void bch2_trans_verify_not_restarted(struct btree_trans *, u32);
+
+__always_inline
+static inline int btree_trans_restart_nounlock(struct btree_trans *trans, int err)
+{
+       BUG_ON(err <= 0);
+       BUG_ON(!bch2_err_matches(err, BCH_ERR_transaction_restart));
+
+       trans->restarted = err;
+       trans->restart_count++;
+       return -err;
+}
+
 __always_inline
-static inline int btree_trans_restart(struct btree_trans *trans)
+static inline int btree_trans_restart(struct btree_trans *trans, int err)
 {
-       trans->restarted = true;
-       bch2_trans_unlock(trans);
-       return -EINTR;
+       btree_trans_restart_nounlock(trans, err);
+       return -err;
 }
 
 bool bch2_btree_node_upgrade(struct btree_trans *,
@@ -191,14 +210,15 @@ static inline bool bch2_btree_path_upgrade(struct btree_trans *trans,
                : path->uptodate == BTREE_ITER_UPTODATE;
 }
 
-void __bch2_btree_path_downgrade(struct btree_path *, unsigned);
+void __bch2_btree_path_downgrade(struct btree_trans *, struct btree_path *, unsigned);
 
-static inline void bch2_btree_path_downgrade(struct btree_path *path)
+static inline void bch2_btree_path_downgrade(struct btree_trans *trans,
+                                            struct btree_path *path)
 {
        unsigned new_locks_want = path->level + !!path->intent_ref;
 
        if (path->locks_want > new_locks_want)
-               __bch2_btree_path_downgrade(path, new_locks_want);
+               __bch2_btree_path_downgrade(trans, path, new_locks_want);
 }
 
 void bch2_trans_downgrade(struct btree_trans *);
@@ -279,11 +299,12 @@ void bch2_trans_copy_iter(struct btree_iter *, struct btree_iter *);
 
 static inline void set_btree_iter_dontneed(struct btree_iter *iter)
 {
-       iter->path->preserve = false;
+       if (!iter->trans->restarted)
+               iter->path->preserve = false;
 }
 
 void *bch2_trans_kmalloc(struct btree_trans *, size_t);
-void bch2_trans_begin(struct btree_trans *);
+u32 bch2_trans_begin(struct btree_trans *);
 
 static inline struct btree *
 __btree_iter_peek_node_and_restart(struct btree_trans *trans, struct btree_iter *iter)
@@ -291,7 +312,7 @@ __btree_iter_peek_node_and_restart(struct btree_trans *trans, struct btree_iter
        struct btree *b;
 
        while (b = bch2_btree_iter_peek_node(iter),
-              PTR_ERR_OR_ZERO(b) == -EINTR)
+              bch2_err_matches(PTR_ERR_OR_ZERO(b), BCH_ERR_transaction_restart))
                bch2_trans_begin(trans);
 
        return b;
@@ -315,6 +336,15 @@ static inline int bkey_err(struct bkey_s_c k)
        return PTR_ERR_OR_ZERO(k.k);
 }
 
+static inline struct bkey_s_c bch2_btree_iter_peek_prev_type(struct btree_iter *iter,
+                                                            unsigned flags)
+{
+       BUG_ON(flags & BTREE_ITER_ALL_LEVELS);
+
+       return  flags & BTREE_ITER_SLOTS      ? bch2_btree_iter_peek_slot(iter) :
+                                               bch2_btree_iter_peek_prev(iter);
+}
+
 static inline struct bkey_s_c bch2_btree_iter_peek_type(struct btree_iter *iter,
                                                        unsigned flags)
 {
@@ -338,8 +368,12 @@ static inline struct bkey_s_c bch2_btree_iter_peek_upto_type(struct btree_iter *
 
 static inline int btree_trans_too_many_iters(struct btree_trans *trans)
 {
-       return hweight64(trans->paths_allocated) > BTREE_ITER_MAX / 2
-               ? -EINTR : 0;
+       if (hweight64(trans->paths_allocated) > BTREE_ITER_MAX) {
+               trace_trans_restart_too_many_iters(trans, _THIS_IP_);
+               return btree_trans_restart(trans, BCH_ERR_transaction_restart_too_many_iters);
+       }
+
+       return 0;
 }
 
 static inline struct bkey_s_c
@@ -350,12 +384,52 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans,
 
        while (btree_trans_too_many_iters(trans) ||
               (k = bch2_btree_iter_peek_type(iter, flags),
-               bkey_err(k) == -EINTR))
+               bch2_err_matches(bkey_err(k), BCH_ERR_transaction_restart)))
                bch2_trans_begin(trans);
 
        return k;
 }
 
+#define lockrestart_do(_trans, _do)                                    \
+({                                                                     \
+       int _ret;                                                       \
+                                                                       \
+       do {                                                            \
+               bch2_trans_begin(_trans);                               \
+               _ret = (_do);                                           \
+       } while (bch2_err_matches(_ret, BCH_ERR_transaction_restart));  \
+                                                                       \
+       _ret;                                                           \
+})
+
+/*
+ * nested_lockrestart_do(), nested_commit_do():
+ *
+ * These are like lockrestart_do() and commit_do(), with two differences:
+ *
+ *  - We don't call bch2_trans_begin() unless we had a transaction restart
+ *  - We return -BCH_ERR_transaction_restart_nested if we succeeded after a
+ *  transaction restart
+ */
+#define nested_lockrestart_do(_trans, _do)                             \
+({                                                                     \
+       u32 _restart_count, _orig_restart_count;                        \
+       int _ret;                                                       \
+                                                                       \
+       _restart_count = _orig_restart_count = (_trans)->restart_count; \
+                                                                       \
+       while (bch2_err_matches(_ret = (_do), BCH_ERR_transaction_restart))\
+               _restart_count = bch2_trans_begin(_trans);              \
+                                                                       \
+       if (!_ret)                                                      \
+               bch2_trans_verify_not_restarted(_trans, _restart_count);\
+                                                                       \
+       if (!_ret && trans_was_restarted(_trans, _orig_restart_count))  \
+               _ret = -BCH_ERR_transaction_restart_nested;             \
+                                                                       \
+       _ret;                                                           \
+})
+
 #define for_each_btree_key2(_trans, _iter, _btree_id,                  \
                            _start, _flags, _k, _do)                    \
 ({                                                                     \
@@ -364,7 +438,7 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans,
        bch2_trans_iter_init((_trans), &(_iter), (_btree_id),           \
                             (_start), (_flags));                       \
                                                                        \
-       do {                                                            \
+       while (1) {                                                     \
                bch2_trans_begin(_trans);                               \
                (_k) = bch2_btree_iter_peek_type(&(_iter), (_flags));   \
                if (!(_k).k) {                                          \
@@ -373,9 +447,42 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans,
                }                                                       \
                                                                        \
                _ret = bkey_err(_k) ?: (_do);                           \
-               if (!_ret)                                              \
-                       bch2_btree_iter_advance(&(_iter));              \
-       } while (_ret == 0 || _ret == -EINTR);                          \
+               if (bch2_err_matches(_ret, BCH_ERR_transaction_restart))\
+                       continue;                                       \
+               if (_ret)                                               \
+                       break;                                          \
+               if (!bch2_btree_iter_advance(&(_iter)))                 \
+                       break;                                          \
+       }                                                               \
+                                                                       \
+       bch2_trans_iter_exit((_trans), &(_iter));                       \
+       _ret;                                                           \
+})
+
+#define for_each_btree_key_reverse(_trans, _iter, _btree_id,           \
+                                  _start, _flags, _k, _do)             \
+({                                                                     \
+       int _ret = 0;                                                   \
+                                                                       \
+       bch2_trans_iter_init((_trans), &(_iter), (_btree_id),           \
+                            (_start), (_flags));                       \
+                                                                       \
+       while (1) {                                                     \
+               bch2_trans_begin(_trans);                               \
+               (_k) = bch2_btree_iter_peek_prev_type(&(_iter), (_flags));\
+               if (!(_k).k) {                                          \
+                       _ret = 0;                                       \
+                       break;                                          \
+               }                                                       \
+                                                                       \
+               _ret = bkey_err(_k) ?: (_do);                           \
+               if (bch2_err_matches(_ret, BCH_ERR_transaction_restart))\
+                       continue;                                       \
+               if (_ret)                                               \
+                       break;                                          \
+               if (!bch2_btree_iter_rewind(&(_iter)))                  \
+                       break;                                          \
+       }                                                               \
                                                                        \
        bch2_trans_iter_exit((_trans), &(_iter));                       \
        _ret;                                                           \
index a5b0a956e70568125d2a41f47bd475562066060f..fa90581f7ade7d6869e5bc8244e489f9d819bfaf 100644 (file)
@@ -5,6 +5,7 @@
 #include "btree_key_cache.h"
 #include "btree_locking.h"
 #include "btree_update.h"
+#include "errcode.h"
 #include "error.h"
 #include "journal.h"
 #include "journal_reclaim.h"
@@ -290,9 +291,8 @@ static int btree_key_cache_fill(struct btree_trans *trans,
        k = bch2_btree_path_peek_slot(path, &u);
 
        if (!bch2_btree_node_relock(trans, ck_path, 0)) {
-               trace_trans_restart_relock_key_cache_fill(trans->fn,
-                               _THIS_IP_, ck_path->btree_id, &ck_path->pos);
-               ret = btree_trans_restart(trans);
+               trace_trans_restart_relock_key_cache_fill(trans, _THIS_IP_, ck_path);
+               ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_raced);
                goto err;
        }
 
@@ -347,8 +347,10 @@ static int bkey_cached_check_fn(struct six_lock *lock, void *p)
        struct bkey_cached *ck = container_of(lock, struct bkey_cached, c.lock);
        const struct btree_path *path = p;
 
-       return ck->key.btree_id == path->btree_id &&
-               !bpos_cmp(ck->key.pos, path->pos) ? 0 : -1;
+       if (ck->key.btree_id != path->btree_id ||
+           bpos_cmp(ck->key.pos, path->pos))
+               return BCH_ERR_lock_fail_node_reused;
+       return 0;
 }
 
 __flatten
 
 __flatten
@@ -370,11 +372,6 @@ int bch2_btree_path_traverse_cached(struct btree_trans *trans, struct btree_path
 retry:
        ck = bch2_btree_key_cache_find(c, path->btree_id, path->pos);
        if (!ck) {
-               if (flags & BTREE_ITER_CACHED_NOCREATE) {
-                       path->l[0].b = NULL;
-                       return 0;
-               }
-
                ck = btree_key_cache_create(c, path->btree_id, path->pos);
                ret = PTR_ERR_OR_ZERO(ck);
                if (ret)
@@ -387,14 +384,15 @@ retry:
        } else {
                enum six_lock_type lock_want = __btree_lock_want(path, 0);
 
-               if (!btree_node_lock(trans, path, (void *) ck, path->pos, 0,
-                                    lock_want,
-                                    bkey_cached_check_fn, path, _THIS_IP_)) {
-                       if (!trans->restarted)
+               ret = btree_node_lock(trans, path, (void *) ck, path->pos, 0,
+                                     lock_want,
+                                     bkey_cached_check_fn, path, _THIS_IP_);
+               if (ret) {
+                       if (bch2_err_matches(ret, BCH_ERR_lock_fail_node_reused))
                                goto retry;
-
-                       ret = -EINTR;
-                       goto err;
+                       if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+                               goto err;
+                       BUG();
                }
 
                if (ck->key.btree_id != path->btree_id ||
@@ -409,11 +407,15 @@ retry:
        path->l[0].lock_seq     = ck->c.lock.state.seq;
        path->l[0].b            = (void *) ck;
 fill:
-       if (!ck->valid && !(flags & BTREE_ITER_CACHED_NOFILL)) {
+       if (!ck->valid) {
+               /*
+                * Using the underscore version because we haven't set
+                * path->uptodate yet:
+                */
                if (!path->locks_want &&
                    !__bch2_btree_path_upgrade(trans, path, 1)) {
-                       trace_transaction_restart_ip(trans->fn, _THIS_IP_);
-                       ret = btree_trans_restart(trans);
+                       trace_transaction_restart_key_cache_upgrade(trans, _THIS_IP_);
+                       ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_upgrade);
                        goto err;
                }
 
@@ -426,13 +428,14 @@ fill:
                set_bit(BKEY_CACHED_ACCESSED, &ck->flags);
 
        path->uptodate = BTREE_ITER_UPTODATE;
+       BUG_ON(!ck->valid);
        BUG_ON(btree_node_locked_type(path, 0) != btree_lock_want(path, 0));
 
        return ret;
 err:
-       if (ret != -EINTR) {
-               btree_node_unlock(path, 0);
-               path->l[0].b = BTREE_ITER_NO_NODE_ERROR;
+       if (!bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
+               btree_node_unlock(trans, path, 0);
+               path->l[0].b = ERR_PTR(ret);
        }
        return ret;
 }
@@ -455,8 +458,6 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
                             BTREE_ITER_ALL_SNAPSHOTS);
        bch2_trans_iter_init(trans, &c_iter, key.btree_id, key.pos,
                             BTREE_ITER_CACHED|
-                            BTREE_ITER_CACHED_NOFILL|
-                            BTREE_ITER_CACHED_NOCREATE|
                             BTREE_ITER_INTENT);
        b_iter.flags &= ~BTREE_ITER_WITH_KEY_CACHE;
 
@@ -497,13 +498,14 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
                                   ? JOURNAL_WATERMARK_reserved
                                   : 0)|
                                  commit_flags);
-       if (ret) {
-               bch2_fs_fatal_err_on(ret != -EINTR &&
-                                    ret != -EAGAIN &&
-                                    !bch2_journal_error(j), c,
-                       "error flushing key cache: %i", ret);
+
+       bch2_fs_fatal_err_on(ret &&
+                            !bch2_err_matches(ret, BCH_ERR_transaction_restart) &&
+                            !bch2_err_matches(ret, BCH_ERR_journal_reclaim_would_deadlock) &&
+                            !bch2_journal_error(j), c,
+                            "error flushing key cache: %s", bch2_err_str(ret));
+       if (ret)
                goto out;
-       }
 
        bch2_journal_pin_drop(j, &ck->journal);
        bch2_journal_preres_put(j, &ck->res);
index 67c970d727ac09089c96920084a58e5c132a69bf..c3f3cb8733f249c0f2831c145a6f8e16e643fe8e 100644 (file)
 
 #include "btree_iter.h"
 
+static inline bool is_btree_node(struct btree_path *path, unsigned l)
+{
+       return l < BTREE_MAX_DEPTH && !IS_ERR_OR_NULL(path->l[l].b);
+}
+
 /* matches six lock types */
 enum btree_node_locked_type {
        BTREE_NODE_UNLOCKED             = -1,
@@ -58,7 +63,7 @@ static inline void mark_btree_node_unlocked(struct btree_path *path,
        path->nodes_intent_locked &= ~(1 << level);
 }
 
-static inline void mark_btree_node_locked(struct btree_trans *trans,
+static inline void mark_btree_node_locked_noreset(struct btree_trans *trans,
                                          struct btree_path *path,
                                          unsigned level,
                                          enum six_lock_type type)
@@ -73,11 +78,22 @@ static inline void mark_btree_node_locked(struct btree_trans *trans,
        path->nodes_intent_locked |= type << level;
 }
 
+static inline void mark_btree_node_locked(struct btree_trans *trans,
+                                         struct btree_path *path,
+                                         unsigned level,
+                                         enum six_lock_type type)
+{
+       mark_btree_node_locked_noreset(trans, path, level, type);
+#ifdef CONFIG_BCACHEFS_LOCK_TIME_STATS
+       path->l[level].lock_taken_time = ktime_get_ns();
+#endif
+}
+
 static inline void mark_btree_node_intent_locked(struct btree_trans *trans,
                                                 struct btree_path *path,
                                                 unsigned level)
 {
-       mark_btree_node_locked(trans, path, level, SIX_LOCK_intent);
+       mark_btree_node_locked_noreset(trans, path, level, SIX_LOCK_intent);
 }
 
 static inline enum six_lock_type __btree_lock_want(struct btree_path *path, int level)
@@ -99,23 +115,35 @@ btree_lock_want(struct btree_path *path, int level)
        return BTREE_NODE_UNLOCKED;
 }
 
-static inline void btree_node_unlock(struct btree_path *path, unsigned level)
+static inline void btree_node_unlock(struct btree_trans *trans,
+                                    struct btree_path *path, unsigned level)
 {
        int lock_type = btree_node_locked_type(path, level);
 
        EBUG_ON(level >= BTREE_MAX_DEPTH);
 
-       if (lock_type != BTREE_NODE_UNLOCKED)
+       if (lock_type != BTREE_NODE_UNLOCKED) {
                six_unlock_type(&path->l[level].b->c.lock, lock_type);
+#ifdef CONFIG_BCACHEFS_LOCK_TIME_STATS
+               if (trans->lock_name_idx < BCH_LOCK_TIME_NR) {
+                       struct bch_fs *c = trans->c;
+
+                       __bch2_time_stats_update(&c->lock_held_stats.times[trans->lock_name_idx],
+                                              path->l[level].lock_taken_time,
+                                                ktime_get_ns());
+               }
+#endif
+       }
        mark_btree_node_unlocked(path, level);
 }
 
-static inline void __bch2_btree_path_unlock(struct btree_path *path)
+static inline void __bch2_btree_path_unlock(struct btree_trans *trans,
+                                           struct btree_path *path)
 {
        btree_path_set_dirty(path, BTREE_ITER_NEED_RELOCK);
 
        while (path->nodes_locked)
-               btree_node_unlock(path, __ffs(path->nodes_locked));
+               btree_node_unlock(trans, path, __ffs(path->nodes_locked));
 }
 
 static inline enum bch_time_stats lock_to_time_stat(enum six_lock_type type)
@@ -132,7 +160,7 @@ static inline enum bch_time_stats lock_to_time_stat(enum six_lock_type type)
        }
 }
 
-static inline bool btree_node_lock_type(struct btree_trans *trans,
+static inline int btree_node_lock_type(struct btree_trans *trans,
                                       struct btree_path *path,
                                       struct btree *b,
                                       struct bpos pos, unsigned level,
@@ -141,10 +169,10 @@ static inline bool btree_node_lock_type(struct btree_trans *trans,
 {
        struct bch_fs *c = trans->c;
        u64 start_time;
-       bool ret;
+       int ret;
 
        if (six_trylock_type(&b->c.lock, type))
-               return true;
+               return 0;
 
        start_time = local_clock();
 
@@ -153,14 +181,15 @@ static inline bool btree_node_lock_type(struct btree_trans *trans,
        trans->locking_btree_id = path->btree_id;
        trans->locking_level    = level;
        trans->locking_lock_type = type;
-       trans->locking          = b;
-       ret = six_lock_type(&b->c.lock, type, should_sleep_fn, p) == 0;
+       trans->locking          = &b->c;
+       ret = six_lock_type(&b->c.lock, type, should_sleep_fn, p);
        trans->locking = NULL;
 
        if (ret)
-               bch2_time_stats_update(&c->times[lock_to_time_stat(type)], start_time);
+               return ret;
 
-       return ret;
+       bch2_time_stats_update(&c->times[lock_to_time_stat(type)], start_time);
+       return 0;
 }
 
 /*
@@ -183,26 +212,34 @@ static inline bool btree_node_lock_increment(struct btree_trans *trans,
        return false;
 }
 
-bool __bch2_btree_node_lock(struct btree_trans *, struct btree_path *,
-                           struct btree *, struct bpos, unsigned,
-                           enum six_lock_type,
-                           six_lock_should_sleep_fn, void *,
-                           unsigned long);
+int __bch2_btree_node_lock(struct btree_trans *, struct btree_path *,
+                          struct btree *, struct bpos, unsigned,
+                          enum six_lock_type,
+                          six_lock_should_sleep_fn, void *,
+                          unsigned long);
 
-static inline bool btree_node_lock(struct btree_trans *trans,
+static inline int btree_node_lock(struct btree_trans *trans,
                        struct btree_path *path,
                        struct btree *b, struct bpos pos, unsigned level,
                        enum six_lock_type type,
                        six_lock_should_sleep_fn should_sleep_fn, void *p,
                        unsigned long ip)
 {
+       int ret = 0;
+
        EBUG_ON(level >= BTREE_MAX_DEPTH);
        EBUG_ON(!(trans->paths_allocated & (1ULL << path->idx)));
 
-       return likely(six_trylock_type(&b->c.lock, type)) ||
-               btree_node_lock_increment(trans, b, level, type) ||
-               __bch2_btree_node_lock(trans, path, b, pos, level, type,
-                                      should_sleep_fn, p, ip);
+       if (likely(six_trylock_type(&b->c.lock, type)) ||
+           btree_node_lock_increment(trans, b, level, type) ||
+           !(ret = __bch2_btree_node_lock(trans, path, b, pos, level, type,
+                                          should_sleep_fn, p, ip))) {
+#ifdef CONFIG_BCACHEFS_LOCK_TIME_STATS
+               path->l[b->c.level].lock_taken_time = ktime_get_ns();
+#endif
+       }
+
+       return ret;
 }
 
 bool __bch2_btree_node_relock(struct btree_trans *, struct btree_path *, unsigned);
@@ -254,6 +291,30 @@ static inline void bch2_btree_node_lock_write(struct btree_trans *trans,
                __bch2_btree_node_lock_write(trans, b);
 }
 
-#endif /* _BCACHEFS_BTREE_LOCKING_H */
+static inline void btree_path_set_should_be_locked(struct btree_path *path)
+{
+       EBUG_ON(!btree_node_locked(path, path->level));
+       EBUG_ON(path->uptodate);
 
+       path->should_be_locked = true;
+}
 
+static inline void __btree_path_set_level_up(struct btree_trans *trans,
+                                     struct btree_path *path,
+                                     unsigned l)
+{
+       btree_node_unlock(trans, path, l);
+       path->l[l].b = ERR_PTR(-BCH_ERR_no_btree_node_up);
+}
+
+static inline void btree_path_set_level_up(struct btree_trans *trans,
+                                   struct btree_path *path)
+{
+       __btree_path_set_level_up(trans, path, path->level++);
+       btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
+}
+
+struct six_lock_count bch2_btree_node_lock_counts(struct btree_trans *,
+                               struct btree_path *, struct btree *, unsigned);
+
+#endif /* _BCACHEFS_BTREE_LOCKING_H */
index be12c9ff7ea3cf7d823d8a6dff0ce162892c6d23..1ff999179b4e1ca2e651168aee383f4019662c81 100644 (file)
@@ -199,15 +199,13 @@ struct btree_node_iter {
 #define BTREE_ITER_IS_EXTENTS          (1 << 4)
 #define BTREE_ITER_NOT_EXTENTS         (1 << 5)
 #define BTREE_ITER_CACHED              (1 << 6)
-#define BTREE_ITER_CACHED_NOFILL       (1 << 7)
-#define BTREE_ITER_CACHED_NOCREATE     (1 << 8)
-#define BTREE_ITER_WITH_KEY_CACHE      (1 << 9)
-#define BTREE_ITER_WITH_UPDATES                (1 << 10)
-#define BTREE_ITER_WITH_JOURNAL                (1 << 11)
-#define __BTREE_ITER_ALL_SNAPSHOTS     (1 << 12)
-#define BTREE_ITER_ALL_SNAPSHOTS       (1 << 13)
-#define BTREE_ITER_FILTER_SNAPSHOTS    (1 << 14)
-#define BTREE_ITER_NOPRESERVE          (1 << 15)
+#define BTREE_ITER_WITH_KEY_CACHE      (1 << 7)
+#define BTREE_ITER_WITH_UPDATES                (1 << 8)
+#define BTREE_ITER_WITH_JOURNAL                (1 << 9)
+#define __BTREE_ITER_ALL_SNAPSHOTS     (1 << 10)
+#define BTREE_ITER_ALL_SNAPSHOTS       (1 << 11)
+#define BTREE_ITER_FILTER_SNAPSHOTS    (1 << 12)
+#define BTREE_ITER_NOPRESERVE          (1 << 13)
 
 enum btree_path_uptodate {
        BTREE_ITER_UPTODATE             = 0,
@@ -215,15 +213,6 @@ enum btree_path_uptodate {
        BTREE_ITER_NEED_TRAVERSE        = 2,
 };
 
-#define BTREE_ITER_NO_NODE_GET_LOCKS   ((struct btree *) 1)
-#define BTREE_ITER_NO_NODE_DROP                ((struct btree *) 2)
-#define BTREE_ITER_NO_NODE_LOCK_ROOT   ((struct btree *) 3)
-#define BTREE_ITER_NO_NODE_UP          ((struct btree *) 4)
-#define BTREE_ITER_NO_NODE_DOWN                ((struct btree *) 5)
-#define BTREE_ITER_NO_NODE_INIT                ((struct btree *) 6)
-#define BTREE_ITER_NO_NODE_ERROR       ((struct btree *) 7)
-#define BTREE_ITER_NO_NODE_CACHED      ((struct btree *) 8)
-
 struct btree_path {
        u8                      idx;
        u8                      sorted_idx;
@@ -251,6 +240,9 @@ struct btree_path {
                struct btree    *b;
                struct btree_node_iter iter;
                u32             lock_seq;
+#ifdef CONFIG_BCACHEFS_LOCK_TIME_STATS
+               u64             lock_taken_time;
+#endif
        }                       l[BTREE_MAX_DEPTH];
 #ifdef CONFIG_BCACHEFS_DEBUG
        unsigned long           ip_allocated;
@@ -391,7 +383,7 @@ struct btree_trans {
        const char              *fn;
        struct list_head        list;
        u64                     last_begin_time;
-       struct btree            *locking;
+       struct btree_bkey_cached_common *locking;
        unsigned                locking_path_idx;
        struct bpos             locking_pos;
        u8                      locking_btree_id;
@@ -405,9 +397,12 @@ struct btree_trans {
        u8                      traverse_all_idx;
        bool                    used_mempool:1;
        bool                    in_traverse_all:1;
-       bool                    restarted:1;
        bool                    memory_allocation_failure:1;
        bool                    is_initial_gc:1;
+       enum bch_errcode        restarted:16;
+       u32                     restart_count;
+       unsigned long           last_restarted_ip;
+
        /*
         * For when bch2_trans_update notices we'll be splitting a compressed
         * extent:
@@ -437,6 +432,7 @@ struct btree_trans {
        unsigned                journal_u64s;
        unsigned                journal_preres_u64s;
        struct replicas_delta_list *fs_usage_deltas;
+       int                      lock_name_idx;
 };
 
 #define BTREE_FLAGS()                                                  \
index e9127dbf7e241ab62491af2e800dd15c8aa56d4b..89941fb8caa06f35f00339e597f5a0bccfe2721c 100644 (file)
@@ -90,7 +90,6 @@ int bch2_trans_log_msg(struct btree_trans *, const char *);
  * This is main entry point for btree updates.
  *
  * Return values:
- * -EINTR: locking changed, this function should be called again.
  * -EROFS: filesystem read only
  * -EIO: journal or btree node IO error
  */
@@ -106,29 +105,33 @@ static inline int bch2_trans_commit(struct btree_trans *trans,
        return __bch2_trans_commit(trans);
 }
 
-#define lockrestart_do(_trans, _do)                                    \
+#define commit_do(_trans, _disk_res, _journal_seq, _flags, _do)        \
+       lockrestart_do(_trans, _do ?: bch2_trans_commit(_trans, (_disk_res),\
+                                       (_journal_seq), (_flags)))
+
+#define nested_commit_do(_trans, _disk_res, _journal_seq, _flags, _do) \
+       nested_lockrestart_do(_trans, _do ?: bch2_trans_commit(_trans, (_disk_res),\
+                                       (_journal_seq), (_flags)))
+
+#define bch2_trans_do(_c, _disk_res, _journal_seq, _flags, _do)                \
 ({                                                                     \
+       struct btree_trans trans;                                       \
        int _ret;                                                       \
                                                                        \
-       do {                                                            \
-               bch2_trans_begin(_trans);                               \
-               _ret = (_do);                                           \
-       } while (_ret == -EINTR);                                       \
+       bch2_trans_init(&trans, (_c), 0, 0);                            \
+       _ret = commit_do(&trans, _disk_res, _journal_seq, _flags, _do); \
+       bch2_trans_exit(&trans);                                        \
                                                                        \
        _ret;                                                           \
 })
 
-#define commit_do(_trans, _disk_res, _journal_seq, _flags, _do)        \
-       lockrestart_do(_trans, _do ?: bch2_trans_commit(_trans, (_disk_res),\
-                                       (_journal_seq), (_flags)))
-
-#define bch2_trans_do(_c, _disk_res, _journal_seq, _flags, _do)                \
+#define bch2_trans_run(_c, _do)                                                \
 ({                                                                     \
        struct btree_trans trans;                                       \
        int _ret;                                                       \
                                                                        \
        bch2_trans_init(&trans, (_c), 0, 0);                            \
-       _ret = commit_do(&trans, _disk_res, _journal_seq, _flags, _do); \
+       _ret = (_do);                                                   \
        bch2_trans_exit(&trans);                                        \
                                                                        \
        _ret;                                                           \
index c3ef2387ddad23db4809b08ab063950e25c0338c..e4138614e0810558c9cc66d3dc85e231878ce797 100644 (file)
@@ -1005,9 +1005,8 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
                nr_nodes[1] += 1;
 
        if (!bch2_btree_path_upgrade(trans, path, U8_MAX)) {
-               trace_trans_restart_iter_upgrade(trans->fn, _RET_IP_,
-                                                path->btree_id, &path->pos);
-               ret = btree_trans_restart(trans);
+               trace_trans_restart_iter_upgrade(trans, _RET_IP_, path);
+               ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_upgrade);
                return ERR_PTR(ret);
        }
 
@@ -1016,9 +1015,10 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
        else if (!down_read_trylock(&c->gc_lock)) {
                bch2_trans_unlock(trans);
                down_read(&c->gc_lock);
-               if (!bch2_trans_relock(trans)) {
+               ret = bch2_trans_relock(trans);
+               if (ret) {
                        up_read(&c->gc_lock);
-                       return ERR_PTR(-EINTR);
+                       return ERR_PTR(ret);
                }
        }
 
@@ -1060,8 +1060,8 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
                                      journal_flags);
        if (ret) {
                bch2_btree_update_free(as);
-               trace_trans_restart_journal_preres_get(trans->fn, _RET_IP_);
-               btree_trans_restart(trans);
+               trace_trans_restart_journal_preres_get(trans, _RET_IP_);
+               ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_journal_preres_get);
                return ERR_PTR(ret);
        }
 
@@ -1076,10 +1076,9 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
        if (ret)
                goto err;
 
-       if (!bch2_trans_relock(trans)) {
-               ret = -EINTR;
+       ret = bch2_trans_relock(trans);
+       if (ret)
                goto err;
-       }
 
        return as;
 err:
@@ -1650,7 +1649,7 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
        if (ret)
                goto err;
 
-       sib_path->should_be_locked = true;
+       btree_path_set_should_be_locked(sib_path);
 
        m = sib_path->l[level].b;
 
@@ -1830,7 +1829,7 @@ int bch2_btree_node_rewrite(struct btree_trans *trans,
 
        bch2_btree_update_done(as);
 out:
-       bch2_btree_path_downgrade(iter->path);
+       bch2_btree_path_downgrade(trans, iter->path);
        return ret;
 }
 
@@ -1943,10 +1942,7 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans,
                BUG_ON(iter2.path->level != b->c.level);
                BUG_ON(bpos_cmp(iter2.path->pos, new_key->k.p));
 
-               btree_node_unlock(iter2.path, iter2.path->level);
-               path_l(iter2.path)->b = BTREE_ITER_NO_NODE_UP;
-               iter2.path->level++;
-               btree_path_set_dirty(iter2.path, BTREE_ITER_NEED_TRAVERSE);
+               btree_path_set_level_up(trans, iter2.path);
 
                bch2_btree_path_check_sort(trans, iter2.path, 0);
 
@@ -2017,10 +2013,8 @@ int bch2_btree_node_update_key(struct btree_trans *trans, struct btree_iter *ite
        int ret = 0;
 
        if (!btree_node_intent_locked(path, b->c.level) &&
-           !bch2_btree_path_upgrade(trans, path, b->c.level + 1)) {
-               btree_trans_restart(trans);
-               return -EINTR;
-       }
+           !bch2_btree_path_upgrade(trans, path, b->c.level + 1))
+               return btree_trans_restart(trans, BCH_ERR_transaction_restart_upgrade);
 
        closure_init_stack(&cl);
 
@@ -2033,8 +2027,9 @@ int bch2_btree_node_update_key(struct btree_trans *trans, struct btree_iter *ite
                if (ret) {
                        bch2_trans_unlock(trans);
                        closure_sync(&cl);
-                       if (!bch2_trans_relock(trans))
-                               return -EINTR;
+                       ret = bch2_trans_relock(trans);
+                       if (ret)
+                               return ret;
                }
 
                new_hash = bch2_btree_node_mem_alloc(c, false);
index aed26b5790e87ab2cfbc3f22bbd856111de0dd8f..dd832f12b128555d45c2c084ef9a07da98ec97e2 100644 (file)
@@ -10,6 +10,7 @@
 #include "btree_locking.h"
 #include "buckets.h"
 #include "debug.h"
+#include "errcode.h"
 #include "error.h"
 #include "extent_update.h"
 #include "journal.h"
@@ -282,9 +283,10 @@ bch2_trans_journal_preres_get_cold(struct btree_trans *trans, unsigned u64s,
        if (ret)
                return ret;
 
-       if (!bch2_trans_relock(trans)) {
-               trace_trans_restart_journal_preres_get(trans->fn, trace_ip);
-               return -EINTR;
+       ret = bch2_trans_relock(trans);
+       if (ret) {
+               trace_trans_restart_journal_preres_get(trans, trace_ip);
+               return ret;
        }
 
        return 0;
@@ -373,15 +375,8 @@ btree_key_can_insert_cached(struct btree_trans *trans,
         * Keys returned by peek() are no longer valid pointers, so we need a
         * transaction restart:
         */
-       trace_trans_restart_key_cache_key_realloced(trans->fn, _RET_IP_,
-                                            path->btree_id, &path->pos,
-                                            old_u64s, new_u64s);
-       /*
-        * Not using btree_trans_restart() because we can't unlock here, we have
-        * write locks held:
-        */
-       trans->restarted = true;
-       return -EINTR;
+       trace_trans_restart_key_cache_key_realloced(trans, _RET_IP_, path, old_u64s, new_u64s);
+       return btree_trans_restart_nounlock(trans, BCH_ERR_transaction_restart_key_cache_realloced);
 }
 
 /* Triggers: */
@@ -572,9 +567,8 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,
        int ret;
 
        if (race_fault()) {
-               trace_trans_restart_fault_inject(trans->fn, trace_ip);
-               trans->restarted = true;
-               return -EINTR;
+               trace_trans_restart_fault_inject(trans, trace_ip);
+               return btree_trans_restart_nounlock(trans, BCH_ERR_transaction_restart_fault_inject);
        }
 
        /*
@@ -726,8 +720,10 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,
                        btree_insert_key_leaf(trans, i);
                else if (!i->key_cache_already_flushed)
                        bch2_btree_insert_key_cached(trans, i->path, i->k);
-               else
+               else {
                        bch2_btree_key_cache_drop(trans, i->path);
+                       btree_path_set_dirty(i->path, BTREE_ITER_NEED_TRAVERSE);
+               }
        }
 
        return ret;
@@ -806,6 +802,7 @@ static inline bool have_conflicting_read_lock(struct btree_trans *trans, struct
 static inline int trans_lock_write(struct btree_trans *trans)
 {
        struct btree_insert_entry *i;
+       int ret;
 
        trans_for_each_update(trans, i) {
                if (same_leaf_as_prev(trans, i))
@@ -815,10 +812,11 @@ static inline int trans_lock_write(struct btree_trans *trans)
                        if (have_conflicting_read_lock(trans, i->path))
                                goto fail;
 
-                       btree_node_lock_type(trans, i->path,
+                       ret = btree_node_lock_type(trans, i->path,
                                             insert_l(i)->b,
                                             i->path->pos, i->level,
                                             SIX_LOCK_write, NULL, NULL);
+                       BUG_ON(ret);
                }
 
                bch2_btree_node_prep_for_write(trans, i->path, insert_l(i)->b);
@@ -833,8 +831,8 @@ fail:
                bch2_btree_node_unlock_write_inlined(trans, i->path, insert_l(i)->b);
        }
 
-       trace_trans_restart_would_deadlock_write(trans->fn);
-       return btree_trans_restart(trans);
+       trace_trans_restart_would_deadlock_write(trans);
+       return btree_trans_restart(trans, BCH_ERR_transaction_restart_would_deadlock_write);
 }
 
 static noinline void bch2_drop_overwrites_from_journal(struct btree_trans *trans)
@@ -965,12 +963,8 @@ int bch2_trans_commit_error(struct btree_trans *trans,
        switch (ret) {
        case BTREE_INSERT_BTREE_NODE_FULL:
                ret = bch2_btree_split_leaf(trans, i->path, trans->flags);
-               if (!ret)
-                       return 0;
-
-               if (ret == -EINTR)
-                       trace_trans_restart_btree_node_split(trans->fn, trace_ip,
-                                               i->btree_id, &i->path->pos);
+               if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+                       trace_trans_restart_btree_node_split(trans, trace_ip, i->path);
                break;
        case BTREE_INSERT_NEED_MARK_REPLICAS:
                bch2_trans_unlock(trans);
@@ -979,19 +973,16 @@ int bch2_trans_commit_error(struct btree_trans *trans,
                if (ret)
                        break;
 
-               if (bch2_trans_relock(trans))
-                       return 0;
-
-               trace_trans_restart_mark_replicas(trans->fn, trace_ip);
-               ret = -EINTR;
+               ret = bch2_trans_relock(trans);
+               if (ret)
+                       trace_trans_restart_mark_replicas(trans, trace_ip);
                break;
        case BTREE_INSERT_NEED_JOURNAL_RES:
                bch2_trans_unlock(trans);
 
                if ((trans->flags & BTREE_INSERT_JOURNAL_RECLAIM) &&
                    !(trans->flags & JOURNAL_WATERMARK_reserved)) {
-                       trans->restarted = true;
-                       ret = -EAGAIN;
+                       ret = -BCH_ERR_journal_reclaim_would_deadlock;
                        break;
                }
 
@@ -999,34 +990,30 @@ int bch2_trans_commit_error(struct btree_trans *trans,
                if (ret)
                        break;
 
-               if (bch2_trans_relock(trans))
-                       return 0;
-
-               trace_trans_restart_journal_res_get(trans->fn, trace_ip);
-               ret = -EINTR;
+               ret = bch2_trans_relock(trans);
+               if (ret)
+                       trace_trans_restart_journal_res_get(trans, trace_ip);
                break;
        case BTREE_INSERT_NEED_JOURNAL_RECLAIM:
                bch2_trans_unlock(trans);
 
-               trace_trans_blocked_journal_reclaim(trans->fn, trace_ip);
+               trace_trans_blocked_journal_reclaim(trans, trace_ip);
 
                wait_event_freezable(c->journal.reclaim_wait,
                                     (ret = journal_reclaim_wait_done(c)));
                if (ret < 0)
                        break;
 
-               if (bch2_trans_relock(trans))
-                       return 0;
-
-               trace_trans_restart_journal_reclaim(trans->fn, trace_ip);
-               ret = -EINTR;
+               ret = bch2_trans_relock(trans);
+               if (ret)
+                       trace_trans_restart_journal_reclaim(trans, trace_ip);
                break;
        default:
                BUG_ON(ret >= 0);
                break;
        }
 
-       BUG_ON((ret == EINTR || ret == -EAGAIN) && !trans->restarted);
+       BUG_ON(bch2_err_matches(ret, BCH_ERR_transaction_restart) != !!trans->restarted);
        BUG_ON(ret == -ENOSPC &&
               !(trans->flags & BTREE_INSERT_NOWAIT) &&
               (trans->flags & BTREE_INSERT_NOFAIL));
@@ -1046,13 +1033,11 @@ bch2_trans_commit_get_rw_cold(struct btree_trans *trans)
 
        bch2_trans_unlock(trans);
 
-       ret = bch2_fs_read_write_early(c);
+       ret =   bch2_fs_read_write_early(c) ?:
+               bch2_trans_relock(trans);
        if (ret)
                return ret;
 
-       if (!bch2_trans_relock(trans))
-               return -EINTR;
-
        percpu_ref_get(&c->writes);
        return 0;
 }
@@ -1122,9 +1107,8 @@ int __bch2_trans_commit(struct btree_trans *trans)
                BUG_ON(!i->path->should_be_locked);
 
                if (unlikely(!bch2_btree_path_upgrade(trans, i->path, i->level + 1))) {
-                       trace_trans_restart_upgrade(trans->fn, _RET_IP_,
-                                                   i->btree_id, &i->path->pos);
-                       ret = btree_trans_restart(trans);
+                       trace_trans_restart_upgrade(trans, _RET_IP_, i->path);
+                       ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_upgrade);
                        goto out;
                }
 
@@ -1164,7 +1148,7 @@ retry:
        if (ret)
                goto err;
 
-       trace_transaction_commit(trans->fn, _RET_IP_);
+       trace_transaction_commit(trans, _RET_IP_);
 out:
        bch2_journal_preres_put(&c->journal, &trans->journal_preres);
 
@@ -1567,7 +1551,7 @@ bch2_trans_update_by_path_trace(struct btree_trans *trans, struct btree_path *pa
                if (ret)
                        goto err;
 
-               btree_path->should_be_locked = true;
+               btree_path_set_should_be_locked(btree_path);
                ret = bch2_trans_update_by_path_trace(trans, btree_path, k, flags, ip);
 err:
                bch2_path_put(trans, btree_path, true);
@@ -1633,12 +1617,11 @@ int __must_check bch2_trans_update(struct btree_trans *trans, struct btree_iter
                        ck = (void *) iter->key_cache_path->l[0].b;
 
                        if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
-                               trace_trans_restart_key_cache_raced(trans->fn, _RET_IP_);
-                               btree_trans_restart(trans);
-                               return -EINTR;
+                               trace_trans_restart_key_cache_raced(trans, _RET_IP_);
+                               return btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_raced);
                        }
 
-                       iter->key_cache_path->should_be_locked = true;
+                       btree_path_set_should_be_locked(iter->key_cache_path);
                }
 
                path = iter->key_cache_path;
@@ -1763,7 +1746,7 @@ retry:
                        break;
        }
 
-       if (ret == -EINTR) {
+       if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
                ret = 0;
                goto retry;
        }
@@ -1782,9 +1765,8 @@ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id,
                            unsigned update_flags,
                            u64 *journal_seq)
 {
-       return bch2_trans_do(c, NULL, journal_seq, 0,
-                            bch2_btree_delete_range_trans(&trans, id, start, end,
-                                                          update_flags, journal_seq));
+       return bch2_trans_run(c,
+                       bch2_btree_delete_range_trans(&trans, id, start, end, update_flags, journal_seq));
 }
 
 int bch2_trans_log_msg(struct btree_trans *trans, const char *msg)
index fe2cd730aee01c0eaff4c8c1a730aeef34e519c3..b4be2122c2d5eb72f8d6112e1a5d7f3696cab97d 100644 (file)
@@ -544,22 +544,6 @@ int bch2_mark_alloc(struct btree_trans *trans,
                }
        }
 
-       if (new_a.data_type == BCH_DATA_free &&
-           (!new_a.journal_seq || new_a.journal_seq < c->journal.flushed_seq_ondisk))
-               closure_wake_up(&c->freelist_wait);
-
-       if (new_a.data_type == BCH_DATA_need_discard &&
-           (!new_a.journal_seq || new_a.journal_seq < c->journal.flushed_seq_ondisk))
-               bch2_do_discards(c);
-
-       if (old_a.data_type != BCH_DATA_cached &&
-           new_a.data_type == BCH_DATA_cached &&
-           should_invalidate_buckets(ca, bch2_dev_usage_read(ca)))
-               bch2_do_invalidates(c);
-
-       if (new_a.data_type == BCH_DATA_need_gc_gens)
-               bch2_do_gc_gens(c);
-
        percpu_down_read(&c->mark_lock);
        if (!gc && new_a.gen != old_a.gen)
                *bucket_gen(ca, new.k->p.offset) = new_a.gen;
@@ -599,6 +583,22 @@ int bch2_mark_alloc(struct btree_trans *trans,
                }
        }
 
+       if (new_a.data_type == BCH_DATA_free &&
+           (!new_a.journal_seq || new_a.journal_seq < c->journal.flushed_seq_ondisk))
+               closure_wake_up(&c->freelist_wait);
+
+       if (new_a.data_type == BCH_DATA_need_discard &&
+           (!new_a.journal_seq || new_a.journal_seq < c->journal.flushed_seq_ondisk))
+               bch2_do_discards(c);
+
+       if (old_a.data_type != BCH_DATA_cached &&
+           new_a.data_type == BCH_DATA_cached &&
+           should_invalidate_buckets(ca, bch2_dev_usage_read(ca)))
+               bch2_do_invalidates(c);
+
+       if (new_a.data_type == BCH_DATA_need_gc_gens)
+               bch2_do_gc_gens(c);
+
        return 0;
 }
 
@@ -1939,8 +1939,7 @@ static int __bch2_trans_mark_dev_sb(struct btree_trans *trans,
 
 int bch2_trans_mark_dev_sb(struct bch_fs *c, struct bch_dev *ca)
 {
-       return bch2_trans_do(c, NULL, NULL, BTREE_INSERT_LAZY_RW,
-                       __bch2_trans_mark_dev_sb(&trans, ca));
+       return bch2_trans_run(c, __bch2_trans_mark_dev_sb(&trans, ca));
 }
 
 /* Disk reservations: */
index 7c2af6754aeaaf603970cf719270c1a2032bfe1d..b5850a761b91037dbbffb56eb5a7e1ae37f7cde6 100644 (file)
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 #include "bcachefs.h"
 #include "checksum.h"
+#include "errcode.h"
 #include "super.h"
 #include "super-io.h"
 
@@ -527,7 +528,7 @@ int bch2_decrypt_sb_key(struct bch_fs *c,
 
        ret = bch2_request_key(c->disk_sb.sb, &user_key);
        if (ret) {
-               bch_err(c, "error requesting encryption key: %i", ret);
+               bch_err(c, "error requesting encryption key: %s", bch2_err_str(ret));
                goto err;
        }
 
@@ -552,20 +553,24 @@ err:
 
 static int bch2_alloc_ciphers(struct bch_fs *c)
 {
+       int ret;
+
        if (!c->chacha20)
                c->chacha20 = crypto_alloc_sync_skcipher("chacha20", 0, 0);
-       if (IS_ERR(c->chacha20)) {
-               bch_err(c, "error requesting chacha20 module: %li",
-                       PTR_ERR(c->chacha20));
-               return PTR_ERR(c->chacha20);
+       ret = PTR_ERR_OR_ZERO(c->chacha20);
+
+       if (ret) {
+               bch_err(c, "error requesting chacha20 module: %s", bch2_err_str(ret));
+               return ret;
        }
 
        if (!c->poly1305)
                c->poly1305 = crypto_alloc_shash("poly1305", 0, 0);
-       if (IS_ERR(c->poly1305)) {
-               bch_err(c, "error requesting poly1305 module: %li",
-                       PTR_ERR(c->poly1305));
-               return PTR_ERR(c->poly1305);
+       ret = PTR_ERR_OR_ZERO(c->poly1305);
+
+       if (ret) {
+               bch_err(c, "error requesting poly1305 module: %s", bch2_err_str(ret));
+               return ret;
        }
 
        return 0;
@@ -626,7 +631,7 @@ int bch2_enable_encryption(struct bch_fs *c, bool keyed)
        if (keyed) {
                ret = bch2_request_key(c->disk_sb.sb, &user_key);
                if (ret) {
-                       bch_err(c, "error requesting encryption key: %i", ret);
+                       bch_err(c, "error requesting encryption key: %s", bch2_err_str(ret));
                        goto err;
                }
 
@@ -678,9 +683,9 @@ int bch2_fs_encryption_init(struct bch_fs *c)
        pr_verbose_init(c->opts, "");
 
        c->sha256 = crypto_alloc_shash("sha256", 0, 0);
-       if (IS_ERR(c->sha256)) {
-               bch_err(c, "error requesting sha256 module");
-               ret = PTR_ERR(c->sha256);
+       ret = PTR_ERR_OR_ZERO(c->sha256);
+       if (ret) {
+               bch_err(c, "error requesting sha256 module: %s", bch2_err_str(ret));
                goto out;
        }
 
index c181dba6028d2393b5b149777ffdfa671012e73d..3b442b01ca869c1a869e30c0ddaaf516bbd2d8d1 100644 (file)
@@ -236,7 +236,7 @@ static int bch2_data_update_index_update(struct bch_write_op *op)
                                bch2_ob_add_backpointer(c, ec_ob, &insert->k);
                }
 err:
-               if (ret == -EINTR)
+               if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
                        ret = 0;
                if (ret)
                        break;
@@ -272,7 +272,7 @@ out:
        bch2_trans_exit(&trans);
        bch2_bkey_buf_exit(&_insert, c);
        bch2_bkey_buf_exit(&_new, c);
-       BUG_ON(ret == -EINTR);
+       BUG_ON(bch2_err_matches(ret, BCH_ERR_transaction_restart));
        return ret;
 }
 
index 05cae0ed41ae7c07361e0b512c6fb61e5464891e..cd37a1016e259b1f0e040520e8b9d7e71449f317 100644 (file)
@@ -189,6 +189,7 @@ struct dump_iter {
        struct bch_fs           *c;
        enum btree_id           id;
        struct bpos             from;
+       struct bpos             prev_node;
        u64                     iter;
 
        struct printbuf         buf;
@@ -258,39 +259,30 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf,
        i->size = size;
        i->ret  = 0;
 
-       err = flush_buf(i);
-       if (err)
-               return err;
-
-       if (!i->size)
-               return i->ret;
-
        bch2_trans_init(&trans, i->c, 0, 0);
 
-       bch2_trans_iter_init(&trans, &iter, i->id, i->from,
-                            BTREE_ITER_PREFETCH|
-                            BTREE_ITER_ALL_SNAPSHOTS);
-       k = bch2_btree_iter_peek(&iter);
-
-       while (k.k && !(err = bkey_err(k))) {
-               bch2_bkey_val_to_text(&i->buf, i->c, k);
-               prt_char(&i->buf, '\n');
-
-               k = bch2_btree_iter_next(&iter);
-               i->from = iter.pos;
-
+       err = for_each_btree_key2(&trans, iter, i->id, i->from,
+                                 BTREE_ITER_PREFETCH|
+                                 BTREE_ITER_ALL_SNAPSHOTS, k, ({
                err = flush_buf(i);
                if (err)
                        break;
 
                if (!i->size)
                        break;
-       }
-       bch2_trans_iter_exit(&trans, &iter);
+
+               bch2_bkey_val_to_text(&i->buf, i->c, k);
+               prt_newline(&i->buf);
+               0;
+       }));
+       i->from = iter.pos;
+
+       if (!err)
+               err = flush_buf(i);
 
        bch2_trans_exit(&trans);
 
-       return err < 0 ? err : i->ret;
+       return err ?: i->ret;
 }
 
 static const struct file_operations btree_debug_ops = {
@@ -360,7 +352,6 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf,
        struct btree_trans trans;
        struct btree_iter iter;
        struct bkey_s_c k;
-       struct btree *prev_node = NULL;
        int err;
 
        i->ubuf = buf;
@@ -376,44 +367,36 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf,
 
        bch2_trans_init(&trans, i->c, 0, 0);
 
-       bch2_trans_iter_init(&trans, &iter, i->id, i->from,
-                            BTREE_ITER_PREFETCH|
-                            BTREE_ITER_ALL_SNAPSHOTS);
-
-       while ((k = bch2_btree_iter_peek(&iter)).k &&
-              !(err = bkey_err(k))) {
+       err = for_each_btree_key2(&trans, iter, i->id, i->from,
+                                 BTREE_ITER_PREFETCH|
+                                 BTREE_ITER_ALL_SNAPSHOTS, k, ({
                struct btree_path_level *l = &iter.path->l[0];
                struct bkey_packed *_k =
                        bch2_btree_node_iter_peek(&l->iter, l->b);
 
-               if (l->b != prev_node) {
-                       bch2_btree_node_to_text(&i->buf, i->c, l->b);
-                       err = flush_buf(i);
-                       if (err)
-                               break;
-               }
-               prev_node = l->b;
-
-               bch2_bfloat_to_text(&i->buf, l->b, _k);
-               err = flush_buf(i);
-               if (err)
-                       break;
-
-               bch2_btree_iter_advance(&iter);
-               i->from = iter.pos;
-
                err = flush_buf(i);
                if (err)
                        break;
 
                if (!i->size)
                        break;
-       }
-       bch2_trans_iter_exit(&trans, &iter);
+
+               if (bpos_cmp(l->b->key.k.p, i->prev_node) > 0) {
+                       bch2_btree_node_to_text(&i->buf, i->c, l->b);
+                       i->prev_node = l->b->key.k.p;
+               }
+
+               bch2_bfloat_to_text(&i->buf, l->b, _k);
+               0;
+       }));
+       i->from = iter.pos;
+
+       if (!err)
+               err = flush_buf(i);
 
        bch2_trans_exit(&trans);
 
-       return err < 0 ? err : i->ret;
+       return err ?: i->ret;
 }
 
 static const struct file_operations bfloat_failed_debug_ops = {
@@ -636,6 +619,75 @@ static const struct file_operations journal_pins_ops = {
        .read           = bch2_journal_pins_read,
 };
 
+static int lock_held_stats_open(struct inode *inode, struct file *file)
+{
+       struct bch_fs *c = inode->i_private;
+       struct dump_iter *i;
+
+       i = kzalloc(sizeof(struct dump_iter), GFP_KERNEL);
+
+       if (!i)
+               return -ENOMEM;
+
+       i->iter = 0;
+       i->c    = c;
+       i->buf  = PRINTBUF;
+       file->private_data = i;
+
+       return 0;
+}
+
+static int lock_held_stats_release(struct inode *inode, struct file *file)
+{
+       struct dump_iter *i = file->private_data;
+
+       printbuf_exit(&i->buf);
+       kfree(i);
+
+       return 0;
+}
+
+static ssize_t lock_held_stats_read(struct file *file, char __user *buf,
+                                     size_t size, loff_t *ppos)
+{
+       struct dump_iter        *i = file->private_data;
+       struct lock_held_stats *lhs = &i->c->lock_held_stats;
+       int err;
+
+       i->ubuf = buf;
+       i->size = size;
+       i->ret  = 0;
+
+       while (lhs->names[i->iter] != 0 && i->iter < BCH_LOCK_TIME_NR) {
+               err = flush_buf(i);
+               if (err)
+                       return err;
+
+               if (!i->size)
+                       break;
+
+               prt_printf(&i->buf, "%s:", lhs->names[i->iter]);
+               prt_newline(&i->buf);
+               printbuf_indent_add(&i->buf, 8);
+               bch2_time_stats_to_text(&i->buf, &lhs->times[i->iter]);
+               printbuf_indent_sub(&i->buf, 8);
+               prt_newline(&i->buf);
+               i->iter++;
+       }
+
+       if (i->buf.allocation_failure)
+               return -ENOMEM;
+
+       return i->ret;
+}
+
+static const struct file_operations lock_held_stats_op = {
+       .owner = THIS_MODULE,
+       .open = lock_held_stats_open,
+       .release = lock_held_stats_release,
+       .read = lock_held_stats_read,
+};
+
 void bch2_fs_debug_exit(struct bch_fs *c)
 {
        if (!IS_ERR_OR_NULL(c->fs_debug_dir))
@@ -664,6 +716,11 @@ void bch2_fs_debug_init(struct bch_fs *c)
        debugfs_create_file("journal_pins", 0400, c->fs_debug_dir,
                            c->btree_debug, &journal_pins_ops);
 
+       if (IS_ENABLED(CONFIG_BCACHEFS_LOCK_TIME_STATS)) {
+               debugfs_create_file("lock_held_stats", 0400, c->fs_debug_dir,
+                               c, &lock_held_stats_op);
+       }
+
        c->btree_debug_dir = debugfs_create_dir("btrees", c->fs_debug_dir);
        if (IS_ERR_OR_NULL(c->btree_debug_dir))
                return;
index 0cbb765cde54e682ad1cff7dc39f0683ef24b261..4d942d224a088303fd9a79684fed652ac48daf6d 100644 (file)
@@ -471,7 +471,7 @@ retry:
 
        ret = __bch2_dirent_lookup_trans(&trans, &iter, dir, hash_info,
                                          name, inum, 0);
-       if (ret == -EINTR)
+       if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
                goto retry;
        if (!ret)
                bch2_trans_iter_exit(&trans, &iter);
@@ -556,7 +556,7 @@ retry:
        }
        bch2_trans_iter_exit(&trans, &iter);
 err:
-       if (ret == -EINTR)
+       if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
                goto retry;
 
        bch2_trans_exit(&trans);
index 6ce352c526f012e38c7359547ffb53a333eca35e..f33acf1af1109ea091bbd171ce9fca51421a12f6 100644 (file)
@@ -572,18 +572,14 @@ static int ec_stripe_mem_alloc(struct btree_trans *trans,
                               struct btree_iter *iter)
 {
        size_t idx = iter->pos.offset;
-       int ret = 0;
 
        if (!__ec_stripe_mem_alloc(trans->c, idx, GFP_NOWAIT|__GFP_NOWARN))
-               return ret;
+               return 0;
 
        bch2_trans_unlock(trans);
-       ret = -EINTR;
 
-       if (!__ec_stripe_mem_alloc(trans->c, idx, GFP_KERNEL))
-               return ret;
-
-       return -ENOMEM;
+       return   __ec_stripe_mem_alloc(trans->c, idx, GFP_KERNEL) ?:
+               bch2_trans_relock(trans);
 }
 
 static ssize_t stripe_idx_to_delete(struct bch_fs *c)
@@ -726,7 +722,7 @@ static int ec_stripe_bkey_insert(struct btree_trans *trans,
        struct bpos start_pos = bpos_max(min_pos, POS(0, c->ec_stripe_hint));
        int ret;
 
-       for_each_btree_key(trans, iter, BTREE_ID_stripes, start_pos,
+       for_each_btree_key_norestart(trans, iter, BTREE_ID_stripes, start_pos,
                           BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) {
                if (bkey_cmp(k.k->p, POS(0, U32_MAX)) > 0) {
                        if (start_pos.offset) {
@@ -740,12 +736,13 @@ static int ec_stripe_bkey_insert(struct btree_trans *trans,
                }
 
                if (bkey_deleted(k.k))
-                       goto found_slot;
+                       break;
        }
 
-       goto err;
-found_slot:
-       start_pos = iter.pos;
+       c->ec_stripe_hint = iter.pos.offset;
+
+       if (ret)
+               goto err;
 
        ret = ec_stripe_mem_alloc(trans, &iter);
        if (ret)
@@ -754,8 +751,6 @@ found_slot:
        stripe->k.p = iter.pos;
 
        ret = bch2_trans_update(trans, &iter, &stripe->k_i, 0);
-
-       c->ec_stripe_hint = start_pos.offset;
 err:
        bch2_trans_iter_exit(trans, &iter);
 
@@ -822,80 +817,62 @@ static void extent_stripe_ptr_add(struct bkey_s_extent e,
        };
 }
 
-static int ec_stripe_update_ptrs(struct bch_fs *c,
-                                struct ec_stripe_buf *s,
-                                struct bkey *pos)
+static int ec_stripe_update_extent(struct btree_trans *trans,
+                                  struct btree_iter *iter,
+                                  struct bkey_s_c k,
+                                  struct ec_stripe_buf *s,
+                                  struct bpos end)
 {
-       struct btree_trans trans;
-       struct btree_iter iter;
-       struct bkey_s_c k;
-       struct bkey_s_extent e;
-       struct bkey_buf sk;
-       struct bpos next_pos;
-       int ret = 0, dev, block;
-
-       bch2_bkey_buf_init(&sk);
-       bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
-
-       /* XXX this doesn't support the reflink btree */
-
-       bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents,
-                            bkey_start_pos(pos),
-                            BTREE_ITER_INTENT);
-retry:
-       while (bch2_trans_begin(&trans),
-              (k = bch2_btree_iter_peek(&iter)).k &&
-              !(ret = bkey_err(k)) &&
-              bkey_cmp(bkey_start_pos(k.k), pos->p) < 0) {
-               const struct bch_extent_ptr *ptr_c;
-               struct bch_extent_ptr *ptr, *ec_ptr = NULL;
-
-               if (extent_has_stripe_ptr(k, s->key.k.p.offset)) {
-                       bch2_btree_iter_advance(&iter);
-                       continue;
-               }
+       const struct bch_extent_ptr *ptr_c;
+       struct bch_extent_ptr *ptr, *ec_ptr = NULL;
+       struct bkey_i *n;
+       int ret, dev, block;
 
-               ptr_c = bkey_matches_stripe(&s->key.v, k, &block);
-               /*
-                * It doesn't generally make sense to erasure code cached ptrs:
-                * XXX: should we be incrementing a counter?
-                */
-               if (!ptr_c || ptr_c->cached) {
-                       bch2_btree_iter_advance(&iter);
-                       continue;
-               }
+       if (bkey_cmp(bkey_start_pos(k.k), end) >= 0)
+               return 1;
 
-               dev = s->key.v.ptrs[block].dev;
+       if (extent_has_stripe_ptr(k, s->key.k.p.offset))
+               return 0;
 
-               bch2_bkey_buf_reassemble(&sk, c, k);
-               e = bkey_i_to_s_extent(sk.k);
+       ptr_c = bkey_matches_stripe(&s->key.v, k, &block);
+       /*
+        * It doesn't generally make sense to erasure code cached ptrs:
+        * XXX: should we be incrementing a counter?
+        */
+       if (!ptr_c || ptr_c->cached)
+               return 0;
 
-               bch2_bkey_drop_ptrs(e.s, ptr, ptr->dev != dev);
-               ec_ptr = (void *) bch2_bkey_has_device(e.s_c, dev);
-               BUG_ON(!ec_ptr);
+       dev = s->key.v.ptrs[block].dev;
 
-               extent_stripe_ptr_add(e, s, ec_ptr, block);
+       n = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
+       ret = PTR_ERR_OR_ZERO(n);
+       if (ret)
+               return ret;
 
-               bch2_btree_iter_set_pos(&iter, bkey_start_pos(&sk.k->k));
-               next_pos = sk.k->k.p;
+       bkey_reassemble(n, k);
 
-               ret   = bch2_btree_iter_traverse(&iter) ?:
-                       bch2_trans_update(&trans, &iter, sk.k, 0) ?:
-                       bch2_trans_commit(&trans, NULL, NULL,
-                                       BTREE_INSERT_NOFAIL);
-               if (!ret)
-                       bch2_btree_iter_set_pos(&iter, next_pos);
-               if (ret)
-                       break;
-       }
-       if (ret == -EINTR)
-               goto retry;
-       bch2_trans_iter_exit(&trans, &iter);
+       bch2_bkey_drop_ptrs(bkey_i_to_s(n), ptr, ptr->dev != dev);
+       ec_ptr = (void *) bch2_bkey_has_device(bkey_i_to_s_c(n), dev);
+       BUG_ON(!ec_ptr);
 
-       bch2_trans_exit(&trans);
-       bch2_bkey_buf_exit(&sk, c);
+       extent_stripe_ptr_add(bkey_i_to_s_extent(n), s, ec_ptr, block);
 
-       return ret;
+       return bch2_trans_update(trans, iter, n, 0);
+}
+
+static int ec_stripe_update_extents(struct bch_fs *c,
+                                struct ec_stripe_buf *s,
+                                struct bkey *pos)
+{
+       struct btree_iter iter;
+       struct bkey_s_c k;
+
+       return bch2_trans_run(c,
+               for_each_btree_key_commit(&trans, iter,
+                       BTREE_ID_extents, bkey_start_pos(pos),
+                       BTREE_ITER_NOT_EXTENTS|BTREE_ITER_INTENT, k,
+                       NULL, NULL, BTREE_INSERT_NOFAIL,
+               ec_stripe_update_extent(&trans, &iter, k, s, pos->p)));
 }
 
 /*
@@ -966,9 +943,10 @@ static void ec_stripe_create(struct ec_stripe_new *s)
        }
 
        for_each_keylist_key(&s->keys, k) {
-               ret = ec_stripe_update_ptrs(c, &s->new_stripe, &k->k);
+               ret = ec_stripe_update_extents(c, &s->new_stripe, &k->k);
                if (ret) {
-                       bch_err(c, "error creating stripe: error %i updating pointers", ret);
+                       bch_err(c, "error creating stripe: error updating pointers: %s",
+                               bch2_err_str(ret));
                        break;
                }
        }
diff --git a/libbcachefs/errcode.c b/libbcachefs/errcode.c
new file mode 100644 (file)
index 0000000..9da8a59
--- /dev/null
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "errcode.h"
+
+#include <linux/errname.h>
+
+static const char * const bch2_errcode_strs[] = {
+#define x(class, err) [BCH_ERR_##err - BCH_ERR_START] = #err,
+       BCH_ERRCODES()
+#undef x
+       NULL
+};
+
+#define BCH_ERR_0      0
+
+static unsigned bch2_errcode_parents[] = {
+#define x(class, err) [BCH_ERR_##err - BCH_ERR_START] = BCH_ERR_##class,
+       BCH_ERRCODES()
+#undef x
+};
+
+const char *bch2_err_str(int err)
+{
+       const char *errstr;
+       err = abs(err);
+
+       BUG_ON(err >= BCH_ERR_MAX);
+
+       if (err >= BCH_ERR_START)
+               errstr = bch2_errcode_strs[err - BCH_ERR_START];
+       else if (err)
+               errstr = errname(err);
+       else
+               errstr = "(No error)";
+       return errstr ?: "(Invalid error)";
+}
+
+bool __bch2_err_matches(int err, int class)
+{
+       err     = abs(err);
+       class   = abs(class);
+
+       BUG_ON(err      >= BCH_ERR_MAX);
+       BUG_ON(class    >= BCH_ERR_MAX);
+
+       while (err >= BCH_ERR_START && err != class)
+               err = bch2_errcode_parents[err - BCH_ERR_START];
+
+       return err == class;
+}
index 0581f3c7a0d82c421ef2250486112dc107abe391..15a1be2fcc84e68df6205bc8560fffda72e2a4d1 100644 (file)
@@ -2,12 +2,73 @@
 #ifndef _BCACHEFS_ERRCODE_H
 #define _BCACHEFS_ERRCODE_H
 
-enum {
-       /* Bucket allocator: */
-       OPEN_BUCKETS_EMPTY =    2048,
-       FREELIST_EMPTY,         /* Allocator thread not keeping up */
-       INSUFFICIENT_DEVICES,
-       NEED_SNAPSHOT_CLEANUP,
+#define BCH_ERRCODES()                                                 \
+       x(0,                    open_buckets_empty)                     \
+       x(0,                    freelist_empty)                         \
+       x(freelist_empty,       no_buckets_found)                       \
+       x(0,                    insufficient_devices)                   \
+       x(0,                    transaction_restart)                    \
+       x(transaction_restart,  transaction_restart_fault_inject)       \
+       x(transaction_restart,  transaction_restart_relock)             \
+       x(transaction_restart,  transaction_restart_relock_path)        \
+       x(transaction_restart,  transaction_restart_relock_path_intent) \
+       x(transaction_restart,  transaction_restart_relock_after_fill)  \
+       x(transaction_restart,  transaction_restart_too_many_iters)     \
+       x(transaction_restart,  transaction_restart_lock_node_reused)   \
+       x(transaction_restart,  transaction_restart_fill_relock)        \
+       x(transaction_restart,  transaction_restart_fill_mem_alloc_fail)\
+       x(transaction_restart,  transaction_restart_mem_realloced)      \
+       x(transaction_restart,  transaction_restart_in_traverse_all)    \
+       x(transaction_restart,  transaction_restart_would_deadlock)     \
+       x(transaction_restart,  transaction_restart_would_deadlock_write)\
+       x(transaction_restart,  transaction_restart_upgrade)            \
+       x(transaction_restart,  transaction_restart_key_cache_upgrade)  \
+       x(transaction_restart,  transaction_restart_key_cache_fill)     \
+       x(transaction_restart,  transaction_restart_key_cache_raced)    \
+       x(transaction_restart,  transaction_restart_key_cache_realloced)\
+       x(transaction_restart,  transaction_restart_journal_preres_get) \
+       x(transaction_restart,  transaction_restart_nested)             \
+       x(0,                    no_btree_node)                          \
+       x(no_btree_node,        no_btree_node_relock)                   \
+       x(no_btree_node,        no_btree_node_upgrade)                  \
+       x(no_btree_node,        no_btree_node_drop)                     \
+       x(no_btree_node,        no_btree_node_lock_root)                \
+       x(no_btree_node,        no_btree_node_up)                       \
+       x(no_btree_node,        no_btree_node_down)                     \
+       x(no_btree_node,        no_btree_node_init)                     \
+       x(no_btree_node,        no_btree_node_cached)                   \
+       x(0,                    lock_fail_node_reused)                  \
+       x(0,                    lock_fail_root_changed)                 \
+       x(0,                    journal_reclaim_would_deadlock)         \
+       x(0,                    fsck)                                   \
+       x(fsck,                 fsck_fix)                               \
+       x(fsck,                 fsck_ignore)                            \
+       x(fsck,                 fsck_errors_not_fixed)                  \
+       x(fsck,                 fsck_repair_unimplemented)              \
+       x(fsck,                 fsck_repair_impossible)                 \
+       x(0,                    need_snapshot_cleanup)                  \
+       x(0,                    need_topology_repair)
+
+enum bch_errcode {
+       BCH_ERR_START           = 2048,
+#define x(class, err) BCH_ERR_##err,
+       BCH_ERRCODES()
+#undef x
+       BCH_ERR_MAX
 };
 
+const char *bch2_err_str(int);
+bool __bch2_err_matches(int, int);
+
+static inline bool _bch2_err_matches(int err, int class)
+{
+       return err && __bch2_err_matches(err, class);
+}
+
+#define bch2_err_matches(_err, _class)                 \
+({                                                     \
+       BUILD_BUG_ON(!__builtin_constant_p(_class));    \
+       _bch2_err_matches(_err, _class);                \
+})
+
 #endif /* _BCACHFES_ERRCODE_H */
index 8279a9ba76a5c5e91524512d31e966e566cb240e..f6a895b2ceb7d5d38801b15fc21cc94572b6c80c 100644 (file)
@@ -68,8 +68,7 @@ void bch2_io_error(struct bch_dev *ca)
 #include "tools-util.h"
 #endif
 
-enum fsck_err_ret bch2_fsck_err(struct bch_fs *c, unsigned flags,
-                               const char *fmt, ...)
+int bch2_fsck_err(struct bch_fs *c, unsigned flags, const char *fmt, ...)
 {
        struct fsck_err_state *s = NULL;
        va_list args;
@@ -83,10 +82,10 @@ enum fsck_err_ret bch2_fsck_err(struct bch_fs *c, unsigned flags,
 
                if (c->opts.errors == BCH_ON_ERROR_continue) {
                        bch_err(c, "fixing");
-                       return FSCK_ERR_FIX;
+                       return -BCH_ERR_fsck_fix;
                } else {
                        bch2_inconsistent_error(c);
-                       return FSCK_ERR_EXIT;
+                       return -BCH_ERR_fsck_errors_not_fixed;
                }
        }
 
@@ -156,14 +155,14 @@ print:
 
        if (fix) {
                set_bit(BCH_FS_ERRORS_FIXED, &c->flags);
-               return FSCK_ERR_FIX;
+               return -BCH_ERR_fsck_fix;
        } else {
                set_bit(BCH_FS_ERRORS_NOT_FIXED, &c->flags);
                set_bit(BCH_FS_ERROR, &c->flags);
                return c->opts.fix_errors == FSCK_OPT_EXIT ||
                        !(flags & FSCK_CAN_IGNORE)
-                       ? FSCK_ERR_EXIT
-                       : FSCK_ERR_IGNORE;
+                       ? -BCH_ERR_fsck_errors_not_fixed
+                       : -BCH_ERR_fsck_ignore;
        }
 }
 
index 6e63c38186f30167a88ec6590a3f0395e4023fdf..b603d738c54927e785e60ee2d14260b096c07f8a 100644 (file)
@@ -91,14 +91,6 @@ do {                                                                 \
  * be able to repair:
  */
 
-enum {
-       BCH_FSCK_OK                     = 0,
-       BCH_FSCK_ERRORS_NOT_FIXED       = 1,
-       BCH_FSCK_REPAIR_UNIMPLEMENTED   = 2,
-       BCH_FSCK_REPAIR_IMPOSSIBLE      = 3,
-       BCH_FSCK_UNKNOWN_VERSION        = 4,
-};
-
 enum fsck_err_opts {
        FSCK_OPT_EXIT,
        FSCK_OPT_YES,
@@ -106,13 +98,6 @@ enum fsck_err_opts {
        FSCK_OPT_ASK,
 };
 
-enum fsck_err_ret {
-       FSCK_ERR_IGNORE = 0,
-       FSCK_ERR_FIX    = 1,
-       FSCK_ERR_EXIT   = 2,
-       FSCK_ERR_START_TOPOLOGY_REPAIR = 3,
-};
-
 struct fsck_err_state {
        struct list_head        list;
        const char              *fmt;
@@ -127,21 +112,21 @@ struct fsck_err_state {
 #define FSCK_NO_RATELIMIT      (1 << 3)
 
 __printf(3, 4) __cold
-enum fsck_err_ret bch2_fsck_err(struct bch_fs *,
-                               unsigned, const char *, ...);
+int bch2_fsck_err(struct bch_fs *, unsigned, const char *, ...);
 void bch2_flush_fsck_errs(struct bch_fs *);
 
 #define __fsck_err(c, _flags, msg, ...)                                        \
 ({                                                                     \
-       int _fix = bch2_fsck_err(c, _flags, msg, ##__VA_ARGS__);\
+       int _ret = bch2_fsck_err(c, _flags, msg, ##__VA_ARGS__);        \
                                                                        \
-       if (_fix == FSCK_ERR_EXIT) {                                    \
+       if (_ret != -BCH_ERR_fsck_fix &&                                \
+           _ret != -BCH_ERR_fsck_ignore) {                             \
                bch_err(c, "Unable to continue, halting");              \
-               ret = BCH_FSCK_ERRORS_NOT_FIXED;                        \
+               ret = _ret;                                             \
                goto fsck_err;                                          \
        }                                                               \
                                                                        \
-       _fix;                                                           \
+       _ret == -BCH_ERR_fsck_fix;                                      \
 })
 
 /* These macros return true if error should be fixed: */
index bcfd9e5f3c2f7c40a04993f8fc74f91631a8d4bd..0a7f172f11c64c15e07beb16d591baf65286fa13 100644 (file)
@@ -409,7 +409,7 @@ retry:
        offset = iter.pos.offset;
        bch2_trans_iter_exit(&trans, &iter);
 err:
-       if (ret == -EINTR)
+       if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
                goto retry;
        bch2_trans_exit(&trans);
 
@@ -850,13 +850,13 @@ void bch2_invalidate_folio(struct folio *folio, size_t offset, size_t length)
        bch2_clear_page_bits(&folio->page);
 }
 
-int bch2_releasepage(struct page *page, gfp_t gfp_mask)
+bool bch2_release_folio(struct folio *folio, gfp_t gfp_mask)
 {
-       if (PageDirty(page))
-               return 0;
+       if (folio_test_dirty(folio) || folio_test_writeback(folio))
+               return false;
 
-       bch2_clear_page_bits(page);
-       return 1;
+       bch2_clear_page_bits(&folio->page);
+       return true;
 }
 
 #ifdef CONFIG_MIGRATION
@@ -1045,10 +1045,9 @@ retry:
                 * read_extent -> io_time_reset may cause a transaction restart
                 * without returning an error, we need to check for that here:
                 */
-               if (!bch2_trans_relock(trans)) {
-                       ret = -EINTR;
+               ret = bch2_trans_relock(trans);
+               if (ret)
                        break;
-               }
 
                bch2_btree_iter_set_pos(&iter,
                                POS(inum.inum, rbio->bio.bi_iter.bi_sector));
@@ -1101,7 +1100,7 @@ retry:
 err:
        bch2_trans_iter_exit(trans, &iter);
 
-       if (ret == -EINTR)
+       if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
                goto retry;
 
        if (ret) {
@@ -1175,20 +1174,6 @@ static void __bchfs_readpage(struct bch_fs *c, struct bch_read_bio *rbio,
        bch2_trans_exit(&trans);
 }
 
-int bch2_readpage(struct file *file, struct page *page)
-{
-       struct bch_inode_info *inode = to_bch_ei(page->mapping->host);
-       struct bch_fs *c = inode->v.i_sb->s_fs_info;
-       struct bch_io_opts opts = io_opts(c, &inode->ei_inode);
-       struct bch_read_bio *rbio;
-
-       rbio = rbio_init(bio_alloc_bioset(NULL, 1, REQ_OP_READ, GFP_NOFS, &c->bio_read), opts);
-       rbio->bio.bi_end_io = bch2_readpages_end_io;
-
-       __bchfs_readpage(c, rbio, inode_inum(inode), page);
-       return 0;
-}
-
 static void bch2_read_single_page_end_io(struct bio *bio)
 {
        complete(bio->bi_private);
@@ -1221,6 +1206,16 @@ static int bch2_read_single_page(struct page *page,
        return 0;
 }
 
+int bch2_read_folio(struct file *file, struct folio *folio)
+{
+       struct page *page = &folio->page;
+       int ret;
+
+       ret = bch2_read_single_page(page, page->mapping);
+       folio_unlock(folio);
+       return ret;
+}
+
 /* writepages: */
 
 struct bch_writepage_state {
@@ -1512,7 +1507,7 @@ int bch2_writepages(struct address_space *mapping, struct writeback_control *wbc
 /* buffered writes: */
 
 int bch2_write_begin(struct file *file, struct address_space *mapping,
-                    loff_t pos, unsigned len, unsigned flags,
+                    loff_t pos, unsigned len,
                     struct page **pagep, void **fsdata)
 {
        struct bch_inode_info *inode = to_bch_ei(mapping->host);
@@ -1532,7 +1527,7 @@ int bch2_write_begin(struct file *file, struct address_space *mapping,
 
        bch2_pagecache_add_get(&inode->ei_pagecache_lock);
 
-       page = grab_cache_page_write_begin(mapping, index, flags);
+       page = grab_cache_page_write_begin(mapping, index);
        if (!page)
                goto err_unlock;
 
@@ -1663,7 +1658,7 @@ static int __bch2_buffered_write(struct bch_inode_info *inode,
        bch2_page_reservation_init(c, inode, &res);
 
        for (i = 0; i < nr_pages; i++) {
-               pages[i] = grab_cache_page_write_begin(mapping, index + i, 0);
+               pages[i] = grab_cache_page_write_begin(mapping, index + i);
                if (!pages[i]) {
                        nr_pages = i;
                        if (!i) {
@@ -2073,7 +2068,7 @@ retry:
        offset = iter.pos.offset;
        bch2_trans_iter_exit(&trans, &iter);
 err:
-       if (err == -EINTR)
+       if (bch2_err_matches(err, BCH_ERR_transaction_restart))
                goto retry;
        bch2_trans_exit(&trans);
 
@@ -2449,7 +2444,7 @@ retry:
        start = iter.pos;
        bch2_trans_iter_exit(&trans, &iter);
 err:
-       if (ret == -EINTR)
+       if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
                goto retry;
 
        bch2_trans_exit(&trans);
@@ -2839,7 +2834,8 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode,
        bch2_trans_copy_iter(&dst, &src);
        bch2_trans_copy_iter(&del, &src);
 
-       while (ret == 0 || ret == -EINTR) {
+       while (ret == 0 ||
+              bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
                struct disk_reservation disk_res =
                        bch2_disk_reservation_init(c, 0);
                struct bkey_i delete;
@@ -3041,7 +3037,7 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
 bkey_err:
                bch2_quota_reservation_put(c, inode, &quota_res);
                bch2_disk_reservation_put(c, &disk_res);
-               if (ret == -EINTR)
+               if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
                        ret = 0;
        }
 
@@ -3321,7 +3317,7 @@ retry:
        }
        bch2_trans_iter_exit(&trans, &iter);
 err:
-       if (ret == -EINTR)
+       if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
                goto retry;
 
        bch2_trans_exit(&trans);
@@ -3436,7 +3432,7 @@ retry:
        }
        bch2_trans_iter_exit(&trans, &iter);
 err:
-       if (ret == -EINTR)
+       if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
                goto retry;
 
        bch2_trans_exit(&trans);
index 7f2d7f454be4c6aa4bed2cd5dfd6130b384c2d14..a22a4e95731bea34d5c9c7872379db514ed68ecb 100644 (file)
@@ -15,13 +15,13 @@ int __must_check bch2_write_inode_size(struct bch_fs *,
                                       struct bch_inode_info *,
                                       loff_t, unsigned);
 
-int bch2_readpage(struct file *, struct page *);
+int bch2_read_folio(struct file *, struct folio *);
 
 int bch2_writepages(struct address_space *, struct writeback_control *);
 void bch2_readahead(struct readahead_control *);
 
 int bch2_write_begin(struct file *, struct address_space *, loff_t,
-                    unsigned, unsigned, struct page **, void **);
+                    unsigned, struct page **, void **);
 int bch2_write_end(struct file *, struct address_space *, loff_t,
                   unsigned, unsigned, struct page *, void *);
 
@@ -42,7 +42,7 @@ loff_t bch2_llseek(struct file *, loff_t, int);
 vm_fault_t bch2_page_fault(struct vm_fault *);
 vm_fault_t bch2_page_mkwrite(struct vm_fault *);
 void bch2_invalidate_folio(struct folio *, size_t, size_t);
-int bch2_releasepage(struct page *, gfp_t);
+bool bch2_release_folio(struct folio *, gfp_t);
 int bch2_migrate_page(struct address_space *, struct page *,
                      struct page *, enum migrate_mode);
 
index 2354c98945170c511557f7a21b36e43ad260964a..3e2b6097819b159e653d3cacfb21de91b1bda668 100644 (file)
@@ -8,6 +8,7 @@
 #include "buckets.h"
 #include "chardev.h"
 #include "dirent.h"
+#include "errcode.h"
 #include "extents.h"
 #include "fs.h"
 #include "fs-common.h"
@@ -153,7 +154,7 @@ retry:
 
        bch2_trans_iter_exit(&trans, &iter);
 
-       if (ret == -EINTR)
+       if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
                goto retry;
 
        bch2_trans_exit(&trans);
@@ -323,7 +324,7 @@ retry:
                bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, -1,
                                KEY_TYPE_QUOTA_WARN);
 err_before_quota:
-               if (ret == -EINTR)
+               if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
                        goto retry;
                goto err_trans;
        }
@@ -754,7 +755,7 @@ retry:
 btree_err:
        bch2_trans_iter_exit(&trans, &inode_iter);
 
-       if (ret == -EINTR)
+       if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
                goto retry;
        if (unlikely(ret))
                goto err_trans;
@@ -985,7 +986,7 @@ retry:
        start = iter.pos.offset;
        bch2_trans_iter_exit(&trans, &iter);
 err:
-       if (ret == -EINTR)
+       if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
                goto retry;
 
        if (!ret && have_extent)
@@ -1112,14 +1113,14 @@ static const struct inode_operations bch_special_inode_operations = {
 };
 
 static const struct address_space_operations bch_address_space_operations = {
-       .readpage       = bch2_readpage,
+       .read_folio     = bch2_read_folio,
        .writepages     = bch2_writepages,
        .readahead      = bch2_readahead,
        .dirty_folio    = filemap_dirty_folio,
        .write_begin    = bch2_write_begin,
        .write_end      = bch2_write_end,
        .invalidate_folio = bch2_invalidate_folio,
-       .releasepage    = bch2_releasepage,
+       .release_folio  = bch2_release_folio,
        .direct_IO      = noop_direct_IO,
 #ifdef CONFIG_MIGRATION
        .migratepage    = bch2_migrate_page,
@@ -1335,7 +1336,7 @@ found:
        memcpy(name, d.v->d_name, name_len);
        name[name_len] = '\0';
 err:
-       if (ret == -EINTR)
+       if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
                goto retry;
 
        bch2_trans_iter_exit(&trans, &iter1);
@@ -1870,10 +1871,9 @@ got_sb:
        sb->s_shrink.seeks = 0;
 
        vinode = bch2_vfs_inode_get(c, BCACHEFS_ROOT_SUBVOL_INUM);
-       if (IS_ERR(vinode)) {
-               bch_err(c, "error mounting: error getting root inode %i",
-                       (int) PTR_ERR(vinode));
-               ret = PTR_ERR(vinode);
+       ret = PTR_ERR_OR_ZERO(vinode);
+       if (ret) {
+               bch_err(c, "error mounting: error getting root inode: %s", bch2_err_str(ret));
                goto err_put_super;
        }
 
index 6165878c2ddc4c90172b4aa7d7c7d0d370f97788..c93e177a314ffc38abe016fc085b3a23db02a467 100644 (file)
@@ -136,9 +136,9 @@ static int lookup_first_inode(struct btree_trans *trans, u64 inode_nr,
 
        ret = bch2_inode_unpack(k, inode);
 err:
-       if (ret && ret != -EINTR)
-               bch_err(trans->c, "error %i fetching inode %llu",
-                       ret, inode_nr);
+       if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
+               bch_err(trans->c, "error fetching inode %llu: %s",
+                       inode_nr, bch2_err_str(ret));
        bch2_trans_iter_exit(trans, &iter);
        return ret;
 }
@@ -164,9 +164,9 @@ static int __lookup_inode(struct btree_trans *trans, u64 inode_nr,
        if (!ret)
                *snapshot = iter.pos.snapshot;
 err:
-       if (ret && ret != -EINTR)
-               bch_err(trans->c, "error %i fetching inode %llu:%u",
-                       ret, inode_nr, *snapshot);
+       if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
+               bch_err(trans->c, "error fetching inode %llu:%u: %s",
+                       inode_nr, *snapshot, bch2_err_str(ret));
        bch2_trans_iter_exit(trans, &iter);
        return ret;
 }
@@ -225,7 +225,8 @@ static int write_inode(struct btree_trans *trans,
                                  BTREE_INSERT_LAZY_RW,
                                  __write_inode(trans, inode, snapshot));
        if (ret)
-               bch_err(trans->c, "error in fsck: error %i updating inode", ret);
+               bch_err(trans->c, "error in fsck: error updating inode: %s",
+                       bch2_err_str(ret));
        return ret;
 }
 
@@ -286,7 +287,7 @@ retry:
                                BTREE_INSERT_NOFAIL);
 err:
        bch2_trans_iter_exit(trans, &iter);
-       if (ret == -EINTR)
+       if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
                goto retry;
 
        return ret;
@@ -313,8 +314,8 @@ static int __remove_dirent(struct btree_trans *trans, struct bpos pos)
                                  BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
        bch2_trans_iter_exit(trans, &iter);
 err:
-       if (ret && ret != -EINTR)
-               bch_err(c, "error %i from __remove_dirent()", ret);
+       if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
+               bch_err(c, "error from __remove_dirent(): %s", bch2_err_str(ret));
        return ret;
 }
 
@@ -349,8 +350,8 @@ static int lookup_lostfound(struct btree_trans *trans, u32 subvol,
                goto create_lostfound;
        }
 
-       if (ret && ret != -EINTR)
-               bch_err(c, "error looking up lost+found: %i", ret);
+       if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
+               bch_err(c, "error looking up lost+found: %s", bch2_err_str(ret));
        if (ret)
                return ret;
 
@@ -372,8 +373,8 @@ create_lostfound:
                                lostfound, &lostfound_str,
                                0, 0, S_IFDIR|0700, 0, NULL, NULL,
                                (subvol_inum) { }, 0);
-       if (ret && ret != -EINTR)
-               bch_err(c, "error creating lost+found: %i", ret);
+       if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
+               bch_err(c, "error creating lost+found: %s", bch2_err_str(ret));
        return ret;
 }
 
@@ -437,8 +438,8 @@ static int reattach_inode(struct btree_trans *trans,
                                  BTREE_INSERT_NOFAIL,
                        __reattach_inode(trans, inode, inode_snapshot));
        if (ret) {
-               bch_err(trans->c, "error %i reattaching inode %llu",
-                       ret, inode->bi_inum);
+               bch_err(trans->c, "error reattaching inode %llu: %s",
+                       inode->bi_inum, bch2_err_str(ret));
                return ret;
        }
 
@@ -518,7 +519,7 @@ static int snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s,
                .id     = pos.snapshot,
                .equiv  = bch2_snapshot_equiv(c, pos.snapshot),
        };
-       int ret;
+       int ret = 0;
 
        if (bkey_cmp(s->pos, pos))
                s->ids.nr = 0;
@@ -528,14 +529,13 @@ static int snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s,
 
        darray_for_each(s->ids, i)
                if (i->equiv == n.equiv) {
-                       if (i->id != n.id) {
-                               bch_err(c, "snapshot deletion did not run correctly:\n"
+                       if (fsck_err_on(i->id != n.id, c,
+                                       "snapshot deletion did not run correctly:\n"
                                        "  duplicate keys in btree %s at %llu:%llu snapshots %u, %u (equiv %u)\n",
                                        bch2_btree_ids[btree_id],
                                        pos.inode, pos.offset,
-                                       i->id, n.id, n.equiv);
-                               return -NEED_SNAPSHOT_CLEANUP;
-                       }
+                                       i->id, n.id, n.equiv))
+                               return -BCH_ERR_need_snapshot_cleanup;
 
                        return 0;
                }
@@ -544,6 +544,7 @@ static int snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s,
        if (ret)
                bch_err(c, "error reallocating snapshots_seen table (size %zu)",
                        s->ids.size);
+fsck_err:
        return ret;
 }
 
@@ -649,6 +650,7 @@ static int __walk_inode(struct btree_trans *trans,
        struct bch_fs *c = trans->c;
        struct btree_iter iter;
        struct bkey_s_c k;
+       u32 restart_count = trans->restart_count;
        unsigned i;
        int ret;
 
@@ -676,6 +678,10 @@ static int __walk_inode(struct btree_trans *trans,
 
        w->cur_inum             = pos.inode;
        w->first_this_inode     = true;
+
+       if (trans_was_restarted(trans, restart_count))
+               return -BCH_ERR_transaction_restart_nested;
+
 lookup_snapshot:
        for (i = 0; i < w->inodes.nr; i++)
                if (bch2_snapshot_is_ancestor(c, pos.snapshot, w->inodes.data[i].snapshot))
@@ -837,15 +843,14 @@ bad_hash:
                     "hashed to %llu\n%s",
                     bch2_btree_ids[desc.btree_id], hash_k.k->p.inode, hash_k.k->p.offset, hash,
                     (printbuf_reset(&buf),
-                     bch2_bkey_val_to_text(&buf, c, hash_k), buf.buf)) == FSCK_ERR_IGNORE)
-               return 0;
-
-       ret = hash_redo_key(trans, desc, hash_info, k_iter, hash_k);
-       if (ret) {
-               bch_err(c, "hash_redo_key err %i", ret);
-               return ret;
+                     bch2_bkey_val_to_text(&buf, c, hash_k), buf.buf))) {
+               ret = hash_redo_key(trans, desc, hash_info, k_iter, hash_k);
+               if (ret) {
+                       bch_err(c, "hash_redo_key err %s", bch2_err_str(ret));
+                       return ret;
+               }
+               ret = -BCH_ERR_transaction_restart_nested;
        }
-       ret = -EINTR;
 fsck_err:
        goto out;
 }
@@ -910,7 +915,8 @@ static int check_inode(struct btree_trans *trans,
 
                ret = fsck_inode_rm(trans, u.bi_inum, iter->pos.snapshot);
                if (ret)
-                       bch_err(c, "error in fsck: error %i while deleting inode", ret);
+                       bch_err(c, "error in fsck: error while deleting inode: %s",
+                               bch2_err_str(ret));
                return ret;
        }
 
@@ -933,7 +939,8 @@ static int check_inode(struct btree_trans *trans,
                                POS(u.bi_inum, U64_MAX),
                                0, NULL);
                if (ret) {
-                       bch_err(c, "error in fsck: error %i truncating inode", ret);
+                       bch_err(c, "error in fsck: error truncating inode: %s",
+                               bch2_err_str(ret));
                        return ret;
                }
 
@@ -958,8 +965,8 @@ static int check_inode(struct btree_trans *trans,
 
                sectors = bch2_count_inode_sectors(trans, u.bi_inum, iter->pos.snapshot);
                if (sectors < 0) {
-                       bch_err(c, "error in fsck: error %i recounting inode sectors",
-                               (int) sectors);
+                       bch_err(c, "error in fsck: error recounting inode sectors: %s",
+                               bch2_err_str(sectors));
                        return sectors;
                }
 
@@ -978,13 +985,13 @@ static int check_inode(struct btree_trans *trans,
        if (do_update) {
                ret = __write_inode(trans, &u, iter->pos.snapshot);
                if (ret)
-                       bch_err(c, "error in fsck: error %i "
-                               "updating inode", ret);
+                       bch_err(c, "error in fsck: error updating inode: %s",
+                               bch2_err_str(ret));
        }
 err:
 fsck_err:
        if (ret)
-               bch_err(c, "error %i from check_inode()", ret);
+               bch_err(c, "error from check_inode(): %s", bch2_err_str(ret));
        return ret;
 }
 
@@ -1003,16 +1010,14 @@ static int check_inodes(struct bch_fs *c, bool full)
 
        ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_inodes,
                        POS_MIN,
-                       BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS,
-                       k,
-                       NULL, NULL,
-                       BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
+                       BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
+                       NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
                check_inode(&trans, &iter, k, &prev, &s, full));
 
        bch2_trans_exit(&trans);
        snapshots_seen_exit(&s);
        if (ret)
-               bch_err(c, "error %i from check_inodes()", ret);
+               bch_err(c, "error from check_inodes(): %s", bch2_err_str(ret));
        return ret;
 }
 
@@ -1115,15 +1120,15 @@ static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w)
 {
        struct bch_fs *c = trans->c;
        struct inode_walker_entry *i;
-       int ret = 0, ret2 = 0;
+       u32 restart_count = trans->restart_count;
+       int ret = 0;
        s64 count2;
 
        darray_for_each(w->inodes, i) {
                if (i->inode.bi_sectors == i->count)
                        continue;
 
-               count2 = lockrestart_do(trans,
-                       bch2_count_inode_sectors(trans, w->cur_inum, i->snapshot));
+               count2 = bch2_count_inode_sectors(trans, w->cur_inum, i->snapshot);
 
                if (i->count != count2) {
                        bch_err(c, "fsck counted i_sectors wrong: got %llu should be %llu",
@@ -1136,19 +1141,21 @@ static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w)
                if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_I_SECTORS_DIRTY), c,
                            "inode %llu:%u has incorrect i_sectors: got %llu, should be %llu",
                            w->cur_inum, i->snapshot,
-                           i->inode.bi_sectors, i->count) == FSCK_ERR_IGNORE)
-                       continue;
-
-               i->inode.bi_sectors = i->count;
-               ret = write_inode(trans, &i->inode, i->snapshot);
-               if (ret)
-                       break;
-               ret2 = -EINTR;
+                           i->inode.bi_sectors, i->count)) {
+                       i->inode.bi_sectors = i->count;
+                       ret = write_inode(trans, &i->inode, i->snapshot);
+                       if (ret)
+                               break;
+               }
        }
 fsck_err:
-       if (ret)
-               bch_err(c, "error %i from check_i_sectors()", ret);
-       return ret ?: ret2;
+       if (ret) {
+               bch_err(c, "error from check_i_sectors(): %s", bch2_err_str(ret));
+               return ret;
+       }
+       if (trans_was_restarted(trans, restart_count))
+               return -BCH_ERR_transaction_restart_nested;
+       return 0;
 }
 
 static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
@@ -1184,14 +1191,7 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
                        goto err;
        }
 
-       if (!iter->path->should_be_locked) {
-               /*
-                * hack: check_i_sectors may have handled a transaction restart,
-                * it shouldn't be but we need to fix the new i_sectors check
-                * code and delete the old bch2_count_inode_sectors() first
-                */
-               return -EINTR;
-       }
+       BUG_ON(!iter->path->should_be_locked);
 #if 0
        if (bkey_cmp(prev.k->k.p, bkey_start_pos(k.k)) > 0) {
                char buf1[200];
@@ -1201,7 +1201,8 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
                bch2_bkey_val_to_text(&PBUF(buf2), c, k);
 
                if (fsck_err(c, "overlapping extents:\n%s\n%s", buf1, buf2)) {
-                       ret = fix_overlapping_extent(trans, k, prev.k->k.p) ?: -EINTR;
+                       ret = fix_overlapping_extent(trans, k, prev.k->k.p)
+                               ?: -BCH_ERR_transaction_restart_nested;
                        goto out;
                }
        }
@@ -1286,8 +1287,8 @@ err:
 fsck_err:
        printbuf_exit(&buf);
 
-       if (ret && ret != -EINTR)
-               bch_err(c, "error %i from check_extent()", ret);
+       if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
+               bch_err(c, "error from check_extent(): %s", bch2_err_str(ret));
        return ret;
 }
 
@@ -1329,7 +1330,7 @@ static int check_extents(struct bch_fs *c)
        snapshots_seen_exit(&s);
 
        if (ret)
-               bch_err(c, "error %i from check_extents()", ret);
+               bch_err(c, "error from check_extents(): %s", bch2_err_str(ret));
        return ret;
 }
 
@@ -1337,7 +1338,8 @@ static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w)
 {
        struct bch_fs *c = trans->c;
        struct inode_walker_entry *i;
-       int ret = 0, ret2 = 0;
+       u32 restart_count = trans->restart_count;
+       int ret = 0;
        s64 count2;
 
        darray_for_each(w->inodes, i) {
@@ -1363,13 +1365,16 @@ static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w)
                        ret = write_inode(trans, &i->inode, i->snapshot);
                        if (ret)
                                break;
-                       ret2 = -EINTR;
                }
        }
 fsck_err:
-       if (ret)
-               bch_err(c, "error %i from check_subdir_count()", ret);
-       return ret ?: ret2;
+       if (ret) {
+               bch_err(c, "error from check_subdir_count(): %s", bch2_err_str(ret));
+               return ret;
+       }
+       if (trans_was_restarted(trans, restart_count))
+               return -BCH_ERR_transaction_restart_nested;
+       return 0;
 }
 
 static int check_dirent_target(struct btree_trans *trans,
@@ -1486,8 +1491,8 @@ err:
 fsck_err:
        printbuf_exit(&buf);
 
-       if (ret && ret != -EINTR)
-               bch_err(c, "error %i from check_target()", ret);
+       if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
+               bch_err(c, "error from check_target(): %s", bch2_err_str(ret));
        return ret;
 }
 
@@ -1527,10 +1532,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
                        goto err;
        }
 
-       if (!iter->path->should_be_locked) {
-               /* hack: see check_extent() */
-               return -EINTR;
-       }
+       BUG_ON(!iter->path->should_be_locked);
 
        ret = __walk_inode(trans, dir, equiv);
        if (ret < 0)
@@ -1659,8 +1661,8 @@ err:
 fsck_err:
        printbuf_exit(&buf);
 
-       if (ret && ret != -EINTR)
-               bch_err(c, "error %i from check_dirent()", ret);
+       if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
+               bch_err(c, "error from check_dirent(): %s", bch2_err_str(ret));
        return ret;
 }
 
@@ -1699,7 +1701,7 @@ static int check_dirents(struct bch_fs *c)
        inode_walker_exit(&target);
 
        if (ret)
-               bch_err(c, "error %i from check_dirents()", ret);
+               bch_err(c, "error from check_dirents(): %s", bch2_err_str(ret));
        return ret;
 }
 
@@ -1734,8 +1736,8 @@ static int check_xattr(struct btree_trans *trans, struct btree_iter *iter,
 
        ret = hash_check_key(trans, bch2_xattr_hash_desc, hash_info, iter, k);
 fsck_err:
-       if (ret && ret != -EINTR)
-               bch_err(c, "error %i from check_xattr()", ret);
+       if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
+               bch_err(c, "error from check_xattr(): %s", bch2_err_str(ret));
        return ret;
 }
 
@@ -1767,7 +1769,7 @@ static int check_xattrs(struct bch_fs *c)
        bch2_trans_exit(&trans);
 
        if (ret)
-               bch_err(c, "error %i from check_xattrs()", ret);
+               bch_err(c, "error from check_xattrs(): %s", bch2_err_str(ret));
        return ret;
 }
 
@@ -1799,7 +1801,7 @@ static int check_root_trans(struct btree_trans *trans)
                                      BTREE_INSERT_LAZY_RW,
                        __bch2_btree_insert(trans, BTREE_ID_subvolumes, &root_subvol.k_i));
                if (ret) {
-                       bch_err(c, "error writing root subvol: %i", ret);
+                       bch_err(c, "error writing root subvol: %s", bch2_err_str(ret));
                        goto err;
                }
 
@@ -1818,7 +1820,7 @@ static int check_root_trans(struct btree_trans *trans)
 
                ret = __write_inode(trans, &root_inode, snapshot);
                if (ret)
-                       bch_err(c, "error writing root inode: %i", ret);
+                       bch_err(c, "error writing root inode: %s", bch2_err_str(ret));
        }
 err:
 fsck_err:
@@ -1971,7 +1973,7 @@ static int check_path(struct btree_trans *trans,
        }
 fsck_err:
        if (ret)
-               bch_err(c, "%s: err %i", __func__, ret);
+               bch_err(c, "%s: err %s", __func__, bch2_err_str(ret));
        return ret;
 }
 
@@ -2015,8 +2017,6 @@ static int check_directory_structure(struct bch_fs *c)
        }
        bch2_trans_iter_exit(&trans, &iter);
 
-       BUG_ON(ret == -EINTR);
-
        darray_exit(&path);
 
        bch2_trans_exit(&trans);
@@ -2194,6 +2194,47 @@ static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links
        return ret;
 }
 
+static int check_nlinks_update_inode(struct btree_trans *trans, struct btree_iter *iter,
+                                    struct bkey_s_c k,
+                                    struct nlink_table *links,
+                                    size_t *idx, u64 range_end)
+{
+       struct bch_fs *c = trans->c;
+       struct bch_inode_unpacked u;
+       struct nlink *link = &links->d[*idx];
+       int ret = 0;
+
+       if (k.k->p.offset >= range_end)
+               return 1;
+
+       if (!bkey_is_inode(k.k))
+               return 0;
+
+       BUG_ON(bch2_inode_unpack(k, &u));
+
+       if (S_ISDIR(le16_to_cpu(u.bi_mode)))
+               return 0;
+
+       if (!u.bi_nlink)
+               return 0;
+
+       while ((cmp_int(link->inum, k.k->p.offset) ?:
+               cmp_int(link->snapshot, k.k->p.snapshot)) < 0) {
+               BUG_ON(*idx == links->nr);
+               link = &links->d[++*idx];
+       }
+
+       if (fsck_err_on(bch2_inode_nlink_get(&u) != link->count, c,
+                       "inode %llu type %s has wrong i_nlink (%u, should be %u)",
+                       u.bi_inum, bch2_d_types[mode_to_type(u.bi_mode)],
+                       bch2_inode_nlink_get(&u), link->count)) {
+               bch2_inode_nlink_set(&u, link->count);
+               ret = __write_inode(trans, &u, k.k->p.snapshot);
+       }
+fsck_err:
+       return ret;
+}
+
 noinline_for_stack
 static int check_nlinks_update_hardlinks(struct bch_fs *c,
                               struct nlink_table *links,
@@ -2202,56 +2243,25 @@ static int check_nlinks_update_hardlinks(struct bch_fs *c,
        struct btree_trans trans;
        struct btree_iter iter;
        struct bkey_s_c k;
-       struct bch_inode_unpacked u;
-       struct nlink *link = links->d;
+       size_t idx = 0;
        int ret = 0;
 
        bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
 
-       for_each_btree_key(&trans, iter, BTREE_ID_inodes,
-                          POS(0, range_start),
-                          BTREE_ITER_INTENT|
-                          BTREE_ITER_PREFETCH|
-                          BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
-               if (k.k->p.offset >= range_end)
-                       break;
-
-               if (!bkey_is_inode(k.k))
-                       continue;
-
-               BUG_ON(bch2_inode_unpack(k, &u));
-
-               if (S_ISDIR(le16_to_cpu(u.bi_mode)))
-                       continue;
-
-               if (!u.bi_nlink)
-                       continue;
-
-               while ((cmp_int(link->inum, k.k->p.offset) ?:
-                       cmp_int(link->snapshot, k.k->p.snapshot)) < 0) {
-                       link++;
-                       BUG_ON(link >= links->d + links->nr);
-               }
-
-               if (fsck_err_on(bch2_inode_nlink_get(&u) != link->count, c,
-                               "inode %llu type %s has wrong i_nlink (%u, should be %u)",
-                               u.bi_inum, bch2_d_types[mode_to_type(u.bi_mode)],
-                               bch2_inode_nlink_get(&u), link->count)) {
-                       bch2_inode_nlink_set(&u, link->count);
+       ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_inodes,
+                       POS(0, range_start),
+                       BTREE_ITER_INTENT|BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
+                       NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
+               check_nlinks_update_inode(&trans, &iter, k, links, &idx, range_end));
 
-                       ret = write_inode(&trans, &u, k.k->p.snapshot);
-                       if (ret)
-                               bch_err(c, "error in fsck: error %i updating inode", ret);
-               }
-       }
-fsck_err:
-       bch2_trans_iter_exit(&trans, &iter);
        bch2_trans_exit(&trans);
 
-       if (ret)
+       if (ret < 0) {
                bch_err(c, "error in fsck: btree error %i while walking inodes", ret);
+               return ret;
+       }
 
-       return ret;
+       return 0;
 }
 
 noinline_for_stack
@@ -2291,21 +2301,13 @@ static int check_nlinks(struct bch_fs *c)
        return ret;
 }
 
-static int fix_reflink_p_key(struct btree_trans *trans, struct btree_iter *iter)
+static int fix_reflink_p_key(struct btree_trans *trans, struct btree_iter *iter,
+                            struct bkey_s_c k)
 {
-       struct bkey_s_c k;
        struct bkey_s_c_reflink_p p;
        struct bkey_i_reflink_p *u;
        int ret;
 
-       k = bch2_btree_iter_peek(iter);
-       if (!k.k)
-               return 0;
-
-       ret = bkey_err(k);
-       if (ret)
-               return ret;
-
        if (k.k->type != KEY_TYPE_reflink_p)
                return 0;
 
@@ -2341,20 +2343,11 @@ static int fix_reflink_p(struct bch_fs *c)
 
        bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
 
-       for_each_btree_key(&trans, iter, BTREE_ID_extents, POS_MIN,
-                          BTREE_ITER_INTENT|
-                          BTREE_ITER_PREFETCH|
-                          BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
-               if (k.k->type == KEY_TYPE_reflink_p) {
-                       ret = commit_do(&trans, NULL, NULL,
-                                             BTREE_INSERT_NOFAIL|
-                                             BTREE_INSERT_LAZY_RW,
-                                             fix_reflink_p_key(&trans, &iter));
-                       if (ret)
-                               break;
-               }
-       }
-       bch2_trans_iter_exit(&trans, &iter);
+       ret = for_each_btree_key_commit(&trans, iter,
+                       BTREE_ID_extents, POS_MIN,
+                       BTREE_ITER_INTENT|BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
+                       NULL, NULL, BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW,
+               fix_reflink_p_key(&trans, &iter, k));
 
        bch2_trans_exit(&trans);
        return ret;
@@ -2380,7 +2373,7 @@ again:
                check_nlinks(c) ?:
                fix_reflink_p(c);
 
-       if (ret == -NEED_SNAPSHOT_CLEANUP) {
+       if (bch2_err_matches(ret, BCH_ERR_need_snapshot_cleanup)) {
                set_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags);
                goto again;
        }
index 6a2b94908371e230c69f3cc4a6d65760248e00c6..0831060067473eac96d0010bea94fdc858f85762 100644 (file)
@@ -639,7 +639,7 @@ static int bch2_inode_delete_keys(struct btree_trans *trans,
                      bch2_trans_commit(trans, NULL, NULL,
                                        BTREE_INSERT_NOFAIL);
 err:
-               if (ret && ret != -EINTR)
+               if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
                        break;
        }
 
@@ -710,7 +710,7 @@ retry:
                                BTREE_INSERT_NOFAIL);
 err:
        bch2_trans_iter_exit(&trans, &iter);
-       if (ret == -EINTR)
+       if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
                goto retry;
 
        bch2_trans_exit(&trans);
index 50fa572344a97da07db4617f1238417901bd7ac8..93771f834620bd45130c399461a778f42a914167 100644 (file)
@@ -312,7 +312,7 @@ int bch2_extent_update(struct btree_trans *trans,
 }
 
 /*
- * Returns -EINTR if we had to drop locks:
+ * Returns -BCH_ERR_transaction_restart if we had to drop locks:
  */
 int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter,
                   subvol_inum inum, u64 end,
@@ -325,7 +325,8 @@ int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter,
        int ret = 0, ret2 = 0;
        u32 snapshot;
 
-       while (!ret || ret == -EINTR) {
+       while (!ret ||
+              bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
                struct disk_reservation disk_res =
                        bch2_disk_reservation_init(c, 0);
                struct bkey_i delete;
@@ -384,7 +385,10 @@ int bch2_fpunch(struct bch_fs *c, subvol_inum inum, u64 start, u64 end,
        bch2_trans_iter_exit(&trans, &iter);
        bch2_trans_exit(&trans);
 
-       return ret == -EINTR ? 0 : ret;
+       if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+               ret = 0;
+
+       return ret;
 }
 
 int bch2_write_index_default(struct bch_write_op *op)
@@ -415,7 +419,7 @@ int bch2_write_index_default(struct bch_write_op *op)
 
                ret = bch2_subvolume_get_snapshot(&trans, inum.subvol,
                                                  &sk.k->k.p.snapshot);
-               if (ret == -EINTR)
+               if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
                        continue;
                if (ret)
                        break;
@@ -430,7 +434,7 @@ int bch2_write_index_default(struct bch_write_op *op)
                                         op->flags & BCH_WRITE_CHECK_ENOSPC);
                bch2_trans_iter_exit(&trans, &iter);
 
-               if (ret == -EINTR)
+               if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
                        continue;
                if (ret)
                        break;
@@ -580,14 +584,14 @@ static void __bch2_write_index(struct bch_write_op *op)
                u64 sectors_start = keylist_sectors(keys);
                int ret = op->index_update_fn(op);
 
-               BUG_ON(ret == -EINTR);
+               BUG_ON(bch2_err_matches(ret, BCH_ERR_transaction_restart));
                BUG_ON(keylist_sectors(keys) && !ret);
 
                op->written += sectors_start - keylist_sectors(keys);
 
                if (ret) {
                        bch_err_inum_ratelimited(c, op->pos.inode,
-                               "write error %i from btree update", ret);
+                               "write error while doing btree update: %s", bch2_err_str(ret));
                        op->error = ret;
                }
        }
@@ -1915,6 +1919,7 @@ static void bch2_read_endio(struct bio *bio)
        }
 
        if (rbio->narrow_crcs ||
+           rbio->promote ||
            crc_is_compressed(rbio->pick.crc) ||
            bch2_csum_type_is_encryption(rbio->pick.crc.csum_type))
                context = RBIO_CONTEXT_UNBOUND, wq = system_unbound_wq;
@@ -2316,10 +2321,9 @@ retry:
                 * read_extent -> io_time_reset may cause a transaction restart
                 * without returning an error, we need to check for that here:
                 */
-               if (!bch2_trans_relock(&trans)) {
-                       ret = -EINTR;
+               ret = bch2_trans_relock(&trans);
+               if (ret)
                        break;
-               }
 
                bch2_btree_iter_set_pos(&iter,
                                POS(inum.inum, bvec_iter.bi_sector));
@@ -2373,7 +2377,9 @@ retry:
 err:
        bch2_trans_iter_exit(&trans, &iter);
 
-       if (ret == -EINTR || ret == READ_RETRY || ret == READ_RETRY_AVOID)
+       if (bch2_err_matches(ret, BCH_ERR_transaction_restart) ||
+           ret == READ_RETRY ||
+           ret == READ_RETRY_AVOID)
                goto retry;
 
        bch2_trans_exit(&trans);
index b561ed787493ed0c249e5bbcc4180f3358c4e60d..d77092aa069ee66826c30da8c1d5316cac857b06 100644 (file)
@@ -883,7 +883,7 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
 
        if (!new_fs) {
                for (i = 0; i < nr_got; i++) {
-                       ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_NOFAIL,
+                       ret = bch2_trans_run(c,
                                bch2_trans_mark_metadata_bucket(&trans, ca,
                                                bu[i], BCH_DATA_journal,
                                                ca->mi.bucket_size));
@@ -1146,7 +1146,7 @@ int bch2_dev_journal_init(struct bch_dev *ca, struct bch_sb *sb)
                bch2_sb_get_journal(sb);
        struct bch_sb_field_journal_v2 *journal_buckets_v2 =
                bch2_sb_get_journal_v2(sb);
-       unsigned i;
+       unsigned i, nr_bvecs;
 
        ja->nr = 0;
 
@@ -1163,11 +1163,14 @@ int bch2_dev_journal_init(struct bch_dev *ca, struct bch_sb *sb)
        if (!ja->bucket_seq)
                return -ENOMEM;
 
-       ca->journal.bio = bio_kmalloc(GFP_KERNEL,
-                       DIV_ROUND_UP(JOURNAL_ENTRY_SIZE_MAX, PAGE_SIZE));
+       nr_bvecs = DIV_ROUND_UP(JOURNAL_ENTRY_SIZE_MAX, PAGE_SIZE);
+
+       ca->journal.bio = bio_kmalloc(nr_bvecs, GFP_KERNEL);
        if (!ca->journal.bio)
                return -ENOMEM;
 
+       bio_init(ca->journal.bio, NULL, ca->journal.bio->bi_inline_vecs, nr_bvecs, 0);
+
        ja->buckets = kcalloc(ja->nr, sizeof(u64), GFP_KERNEL);
        if (!ja->buckets)
                return -ENOMEM;
index 0ff78a274d4c0422f078e0959f2ef49cdf53966c..107521e10ff9fffb3b3cc3fd646921e84964fe4d 100644 (file)
@@ -197,7 +197,7 @@ static void journal_entry_null_range(void *start, void *end)
                bch_err(c, "corrupt metadata before write:\n"           \
                        msg, ##__VA_ARGS__);                            \
                if (bch2_fs_inconsistent(c)) {                          \
-                       ret = BCH_FSCK_ERRORS_NOT_FIXED;                \
+                       ret = -BCH_ERR_fsck_errors_not_fixed;           \
                        goto fsck_err;                                  \
                }                                                       \
                break;                                                  \
@@ -823,20 +823,20 @@ static int journal_read_bucket(struct bch_dev *ca,
        while (offset < end) {
                if (!sectors_read) {
                        struct bio *bio;
+                       unsigned nr_bvecs;
 reread:
                        sectors_read = min_t(unsigned,
                                end - offset, buf->size >> 9);
+                       nr_bvecs = buf_pages(buf->data, sectors_read << 9);
 
-                       bio = bio_kmalloc(GFP_KERNEL,
-                                         buf_pages(buf->data,
-                                                   sectors_read << 9));
-                       bio_set_dev(bio, ca->disk_sb.bdev);
-                       bio->bi_iter.bi_sector  = offset;
-                       bio_set_op_attrs(bio, REQ_OP_READ, 0);
+                       bio = bio_kmalloc(nr_bvecs, GFP_KERNEL);
+                       bio_init(bio, ca->disk_sb.bdev, bio->bi_inline_vecs, nr_bvecs, REQ_OP_READ);
+
+                       bio->bi_iter.bi_sector = offset;
                        bch2_bio_map(bio, buf->data, sectors_read << 9);
 
                        ret = submit_bio_wait(bio);
-                       bio_put(bio);
+                       kfree(bio);
 
                        if (bch2_dev_io_err_on(ret, ca,
                                               "journal read error: sector %llu",
@@ -858,7 +858,7 @@ reread:
                                    end - offset, sectors_read,
                                    READ);
                switch (ret) {
-               case BCH_FSCK_OK:
+               case 0:
                        sectors = vstruct_sectors(j, c->block_bits);
                        break;
                case JOURNAL_ENTRY_REREAD:
index fdc94e831a86d63b6d0c07fcc3b0eee8e72fe288..9f8b63b340f71b3280c563e9aa41c14bc1fdc3de 100644 (file)
@@ -2,6 +2,7 @@
 
 #include "bcachefs.h"
 #include "btree_key_cache.h"
+#include "errcode.h"
 #include "error.h"
 #include "journal.h"
 #include "journal_io.h"
@@ -282,11 +283,11 @@ void bch2_journal_do_discards(struct journal *j)
                while (should_discard_bucket(j, ja)) {
                        if (!c->opts.nochanges &&
                            ca->mi.discard &&
-                           blk_queue_discard(bdev_get_queue(ca->disk_sb.bdev)))
+                           bdev_max_discard_sectors(ca->disk_sb.bdev))
                                blkdev_issue_discard(ca->disk_sb.bdev,
                                        bucket_to_sector(ca,
                                                ja->buckets[ja->discard_idx]),
-                                       ca->mi.bucket_size, GFP_NOIO, 0);
+                                       ca->mi.bucket_size, GFP_NOIO);
 
                        spin_lock(&j->lock);
                        ja->discard_idx = (ja->discard_idx + 1) % ja->nr;
@@ -740,15 +741,17 @@ int bch2_journal_reclaim_start(struct journal *j)
 {
        struct bch_fs *c = container_of(j, struct bch_fs, journal);
        struct task_struct *p;
+       int ret;
 
        if (j->reclaim_thread)
                return 0;
 
        p = kthread_create(bch2_journal_reclaim_thread, j,
                           "bch-reclaim/%s", c->name);
-       if (IS_ERR(p)) {
-               bch_err(c, "error creating journal reclaim thread: %li", PTR_ERR(p));
-               return PTR_ERR(p);
+       ret = PTR_ERR_OR_ZERO(p);
+       if (ret) {
+               bch_err(c, "error creating journal reclaim thread: %s", bch2_err_str(ret));
+               return ret;
        }
 
        get_task_struct(p);
index d9b4042a2e4aa062a726b839c9ebf4292b3980eb..5c555b3703c0947006176f11ce9736717e9275d9 100644 (file)
@@ -272,7 +272,7 @@ retry:
                       !test_bit(BCH_FS_STOPPING, &c->flags))
                        b = bch2_btree_iter_next_node(&iter);
 
-               if (ret == -EINTR)
+               if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
                        goto retry;
 
                bch2_trans_iter_exit(&trans, &iter);
index 94ecb3a397602d90487887d0706a244556788ece..53e607d72274cd5458a95d7a846d2bc17a6ba20b 100644 (file)
@@ -130,25 +130,18 @@ int bch2_lru_change(struct btree_trans *trans, u64 id, u64 idx,
 }
 
 static int bch2_check_lru_key(struct btree_trans *trans,
-                             struct btree_iter *lru_iter)
+                             struct btree_iter *lru_iter,
+                             struct bkey_s_c lru_k)
 {
        struct bch_fs *c = trans->c;
        struct btree_iter iter;
-       struct bkey_s_c lru_k, k;
+       struct bkey_s_c k;
        struct bch_alloc_v4 a;
        struct printbuf buf1 = PRINTBUF;
        struct printbuf buf2 = PRINTBUF;
        struct bpos alloc_pos;
        int ret;
 
-       lru_k = bch2_btree_iter_peek(lru_iter);
-       if (!lru_k.k)
-               return 0;
-
-       ret = bkey_err(lru_k);
-       if (ret)
-               return ret;
-
        alloc_pos = POS(lru_k.k->p.inode,
                        le64_to_cpu(bkey_s_c_to_lru(lru_k).v->idx));
 
@@ -202,16 +195,10 @@ int bch2_check_lrus(struct bch_fs *c)
 
        bch2_trans_init(&trans, c, 0, 0);
 
-       for_each_btree_key(&trans, iter, BTREE_ID_lru, POS_MIN,
-                          BTREE_ITER_PREFETCH, k, ret) {
-               ret = commit_do(&trans, NULL, NULL,
-                                     BTREE_INSERT_NOFAIL|
-                                     BTREE_INSERT_LAZY_RW,
-                       bch2_check_lru_key(&trans, &iter));
-               if (ret)
-                       break;
-       }
-       bch2_trans_iter_exit(&trans, &iter);
+       ret = for_each_btree_key_commit(&trans, iter,
+                       BTREE_ID_lru, POS_MIN, BTREE_ITER_PREFETCH, k,
+                       NULL, NULL, BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW,
+               bch2_check_lru_key(&trans, &iter, k));
 
        bch2_trans_exit(&trans);
        return ret;
index 5345697f2712cebdf44a59022cb3d93be1edf2c5..8b258d966d042f73e9448a3086373d8d8a1d6b5f 100644 (file)
@@ -8,6 +8,7 @@
 #include "btree_update.h"
 #include "btree_update_interior.h"
 #include "buckets.h"
+#include "errcode.h"
 #include "extents.h"
 #include "io.h"
 #include "journal.h"
@@ -35,85 +36,76 @@ static int drop_dev_ptrs(struct bch_fs *c, struct bkey_s k,
        return 0;
 }
 
-static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags,
-                                  enum btree_id btree_id)
+static int bch2_dev_usrdata_drop_key(struct btree_trans *trans,
+                                    struct btree_iter *iter,
+                                    struct bkey_s_c k,
+                                    unsigned dev_idx,
+                                    int flags)
+{
+       struct bch_fs *c = trans->c;
+       struct bkey_i *n;
+       int ret;
+
+       if (!bch2_bkey_has_device(k, dev_idx))
+               return 0;
+
+       n = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
+       ret = PTR_ERR_OR_ZERO(n);
+       if (ret)
+               return ret;
+
+       bkey_reassemble(n, k);
+
+       ret = drop_dev_ptrs(c, bkey_i_to_s(n), dev_idx, flags, false);
+       if (ret)
+               return ret;
+
+       /*
+        * If the new extent no longer has any pointers, bch2_extent_normalize()
+        * will do the appropriate thing with it (turning it into a
+        * KEY_TYPE_error key, or just a discard if it was a cached extent)
+        */
+       bch2_extent_normalize(c, bkey_i_to_s(n));
+
+       /*
+        * Since we're not inserting through an extent iterator
+        * (BTREE_ITER_ALL_SNAPSHOTS iterators aren't extent iterators),
+        * we aren't using the extent overwrite path to delete, we're
+        * just using the normal key deletion path:
+        */
+       if (bkey_deleted(&n->k))
+               n->k.size = 0;
+
+       return bch2_trans_update(trans, iter, n, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
+}
+
+static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
 {
        struct btree_trans trans;
        struct btree_iter iter;
        struct bkey_s_c k;
-       struct bkey_buf sk;
+       enum btree_id id;
        int ret = 0;
 
-       bch2_bkey_buf_init(&sk);
        bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
 
-       bch2_trans_iter_init(&trans, &iter, btree_id, POS_MIN,
-                            BTREE_ITER_PREFETCH|
-                            BTREE_ITER_ALL_SNAPSHOTS);
-
-       while ((bch2_trans_begin(&trans),
-               (k = bch2_btree_iter_peek(&iter)).k) &&
-              !(ret = bkey_err(k))) {
-               if (!bch2_bkey_has_device(k, dev_idx)) {
-                       bch2_btree_iter_advance(&iter);
+       for (id = 0; id < BTREE_ID_NR; id++) {
+               if (!btree_type_has_ptrs(id))
                        continue;
-               }
 
-               bch2_bkey_buf_reassemble(&sk, c, k);
-
-               ret = drop_dev_ptrs(c, bkey_i_to_s(sk.k),
-                                   dev_idx, flags, false);
-               if (ret)
-                       break;
-
-               /*
-                * If the new extent no longer has any pointers, bch2_extent_normalize()
-                * will do the appropriate thing with it (turning it into a
-                * KEY_TYPE_error key, or just a discard if it was a cached extent)
-                */
-               bch2_extent_normalize(c, bkey_i_to_s(sk.k));
-
-               /*
-                * Since we're not inserting through an extent iterator
-                * (BTREE_ITER_ALL_SNAPSHOTS iterators aren't extent iterators),
-                * we aren't using the extent overwrite path to delete, we're
-                * just using the normal key deletion path:
-                */
-               if (bkey_deleted(&sk.k->k))
-                       sk.k->k.size = 0;
-
-               ret   = bch2_btree_iter_traverse(&iter) ?:
-                       bch2_trans_update(&trans, &iter, sk.k,
-                                         BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
-                       bch2_trans_commit(&trans, NULL, NULL,
-                                       BTREE_INSERT_NOFAIL);
-
-               /*
-                * don't want to leave ret == -EINTR, since if we raced and
-                * something else overwrote the key we could spuriously return
-                * -EINTR below:
-                */
-               if (ret == -EINTR)
-                       ret = 0;
+               ret = for_each_btree_key_commit(&trans, iter, id, POS_MIN,
+                               BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
+                               NULL, NULL, BTREE_INSERT_NOFAIL,
+                       bch2_dev_usrdata_drop_key(&trans, &iter, k, dev_idx, flags));
                if (ret)
                        break;
        }
-       bch2_trans_iter_exit(&trans, &iter);
 
        bch2_trans_exit(&trans);
-       bch2_bkey_buf_exit(&sk, c);
-
-       BUG_ON(ret == -EINTR);
 
        return ret;
 }
 
-static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
-{
-       return  __bch2_dev_usrdata_drop(c, dev_idx, flags, BTREE_ID_extents) ?:
-               __bch2_dev_usrdata_drop(c, dev_idx, flags, BTREE_ID_reflink);
-}
-
 static int bch2_dev_metadata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
 {
        struct btree_trans trans;
@@ -154,19 +146,20 @@ retry:
                        }
 
                        ret = bch2_btree_node_update_key(&trans, &iter, b, k.k, false);
-                       if (ret == -EINTR) {
+                       if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
                                ret = 0;
                                continue;
                        }
 
                        if (ret) {
-                               bch_err(c, "Error updating btree node key: %i", ret);
+                               bch_err(c, "Error updating btree node key: %s",
+                                       bch2_err_str(ret));
                                break;
                        }
 next:
                        bch2_btree_iter_next_node(&iter);
                }
-               if (ret == -EINTR)
+               if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
                        goto retry;
 
                bch2_trans_iter_exit(&trans, &iter);
@@ -181,7 +174,7 @@ err:
        bch2_trans_exit(&trans);
        bch2_bkey_buf_exit(&k, c);
 
-       BUG_ON(ret == -EINTR);
+       BUG_ON(bch2_err_matches(ret, BCH_ERR_transaction_restart));
 
        return ret;
 }
index 9748b865325d42a2267637e326f45b6e33769877..2fc247451390d72cd9be8062e5f4397b0e0c6149 100644 (file)
@@ -9,6 +9,7 @@
 #include "btree_update_interior.h"
 #include "disk_groups.h"
 #include "ec.h"
+#include "errcode.h"
 #include "error.h"
 #include "inode.h"
 #include "io.h"
@@ -370,7 +371,7 @@ static int move_get_io_opts(struct btree_trans *trans,
        ret = lookup_inode(trans,
                           SPOS(0, k.k->p.inode, k.k->p.snapshot),
                           &inode);
-       if (ret == -EINTR)
+       if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
                return ret;
 
        if (!ret)
@@ -418,7 +419,7 @@ static int __bch2_move_data(struct moving_context *ctxt,
                        break;
 
                ret = bkey_err(k);
-               if (ret == -EINTR)
+               if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
                        continue;
                if (ret)
                        break;
@@ -449,7 +450,7 @@ static int __bch2_move_data(struct moving_context *ctxt,
                ret2 = bch2_move_extent(&trans, ctxt, io_opts,
                                        btree_id, k, data_opts);
                if (ret2) {
-                       if (ret2 == -EINTR)
+                       if (bch2_err_matches(ret2, BCH_ERR_transaction_restart))
                                continue;
 
                        if (ret2 == -ENOMEM) {
@@ -574,7 +575,7 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt,
 
                ret = bch2_get_next_backpointer(&trans, bucket, gen,
                                                &bp_offset, &bp);
-               if (ret == -EINTR)
+               if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
                        continue;
                if (ret)
                        goto err;
@@ -589,7 +590,7 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt,
                        k = bch2_backpointer_get_key(&trans, &iter,
                                                bucket, bp_offset, bp);
                        ret = bkey_err(k);
-                       if (ret == -EINTR)
+                       if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
                                continue;
                        if (ret)
                                goto err;
@@ -616,7 +617,7 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt,
 
                        ret = bch2_move_extent(&trans, ctxt, io_opts,
                                               bp.btree_id, k, data_opts);
-                       if (ret == -EINTR)
+                       if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
                                continue;
                        if (ret == -ENOMEM) {
                                /* memory allocation failure, wait for some IO to finish */
@@ -635,7 +636,7 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt,
                        b = bch2_backpointer_get_node(&trans, &iter,
                                                bucket, bp_offset, bp);
                        ret = PTR_ERR_OR_ZERO(b);
-                       if (ret == -EINTR)
+                       if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
                                continue;
                        if (ret)
                                goto err;
@@ -645,7 +646,7 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt,
                        ret = bch2_btree_node_rewrite(&trans, &iter, b, 0);
                        bch2_trans_iter_exit(&trans, &iter);
 
-                       if (ret == -EINTR)
+                       if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
                                continue;
                        if (ret)
                                goto err;
@@ -740,14 +741,14 @@ retry:
                                goto next;
 
                        ret = bch2_btree_node_rewrite(&trans, &iter, b, 0) ?: ret;
-                       if (ret == -EINTR)
+                       if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
                                continue;
                        if (ret)
                                break;
 next:
                        bch2_btree_iter_next_node(&iter);
                }
-               if (ret == -EINTR)
+               if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
                        goto retry;
 
                bch2_trans_iter_exit(&trans, &iter);
@@ -759,7 +760,7 @@ next:
        bch2_trans_exit(&trans);
 
        if (ret)
-               bch_err(c, "error %i in bch2_move_btree", ret);
+               bch_err(c, "error in %s(): %s", __func__, bch2_err_str(ret));
 
        bch2_btree_interior_updates_flush(c);
 
index f9ad4cb26905a70a79c4de6ebafc2ae30338e69d..f913864eaa4f56ca21b83c604f4b0a3d802389a4 100644 (file)
@@ -13,6 +13,7 @@
 #include "buckets.h"
 #include "clock.h"
 #include "disk_groups.h"
+#include "errcode.h"
 #include "error.h"
 #include "extents.h"
 #include "eytzinger.h"
@@ -162,7 +163,7 @@ static int bch2_copygc(struct bch_fs *c)
        bch2_moving_ctxt_exit(&ctxt);
 
        if (ret < 0)
-               bch_err(c, "error %i from bch2_move_data() in copygc", ret);
+               bch_err(c, "error from bch2_move_data() in copygc: %s", bch2_err_str(ret));
 
        trace_copygc(c, atomic64_read(&move_stats.sectors_moved), 0, 0, 0);
        return ret;
@@ -251,6 +252,7 @@ void bch2_copygc_stop(struct bch_fs *c)
 int bch2_copygc_start(struct bch_fs *c)
 {
        struct task_struct *t;
+       int ret;
 
        if (c->copygc_thread)
                return 0;
@@ -262,9 +264,10 @@ int bch2_copygc_start(struct bch_fs *c)
                return -ENOMEM;
 
        t = kthread_create(bch2_copygc_thread, c, "bch-copygc/%s", c->name);
-       if (IS_ERR(t)) {
-               bch_err(c, "error creating copygc thread: %li", PTR_ERR(t));
-               return PTR_ERR(t);
+       ret = PTR_ERR_OR_ZERO(t);
+       if (ret) {
+               bch_err(c, "error creating copygc thread: %s", bch2_err_str(ret));
+               return ret;
        }
 
        get_task_struct(t);
index 922738247d03967b439d0ceb75ed451218f6d31d..e85c8136a46e9b7ad52e987c9d299c4d4c9cc581 100644 (file)
@@ -2,6 +2,7 @@
 #ifndef _BCACHEFS_MOVINGGC_H
 #define _BCACHEFS_MOVINGGC_H
 
+unsigned long bch2_copygc_wait_amount(struct bch_fs *);
 void bch2_copygc_stop(struct bch_fs *);
 int bch2_copygc_start(struct bch_fs *);
 void bch2_fs_copygc_init(struct bch_fs *);
index 2f5f49cb774d92c4e959b9084ad1858894d642ee..5b8586ecb37431150a4321a73876f52691f8e768 100644 (file)
@@ -341,6 +341,11 @@ enum opt_type {
          OPT_BOOL(),                                                   \
          BCH2_NO_SB_OPT,                       false,                          \
          NULL,         "Don't open device in exclusive mode")          \
+       x(direct_io,                    u8,                             \
+         OPT_FS|OPT_MOUNT,                                             \
+         OPT_BOOL(),                                                   \
+         BCH2_NO_SB_OPT,                       true,                   \
+         NULL,         "Use O_DIRECT (userspace only)")                \
        x(sb,                           u64,                            \
          OPT_MOUNT,                                                    \
          OPT_UINT(0, S64_MAX),                                         \
index e35a6d1f31e907a5d9d706201ba6cc9428761cd8..454c76e03be90484ebb68c5cbb55b2537ae1429f 100644 (file)
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 #include "bcachefs.h"
 #include "btree_update.h"
+#include "errcode.h"
 #include "inode.h"
 #include "quota.h"
 #include "subvolume.h"
@@ -370,6 +371,9 @@ static int __bch2_quota_set(struct bch_fs *c, struct bkey_s_c k)
 
        BUG_ON(k.k->p.inode >= QTYP_NR);
 
+       if (!((1U << k.k->p.inode) & enabled_qtypes(c)))
+               return 0;
+
        switch (k.k->type) {
        case KEY_TYPE_quota:
                dq = bkey_s_c_to_quota(k);
@@ -393,30 +397,6 @@ static int __bch2_quota_set(struct bch_fs *c, struct bkey_s_c k)
        return 0;
 }
 
-static int bch2_quota_init_type(struct bch_fs *c, enum quota_types type)
-{
-       struct btree_trans trans;
-       struct btree_iter iter;
-       struct bkey_s_c k;
-       int ret = 0;
-
-       bch2_trans_init(&trans, c, 0, 0);
-
-       for_each_btree_key(&trans, iter, BTREE_ID_quotas, POS(type, 0),
-                          BTREE_ITER_PREFETCH, k, ret) {
-               if (k.k->p.inode != type)
-                       break;
-
-               ret = __bch2_quota_set(c, k);
-               if (ret)
-                       break;
-       }
-       bch2_trans_iter_exit(&trans, &iter);
-
-       bch2_trans_exit(&trans);
-       return ret;
-}
-
 void bch2_fs_quota_exit(struct bch_fs *c)
 {
        unsigned i;
@@ -491,8 +471,6 @@ advance:
 
 int bch2_fs_quota_read(struct bch_fs *c)
 {
-       unsigned i, qtypes = enabled_qtypes(c);
-       struct bch_memquota_type *q;
        struct btree_trans trans;
        struct btree_iter iter;
        struct bkey_s_c k;
@@ -502,23 +480,16 @@ int bch2_fs_quota_read(struct bch_fs *c)
        bch2_sb_quota_read(c);
        mutex_unlock(&c->sb_lock);
 
-       for_each_set_qtype(c, i, q, qtypes) {
-               ret = bch2_quota_init_type(c, i);
-               if (ret)
-                       return ret;
-       }
-
        bch2_trans_init(&trans, c, 0, 0);
 
-       ret = for_each_btree_key2(&trans, iter, BTREE_ID_inodes,
-                            POS_MIN,
-                            BTREE_ITER_INTENT|
-                            BTREE_ITER_PREFETCH|
-                            BTREE_ITER_ALL_SNAPSHOTS,
-                            k,
+       ret = for_each_btree_key2(&trans, iter, BTREE_ID_quotas,
+                       POS_MIN, BTREE_ITER_PREFETCH, k,
+               __bch2_quota_set(c, k)) ?:
+             for_each_btree_key2(&trans, iter, BTREE_ID_inodes,
+                       POS_MIN, BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
                bch2_fs_quota_read_inode(&trans, &iter, k));
        if (ret)
-               bch_err(c, "err reading inodes in quota init: %i", ret);
+               bch_err(c, "err in quota_read: %s", bch2_err_str(ret));
 
        bch2_trans_exit(&trans);
        return ret;
index 31da409338326abe280a9e41a5c6b89277128f28..ecc64dd92b05074b958bf9fe33255b12703b6929 100644 (file)
@@ -6,6 +6,7 @@
 #include "buckets.h"
 #include "clock.h"
 #include "disk_groups.h"
+#include "errcode.h"
 #include "extents.h"
 #include "io.h"
 #include "move.h"
@@ -331,6 +332,7 @@ void bch2_rebalance_stop(struct bch_fs *c)
 int bch2_rebalance_start(struct bch_fs *c)
 {
        struct task_struct *p;
+       int ret;
 
        if (c->rebalance.thread)
                return 0;
@@ -339,9 +341,10 @@ int bch2_rebalance_start(struct bch_fs *c)
                return 0;
 
        p = kthread_create(bch2_rebalance_thread, c, "bch-rebalance/%s", c->name);
-       if (IS_ERR(p)) {
-               bch_err(c, "error creating rebalance thread: %li", PTR_ERR(p));
-               return PTR_ERR(p);
+       ret = PTR_ERR_OR_ZERO(p);
+       if (ret) {
+               bch_err(c, "error creating rebalance thread: %s", bch2_err_str(ret));
+               return ret;
        }
 
        get_task_struct(p);
index 64b1e79f3182bb01f3757043140fad00a10433ee..b070bdf01500a0747fa8c644b560f3248dd15caf 100644 (file)
@@ -11,6 +11,7 @@
 #include "buckets.h"
 #include "dirent.h"
 #include "ec.h"
+#include "errcode.h"
 #include "error.h"
 #include "fs-common.h"
 #include "fsck.h"
@@ -87,9 +88,9 @@ static inline struct journal_key *idx_to_key(struct journal_keys *keys, size_t i
        return keys->d + idx_to_pos(keys, idx);
 }
 
-static size_t bch2_journal_key_search(struct journal_keys *keys,
-                                     enum btree_id id, unsigned level,
-                                     struct bpos pos)
+static size_t __bch2_journal_key_search(struct journal_keys *keys,
+                                       enum btree_id id, unsigned level,
+                                       struct bpos pos)
 {
        size_t l = 0, r = keys->nr, m;
 
@@ -107,7 +108,14 @@ static size_t bch2_journal_key_search(struct journal_keys *keys,
        BUG_ON(l &&
               __journal_key_cmp(id, level, pos, idx_to_key(keys, l - 1)) <= 0);
 
-       return idx_to_pos(keys, l);
+       return l;
+}
+
+static size_t bch2_journal_key_search(struct journal_keys *keys,
+                                     enum btree_id id, unsigned level,
+                                     struct bpos pos)
+{
+       return idx_to_pos(keys, __bch2_journal_key_search(keys, id, level, pos));
 }
 
 struct bkey_i *bch2_journal_keys_peek_upto(struct bch_fs *c, enum btree_id btree_id,
@@ -116,22 +124,21 @@ struct bkey_i *bch2_journal_keys_peek_upto(struct bch_fs *c, enum btree_id btree
 {
        struct journal_keys *keys = &c->journal_keys;
        unsigned iters = 0;
+       struct journal_key *k;
 search:
        if (!*idx)
-               *idx = bch2_journal_key_search(keys, btree_id, level, pos);
+               *idx = __bch2_journal_key_search(keys, btree_id, level, pos);
 
-       while (*idx < keys->size &&
-              keys->d[*idx].btree_id == btree_id &&
-              keys->d[*idx].level == level &&
-              bpos_cmp(keys->d[*idx].k->k.p, end_pos) <= 0) {
-               if (bpos_cmp(keys->d[*idx].k->k.p, pos) >= 0 &&
-                   !keys->d[*idx].overwritten)
-                       return keys->d[*idx].k;
+       while (*idx < keys->nr &&
+              (k = idx_to_key(keys, *idx),
+               k->btree_id == btree_id &&
+               k->level == level &&
+               bpos_cmp(k->k->k.p, end_pos) <= 0)) {
+               if (bpos_cmp(k->k->k.p, pos) >= 0 &&
+                   !k->overwritten)
+                       return k->k;
 
                (*idx)++;
-               if (*idx == keys->gap)
-                       *idx += keys->size - keys->nr;
-
                iters++;
                if (iters == 10) {
                        *idx = 0;
@@ -1153,7 +1160,7 @@ int bch2_fs_recovery(struct bch_fs *c)
 use_clean:
                if (!clean) {
                        bch_err(c, "no superblock clean section found");
-                       ret = BCH_FSCK_REPAIR_IMPOSSIBLE;
+                       ret = -BCH_ERR_fsck_repair_impossible;
                        goto err;
 
                }
@@ -1435,9 +1442,9 @@ out:
        }
 
        if (ret)
-               bch_err(c, "Error in recovery: %s (%i)", err, ret);
+               bch_err(c, "Error in recovery: %s (%s)", err, bch2_err_str(ret));
        else
-               bch_verbose(c, "ret %i", ret);
+               bch_verbose(c, "ret %s", bch2_err_str(ret));
        return ret;
 err:
 fsck_err:
index 2038e3502d8c1434ea99102a101b6278ce2d6f0d..d5c14bb2992d5d7fc4281a207140861a0cdefd1f 100644 (file)
@@ -299,7 +299,8 @@ s64 bch2_remap_range(struct bch_fs *c,
        bch2_trans_iter_init(&trans, &dst_iter, BTREE_ID_extents, dst_start,
                             BTREE_ITER_INTENT);
 
-       while ((ret == 0 || ret == -EINTR) &&
+       while ((ret == 0 ||
+               bch2_err_matches(ret, BCH_ERR_transaction_restart)) &&
               bkey_cmp(dst_iter.pos, dst_end) < 0) {
                struct disk_reservation disk_res = { 0 };
 
@@ -409,7 +410,7 @@ s64 bch2_remap_range(struct bch_fs *c,
                }
 
                bch2_trans_iter_exit(&trans, &inode_iter);
-       } while (ret2 == -EINTR);
+       } while (bch2_err_matches(ret2, BCH_ERR_transaction_restart));
 
        bch2_trans_exit(&trans);
        bch2_bkey_buf_exit(&new_src, c);
index 1a212bac2a04624709fde0d92aaa53a47af6b117..24244bc3d2fbff6703dcf5b4e27d513ebb7082dc 100644 (file)
@@ -3,6 +3,7 @@
 #include "bcachefs.h"
 #include "btree_key_cache.h"
 #include "btree_update.h"
+#include "errcode.h"
 #include "error.h"
 #include "fs.h"
 #include "subvolume.h"
@@ -291,22 +292,14 @@ int bch2_fs_check_snapshots(struct bch_fs *c)
 }
 
 static int check_subvol(struct btree_trans *trans,
-                       struct btree_iter *iter)
+                       struct btree_iter *iter,
+                       struct bkey_s_c k)
 {
-       struct bkey_s_c k;
        struct bkey_s_c_subvolume subvol;
        struct bch_snapshot snapshot;
        unsigned snapid;
        int ret;
 
-       k = bch2_btree_iter_peek(iter);
-       if (!k.k)
-               return 0;
-
-       ret = bkey_err(k);
-       if (ret)
-               return ret;
-
        if (k.k->type != KEY_TYPE_subvolume)
                return 0;
 
@@ -322,9 +315,9 @@ static int check_subvol(struct btree_trans *trans,
 
        if (BCH_SUBVOLUME_UNLINKED(subvol.v)) {
                ret = bch2_subvolume_delete(trans, iter->pos.offset);
-               if (ret && ret != -EINTR)
-                       bch_err(trans->c, "error deleting subvolume %llu: %i",
-                               iter->pos.offset, ret);
+               if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
+                       bch_err(trans->c, "error deleting subvolume %llu: %s",
+                               iter->pos.offset, bch2_err_str(ret));
                if (ret)
                        return ret;
        }
@@ -336,22 +329,15 @@ int bch2_fs_check_subvols(struct bch_fs *c)
 {
        struct btree_trans trans;
        struct btree_iter iter;
+       struct bkey_s_c k;
        int ret;
 
        bch2_trans_init(&trans, c, 0, 0);
 
-       bch2_trans_iter_init(&trans, &iter, BTREE_ID_subvolumes,
-                            POS_MIN, BTREE_ITER_PREFETCH);
-
-       do {
-               ret = commit_do(&trans, NULL, NULL,
-                                     BTREE_INSERT_LAZY_RW|
-                                     BTREE_INSERT_NOFAIL,
-                                     check_subvol(&trans, &iter));
-               if (ret)
-                       break;
-       } while (bch2_btree_iter_advance(&iter));
-       bch2_trans_iter_exit(&trans, &iter);
+       ret = for_each_btree_key_commit(&trans, iter,
+                       BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_PREFETCH, k,
+                       NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
+               check_subvol(&trans, &iter, k));
 
        bch2_trans_exit(&trans);
 
@@ -380,7 +366,7 @@ int bch2_fs_snapshots_start(struct bch_fs *c)
        bch2_trans_exit(&trans);
 
        if (ret)
-               bch_err(c, "error starting snapshots: %i", ret);
+               bch_err(c, "error starting snapshots: %s", bch2_err_str(ret));
        return ret;
 }
 
@@ -595,59 +581,27 @@ err:
        return ret;
 }
 
-static int bch2_snapshot_delete_keys_btree(struct btree_trans *trans,
-                                          snapshot_id_list *deleted,
-                                          enum btree_id btree_id)
+static int snapshot_delete_key(struct btree_trans *trans,
+                              struct btree_iter *iter,
+                              struct bkey_s_c k,
+                              snapshot_id_list *deleted,
+                              snapshot_id_list *equiv_seen,
+                              struct bpos *last_pos)
 {
        struct bch_fs *c = trans->c;
-       struct btree_iter iter;
-       struct bkey_s_c k;
-       snapshot_id_list equiv_seen = { 0 };
-       struct bpos last_pos = POS_MIN;
-       int ret = 0;
-
-       /*
-        * XXX: We should also delete whiteouts that no longer overwrite
-        * anything
-        */
+       u32 equiv = snapshot_t(c, k.k->p.snapshot)->equiv;
 
-       bch2_trans_iter_init(trans, &iter, btree_id, POS_MIN,
-                            BTREE_ITER_INTENT|
-                            BTREE_ITER_PREFETCH|
-                            BTREE_ITER_NOT_EXTENTS|
-                            BTREE_ITER_ALL_SNAPSHOTS);
-
-       while ((bch2_trans_begin(trans),
-               (k = bch2_btree_iter_peek(&iter)).k) &&
-              !(ret = bkey_err(k))) {
-               u32 equiv = snapshot_t(c, k.k->p.snapshot)->equiv;
-
-               if (bkey_cmp(k.k->p, last_pos))
-                       equiv_seen.nr = 0;
-               last_pos = k.k->p;
-
-               if (snapshot_list_has_id(deleted, k.k->p.snapshot) ||
-                   snapshot_list_has_id(&equiv_seen, equiv)) {
-                       ret = commit_do(trans, NULL, NULL,
-                                             BTREE_INSERT_NOFAIL,
-                               bch2_btree_iter_traverse(&iter) ?:
-                               bch2_btree_delete_at(trans, &iter,
-                                       BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE));
-                       if (ret)
-                               break;
-               } else {
-                       ret = snapshot_list_add(c, &equiv_seen, equiv);
-                       if (ret)
-                               break;
-               }
+       if (bkey_cmp(k.k->p, *last_pos))
+               equiv_seen->nr = 0;
+       *last_pos = k.k->p;
 
-               bch2_btree_iter_advance(&iter);
+       if (snapshot_list_has_id(deleted, k.k->p.snapshot) ||
+           snapshot_list_has_id(equiv_seen, equiv)) {
+               return bch2_btree_delete_at(trans, iter,
+                                           BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
+       } else {
+               return snapshot_list_add(c, equiv_seen, equiv);
        }
-       bch2_trans_iter_exit(trans, &iter);
-
-       darray_exit(&equiv_seen);
-
-       return ret;
 }
 
 static int bch2_delete_redundant_snapshot(struct btree_trans *trans, struct btree_iter *iter,
@@ -694,7 +648,7 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
        if (!test_bit(BCH_FS_STARTED, &c->flags)) {
                ret = bch2_fs_read_write_early(c);
                if (ret) {
-                       bch_err(c, "error deleleting dead snapshots: error going rw: %i", ret);
+                       bch_err(c, "error deleleting dead snapshots: error going rw: %s", bch2_err_str(ret));
                        return ret;
                }
        }
@@ -710,7 +664,7 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
                        NULL, NULL, 0,
                bch2_delete_redundant_snapshot(&trans, &iter, k));
        if (ret) {
-               bch_err(c, "error deleting redundant snapshots: %i", ret);
+               bch_err(c, "error deleting redundant snapshots: %s", bch2_err_str(ret));
                goto err;
        }
 
@@ -718,7 +672,7 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
                           POS_MIN, 0, k,
                bch2_snapshot_set_equiv(&trans, k));
        if (ret) {
-               bch_err(c, "error in bch2_snapshots_set_equiv: %i", ret);
+               bch_err(c, "error in bch2_snapshots_set_equiv: %s", bch2_err_str(ret));
                goto err;
        }
 
@@ -737,17 +691,27 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
        bch2_trans_iter_exit(&trans, &iter);
 
        if (ret) {
-               bch_err(c, "error walking snapshots: %i", ret);
+               bch_err(c, "error walking snapshots: %s", bch2_err_str(ret));
                goto err;
        }
 
        for (id = 0; id < BTREE_ID_NR; id++) {
+               struct bpos last_pos = POS_MIN;
+               snapshot_id_list equiv_seen = { 0 };
+
                if (!btree_type_has_snapshots(id))
                        continue;
 
-               ret = bch2_snapshot_delete_keys_btree(&trans, &deleted, id);
+               ret = for_each_btree_key_commit(&trans, iter,
+                               id, POS_MIN,
+                               BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
+                               NULL, NULL, BTREE_INSERT_NOFAIL,
+                       snapshot_delete_key(&trans, &iter, k, &deleted, &equiv_seen, &last_pos));
+
+               darray_exit(&equiv_seen);
+
                if (ret) {
-                       bch_err(c, "error deleting snapshot keys: %i", ret);
+                       bch_err(c, "error deleting snapshot keys: %s", bch2_err_str(ret));
                        goto err;
                }
        }
@@ -756,8 +720,8 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
                ret = commit_do(&trans, NULL, NULL, 0,
                        bch2_snapshot_node_delete(&trans, deleted.data[i]));
                if (ret) {
-                       bch_err(c, "error deleting snapshot %u: %i",
-                               deleted.data[i], ret);
+                       bch_err(c, "error deleting snapshot %u: %s",
+                               deleted.data[i], bch2_err_str(ret));
                        goto err;
                }
        }
@@ -913,6 +877,8 @@ int bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid)
                goto err;
 
        ret = bch2_snapshot_node_set_deleted(trans, snapid);
+       if (ret)
+               goto err;
 
        h = bch2_trans_kmalloc(trans, sizeof(*h));
        ret = PTR_ERR_OR_ZERO(h);
@@ -949,7 +915,7 @@ void bch2_subvolume_wait_for_pagecache_and_delete(struct work_struct *work)
                        ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_NOFAIL,
                                      bch2_subvolume_delete(&trans, *id));
                        if (ret) {
-                               bch_err(c, "error %i deleting subvolume %u", ret, *id);
+                               bch_err(c, "error deleting subvolume %u: %s", *id, bch2_err_str(ret));
                                break;
                        }
                }
index 8b8130993a59cdbf0946201a7dca766072ff4885..55f8c65ad725e42b3cc757b2d9fdad25f1326067 100644 (file)
@@ -101,7 +101,7 @@ void bch2_sb_field_delete(struct bch_sb_handle *sb,
 void bch2_free_super(struct bch_sb_handle *sb)
 {
        if (sb->bio)
-               bio_put(sb->bio);
+               kfree(sb->bio);
        if (!IS_ERR_OR_NULL(sb->bdev))
                blkdev_put(sb->bdev, sb->mode);
 
@@ -143,13 +143,16 @@ int bch2_sb_realloc(struct bch_sb_handle *sb, unsigned u64s)
                return -ENOMEM;
 
        if (sb->have_bio) {
-               bio = bio_kmalloc(GFP_KERNEL,
-                       DIV_ROUND_UP(new_buffer_size, PAGE_SIZE));
+               unsigned nr_bvecs = DIV_ROUND_UP(new_buffer_size, PAGE_SIZE);
+
+               bio = bio_kmalloc(nr_bvecs, GFP_KERNEL);
                if (!bio)
                        return -ENOMEM;
 
+               bio_init(bio, NULL, bio->bi_inline_vecs, nr_bvecs, 0);
+
                if (sb->bio)
-                       bio_put(sb->bio);
+                       kfree(sb->bio);
                sb->bio = bio;
        }
 
index 2908974034ca0babfc12c049277af9b2cc58ef3c..7c6348001ae3904478bc0bb5dc4fc52a19b48f86 100644 (file)
@@ -24,6 +24,7 @@
 #include "debug.h"
 #include "disk_groups.h"
 #include "ec.h"
+#include "errcode.h"
 #include "error.h"
 #include "fs.h"
 #include "fs-io.h"
@@ -930,31 +931,10 @@ out:
        up_write(&c->state_lock);
        return ret;
 err:
-       switch (ret) {
-       case BCH_FSCK_ERRORS_NOT_FIXED:
-               bch_err(c, "filesystem contains errors: please report this to the developers");
-               pr_cont("mount with -o fix_errors to repair\n");
-               break;
-       case BCH_FSCK_REPAIR_UNIMPLEMENTED:
-               bch_err(c, "filesystem contains errors: please report this to the developers");
-               pr_cont("repair unimplemented: inform the developers so that it can be added\n");
-               break;
-       case BCH_FSCK_REPAIR_IMPOSSIBLE:
-               bch_err(c, "filesystem contains errors, but repair impossible");
-               break;
-       case BCH_FSCK_UNKNOWN_VERSION:
-               bch_err(c, "unknown metadata version");
-               break;
-       case -ENOMEM:
-               bch_err(c, "cannot allocate memory");
-               break;
-       case -EIO:
-               bch_err(c, "IO error");
-               break;
-       }
+       bch_err(c, "error starting filesystem: %s", bch2_err_str(ret));
 
-       if (ret >= 0)
-               ret = -EIO;
+       if (ret < -BCH_ERR_START)
+               ret = -EINVAL;
        goto out;
 }
 
@@ -1438,7 +1418,7 @@ static int bch2_dev_remove_alloc(struct bch_fs *c, struct bch_dev *ca)
                bch2_btree_delete_range(c, BTREE_ID_alloc, start, end,
                                        BTREE_TRIGGER_NORUN, NULL);
        if (ret)
-               bch_err(c, "error %i removing dev alloc info", ret);
+               bch_err(c, "error removing dev alloc info: %s", bch2_err_str(ret));
 
        return ret;
 }
@@ -1466,7 +1446,7 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
 
        ret = bch2_dev_data_drop(c, ca->dev_idx, flags);
        if (ret) {
-               bch_err(ca, "Remove failed: error %i dropping data", ret);
+               bch_err(ca, "Remove failed: error dropping data: %s", bch2_err_str(ret));
                goto err;
        }
 
@@ -1478,7 +1458,7 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
 
        ret = bch2_journal_flush_device_pins(&c->journal, ca->dev_idx);
        if (ret) {
-               bch_err(ca, "Remove failed: error %i flushing journal", ret);
+               bch_err(ca, "Remove failed: error flushing journal: %s", bch2_err_str(ret));
                goto err;
        }
 
@@ -1490,7 +1470,7 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
 
        ret = bch2_replicas_gc2(c);
        if (ret) {
-               bch_err(ca, "Remove failed: error %i from replicas gc", ret);
+               bch_err(ca, "Remove failed: error from replicas gc: %s", bch2_err_str(ret));
                goto err;
        }
 
@@ -1554,7 +1534,7 @@ int bch2_dev_add(struct bch_fs *c, const char *path)
 
        ret = bch2_read_super(path, &opts, &sb);
        if (ret) {
-               bch_err(c, "device add error: error reading super: %i", ret);
+               bch_err(c, "device add error: error reading super: %s", bch2_err_str(ret));
                goto err;
        }
 
@@ -1647,13 +1627,13 @@ have_slot:
 
        ret = bch2_trans_mark_dev_sb(c, ca);
        if (ret) {
-               bch_err(c, "device add error: error marking new superblock: %i", ret);
+               bch_err(c, "device add error: error marking new superblock: %s", bch2_err_str(ret));
                goto err_late;
        }
 
        ret = bch2_fs_freespace_init(c);
        if (ret) {
-               bch_err(c, "device add error: error initializing free space: %i", ret);
+               bch_err(c, "device add error: error initializing free space: %s", bch2_err_str(ret));
                goto err_late;
        }
 
@@ -1715,8 +1695,8 @@ int bch2_dev_online(struct bch_fs *c, const char *path)
 
        ret = bch2_trans_mark_dev_sb(c, ca);
        if (ret) {
-               bch_err(c, "error bringing %s online: error %i from bch2_trans_mark_dev_sb",
-                       path, ret);
+               bch_err(c, "error bringing %s online: error from bch2_trans_mark_dev_sb: %s",
+                       path, bch2_err_str(ret));
                goto err;
        }
 
@@ -1785,7 +1765,7 @@ int bch2_dev_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
 
        ret = bch2_dev_buckets_resize(c, ca, nbuckets);
        if (ret) {
-               bch_err(ca, "Resize error: %i", ret);
+               bch_err(ca, "Resize error: %s", bch2_err_str(ret));
                goto err;
        }
 
index 57245caa255f0682f741239ee6ecebb17c885017..56058a56f2a287f812f2b6cf748633f480dae403 100644 (file)
@@ -46,7 +46,7 @@ static int test_delete(struct bch_fs *c, u64 nr)
                bch2_btree_iter_traverse(&iter) ?:
                bch2_trans_update(&trans, &iter, &k.k_i, 0));
        if (ret) {
-               bch_err(c, "update error in test_delete: %i", ret);
+               bch_err(c, "update error in test_delete: %s", bch2_err_str(ret));
                goto err;
        }
 
@@ -55,7 +55,7 @@ static int test_delete(struct bch_fs *c, u64 nr)
                bch2_btree_iter_traverse(&iter) ?:
                bch2_btree_delete_at(&trans, &iter, 0));
        if (ret) {
-               bch_err(c, "delete error (first) in test_delete: %i", ret);
+               bch_err(c, "delete error (first) in test_delete: %s", bch2_err_str(ret));
                goto err;
        }
 
@@ -64,7 +64,7 @@ static int test_delete(struct bch_fs *c, u64 nr)
                bch2_btree_iter_traverse(&iter) ?:
                bch2_btree_delete_at(&trans, &iter, 0));
        if (ret) {
-               bch_err(c, "delete error (second) in test_delete: %i", ret);
+               bch_err(c, "delete error (second) in test_delete: %s", bch2_err_str(ret));
                goto err;
        }
 err:
@@ -92,7 +92,7 @@ static int test_delete_written(struct bch_fs *c, u64 nr)
                bch2_btree_iter_traverse(&iter) ?:
                bch2_trans_update(&trans, &iter, &k.k_i, 0));
        if (ret) {
-               bch_err(c, "update error in test_delete_written: %i", ret);
+               bch_err(c, "update error in test_delete_written: %s", bch2_err_str(ret));
                goto err;
        }
 
@@ -103,7 +103,7 @@ static int test_delete_written(struct bch_fs *c, u64 nr)
                bch2_btree_iter_traverse(&iter) ?:
                bch2_btree_delete_at(&trans, &iter, 0));
        if (ret) {
-               bch_err(c, "delete error in test_delete_written: %i", ret);
+               bch_err(c, "delete error in test_delete_written: %s", bch2_err_str(ret));
                goto err;
        }
 err:
@@ -136,7 +136,7 @@ static int test_iterate(struct bch_fs *c, u64 nr)
                ret = bch2_btree_insert(c, BTREE_ID_xattrs, &k.k_i,
                                        NULL, NULL, 0);
                if (ret) {
-                       bch_err(c, "insert error in test_iterate: %i", ret);
+                       bch_err(c, "insert error in test_iterate: %s", bch2_err_str(ret));
                        goto err;
                }
        }
@@ -145,20 +145,30 @@ static int test_iterate(struct bch_fs *c, u64 nr)
 
        i = 0;
 
-       for_each_btree_key(&trans, iter, BTREE_ID_xattrs,
-                          SPOS(0, 0, U32_MAX), 0, k, ret) {
-               if (k.k->p.inode)
-                       break;
-
+       ret = for_each_btree_key2(&trans, iter, BTREE_ID_xattrs,
+                                 SPOS(0, 0, U32_MAX), 0, k, ({
                BUG_ON(k.k->p.offset != i++);
+               0;
+       }));
+       if (ret) {
+               bch_err(c, "%s(): error iterating forwards: %s", __func__, bch2_err_str(ret));
+               goto err;
        }
 
        BUG_ON(i != nr);
 
        pr_info("iterating backwards");
 
-       while (!IS_ERR_OR_NULL((k = bch2_btree_iter_prev(&iter)).k))
-               BUG_ON(k.k->p.offset != --i);
+       ret = for_each_btree_key_reverse(&trans, iter, BTREE_ID_xattrs,
+                                        SPOS(0, U64_MAX, U32_MAX), 0, k,
+               ({
+                       BUG_ON(k.k->p.offset != --i);
+                       0;
+               }));
+       if (ret) {
+               bch_err(c, "%s(): error iterating backwards: %s", __func__, bch2_err_str(ret));
+               goto err;
+       }
 
        BUG_ON(i);
 err:
@@ -192,7 +202,7 @@ static int test_iterate_extents(struct bch_fs *c, u64 nr)
                ret = bch2_btree_insert(c, BTREE_ID_extents, &k.k_i,
                                        NULL, NULL, 0);
                if (ret) {
-                       bch_err(c, "insert error in test_iterate_extents: %i", ret);
+                       bch_err(c, "insert error in test_iterate_extents: %s", bch2_err_str(ret));
                        goto err;
                }
        }
@@ -201,19 +211,31 @@ static int test_iterate_extents(struct bch_fs *c, u64 nr)
 
        i = 0;
 
-       for_each_btree_key(&trans, iter, BTREE_ID_extents,
-                          SPOS(0, 0, U32_MAX), 0, k, ret) {
+       ret = for_each_btree_key2(&trans, iter, BTREE_ID_extents,
+                                 SPOS(0, 0, U32_MAX), 0, k, ({
                BUG_ON(bkey_start_offset(k.k) != i);
                i = k.k->p.offset;
+               0;
+       }));
+       if (ret) {
+               bch_err(c, "%s(): error iterating forwards: %s", __func__, bch2_err_str(ret));
+               goto err;
        }
 
        BUG_ON(i != nr);
 
        pr_info("iterating backwards");
 
-       while (!IS_ERR_OR_NULL((k = bch2_btree_iter_prev(&iter)).k)) {
-               BUG_ON(k.k->p.offset != i);
-               i = bkey_start_offset(k.k);
+       ret = for_each_btree_key_reverse(&trans, iter, BTREE_ID_extents,
+                                        SPOS(0, U64_MAX, U32_MAX), 0, k,
+               ({
+                       BUG_ON(k.k->p.offset != i);
+                       i = bkey_start_offset(k.k);
+                       0;
+               }));
+       if (ret) {
+               bch_err(c, "%s(): error iterating backwards: %s", __func__, bch2_err_str(ret));
+               goto err;
        }
 
        BUG_ON(i);
@@ -247,7 +269,7 @@ static int test_iterate_slots(struct bch_fs *c, u64 nr)
                ret = bch2_btree_insert(c, BTREE_ID_xattrs, &k.k_i,
                                        NULL, NULL, 0);
                if (ret) {
-                       bch_err(c, "insert error in test_iterate_slots: %i", ret);
+                       bch_err(c, "insert error in test_iterate_slots: %s", bch2_err_str(ret));
                        goto err;
                }
        }
@@ -256,15 +278,16 @@ static int test_iterate_slots(struct bch_fs *c, u64 nr)
 
        i = 0;
 
-       for_each_btree_key(&trans, iter, BTREE_ID_xattrs,
-                          SPOS(0, 0, U32_MAX), 0, k, ret) {
-               if (k.k->p.inode)
-                       break;
-
+       ret = for_each_btree_key2(&trans, iter, BTREE_ID_xattrs,
+                                 SPOS(0, 0, U32_MAX), 0, k, ({
                BUG_ON(k.k->p.offset != i);
                i += 2;
+               0;
+       }));
+       if (ret) {
+               bch_err(c, "%s(): error iterating forwards: %s", __func__, bch2_err_str(ret));
+               goto err;
        }
-       bch2_trans_iter_exit(&trans, &iter);
 
        BUG_ON(i != nr * 2);
 
@@ -272,17 +295,23 @@ static int test_iterate_slots(struct bch_fs *c, u64 nr)
 
        i = 0;
 
-       for_each_btree_key(&trans, iter, BTREE_ID_xattrs,
-                          SPOS(0, 0, U32_MAX),
-                          BTREE_ITER_SLOTS, k, ret) {
+       ret = for_each_btree_key2(&trans, iter, BTREE_ID_xattrs,
+                                 SPOS(0, 0, U32_MAX),
+                                 BTREE_ITER_SLOTS, k, ({
+               if (i >= nr * 2)
+                       break;
+
                BUG_ON(k.k->p.offset != i);
                BUG_ON(bkey_deleted(k.k) != (i & 1));
 
                i++;
-               if (i == nr * 2)
-                       break;
+               0;
+       }));
+       if (ret < 0) {
+               bch_err(c, "%s(): error iterating forwards by slots: %s", __func__, bch2_err_str(ret));
+               goto err;
        }
-       bch2_trans_iter_exit(&trans, &iter);
+       ret = 0;
 err:
        bch2_trans_exit(&trans);
        return ret;
@@ -313,7 +342,7 @@ static int test_iterate_slots_extents(struct bch_fs *c, u64 nr)
                ret = bch2_btree_insert(c, BTREE_ID_extents, &k.k_i,
                                        NULL, NULL, 0);
                if (ret) {
-                       bch_err(c, "insert error in test_iterate_slots_extents: %i", ret);
+                       bch_err(c, "insert error in test_iterate_slots_extents: %s", bch2_err_str(ret));
                        goto err;
                }
        }
@@ -322,13 +351,17 @@ static int test_iterate_slots_extents(struct bch_fs *c, u64 nr)
 
        i = 0;
 
-       for_each_btree_key(&trans, iter, BTREE_ID_extents,
-                          SPOS(0, 0, U32_MAX), 0, k, ret) {
+       ret = for_each_btree_key2(&trans, iter, BTREE_ID_extents,
+                                 SPOS(0, 0, U32_MAX), 0, k, ({
                BUG_ON(bkey_start_offset(k.k) != i + 8);
                BUG_ON(k.k->size != 8);
                i += 16;
+               0;
+       }));
+       if (ret) {
+               bch_err(c, "%s(): error iterating forwards: %s", __func__, bch2_err_str(ret));
+               goto err;
        }
-       bch2_trans_iter_exit(&trans, &iter);
 
        BUG_ON(i != nr);
 
@@ -336,19 +369,23 @@ static int test_iterate_slots_extents(struct bch_fs *c, u64 nr)
 
        i = 0;
 
-       for_each_btree_key(&trans, iter, BTREE_ID_extents,
-                          SPOS(0, 0, U32_MAX),
-                          BTREE_ITER_SLOTS, k, ret) {
+       ret = for_each_btree_key2(&trans, iter, BTREE_ID_extents,
+                                SPOS(0, 0, U32_MAX),
+                                BTREE_ITER_SLOTS, k, ({
+               if (i == nr)
+                       break;
                BUG_ON(bkey_deleted(k.k) != !(i % 16));
 
                BUG_ON(bkey_start_offset(k.k) != i);
                BUG_ON(k.k->size != 8);
                i = k.k->p.offset;
-
-               if (i == nr)
-                       break;
+               0;
+       }));
+       if (ret) {
+               bch_err(c, "%s(): error iterating forwards by slots: %s", __func__, bch2_err_str(ret));
+               goto err;
        }
-       bch2_trans_iter_exit(&trans, &iter);
+       ret = 0;
 err:
        bch2_trans_exit(&trans);
        return 0;
@@ -368,10 +405,10 @@ static int test_peek_end(struct bch_fs *c, u64 nr)
        bch2_trans_iter_init(&trans, &iter, BTREE_ID_xattrs,
                             SPOS(0, 0, U32_MAX), 0);
 
-       k = bch2_btree_iter_peek(&iter);
+       lockrestart_do(&trans, bkey_err(k = bch2_btree_iter_peek(&iter)));
        BUG_ON(k.k);
 
-       k = bch2_btree_iter_peek(&iter);
+       lockrestart_do(&trans, bkey_err(k = bch2_btree_iter_peek(&iter)));
        BUG_ON(k.k);
 
        bch2_trans_iter_exit(&trans, &iter);
@@ -389,10 +426,10 @@ static int test_peek_end_extents(struct bch_fs *c, u64 nr)
        bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents,
                             SPOS(0, 0, U32_MAX), 0);
 
-       k = bch2_btree_iter_peek(&iter);
+       lockrestart_do(&trans, bkey_err(k = bch2_btree_iter_peek(&iter)));
        BUG_ON(k.k);
 
-       k = bch2_btree_iter_peek(&iter);
+       lockrestart_do(&trans, bkey_err(k = bch2_btree_iter_peek(&iter)));
        BUG_ON(k.k);
 
        bch2_trans_iter_exit(&trans, &iter);
@@ -419,7 +456,7 @@ static int insert_test_extent(struct bch_fs *c,
        ret = bch2_btree_insert(c, BTREE_ID_extents, &k.k_i,
                                NULL, NULL, 0);
        if (ret)
-               bch_err(c, "insert error in insert_test_extent: %i", ret);
+               bch_err(c, "insert error in insert_test_extent: %s", bch2_err_str(ret));
        return ret;
 }
 
@@ -482,7 +519,7 @@ static int test_snapshot_filter(struct bch_fs *c, u32 snapid_lo, u32 snapid_hi)
        bch2_trans_init(&trans, c, 0, 0);
        bch2_trans_iter_init(&trans, &iter, BTREE_ID_xattrs,
                             SPOS(0, 0, snapid_lo), 0);
-       k = bch2_btree_iter_peek(&iter);
+       lockrestart_do(&trans, bkey_err(k = bch2_btree_iter_peek(&iter)));
 
        BUG_ON(k.k->p.snapshot != U32_MAX);
 
@@ -518,7 +555,7 @@ static int test_snapshots(struct bch_fs *c, u64 nr)
 
        ret = test_snapshot_filter(c, snapids[0], snapids[1]);
        if (ret) {
-               bch_err(c, "err %i from test_snapshot_filter", ret);
+               bch_err(c, "err from test_snapshot_filter: %s", bch2_err_str(ret));
                return ret;
        }
 
@@ -555,7 +592,7 @@ static int rand_insert(struct bch_fs *c, u64 nr)
                ret = commit_do(&trans, NULL, NULL, 0,
                        __bch2_btree_insert(&trans, BTREE_ID_xattrs, &k.k_i));
                if (ret) {
-                       bch_err(c, "error in rand_insert: %i", ret);
+                       bch_err(c, "error in rand_insert: %s", bch2_err_str(ret));
                        break;
                }
        }
@@ -591,7 +628,7 @@ static int rand_insert_multi(struct bch_fs *c, u64 nr)
                        __bch2_btree_insert(&trans, BTREE_ID_xattrs, &k[6].k_i) ?:
                        __bch2_btree_insert(&trans, BTREE_ID_xattrs, &k[7].k_i));
                if (ret) {
-                       bch_err(c, "error in rand_insert_multi: %i", ret);
+                       bch_err(c, "error in rand_insert_multi: %s", bch2_err_str(ret));
                        break;
                }
        }
@@ -615,10 +652,10 @@ static int rand_lookup(struct bch_fs *c, u64 nr)
        for (i = 0; i < nr; i++) {
                bch2_btree_iter_set_pos(&iter, SPOS(0, test_rand(), U32_MAX));
 
-               k = bch2_btree_iter_peek(&iter);
+               lockrestart_do(&trans, bkey_err(k = bch2_btree_iter_peek(&iter)));
                ret = bkey_err(k);
                if (ret) {
-                       bch_err(c, "error in rand_lookup: %i", ret);
+                       bch_err(c, "error in rand_lookup: %s", bch2_err_str(ret));
                        break;
                }
        }
@@ -638,10 +675,10 @@ static int rand_mixed_trans(struct btree_trans *trans,
 
        bch2_btree_iter_set_pos(iter, SPOS(0, pos, U32_MAX));
 
-       k = bch2_btree_iter_peek(iter);
+       lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek(iter)));
        ret = bkey_err(k);
-       if (ret && ret != -EINTR)
-               bch_err(trans->c, "lookup error in rand_mixed: %i", ret);
+       if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
+               bch_err(trans->c, "lookup error in rand_mixed: %s", bch2_err_str(ret));
        if (ret)
                return ret;
 
@@ -671,7 +708,7 @@ static int rand_mixed(struct bch_fs *c, u64 nr)
                ret = commit_do(&trans, NULL, NULL, 0,
                        rand_mixed_trans(&trans, &iter, &cookie, i, rand));
                if (ret) {
-                       bch_err(c, "update error in rand_mixed: %i", ret);
+                       bch_err(c, "update error in rand_mixed: %s", bch2_err_str(ret));
                        break;
                }
        }
@@ -689,7 +726,7 @@ static int __do_delete(struct btree_trans *trans, struct bpos pos)
 
        bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs, pos,
                             BTREE_ITER_INTENT);
-       k = bch2_btree_iter_peek(&iter);
+       lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek(&iter)));
        ret = bkey_err(k);
        if (ret)
                goto err;
@@ -717,7 +754,7 @@ static int rand_delete(struct bch_fs *c, u64 nr)
                ret = commit_do(&trans, NULL, NULL, 0,
                        __do_delete(&trans, pos));
                if (ret) {
-                       bch_err(c, "error in rand_delete: %i", ret);
+                       bch_err(c, "error in rand_delete: %s", bch2_err_str(ret));
                        break;
                }
        }
@@ -733,28 +770,23 @@ static int seq_insert(struct bch_fs *c, u64 nr)
        struct bkey_s_c k;
        struct bkey_i_cookie insert;
        int ret = 0;
-       u64 i = 0;
 
        bkey_cookie_init(&insert.k_i);
 
        bch2_trans_init(&trans, c, 0, 0);
 
-       for_each_btree_key(&trans, iter, BTREE_ID_xattrs, SPOS(0, 0, U32_MAX),
-                          BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) {
-               insert.k.p = iter.pos;
-
-               ret = commit_do(&trans, NULL, NULL, 0,
-                       bch2_btree_iter_traverse(&iter) ?:
-                       bch2_trans_update(&trans, &iter, &insert.k_i, 0));
-               if (ret) {
-                       bch_err(c, "error in seq_insert: %i", ret);
-                       break;
-               }
-
-               if (++i == nr)
-                       break;
-       }
-       bch2_trans_iter_exit(&trans, &iter);
+       ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_xattrs,
+                                       SPOS(0, 0, U32_MAX),
+                                       BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k,
+                                       NULL, NULL, 0,
+               ({
+                       if (iter.pos.offset >= nr)
+                               break;
+                       insert.k.p = iter.pos;
+                       bch2_trans_update(&trans, &iter, &insert.k_i, 0);
+               }));
+       if (ret)
+               bch_err(c, "error in %s(): %s", __func__, bch2_err_str(ret));
 
        bch2_trans_exit(&trans);
        return ret;
@@ -769,10 +801,11 @@ static int seq_lookup(struct bch_fs *c, u64 nr)
 
        bch2_trans_init(&trans, c, 0, 0);
 
-       for_each_btree_key(&trans, iter, BTREE_ID_xattrs,
-                          SPOS(0, 0, U32_MAX), 0, k, ret)
-               ;
-       bch2_trans_iter_exit(&trans, &iter);
+       ret = for_each_btree_key2(&trans, iter, BTREE_ID_xattrs,
+                                 SPOS(0, 0, U32_MAX), 0, k,
+               0);
+       if (ret)
+               bch_err(c, "error in %s(): %s", __func__, bch2_err_str(ret));
 
        bch2_trans_exit(&trans);
        return ret;
@@ -787,22 +820,18 @@ static int seq_overwrite(struct bch_fs *c, u64 nr)
 
        bch2_trans_init(&trans, c, 0, 0);
 
-       for_each_btree_key(&trans, iter, BTREE_ID_xattrs,
-                          SPOS(0, 0, U32_MAX),
-                          BTREE_ITER_INTENT, k, ret) {
-               struct bkey_i_cookie u;
-
-               bkey_reassemble(&u.k_i, k);
+       ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_xattrs,
+                                       SPOS(0, 0, U32_MAX),
+                                       BTREE_ITER_INTENT, k,
+                                       NULL, NULL, 0,
+               ({
+                       struct bkey_i_cookie u;
 
-               ret = commit_do(&trans, NULL, NULL, 0,
-                       bch2_btree_iter_traverse(&iter) ?:
-                       bch2_trans_update(&trans, &iter, &u.k_i, 0));
-               if (ret) {
-                       bch_err(c, "error in seq_overwrite: %i", ret);
-                       break;
-               }
-       }
-       bch2_trans_iter_exit(&trans, &iter);
+                       bkey_reassemble(&u.k_i, k);
+                       bch2_trans_update(&trans, &iter, &u.k_i, 0);
+               }));
+       if (ret)
+               bch_err(c, "error in %s(): %s", __func__, bch2_err_str(ret));
 
        bch2_trans_exit(&trans);
        return ret;
@@ -816,7 +845,7 @@ static int seq_delete(struct bch_fs *c, u64 nr)
                                      SPOS(0, 0, U32_MAX), SPOS_MAX,
                                      0, NULL);
        if (ret)
-               bch_err(c, "error in seq_delete: %i", ret);
+               bch_err(c, "error in seq_delete: %s", bch2_err_str(ret));
        return ret;
 }
 
@@ -853,7 +882,7 @@ static int btree_perf_test_thread(void *data)
 
        ret = j->fn(j->c, div64_u64(j->nr, j->nr_threads));
        if (ret) {
-               bch_err(j->c, "%ps: error %i", j->fn, ret);
+               bch_err(j->c, "%ps: error %s", j->fn, bch2_err_str(ret));
                j->ret = ret;
        }
 
index 59e8dfa3d24520a2f57e5744efd3c4c456dab3a8..70573981b87dde70cd1267f2d0f539e5874c196a 100644 (file)
@@ -2,11 +2,13 @@
 #include "bcachefs.h"
 #include "alloc_types.h"
 #include "buckets.h"
-#include "btree_types.h"
+#include "btree_iter.h"
+#include "btree_locking.h"
 #include "keylist.h"
+#include "opts.h"
 
 #include <linux/blktrace_api.h>
-#include "keylist.h"
+#include <linux/six.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/bcachefs.h>
index 8ef4b5915c374ce3be5dc0dc1f15687832ef5127..ee2c7d9e7050082b432ebdc81c1a80e39fd904ff 100644 (file)
@@ -376,31 +376,37 @@ void bch2_time_stats_to_text(struct printbuf *out, struct time_stats *stats)
        u64 q, last_q = 0;
        int i;
 
-       prt_printf(out, "count:\t\t%llu\n",
+       prt_printf(out, "count:\t\t%llu",
                         stats->count);
-       prt_printf(out, "rate:\t\t%llu/sec\n",
+       prt_newline(out);
+       prt_printf(out, "rate:\t\t%llu/sec",
               freq ?  div64_u64(NSEC_PER_SEC, freq) : 0);
+       prt_newline(out);
 
        prt_printf(out, "frequency:\t");
        pr_time_units(out, freq);
 
-       prt_printf(out, "\navg duration:\t");
+       prt_newline(out);
+       prt_printf(out, "avg duration:\t");
        pr_time_units(out, stats->average_duration);
 
-       prt_printf(out, "\nmax duration:\t");
+       prt_newline(out);
+       prt_printf(out, "max duration:\t");
        pr_time_units(out, stats->max_duration);
 
        i = eytzinger0_first(NR_QUANTILES);
        u = pick_time_units(stats->quantiles.entries[i].m);
 
-       prt_printf(out, "\nquantiles (%s):\t", u->name);
+       prt_newline(out);
+       prt_printf(out, "quantiles (%s):\t", u->name);
        eytzinger0_for_each(i, NR_QUANTILES) {
                bool is_last = eytzinger0_next(i, NR_QUANTILES) == -1;
 
                q = max(stats->quantiles.entries[i].m, last_q);
-               prt_printf(out, "%llu%s",
-                      div_u64(q, u->nsecs),
-                      is_last ? "\n" : " ");
+               prt_printf(out, "%llu ",
+                      div_u64(q, u->nsecs));
+               if (is_last)
+                       prt_newline(out);
                last_q = q;
        }
 }
index 1236127162e509458cc6f5fcba5c3eeeff049d04..186ffab542d53f882ab72bdbae638cea95664060 100644 (file)
@@ -344,7 +344,7 @@ retry:
        offset = iter.pos.offset;
        bch2_trans_iter_exit(&trans, &iter);
 err:
-       if (ret == -EINTR)
+       if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
                goto retry;
 
        bch2_trans_exit(&trans);
index 5e19c8ad4f42b33bd8c198c195e841513a22a9d1..93a791c4bb98a205b149ff888d6883e7eaa3eeb0 100644 (file)
@@ -293,7 +293,7 @@ void bio_reset(struct bio *bio, struct block_device *bdev, unsigned int opf)
        atomic_set(&bio->__bi_remaining, 1);
 }
 
-struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned int nr_iovecs)
+struct bio *bio_kmalloc(unsigned int nr_iovecs, gfp_t gfp_mask)
 {
        struct bio *bio;
 
index bd9dc9cc5e62bfe24cf313207e028028d9a17fc3..9b3ea93f8c19aecdaa0a6c9bb3e12eda2def00bf 100644 (file)
@@ -113,7 +113,7 @@ int submit_bio_wait(struct bio *bio)
 
 int blkdev_issue_discard(struct block_device *bdev,
                         sector_t sector, sector_t nr_sects,
-                        gfp_t gfp_mask, unsigned long flags)
+                        gfp_t gfp_mask)
 {
        return 0;
 }
index fca1208720b67dfd7e96915679572737dd626dba..5b2d92c6e91c382a5548fe5b88e84dbbe1a9a40c 100644 (file)
@@ -757,3 +757,23 @@ void six_lock_pcpu_alloc(struct six_lock *lock)
 #endif
 }
 EXPORT_SYMBOL_GPL(six_lock_pcpu_alloc);
+
+/*
+ * Returns lock held counts, for both read and intent
+ */
+struct six_lock_count six_lock_counts(struct six_lock *lock)
+{
+       struct six_lock_count ret = { 0, lock->state.intent_lock };
+
+       if (!lock->readers)
+               ret.read += lock->state.read_lock;
+       else {
+               int cpu;
+
+               for_each_possible_cpu(cpu)
+                       ret.read += *per_cpu_ptr(lock->readers, cpu);
+       }
+
+       return ret;
+}
+EXPORT_SYMBOL_GPL(six_lock_counts);