git.sesse.net Git - bcachefs-tools-debian/commitdiff
Update bcachefs sources to 84505cfd37 bcachefs: Go RW before check_alloc_info()
author Kent Overstreet <kent.overstreet@linux.dev>
Tue, 13 Dec 2022 23:27:31 +0000 (18:27 -0500)
committer Kent Overstreet <kent.overstreet@linux.dev>
Tue, 13 Dec 2022 23:27:31 +0000 (18:27 -0500)
23 files changed:
.bcachefs_revision
include/trace/events/bcachefs.h
libbcachefs/alloc_background.c
libbcachefs/alloc_foreground.c
libbcachefs/backpointers.c
libbcachefs/bcachefs.h
libbcachefs/btree_cache.c
libbcachefs/btree_gc.c
libbcachefs/btree_locking.c
libbcachefs/btree_update_interior.c
libbcachefs/btree_update_leaf.c
libbcachefs/data_update.c
libbcachefs/ec.c
libbcachefs/errcode.h
libbcachefs/io.c
libbcachefs/journal.c
libbcachefs/journal.h
libbcachefs/move.c
libbcachefs/movinggc.c
libbcachefs/recovery.c
libbcachefs/reflink.c
libbcachefs/super-io.c
libbcachefs/sysfs.c

index d55973ef6bbd97618ffad7fb6cd79660bb6f4007..eacbdc34d58126d62d7d8d1750bc44c5202c4e0a 100644 (file)
@@ -1 +1 @@
-ed2a5f4260b65f3d613dcd76a97ac091bc88a126
+84505cfd37957accbff6fa7e4477bfd9c4c23ba6
index e055d37c81a8a95b37178a0fb63c8b80cf049366..f699146aad2640795aef73c6cad912cc13beb7f8 100644 (file)
@@ -564,6 +564,7 @@ TRACE_EVENT(bucket_alloc_fail,
                __field(u64,                    need_journal_commit     )
                __field(u64,                    nouse                   )
                __field(bool,                   nonblocking             )
+               __field(u64,                    nocow                   )
                __array(char,                   err,    32              )
        ),
 
@@ -579,10 +580,11 @@ TRACE_EVENT(bucket_alloc_fail,
                __entry->need_journal_commit = s->skipped_need_journal_commit;
                __entry->nouse          = s->skipped_nouse;
                __entry->nonblocking    = nonblocking;
+               __entry->nocow          = s->skipped_nocow;
                strscpy(__entry->err, err, sizeof(__entry->err));
        ),
 
-       TP_printk("%d,%d reserve %s free %llu avail %llu copygc_wait %llu/%lli seen %llu open %llu need_journal_commit %llu nouse %llu nonblocking %u err %s",
+       TP_printk("%d,%d reserve %s free %llu avail %llu copygc_wait %llu/%lli seen %llu open %llu need_journal_commit %llu nouse %llu nonblocking %u nocow %llu err %s",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  __entry->reserve,
                  __entry->free,
@@ -594,6 +596,7 @@ TRACE_EVENT(bucket_alloc_fail,
                  __entry->need_journal_commit,
                  __entry->nouse,
                  __entry->nonblocking,
+                 __entry->nocow,
                  __entry->err)
 );
 
@@ -702,6 +705,37 @@ TRACE_EVENT(move_data,
                  __entry->sectors_moved, __entry->keys_moved)
 );
 
+TRACE_EVENT(evacuate_bucket,
+       TP_PROTO(struct bch_fs *c, struct bpos *bucket,
+                unsigned sectors, unsigned bucket_size,
+                int ret),
+       TP_ARGS(c, bucket, sectors, bucket_size, ret),
+
+       TP_STRUCT__entry(
+               __field(dev_t,          dev             )
+               __field(u64,            member          )
+               __field(u64,            bucket          )
+               __field(u32,            sectors         )
+               __field(u32,            bucket_size     )
+               __field(int,            ret             )
+       ),
+
+       TP_fast_assign(
+               __entry->dev                    = c->dev;
+               __entry->member                 = bucket->inode;
+               __entry->bucket                 = bucket->offset;
+               __entry->sectors                = sectors;
+               __entry->bucket_size            = bucket_size;
+               __entry->ret                    = ret;
+       ),
+
+       TP_printk("%d,%d %llu:%llu sectors %u/%u ret %i",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->member, __entry->bucket,
+                 __entry->sectors, __entry->bucket_size,
+                 __entry->ret)
+);
+
 TRACE_EVENT(copygc,
        TP_PROTO(struct bch_fs *c,
                 u64 sectors_moved, u64 sectors_not_moved,
@@ -1096,75 +1130,6 @@ TRACE_EVENT(trans_restart_key_cache_key_realloced,
                  __entry->new_u64s)
 );
 
-DECLARE_EVENT_CLASS(node_lock_fail,
-       TP_PROTO(unsigned long trans_ip,
-                unsigned long caller_ip,
-                enum btree_id btree_id,
-                struct bpos *pos,
-                unsigned level, u32 iter_seq, struct btree *b, u32 node_seq),
-       TP_ARGS(trans_ip, caller_ip, btree_id, pos,
-               level, iter_seq, b, node_seq),
-
-       TP_STRUCT__entry(
-               __field(unsigned long,          trans_ip        )
-               __field(unsigned long,          caller_ip       )
-               __field(u8,                     btree_id        )
-               __field(u64,                    pos_inode       )
-               __field(u64,                    pos_offset      )
-               __field(u32,                    pos_snapshot    )
-               __field(u32,                    level           )
-               __field(u32,                    iter_seq        )
-               __array(char,                   node, 24        )
-               __field(u32,                    node_seq        )
-       ),
-
-       TP_fast_assign(
-               __entry->trans_ip               = trans_ip;
-               __entry->caller_ip              = caller_ip;
-               __entry->btree_id               = btree_id;
-               __entry->pos_inode              = pos->inode;
-               __entry->pos_offset             = pos->offset;
-               __entry->pos_snapshot           = pos->snapshot;
-               __entry->level                  = level;
-               __entry->iter_seq               = iter_seq;
-               if (IS_ERR(b))
-                       strscpy(__entry->node, bch2_err_str(PTR_ERR(b)), sizeof(__entry->node));
-               else
-                       scnprintf(__entry->node, sizeof(__entry->node), "%px", b);
-               __entry->node_seq               = node_seq;
-       ),
-
-       TP_printk("%ps %pS btree %u pos %llu:%llu:%u level %u iter seq %u node %s node seq %u",
-                 (void *) __entry->trans_ip,
-                 (void *) __entry->caller_ip,
-                 __entry->btree_id,
-                 __entry->pos_inode,
-                 __entry->pos_offset,
-                 __entry->pos_snapshot,
-                 __entry->level, __entry->iter_seq,
-                 __entry->node, __entry->node_seq)
-);
-
-DEFINE_EVENT(node_lock_fail, node_upgrade_fail,
-       TP_PROTO(unsigned long trans_ip,
-                unsigned long caller_ip,
-                enum btree_id btree_id,
-                struct bpos *pos,
-                unsigned level, u32 iter_seq, struct btree *b, u32 node_seq),
-       TP_ARGS(trans_ip, caller_ip, btree_id, pos,
-               level, iter_seq, b, node_seq)
-);
-
-DEFINE_EVENT(node_lock_fail, node_relock_fail,
-       TP_PROTO(unsigned long trans_ip,
-                unsigned long caller_ip,
-                enum btree_id btree_id,
-                struct bpos *pos,
-                unsigned level, u32 iter_seq, struct btree *b, u32 node_seq),
-       TP_ARGS(trans_ip, caller_ip, btree_id, pos,
-               level, iter_seq, b, node_seq)
-);
-
 #endif /* _TRACE_BCACHE_H */
 
 /* This part must be outside protection */
index 0939200e6d1a723edbeec55d0f6503cb78c00e84..ae6ae78a2618032949db2016d7bbbdb9e31af277 100644 (file)
@@ -800,6 +800,7 @@ static int bch2_bucket_do_index(struct btree_trans *trans,
                goto err;
 
        if (ca->mi.freespace_initialized &&
+           test_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags) &&
            bch2_trans_inconsistent_on(old.k->type != old_type, trans,
                        "incorrect key when %s %s btree (got %s should be %s)\n"
                        "  for %s",
@@ -1649,21 +1650,28 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
                goto write;
        }
 
-       if (bch2_trans_inconsistent_on(a->v.journal_seq > c->journal.flushed_seq_ondisk, trans,
-                       "clearing need_discard but journal_seq %llu > flushed_seq %llu\n"
-                       "%s",
-                       a->v.journal_seq,
-                       c->journal.flushed_seq_ondisk,
-                       (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
-               ret = -EIO;
+       if (a->v.journal_seq > c->journal.flushed_seq_ondisk) {
+               if (test_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags)) {
+                       bch2_trans_inconsistent(trans,
+                               "clearing need_discard but journal_seq %llu > flushed_seq %llu\n"
+                               "%s",
+                               a->v.journal_seq,
+                               c->journal.flushed_seq_ondisk,
+                               (bch2_bkey_val_to_text(&buf, c, k), buf.buf));
+                       ret = -EIO;
+               }
                goto out;
        }
 
-       if (bch2_trans_inconsistent_on(a->v.data_type != BCH_DATA_need_discard, trans,
-                       "bucket incorrectly set in need_discard btree\n"
-                       "%s",
-                       (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
-               ret = -EIO;
+       if (a->v.data_type != BCH_DATA_need_discard) {
+               if (test_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags)) {
+                       bch2_trans_inconsistent(trans,
+                               "bucket incorrectly set in need_discard btree\n"
+                               "%s",
+                               (bch2_bkey_val_to_text(&buf, c, k), buf.buf));
+                       ret = -EIO;
+               }
+
                goto out;
        }
 
index 46f215c8bced21fec6d3614ef69a6db1c411f483..2010a9af0eb2de8a36eb74633c79dea18f17ac25 100644 (file)
@@ -312,28 +312,34 @@ static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bc
 
        a = bch2_alloc_to_v4(k, &a_convert);
 
-       if (genbits != (alloc_freespace_genbits(*a) >> 56)) {
-               prt_printf(&buf, "bucket in freespace btree with wrong genbits (got %u should be %llu)\n"
-                      "  freespace key ",
-                      genbits, alloc_freespace_genbits(*a) >> 56);
+       if (a->data_type != BCH_DATA_free) {
+               if (!test_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags)) {
+                       ob = NULL;
+                       goto err;
+               }
+
+               prt_printf(&buf, "non free bucket in freespace btree\n"
+                      "  freespace key ");
                bch2_bkey_val_to_text(&buf, c, freespace_k);
                prt_printf(&buf, "\n  ");
                bch2_bkey_val_to_text(&buf, c, k);
                bch2_trans_inconsistent(trans, "%s", buf.buf);
                ob = ERR_PTR(-EIO);
                goto err;
-
        }
 
-       if (a->data_type != BCH_DATA_free) {
-               prt_printf(&buf, "non free bucket in freespace btree\n"
-                      "  freespace key ");
+       if (genbits != (alloc_freespace_genbits(*a) >> 56) &&
+           test_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags)) {
+               prt_printf(&buf, "bucket in freespace btree with wrong genbits (got %u should be %llu)\n"
+                      "  freespace key ",
+                      genbits, alloc_freespace_genbits(*a) >> 56);
                bch2_bkey_val_to_text(&buf, c, freespace_k);
                prt_printf(&buf, "\n  ");
                bch2_bkey_val_to_text(&buf, c, k);
                bch2_trans_inconsistent(trans, "%s", buf.buf);
                ob = ERR_PTR(-EIO);
                goto err;
+
        }
 
        if (!test_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags)) {
@@ -506,8 +512,8 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans,
 {
        struct bch_fs *c = trans->c;
        struct open_bucket *ob = NULL;
-       bool freespace_initialized = READ_ONCE(ca->mi.freespace_initialized);
-       u64 start = freespace_initialized ? 0 : ca->bucket_alloc_trans_early_cursor;
+       bool freespace = READ_ONCE(ca->mi.freespace_initialized);
+       u64 start = freespace ? 0 : ca->bucket_alloc_trans_early_cursor;
        u64 avail;
        struct bucket_alloc_state s = { .cur_bucket = start };
        bool waiting = false;
@@ -546,20 +552,25 @@ again:
                if (ob)
                        return ob;
        }
-
-       ob = likely(ca->mi.freespace_initialized)
+alloc:
+       ob = likely(freespace)
                ? bch2_bucket_alloc_freelist(trans, ca, reserve, &s, cl)
                : bch2_bucket_alloc_early(trans, ca, reserve, &s, cl);
 
        if (s.skipped_need_journal_commit * 2 > avail)
                bch2_journal_flush_async(&c->journal, NULL);
 
-       if (!ob && !freespace_initialized && start) {
+       if (!ob && !freespace && start) {
                start = s.cur_bucket = 0;
-               goto again;
+               goto alloc;
        }
 
-       if (!freespace_initialized)
+       if (!ob && freespace && !test_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags)) {
+               freespace = false;
+               goto alloc;
+       }
+
+       if (!freespace)
                ca->bucket_alloc_trans_early_cursor = s.cur_bucket;
 err:
        if (!ob)
@@ -1224,12 +1235,9 @@ err:
        if (bch2_err_matches(ret, BCH_ERR_open_buckets_empty) ||
            bch2_err_matches(ret, BCH_ERR_freelist_empty))
                return cl
-                       ? -EAGAIN
+                       ? -BCH_ERR_bucket_alloc_blocked
                        : -BCH_ERR_ENOSPC_bucket_alloc;
 
-       if (bch2_err_matches(ret, BCH_ERR_insufficient_devices))
-               return -EROFS;
-
        return ret;
 }
 
index 7d4367f93b5c337de1018b3db83c323e373bd3fb..405823d1cfabc69499d853ccff87fca78d9df66a 100644 (file)
@@ -242,6 +242,9 @@ btree:
            memcmp(bkey_s_c_to_backpointer(k).v, &bp, sizeof(bp))) {
                struct printbuf buf = PRINTBUF;
 
+               if (!test_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags))
+                       goto err;
+
                prt_printf(&buf, "backpointer not found when deleting");
                prt_newline(&buf);
                printbuf_indent_add(&buf, 2);
@@ -262,10 +265,8 @@ btree:
                bch2_bkey_val_to_text(&buf, c, orig_k);
 
                bch_err(c, "%s", buf.buf);
-               if (test_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags)) {
-                       bch2_inconsistent_error(c);
-                       ret = -EIO;
-               }
+               bch2_inconsistent_error(c);
+               ret = -EIO;
                printbuf_exit(&buf);
                goto err;
        }
index 7f479cdc7069f1409c1a87bbc668f70390ddbb12..71f51459872d28c16c95c4e233eda5a870fcb983 100644 (file)
@@ -549,6 +549,7 @@ enum {
        /* fsck passes: */
        BCH_FS_TOPOLOGY_REPAIR_DONE,
        BCH_FS_INITIAL_GC_DONE,         /* kill when we enumerate fsck passes */
+       BCH_FS_CHECK_ALLOC_DONE,
        BCH_FS_CHECK_LRUS_DONE,
        BCH_FS_CHECK_BACKPOINTERS_DONE,
        BCH_FS_CHECK_ALLOC_TO_LRU_REFS_DONE,
index d24827fb0164b37663f76e1fadd36e03b8721149..b5e78042c1ff9bb46e1294e242e482be6804beab 100644 (file)
@@ -577,7 +577,7 @@ int bch2_btree_cache_cannibalize_lock(struct bch_fs *c, struct closure *cl)
        }
 
        trace_and_count(c, btree_cache_cannibalize_lock_fail, c);
-       return -EAGAIN;
+       return -BCH_ERR_btree_cache_cannibalize_lock_blocked;
 
 success:
        trace_and_count(c, btree_cache_cannibalize_lock, c);
@@ -952,8 +952,6 @@ retry:
  * bch_btree_node_get - find a btree node in the cache and lock it, reading it
  * in from disk if necessary.
  *
- * If IO is necessary and running under generic_make_request, returns -EAGAIN.
- *
  * The btree node will have either a read or a write lock held, depending on
  * the @write parameter.
  */
index 055987a2a6a6a06804f06950084deb0901622674..6b7353c9206eef54210018e88be39729ce888a25 100644 (file)
@@ -1285,8 +1285,7 @@ fsck_err:
        return ret;
 }
 
-static int bch2_gc_start(struct bch_fs *c,
-                        bool metadata_only)
+static int bch2_gc_start(struct bch_fs *c)
 {
        struct bch_dev *ca = NULL;
        unsigned i;
@@ -1301,7 +1300,6 @@ static int bch2_gc_start(struct bch_fs *c,
        }
 
        for_each_member_device(ca, c, i) {
-               BUG_ON(ca->buckets_gc);
                BUG_ON(ca->usage_gc);
 
                ca->usage_gc = alloc_percpu(struct bch_dev_usage);
@@ -1318,6 +1316,22 @@ static int bch2_gc_start(struct bch_fs *c,
        return 0;
 }
 
+static int bch2_gc_reset(struct bch_fs *c)
+{
+       struct bch_dev *ca;
+       unsigned i;
+
+       for_each_member_device(ca, c, i) {
+               free_percpu(ca->usage_gc);
+               ca->usage_gc = NULL;
+       }
+
+       free_percpu(c->usage_gc);
+       c->usage_gc = NULL;
+
+       return bch2_gc_start(c);
+}
+
 /* returns true if not equal */
 static inline bool bch2_alloc_v4_cmp(struct bch_alloc_v4 l,
                                     struct bch_alloc_v4 r)
@@ -1763,7 +1777,7 @@ int bch2_gc(struct bch_fs *c, bool initial, bool metadata_only)
 
        bch2_btree_interior_updates_flush(c);
 
-       ret   = bch2_gc_start(c, metadata_only) ?:
+       ret   = bch2_gc_start(c) ?:
                bch2_gc_alloc_start(c, metadata_only) ?:
                bch2_gc_reflink_start(c, metadata_only);
        if (ret)
@@ -1824,6 +1838,9 @@ again:
                bch2_gc_stripes_reset(c, metadata_only);
                bch2_gc_alloc_reset(c, metadata_only);
                bch2_gc_reflink_reset(c, metadata_only);
+               ret = bch2_gc_reset(c);
+               if (ret)
+                       goto out;
 
                /* flush fsck errors, reset counters */
                bch2_flush_fsck_errs(c);
@@ -1975,7 +1992,7 @@ int bch2_gc_gens(struct bch_fs *c)
                                        NULL, NULL,
                                        BTREE_INSERT_NOFAIL,
                                gc_btree_gens_key(&trans, &iter, k));
-                       if (ret && ret != -EROFS)
+                       if (ret && !bch2_err_matches(ret, EROFS))
                                bch_err(c, "error recalculating oldest_gen: %s", bch2_err_str(ret));
                        if (ret)
                                goto err;
@@ -1988,7 +2005,7 @@ int bch2_gc_gens(struct bch_fs *c)
                        NULL, NULL,
                        BTREE_INSERT_NOFAIL,
                bch2_alloc_write_oldest_gen(&trans, &iter, k));
-       if (ret && ret != -EROFS)
+       if (ret && !bch2_err_matches(ret, EROFS))
                bch_err(c, "error writing oldest_gen: %s", bch2_err_str(ret));
        if (ret)
                goto err;
index d3326e980db998f933e846d21b11ea88f925d4a2..dce2dc0cc0c555a7e34873d05d6b0cd4c14cb191 100644 (file)
@@ -359,18 +359,8 @@ static inline bool btree_path_get_locks(struct btree_trans *trans,
 
                if (!(upgrade
                      ? bch2_btree_node_upgrade(trans, path, l)
-                     : bch2_btree_node_relock(trans, path, l))) {
-                       (upgrade
-                        ? trace_node_upgrade_fail
-                        : trace_node_relock_fail)(0, _RET_IP_,
-                                       path->btree_id, &path->pos,
-                                       l, path->l[l].lock_seq,
-                                       path->l[l].b,
-                                       is_btree_node(path, l)
-                                       ? path->l[l].b->c.lock.state.seq
-                                       : 0);
+                     : bch2_btree_node_relock(trans, path, l)))
                        fail_idx = l;
-               }
 
                l++;
        } while (l < path->locks_want);
index a4476f1662ea738e0e0e01c0cf5d61b0aea036eb..a49e7b6b416d95355dc3a929a6c9e6b4f00e03b3 100644 (file)
@@ -1162,7 +1162,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
 
                        bch2_trans_unlock(trans);
                        closure_sync(&cl);
-               } while (ret == -EAGAIN);
+               } while (bch2_err_matches(ret, BCH_ERR_operation_blocked));
        }
 
        if (ret) {
index 459d9a4441d2dcf8d007553c99920919e95aa99d..75d8a55352127482a42eccfcf604768da7f10f62 100644 (file)
@@ -316,15 +316,10 @@ bch2_trans_journal_preres_get_cold(struct btree_trans *trans, unsigned u64s,
 static __always_inline int bch2_trans_journal_res_get(struct btree_trans *trans,
                                             unsigned flags)
 {
-       struct bch_fs *c = trans->c;
-       int ret;
-
-       ret = bch2_journal_res_get(&c->journal, &trans->journal_res,
-                                  trans->journal_u64s,
-                                  flags|
-                                  (trans->flags & JOURNAL_WATERMARK_MASK));
-
-       return ret == -EAGAIN ? -BCH_ERR_btree_insert_need_journal_res : ret;
+       return bch2_journal_res_get(&trans->c->journal, &trans->journal_res,
+                                   trans->journal_u64s,
+                                   flags|
+                                   (trans->flags & JOURNAL_WATERMARK_MASK));
 }
 
 #define JSET_ENTRY_LOG_U64s            4
@@ -643,21 +638,13 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,
                trans->journal_res.seq = c->journal.replay_journal_seq;
        }
 
-       if (unlikely(trans->extra_journal_entries.nr)) {
-               memcpy_u64s_small(journal_res_entry(&c->journal, &trans->journal_res),
-                                 trans->extra_journal_entries.data,
-                                 trans->extra_journal_entries.nr);
-
-               trans->journal_res.offset       += trans->extra_journal_entries.nr;
-               trans->journal_res.u64s         -= trans->extra_journal_entries.nr;
-       }
-
        /*
         * Not allowed to fail after we've gotten our journal reservation - we
         * have to use it:
         */
 
-       if (!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY)) {
+       if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) &&
+           !(trans->flags & BTREE_INSERT_JOURNAL_REPLAY)) {
                if (bch2_journal_seq_verify)
                        trans_for_each_update(trans, i)
                                i->k->k.version.lo = trans->journal_res.seq;
@@ -683,6 +670,15 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,
                        return ret;
        }
 
+       if (unlikely(trans->extra_journal_entries.nr)) {
+               memcpy_u64s_small(journal_res_entry(&c->journal, &trans->journal_res),
+                                 trans->extra_journal_entries.data,
+                                 trans->extra_journal_entries.nr);
+
+               trans->journal_res.offset       += trans->extra_journal_entries.nr;
+               trans->journal_res.u64s         -= trans->extra_journal_entries.nr;
+       }
+
        if (likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))) {
                trans_for_each_update(trans, i) {
                        struct journal *j = &c->journal;
@@ -841,7 +837,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans,
                        &trans->journal_preres, trans->journal_preres_u64s,
                        JOURNAL_RES_GET_NONBLOCK|
                        (trans->flags & JOURNAL_WATERMARK_MASK));
-       if (unlikely(ret == -EAGAIN))
+       if (unlikely(ret == -BCH_ERR_journal_preres_get_blocked))
                ret = bch2_trans_journal_preres_get_cold(trans,
                                                trans->journal_preres_u64s, trace_ip);
        if (unlikely(ret))
@@ -913,7 +909,7 @@ int bch2_trans_commit_error(struct btree_trans *trans,
                if (ret)
                        trace_and_count(c, trans_restart_mark_replicas, trans, trace_ip);
                break;
-       case -BCH_ERR_btree_insert_need_journal_res:
+       case -BCH_ERR_journal_res_get_blocked:
                bch2_trans_unlock(trans);
 
                if ((trans->flags & BTREE_INSERT_JOURNAL_RECLAIM) &&
@@ -967,7 +963,7 @@ bch2_trans_commit_get_rw_cold(struct btree_trans *trans)
 
        if (likely(!(trans->flags & BTREE_INSERT_LAZY_RW)) ||
            test_bit(BCH_FS_STARTED, &c->flags))
-               return -EROFS;
+               return -BCH_ERR_erofs_trans_commit;
 
        bch2_trans_unlock(trans);
 
index 3edd7b779f6e08c9890284d3970e3ec56797782d..d4cdfb48ab8fd19f8bd120f4e10b311ea1e19314 100644 (file)
@@ -349,7 +349,7 @@ void bch2_update_unwritten_extent(struct btree_trans *trans,
                                update->op.nr_replicas,
                                update->op.alloc_reserve,
                                0, &cl, &wp);
-               if (ret == -EAGAIN) {
+               if (bch2_err_matches(ret, BCH_ERR_operation_blocked)) {
                        bch2_trans_unlock(trans);
                        closure_sync(&cl);
                        continue;
@@ -459,7 +459,7 @@ int bch2_data_update_init(struct bch_fs *c, struct data_update *m,
                                ? 0
                                : BCH_DISK_RESERVATION_NOFAIL);
                if (ret)
-                       return ret;
+                       goto err;
        }
 
        m->op.nr_replicas = m->op.nr_replicas_required =
@@ -471,6 +471,14 @@ int bch2_data_update_init(struct bch_fs *c, struct data_update *m,
        if (bkey_extent_is_unwritten(k))
                return -BCH_ERR_unwritten_extent_update;
        return 0;
+err:
+       bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
+               bch2_bucket_nocow_unlock(&c->nocow_locks,
+                                      PTR_BUCKET_POS(c, &p.ptr), 0);
+
+       bch2_bkey_buf_exit(&m->k, c);
+       bch2_bio_free_pages_pool(c, &m->op.wbio.bio);
+       return ret;
 }
 
 void bch2_data_update_opts_normalize(struct bkey_s_c k, struct data_update_opts *opts)
index 53f0d8209a271e993792ceba7dd3bca27a5db49c..c234c8d5d6a311bbcda9848e98970ca59b6ba309 100644 (file)
@@ -942,7 +942,7 @@ static void ec_stripe_create(struct ec_stripe_new *s)
        closure_sync(&s->iodone);
 
        if (s->err) {
-               if (s->err != -EROFS)
+               if (!bch2_err_matches(s->err, EROFS))
                        bch_err(c, "error creating stripe: error writing data buckets");
                goto err;
        }
index 1ab4c4c564eba7d377c290b0e43e2fb4bdce100d..543cdb553188ab5a003d5f3157b10927405afba2 100644 (file)
@@ -20,7 +20,6 @@
        x(0,                            open_buckets_empty)                     \
        x(0,                            freelist_empty)                         \
        x(BCH_ERR_freelist_empty,       no_buckets_found)                       \
-       x(0,                            insufficient_devices)                   \
        x(0,                            transaction_restart)                    \
        x(BCH_ERR_transaction_restart,  transaction_restart_fault_inject)       \
        x(BCH_ERR_transaction_restart,  transaction_restart_relock)             \
        x(EINVAL,                       device_already_online)                  \
        x(EINVAL,                       insufficient_devices_to_start)          \
        x(EINVAL,                       invalid)                                \
+       x(EROFS,                        erofs_trans_commit)                     \
+       x(EROFS,                        erofs_no_writes)                        \
+       x(EROFS,                        erofs_journal_err)                      \
+       x(EROFS,                        erofs_sb_err)                           \
+       x(EROFS,                        insufficient_devices)                   \
+       x(0,                            operation_blocked)                      \
+       x(BCH_ERR_operation_blocked,    btree_cache_cannibalize_lock_blocked)   \
+       x(BCH_ERR_operation_blocked,    journal_res_get_blocked)                \
+       x(BCH_ERR_operation_blocked,    journal_preres_get_blocked)             \
+       x(BCH_ERR_operation_blocked,    bucket_alloc_blocked)                   \
        x(BCH_ERR_invalid,              invalid_sb)                             \
        x(BCH_ERR_invalid_sb,           invalid_sb_magic)                       \
        x(BCH_ERR_invalid_sb,           invalid_sb_version)                     \
index f0fca861b90122f7882af40ac9ba86d4b1e9a170..706f18bc4238373a758998b30064ad7d702a478e 100644 (file)
@@ -427,7 +427,7 @@ retry:
                                opts.data_replicas,
                                opts.data_replicas,
                                RESERVE_none, 0, &cl, &wp);
-               if (ret == -EAGAIN) {
+               if (bch2_err_matches(ret, BCH_ERR_operation_blocked)) {
                        bch2_trans_unlock(trans);
                        closure_sync(&cl);
                        goto retry;
@@ -1627,7 +1627,7 @@ again:
                                              BCH_WRITE_ONLY_SPECIFIED_DEVS))
                                ? NULL : &op->cl, &wp));
                if (unlikely(ret)) {
-                       if (ret == -EAGAIN)
+                       if (bch2_err_matches(ret, BCH_ERR_operation_blocked))
                                break;
 
                        goto err;
@@ -1775,7 +1775,7 @@ void bch2_write(struct closure *cl)
 
        if (c->opts.nochanges ||
            !percpu_ref_tryget_live(&c->writes)) {
-               op->error = -EROFS;
+               op->error = -BCH_ERR_erofs_no_writes;
                goto err;
        }
 
index 95c29229d3fe658c6ff9e58361bdf0b2c125e50a..e35b685a6770df57484f8aceeb7f9883793735cd 100644 (file)
@@ -199,12 +199,6 @@ static bool journal_entry_close(struct journal *j)
 /*
  * should _only_ called from journal_res_get() - when we actually want a
  * journal reservation - journal entry is open means journal is dirty:
- *
- * returns:
- * 0:          success
- * -ENOSPC:    journal currently full, must invoke reclaim
- * -EAGAIN:    journal blocked, must wait
- * -EROFS:     insufficient rw devices or journal error
  */
 static int journal_entry_open(struct journal *j)
 {
@@ -353,7 +347,7 @@ retry:
                return 0;
 
        if (bch2_journal_error(j))
-               return -EROFS;
+               return -BCH_ERR_erofs_journal_err;
 
        spin_lock(&j->lock);
 
@@ -445,7 +439,9 @@ unlock:
                }
        }
 
-       return ret == JOURNAL_ERR_insufficient_devices ? -EROFS : -EAGAIN;
+       return ret == JOURNAL_ERR_insufficient_devices
+               ? -EROFS
+               : -BCH_ERR_journal_res_get_blocked;
 }
 
 /*
@@ -464,7 +460,8 @@ int bch2_journal_res_get_slowpath(struct journal *j, struct journal_res *res,
        int ret;
 
        closure_wait_event(&j->async_wait,
-                  (ret = __journal_res_get(j, res, flags)) != -EAGAIN ||
+                  (ret = __journal_res_get(j, res, flags)) !=
+                  -BCH_ERR_journal_res_get_blocked||
                   (flags & JOURNAL_RES_GET_NONBLOCK));
        return ret;
 }
@@ -815,12 +812,9 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
                } else {
                        ob[nr_got] = bch2_bucket_alloc(c, ca, RESERVE_none,
                                               false, cl);
-                       if (IS_ERR(ob[nr_got])) {
-                               ret = cl
-                                       ? -EAGAIN
-                                       : -BCH_ERR_ENOSPC_bucket_alloc;
+                       ret = PTR_ERR_OR_ZERO(ob[nr_got]);
+                       if (ret)
                                break;
-                       }
 
                        bu[nr_got] = ob[nr_got]->bucket;
                }
@@ -930,7 +924,7 @@ int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca,
 
        closure_init_stack(&cl);
 
-       while (ja->nr != nr && (ret == 0 || ret == -EAGAIN)) {
+       while (ja->nr != nr && (ret == 0 || ret == -BCH_ERR_bucket_alloc_blocked)) {
                struct disk_reservation disk_res = { 0, 0 };
 
                closure_sync(&cl);
index 51d29a01b7b2c0d2979872a9cbdbc2bfd7b389c9..896a2d7dca3615eab5512c56f023b3b267206673 100644 (file)
@@ -479,7 +479,7 @@ static inline int bch2_journal_preres_get(struct journal *j,
                return 0;
 
        if (flags & JOURNAL_RES_GET_NONBLOCK)
-               return -EAGAIN;
+               return -BCH_ERR_journal_preres_get_blocked;
 
        return __bch2_journal_preres_get(j, res, new_u64s, flags);
 }
index 47b77b3c3e686309e87336b2604b8dafeb290858..b308354aa5e3807971054a5e17b4b603fd8c60d8 100644 (file)
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 
 #include "bcachefs.h"
+#include "alloc_background.h"
 #include "alloc_foreground.h"
 #include "backpointers.h"
 #include "bkey_buf.h"
@@ -257,7 +258,7 @@ static int bch2_move_extent(struct btree_trans *trans,
        }
 
        if (!percpu_ref_tryget_live(&c->writes))
-               return -EROFS;
+               return -BCH_ERR_erofs_no_writes;
 
        /*
         * Before memory allocations & taking nocow locks in
@@ -661,13 +662,29 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt,
        struct btree_iter iter;
        struct bkey_buf sk;
        struct bch_backpointer bp;
+       struct bch_alloc_v4 a_convert;
+       const struct bch_alloc_v4 *a;
+       struct bkey_s_c k;
        struct data_update_opts data_opts;
+       unsigned dirty_sectors, bucket_size;
        u64 bp_offset = 0, cur_inum = U64_MAX;
        int ret = 0;
 
        bch2_bkey_buf_init(&sk);
        bch2_trans_init(&trans, c, 0, 0);
 
+       bch2_trans_iter_init(&trans, &iter, BTREE_ID_alloc,
+                            bucket, BTREE_ITER_CACHED);
+       ret = lockrestart_do(&trans,
+                       bkey_err(k = bch2_btree_iter_peek_slot(&iter)));
+       bch2_trans_iter_exit(&trans, &iter);
+
+       if (!ret) {
+               a = bch2_alloc_to_v4(k, &a_convert);
+               dirty_sectors = a->dirty_sectors;
+               bucket_size = bch_dev_bkey_exists(c, bucket.inode)->mi.bucket_size;
+       }
+
        while (!(ret = move_ratelimit(&trans, ctxt))) {
                bch2_trans_begin(&trans);
 
@@ -765,6 +782,8 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt,
                bp_offset++;
        }
 
+       trace_evacuate_bucket(c, &bucket, dirty_sectors, bucket_size, ret);
+
        if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) && gen >= 0) {
                bch2_trans_unlock(&trans);
                move_ctxt_wait_event(ctxt, NULL, list_empty(&ctxt->reads));
index fbc8043e0bf97b261e7ca93442e1fd6a8581e671..f0ab65ffab7373dca455b4bec9528c31d6d0fbe3 100644 (file)
@@ -163,7 +163,7 @@ static int bch2_copygc(struct bch_fs *c)
 
        bch2_moving_ctxt_exit(&ctxt);
 
-       if (ret < 0 && ret != -EROFS)
+       if (ret < 0 && !bch2_err_matches(ret, EROFS))
                bch_err(c, "error from bch2_move_data() in copygc: %s", bch2_err_str(ret));
 
        trace_and_count(c, copygc, c, atomic64_read(&move_stats.sectors_moved), 0, 0, 0);
index 069aafba48acc8e32b554a6bd87f3f2f0f663a5e..0ad2bb2aa039bdec4426b1d98b92672159bb08b7 100644 (file)
@@ -625,8 +625,8 @@ static int bch2_journal_replay(struct bch_fs *c)
                                     : 0),
                             bch2_journal_replay_key(&trans, k));
                if (ret) {
-                       bch_err(c, "journal replay: error %d while replaying key at btree %s level %u",
-                               ret, bch2_btree_ids[k->btree_id], k->level);
+                       bch_err(c, "journal replay: error while replaying key at btree %s level %u: %s",
+                               bch2_btree_ids[k->btree_id], k->level, bch2_err_str(ret));
                        goto err;
                }
        }
@@ -1246,13 +1246,6 @@ use_clean:
 
                set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags);
 
-               bch_info(c, "checking need_discard and freespace btrees");
-               err = "error checking need_discard and freespace btrees";
-               ret = bch2_check_alloc_info(c);
-               if (ret)
-                       goto err;
-               bch_verbose(c, "done checking need_discard and freespace btrees");
-
                if (c->sb.version < bcachefs_metadata_version_snapshot_2) {
                        err = "error creating root snapshot node";
                        ret = bch2_fs_initialize_subvolumes(c);
@@ -1277,6 +1270,15 @@ use_clean:
                if (c->opts.verbose || !c->sb.clean)
                        bch_info(c, "journal replay done");
 
+               bch_info(c, "checking need_discard and freespace btrees");
+               err = "error checking need_discard and freespace btrees";
+               ret = bch2_check_alloc_info(c);
+               if (ret)
+                       goto err;
+               bch_verbose(c, "done checking need_discard and freespace btrees");
+
+               set_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags);
+
                bch_info(c, "checking lrus");
                err = "error checking lrus";
                ret = bch2_check_lrus(c);
@@ -1316,6 +1318,7 @@ use_clean:
                set_bit(BCH_FS_CHECK_ALLOC_TO_LRU_REFS_DONE, &c->flags);
        } else {
                set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags);
+               set_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags);
                set_bit(BCH_FS_CHECK_LRUS_DONE, &c->flags);
                set_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags);
                set_bit(BCH_FS_CHECK_ALLOC_TO_LRU_REFS_DONE, &c->flags);
index ec672fedbd168911b19594df5815afd668019344..e89a9a1abe9f723d65eb4b52b6974b79fb33d165 100644 (file)
@@ -283,7 +283,7 @@ s64 bch2_remap_range(struct bch_fs *c,
        int ret = 0, ret2 = 0;
 
        if (!percpu_ref_tryget_live(&c->writes))
-               return -EROFS;
+               return -BCH_ERR_erofs_no_writes;
 
        bch2_check_set_feature(c, BCH_FEATURE_reflink);
 
index c4020cb9d6eb1af418a6358ca0504d55884de2a9..738b68b5d35cbfad6460d4bf78f914b0cb383ffd 100644 (file)
@@ -863,7 +863,7 @@ int bch2_write_super(struct bch_fs *c)
                                le64_to_cpu(ca->sb_read_scratch->seq),
                                ca->disk_sb.seq);
                        percpu_ref_put(&ca->io_ref);
-                       ret = -EROFS;
+                       ret = -BCH_ERR_erofs_sb_err;
                        goto out;
                }
 
@@ -873,7 +873,7 @@ int bch2_write_super(struct bch_fs *c)
                                le64_to_cpu(ca->sb_read_scratch->seq),
                                ca->disk_sb.seq);
                        percpu_ref_put(&ca->io_ref);
-                       ret = -EROFS;
+                       ret = -BCH_ERR_erofs_sb_err;
                        goto out;
                }
        }
index 647d018b5ec99e423dde784010cdbf6fd5d57f63..bad3eafd32d28d185d2bed9858624562167f7589 100644 (file)
@@ -194,6 +194,7 @@ read_attribute(btree_cache);
 read_attribute(btree_key_cache);
 read_attribute(stripes_heap);
 read_attribute(open_buckets);
+read_attribute(nocow_lock_table);
 
 read_attribute(internal_uuid);
 
@@ -445,6 +446,22 @@ SHOW(bch2_fs)
        if (attr == &sysfs_data_jobs)
                data_progress_to_text(out, c);
 
+       if (attr == &sysfs_nocow_lock_table) {
+               int i, count = 1;
+               long last, curr = 0;
+
+               last = atomic_long_read(&c->nocow_locks.l[0].v);
+               for (i = 1; i < BUCKET_NOCOW_LOCKS; i++) {
+                       curr = atomic_long_read(&c->nocow_locks.l[i].v);
+                       if (last != curr) {
+                               prt_printf(out, "%li: %d\n", last, count);
+                               count = 1;
+                               last = curr;
+                       } else
+                               count++;
+               }
+               prt_printf(out, "%li: %d\n", last, count);
+}
        return 0;
 }
 
@@ -627,6 +644,7 @@ struct attribute *bch2_fs_internal_files[] = {
        &sysfs_new_stripes,
        &sysfs_stripes_heap,
        &sysfs_open_buckets,
+       &sysfs_nocow_lock_table,
        &sysfs_io_timers_read,
        &sysfs_io_timers_write,