git.sesse.net Git - bcachefs-tools-debian/blobdiff - libbcachefs/journal_io.c
Update bcachefs sources to 070ec8d07b bcachefs: Snapshot depth, skiplist fields
[bcachefs-tools-debian] / libbcachefs / journal_io.c
index 45b1b839783de66f712aaac1b337cf522e365d84..f861ae2f176a8d0d87467250272b472cdc466c94 100644 (file)
@@ -14,8 +14,7 @@
 #include "journal_reclaim.h"
 #include "journal_seq_blacklist.h"
 #include "replicas.h"
-
-#include <trace/events/bcachefs.h>
+#include "trace.h"
 
 static struct nonce journal_nonce(const struct jset *jset)
 {
@@ -341,7 +340,8 @@ static int journal_entry_btree_keys_validate(struct bch_fs *c,
                int ret = journal_validate_key(c, jset, entry,
                                               entry->level,
                                               entry->btree_id,
-                                              k, version, big_endian, write|BKEY_INVALID_FROM_JOURNAL);
+                                              k, version, big_endian,
+                                              write|BKEY_INVALID_JOURNAL);
                if (ret == FSCK_DELETED_KEY)
                        continue;
 
@@ -746,14 +746,12 @@ static int jset_validate(struct bch_fs *c,
                return JOURNAL_ENTRY_NONE;
 
        version = le32_to_cpu(jset->version);
-       if (journal_entry_err_on((version != BCH_JSET_VERSION_OLD &&
-                                 version < bcachefs_metadata_version_min) ||
-                                version >= bcachefs_metadata_version_max,
-                                c, jset, NULL,
-                       "%s sector %llu seq %llu: unknown journal entry version %u",
+       if (journal_entry_err_on(!bch2_version_compatible(version), c, jset, NULL,
+                       "%s sector %llu seq %llu: incompatible journal entry version %u.%u",
                        ca ? ca->name : c->name,
                        sector, le64_to_cpu(jset->seq),
-                       version)) {
+                       BCH_VERSION_MAJOR(version),
+                       BCH_VERSION_MINOR(version))) {
                /* don't try to continue: */
                return -EINVAL;
        }
@@ -797,14 +795,12 @@ static int jset_validate_early(struct bch_fs *c,
                return JOURNAL_ENTRY_NONE;
 
        version = le32_to_cpu(jset->version);
-       if (journal_entry_err_on((version != BCH_JSET_VERSION_OLD &&
-                                 version < bcachefs_metadata_version_min) ||
-                                version >= bcachefs_metadata_version_max,
-                                c, jset, NULL,
-                       "%s sector %llu seq %llu: unknown journal entry version %u",
+       if (journal_entry_err_on(!bch2_version_compatible(version), c, jset, NULL,
+                       "%s sector %llu seq %llu: unknown journal entry version %u.%u",
                        ca ? ca->name : c->name,
                        sector, le64_to_cpu(jset->seq),
-                       version)) {
+                       BCH_VERSION_MAJOR(version),
+                       BCH_VERSION_MINOR(version))) {
                /* don't try to continue: */
                return -EINVAL;
        }
@@ -1439,7 +1435,7 @@ static void journal_buf_realloc(struct journal *j, struct journal_buf *buf)
        if (buf->buf_size >= new_size)
                return;
 
-       new_buf = kvpmalloc(new_size, GFP_NOIO|__GFP_NOWARN);
+       new_buf = kvpmalloc(new_size, GFP_NOFS|__GFP_NOWARN);
        if (!new_buf)
                return;
 
@@ -1463,7 +1459,6 @@ static void journal_write_done(struct closure *cl)
        struct journal *j = container_of(cl, struct journal, io);
        struct bch_fs *c = container_of(j, struct bch_fs, journal);
        struct journal_buf *w = journal_last_unwritten_buf(j);
-       struct bch_replicas_padded replicas;
        union journal_res_state old, new;
        u64 v, seq;
        int err = 0;
@@ -1475,13 +1470,7 @@ static void journal_write_done(struct closure *cl)
        if (!w->devs_written.nr) {
                bch_err(c, "unable to write journal to sufficient devices");
                err = -EIO;
-       } else {
-               bch2_devlist_to_replicas(&replicas.e, BCH_DATA_journal,
-                                        w->devs_written);
-               if (bch2_mark_replicas(c, &replicas.e))
-                       err = -EIO;
        }
-
        if (err)
                bch2_fatal_error(c);
 
@@ -1513,7 +1502,7 @@ static void journal_write_done(struct closure *cl)
         * Must come before signaling write completion, for
         * bch2_fs_journal_stop():
         */
-       if (j->watermark)
+       if (j->watermark != BCH_WATERMARK_stripe)
                journal_reclaim_kick(&c->journal);
 
        /* also must come before signalling write completion: */
@@ -1673,6 +1662,7 @@ void bch2_journal_write(struct closure *cl)
        struct bch_fs *c = container_of(j, struct bch_fs, journal);
        struct bch_dev *ca;
        struct journal_buf *w = journal_last_unwritten_buf(j);
+       struct bch_replicas_padded replicas;
        struct jset_entry *start, *end;
        struct jset *jset;
        struct bio *bio;
@@ -1750,12 +1740,19 @@ void bch2_journal_write(struct closure *cl)
        BUG_ON(u64s > j->entry_u64s_reserved);
 
        le32_add_cpu(&jset->u64s, u64s);
-       BUG_ON(vstruct_sectors(jset, c->block_bits) > w->sectors);
+
+       sectors = vstruct_sectors(jset, c->block_bits);
+       bytes   = vstruct_bytes(jset);
+
+       if (sectors > w->sectors) {
+               bch2_fs_fatal_error(c, "aieeee! journal write overran available space, %zu > %u (extra %u reserved %u/%u)",
+                                   vstruct_bytes(jset), w->sectors << 9,
+                                   u64s, w->u64s_reserved, j->entry_u64s_reserved);
+               goto err;
+       }
 
        jset->magic             = cpu_to_le64(jset_magic(c));
-       jset->version           = c->sb.version < bcachefs_metadata_version_bkey_renumber
-               ? cpu_to_le32(BCH_JSET_VERSION_OLD)
-               : cpu_to_le32(c->sb.version);
+       jset->version           = cpu_to_le32(c->sb.version);
 
        SET_JSET_BIG_ENDIAN(jset, CPU_BIG_ENDIAN);
        SET_JSET_CSUM_TYPE(jset, bch2_meta_checksum_type(c));
@@ -1787,10 +1784,6 @@ void bch2_journal_write(struct closure *cl)
            jset_validate(c, NULL, jset, 0, WRITE))
                goto err;
 
-       sectors = vstruct_sectors(jset, c->block_bits);
-       BUG_ON(sectors > w->sectors);
-
-       bytes = vstruct_bytes(jset);
        memset((void *) jset + bytes, 0, (sectors << 9) - bytes);
 
 retry_alloc:
@@ -1823,9 +1816,7 @@ retry_alloc:
                bch_err(c, "Unable to allocate journal write:\n%s",
                        journal_debug_buf.buf);
                printbuf_exit(&journal_debug_buf);
-               bch2_fatal_error(c);
-               continue_at(cl, journal_write_done, c->io_complete_wq);
-               return;
+               goto err;
        }
 
        w->devs_written = bch2_bkey_devs(bkey_i_to_s_c(&w->key));
@@ -1839,6 +1830,16 @@ retry_alloc:
        if (nr_rw_members > 1)
                w->separate_flush = true;
 
+       /*
+        * Mark journal replicas before we submit the write to guarantee
+        * recovery will find the journal entries after a crash.
+        */
+       bch2_devlist_to_replicas(&replicas.e, BCH_DATA_journal,
+                                w->devs_written);
+       ret = bch2_mark_replicas(c, &replicas.e);
+       if (ret)
+               goto err;
+
        if (!JSET_NO_FLUSH(jset) && w->separate_flush) {
                for_each_rw_member(ca, c, i) {
                        percpu_ref_get(&ca->io_ref);