Update bcachefs sources to be2d60d948 bcachefs: New magic number

author Kent Overstreet <kent.overstreet@linux.dev>

Sat, 3 Dec 2022 00:47:25 +0000 (19:47 -0500)

committer Kent Overstreet <kent.overstreet@linux.dev>

Sat, 3 Dec 2022 00:48:23 +0000 (19:48 -0500)
author Kent Overstreet <kent.overstreet@linux.dev>
Sat, 3 Dec 2022 00:47:25 +0000 (19:47 -0500)
committer Kent Overstreet <kent.overstreet@linux.dev>
Sat, 3 Dec 2022 00:48:23 +0000 (19:48 -0500)
diff --git a/.bcachefs_revision b/.bcachefs_revision

index af8b3edad30c512a8ab5bf3fa26df363b815f110..2aea1a7dbdbfe0b614f6906badec0be40619c935 100644 (file)
--- a/.bcachefs_revision
+++ b/.bcachefs_revision
@@ -1 +1 @@
-f1c9030ccbf6d7b5c46f08f92ee878bfc9f6ee6b
+be2d60d9484734b4c619ac0ddf54b3103210c9c0
diff --git a/libbcachefs.c b/libbcachefs.c

index 4fe2c3db401a41afb2f79319986bb4819e31d310..092a54a67ef8ffb82b0807560f8e39dedc0017af 100644 (file)
--- a/libbcachefs.c
+++ b/libbcachefs.c
@@ -39,7 +39,7 @@ static void init_layout(struct bch_sb_layout *l,
  
         memset(l, 0, sizeof(*l));
  
-       l->magic                = BCACHE_MAGIC;
+       l->magic                = BCHFS_MAGIC;
         l->layout_type          = 0;
         l->nr_superblocks       = 2;
         l->sb_max_size_bits     = ilog2(sb_size);
@@ -188,7 +188,7 @@ struct bch_sb *bch2_format(struct bch_opt_strs      fs_opt_strs,
  
         sb.sb->version          = le16_to_cpu(opts.version);
         sb.sb->version_min      = le16_to_cpu(opts.version);
-       sb.sb->magic            = BCACHE_MAGIC;
+       sb.sb->magic            = BCHFS_MAGIC;
         sb.sb->user_uuid        = opts.uuid;
         sb.sb->nr_devices       = nr_devs;
  
@@ -353,7 +353,8 @@ struct bch_sb *__bch2_super_read(int fd, u64 sector)
  
         xpread(fd, &sb, sizeof(sb), sector << 9);
  
-       if (memcmp(&sb.magic, &BCACHE_MAGIC, sizeof(sb.magic)))
+       if (memcmp(&sb.magic, &BCACHE_MAGIC, sizeof(sb.magic)) &&
+           memcmp(&sb.magic, &BCHFS_MAGIC, sizeof(sb.magic)))
                 die("not a bcachefs superblock");
  
         size_t bytes = vstruct_bytes(&sb);
diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h

index 0aa522b7f0f29a210c8a0a7b52691726cbc16955..f29e3e7e95258b2d1639491372f258edbfa3c204 100644 (file)
--- a/libbcachefs/bcachefs_format.h
+++ b/libbcachefs/bcachefs_format.h
@@ -1578,7 +1578,7 @@ struct bch_sb_layout {
   * @version_min        - Oldest metadata version this filesystem contains; so we can
   *               safely drop compatibility code and refuse to mount filesystems
   *               we'd need it for
- * @magic      - identifies as a bcachefs superblock (BCACHE_MAGIC)
+ * @magic      - identifies as a bcachefs superblock (BCHFS_MAGIC)
   * @seq                - incremented each time superblock is written
   * @uuid       - used for generating various magic numbers and identifying
   *                member devices, never changes
@@ -1894,6 +1894,9 @@ enum bch_compression_opts {
  #define BCACHE_MAGIC                                                   \
         UUID_LE(0xf67385c6, 0x1a4e, 0xca45,                             \
                 0x82, 0x65, 0xf5, 0x7f, 0x48, 0xba, 0x6d, 0x81)
+#define BCHFS_MAGIC                                                    \
+       UUID_LE(0xf67385c6, 0xce66, 0xa990,                             \
+               0xd9, 0x6a, 0x60, 0xcf, 0x80, 0x3d, 0xf7, 0xef)
  
  #define BCACHEFS_STATFS_MAGIC          0xca451a4e
  
diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c

index 0d280e60e1e382cfe5cbe7819c196dcfc29413c8..f9ccc216216c3108b8b99f7001b398d1820aa356 100644 (file)
--- a/libbcachefs/btree_iter.c
+++ b/libbcachefs/btree_iter.c
@@ -1249,7 +1249,7 @@ __bch2_btree_path_set_pos(struct btree_trans *trans,
                    struct btree_path *path, struct bpos new_pos,
                    bool intent, unsigned long ip, int cmp)
  {
-       unsigned l = path->level;
+       unsigned level = path->level;
  
         EBUG_ON(trans->restarted);
         EBUG_ON(!path->ref);
@@ -1267,10 +1267,12 @@ __bch2_btree_path_set_pos(struct btree_trans *trans,
                 goto out;
         }
  
-       l = btree_path_up_until_good_node(trans, path, cmp);
+       level = btree_path_up_until_good_node(trans, path, cmp);
  
-       if (btree_path_node(path, l)) {
-               BUG_ON(!btree_node_locked(path, l));
+       if (btree_path_node(path, level)) {
+               struct btree_path_level *l = &path->l[level];
+
+               BUG_ON(!btree_node_locked(path, level));
                 /*
                  * We might have to skip over many keys, or just a few: try
                  * advancing the node iterator, and if we have to skip over too
@@ -1278,11 +1280,18 @@ __bch2_btree_path_set_pos(struct btree_trans *trans,
                  * is expensive).
                  */
                 if (cmp < 0 ||
-                   !btree_path_advance_to_pos(path, &path->l[l], 8))
-                       __btree_path_level_init(path, l);
+                   !btree_path_advance_to_pos(path, l, 8))
+                       bch2_btree_node_iter_init(&l->iter, l->b, &path->pos);
+
+               /*
+                * Iterators to interior nodes should always be pointed at the first
+                * non-whiteout:
+                */
+               if (unlikely(level))
+                       bch2_btree_node_iter_peek(&l->iter, l->b);
         }
  
-       if (unlikely(l != path->level)) {
+       if (unlikely(level != path->level)) {
                 btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
                 __bch2_btree_path_unlock(trans, path);
         }
diff --git a/libbcachefs/journal_io.c b/libbcachefs/journal_io.c

index 179361bea0e3c0e7718afc981830a4f42b24632c..2b1974a9f360e943e59a408ccbbcb9cc2c262f4b 100644 (file)
--- a/libbcachefs/journal_io.c
+++ b/libbcachefs/journal_io.c
@@ -987,7 +987,6 @@ static void bch2_journal_read_device(struct closure *cl)
         struct journal_replay *r, **_r;
         struct genradix_iter iter;
         struct journal_read_buf buf = { NULL, 0 };
-       u64 min_seq = U64_MAX;
         unsigned i;
         int ret = 0;
  
@@ -1006,45 +1005,27 @@ static void bch2_journal_read_device(struct closure *cl)
                         goto err;
         }
  
-       /* Find the journal bucket with the highest sequence number: */
-       for (i = 0; i < ja->nr; i++) {
-               if (ja->bucket_seq[i] > ja->bucket_seq[ja->cur_idx])
-                       ja->cur_idx = i;
-
-               min_seq = min(ja->bucket_seq[i], min_seq);
-       }
-
-       /*
-        * If there's duplicate journal entries in multiple buckets (which
-        * definitely isn't supposed to happen, but...) - make sure to start
-        * cur_idx at the last of those buckets, so we don't deadlock trying to
-        * allocate
-        */
-       while (ja->bucket_seq[ja->cur_idx] > min_seq &&
-              ja->bucket_seq[ja->cur_idx] ==
-              ja->bucket_seq[(ja->cur_idx + 1) % ja->nr])
-               ja->cur_idx = (ja->cur_idx + 1) % ja->nr;
-
         ja->sectors_free = ca->mi.bucket_size;
  
         mutex_lock(&jlist->lock);
-       genradix_for_each(&c->journal_entries, iter, _r) {
+       genradix_for_each_reverse(&c->journal_entries, iter, _r) {
                 r = *_r;
  
                 if (!r)
                         continue;
  
                 for (i = 0; i < r->nr_ptrs; i++) {
-                       if (r->ptrs[i].dev == ca->dev_idx &&
-                           sector_to_bucket(ca, r->ptrs[i].sector) == ja->buckets[ja->cur_idx]) {
+                       if (r->ptrs[i].dev == ca->dev_idx) {
                                 unsigned wrote = bucket_remainder(ca, r->ptrs[i].sector) +
                                         vstruct_sectors(&r->j, c->block_bits);
  
-                               ja->sectors_free = min(ja->sectors_free,
-                                                      ca->mi.bucket_size - wrote);
+                               ja->cur_idx = r->ptrs[i].bucket;
+                               ja->sectors_free = ca->mi.bucket_size - wrote;
+                               goto found;
                         }
                 }
         }
+found:
         mutex_unlock(&jlist->lock);
  
         if (ja->bucket_seq[ja->cur_idx] &&
@@ -1660,20 +1641,42 @@ void bch2_journal_write(struct closure *cl)
         j->write_start_time = local_clock();
  
         spin_lock(&j->lock);
-       if (bch2_journal_error(j) ||
-           w->noflush ||
-           (!w->must_flush &&
-            (jiffies - j->last_flush_write) < msecs_to_jiffies(c->opts.journal_flush_delay) &&
-            test_bit(JOURNAL_MAY_SKIP_FLUSH, &j->flags))) {
+
+       /*
+        * If the journal is in an error state - we did an emergency shutdown -
+        * we prefer to continue doing journal writes. We just mark them as
+        * noflush so they'll never be used, but they'll still be visible to the
+        * list_journal tool - this helps in debugging.
+        *
+        * There's a caveat: the first journal write after marking the
+        * superblock dirty must always be a flush write, because on startup
+        * from a clean shutdown we didn't necessarily read the journal and the
+        * new journal write might overwrite whatever was in the journal
+        * previously - we can't leave the journal without any flush writes in
+        * it.
+        *
+        * So if we're in an error state, and we're still starting up, we don't
+        * write anything at all.
+        */
+       if (!test_bit(JOURNAL_NEED_FLUSH_WRITE, &j->flags) &&
+           (bch2_journal_error(j) ||
+            w->noflush ||
+            (!w->must_flush &&
+             (jiffies - j->last_flush_write) < msecs_to_jiffies(c->opts.journal_flush_delay) &&
+             test_bit(JOURNAL_MAY_SKIP_FLUSH, &j->flags)))) {
                 w->noflush = true;
                 SET_JSET_NO_FLUSH(jset, true);
                 jset->last_seq  = 0;
                 w->last_seq     = 0;
  
                 j->nr_noflush_writes++;
-       } else {
+       } else if (!bch2_journal_error(j)) {
                 j->last_flush_write = jiffies;
                 j->nr_flush_writes++;
+               clear_bit(JOURNAL_NEED_FLUSH_WRITE, &j->flags);
+       } else {
+               spin_unlock(&j->lock);
+               goto err;
         }
         spin_unlock(&j->lock);
  
diff --git a/libbcachefs/journal_types.h b/libbcachefs/journal_types.h

index a6cdb885ad41077db05e74a837225a0e2158967b..045ee95a92a874a09668e75a5cabae92f9ce27e4 100644 (file)
--- a/libbcachefs/journal_types.h
+++ b/libbcachefs/journal_types.h
@@ -141,10 +141,11 @@ enum journal_space_from {
         journal_space_nr,
  };
  
-enum {
+enum journal_flags {
         JOURNAL_REPLAY_DONE,
         JOURNAL_STARTED,
         JOURNAL_MAY_SKIP_FLUSH,
+       JOURNAL_NEED_FLUSH_WRITE,
  };
  
  #define JOURNAL_WATERMARKS()           \
diff --git a/libbcachefs/super-io.c b/libbcachefs/super-io.c

index 6d99a5814392ed366fea6dcef4fb7313c395c301..0aa243f5fae6b7bb71331de703f71dbd03c7fe45 100644 (file)
--- a/libbcachefs/super-io.c
+++ b/libbcachefs/super-io.c
@@ -211,7 +211,8 @@ static int validate_sb_layout(struct bch_sb_layout *layout, struct printbuf *out
         u64 offset, prev_offset, max_sectors;
         unsigned i;
  
-       if (uuid_le_cmp(layout->magic, BCACHE_MAGIC)) {
+       if (uuid_le_cmp(layout->magic, BCACHE_MAGIC) &&
+           uuid_le_cmp(layout->magic, BCHFS_MAGIC)) {
                 prt_printf(out, "Not a bcachefs superblock layout");
                 return -BCH_ERR_invalid_sb_layout;
         }
@@ -538,7 +539,8 @@ reread:
                 return ret;
         }
  
-       if (uuid_le_cmp(sb->sb->magic, BCACHE_MAGIC)) {
+       if (uuid_le_cmp(sb->sb->magic, BCACHE_MAGIC) &&
+           uuid_le_cmp(sb->sb->magic, BCHFS_MAGIC)) {
                 prt_printf(err, "Not a bcachefs superblock");
                 return -BCH_ERR_invalid_sb_magic;
         }
diff --git a/libbcachefs/super.c b/libbcachefs/super.c

index 47ca21536485422a5a930ee521f4a886e25990ab..7cac0567037d3ea9613d4bae2ff793b94e2fe0c3 100644 (file)
--- a/libbcachefs/super.c
+++ b/libbcachefs/super.c
@@ -367,6 +367,14 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
  
         clear_bit(BCH_FS_CLEAN_SHUTDOWN, &c->flags);
  
+       /*
+        * First journal write must be a flush write: after a clean shutdown we
+        * don't read the journal, so the first journal write may end up
+        * overwriting whatever was there previously, and there must always be
+        * at least one non-flush write in the journal or recovery will fail:
+        */
+       set_bit(JOURNAL_NEED_FLUSH_WRITE, &c->journal.flags);
+
         for_each_rw_member(ca, c, i)
                 bch2_dev_allocator_add(c, ca);
         bch2_recalc_capacity(c);
author	Kent Overstreet <kent.overstreet@linux.dev>
	Sat, 3 Dec 2022 00:47:25 +0000 (19:47 -0500)
committer	Kent Overstreet <kent.overstreet@linux.dev>
	Sat, 3 Dec 2022 00:48:23 +0000 (19:48 -0500)
.bcachefs_revision		patch \| blob \| history
libbcachefs.c		patch \| blob \| history
libbcachefs/bcachefs_format.h		patch \| blob \| history
libbcachefs/btree_iter.c		patch \| blob \| history
libbcachefs/journal_io.c		patch \| blob \| history
libbcachefs/journal_types.h		patch \| blob \| history
libbcachefs/super-io.c		patch \| blob \| history
libbcachefs/super.c		patch \| blob \| history