]> git.sesse.net Git - bcachefs-tools-debian/commitdiff
Update bcachefs sources to cbb2e45634dd bcachefs: fix simulateously upgrading & downg...
authorKent Overstreet <kent.overstreet@linux.dev>
Fri, 5 Jan 2024 19:18:29 +0000 (14:18 -0500)
committerKent Overstreet <kent.overstreet@linux.dev>
Sat, 6 Jan 2024 01:00:08 +0000 (20:00 -0500)
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
.bcachefs_revision
libbcachefs/bset.c
libbcachefs/btree_io.c
libbcachefs/btree_iter.c
libbcachefs/checksum.h
libbcachefs/fs-io.c
libbcachefs/io_read.c
libbcachefs/journal_io.c
libbcachefs/recovery.c
libbcachefs/super-io.c

index bb0353efe1bc9a731693efeff791c954a996541c..236a97d5640e9308930514ccc2b8a49fc83d86e7 100644 (file)
@@ -1 +1 @@
-d267e10a43b2e9ab37da6c9c991ca021142f6324
+cbb2e45634dd3b6ae38e45856ab6215c687a8806
index bb73ba9017b006e7fe181e19b7cccfe8494c1339..74bf8eb90a4c42cd24dc61024ecb448740e271a7 100644 (file)
@@ -68,6 +68,12 @@ void bch2_dump_bset(struct bch_fs *c, struct btree *b,
             _k = _n) {
                _n = bkey_p_next(_k);
 
+               if (!_k->u64s) {
+                       printk(KERN_ERR "block %u key %5zu - u64s 0? aieee!\n", set,
+                              _k->_data - i->_data);
+                       break;
+               }
+
                k = bkey_disassemble(b, _k, &uk);
 
                printbuf_reset(&buf);
index 378579bbe2ede6da15591b89a3b2a08c9f9fd37e..33db48e2153fef61f0c733f97278018f419c2b05 100644 (file)
@@ -524,7 +524,8 @@ static void btree_err_msg(struct printbuf *out, struct bch_fs *c,
        prt_printf(out, "at btree ");
        bch2_btree_pos_to_text(out, c, b);
 
-       prt_printf(out, "\n  node offset %u", b->written);
+       prt_printf(out, "\n  node offset %u/%u",
+                  b->written, btree_ptr_sectors_written(&b->key));
        if (i)
                prt_printf(out, " bset u64s %u", le16_to_cpu(i->u64s));
        prt_str(out, ": ");
@@ -830,6 +831,23 @@ static int bset_key_invalid(struct bch_fs *c, struct btree *b,
                (rw == WRITE ? bch2_bkey_val_invalid(c, k, READ, err) : 0);
 }
 
+static bool __bkey_valid(struct bch_fs *c, struct btree *b,
+                        struct bset *i, struct bkey_packed *k)
+{
+       /* Reject a key that extends past vstruct_last(i) or has a format above KEY_FORMAT_CURRENT. */
+       if (bkey_p_next(k) > vstruct_last(i))
+               return false;
+       if (k->format > KEY_FORMAT_CURRENT)
+               return false;
+       /* __bch2_bkey_invalid() reports a bad key as nonzero, so "valid" is its negation. */
+       struct printbuf buf = PRINTBUF;
+       struct bkey tmp;
+       struct bkey_s u = __bkey_disassemble(b, k, &tmp);
+       bool invalid = __bch2_bkey_invalid(c, u.s_c, btree_node_type(b), READ, &buf);
+       printbuf_exit(&buf);
+       return !invalid;
+}
+
 static int validate_bset_keys(struct bch_fs *c, struct btree *b,
                         struct bset *i, int write,
                         bool have_retry, bool *saw_error)
@@ -845,6 +863,7 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b,
             k != vstruct_last(i);) {
                struct bkey_s u;
                struct bkey tmp;
+               unsigned next_good_key;
 
                if (btree_err_on(bkey_p_next(k) > vstruct_last(i),
                                 -BCH_ERR_btree_node_read_err_fixable,
@@ -859,12 +878,8 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b,
                                 -BCH_ERR_btree_node_read_err_fixable,
                                 c, NULL, b, i,
                                 btree_node_bkey_bad_format,
-                                "invalid bkey format %u", k->format)) {
-                       i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s);
-                       memmove_u64s_down(k, bkey_p_next(k),
-                                         (u64 *) vstruct_end(i) - (u64 *) k);
-                       continue;
-               }
+                                "invalid bkey format %u", k->format))
+                       goto drop_this_key;
 
                /* XXX: validate k->u64s */
                if (!write)
@@ -885,11 +900,7 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b,
                                  c, NULL, b, i,
                                  btree_node_bad_bkey,
                                  "invalid bkey: %s", buf.buf);
-
-                       i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s);
-                       memmove_u64s_down(k, bkey_p_next(k),
-                                         (u64 *) vstruct_end(i) - (u64 *) k);
-                       continue;
+                       goto drop_this_key;
                }
 
                if (write)
@@ -906,21 +917,45 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b,
                        prt_printf(&buf, " > ");
                        bch2_bkey_to_text(&buf, u.k);
 
-                       bch2_dump_bset(c, b, i, 0);
-
                        if (btree_err(-BCH_ERR_btree_node_read_err_fixable,
                                      c, NULL, b, i,
                                      btree_node_bkey_out_of_order,
-                                     "%s", buf.buf)) {
-                               i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s);
-                               memmove_u64s_down(k, bkey_p_next(k),
-                                                 (u64 *) vstruct_end(i) - (u64 *) k);
-                               continue;
-                       }
+                                     "%s", buf.buf))
+                               goto drop_this_key;
                }
 
                prev = k;
                k = bkey_p_next(k);
+               continue;
+drop_this_key:
+               next_good_key = k->u64s;
+
+               if (!next_good_key ||
+                   (BSET_BIG_ENDIAN(i) == CPU_BIG_ENDIAN &&
+                    version >= bcachefs_metadata_version_snapshot)) {
+                       /*
+                        * only do scanning if bch2_bkey_compat() has nothing to
+                        * do
+                        */
+
+                       if (!__bkey_valid(c, b, i, (void *) ((u64 *) k + next_good_key))) {
+                               for (next_good_key = 1;
+                                    next_good_key < (u64 *) vstruct_last(i) - (u64 *) k;
+                                    next_good_key++)
+                                       if (__bkey_valid(c, b, i, (void *) ((u64 *) k + next_good_key)))
+                                               goto got_good_key;
+
+                       }
+
+                       /*
+                        * didn't find a good key, have to truncate the rest of
+                        * the bset
+                        */
+                       next_good_key = (u64 *) vstruct_last(i) - (u64 *) k;
+               }
+got_good_key:
+               le16_add_cpu(&i->u64s, -next_good_key);
+               memmove_u64s_down(k, bkey_p_next(k), (u64 *) vstruct_end(i) - (u64 *) k);
        }
 fsck_err:
        printbuf_exit(&buf);
@@ -1007,8 +1042,8 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
 
                        nonce = btree_nonce(i, b->written << 9);
 
-                       csum_bad = bch2_crc_cmp(b->data->csum,
-                               csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, b->data));
+                       struct bch_csum csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, b->data);
+                       csum_bad = bch2_crc_cmp(b->data->csum, csum);
                        if (csum_bad)
                                bch2_io_error(ca, BCH_MEMBER_ERROR_checksum);
 
@@ -1016,7 +1051,10 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
                                     -BCH_ERR_btree_node_read_err_want_retry,
                                     c, ca, b, i,
                                     bset_bad_csum,
-                                    "invalid checksum");
+                                    "%s",
+                                    (printbuf_reset(&buf),
+                                     bch2_csum_err_msg(&buf, BSET_CSUM_TYPE(i), b->data->csum, csum),
+                                     buf.buf));
 
                        ret = bset_encrypt(c, i, b->written << 9);
                        if (bch2_fs_fatal_err_on(ret, c,
@@ -1045,8 +1083,8 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
                                     "unknown checksum type %llu", BSET_CSUM_TYPE(i));
 
                        nonce = btree_nonce(i, b->written << 9);
-                       csum_bad = bch2_crc_cmp(bne->csum,
-                               csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne));
+                       struct bch_csum csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne);
+                       csum_bad = bch2_crc_cmp(bne->csum, csum);
                        if (csum_bad)
                                bch2_io_error(ca, BCH_MEMBER_ERROR_checksum);
 
@@ -1054,7 +1092,10 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
                                     -BCH_ERR_btree_node_read_err_want_retry,
                                     c, ca, b, i,
                                     bset_bad_csum,
-                                    "invalid checksum");
+                                    "%s",
+                                    (printbuf_reset(&buf),
+                                     bch2_csum_err_msg(&buf, BSET_CSUM_TYPE(i), bne->csum, csum),
+                                     buf.buf));
 
                        ret = bset_encrypt(c, i, b->written << 9);
                        if (bch2_fs_fatal_err_on(ret, c,
index 6e8e9ba5805d2239ad74ddf6cdfb2359ff5be33d..fa298289e01656b989db38dcf19301ae4d880bb7 100644 (file)
@@ -2818,34 +2818,11 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
        return p;
 }
 
-#include "sb-members.h"
-
 static inline void check_srcu_held_too_long(struct btree_trans *trans)
 {
-       if (trans->srcu_held && time_after(jiffies, trans->srcu_lock_time + HZ * 10)) {
-               struct printbuf buf = PRINTBUF;
-
-               prt_str(&buf, "btree node read time:\n");
-               bch2_time_stats_to_text(&buf, &trans->c->times[BCH_TIME_btree_node_read]);
-
-               prt_str(&buf, "btree node read_done time:\n");
-               bch2_time_stats_to_text(&buf, &trans->c->times[BCH_TIME_btree_node_read_done]);
-
-               for_each_member_device(trans->c, ca) {
-                       prt_printf(&buf, "device %u read time:\n", ca->dev_idx);
-                       bch2_time_stats_to_text(&buf, &ca->io_latency[READ]);
-               }
-
-               struct btree_transaction_stats *s = btree_trans_stats(trans);
-               prt_str(&buf, "transaction duration:\n");
-               bch2_time_stats_to_text(&buf, &s->duration);
-
-               WARN(trans->srcu_held && time_after(jiffies, trans->srcu_lock_time + HZ * 10),
-                    "btree trans held srcu lock (delaying memory reclaim) for %lu seconds",
-                    (jiffies - trans->srcu_lock_time) / HZ);
-               bch2_print_string_as_lines(KERN_ERR, buf.buf);
-               printbuf_exit(&buf);
-       }
+       WARN(trans->srcu_held && time_after(jiffies, trans->srcu_lock_time + HZ * 10),
+            "btree trans held srcu lock (delaying memory reclaim) for %lu seconds",
+            (jiffies - trans->srcu_lock_time) / HZ);
 }
 
 void bch2_trans_srcu_unlock(struct btree_trans *trans)
index 13998388c545c476545b1e6cd418306f67dcf90e..1b8c2c1016dc6347ce12ef3161d4723835dfa56e 100644 (file)
@@ -45,6 +45,29 @@ struct bch_csum bch2_checksum(struct bch_fs *, unsigned, struct nonce,
        bch2_checksum(_c, _type, _nonce, _start, vstruct_end(_i) - _start);\
 })
 
+static inline void bch2_csum_to_text(struct printbuf *out,
+                                    enum bch_csum_type type,
+                                    struct bch_csum csum)
+{
+       /* Hex-dump the leading bytes of csum; 16 of them when type is not below BCH_CSUM_NR. */
+       unsigned nr = type < BCH_CSUM_NR ? bch_crc_bytes[type] : 16;
+
+       for (const u8 *byte = (u8 *) &csum; nr; nr--, byte++)
+               prt_hex_byte(out, *byte);
+}
+
+/* Append "checksum error: got <hex> should be <hex> type <name>" to @out. */
+static inline void bch2_csum_err_msg(struct printbuf *out, enum bch_csum_type type,
+                                    struct bch_csum expected, struct bch_csum got)
+{
+       prt_printf(out, "checksum error: got ");
+       bch2_csum_to_text(out, type, got);
+       prt_str(out, " should be ");
+       bch2_csum_to_text(out, type, expected);
+       /* type may be >= BCH_CSUM_NR (bch2_csum_to_text() allows for that): don't index past the names */
+       prt_printf(out, " type %s", type < BCH_CSUM_NR ? bch2_csum_types[type] : "(unknown)");
+}
+
 int bch2_chacha_encrypt_key(struct bch_key *, struct nonce, void *, size_t);
 int bch2_request_key(struct bch_sb *, struct bch_key *);
 #ifndef __KERNEL__
index 9fea89762df01dd64b6d8e6463b237d34a106c3a..98bd5babab193bec842dce20b0783e6c958ac5bf 100644 (file)
@@ -194,16 +194,6 @@ int bch2_fsync(struct file *file, loff_t start, loff_t end, int datasync)
        struct bch_fs *c = inode->v.i_sb->s_fs_info;
        int ret;
 
-       /*
-        * check if unlinked, disable/defer until relink
-        */
-
-       /*
-        * also: add a mode where a file is a tmpfile until fully,
-        * asynchronously written
-        */
-
-
        ret = file_write_and_wait_range(file, start, end);
        if (ret)
                goto out;
index 5c2d118eaf6feeba1c759596b76f1d47acb4b419..3c574d8873a1e209dc7f7f48faacf9928f8a1272 100644 (file)
@@ -642,12 +642,17 @@ csum_err:
                goto out;
        }
 
+       struct printbuf buf = PRINTBUF;
+       buf.atomic++;
+       prt_str(&buf, "data ");
+       bch2_csum_err_msg(&buf, crc.csum_type, rbio->pick.crc.csum, csum);
+
        bch_err_inum_offset_ratelimited(ca,
                rbio->read_pos.inode,
                rbio->read_pos.offset << 9,
-               "data checksum error: expected %0llx:%0llx got %0llx:%0llx (type %s)",
-               rbio->pick.crc.csum.hi, rbio->pick.crc.csum.lo,
-               csum.hi, csum.lo, bch2_csum_types[crc.csum_type]);
+               "data %s", buf.buf);
+       printbuf_exit(&buf);
+
        bch2_io_error(ca, BCH_MEMBER_ERROR_checksum);
        bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR);
        goto out;
index c5bc58247146a2cdbc1eecb987db7ba667e9677f..b0f4dd491e1205d28c6af528fb59696cdbc4dc9c 100644 (file)
@@ -27,11 +27,15 @@ static struct nonce journal_nonce(const struct jset *jset)
        }};
 }
 
-static bool jset_csum_good(struct bch_fs *c, struct jset *j)
+static bool jset_csum_good(struct bch_fs *c, struct jset *j, struct bch_csum *csum)
 {
-       return bch2_checksum_type_valid(c, JSET_CSUM_TYPE(j)) &&
-               !bch2_crc_cmp(j->csum,
-                             csum_vstruct(c, JSET_CSUM_TYPE(j), journal_nonce(j), j));
+       if (!bch2_checksum_type_valid(c, JSET_CSUM_TYPE(j))) {
+               *csum = (struct bch_csum) {};
+               return false;
+       }
+
+       *csum = csum_vstruct(c, JSET_CSUM_TYPE(j), journal_nonce(j), j);
+       return !bch2_crc_cmp(j->csum, *csum);
 }
 
 static inline u32 journal_entry_radix_idx(struct bch_fs *c, u64 seq)
@@ -934,6 +938,7 @@ static int journal_read_bucket(struct bch_dev *ca,
        u64 offset = bucket_to_sector(ca, ja->buckets[bucket]),
            end = offset + ca->mi.bucket_size;
        bool saw_bad = false, csum_good;
+       struct printbuf err = PRINTBUF;
        int ret = 0;
 
        pr_debug("reading %u", bucket);
@@ -966,7 +971,7 @@ reread:
                                 * found on a different device, and missing or
                                 * no journal entries will be handled later
                                 */
-                               return 0;
+                               goto out;
                        }
 
                        j = buf->data;
@@ -983,12 +988,12 @@ reread:
                                ret = journal_read_buf_realloc(buf,
                                                        vstruct_bytes(j));
                                if (ret)
-                                       return ret;
+                                       goto err;
                        }
                        goto reread;
                case JOURNAL_ENTRY_NONE:
                        if (!saw_bad)
-                               return 0;
+                               goto out;
                        /*
                         * On checksum error we don't really trust the size
                         * field of the journal entry we read, so try reading
@@ -997,7 +1002,7 @@ reread:
                        sectors = block_sectors(c);
                        goto next_block;
                default:
-                       return ret;
+                       goto err;
                }
 
                /*
@@ -1007,20 +1012,28 @@ reread:
                 * bucket:
                 */
                if (le64_to_cpu(j->seq) < ja->bucket_seq[bucket])
-                       return 0;
+                       goto out;
 
                ja->bucket_seq[bucket] = le64_to_cpu(j->seq);
 
-               csum_good = jset_csum_good(c, j);
+               enum bch_csum_type csum_type = JSET_CSUM_TYPE(j);
+               struct bch_csum csum;
+               csum_good = jset_csum_good(c, j, &csum);
+
                if (bch2_dev_io_err_on(!csum_good, ca, BCH_MEMBER_ERROR_checksum,
-                                      "journal checksum error"))
+                                      "%s",
+                                      (printbuf_reset(&err),
+                                       prt_str(&err, "journal "),
+                                       bch2_csum_err_msg(&err, csum_type, j->csum, csum),
+                                       err.buf)))
                        saw_bad = true;
 
                ret = bch2_encrypt(c, JSET_CSUM_TYPE(j), journal_nonce(j),
                             j->encrypted_start,
                             vstruct_end(j) - (void *) j->encrypted_start);
                bch2_fs_fatal_err_on(ret, c,
-                               "error decrypting journal entry: %i", ret);
+                               "error decrypting journal entry: %s",
+                               bch2_err_str(ret));
 
                mutex_lock(&jlist->lock);
                ret = journal_entry_add(c, ca, (struct journal_ptr) {
@@ -1039,7 +1052,7 @@ reread:
                case JOURNAL_ENTRY_ADD_OUT_OF_RANGE:
                        break;
                default:
-                       return ret;
+                       goto err;
                }
 next_block:
                pr_debug("next");
@@ -1048,7 +1061,11 @@ next_block:
                j = ((void *) j) + (sectors << 9);
        }
 
-       return 0;
+out:
+       ret = 0;
+err:
+       printbuf_exit(&err);
+       return ret;
 }
 
 static CLOSURE_CALLBACK(bch2_journal_read_device)
index e1f0da6a717e021b894ba39932275e8b2ed0b323..725214605a050996196c28a9132f8fe247e76d28 100644 (file)
@@ -696,8 +696,11 @@ static int bch2_run_recovery_passes(struct bch_fs *c)
 
        while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns)) {
                if (should_run_recovery_pass(c, c->curr_recovery_pass)) {
+                       unsigned pass = c->curr_recovery_pass;
+
                        ret = bch2_run_recovery_pass(c, c->curr_recovery_pass);
-                       if (bch2_err_matches(ret, BCH_ERR_restart_recovery))
+                       if (bch2_err_matches(ret, BCH_ERR_restart_recovery) ||
+                           (ret && c->curr_recovery_pass < pass))
                                continue;
                        if (ret)
                                break;
index ea86921727b4e5abd34540c028979b76dda99434..55926b81eede63596f7a48f6daaa363bede4d504 100644 (file)
@@ -612,7 +612,6 @@ int bch2_sb_from_fs(struct bch_fs *c, struct bch_dev *ca)
 
 static int read_one_super(struct bch_sb_handle *sb, u64 offset, struct printbuf *err)
 {
-       struct bch_csum csum;
        size_t bytes;
        int ret;
 reread:
@@ -628,7 +627,9 @@ reread:
 
        if (!uuid_equal(&sb->sb->magic, &BCACHE_MAGIC) &&
            !uuid_equal(&sb->sb->magic, &BCHFS_MAGIC)) {
-               prt_printf(err, "Not a bcachefs superblock");
+               prt_str(err, "Not a bcachefs superblock (got magic ");
+               pr_uuid(err, sb->sb->magic.b);
+               prt_str(err, ")");
                return -BCH_ERR_invalid_sb_magic;
        }
 
@@ -651,17 +652,16 @@ reread:
                goto reread;
        }
 
-       if (BCH_SB_CSUM_TYPE(sb->sb) >= BCH_CSUM_NR) {
+       enum bch_csum_type csum_type = BCH_SB_CSUM_TYPE(sb->sb);
+       if (csum_type >= BCH_CSUM_NR) {
                prt_printf(err, "unknown checksum type %llu", BCH_SB_CSUM_TYPE(sb->sb));
                return -BCH_ERR_invalid_sb_csum_type;
        }
 
        /* XXX: verify MACs */
-       csum = csum_vstruct(NULL, BCH_SB_CSUM_TYPE(sb->sb),
-                           null_nonce(), sb->sb);
-
+       struct bch_csum csum = csum_vstruct(NULL, csum_type, null_nonce(), sb->sb);
        if (bch2_crc_cmp(csum, sb->sb->csum)) {
-               prt_printf(err, "bad checksum");
+               bch2_csum_err_msg(err, csum_type, sb->sb->csum, csum);
                return -BCH_ERR_invalid_sb_csum;
        }
 
@@ -1088,13 +1088,22 @@ bool bch2_check_version_downgrade(struct bch_fs *c)
        /*
         * Downgrade, if superblock is at a higher version than currently
         * supported:
+        *
+        * c->sb will be checked before we write the superblock, so update it as
+        * well:
         */
-       if (BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb) > bcachefs_metadata_version_current)
+       if (BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb) > bcachefs_metadata_version_current) {
                SET_BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb, bcachefs_metadata_version_current);
-       if (c->sb.version > bcachefs_metadata_version_current)
+               c->sb.version_upgrade_complete = bcachefs_metadata_version_current;
+       }
+       if (c->sb.version > bcachefs_metadata_version_current) {
                c->disk_sb.sb->version = cpu_to_le16(bcachefs_metadata_version_current);
-       if (c->sb.version_min > bcachefs_metadata_version_current)
+               c->sb.version = bcachefs_metadata_version_current;
+       }
+       if (c->sb.version_min > bcachefs_metadata_version_current) {
                c->disk_sb.sb->version_min = cpu_to_le16(bcachefs_metadata_version_current);
+               c->sb.version_min = bcachefs_metadata_version_current;
+       }
        c->disk_sb.sb->compat[0] &= cpu_to_le64((1ULL << BCH_COMPAT_NR) - 1);
        return ret;
 }
@@ -1261,6 +1270,11 @@ void bch2_sb_to_text(struct printbuf *out, struct bch_sb *sb,
        pr_uuid(out, sb->uuid.b);
        prt_newline(out);
 
+       prt_printf(out, "Magic number:");
+       prt_tab(out);
+       pr_uuid(out, sb->magic.b);
+       prt_newline(out);
+
        prt_str(out, "Device index:");
        prt_tab(out);
        prt_printf(out, "%u", sb->dev_idx);