-d267e10a43b2e9ab37da6c9c991ca021142f6324
+cbb2e45634dd3b6ae38e45856ab6215c687a8806
_k = _n) {
_n = bkey_p_next(_k);
+ if (!_k->u64s) {
+ printk(KERN_ERR "block %u key %5zu - u64s 0? aieee!\n", set,
+ _k->_data - i->_data);
+ break;
+ }
+
k = bkey_disassemble(b, _k, &uk);
printbuf_reset(&buf);
prt_printf(out, "at btree ");
bch2_btree_pos_to_text(out, c, b);
- prt_printf(out, "\n node offset %u", b->written);
+ prt_printf(out, "\n node offset %u/%u",
+ b->written, btree_ptr_sectors_written(&b->key));
if (i)
prt_printf(out, " bset u64s %u", le16_to_cpu(i->u64s));
prt_str(out, ": ");
(rw == WRITE ? bch2_bkey_val_invalid(c, k, READ, err) : 0);
}
+static bool __bkey_valid(struct bch_fs *c, struct btree *b,
+ struct bset *i, struct bkey_packed *k)
+{
+ if (bkey_p_next(k) > vstruct_last(i))
+ return false;
+
+ if (k->format > KEY_FORMAT_CURRENT)
+ return false;
+
+ struct printbuf buf = PRINTBUF;
+ struct bkey tmp;
+ struct bkey_s u = __bkey_disassemble(b, k, &tmp);
+ bool ret = __bch2_bkey_invalid(c, u.s_c, btree_node_type(b), READ, &buf);
+ printbuf_exit(&buf);
+ return ret;
+}
+
static int validate_bset_keys(struct bch_fs *c, struct btree *b,
struct bset *i, int write,
bool have_retry, bool *saw_error)
k != vstruct_last(i);) {
struct bkey_s u;
struct bkey tmp;
+ unsigned next_good_key;
if (btree_err_on(bkey_p_next(k) > vstruct_last(i),
-BCH_ERR_btree_node_read_err_fixable,
-BCH_ERR_btree_node_read_err_fixable,
c, NULL, b, i,
btree_node_bkey_bad_format,
- "invalid bkey format %u", k->format)) {
- i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s);
- memmove_u64s_down(k, bkey_p_next(k),
- (u64 *) vstruct_end(i) - (u64 *) k);
- continue;
- }
+ "invalid bkey format %u", k->format))
+ goto drop_this_key;
/* XXX: validate k->u64s */
if (!write)
c, NULL, b, i,
btree_node_bad_bkey,
"invalid bkey: %s", buf.buf);
-
- i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s);
- memmove_u64s_down(k, bkey_p_next(k),
- (u64 *) vstruct_end(i) - (u64 *) k);
- continue;
+ goto drop_this_key;
}
if (write)
prt_printf(&buf, " > ");
bch2_bkey_to_text(&buf, u.k);
- bch2_dump_bset(c, b, i, 0);
-
if (btree_err(-BCH_ERR_btree_node_read_err_fixable,
c, NULL, b, i,
btree_node_bkey_out_of_order,
- "%s", buf.buf)) {
- i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s);
- memmove_u64s_down(k, bkey_p_next(k),
- (u64 *) vstruct_end(i) - (u64 *) k);
- continue;
- }
+ "%s", buf.buf))
+ goto drop_this_key;
}
prev = k;
k = bkey_p_next(k);
+ continue;
+drop_this_key:
+ next_good_key = k->u64s;
+
+ if (!next_good_key ||
+ (BSET_BIG_ENDIAN(i) == CPU_BIG_ENDIAN &&
+ version >= bcachefs_metadata_version_snapshot)) {
+ /*
+ * only do scanning if bch2_bkey_compat() has nothing to
+ * do
+ */
+
+ if (!__bkey_valid(c, b, i, (void *) ((u64 *) k + next_good_key))) {
+ for (next_good_key = 1;
+ next_good_key < (u64 *) vstruct_last(i) - (u64 *) k;
+ next_good_key++)
+ if (__bkey_valid(c, b, i, (void *) ((u64 *) k + next_good_key)))
+ goto got_good_key;
+
+ }
+
+ /*
+ * didn't find a good key, have to truncate the rest of
+ * the bset
+ */
+ next_good_key = (u64 *) vstruct_last(i) - (u64 *) k;
+ }
+got_good_key:
+ le16_add_cpu(&i->u64s, -next_good_key);
+ memmove_u64s_down(k, bkey_p_next(k), (u64 *) vstruct_end(i) - (u64 *) k);
}
fsck_err:
printbuf_exit(&buf);
nonce = btree_nonce(i, b->written << 9);
- csum_bad = bch2_crc_cmp(b->data->csum,
- csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, b->data));
+ struct bch_csum csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, b->data);
+ csum_bad = bch2_crc_cmp(b->data->csum, csum);
if (csum_bad)
bch2_io_error(ca, BCH_MEMBER_ERROR_checksum);
-BCH_ERR_btree_node_read_err_want_retry,
c, ca, b, i,
bset_bad_csum,
- "invalid checksum");
+ "%s",
+ (printbuf_reset(&buf),
+ bch2_csum_err_msg(&buf, BSET_CSUM_TYPE(i), b->data->csum, csum),
+ buf.buf));
ret = bset_encrypt(c, i, b->written << 9);
if (bch2_fs_fatal_err_on(ret, c,
"unknown checksum type %llu", BSET_CSUM_TYPE(i));
nonce = btree_nonce(i, b->written << 9);
- csum_bad = bch2_crc_cmp(bne->csum,
- csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne));
+ struct bch_csum csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne);
+ csum_bad = bch2_crc_cmp(bne->csum, csum);
if (csum_bad)
bch2_io_error(ca, BCH_MEMBER_ERROR_checksum);
-BCH_ERR_btree_node_read_err_want_retry,
c, ca, b, i,
bset_bad_csum,
- "invalid checksum");
+ "%s",
+ (printbuf_reset(&buf),
+ bch2_csum_err_msg(&buf, BSET_CSUM_TYPE(i), bne->csum, csum),
+ buf.buf));
ret = bset_encrypt(c, i, b->written << 9);
if (bch2_fs_fatal_err_on(ret, c,
return p;
}
-#include "sb-members.h"
-
static inline void check_srcu_held_too_long(struct btree_trans *trans)
{
- if (trans->srcu_held && time_after(jiffies, trans->srcu_lock_time + HZ * 10)) {
- struct printbuf buf = PRINTBUF;
-
- prt_str(&buf, "btree node read time:\n");
- bch2_time_stats_to_text(&buf, &trans->c->times[BCH_TIME_btree_node_read]);
-
- prt_str(&buf, "btree node read_done time:\n");
- bch2_time_stats_to_text(&buf, &trans->c->times[BCH_TIME_btree_node_read_done]);
-
- for_each_member_device(trans->c, ca) {
- prt_printf(&buf, "device %u read time:\n", ca->dev_idx);
- bch2_time_stats_to_text(&buf, &ca->io_latency[READ]);
- }
-
- struct btree_transaction_stats *s = btree_trans_stats(trans);
- prt_str(&buf, "transaction duration:\n");
- bch2_time_stats_to_text(&buf, &s->duration);
-
- WARN(trans->srcu_held && time_after(jiffies, trans->srcu_lock_time + HZ * 10),
- "btree trans held srcu lock (delaying memory reclaim) for %lu seconds",
- (jiffies - trans->srcu_lock_time) / HZ);
- bch2_print_string_as_lines(KERN_ERR, buf.buf);
- printbuf_exit(&buf);
- }
+ WARN(trans->srcu_held && time_after(jiffies, trans->srcu_lock_time + HZ * 10),
+ "btree trans held srcu lock (delaying memory reclaim) for %lu seconds",
+ (jiffies - trans->srcu_lock_time) / HZ);
}
void bch2_trans_srcu_unlock(struct btree_trans *trans)
bch2_checksum(_c, _type, _nonce, _start, vstruct_end(_i) - _start);\
})
+static inline void bch2_csum_to_text(struct printbuf *out,
+ enum bch_csum_type type,
+ struct bch_csum csum)
+{
+ const u8 *p = (u8 *) &csum;
+ unsigned bytes = type < BCH_CSUM_NR ? bch_crc_bytes[type] : 16;
+
+ for (unsigned i = 0; i < bytes; i++)
+ prt_hex_byte(out, p[i]);
+}
+
+static inline void bch2_csum_err_msg(struct printbuf *out,
+ enum bch_csum_type type,
+ struct bch_csum expected,
+ struct bch_csum got)
+{
+ prt_printf(out, "checksum error: got ");
+ bch2_csum_to_text(out, type, got);
+ prt_str(out, " should be ");
+ bch2_csum_to_text(out, type, expected);
+ prt_printf(out, " type %s", bch2_csum_types[type]);
+}
+
int bch2_chacha_encrypt_key(struct bch_key *, struct nonce, void *, size_t);
int bch2_request_key(struct bch_sb *, struct bch_key *);
#ifndef __KERNEL__
struct bch_fs *c = inode->v.i_sb->s_fs_info;
int ret;
- /*
- * check if unlinked, disable/defer until relink
- */
-
- /*
- * also: add a mode where a file is a tmpfile until fully,
- * asynchronously written
- */
-
-
ret = file_write_and_wait_range(file, start, end);
if (ret)
goto out;
goto out;
}
+ struct printbuf buf = PRINTBUF;
+ buf.atomic++;
+ prt_str(&buf, "data ");
+ bch2_csum_err_msg(&buf, crc.csum_type, rbio->pick.crc.csum, csum);
+
bch_err_inum_offset_ratelimited(ca,
rbio->read_pos.inode,
rbio->read_pos.offset << 9,
- "data checksum error: expected %0llx:%0llx got %0llx:%0llx (type %s)",
- rbio->pick.crc.csum.hi, rbio->pick.crc.csum.lo,
- csum.hi, csum.lo, bch2_csum_types[crc.csum_type]);
+ "data %s", buf.buf);
+ printbuf_exit(&buf);
+
bch2_io_error(ca, BCH_MEMBER_ERROR_checksum);
bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR);
goto out;
}};
}
-static bool jset_csum_good(struct bch_fs *c, struct jset *j)
+static bool jset_csum_good(struct bch_fs *c, struct jset *j, struct bch_csum *csum)
{
- return bch2_checksum_type_valid(c, JSET_CSUM_TYPE(j)) &&
- !bch2_crc_cmp(j->csum,
- csum_vstruct(c, JSET_CSUM_TYPE(j), journal_nonce(j), j));
+ if (!bch2_checksum_type_valid(c, JSET_CSUM_TYPE(j))) {
+ *csum = (struct bch_csum) {};
+ return false;
+ }
+
+ *csum = csum_vstruct(c, JSET_CSUM_TYPE(j), journal_nonce(j), j);
+ return !bch2_crc_cmp(j->csum, *csum);
}
static inline u32 journal_entry_radix_idx(struct bch_fs *c, u64 seq)
u64 offset = bucket_to_sector(ca, ja->buckets[bucket]),
end = offset + ca->mi.bucket_size;
bool saw_bad = false, csum_good;
+ struct printbuf err = PRINTBUF;
int ret = 0;
pr_debug("reading %u", bucket);
* found on a different device, and missing or
* no journal entries will be handled later
*/
- return 0;
+ goto out;
}
j = buf->data;
ret = journal_read_buf_realloc(buf,
vstruct_bytes(j));
if (ret)
- return ret;
+ goto err;
}
goto reread;
case JOURNAL_ENTRY_NONE:
if (!saw_bad)
- return 0;
+ goto out;
/*
* On checksum error we don't really trust the size
* field of the journal entry we read, so try reading
sectors = block_sectors(c);
goto next_block;
default:
- return ret;
+ goto err;
}
/*
* bucket:
*/
if (le64_to_cpu(j->seq) < ja->bucket_seq[bucket])
- return 0;
+ goto out;
ja->bucket_seq[bucket] = le64_to_cpu(j->seq);
- csum_good = jset_csum_good(c, j);
+ enum bch_csum_type csum_type = JSET_CSUM_TYPE(j);
+ struct bch_csum csum;
+ csum_good = jset_csum_good(c, j, &csum);
+
if (bch2_dev_io_err_on(!csum_good, ca, BCH_MEMBER_ERROR_checksum,
- "journal checksum error"))
+ "%s",
+ (printbuf_reset(&err),
+ prt_str(&err, "journal "),
+ bch2_csum_err_msg(&err, csum_type, j->csum, csum),
+ err.buf)))
saw_bad = true;
ret = bch2_encrypt(c, JSET_CSUM_TYPE(j), journal_nonce(j),
j->encrypted_start,
vstruct_end(j) - (void *) j->encrypted_start);
bch2_fs_fatal_err_on(ret, c,
- "error decrypting journal entry: %i", ret);
+ "error decrypting journal entry: %s",
+ bch2_err_str(ret));
mutex_lock(&jlist->lock);
ret = journal_entry_add(c, ca, (struct journal_ptr) {
case JOURNAL_ENTRY_ADD_OUT_OF_RANGE:
break;
default:
- return ret;
+ goto err;
}
next_block:
pr_debug("next");
j = ((void *) j) + (sectors << 9);
}
- return 0;
+out:
+ ret = 0;
+err:
+ printbuf_exit(&err);
+ return ret;
}
static CLOSURE_CALLBACK(bch2_journal_read_device)
while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns)) {
if (should_run_recovery_pass(c, c->curr_recovery_pass)) {
+ unsigned pass = c->curr_recovery_pass;
+
ret = bch2_run_recovery_pass(c, c->curr_recovery_pass);
- if (bch2_err_matches(ret, BCH_ERR_restart_recovery))
+ if (bch2_err_matches(ret, BCH_ERR_restart_recovery) ||
+ (ret && c->curr_recovery_pass < pass))
continue;
if (ret)
break;
static int read_one_super(struct bch_sb_handle *sb, u64 offset, struct printbuf *err)
{
- struct bch_csum csum;
size_t bytes;
int ret;
reread:
if (!uuid_equal(&sb->sb->magic, &BCACHE_MAGIC) &&
!uuid_equal(&sb->sb->magic, &BCHFS_MAGIC)) {
- prt_printf(err, "Not a bcachefs superblock");
+ prt_str(err, "Not a bcachefs superblock (got magic ");
+ pr_uuid(err, sb->sb->magic.b);
+ prt_str(err, ")");
return -BCH_ERR_invalid_sb_magic;
}
goto reread;
}
- if (BCH_SB_CSUM_TYPE(sb->sb) >= BCH_CSUM_NR) {
+ enum bch_csum_type csum_type = BCH_SB_CSUM_TYPE(sb->sb);
+ if (csum_type >= BCH_CSUM_NR) {
prt_printf(err, "unknown checksum type %llu", BCH_SB_CSUM_TYPE(sb->sb));
return -BCH_ERR_invalid_sb_csum_type;
}
/* XXX: verify MACs */
- csum = csum_vstruct(NULL, BCH_SB_CSUM_TYPE(sb->sb),
- null_nonce(), sb->sb);
-
+ struct bch_csum csum = csum_vstruct(NULL, csum_type, null_nonce(), sb->sb);
if (bch2_crc_cmp(csum, sb->sb->csum)) {
- prt_printf(err, "bad checksum");
+ bch2_csum_err_msg(err, csum_type, sb->sb->csum, csum);
return -BCH_ERR_invalid_sb_csum;
}
/*
* Downgrade, if superblock is at a higher version than currently
* supported:
+ *
+ * c->sb will be checked before we write the superblock, so update it as
+ * well:
*/
- if (BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb) > bcachefs_metadata_version_current)
+ if (BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb) > bcachefs_metadata_version_current) {
SET_BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb, bcachefs_metadata_version_current);
- if (c->sb.version > bcachefs_metadata_version_current)
+ c->sb.version_upgrade_complete = bcachefs_metadata_version_current;
+ }
+ if (c->sb.version > bcachefs_metadata_version_current) {
c->disk_sb.sb->version = cpu_to_le16(bcachefs_metadata_version_current);
- if (c->sb.version_min > bcachefs_metadata_version_current)
+ c->sb.version = bcachefs_metadata_version_current;
+ }
+ if (c->sb.version_min > bcachefs_metadata_version_current) {
c->disk_sb.sb->version_min = cpu_to_le16(bcachefs_metadata_version_current);
+ c->sb.version_min = bcachefs_metadata_version_current;
+ }
c->disk_sb.sb->compat[0] &= cpu_to_le64((1ULL << BCH_COMPAT_NR) - 1);
return ret;
}
pr_uuid(out, sb->uuid.b);
prt_newline(out);
+ prt_printf(out, "Magic number:");
+ prt_tab(out);
+ pr_uuid(out, sb->magic.b);
+ prt_newline(out);
+
prt_str(out, "Device index:");
prt_tab(out);
prt_printf(out, "%u", sb->dev_idx);