-96b991466ac851ea3c7adbd2e30184837573e2a0
+275cba438ed6630d5e4db7c9164ac5334a8a4cd7
bcachefs_metadata_version_min = 9,
bcachefs_metadata_version_new_versioning = 10,
bcachefs_metadata_version_bkey_renumber = 10,
- bcachefs_metadata_version_max = 11,
+ bcachefs_metadata_version_inode_btree_change = 11,
+ bcachefs_metadata_version_max = 12,
};
#define bcachefs_metadata_version_current (bcachefs_metadata_version_max - 1)
break;
}
}
+
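+/*
+ * Convert a packed bkey between an older on-disk version/endianness and the
+ * current in-memory format: byteswap the key if the bset was written with
+ * the opposite endianness, renumber key types for pre-bkey_renumber
+ * versions, swap the inode/offset position fields of inode btree keys for
+ * pre-inode_btree_change versions, then let the key type's ->compat() hook
+ * fix up the value.
+ */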
+void __bch2_bkey_compat(unsigned level, enum btree_id btree_id,
+ unsigned version, unsigned big_endian,
+ int write,
+ struct bkey_format *f,
+ struct bkey_packed *k)
+{
+ const struct bkey_ops *ops;
+ struct bkey uk;
+ struct bkey_s u;
+
+ if (big_endian != CPU_BIG_ENDIAN)
+ bch2_bkey_swab_key(f, k);
+
+ if (version < bcachefs_metadata_version_bkey_renumber)
+ bch2_bkey_renumber(__btree_node_type(level, btree_id), k, write);
+
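+ /*
+ * The inode btree moved the inode number from p.inode to p.offset in
+ * bcachefs_metadata_version_inode_btree_change; swap the fields (and,
+ * for packed keys, the format's field layout) when crossing that
+ * version:
+ */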
+ if (version < bcachefs_metadata_version_inode_btree_change &&
+ btree_id == BTREE_ID_INODES) {
+ if (!bkey_packed(k)) {
+ struct bkey_i *u = packed_to_bkey(k);
+ swap(u->k.p.inode, u->k.p.offset);
+ } else if (f->bits_per_field[BKEY_FIELD_INODE] &&
+ f->bits_per_field[BKEY_FIELD_OFFSET]) {
+ struct bkey_format tmp = *f, *in = f, *out = &tmp;
+
+ swap(tmp.bits_per_field[BKEY_FIELD_INODE],
+ tmp.bits_per_field[BKEY_FIELD_OFFSET]);
+ swap(tmp.field_offset[BKEY_FIELD_INODE],
+ tmp.field_offset[BKEY_FIELD_OFFSET]);
+
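+ /*
+ * On read the key is still packed with the pre-change field
+ * layout, so unpack with the swapped copy of the format and
+ * repack with the current one; on write do the reverse:
+ */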
+ if (!write)
+ swap(in, out);
+
+ uk = __bch2_bkey_unpack_key(in, k);
+ swap(uk.p.inode, uk.p.offset);
+ BUG_ON(!bch2_bkey_pack_key(k, &uk, out));
+ }
+ }
+
+ if (!bkey_packed(k)) {
+ u = bkey_i_to_s(packed_to_bkey(k));
+ } else {
+ uk = __bch2_bkey_unpack_key(f, k);
+ u.k = &uk;
+ u.v = bkeyp_val(f, k);
+ }
+
+ if (big_endian != CPU_BIG_ENDIAN)
+ bch2_bkey_swab_val(u);
+
+ ops = &bch2_bkey_ops[k->type];
+
+ if (ops->compat)
+ ops->compat(btree_id, version, big_endian, write, u);
+}
bool (*key_normalize)(struct bch_fs *, struct bkey_s);
enum merge_result (*key_merge)(struct bch_fs *,
struct bkey_s, struct bkey_s);
+ void (*compat)(enum btree_id id, unsigned version,
+ unsigned big_endian, int write,
+ struct bkey_s);
};
const char *bch2_bkey_val_invalid(struct bch_fs *, struct bkey_s_c);
void bch2_bkey_renumber(enum btree_node_type, struct bkey_packed *, int);
+void __bch2_bkey_compat(unsigned, enum btree_id, unsigned, unsigned,
+ int, struct bkey_format *, struct bkey_packed *);
+
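+/*
+ * Fast-path wrapper: keys written by the current version in native
+ * endianness need no conversion, so skip the out-of-line call entirely:
+ */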
+static inline void bch2_bkey_compat(unsigned level, enum btree_id btree_id,
+ unsigned version, unsigned big_endian,
+ int write,
+ struct bkey_format *f,
+ struct bkey_packed *k)
+{
+ if (version < bcachefs_metadata_version_current ||
+ big_endian != CPU_BIG_ENDIAN)
+ __bch2_bkey_compat(level, btree_id, version,
+ big_endian, write, f, k);
+}
+
#endif /* _BCACHEFS_BKEY_METHODS_H */
if (sib != btree_prev_sib)
swap(n1, n2);
- BUG_ON(bkey_cmp(btree_type_successor(n1->btree_id,
- n1->key.k.p),
+ BUG_ON(bkey_cmp(bkey_successor(n1->key.k.p),
n2->data->min_key));
}
__gc_pos_set(c, new_pos);
}
-/* range_checks - for validating min/max pos of each btree node: */
-
-struct range_checks {
- struct range_level {
- struct bpos min;
- struct bpos max;
- } l[BTREE_MAX_DEPTH];
- unsigned depth;
-};
-
-static void btree_node_range_checks_init(struct range_checks *r, unsigned depth)
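+/*
+ * Replaces the range_checks machinery above: verify that the children of an
+ * interior node exactly tile its [min_key, max_key] range - each child must
+ * start at the successor of the previous child's max_key, and the last
+ * child must end at the parent's max_key.
+ */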
+static int bch2_gc_check_topology(struct bch_fs *c,
+ struct bkey_s_c k,
+ struct bpos *expected_start,
+ struct bpos expected_end,
+ bool is_last)
{
- unsigned i;
-
- for (i = 0; i < BTREE_MAX_DEPTH; i++)
- r->l[i].min = r->l[i].max = POS_MIN;
- r->depth = depth;
-}
-
-static void btree_node_range_checks(struct bch_fs *c, struct btree *b,
- struct range_checks *r)
-{
- struct range_level *l = &r->l[b->level];
-
- struct bpos expected_min = bkey_cmp(l->min, l->max)
- ? btree_type_successor(b->btree_id, l->max)
- : l->max;
-
- bch2_fs_inconsistent_on(bkey_cmp(b->data->min_key, expected_min), c,
- "btree node has incorrect min key: %llu:%llu != %llu:%llu",
- b->data->min_key.inode,
- b->data->min_key.offset,
- expected_min.inode,
- expected_min.offset);
-
- l->max = b->data->max_key;
+ int ret = 0;
- if (b->level > r->depth) {
- l = &r->l[b->level - 1];
+ if (k.k->type == KEY_TYPE_btree_ptr_v2) {
+ struct bkey_s_c_btree_ptr_v2 bp = bkey_s_c_to_btree_ptr_v2(k);
- bch2_fs_inconsistent_on(bkey_cmp(b->data->min_key, l->min), c,
- "btree node min doesn't match min of child nodes: %llu:%llu != %llu:%llu",
- b->data->min_key.inode,
- b->data->min_key.offset,
- l->min.inode,
- l->min.offset);
+ if (fsck_err_on(bkey_cmp(*expected_start, bp.v->min_key), c,
+ "btree node with incorrect min_key: got %llu:%llu, should be %llu:%llu",
+ bp.v->min_key.inode,
+ bp.v->min_key.offset,
+ expected_start->inode,
+ expected_start->offset)) {
+ BUG();
+ }
+ }
- bch2_fs_inconsistent_on(bkey_cmp(b->data->max_key, l->max), c,
- "btree node max doesn't match max of child nodes: %llu:%llu != %llu:%llu",
- b->data->max_key.inode,
- b->data->max_key.offset,
- l->max.inode,
- l->max.offset);
-
- if (bkey_cmp(b->data->max_key, POS_MAX))
- l->min = l->max =
- btree_type_successor(b->btree_id,
- b->data->max_key);
+ *expected_start = bkey_cmp(k.k->p, POS_MAX)
+ ? bkey_successor(k.k->p)
+ : k.k->p;
+
+ if (fsck_err_on(is_last &&
+ bkey_cmp(k.k->p, expected_end), c,
+ "btree node with incorrect max_key: got %llu:%llu, should be %llu:%llu",
+ k.k->p.inode,
+ k.k->p.offset,
+ expected_end.inode,
+ expected_end.offset)) {
+ BUG();
}
+fsck_err:
+ return ret;
}
/* marking of btree keys/nodes: */
static int btree_gc_mark_node(struct bch_fs *c, struct btree *b, u8 *max_stale,
bool initial)
{
+ struct bpos next_node_start = b->data->min_key;
struct btree_node_iter iter;
struct bkey unpacked;
struct bkey_s_c k;
if (!btree_node_type_needs_gc(btree_node_type(b)))
return 0;
- for_each_btree_node_key_unpack(b, k, &iter,
- &unpacked) {
+ bch2_btree_node_iter_init_from_start(&iter, b);
+
+ while ((k = bch2_btree_node_iter_peek_unpack(&iter, b, &unpacked)).k) {
bch2_bkey_debugcheck(c, b, k);
ret = bch2_gc_mark_key(c, k, max_stale, initial);
if (ret)
break;
+
+ bch2_btree_node_iter_advance(&iter, b);
+
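+ /* on interior nodes, also check that the children tile this node's range: */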
+ if (b->level) {
+ ret = bch2_gc_check_topology(c, k,
+ &next_node_start,
+ b->data->max_key,
+ bch2_btree_node_iter_end(&iter));
+ if (ret)
+ break;
+ }
}
return ret;
struct btree_trans trans;
struct btree_iter *iter;
struct btree *b;
- struct range_checks r;
unsigned depth = metadata_only ? 1
: expensive_debug_checks(c) ? 0
: !btree_node_type_needs_gc(btree_id) ? 1
gc_pos_set(c, gc_pos_btree(btree_id, POS_MIN, 0));
- btree_node_range_checks_init(&r, depth);
-
__for_each_btree_node(&trans, iter, btree_id, POS_MIN,
0, depth, BTREE_ITER_PREFETCH, b) {
- btree_node_range_checks(c, b, &r);
-
bch2_verify_btree_nr_keys(b);
gc_pos_set(c, gc_pos_btree_node(b));
}
static int bch2_gc_btree_init_recurse(struct bch_fs *c, struct btree *b,
- struct journal_keys *journal_keys,
- unsigned target_depth)
+ struct journal_keys *journal_keys,
+ unsigned target_depth)
{
struct btree_and_journal_iter iter;
struct bkey_s_c k;
+ struct bpos next_node_start = b->data->min_key;
u8 max_stale = 0;
int ret = 0;
while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
bch2_bkey_debugcheck(c, b, k);
+ BUG_ON(bkey_cmp(k.k->p, b->data->min_key) < 0);
+ BUG_ON(bkey_cmp(k.k->p, b->data->max_key) > 0);
+
ret = bch2_gc_mark_key(c, k, &max_stale, true);
if (ret)
break;
- if (b->level > target_depth) {
+ if (b->level) {
struct btree *child;
BKEY_PADDED(k) tmp;
bkey_reassemble(&tmp.k, k);
+ k = bkey_i_to_s_c(&tmp.k);
+
+ bch2_btree_and_journal_iter_advance(&iter);
- child = bch2_btree_node_get_noiter(c, &tmp.k,
- b->btree_id, b->level - 1);
- ret = PTR_ERR_OR_ZERO(child);
+ ret = bch2_gc_check_topology(c, k,
+ &next_node_start,
+ b->data->max_key,
+ !bch2_btree_and_journal_iter_peek(&iter).k);
if (ret)
break;
- bch2_gc_btree_init_recurse(c, child,
- journal_keys, target_depth);
- six_unlock_read(&child->lock);
- }
+ if (b->level > target_depth) {
+ child = bch2_btree_node_get_noiter(c, &tmp.k,
+ b->btree_id, b->level - 1);
+ ret = PTR_ERR_OR_ZERO(child);
+ if (ret)
+ break;
- bch2_btree_and_journal_iter_advance(&iter);
+ ret = bch2_gc_btree_init_recurse(c, child,
+ journal_keys, target_depth);
+ six_unlock_read(&child->lock);
+
+ if (ret)
+ break;
+ }
+ } else {
+ bch2_btree_and_journal_iter_advance(&iter);
+ }
}
return ret;
return 0;
six_lock_read(&b->lock);
+ if (fsck_err_on(bkey_cmp(b->data->min_key, POS_MIN), c,
+ "btree root with incorrect min_key: %llu:%llu",
+ b->data->min_key.inode,
+ b->data->min_key.offset)) {
+ BUG();
+ }
+
+ if (fsck_err_on(bkey_cmp(b->data->max_key, POS_MAX), c,
+ "btree root with incorrect min_key: %llu:%llu",
+ b->data->max_key.inode,
+ b->data->max_key.offset)) {
+ BUG();
+ }
+
if (b->level >= target_depth)
ret = bch2_gc_btree_init_recurse(c, b,
journal_keys, target_depth);
if (!ret)
ret = bch2_gc_mark_key(c, bkey_i_to_s_c(&b->key),
&max_stale, true);
+fsck_err:
six_unlock_read(&b->lock);
return ret;
n1->key.k.p = n1->data->max_key =
bkey_unpack_pos(n1, last);
- n2->data->min_key =
- btree_type_successor(iter->btree_id,
- n1->data->max_key);
+ n2->data->min_key = bkey_successor(n1->data->max_key);
memcpy_u64s(vstruct_last(s1),
s2->start, u64s);
#include "journal_seq_blacklist.h"
#include "super-io.h"
+#include <linux/sched/mm.h>
#include <trace/events/bcachefs.h>
static void verify_no_dups(struct btree *b,
static void *btree_bounce_alloc(struct bch_fs *c, unsigned order,
bool *used_mempool)
{
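+ /*
+ * Scope allocations here as GFP_NOFS so reclaim can't recurse into
+ * the filesystem - presumably needed because allocation paths reached
+ * from here (e.g. a vmalloc-backed mempool) don't honor per-call gfp
+ * flags:
+ */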
+ unsigned flags = memalloc_nofs_save();
void *p;
BUG_ON(order > btree_page_order(c));
*used_mempool = false;
p = (void *) __get_free_pages(__GFP_NOWARN|GFP_NOWAIT, order);
- if (p)
- return p;
-
- *used_mempool = true;
- return mempool_alloc(&c->btree_bounce_pool, GFP_NOIO);
+ if (!p) {
+ *used_mempool = true;
+ p = mempool_alloc(&c->btree_bounce_pool, GFP_NOIO);
+ }
+ memalloc_nofs_restore(flags);
+ return p;
}
static void sort_bkey_ptrs(const struct btree *bt,
static int validate_bset(struct bch_fs *c, struct btree *b,
struct bset *i, unsigned sectors,
- unsigned *whiteout_u64s, int write,
- bool have_retry)
+ int write, bool have_retry)
{
- struct bkey_packed *k, *prev = NULL;
- bool seen_non_whiteout = false;
- unsigned version;
+ unsigned version = le16_to_cpu(i->version);
const char *err;
int ret = 0;
+ btree_err_on((version != BCH_BSET_VERSION_OLD &&
+ version < bcachefs_metadata_version_min) ||
+ version >= bcachefs_metadata_version_max,
+ BTREE_ERR_FATAL, c, b, i,
+ "unsupported bset version");
+
+ if (btree_err_on(b->written + sectors > c->opts.btree_node_size,
+ BTREE_ERR_FIXABLE, c, b, i,
+ "bset past end of btree node")) {
+ i->u64s = 0;
+ return 0;
+ }
+
+ btree_err_on(b->written && !i->u64s,
+ BTREE_ERR_FIXABLE, c, b, i,
+ "empty bset");
+
if (!b->written) {
+ struct btree_node *bn =
+ container_of(i, struct btree_node, keys);
/* These indicate that we read the wrong btree node: */
- btree_err_on(BTREE_NODE_ID(b->data) != b->btree_id,
+ btree_err_on(BTREE_NODE_ID(bn) != b->btree_id,
BTREE_ERR_MUST_RETRY, c, b, i,
"incorrect btree id");
- btree_err_on(BTREE_NODE_LEVEL(b->data) != b->level,
+ btree_err_on(BTREE_NODE_LEVEL(bn) != b->level,
BTREE_ERR_MUST_RETRY, c, b, i,
"incorrect level");
if (BSET_BIG_ENDIAN(i) != CPU_BIG_ENDIAN) {
- u64 *p = (u64 *) &b->data->ptr;
+ u64 *p = (u64 *) &bn->ptr;
*p = swab64(*p);
- bch2_bpos_swab(&b->data->min_key);
- bch2_bpos_swab(&b->data->max_key);
}
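+ /*
+ * Reading: convert the node header from the version/endianness it
+ * was written in before validating it; the write side converts back
+ * after validation, below:
+ */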
+ if (!write)
+ compat_btree_node(b->level, b->btree_id, version,
+ BSET_BIG_ENDIAN(i), write, bn);
+
if (b->key.k.type == KEY_TYPE_btree_ptr_v2) {
struct bch_btree_ptr_v2 *bp =
&bkey_i_to_btree_ptr_v2(&b->key)->v;
btree_err_on(bkey_cmp(b->data->min_key, bp->min_key),
BTREE_ERR_MUST_RETRY, c, b, NULL,
- "incorrect min_key");
+ "incorrect min_key: got %llu:%llu should be %llu:%llu",
+ b->data->min_key.inode,
+ b->data->min_key.offset,
+ bp->min_key.inode,
+ bp->min_key.offset);
}
- btree_err_on(bkey_cmp(b->data->max_key, b->key.k.p),
+ btree_err_on(bkey_cmp(bn->max_key, b->key.k.p),
BTREE_ERR_MUST_RETRY, c, b, i,
"incorrect max key");
+ if (write)
+ compat_btree_node(b->level, b->btree_id, version,
+ BSET_BIG_ENDIAN(i), write, bn);
+
/* XXX: ideally we would be validating min_key too */
#if 0
/*
* not correct anymore, due to btree node write error
* handling
*
- * need to add b->data->seq to btree keys and verify
+ * need to add bn->seq to btree keys and verify
* against that
*/
btree_err_on(!extent_contains_ptr(bkey_i_to_s_c_extent(&b->key),
- b->data->ptr),
+ bn->ptr),
BTREE_ERR_FATAL, c, b, i,
"incorrect backpointer");
#endif
- err = bch2_bkey_format_validate(&b->data->format);
+ err = bch2_bkey_format_validate(&bn->format);
btree_err_on(err,
BTREE_ERR_FATAL, c, b, i,
"invalid bkey format: %s", err);
- }
- version = le16_to_cpu(i->version);
- btree_err_on((version != BCH_BSET_VERSION_OLD &&
- version < bcachefs_metadata_version_min) ||
- version >= bcachefs_metadata_version_max,
- BTREE_ERR_FATAL, c, b, i,
- "unsupported bset version");
-
- if (btree_err_on(b->written + sectors > c->opts.btree_node_size,
- BTREE_ERR_FIXABLE, c, b, i,
- "bset past end of btree node")) {
- i->u64s = 0;
- return 0;
+ compat_bformat(b->level, b->btree_id, version,
+ BSET_BIG_ENDIAN(i), write,
+ &bn->format);
}
+fsck_err:
+ return ret;
+}
- btree_err_on(b->written && !i->u64s,
- BTREE_ERR_FIXABLE, c, b, i,
- "empty bset");
+static int validate_bset_keys(struct bch_fs *c, struct btree *b,
+ struct bset *i, unsigned *whiteout_u64s,
+ int write, bool have_retry)
+{
+ unsigned version = le16_to_cpu(i->version);
+ struct bkey_packed *k, *prev = NULL;
+ bool seen_non_whiteout = false;
+ int ret = 0;
if (!BSET_SEPARATE_WHITEOUTS(i)) {
seen_non_whiteout = true;
continue;
}
- if (BSET_BIG_ENDIAN(i) != CPU_BIG_ENDIAN)
- bch2_bkey_swab_key(&b->format, k);
-
- if (!write &&
- version < bcachefs_metadata_version_bkey_renumber)
- bch2_bkey_renumber(btree_node_type(b), k, write);
+ /* XXX: validate k->u64s */
+ if (!write)
+ bch2_bkey_compat(b->level, b->btree_id, version,
+ BSET_BIG_ENDIAN(i), write,
+ &b->format, k);
u = __bkey_disassemble(b, k, &tmp);
- if (BSET_BIG_ENDIAN(i) != CPU_BIG_ENDIAN)
- bch2_bkey_swab_val(u);
-
invalid = __bch2_bkey_invalid(c, u.s_c, btree_node_type(b)) ?:
bch2_bkey_in_btree_node(b, u.s_c) ?:
(write ? bch2_bkey_val_invalid(c, u.s_c) : NULL);
continue;
}
- if (write &&
- version < bcachefs_metadata_version_bkey_renumber)
- bch2_bkey_renumber(btree_node_type(b), k, write);
+ if (write)
+ bch2_bkey_compat(b->level, b->btree_id, version,
+ BSET_BIG_ENDIAN(i), write,
+ &b->format, k);
/*
* with the separate whiteouts thing (used for extents), the
prev = k;
k = bkey_next_skip_noops(k, vstruct_last(i));
}
-
- SET_BSET_BIG_ENDIAN(i, CPU_BIG_ENDIAN);
fsck_err:
return ret;
}
set_btree_node_old_extent_overwrite(b);
sectors = vstruct_sectors(b->data, c->block_bits);
-
- btree_node_set_format(b, b->data->format);
} else {
bne = write_block(b);
i = &bne->keys;
sectors = vstruct_sectors(bne, c->block_bits);
}
- ret = validate_bset(c, b, i, sectors, &whiteout_u64s,
+ ret = validate_bset(c, b, i, sectors,
READ, have_retry);
if (ret)
goto fsck_err;
+ if (!b->written)
+ btree_node_set_format(b, b->data->format);
+
+ ret = validate_bset_keys(c, b, i, &whiteout_u64s,
+ READ, have_retry);
+ if (ret)
+ goto fsck_err;
+
+ SET_BSET_BIG_ENDIAN(i, CPU_BIG_ENDIAN);
+
b->written += sectors;
blacklisted = bch2_journal_seq_is_blacklisted(c,
if (bch2_bkey_invalid(c, bkey_i_to_s_c(&b->key), BKEY_TYPE_BTREE))
return -1;
- ret = validate_bset(c, b, i, sectors, &whiteout_u64s, WRITE, false);
+ ret = validate_bset(c, b, i, sectors, WRITE, false) ?:
+ validate_bset_keys(c, b, i, &whiteout_u64s, WRITE, false);
if (ret)
bch2_inconsistent_error(c);
validate_before_checksum = true;
/* validate_bset will be modifying: */
- if (le16_to_cpu(i->version) <
- bcachefs_metadata_version_bkey_renumber)
+ if (le16_to_cpu(i->version) < bcachefs_metadata_version_max)
validate_before_checksum = true;
/* if we're going to be encrypting, check metadata validity first: */
#ifndef _BCACHEFS_BTREE_IO_H
#define _BCACHEFS_BTREE_IO_H
+#include "bkey_methods.h"
#include "bset.h"
#include "btree_locking.h"
#include "extents.h"
void bch2_btree_verify_flushed(struct bch_fs *);
ssize_t bch2_dirty_btree_nodes_print(struct bch_fs *, char *);
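+/*
+ * Helpers for converting btree node headers between on-disk versions. For
+ * pre-inode_btree_change inode btrees the inode number lived in the other
+ * position field, so the format's INODE and OFFSET field parameters are
+ * swapped; the swap is its own inverse and thus serves both reads and
+ * writes.
+ */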
+static inline void compat_bformat(unsigned level, enum btree_id btree_id,
+ unsigned version, unsigned big_endian,
+ int write, struct bkey_format *f)
+{
+ if (version < bcachefs_metadata_version_inode_btree_change &&
+ btree_id == BTREE_ID_INODES) {
+ swap(f->bits_per_field[BKEY_FIELD_INODE],
+ f->bits_per_field[BKEY_FIELD_OFFSET]);
+ swap(f->field_offset[BKEY_FIELD_INODE],
+ f->field_offset[BKEY_FIELD_OFFSET]);
+ }
+}
+
+static inline void compat_bpos(unsigned level, enum btree_id btree_id,
+ unsigned version, unsigned big_endian,
+ int write, struct bpos *p)
+{
+ if (big_endian != CPU_BIG_ENDIAN)
+ bch2_bpos_swab(p);
+
+ if (version < bcachefs_metadata_version_inode_btree_change &&
+ btree_id == BTREE_ID_INODES)
+ swap(p->inode, p->offset);
+}
+
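+/*
+ * Older versions stored an extents node's min_key as the previous node's
+ * max_key (exclusive); the current convention makes min_key inclusive, hence
+ * the predecessor/successor adjustment on either side of the bpos
+ * conversion:
+ */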
+static inline void compat_btree_node(unsigned level, enum btree_id btree_id,
+ unsigned version, unsigned big_endian,
+ int write,
+ struct btree_node *bn)
+{
+ if (version < bcachefs_metadata_version_inode_btree_change &&
+ btree_node_type_is_extents(btree_id) &&
+ bkey_cmp(bn->min_key, POS_MIN) &&
+ write)
+ bn->min_key = bkey_predecessor(bn->min_key);
+
+ compat_bpos(level, btree_id, version, big_endian, write, &bn->min_key);
+ compat_bpos(level, btree_id, version, big_endian, write, &bn->max_key);
+
+ if (version < bcachefs_metadata_version_inode_btree_change &&
+ btree_node_type_is_extents(btree_id) &&
+ bkey_cmp(bn->min_key, POS_MIN) &&
+ !write)
+ bn->min_key = bkey_successor(bn->min_key);
+}
+
#endif /* _BCACHEFS_BTREE_IO_H */
static inline bool btree_iter_pos_before_node(struct btree_iter *iter,
struct btree *b)
{
- return bkey_cmp(iter->pos, b->data->min_key) < 0;
+ return bkey_cmp(btree_iter_search_key(iter), b->data->min_key) < 0;
}
static inline bool btree_iter_pos_after_node(struct btree_iter *iter,
if (btree_node_read_locked(iter, iter->level))
btree_node_unlock(iter, iter->level);
- /* ick: */
- iter->pos = iter->btree_id == BTREE_ID_INODES
- ? btree_type_successor(iter->btree_id, iter->pos)
- : bkey_successor(iter->pos);
+ iter->pos = bkey_successor(iter->pos);
iter->level = iter->min_depth;
btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
iter->k.p = iter->pos = l->b->key.k.p;
ret = bkey_cmp(iter->pos, POS_MAX) != 0;
- if (ret)
- iter->k.p = iter->pos = btree_type_successor(iter->btree_id, iter->pos);
+ if (ret && !(iter->flags & BTREE_ITER_IS_EXTENTS))
+ iter->k.p = iter->pos = bkey_successor(iter->pos);
btree_iter_pos_changed(iter, 1);
return ret;
iter->uptodate = BTREE_ITER_NEED_TRAVERSE;
ret = bkey_cmp(iter->pos, POS_MIN) != 0;
- if (ret)
- iter->k.p = iter->pos = btree_type_predecessor(iter->btree_id, iter->pos);
+ if (ret) {
+ iter->k.p = iter->pos = bkey_predecessor(iter->pos);
+
+ if (iter->flags & BTREE_ITER_IS_EXTENTS)
+ iter->k.p = iter->pos = bkey_predecessor(iter->pos);
+ }
btree_iter_pos_changed(iter, -1);
return ret;
return bkey_s_c_null;
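+ /*
+ * For extents, k.p is the end of the current extent and therefore
+ * already the position to resume from; for other btrees, step past it:
+ */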
bch2_btree_iter_set_pos(iter,
- btree_type_successor(iter->btree_id, iter->k.p));
+ (iter->flags & BTREE_ITER_IS_EXTENTS)
+ ? iter->k.p
+ : bkey_successor(iter->k.p));
return bch2_btree_iter_peek(iter);
}
if (k.k && bkey_deleted(k.k)) {
bch2_btree_iter_set_pos(iter,
- btree_type_successor(iter->btree_id, iter->k.p));
+ (iter->flags & BTREE_ITER_IS_EXTENTS)
+ ? iter->k.p
+ : bkey_successor(iter->k.p));
continue;
}
return bkey_s_c_null;
bch2_btree_iter_set_pos(iter,
- btree_type_successor(iter->btree_id, iter->k.p));
+ (iter->flags & BTREE_ITER_IS_EXTENTS)
+ ? iter->k.p
+ : bkey_successor(iter->k.p));
return bch2_btree_iter_peek_with_updates(iter);
}
return bkey_s_c_null;
bch2_btree_iter_set_pos(iter,
- btree_type_successor(iter->btree_id, iter->k.p));
+ (iter->flags & BTREE_ITER_IS_EXTENTS)
+ ? iter->k.p
+ : bkey_successor(iter->k.p));
return bch2_btree_iter_peek_slot(iter);
}
void __bch2_btree_iter_set_pos(struct btree_iter *, struct bpos, bool);
void bch2_btree_iter_set_pos(struct btree_iter *, struct bpos);
-static inline struct bpos btree_type_successor(enum btree_id id,
- struct bpos pos)
-{
- if (id == BTREE_ID_INODES) {
- pos.inode++;
- pos.offset = 0;
- } else if (!btree_node_type_is_extents(id)) {
- pos = bkey_successor(pos);
- }
-
- return pos;
-}
-
-static inline struct bpos btree_type_predecessor(enum btree_id id,
- struct bpos pos)
-{
- if (id == BTREE_ID_INODES) {
- --pos.inode;
- pos.offset = 0;
- } else {
- pos = bkey_predecessor(pos);
- }
-
- return pos;
-}
-
static inline int __btree_iter_cmp(enum btree_id id,
struct bpos pos,
const struct btree_iter *r)
struct btree_iter *iter;
};
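+/*
+ * Lockdep can only track a limited number of locks held by one task, so cap
+ * the number of iterators per transaction lower when it's enabled:
+ */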
+#ifndef CONFIG_LOCKDEP
#define BTREE_ITER_MAX 64
+#else
+#define BTREE_ITER_MAX 32
+#endif
struct btree_trans {
struct bch_fs *c;
/* Asynchronous interior node update machinery */
-static void bch2_btree_update_free(struct btree_update *as)
+static void __bch2_btree_update_free(struct btree_update *as)
{
struct bch_fs *c = as->c;
if (as->reserve)
bch2_btree_reserve_put(c, as->reserve);
- mutex_lock(&c->btree_interior_update_lock);
list_del(&as->list);
closure_debug_destroy(&as->cl);
mempool_free(as, &c->btree_interior_update_pool);
closure_wake_up(&c->btree_interior_update_wait);
- mutex_unlock(&c->btree_interior_update_lock);
}
-static void btree_update_nodes_reachable(struct btree_update *as, u64 seq)
+static void bch2_btree_update_free(struct btree_update *as)
{
struct bch_fs *c = as->c;
mutex_lock(&c->btree_interior_update_lock);
+ __bch2_btree_update_free(as);
+ mutex_unlock(&c->btree_interior_update_lock);
+}
+
+static void btree_update_nodes_reachable(struct btree_update *as, u64 seq)
+{
+ struct bch_fs *c = as->c;
while (as->nr_new_nodes) {
struct btree *b = as->new_nodes[--as->nr_new_nodes];
BUG_ON(b->will_make_reachable != (unsigned long) as);
b->will_make_reachable = 0;
- mutex_unlock(&c->btree_interior_update_lock);
/*
* b->will_make_reachable prevented it from being written, so
btree_node_lock_type(c, b, SIX_LOCK_read);
bch2_btree_node_write_cond(c, b, btree_node_need_write(b));
six_unlock_read(&b->lock);
- mutex_lock(&c->btree_interior_update_lock);
}
while (as->nr_pending)
bch2_btree_node_free_ondisk(c, &as->pending[--as->nr_pending],
seq);
-
- mutex_unlock(&c->btree_interior_update_lock);
}
static void btree_update_nodes_written(struct closure *cl)
mutex_unlock(&c->btree_interior_update_lock);
btree_node_lock_type(c, b, SIX_LOCK_intent);
six_unlock_intent(&b->lock);
- goto out;
+ mutex_lock(&c->btree_interior_update_lock);
+ goto again;
}
+ list_del(&as->unwritten_list);
+
journal_u64s = 0;
if (as->mode != BTREE_INTERIOR_UPDATING_ROOT)
bch2_btree_add_journal_pin(c, b, res.seq);
six_unlock_write(&b->lock);
-
- list_del(&as->unwritten_list);
- mutex_unlock(&c->btree_interior_update_lock);
-
- /*
- * b->write_blocked prevented it from being written, so
- * write it now if it needs to be written:
- */
- btree_node_write_if_need(c, b, SIX_LOCK_intent);
- six_unlock_intent(&b->lock);
break;
case BTREE_INTERIOR_UPDATING_AS:
BUG_ON(b);
-
- list_del(&as->unwritten_list);
- mutex_unlock(&c->btree_interior_update_lock);
break;
case BTREE_INTERIOR_UPDATING_ROOT: {
r->alive = true;
c->btree_roots_dirty = true;
mutex_unlock(&c->btree_root_lock);
-
- list_del(&as->unwritten_list);
- mutex_unlock(&c->btree_interior_update_lock);
break;
}
}
bch2_journal_res_put(&c->journal, &res);
bch2_journal_preres_put(&c->journal, &as->journal_preres);
+ /* Do btree write after dropping journal res: */
+ if (b) {
+ /*
+ * b->write_blocked prevented it from being written, so
+ * write it now if it needs to be written:
+ */
+ btree_node_write_if_need(c, b, SIX_LOCK_intent);
+ six_unlock_intent(&b->lock);
+ }
+
btree_update_nodes_reachable(as, res.seq);
free_update:
- bch2_btree_update_free(as);
+ __bch2_btree_update_free(as);
/*
* for flush_held_btree_writes() waiting on updates to flush or
* nodes to be writeable:
*/
closure_wake_up(&c->btree_interior_update_wait);
-out:
- mutex_lock(&c->btree_interior_update_lock);
goto again;
}
BUG_ON(!prev);
btree_set_max(n1, bkey_unpack_pos(n1, prev));
- btree_set_min(n2, btree_type_successor(n1->btree_id, n1->key.k.p));
+ btree_set_min(n2, bkey_successor(n1->key.k.p));
set2->u64s = cpu_to_le16((u64 *) vstruct_end(set1) - (u64 *) k);
set1->u64s = cpu_to_le16(le16_to_cpu(set1->u64s) - le16_to_cpu(set2->u64s));
EBUG_ON(btree_node_just_written(b));
EBUG_ON(bset_written(b, btree_bset_last(b)));
EBUG_ON(bkey_deleted(&insert->k) && bkey_val_u64s(&insert->k));
- EBUG_ON(bkey_cmp(bkey_start_pos(&insert->k), b->data->min_key) < 0 ||
- bkey_cmp(insert->k.p, b->data->max_key) > 0);
+ EBUG_ON(bkey_cmp(b->data->min_key, POS_MIN) &&
+ bkey_cmp(bkey_start_pos(&insert->k),
+ bkey_predecessor(b->data->min_key)) < 0);
+ EBUG_ON(bkey_cmp(insert->k.p, b->data->min_key) < 0);
+ EBUG_ON(bkey_cmp(insert->k.p, b->data->max_key) > 0);
EBUG_ON(insert->k.u64s >
bch_btree_keys_u64s_remaining(iter->trans->c, b));
EBUG_ON(iter->flags & BTREE_ITER_IS_EXTENTS);
BB_NONE,
BB_VMAP,
BB_KMALLOC,
- BB_VMALLOC,
BB_MEMPOOL,
} type;
int rw;
if (b)
return (struct bbuf) { .b = b, .type = BB_KMALLOC, .rw = rw };
- b = mempool_alloc(&c->compression_bounce[rw], GFP_NOWAIT);
- b = b ? page_address(b) : NULL;
- if (b)
- return (struct bbuf) { .b = b, .type = BB_MEMPOOL, .rw = rw };
-
- b = vmalloc(size);
- if (b)
- return (struct bbuf) { .b = b, .type = BB_VMALLOC, .rw = rw };
-
b = mempool_alloc(&c->compression_bounce[rw], GFP_NOIO);
- b = b ? page_address(b) : NULL;
if (b)
return (struct bbuf) { .b = b, .type = BB_MEMPOOL, .rw = rw };
case BB_KMALLOC:
kfree(buf.b);
break;
- case BB_VMALLOC:
- vfree(buf.b);
- break;
case BB_MEMPOOL:
- mempool_free(virt_to_page(buf.b),
- &c->compression_bounce[buf.rw]);
+ mempool_free(buf.b, &c->compression_bounce[buf.rw]);
break;
}
}
have_compressed:
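+ /*
+ * A kvpmalloc-backed pool falls back from page allocation to vmalloc
+ * internally, replacing the open-coded vmalloc fallback removed above:
+ */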
if (!mempool_initialized(&c->compression_bounce[READ])) {
- ret = mempool_init_page_pool(&c->compression_bounce[READ],
- 1, order);
+ ret = mempool_init_kvpmalloc_pool(&c->compression_bounce[READ],
+ 1, order);
if (ret)
goto out;
}
if (!mempool_initialized(&c->compression_bounce[WRITE])) {
- ret = mempool_init_page_pool(&c->compression_bounce[WRITE],
- 1, order);
+ ret = mempool_init_kvpmalloc_pool(&c->compression_bounce[WRITE],
+ 1, order);
if (ret)
goto out;
}
continue;
}
- bch2_btree_iter_set_pos(iter, bkey_start_pos(k.k));
-
dev = s->key.v.ptrs[idx].dev;
bkey_on_stack_reassemble(&sk, c, k);
extent_stripe_ptr_add(e, s, ec_ptr, idx);
+ bch2_btree_iter_set_pos(iter, bkey_start_pos(&sk.k->k));
bch2_trans_update(&trans, iter, sk.k, 0);
ret = bch2_trans_commit(&trans, NULL, NULL,
b = iter->l[0].b;
node_iter = iter->l[0].iter;
- BUG_ON(bkey_cmp(bkey_start_pos(&insert->k), b->data->min_key) < 0);
+ BUG_ON(bkey_cmp(b->data->min_key, POS_MIN) &&
+ bkey_cmp(bkey_start_pos(&insert->k),
+ bkey_predecessor(b->data->min_key)) < 0);
*end = bpos_min(insert->k.p, b->key.k.p);
#include "bcachefs.h"
#include "bkey_methods.h"
#include "btree_gc.h"
+#include "btree_io.h"
#include "btree_iter.h"
#include "buckets.h"
#include "checksum.h"
bch2_bkey_ptrs_to_text(out, c, k);
}
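+/*
+ * btree_ptr_v2 keys carry a copy of the node's min_key, which needs the same
+ * version conversion as the node header itself: the position field swap for
+ * old inode btrees, plus the inclusive/exclusive min_key adjustment for
+ * extents:
+ */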
+void bch2_btree_ptr_v2_compat(enum btree_id btree_id, unsigned version,
+ unsigned big_endian, int write,
+ struct bkey_s k)
+{
+ struct bkey_s_btree_ptr_v2 bp = bkey_s_to_btree_ptr_v2(k);
+
+ compat_bpos(0, btree_id, version, big_endian, write, &bp.v->min_key);
+
+ if (version < bcachefs_metadata_version_inode_btree_change &&
+ btree_node_type_is_extents(btree_id) &&
+ bkey_cmp(bp.v->min_key, POS_MIN))
+ bp.v->min_key = write
+ ? bkey_predecessor(bp.v->min_key)
+ : bkey_successor(bp.v->min_key);
+}
+
/* KEY_TYPE_extent: */
const char *bch2_extent_invalid(const struct bch_fs *c, struct bkey_s_c k)
void bch2_btree_ptr_debugcheck(struct bch_fs *, struct bkey_s_c);
void bch2_btree_ptr_to_text(struct printbuf *, struct bch_fs *,
struct bkey_s_c);
+void bch2_btree_ptr_v2_compat(enum btree_id, unsigned, unsigned,
+ int, struct bkey_s);
#define bch2_bkey_ops_btree_ptr (struct bkey_ops) { \
.key_invalid = bch2_btree_ptr_invalid, \
.key_debugcheck = bch2_btree_ptr_debugcheck, \
.val_to_text = bch2_btree_ptr_to_text, \
.swab = bch2_ptr_swab, \
+ .compat = bch2_btree_ptr_v2_compat, \
}
/* KEY_TYPE_extent: */
if (!ret)
continue;
- if (fsck_err_on(!inode_bitmap_test(&dirs_done, k.k->p.inode), c,
+ if (fsck_err_on(!inode_bitmap_test(&dirs_done, k.k->p.offset), c,
"unreachable directory found (inum %llu)",
- k.k->p.inode)) {
+ k.k->p.offset)) {
bch2_trans_unlock(&trans);
- ret = reattach_inode(c, lostfound_inode, k.k->p.inode);
+ ret = reattach_inode(c, lostfound_inode, k.k->p.offset);
if (ret) {
goto err;
}
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
iter = bch2_trans_get_iter(&trans, BTREE_ID_INODES,
- POS(range_start, 0), 0);
+ POS(0, range_start), 0);
nlinks_iter = genradix_iter_init(links, 0);
while ((k = bch2_btree_iter_peek(iter)).k &&
!(ret2 = bkey_err(k))) {
peek_nlinks: link = genradix_iter_peek(&nlinks_iter, links);
- if (!link && (!k.k || iter->pos.inode >= range_end))
+ if (!link && (!k.k || iter->pos.offset >= range_end))
break;
nlinks_pos = range_start + nlinks_iter.pos;
- if (iter->pos.inode > nlinks_pos) {
+ if (iter->pos.offset > nlinks_pos) {
/* Should have been caught by dirents pass: */
need_fsck_err_on(link && link->count, c,
"missing inode %llu (nlink %u)",
goto peek_nlinks;
}
- if (iter->pos.inode < nlinks_pos || !link)
+ if (iter->pos.offset < nlinks_pos || !link)
link = &zero_links;
if (k.k && k.k->type == KEY_TYPE_inode) {
nlinks_pos, link->count);
}
- if (nlinks_pos == iter->pos.inode)
+ if (nlinks_pos == iter->pos.offset)
genradix_iter_advance(&nlinks_iter, links);
bch2_btree_iter_next(iter);
unsigned bytes;
bkey_inode_init(&packed->inode.k_i);
- packed->inode.k.p.inode = inode->bi_inum;
+ packed->inode.k.p.offset = inode->bi_inum;
packed->inode.v.bi_hash_seed = inode->bi_hash_seed;
packed->inode.v.bi_flags = cpu_to_le32(inode->bi_flags);
packed->inode.v.bi_mode = cpu_to_le16(inode->bi_mode);
unsigned fieldnr = 0, field_bits;
int ret;
- unpacked->bi_inum = inode.k->p.inode;
+ unpacked->bi_inum = inode.k->p.offset;
unpacked->bi_hash_seed = inode.v->bi_hash_seed;
unpacked->bi_flags = le32_to_cpu(inode.v->bi_flags);
unpacked->bi_mode = le16_to_cpu(inode.v->bi_mode);
struct bkey_s_c k;
int ret;
- iter = bch2_trans_get_iter(trans, BTREE_ID_INODES, POS(inum, 0),
+ iter = bch2_trans_get_iter(trans, BTREE_ID_INODES, POS(0, inum),
BTREE_ITER_SLOTS|flags);
if (IS_ERR(iter))
return iter;
struct bkey_s_c_inode inode = bkey_s_c_to_inode(k);
struct bch_inode_unpacked unpacked;
- if (k.k->p.offset)
- return "nonzero offset";
+ if (k.k->p.inode)
+ return "nonzero k.p.inode";
if (bkey_val_bytes(k.k) < sizeof(struct bch_inode))
return "incorrect value size";
- if (k.k->p.inode < BLOCKDEV_INODE_MAX)
+ if (k.k->p.offset < BLOCKDEV_INODE_MAX)
return "fs inode in blockdev range";
if (INODE_STR_HASH(inode.v) >= BCH_STR_HASH_NR)
const char *bch2_inode_generation_invalid(const struct bch_fs *c,
struct bkey_s_c k)
{
- if (k.k->p.offset)
- return "nonzero offset";
+ if (k.k->p.inode)
+ return "nonzero k.p.inode";
if (bkey_val_bytes(k.k) != sizeof(struct bch_inode_generation))
return "incorrect value size";
if (IS_ERR(inode_p))
return PTR_ERR(inode_p);
again:
- for_each_btree_key(trans, iter, BTREE_ID_INODES, POS(start, 0),
+ for_each_btree_key(trans, iter, BTREE_ID_INODES, POS(0, start),
BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) {
- if (iter->pos.inode > max)
+ if (bkey_cmp(iter->pos, POS(0, max)) > 0)
break;
if (k.k->type != KEY_TYPE_inode)
return -ENOSPC;
found_slot:
- *hint = k.k->p.inode;
- inode_u->bi_inum = k.k->p.inode;
+ *hint = k.k->p.offset;
+ inode_u->bi_inum = k.k->p.offset;
inode_u->bi_generation = bkey_generation(k);
bch2_inode_pack(inode_p, inode_u);
bch2_trans_init(&trans, c, 0, 0);
- iter = bch2_trans_get_iter(&trans, BTREE_ID_INODES, POS(inode_nr, 0),
+ iter = bch2_trans_get_iter(&trans, BTREE_ID_INODES, POS(0, inode_nr),
BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
do {
struct bkey_s_c k = bch2_btree_iter_peek_slot(iter);
if (!bi_generation) {
bkey_init(&delete.k);
- delete.k.p.inode = inode_nr;
+ delete.k.p.offset = inode_nr;
} else {
bkey_inode_generation_init(&delete.k_i);
- delete.k.p.inode = inode_nr;
+ delete.k.p.offset = inode_nr;
delete.v.bi_generation = cpu_to_le32(bi_generation);
}
int ret;
iter = bch2_trans_get_iter(trans, BTREE_ID_INODES,
- POS(inode_nr, 0), BTREE_ITER_SLOTS);
+ POS(0, inode_nr), BTREE_ITER_SLOTS);
if (IS_ERR(iter))
return PTR_ERR(iter);
goto retry;
if (ret == -ENOSPC) {
- BUG_ON(!can_discard && (flags & JOURNAL_RES_GET_RESERVED));
+ WARN_ONCE(!can_discard && (flags & JOURNAL_RES_GET_RESERVED),
+ "JOURNAL_RES_GET_RESERVED set but journal full");
/*
* Journal is full - can't rely on reclaim from work item due to
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
#include "alloc_foreground.h"
+#include "btree_io.h"
#include "buckets.h"
#include "checksum.h"
#include "error.h"
static int journal_validate_key(struct bch_fs *c, struct jset *jset,
struct jset_entry *entry,
- struct bkey_i *k, enum btree_node_type key_type,
+ unsigned level, enum btree_id btree_id,
+ struct bkey_i *k,
const char *type, int write)
{
void *next = vstruct_next(entry);
return 0;
}
- if (JSET_BIG_ENDIAN(jset) != CPU_BIG_ENDIAN) {
- bch2_bkey_swab_key(NULL, bkey_to_packed(k));
- bch2_bkey_swab_val(bkey_i_to_s(k));
- }
-
- if (!write &&
- version < bcachefs_metadata_version_bkey_renumber)
- bch2_bkey_renumber(key_type, bkey_to_packed(k), write);
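+ /* reading: convert the key from the version/endianness it was written in: */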
+ if (!write)
+ bch2_bkey_compat(level, btree_id, version,
+ JSET_BIG_ENDIAN(jset), write,
+ NULL, bkey_to_packed(k));
- invalid = bch2_bkey_invalid(c, bkey_i_to_s_c(k), key_type);
+ invalid = bch2_bkey_invalid(c, bkey_i_to_s_c(k),
+ __btree_node_type(level, btree_id));
if (invalid) {
char buf[160];
return 0;
}
- if (write &&
- version < bcachefs_metadata_version_bkey_renumber)
- bch2_bkey_renumber(key_type, bkey_to_packed(k), write);
+ if (write)
+ bch2_bkey_compat(level, btree_id, version,
+ JSET_BIG_ENDIAN(jset), write,
+ NULL, bkey_to_packed(k));
fsck_err:
return ret;
}
struct bkey_i *k;
vstruct_for_each(entry, k) {
- int ret = journal_validate_key(c, jset, entry, k,
- __btree_node_type(entry->level,
- entry->btree_id),
- "key", write);
+ int ret = journal_validate_key(c, jset, entry,
+ entry->level,
+ entry->btree_id,
+ k, "key", write);
if (ret)
return ret;
}
return 0;
}
- return journal_validate_key(c, jset, entry, k, BKEY_TYPE_BTREE,
+ return journal_validate_key(c, jset, entry, 1, entry->btree_id, k,
"btree root", write);
fsck_err:
return ret;
if (bch2_csum_type_is_encryption(JSET_CSUM_TYPE(jset)))
validate_before_checksum = true;
- if (le32_to_cpu(jset->version) <
- bcachefs_metadata_version_bkey_renumber)
+ if (le32_to_cpu(jset->version) < bcachefs_metadata_version_max)
validate_before_checksum = true;
if (validate_before_checksum &&
set_bit(BCH_FS_REBUILD_REPLICAS, &c->flags);
}
- if (!c->sb.clean || c->opts.fsck) {
+ if (!c->sb.clean || c->opts.fsck || c->opts.keep_journal) {
struct jset *j;
ret = bch2_journal_read(c, &c->journal_entries);