// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
+#include "bbpos.h"
#include "alloc_background.h"
#include "backpointers.h"
#include "btree_cache.h"
#include <linux/mm.h>
-#define MAX_EXTENT_COMPRESS_RATIO_SHIFT 10
-
/*
* Convert from pos in backpointer btree to pos of corresponding bucket in alloc
* btree:
(bucket_to_sector(ca, bucket.offset) <<
MAX_EXTENT_COMPRESS_RATIO_SHIFT) + bucket_offset);
- BUG_ON(bpos_cmp(bucket, bp_pos_to_bucket(c, ret)));
+ BUG_ON(!bkey_eq(bucket, bp_pos_to_bucket(c, ret)));
return ret;
}
-void bch2_extent_ptr_to_bp(struct bch_fs *c,
- enum btree_id btree_id, unsigned level,
- struct bkey_s_c k, struct extent_ptr_decoded p,
- struct bpos *bucket_pos, struct bch_backpointer *bp)
-{
- enum bch_data_type data_type = level ? BCH_DATA_btree : BCH_DATA_user;
- s64 sectors = level ? btree_sectors(c) : k.k->size;
- u32 bucket_offset;
-
- *bucket_pos = PTR_BUCKET_POS_OFFSET(c, &p.ptr, &bucket_offset);
- *bp = (struct bch_backpointer) {
- .btree_id = btree_id,
- .level = level,
- .data_type = data_type,
- .bucket_offset = ((u64) bucket_offset << MAX_EXTENT_COMPRESS_RATIO_SHIFT) +
- p.crc.offset,
- .bucket_len = ptr_disk_sectors(sectors, p),
- .pos = k.k->p,
- };
-}
-
static bool extent_matches_bp(struct bch_fs *c,
enum btree_id btree_id, unsigned level,
struct bkey_s_c k,
bch2_extent_ptr_to_bp(c, btree_id, level, k, p,
&bucket2, &bp2);
- if (!bpos_cmp(bucket, bucket2) &&
+ if (bpos_eq(bucket, bucket2) &&
!memcmp(&bp, &bp2, sizeof(bp)))
return true;
}
}
int bch2_backpointer_invalid(const struct bch_fs *c, struct bkey_s_c k,
- int rw, struct printbuf *err)
+ unsigned flags, struct printbuf *err)
{
struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k);
struct bpos bucket = bp_pos_to_bucket(c, bp.k->p);
if (bkey_val_bytes(bp.k) < sizeof(*bp.v)) {
prt_str(err, "incorrect value size");
- return -EINVAL;
+ return -BCH_ERR_invalid_bkey;
}
- if (bpos_cmp(bp.k->p, bucket_pos_to_bp(c, bucket, bp.v->bucket_offset))) {
+ if (!bpos_eq(bp.k->p, bucket_pos_to_bp(c, bucket, bp.v->bucket_offset))) {
prt_str(err, "backpointer at wrong pos");
- return -EINVAL;
+ return -BCH_ERR_invalid_bkey;
}
return 0;
memcmp(bkey_s_c_to_backpointer(k).v, &bp, sizeof(bp))) {
struct printbuf buf = PRINTBUF;
+ if (!test_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags))
+ goto err;
+
prt_printf(&buf, "backpointer not found when deleting");
prt_newline(&buf);
printbuf_indent_add(&buf, 2);
prt_printf(&buf, "for ");
bch2_bkey_val_to_text(&buf, c, orig_k);
- if (!test_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags)) {
- bch_err(c, "%s", buf.buf);
- } else {
- ret = -EIO;
- bch2_trans_inconsistent(trans, "%s", buf.buf);
- }
+ bch_err(c, "%s", buf.buf);
+ bch2_inconsistent_error(c);
+ ret = -EIO;
printbuf_exit(&buf);
goto err;
}
struct bkey_s_c orig_k)
{
struct bch_fs *c = trans->c;
- struct bch_dev *ca;
struct bch_backpointer *bps = alloc_v4_backpointers(&a->v);
unsigned i, nr = BCH_ALLOC_V4_NR_BACKPOINTERS(&a->v);
struct bkey_i_backpointer *bp_k;
prt_printf(&buf, "for ");
bch2_bkey_val_to_text(&buf, c, orig_k);
- if (!test_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags))
- bch_err(c, "%s", buf.buf);
- else {
- bch2_trans_inconsistent(trans, "%s", buf.buf);
- printbuf_exit(&buf);
+ bch_err(c, "%s", buf.buf);
+ printbuf_exit(&buf);
+ if (test_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags)) {
+ bch2_inconsistent_error(c);
return -EIO;
}
}
}
/* Overflow: use backpointer btree */
- bp_k = bch2_trans_kmalloc(trans, sizeof(*bp_k));
- ret = PTR_ERR_OR_ZERO(bp_k);
- if (ret)
- return ret;
-
- ca = bch_dev_bkey_exists(c, a->k.p.inode);
- bkey_backpointer_init(&bp_k->k_i);
- bp_k->k.p = bucket_pos_to_bp(c, a->k.p, bp.bucket_offset);
- bp_k->v = bp;
-
- bch2_trans_iter_init(trans, &bp_iter, BTREE_ID_backpointers, bp_k->k.p,
+ bch2_trans_iter_init(trans, &bp_iter, BTREE_ID_backpointers,
+ bucket_pos_to_bp(c, a->k.p, bp.bucket_offset),
BTREE_ITER_INTENT|
BTREE_ITER_SLOTS|
BTREE_ITER_WITH_UPDATES);
prt_printf(&buf, "for ");
bch2_bkey_val_to_text(&buf, c, orig_k);
- if (!test_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags))
- bch_err(c, "%s", buf.buf);
- else {
- bch2_trans_inconsistent(trans, "%s", buf.buf);
- printbuf_exit(&buf);
+ bch_err(c, "%s", buf.buf);
+ printbuf_exit(&buf);
+ if (test_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags)) {
+ bch2_inconsistent_error(c);
ret = -EIO;
goto err;
}
}
+ bp_k = bch2_bkey_alloc(trans, &bp_iter, backpointer);
+ ret = PTR_ERR_OR_ZERO(bp_k);
+ if (ret)
+ goto err;
+
+ bp_k->v = bp;
+
ret = bch2_trans_update(trans, &bp_iter, &bp_k->k_i, 0);
err:
bch2_trans_iter_exit(trans, &bp_iter);
int bch2_get_next_backpointer(struct btree_trans *trans,
struct bpos bucket, int gen,
u64 *bp_offset,
- struct bch_backpointer *dst)
+ struct bch_backpointer *dst,
+ unsigned iter_flags)
{
struct bch_fs *c = trans->c;
struct bpos bp_pos, bp_end_pos;
for_each_btree_key_norestart(trans, bp_iter, BTREE_ID_backpointers,
bp_pos, 0, k, ret) {
- if (bpos_cmp(k.k->p, bp_end_pos) >= 0)
+ if (bpos_ge(k.k->p, bp_end_pos))
break;
if (k.k->type != KEY_TYPE_backpointer)
if (bp.level == c->btree_roots[bp.btree_id].level + 1)
k = bkey_i_to_s_c(&c->btree_roots[bp.btree_id].key);
- if (extent_matches_bp(c, bp.btree_id, bp.level, k, bucket, bp))
+ if (k.k && extent_matches_bp(c, bp.btree_id, bp.level, k, bucket, bp))
return k;
bch2_trans_iter_exit(trans, iter);
if (IS_ERR(b))
goto err;
- if (extent_matches_bp(c, bp.btree_id, bp.level,
- bkey_i_to_s_c(&b->key),
- bucket, bp))
+ if (b && extent_matches_bp(c, bp.btree_id, bp.level,
+ bkey_i_to_s_c(&b->key),
+ bucket, bp))
return b;
- if (btree_node_will_make_reachable(b)) {
+ if (b && btree_node_will_make_reachable(b)) {
b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node);
} else {
backpointer_not_found(trans, bucket, bp_offset, bp,
static int check_bp_exists(struct btree_trans *trans,
struct bpos bucket_pos,
struct bch_backpointer bp,
- struct bkey_s_c orig_k)
+ struct bkey_s_c orig_k,
+ struct bpos bucket_start,
+ struct bpos bucket_end)
{
struct bch_fs *c = trans->c;
struct btree_iter alloc_iter, bp_iter = { NULL };
struct bkey_s_c alloc_k, bp_k;
int ret;
+ if (bpos_lt(bucket_pos, bucket_start) ||
+ bpos_gt(bucket_pos, bucket_end))
+ return 0;
+
bch2_trans_iter_init(trans, &alloc_iter, BTREE_ID_alloc, bucket_pos, 0);
alloc_k = bch2_btree_iter_peek_slot(&alloc_iter);
ret = bkey_err(alloc_k);
}
static int check_extent_to_backpointers(struct btree_trans *trans,
- struct btree_iter *iter)
+ struct btree_iter *iter,
+ struct bpos bucket_start,
+ struct bpos bucket_end)
{
struct bch_fs *c = trans->c;
struct bkey_ptrs_c ptrs;
bch2_extent_ptr_to_bp(c, iter->btree_id, iter->path->level,
k, p, &bucket_pos, &bp);
- ret = check_bp_exists(trans, bucket_pos, bp, k);
+ ret = check_bp_exists(trans, bucket_pos, bp, k, bucket_start, bucket_end);
if (ret)
return ret;
}
}
static int check_btree_root_to_backpointers(struct btree_trans *trans,
- enum btree_id btree_id)
+ enum btree_id btree_id,
+ struct bpos bucket_start,
+ struct bpos bucket_end)
{
struct bch_fs *c = trans->c;
struct btree_iter iter;
bch2_extent_ptr_to_bp(c, iter.btree_id, iter.path->level + 1,
k, p, &bucket_pos, &bp);
- ret = check_bp_exists(trans, bucket_pos, bp, k);
+ ret = check_bp_exists(trans, bucket_pos, bp, k, bucket_start, bucket_end);
if (ret)
goto err;
}
return ret;
}
-struct bbpos {
- enum btree_id btree;
- struct bpos pos;
-};
-
-static inline int bbpos_cmp(struct bbpos l, struct bbpos r)
-{
- return cmp_int(l.btree, r.btree) ?: bpos_cmp(l.pos, r.pos);
-}
-
-static inline struct bbpos bbpos_successor(struct bbpos pos)
-{
- if (bpos_cmp(pos.pos, SPOS_MAX)) {
- pos.pos = bpos_successor(pos.pos);
- return pos;
- }
-
- if (pos.btree != BTREE_ID_NR) {
- pos.btree++;
- pos.pos = POS_MIN;
- return pos;
- }
-
- BUG();
-}
-
-#if 0
-static void bbpos_to_text(struct printbuf *out, struct bbpos pos)
-{
- prt_str(out, bch2_btree_ids[pos.btree]);
- prt_char(out, ':');
- bch2_bpos_to_text(out, pos.pos);
-}
-#endif
-
static inline struct bbpos bp_to_bbpos(struct bch_backpointer bp)
{
return (struct bbpos) {
};
}
+static size_t btree_nodes_fit_in_ram(struct bch_fs *c)
+{
+ struct sysinfo i;
+ u64 mem_bytes;
+
+ si_meminfo(&i);
+ mem_bytes = i.totalram * i.mem_unit;
+ return (mem_bytes >> 1) / btree_bytes(c);
+}
+
int bch2_get_btree_in_memory_pos(struct btree_trans *trans,
unsigned btree_leaf_mask,
unsigned btree_interior_mask,
{
struct btree_iter iter;
struct bkey_s_c k;
- struct sysinfo i;
- size_t btree_nodes;
+ size_t btree_nodes = btree_nodes_fit_in_ram(trans->c);
enum btree_id btree;
int ret = 0;
- si_meminfo(&i);
-
- btree_nodes = (i.totalram >> 1) / btree_bytes(trans->c);
-
for (btree = start.btree; btree < BTREE_ID_NR && !ret; btree++) {
unsigned depth = ((1U << btree) & btree_leaf_mask) ? 1 : 2;
--btree_nodes;
if (!btree_nodes) {
- end->btree = btree;
- end->pos = k.k->p;
+ *end = BBPOS(btree, k.k->p);
bch2_trans_iter_exit(trans, &iter);
return 0;
}
bch2_trans_iter_exit(trans, &iter);
}
- end->btree = BTREE_ID_NR;
- end->pos = POS_MIN;
+ *end = BBPOS_MAX;
return ret;
}
-int bch2_check_extents_to_backpointers(struct bch_fs *c)
+static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
+ struct bpos bucket_start,
+ struct bpos bucket_end)
{
- struct btree_trans trans;
struct btree_iter iter;
enum btree_id btree_id;
int ret = 0;
- bch2_trans_init(&trans, c, 0, 0);
for (btree_id = 0; btree_id < BTREE_ID_NR; btree_id++) {
unsigned depth = btree_type_has_ptrs(btree_id) ? 0 : 1;
- bch2_trans_node_iter_init(&trans, &iter, btree_id, POS_MIN, 0,
+ bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 0,
depth,
BTREE_ITER_ALL_LEVELS|
BTREE_ITER_PREFETCH);
do {
- ret = commit_do(&trans, NULL, NULL,
- BTREE_INSERT_LAZY_RW|
- BTREE_INSERT_NOFAIL,
- check_extent_to_backpointers(&trans, &iter));
+ ret = commit_do(trans, NULL, NULL,
+ BTREE_INSERT_LAZY_RW|
+ BTREE_INSERT_NOFAIL,
+ check_extent_to_backpointers(trans, &iter,
+ bucket_start, bucket_end));
if (ret)
break;
} while (!bch2_btree_iter_advance(&iter));
- bch2_trans_iter_exit(&trans, &iter);
+ bch2_trans_iter_exit(trans, &iter);
if (ret)
break;
- ret = commit_do(&trans, NULL, NULL,
- BTREE_INSERT_LAZY_RW|
- BTREE_INSERT_NOFAIL,
- check_btree_root_to_backpointers(&trans, btree_id));
+ ret = commit_do(trans, NULL, NULL,
+ BTREE_INSERT_LAZY_RW|
+ BTREE_INSERT_NOFAIL,
+ check_btree_root_to_backpointers(trans, btree_id,
+ bucket_start, bucket_end));
if (ret)
break;
}
+ return ret;
+}
+
+static struct bpos bucket_pos_to_bp_safe(const struct bch_fs *c,
+ struct bpos bucket)
+{
+ return bch2_dev_exists2(c, bucket.inode)
+ ? bucket_pos_to_bp(c, bucket, 0)
+ : bucket;
+}
+
+int bch2_get_alloc_in_memory_pos(struct btree_trans *trans,
+ struct bpos start, struct bpos *end)
+{
+ struct btree_iter alloc_iter;
+ struct btree_iter bp_iter;
+ struct bkey_s_c alloc_k, bp_k;
+ size_t btree_nodes = btree_nodes_fit_in_ram(trans->c);
+ bool alloc_end = false, bp_end = false;
+ int ret = 0;
+
+ bch2_trans_node_iter_init(trans, &alloc_iter, BTREE_ID_alloc,
+ start, 0, 1, 0);
+ bch2_trans_node_iter_init(trans, &bp_iter, BTREE_ID_backpointers,
+ bucket_pos_to_bp_safe(trans->c, start), 0, 1, 0);
+ while (1) {
+ alloc_k = !alloc_end
+ ? __bch2_btree_iter_peek_and_restart(trans, &alloc_iter, 0)
+ : bkey_s_c_null;
+ bp_k = !bp_end
+ ? __bch2_btree_iter_peek_and_restart(trans, &bp_iter, 0)
+ : bkey_s_c_null;
+
+ ret = bkey_err(alloc_k) ?: bkey_err(bp_k);
+ if ((!alloc_k.k && !bp_k.k) || ret) {
+ *end = SPOS_MAX;
+ break;
+ }
+
+ --btree_nodes;
+ if (!btree_nodes) {
+ *end = alloc_k.k->p;
+ break;
+ }
+
+ if (bpos_lt(alloc_iter.pos, SPOS_MAX) &&
+ bpos_lt(bucket_pos_to_bp_safe(trans->c, alloc_iter.pos), bp_iter.pos)) {
+ if (!bch2_btree_iter_advance(&alloc_iter))
+ alloc_end = true;
+ } else {
+ if (!bch2_btree_iter_advance(&bp_iter))
+ bp_end = true;
+ }
+ }
+ bch2_trans_iter_exit(trans, &bp_iter);
+ bch2_trans_iter_exit(trans, &alloc_iter);
+ return ret;
+}
+
+int bch2_check_extents_to_backpointers(struct bch_fs *c)
+{
+ struct btree_trans trans;
+ struct bpos start = POS_MIN, end;
+ int ret;
+
+ bch2_trans_init(&trans, c, 0, 0);
+ while (1) {
+ ret = bch2_get_alloc_in_memory_pos(&trans, start, &end);
+ if (ret)
+ break;
+
+ if (bpos_eq(start, POS_MIN) && !bpos_eq(end, SPOS_MAX))
+ bch_verbose(c, "%s(): alloc info does not fit in ram, running in multiple passes with %zu nodes per pass",
+ __func__, btree_nodes_fit_in_ram(c));
+
+ if (!bpos_eq(start, POS_MIN) || !bpos_eq(end, SPOS_MAX)) {
+ struct printbuf buf = PRINTBUF;
+
+ prt_str(&buf, "check_extents_to_backpointers(): ");
+ bch2_bpos_to_text(&buf, start);
+ prt_str(&buf, "-");
+ bch2_bpos_to_text(&buf, end);
+
+ bch_verbose(c, "%s", buf.buf);
+ printbuf_exit(&buf);
+ }
+
+ ret = bch2_check_extents_to_backpointers_pass(&trans, start, end);
+ if (ret || bpos_eq(end, SPOS_MAX))
+ break;
+
+ start = bpos_successor(end);
+ }
bch2_trans_exit(&trans);
+
return ret;
}
struct printbuf buf = PRINTBUF;
int ret;
- ret = bch2_get_next_backpointer(trans, bucket, -1, bp_offset, &bp);
+ ret = bch2_get_next_backpointer(trans, bucket, -1, bp_offset, &bp, 0);
if (ret || *bp_offset == U64_MAX)
return ret;
bch2_trans_init(&trans, c, 0, 0);
while (1) {
- ret = bch2_get_btree_in_memory_pos(&trans,
- (1U << BTREE_ID_extents)|
- (1U << BTREE_ID_reflink),
- ~0,
- start, &end) ?:
- bch2_check_backpointers_to_extents_pass(&trans, start, end);
- if (ret || end.btree == BTREE_ID_NR)
+ ret = bch2_get_btree_in_memory_pos(&trans,
+ (1U << BTREE_ID_extents)|
+ (1U << BTREE_ID_reflink),
+ ~0,
+ start, &end);
+ if (ret)
+ break;
+
+ if (!bbpos_cmp(start, BBPOS_MIN) &&
+ bbpos_cmp(end, BBPOS_MAX))
+ bch_verbose(c, "%s(): extents do not fit in ram, running in multiple passes with %zu nodes per pass",
+ __func__, btree_nodes_fit_in_ram(c));
+
+ if (bbpos_cmp(start, BBPOS_MIN) ||
+ bbpos_cmp(end, BBPOS_MAX)) {
+ struct printbuf buf = PRINTBUF;
+
+ prt_str(&buf, "check_backpointers_to_extents(): ");
+ bch2_bbpos_to_text(&buf, start);
+ prt_str(&buf, "-");
+ bch2_bbpos_to_text(&buf, end);
+
+ bch_verbose(c, "%s", buf.buf);
+ printbuf_exit(&buf);
+ }
+
+ ret = bch2_check_backpointers_to_extents_pass(&trans, start, end);
+ if (ret || !bbpos_cmp(end, BBPOS_MAX))
break;
start = bbpos_successor(end);