#include "alloc_foreground.h"
#include "bkey_methods.h"
#include "bkey_buf.h"
+#include "btree_journal_iter.h"
#include "btree_key_cache.h"
#include "btree_locking.h"
#include "btree_update_interior.h"
#include "reflink.h"
#include "replicas.h"
#include "super-io.h"
+#include "trace.h"
#include <linux/slab.h>
#include <linux/bitops.h>
#include <linux/preempt.h>
#include <linux/rcupdate.h>
#include <linux/sched/task.h>
-#include <trace/events/bcachefs.h>
#define DROP_THIS_NODE 10
#define DROP_PREV_NODE 11
+/*
+ * Returns true only when both conditions hold:
+ *  - c->opts.fix_errors is something other than FSCK_FIX_no, and
+ *  - the BCH_RECOVERY_PASS_check_topology bit is not yet set in
+ *    c->recovery_passes_complete.
+ *
+ * Callers in this file use the result to decide whether to halt marking and
+ * request the check_topology pass via bch2_run_explicit_recovery_pass().
+ */
+static bool should_restart_for_topology_repair(struct bch_fs *c)
+{
+ /* Error fixing is disabled: never restart. */
+ if (c->opts.fix_errors == FSCK_FIX_no)
+  return false;
+
+ /* Restart only if the check_topology pass has not completed yet. */
+ return !(c->recovery_passes_complete & BIT_ULL(BCH_RECOVERY_PASS_check_topology));
+}
+
static inline void __gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
{
preempt_disable();
" cur %s",
bch2_btree_ids[b->c.btree_id], b->c.level,
buf1.buf, buf2.buf) &&
- !test_bit(BCH_FS_TOPOLOGY_REPAIR_DONE, &c->flags)) {
+ should_restart_for_topology_repair(c)) {
bch_info(c, "Halting mark and sweep to start topology repair pass");
- ret = -BCH_ERR_need_topology_repair;
+ ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology);
goto err;
} else {
set_bit(BCH_FS_INITIAL_GC_UNFIXED, &c->flags);
" expected %s",
bch2_btree_ids[b->c.btree_id], b->c.level,
buf1.buf, buf2.buf) &&
- !test_bit(BCH_FS_TOPOLOGY_REPAIR_DONE, &c->flags)) {
+ should_restart_for_topology_repair(c)) {
bch_info(c, "Halting mark and sweep to start topology repair pass");
- ret = -BCH_ERR_need_topology_repair;
+ ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology);
goto err;
} else {
set_bit(BCH_FS_INITIAL_GC_UNFIXED, &c->flags);
new = kmalloc_array(BKEY_BTREE_PTR_U64s_MAX, sizeof(u64), GFP_KERNEL);
if (!new)
- return -ENOMEM;
+ return -BCH_ERR_ENOMEM_gc_repair_key;
btree_ptr_to_v2(b, new);
b->data->min_key = new_min;
new = kmalloc_array(BKEY_BTREE_PTR_U64s_MAX, sizeof(u64), GFP_KERNEL);
if (!new)
- return -ENOMEM;
+ return -BCH_ERR_ENOMEM_gc_repair_key;
btree_ptr_to_v2(b, new);
b->data->max_key = new_max;
}
if (ret) {
- bch_err(c, "%s: error getting btree node: %s",
- __func__, bch2_err_str(ret));
+ bch_err_msg(c, ret, "getting btree node");
break;
}
ret = PTR_ERR_OR_ZERO(cur);
if (ret) {
- bch_err(c, "%s: error getting btree node: %s",
- __func__, bch2_err_str(ret));
+ bch_err_msg(c, ret, "getting btree node");
goto err;
}
return ret;
}
-static int bch2_repair_topology(struct bch_fs *c)
+int bch2_check_topology(struct bch_fs *c)
{
struct btree_trans trans;
struct btree *b;
bch2_trans_init(&trans, c, 0, 0);
- for (i = 0; i < BTREE_ID_NR && !ret; i++) {
- b = c->btree_roots[i].b;
+ for (i = 0; i < btree_id_nr_alive(c) && !ret; i++) {
+ struct btree_root *r = bch2_btree_id_root(c, i);
+
+ if (!r->alive)
+ continue;
+
+ b = r->b;
if (btree_node_fake(b))
continue;
struct bucket *g = PTR_GC_BUCKET(ca, &p.ptr);
enum bch_data_type data_type = bch2_bkey_ptr_data_type(*k, &entry->ptr);
- if (c->opts.reconstruct_alloc ||
- fsck_err_on(!g->gen_valid, c,
- "bucket %u:%zu data type %s ptr gen %u missing in alloc btree\n"
- "while marking %s",
- p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr),
- bch2_data_types[ptr_data_type(k->k, &p.ptr)],
- p.ptr.gen,
- (printbuf_reset(&buf),
- bch2_bkey_val_to_text(&buf, c, *k), buf.buf))) {
+ if (!g->gen_valid &&
+ (c->opts.reconstruct_alloc ||
+ fsck_err(c, "bucket %u:%zu data type %s ptr gen %u missing in alloc btree\n"
+ "while marking %s",
+ p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr),
+ bch2_data_types[ptr_data_type(k->k, &p.ptr)],
+ p.ptr.gen,
+ (printbuf_reset(&buf),
+ bch2_bkey_val_to_text(&buf, c, *k), buf.buf)))) {
if (!p.ptr.cached) {
g->gen_valid = true;
g->gen = p.ptr.gen;
}
}
- if (fsck_err_on(gen_cmp(p.ptr.gen, g->gen) > 0, c,
- "bucket %u:%zu data type %s ptr gen in the future: %u > %u\n"
- "while marking %s",
- p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr),
- bch2_data_types[ptr_data_type(k->k, &p.ptr)],
- p.ptr.gen, g->gen,
- (printbuf_reset(&buf),
- bch2_bkey_val_to_text(&buf, c, *k), buf.buf))) {
+ if (gen_cmp(p.ptr.gen, g->gen) > 0 &&
+ (c->opts.reconstruct_alloc ||
+ fsck_err(c, "bucket %u:%zu data type %s ptr gen in the future: %u > %u\n"
+ "while marking %s",
+ p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr),
+ bch2_data_types[ptr_data_type(k->k, &p.ptr)],
+ p.ptr.gen, g->gen,
+ (printbuf_reset(&buf),
+ bch2_bkey_val_to_text(&buf, c, *k), buf.buf)))) {
if (!p.ptr.cached) {
g->gen_valid = true;
g->gen = p.ptr.gen;
}
}
- if (fsck_err_on(gen_cmp(g->gen, p.ptr.gen) > BUCKET_GC_GEN_MAX, c,
- "bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n"
- "while marking %s",
- p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), g->gen,
- bch2_data_types[ptr_data_type(k->k, &p.ptr)],
- p.ptr.gen,
- (printbuf_reset(&buf),
- bch2_bkey_val_to_text(&buf, c, *k), buf.buf)))
+ if (gen_cmp(g->gen, p.ptr.gen) > BUCKET_GC_GEN_MAX &&
+ (c->opts.reconstruct_alloc ||
+ fsck_err(c, "bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n"
+ "while marking %s",
+ p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), g->gen,
+ bch2_data_types[ptr_data_type(k->k, &p.ptr)],
+ p.ptr.gen,
+ (printbuf_reset(&buf),
+ bch2_bkey_val_to_text(&buf, c, *k), buf.buf))))
do_update = true;
- if (fsck_err_on(!p.ptr.cached &&
- gen_cmp(p.ptr.gen, g->gen) < 0, c,
- "bucket %u:%zu data type %s stale dirty ptr: %u < %u\n"
- "while marking %s",
- p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr),
- bch2_data_types[ptr_data_type(k->k, &p.ptr)],
- p.ptr.gen, g->gen,
- (printbuf_reset(&buf),
- bch2_bkey_val_to_text(&buf, c, *k), buf.buf)))
+ if (!p.ptr.cached && gen_cmp(p.ptr.gen, g->gen) < 0 &&
+ (c->opts.reconstruct_alloc ||
+ fsck_err(c, "bucket %u:%zu data type %s stale dirty ptr: %u < %u\n"
+ "while marking %s",
+ p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr),
+ bch2_data_types[ptr_data_type(k->k, &p.ptr)],
+ p.ptr.gen, g->gen,
+ (printbuf_reset(&buf),
+ bch2_bkey_val_to_text(&buf, c, *k), buf.buf))))
do_update = true;
if (data_type != BCH_DATA_btree && p.ptr.gen != g->gen)
continue;
- if (fsck_err_on(g->data_type &&
- g->data_type != data_type, c,
+ if (fsck_err_on(bucket_data_type(g->data_type) &&
+ bucket_data_type(g->data_type) != data_type, c,
"bucket %u:%zu different types of data in same bucket: %s, %s\n"
"while marking %s",
p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr),
bch2_bkey_val_to_text(&buf, c, *k), buf.buf)))
do_update = true;
- if (fsck_err_on(!bch2_ptr_matches_stripe_m(m, p), c,
+ if (fsck_err_on(m && m->alive && !bch2_ptr_matches_stripe_m(m, p), c,
"pointer does not match stripe %llu\n"
"while marking %s",
(u64) p.ec.idx,
new = kmalloc(bkey_bytes(k->k), GFP_KERNEL);
if (!new) {
- bch_err(c, "%s: error allocating new key", __func__);
- ret = -ENOMEM;
+ /*
+  * Set the error code before logging: bch_err_msg() is passed ret,
+  * so logging first would report the stale value instead of the
+  * allocation failure.
+  */
+ ret = -BCH_ERR_ENOMEM_gc_repair_key;
+ bch_err_msg(c, ret, "allocating new key");
goto err;
}
if (level)
bch2_btree_node_update_key_early(trans, btree_id, level - 1, *k, new);
- if (c->opts.verbose) {
+ if (0) {
printbuf_reset(&buf);
bch2_bkey_val_to_text(&buf, c, *k);
bch_info(c, "updated %s", buf.buf);
}
ret = commit_do(trans, NULL, NULL, 0,
- bch2_mark_key(trans, old, *k, flags));
+ bch2_mark_key(trans, btree_id, level, old, *k, flags));
fsck_err:
err:
if (ret)
- bch_err(c, "error from %s(): %s", __func__, bch2_err_str(ret));
+ bch_err_fn(c, ret);
return ret;
}
return ret;
mutex_lock(&c->btree_root_lock);
- b = c->btree_roots[btree_id].b;
+ b = bch2_btree_id_root(c, btree_id)->b;
if (!btree_node_fake(b)) {
struct bkey_s_c k = bkey_i_to_s_c(&b->key);
- ret = bch2_gc_mark_key(trans, b->c.btree_id, b->c.level,
+ ret = bch2_gc_mark_key(trans, b->c.btree_id, b->c.level + 1,
true, &k, initial);
}
gc_pos_set(c, gc_pos_btree_root(b->c.btree_id));
ret = bch2_gc_mark_key(trans, b->c.btree_id, b->c.level,
false, &k, true);
- if (ret) {
- bch_err(c, "%s: error from bch2_gc_mark_key: %s",
- __func__, bch2_err_str(ret));
+ if (ret)
goto fsck_err;
- }
if (b->c.level) {
bch2_bkey_buf_reassemble(&cur, c, k);
b->c.level - 1,
(printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(cur.k)), buf.buf)) &&
- !test_bit(BCH_FS_TOPOLOGY_REPAIR_DONE, &c->flags)) {
- ret = -BCH_ERR_need_topology_repair;
+ should_restart_for_topology_repair(c)) {
bch_info(c, "Halting mark and sweep to start topology repair pass");
+ ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology);
goto fsck_err;
} else {
/* Continue marking when opted to not
continue;
}
} else if (ret) {
- bch_err(c, "%s: error getting btree node: %s",
- __func__, bch2_err_str(ret));
+ bch_err_msg(c, ret, "getting btree node");
break;
}
struct printbuf buf = PRINTBUF;
int ret = 0;
- b = c->btree_roots[btree_id].b;
+ b = bch2_btree_id_root(c, btree_id)->b;
if (btree_node_fake(b))
return 0;
if (!ret) {
struct bkey_s_c k = bkey_i_to_s_c(&b->key);
- ret = bch2_gc_mark_key(trans, b->c.btree_id, b->c.level, true,
+ ret = bch2_gc_mark_key(trans, b->c.btree_id, b->c.level + 1, true,
&k, true);
}
fsck_err:
six_unlock_read(&b->c.lock);
if (ret < 0)
- bch_err(c, "error from %s(): %s", __func__, bch2_err_str(ret));
+ bch_err_fn(c, ret);
printbuf_exit(&buf);
return ret;
}
? bch2_gc_btree_init(&trans, ids[i], metadata_only)
: bch2_gc_btree(&trans, ids[i], initial, metadata_only);
+ for (i = BTREE_ID_NR; i < btree_id_nr_alive(c) && !ret; i++) {
+ if (!bch2_btree_id_root(c, i)->alive)
+ continue;
+
+ ret = initial
+ ? bch2_gc_btree_init(&trans, i, metadata_only)
+ : bch2_gc_btree(&trans, i, initial, metadata_only);
+ }
+
if (ret < 0)
- bch_err(c, "error from %s(): %s", __func__, bch2_err_str(ret));
+ bch_err_fn(c, ret);
bch2_trans_exit(&trans);
return ret;
for_each_member_device(ca, c, dev) {
struct bch_dev_usage *dst = ca->usage_base;
struct bch_dev_usage *src = (void *)
- bch2_acc_percpu_u64s((void *) ca->usage_gc,
+ bch2_acc_percpu_u64s((u64 __percpu *) ca->usage_gc,
dev_usage_u64s());
copy_dev_field(buckets_ec, "buckets_ec");
unsigned nr = fs_usage_u64s(c);
struct bch_fs_usage *dst = c->usage_base;
struct bch_fs_usage *src = (void *)
- bch2_acc_percpu_u64s((void *) c->usage_gc, nr);
+ bch2_acc_percpu_u64s((u64 __percpu *) c->usage_gc, nr);
copy_fs_field(hidden, "hidden");
copy_fs_field(btree, "btree");
if (ca)
percpu_ref_put(&ca->ref);
if (ret)
- bch_err(c, "error from %s(): %s", __func__, bch2_err_str(ret));
+ bch_err_fn(c, ret);
percpu_up_write(&c->mark_lock);
printbuf_exit(&buf);
sizeof(u64), GFP_KERNEL);
if (!c->usage_gc) {
bch_err(c, "error allocating c->usage_gc");
- return -ENOMEM;
+ return -BCH_ERR_ENOMEM_gc_start;
}
for_each_member_device(ca, c, i) {
if (!ca->usage_gc) {
bch_err(c, "error allocating ca->usage_gc");
percpu_ref_put(&ca->ref);
- return -ENOMEM;
+ return -BCH_ERR_ENOMEM_gc_start;
}
this_cpu_write(ca->usage_gc->d[BCH_DATA_free].buckets,
if (gen_after(old->gen, gc.gen))
return 0;
+ if (c->opts.reconstruct_alloc ||
+ fsck_err_on(new.data_type != gc.data_type, c,
+ "bucket %llu:%llu gen %u has wrong data_type"
+ ": got %s, should be %s",
+ iter->pos.inode, iter->pos.offset,
+ gc.gen,
+ bch2_data_types[new.data_type],
+ bch2_data_types[gc.data_type]))
+ new.data_type = gc.data_type;
+
#define copy_bucket_field(_f) \
if (c->opts.reconstruct_alloc || \
fsck_err_on(new._f != gc._f, c, \
new._f = gc._f; \
copy_bucket_field(gen);
- copy_bucket_field(data_type);
copy_bucket_field(dirty_sectors);
copy_bucket_field(cached_sectors);
copy_bucket_field(stripe_redundancy);
if (!buckets) {
percpu_ref_put(&ca->ref);
bch_err(c, "error allocating ca->buckets[gc]");
- return -ENOMEM;
+ return -BCH_ERR_ENOMEM_gc_alloc_start;
}
buckets->first_bucket = ca->mi.first_bucket;
" should be %u",
(bch2_bkey_val_to_text(&buf, c, k), buf.buf),
r->refcount)) {
- struct bkey_i *new = bch2_bkey_make_mut(trans, k);
+ struct bkey_i *new = bch2_bkey_make_mut(trans, iter, &k, 0);
ret = PTR_ERR_OR_ZERO(new);
if (ret)
new->k.type = KEY_TYPE_deleted;
else
*bkey_refcount(new) = cpu_to_le64(r->refcount);
-
- ret = bch2_trans_update(trans, iter, new, 0);
}
fsck_err:
printbuf_exit(&buf);
r = genradix_ptr_alloc(&c->reflink_gc_table, c->reflink_gc_nr++,
GFP_KERNEL);
if (!r) {
- ret = -ENOMEM;
+ ret = -BCH_ERR_ENOMEM_gc_reflink_start;
break;
}
struct printbuf buf = PRINTBUF;
const struct bch_stripe *s;
struct gc_stripe *m;
+ bool bad = false;
unsigned i;
int ret = 0;
s = bkey_s_c_to_stripe(k).v;
m = genradix_ptr(&c->gc_stripes, k.k->p.offset);
- for (i = 0; i < s->nr_blocks; i++)
- if (stripe_blockcount_get(s, i) != (m ? m->block_sectors[i] : 0))
- goto inconsistent;
- return 0;
-inconsistent:
- if (fsck_err_on(true, c,
- "stripe has wrong block sector count %u:\n"
- " %s\n"
- " should be %u", i,
- (printbuf_reset(&buf),
- bch2_bkey_val_to_text(&buf, c, k), buf.buf),
- m ? m->block_sectors[i] : 0)) {
+ for (i = 0; i < s->nr_blocks; i++) {
+ u32 old = stripe_blockcount_get(s, i);
+ u32 new = (m ? m->block_sectors[i] : 0);
+
+ if (old != new) {
+ prt_printf(&buf, "stripe block %u has wrong sector count: got %u, should be %u\n",
+ i, old, new);
+ bad = true;
+ }
+ }
+
+ if (bad)
+ bch2_bkey_val_to_text(&buf, c, k);
+
+ if (fsck_err_on(bad, c, "%s", buf.buf)) {
struct bkey_i_stripe *new;
new = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
bch2_mark_superblocks(c);
- if (BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb) &&
- !test_bit(BCH_FS_INITIAL_GC_DONE, &c->flags) &&
- c->opts.fix_errors != FSCK_OPT_NO) {
- bch_info(c, "Starting topology repair pass");
- ret = bch2_repair_topology(c);
- if (ret)
- goto out;
- bch_info(c, "Topology repair pass done");
-
- set_bit(BCH_FS_TOPOLOGY_REPAIR_DONE, &c->flags);
- }
-
ret = bch2_gc_btrees(c, initial, metadata_only);
- if (ret == -BCH_ERR_need_topology_repair &&
- !test_bit(BCH_FS_TOPOLOGY_REPAIR_DONE, &c->flags) &&
- !test_bit(BCH_FS_INITIAL_GC_DONE, &c->flags)) {
- set_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags);
- SET_BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb, true);
- ret = 0;
- }
-
- if (ret == -BCH_ERR_need_topology_repair)
- ret = -BCH_ERR_fsck_errors_not_fixed;
-
if (ret)
goto out;
* allocator thread - issue wakeup in case they blocked on gc_lock:
*/
closure_wake_up(&c->freelist_wait);
+
+ if (ret)
+ bch_err_fn(c, ret);
return ret;
}
percpu_up_read(&c->mark_lock);
return 0;
update:
- u = bch2_bkey_make_mut(trans, k);
+ u = bch2_bkey_make_mut(trans, iter, &k, 0);
ret = PTR_ERR_OR_ZERO(u);
if (ret)
return ret;
bch2_extent_normalize(c, bkey_i_to_s(u));
- return bch2_trans_update(trans, iter, u, 0);
+ return 0;
}
static int bch2_alloc_write_oldest_gen(struct btree_trans *trans, struct btree_iter *iter,
ca->oldest_gen = kvmalloc(ca->mi.nbuckets, GFP_KERNEL);
if (!ca->oldest_gen) {
percpu_ref_put(&ca->ref);
- ret = -ENOMEM;
+ ret = -BCH_ERR_ENOMEM_gc_gens;
goto err;
}