#include "alloc_foreground.h"
#include "bkey_methods.h"
#include "bkey_buf.h"
+#include "btree_journal_iter.h"
#include "btree_key_cache.h"
#include "btree_locking.h"
#include "btree_update_interior.h"
#define DROP_THIS_NODE 10
#define DROP_PREV_NODE 11
+/*
+ * should_restart_for_topology_repair - decide whether mark-and-sweep should
+ * halt and defer to the check_topology recovery pass.
+ *
+ * True only when the user permits fixing errors (fix_errors != FSCK_FIX_no)
+ * and the check_topology pass has not already run to completion; this
+ * replaces the old BCH_FS_TOPOLOGY_REPAIR_DONE flag test.
+ */
+static bool should_restart_for_topology_repair(struct bch_fs *c)
+{
+ return c->opts.fix_errors != FSCK_FIX_no &&
+ !(c->recovery_passes_complete & BIT_ULL(BCH_RECOVERY_PASS_check_topology));
+}
+
static inline void __gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
{
preempt_disable();
"btree node with incorrect min_key at btree %s level %u:\n"
" prev %s\n"
" cur %s",
- bch2_btree_ids[b->c.btree_id], b->c.level,
+ bch2_btree_id_str(b->c.btree_id), b->c.level,
buf1.buf, buf2.buf) &&
- !test_bit(BCH_FS_TOPOLOGY_REPAIR_DONE, &c->flags)) {
+ should_restart_for_topology_repair(c)) {
bch_info(c, "Halting mark and sweep to start topology repair pass");
- ret = -BCH_ERR_need_topology_repair;
+ ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology);
goto err;
} else {
set_bit(BCH_FS_INITIAL_GC_UNFIXED, &c->flags);
"btree node with incorrect max_key at btree %s level %u:\n"
" %s\n"
" expected %s",
- bch2_btree_ids[b->c.btree_id], b->c.level,
+ bch2_btree_id_str(b->c.btree_id), b->c.level,
buf1.buf, buf2.buf) &&
- !test_bit(BCH_FS_TOPOLOGY_REPAIR_DONE, &c->flags)) {
+ should_restart_for_topology_repair(c)) {
bch_info(c, "Halting mark and sweep to start topology repair pass");
- ret = -BCH_ERR_need_topology_repair;
+ ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology);
goto err;
} else {
set_bit(BCH_FS_INITIAL_GC_UNFIXED, &c->flags);
"btree node overwritten by next node at btree %s level %u:\n"
" node %s\n"
" next %s",
- bch2_btree_ids[b->c.btree_id], b->c.level,
+ bch2_btree_id_str(b->c.btree_id), b->c.level,
buf1.buf, buf2.buf)) {
ret = DROP_PREV_NODE;
goto out;
"btree node with incorrect max_key at btree %s level %u:\n"
" node %s\n"
" next %s",
- bch2_btree_ids[b->c.btree_id], b->c.level,
+ bch2_btree_id_str(b->c.btree_id), b->c.level,
buf1.buf, buf2.buf))
ret = set_node_max(c, prev,
bpos_predecessor(cur->data->min_key));
"btree node overwritten by prev node at btree %s level %u:\n"
" prev %s\n"
" node %s",
- bch2_btree_ids[b->c.btree_id], b->c.level,
+ bch2_btree_id_str(b->c.btree_id), b->c.level,
buf1.buf, buf2.buf)) {
ret = DROP_THIS_NODE;
goto out;
"btree node with incorrect min_key at btree %s level %u:\n"
" prev %s\n"
" node %s",
- bch2_btree_ids[b->c.btree_id], b->c.level,
+ bch2_btree_id_str(b->c.btree_id), b->c.level,
buf1.buf, buf2.buf))
ret = set_node_min(c, cur, expected_start);
}
"btree node with incorrect max_key at btree %s level %u:\n"
" %s\n"
" expected %s",
- bch2_btree_ids[b->c.btree_id], b->c.level,
+ bch2_btree_id_str(b->c.btree_id), b->c.level,
buf1.buf, buf2.buf)) {
ret = set_node_max(c, child, b->key.k.p);
if (ret)
if (mustfix_fsck_err_on(ret == -EIO, c,
"Topology repair: unreadable btree node at btree %s level %u:\n"
" %s",
- bch2_btree_ids[b->c.btree_id],
+ bch2_btree_id_str(b->c.btree_id),
b->c.level - 1,
buf.buf)) {
bch2_btree_node_evict(trans, cur_k.k);
if (mustfix_fsck_err_on(!have_child, c,
"empty interior btree node at btree %s level %u\n"
" %s",
- bch2_btree_ids[b->c.btree_id],
+ bch2_btree_id_str(b->c.btree_id),
b->c.level, buf.buf))
ret = DROP_THIS_NODE;
err:
return ret;
}
-static int bch2_repair_topology(struct bch_fs *c)
+/*
+ * bch2_check_topology - walk every live btree root and repair btree
+ * topology recursively.
+ *
+ * Renamed from bch2_repair_topology and made non-static: it now runs as
+ * the BCH_RECOVERY_PASS_check_topology recovery pass rather than being
+ * invoked ad hoc from gc.
+ *
+ * Iterates roots while no error has occurred, skipping dead and fake
+ * roots; each root is walked under a nopath read lock by
+ * bch2_btree_repair_topology_recurse(). The DROP_THIS_NODE result is
+ * handled below (handling elided in this hunk — NOTE(review): confirm it
+ * is translated before return, so callers never see the private code).
+ */
+int bch2_check_topology(struct bch_fs *c)
 {
-	struct btree_trans trans;
+	struct btree_trans *trans = bch2_trans_get(c);
 	struct btree *b;
 	unsigned i;
 	int ret = 0;
-	bch2_trans_init(&trans, c, 0, 0);
-
-	for (i = 0; i < btree_id_nr_alive(c)&& !ret; i++) {
+	for (i = 0; i < btree_id_nr_alive(c) && !ret; i++) {
 		struct btree_root *r = bch2_btree_id_root(c, i);
 		if (!r->alive)
 		if (btree_node_fake(b))
 			continue;
-		btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_read);
-		ret = bch2_btree_repair_topology_recurse(&trans, b);
+		btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read);
+		ret = bch2_btree_repair_topology_recurse(trans, b);
 		six_unlock_read(&b->c.lock);
 		if (ret == DROP_THIS_NODE) {
 		}
 	}
-	bch2_trans_exit(&trans);
+	bch2_trans_put(trans);
 	return ret;
 }
struct bkey_s_c *k)
{
struct bch_fs *c = trans->c;
- struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(*k);
- const union bch_extent_entry *entry;
+ struct bkey_ptrs_c ptrs_c = bch2_bkey_ptrs_c(*k);
+ const union bch_extent_entry *entry_c;
struct extent_ptr_decoded p = { 0 };
bool do_update = false;
struct printbuf buf = PRINTBUF;
* XXX
* use check_bucket_ref here
*/
- bkey_for_each_ptr_decode(k->k, ptrs, p, entry) {
+ bkey_for_each_ptr_decode(k->k, ptrs_c, p, entry_c) {
struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
struct bucket *g = PTR_GC_BUCKET(ca, &p.ptr);
- enum bch_data_type data_type = bch2_bkey_ptr_data_type(*k, &entry->ptr);
+ enum bch_data_type data_type = bch2_bkey_ptr_data_type(*k, &entry_c->ptr);
if (!g->gen_valid &&
(c->opts.reconstruct_alloc ||
FSCK_NO_RATELIMIT,
"Unreadable btree node at btree %s level %u:\n"
" %s",
- bch2_btree_ids[b->c.btree_id],
+ bch2_btree_id_str(b->c.btree_id),
b->c.level - 1,
(printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(cur.k)), buf.buf)) &&
- !test_bit(BCH_FS_TOPOLOGY_REPAIR_DONE, &c->flags)) {
- ret = -BCH_ERR_need_topology_repair;
+ should_restart_for_topology_repair(c)) {
bch_info(c, "Halting mark and sweep to start topology repair pass");
+ ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology);
goto fsck_err;
} else {
/* Continue marking when opted to not
+/*
+ * bch2_gc_btrees - run gc over every btree: the standard BTREE_ID_NR
+ * trees first, then any additional live roots beyond BTREE_ID_NR.
+ *
+ * @initial is recorded on the transaction (trans->is_initial_gc) so
+ * lower layers know gc is running from recovery, and selects
+ * bch2_gc_btree_init() over bch2_gc_btree(). Both loops stop at the
+ * first error; a negative result is logged via bch_err_fn().
+ *
+ * Converted from an on-stack btree_trans to bch2_trans_get()/
+ * bch2_trans_put() as part of the heap-allocated-trans refactor.
+ */
 static int bch2_gc_btrees(struct bch_fs *c, bool initial, bool metadata_only)
 {
-	struct btree_trans trans;
+	struct btree_trans *trans = bch2_trans_get(c);
 	enum btree_id ids[BTREE_ID_NR];
 	unsigned i;
 	int ret = 0;
-	bch2_trans_init(&trans, c, 0, 0);
-
 	if (initial)
-		trans.is_initial_gc = true;
+		trans->is_initial_gc = true;
 	for (i = 0; i < BTREE_ID_NR; i++)
 		ids[i] = i;
 	for (i = 0; i < BTREE_ID_NR && !ret; i++)
 		ret = initial
-			? bch2_gc_btree_init(&trans, ids[i], metadata_only)
-			: bch2_gc_btree(&trans, ids[i], initial, metadata_only);
+			? bch2_gc_btree_init(trans, ids[i], metadata_only)
+			: bch2_gc_btree(trans, ids[i], initial, metadata_only);
 	for (i = BTREE_ID_NR; i < btree_id_nr_alive(c) && !ret; i++) {
 		if (!bch2_btree_id_root(c, i)->alive)
 			continue;
 		ret = initial
-			? bch2_gc_btree_init(&trans, i, metadata_only)
-			: bch2_gc_btree(&trans, i, initial, metadata_only);
+			? bch2_gc_btree_init(trans, i, metadata_only)
+			: bch2_gc_btree(trans, i, initial, metadata_only);
 	}
 	if (ret < 0)
 		bch_err_fn(c, ret);
-	bch2_trans_exit(&trans);
+	bch2_trans_put(trans);
 	return ret;
 }
fsck_err(c, _msg ": got %llu, should be %llu" \
, ##__VA_ARGS__, dst->_f, src->_f))) \
dst->_f = src->_f
-#define copy_stripe_field(_f, _msg, ...) \
- if (dst->_f != src->_f && \
- (!verify || \
- fsck_err(c, "stripe %zu has wrong "_msg \
- ": got %u, should be %u", \
- iter.pos, ##__VA_ARGS__, \
- dst->_f, src->_f))) \
- dst->_f = src->_f
#define copy_dev_field(_f, _msg, ...) \
copy_field(_f, "dev %u has wrong " _msg, dev, ##__VA_ARGS__)
#define copy_fs_field(_f, _msg, ...) \
copy_dev_field(d[i].sectors, "%s sectors", bch2_data_types[i]);
copy_dev_field(d[i].fragmented, "%s fragmented", bch2_data_types[i]);
}
- };
+ }
{
unsigned nr = fs_usage_u64s(c);
static int bch2_gc_alloc_done(struct bch_fs *c, bool metadata_only)
{
- struct btree_trans trans;
+ struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct bkey_s_c k;
struct bch_dev *ca;
unsigned i;
int ret = 0;
- bch2_trans_init(&trans, c, 0, 0);
-
for_each_member_device(ca, c, i) {
- ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_alloc,
+ ret = for_each_btree_key_commit(trans, iter, BTREE_ID_alloc,
POS(ca->dev_idx, ca->mi.first_bucket),
BTREE_ITER_SLOTS|BTREE_ITER_PREFETCH, k,
NULL, NULL, BTREE_INSERT_LAZY_RW,
- bch2_alloc_write_key(&trans, &iter, k, metadata_only));
+ bch2_alloc_write_key(trans, &iter, k, metadata_only));
if (ret < 0) {
- bch_err(c, "error writing alloc info: %s", bch2_err_str(ret));
+ bch_err_fn(c, ret);
percpu_ref_put(&ca->ref);
break;
}
}
- bch2_trans_exit(&trans);
+ bch2_trans_put(trans);
return ret < 0 ? ret : 0;
}
static int bch2_gc_alloc_start(struct bch_fs *c, bool metadata_only)
{
struct bch_dev *ca;
- struct btree_trans trans;
+ struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct bkey_s_c k;
struct bucket *g;
if (!buckets) {
percpu_ref_put(&ca->ref);
bch_err(c, "error allocating ca->buckets[gc]");
- return -BCH_ERR_ENOMEM_gc_alloc_start;
+ ret = -BCH_ERR_ENOMEM_gc_alloc_start;
+ goto err;
}
buckets->first_bucket = ca->mi.first_bucket;
buckets->nbuckets = ca->mi.nbuckets;
rcu_assign_pointer(ca->buckets_gc, buckets);
- };
-
- bch2_trans_init(&trans, c, 0, 0);
+ }
- for_each_btree_key(&trans, iter, BTREE_ID_alloc, POS_MIN,
+ for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN,
BTREE_ITER_PREFETCH, k, ret) {
ca = bch_dev_bkey_exists(c, k.k->p.inode);
g = gc_bucket(ca, k.k->p.offset);
g->stripe_redundancy = a->stripe_redundancy;
}
}
- bch2_trans_iter_exit(&trans, &iter);
-
- bch2_trans_exit(&trans);
-
+ bch2_trans_iter_exit(trans, &iter);
+err:
+ bch2_trans_put(trans);
if (ret)
- bch_err(c, "error reading alloc info at gc start: %s", bch2_err_str(ret));
-
+ bch_err_fn(c, ret);
return ret;
}
g->dirty_sectors = 0;
g->cached_sectors = 0;
}
- };
+ }
}
static int bch2_gc_write_reflink_key(struct btree_trans *trans,
static int bch2_gc_reflink_done(struct bch_fs *c, bool metadata_only)
{
- struct btree_trans trans;
+ struct btree_trans *trans;
struct btree_iter iter;
struct bkey_s_c k;
size_t idx = 0;
if (metadata_only)
return 0;
- bch2_trans_init(&trans, c, 0, 0);
+ trans = bch2_trans_get(c);
- ret = for_each_btree_key_commit(&trans, iter,
+ ret = for_each_btree_key_commit(trans, iter,
BTREE_ID_reflink, POS_MIN,
BTREE_ITER_PREFETCH, k,
NULL, NULL, BTREE_INSERT_NOFAIL,
- bch2_gc_write_reflink_key(&trans, &iter, k, &idx));
+ bch2_gc_write_reflink_key(trans, &iter, k, &idx));
c->reflink_gc_nr = 0;
- bch2_trans_exit(&trans);
+ bch2_trans_put(trans);
return ret;
}
static int bch2_gc_reflink_start(struct bch_fs *c,
bool metadata_only)
{
- struct btree_trans trans;
+ struct btree_trans *trans;
struct btree_iter iter;
struct bkey_s_c k;
struct reflink_gc *r;
if (metadata_only)
return 0;
- bch2_trans_init(&trans, c, 0, 0);
+ trans = bch2_trans_get(c);
c->reflink_gc_nr = 0;
- for_each_btree_key(&trans, iter, BTREE_ID_reflink, POS_MIN,
+ for_each_btree_key(trans, iter, BTREE_ID_reflink, POS_MIN,
BTREE_ITER_PREFETCH, k, ret) {
const __le64 *refcount = bkey_refcount_c(k);
r->size = k.k->size;
r->refcount = 0;
}
- bch2_trans_iter_exit(&trans, &iter);
+ bch2_trans_iter_exit(trans, &iter);
- bch2_trans_exit(&trans);
+ bch2_trans_put(trans);
return ret;
}
static int bch2_gc_stripes_done(struct bch_fs *c, bool metadata_only)
{
- struct btree_trans trans;
+ struct btree_trans *trans;
struct btree_iter iter;
struct bkey_s_c k;
int ret = 0;
if (metadata_only)
return 0;
- bch2_trans_init(&trans, c, 0, 0);
+ trans = bch2_trans_get(c);
- ret = for_each_btree_key_commit(&trans, iter,
+ ret = for_each_btree_key_commit(trans, iter,
BTREE_ID_stripes, POS_MIN,
BTREE_ITER_PREFETCH, k,
NULL, NULL, BTREE_INSERT_NOFAIL,
- bch2_gc_write_stripes_key(&trans, &iter, k));
+ bch2_gc_write_stripes_key(trans, &iter, k));
- bch2_trans_exit(&trans);
+ bch2_trans_put(trans);
return ret;
}
/**
* bch2_gc - walk _all_ references to buckets, and recompute them:
*
+ * @c: filesystem object
+ * @initial: are we in recovery?
+ * @metadata_only: are we just checking metadata references, or everything?
+ *
+ * Returns: 0 on success, or standard errcode on failure
+ *
* Order matters here:
* - Concurrent GC relies on the fact that we have a total ordering for
* everything that GC walks - see gc_will_visit_node(),
bch2_mark_superblocks(c);
- if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) ||
- (BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb) &&
- !test_bit(BCH_FS_INITIAL_GC_DONE, &c->flags) &&
- c->opts.fix_errors != FSCK_OPT_NO)) {
- bch_info(c, "Starting topology repair pass");
- ret = bch2_repair_topology(c);
- if (ret)
- goto out;
- bch_info(c, "Topology repair pass done");
-
- set_bit(BCH_FS_TOPOLOGY_REPAIR_DONE, &c->flags);
- }
-
ret = bch2_gc_btrees(c, initial, metadata_only);
- if (ret == -BCH_ERR_need_topology_repair &&
- !test_bit(BCH_FS_TOPOLOGY_REPAIR_DONE, &c->flags) &&
- !test_bit(BCH_FS_INITIAL_GC_DONE, &c->flags)) {
- set_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags);
- SET_BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb, true);
- ret = 0;
- }
-
- if (ret == -BCH_ERR_need_topology_repair)
- ret = -BCH_ERR_fsck_errors_not_fixed;
-
if (ret)
goto out;
int bch2_gc_gens(struct bch_fs *c)
{
- struct btree_trans trans;
+ struct btree_trans *trans;
struct btree_iter iter;
struct bkey_s_c k;
struct bch_dev *ca;
trace_and_count(c, gc_gens_start, c);
down_read(&c->gc_lock);
- bch2_trans_init(&trans, c, 0, 0);
+ trans = bch2_trans_get(c);
for_each_member_device(ca, c, i) {
struct bucket_gens *gens;
for (i = 0; i < BTREE_ID_NR; i++)
if (btree_type_has_ptrs(i)) {
- struct btree_iter iter;
- struct bkey_s_c k;
-
c->gc_gens_btree = i;
c->gc_gens_pos = POS_MIN;
- ret = for_each_btree_key_commit(&trans, iter, i,
+
+ ret = for_each_btree_key_commit(trans, iter, i,
POS_MIN,
BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS,
k,
NULL, NULL,
BTREE_INSERT_NOFAIL,
- gc_btree_gens_key(&trans, &iter, k));
+ gc_btree_gens_key(trans, &iter, k));
if (ret && !bch2_err_matches(ret, EROFS))
- bch_err(c, "error recalculating oldest_gen: %s", bch2_err_str(ret));
+ bch_err_fn(c, ret);
if (ret)
goto err;
}
- ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_alloc,
+ ret = for_each_btree_key_commit(trans, iter, BTREE_ID_alloc,
POS_MIN,
BTREE_ITER_PREFETCH,
k,
NULL, NULL,
BTREE_INSERT_NOFAIL,
- bch2_alloc_write_oldest_gen(&trans, &iter, k));
+ bch2_alloc_write_oldest_gen(trans, &iter, k));
if (ret && !bch2_err_matches(ret, EROFS))
- bch_err(c, "error writing oldest_gen: %s", bch2_err_str(ret));
+ bch_err_fn(c, ret);
if (ret)
goto err;
ca->oldest_gen = NULL;
}
- bch2_trans_exit(&trans);
+ bch2_trans_put(trans);
up_read(&c->gc_lock);
mutex_unlock(&c->gc_gens_lock);
return ret;
ret = bch2_gc_gens(c);
#endif
if (ret < 0)
- bch_err(c, "btree gc failed: %s", bch2_err_str(ret));
+ bch_err_fn(c, ret);
debug_check_no_locks_held();
}
p = kthread_create(bch2_gc_thread, c, "bch-gc/%s", c->name);
if (IS_ERR(p)) {
- bch_err(c, "error creating gc thread: %s", bch2_err_str(PTR_ERR(p)));
+ bch_err_fn(c, PTR_ERR(p));
return PTR_ERR(p);
}