-ba398d29060ecc2e2c9d6292a94ddc181761de1a
+a0d7001b0f35580ec941acc553cf5fe28d6efea9
opt_set(opts, sb, sb_offset);
opt_set(opts, nostart, true);
opt_set(opts, noexcl, true);
+ opt_set(opts, buckets_nouse, true);
c = bch2_fs_open(path, 1, opts);
if (IS_ERR(c))
TP_STRUCT__entry(
__array(char, trans_fn, 24 )
- __array(char, caller, 32 )
+ __field(unsigned long, caller_ip )
__field(u8, btree_id )
__field(u64, pos_inode )
__field(u64, pos_offset )
TP_fast_assign(
strncpy(__entry->trans_fn, trans_fn, sizeof(__entry->trans_fn));
- snprintf(__entry->caller, sizeof(__entry->caller), "%pS", (void *) caller_ip);
+ __entry->caller_ip = caller_ip;
__entry->btree_id = btree_id;
__entry->pos_inode = pos->inode;
__entry->pos_offset = pos->offset;
__entry->node_lock_seq = node_lock_seq;
),
- TP_printk("%s %s btree %u pos %llu:%llu:%u, node %lu iter seq %u lock seq %u",
+ TP_printk("%s %pS btree %u pos %llu:%llu:%u, node %lu iter seq %u lock seq %u",
__entry->trans_fn,
- __entry->caller,
+ (void *) __entry->caller_ip,
__entry->btree_id,
__entry->pos_inode,
__entry->pos_offset,
TP_STRUCT__entry(
__array(char, trans_fn, 24 )
- __array(char, caller, 32 )
+ __field(unsigned long, caller_ip )
__field(u8, btree_id )
__field(u64, pos_inode )
__field(u64, pos_offset )
TP_fast_assign(
strncpy(__entry->trans_fn, trans_fn, sizeof(__entry->trans_fn));
- snprintf(__entry->caller, sizeof(__entry->caller), "%pS", (void *) caller_ip);
+ __entry->caller_ip = caller_ip;
__entry->btree_id = btree_id;
__entry->pos_inode = pos->inode;
__entry->pos_offset = pos->offset;
__entry->pos_snapshot = pos->snapshot;
),
- TP_printk("%s %s btree %u pos %llu:%llu:%u",
+ TP_printk("%s %pS btree %u pos %llu:%llu:%u",
__entry->trans_fn,
- __entry->caller,
+ (void *) __entry->caller_ip,
__entry->btree_id,
__entry->pos_inode,
__entry->pos_offset,
}
}
-static void verify_not_stale(struct bch_fs *c, const struct open_buckets *obs)
-{
-#ifdef CONFIG_BCACHEFS_DEBUG
- struct open_bucket *ob;
- unsigned i;
-
- rcu_read_lock();
- open_bucket_for_each(c, obs, ob, i) {
- struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev);
-
- BUG_ON(*bucket_gen(ca, ob->bucket) != ob->gen);
- }
- rcu_read_unlock();
-#endif
-}
-
/* _only_ for allocating the journal on a new device: */
long bch2_bucket_alloc_new_fs(struct bch_dev *ca)
{
BUG_ON(!wp->sectors_free || wp->sectors_free == UINT_MAX);
- verify_not_stale(c, &wp->ptrs);
-
return wp;
err:
open_bucket_for_each(c, &wp->ptrs, ob, i)
"significantly affect performance") \
BCH_DEBUG_PARAM(debug_check_iterators, \
"Enables extra verification for btree iterators") \
- BCH_DEBUG_PARAM(debug_check_bkeys, \
- "Run bkey_debugcheck (primarily checking GC/allocation "\
- "information) when iterating over keys") \
BCH_DEBUG_PARAM(debug_check_btree_accounting, \
"Verify btree accounting for keys within a node") \
BCH_DEBUG_PARAM(journal_seq_verify, \
* it's not while a gc is in progress.
*/
struct rw_semaphore gc_lock;
+ struct mutex gc_gens_lock;
/* IO PATH */
struct semaphore io_in_flight;
return NULL;
}
-void bch2_bkey_debugcheck(struct bch_fs *c, struct btree *b, struct bkey_s_c k)
-{
- const char *invalid;
-
- BUG_ON(!k.k->u64s);
-
- invalid = bch2_bkey_invalid(c, k, btree_node_type(b)) ?:
- bch2_bkey_in_btree_node(b, k);
- if (invalid) {
- char buf[160];
-
- bch2_bkey_val_to_text(&PBUF(buf), c, k);
- bch2_fs_inconsistent(c, "invalid bkey %s: %s", buf, invalid);
- }
-}
-
void bch2_bpos_to_text(struct printbuf *out, struct bpos pos)
{
if (!bpos_cmp(pos, POS_MIN))
enum btree_node_type);
const char *bch2_bkey_in_btree_node(struct btree *, struct bkey_s_c);
-void bch2_bkey_debugcheck(struct bch_fs *, struct btree *, struct bkey_s_c);
-
void bch2_bpos_to_text(struct printbuf *, struct bpos);
void bch2_bkey_to_text(struct printbuf *, const struct bkey *);
void bch2_val_to_text(struct printbuf *, struct bch_fs *,
static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id,
unsigned level, bool is_root,
struct bkey_s_c *k,
- u8 *max_stale, bool initial)
+ bool initial)
{
struct bch_fs *c = trans->c;
- struct bkey_ptrs_c ptrs;
- const struct bch_extent_ptr *ptr;
struct bkey deleted = KEY(0, 0, 0);
struct bkey_s_c old = (struct bkey_s_c) { &deleted, NULL };
unsigned flags =
atomic64_set(&c->key_version, k->k->version.lo);
}
- ptrs = bch2_bkey_ptrs_c(*k);
- bkey_for_each_ptr(ptrs, ptr) {
- struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
- struct bucket *g = PTR_GC_BUCKET(ca, ptr);
-
- if (gen_after(g->oldest_gen, ptr->gen))
- g->oldest_gen = ptr->gen;
-
- *max_stale = max(*max_stale, ptr_stale(ca, ptr));
- }
-
ret = bch2_mark_key(trans, old, *k, flags);
fsck_err:
err:
return ret;
}
-static int btree_gc_mark_node(struct btree_trans *trans, struct btree *b, u8 *max_stale,
- bool initial)
+static int btree_gc_mark_node(struct btree_trans *trans, struct btree *b, bool initial)
{
struct bch_fs *c = trans->c;
struct btree_node_iter iter;
struct bkey_buf prev, cur;
int ret = 0;
- *max_stale = 0;
-
if (!btree_node_type_needs_gc(btree_node_type(b)))
return 0;
while ((k = bch2_btree_node_iter_peek_unpack(&iter, b, &unpacked)).k) {
ret = bch2_gc_mark_key(trans, b->c.btree_id, b->c.level, false,
- &k, max_stale, initial);
+ &k, initial);
if (ret)
break;
: bch2_expensive_debug_checks ? 0
: !btree_node_type_needs_gc(btree_id) ? 1
: 0;
- u8 max_stale = 0;
int ret = 0;
gc_pos_set(c, gc_pos_btree(btree_id, POS_MIN, 0));
gc_pos_set(c, gc_pos_btree_node(b));
- ret = btree_gc_mark_node(trans, b, &max_stale, initial);
+ ret = btree_gc_mark_node(trans, b, initial);
if (ret)
break;
-
- if (!initial) {
- if (max_stale > 64)
- bch2_btree_node_rewrite(trans, &iter, b,
- BTREE_INSERT_NOWAIT|
- BTREE_INSERT_GC_LOCK_HELD);
- else if (!bch2_btree_gc_rewrite_disabled &&
- (bch2_btree_gc_always_rewrite || max_stale > 16))
- bch2_btree_node_rewrite(trans, &iter,
- b, BTREE_INSERT_NOWAIT|
- BTREE_INSERT_GC_LOCK_HELD);
- }
}
bch2_trans_iter_exit(trans, &iter);
if (!btree_node_fake(b)) {
struct bkey_s_c k = bkey_i_to_s_c(&b->key);
- ret = bch2_gc_mark_key(trans, b->c.btree_id, b->c.level, true,
- &k, &max_stale, initial);
+ ret = bch2_gc_mark_key(trans, b->c.btree_id, b->c.level,
+ true, &k, initial);
}
gc_pos_set(c, gc_pos_btree_root(b->c.btree_id));
mutex_unlock(&c->btree_root_lock);
struct btree_and_journal_iter iter;
struct bkey_s_c k;
struct bkey_buf cur, prev;
- u8 max_stale = 0;
char buf[200];
int ret = 0;
BUG_ON(bpos_cmp(k.k->p, b->data->min_key) < 0);
BUG_ON(bpos_cmp(k.k->p, b->data->max_key) > 0);
- ret = bch2_gc_mark_key(trans, b->c.btree_id, b->c.level, false,
- &k, &max_stale, true);
+ ret = bch2_gc_mark_key(trans, b->c.btree_id, b->c.level,
+ false, &k, true);
if (ret) {
bch_err(c, "%s: error %i from bch2_gc_mark_key", __func__, ret);
goto fsck_err;
: bch2_expensive_debug_checks ? 0
: !btree_node_type_needs_gc(btree_id) ? 1
: 0;
- u8 max_stale = 0;
char buf[100];
int ret = 0;
struct bkey_s_c k = bkey_i_to_s_c(&b->key);
ret = bch2_gc_mark_key(trans, b->c.btree_id, b->c.level, true,
- &k, &max_stale, true);
+ &k, true);
}
fsck_err:
six_unlock_read(&b->c.lock);
.dev = iter->pos.inode,
.bucket = iter->pos.offset,
.gen = g->mark.gen,
- .oldest_gen = g->oldest_gen,
.data_type = g->mark.data_type,
.dirty_sectors = g->mark.dirty_sectors,
.cached_sectors = g->mark.cached_sectors,
gc_u.data_type != BCH_DATA_btree)
return 0;
- if (!bkey_alloc_unpacked_cmp(old_u, gc_u) ||
- gen_after(old_u.gen, gc_u.gen))
+ if (gen_after(old_u.gen, gc_u.gen))
return 0;
#define copy_bucket_field(_f) \
copy_bucket_field(stripe);
#undef copy_bucket_field
- new_u.oldest_gen = gc_u.oldest_gen;
-
if (!bkey_alloc_unpacked_cmp(old_u, new_u))
return 0;
* introduces a deadlock in the RO path - we currently take the state
* lock at the start of going RO, thus the gc thread may get stuck:
*/
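+ /*
+ * If gc_gens is already running in another thread, don't run it again
+ * concurrently:
+ */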
+ if (!mutex_trylock(&c->gc_gens_lock))
+ return 0;
+
down_read(&c->gc_lock);
bch2_trans_init(&trans, c, 0, 0);
bch2_trans_exit(&trans);
up_read(&c->gc_lock);
+ mutex_unlock(&c->gc_gens_lock);
return ret;
}
struct bpos r_pos,
unsigned r_level)
{
+ /*
+ * Must match lock ordering as defined by __bch2_btree_node_lock:
+ */
return cmp_int(l->btree_id, r_btree_id) ?:
cmp_int((int) l->cached, (int) r_cached) ?:
bpos_cmp(l->pos, r_pos) ?:
else
this_cpu_sub(*b->c.lock.readers, readers);
- btree_node_lock_type(trans->c, b, SIX_LOCK_write);
+ six_lock_write(&b->c.lock, NULL, NULL);
if (!b->c.lock.readers)
atomic64_add(__SIX_VAL(read_lock, readers),
six_lock_should_sleep_fn should_sleep_fn, void *p,
unsigned long ip)
{
- struct btree_path *linked, *deadlock_path = NULL;
- u64 start_time = local_clock();
- unsigned reason = 9;
- bool ret;
+ struct btree_path *linked;
+ unsigned reason;
/* Check if it's safe to block: */
trans_for_each_path(trans, linked) {
*/
if (type == SIX_LOCK_intent &&
linked->nodes_locked != linked->nodes_intent_locked) {
- deadlock_path = linked;
reason = 1;
+ goto deadlock;
}
if (linked->btree_id != path->btree_id) {
- if (linked->btree_id > path->btree_id) {
- deadlock_path = linked;
- reason = 3;
- }
- continue;
+ if (linked->btree_id < path->btree_id)
+ continue;
+
+ reason = 3;
+ goto deadlock;
}
/*
- * Within the same btree, cached paths come before non
- * cached paths:
+ * Within the same btree, non-cached paths come before cached
+ * paths:
*/
if (linked->cached != path->cached) {
- if (path->cached) {
- deadlock_path = linked;
- reason = 4;
- }
- continue;
+ if (!linked->cached)
+ continue;
+
+ reason = 4;
+ goto deadlock;
}
/*
* we're about to lock, it must have the ancestors locked too:
*/
if (level > __fls(linked->nodes_locked)) {
- deadlock_path = linked;
reason = 5;
+ goto deadlock;
}
/* Must lock btree nodes in key order: */
if (btree_node_locked(linked, level) &&
bpos_cmp(pos, btree_node_pos((void *) linked->l[level].b,
linked->cached)) <= 0) {
- deadlock_path = linked;
- reason = 7;
BUG_ON(trans->in_traverse_all);
+ reason = 7;
+ goto deadlock;
}
}
- if (unlikely(deadlock_path)) {
- trace_trans_restart_would_deadlock(trans->fn, ip,
- trans->in_traverse_all, reason,
- deadlock_path->btree_id,
- deadlock_path->cached,
- &deadlock_path->pos,
- path->btree_id,
- path->cached,
- &pos);
- btree_trans_restart(trans);
- return false;
- }
-
- if (six_trylock_type(&b->c.lock, type))
- return true;
-
- trans->locking_path_idx = path->idx;
- trans->locking_pos = pos;
- trans->locking_btree_id = path->btree_id;
- trans->locking_level = level;
- trans->locking = b;
-
- ret = six_lock_type(&b->c.lock, type, should_sleep_fn, p) == 0;
-
- trans->locking = NULL;
-
- if (ret)
- bch2_time_stats_update(&trans->c->times[lock_to_time_stat(type)],
- start_time);
- return ret;
+ return btree_node_lock_type(trans, path, b, pos, level,
+ type, should_sleep_fn, p);
+deadlock:
+ trace_trans_restart_would_deadlock(trans->fn, ip,
+ trans->in_traverse_all, reason,
+ linked->btree_id,
+ linked->cached,
+ &linked->pos,
+ path->btree_id,
+ path->cached,
+ &pos);
+ btree_trans_restart(trans);
+ return false;
}
/* Btree iterator locking: */
struct bkey *u,
struct bkey_packed *k)
{
- struct bkey_s_c ret;
-
if (unlikely(!k)) {
/*
* signal to bch2_btree_iter_peek_slot() that we're currently at
return bkey_s_c_null;
}
- ret = bkey_disassemble(l->b, k, u);
-
- /*
- * XXX: bch2_btree_bset_insert_key() generates invalid keys when we
- * overwrite extents - it sets k->type = KEY_TYPE_deleted on the key
- * being overwritten but doesn't change k->size. But this is ok, because
- * those keys are never written out, we just have to avoid a spurious
- * assertion here:
- */
- if (bch2_debug_check_bkeys && !bkey_deleted(ret.k))
- bch2_bkey_debugcheck(c, l->b, ret);
-
- return ret;
+ return bkey_disassemble(l->b, k, u);
}
static inline struct bkey_s_c btree_path_level_peek_all(struct bch_fs *c,
while (i < trans->nr_sorted) {
path = trans->paths + trans->sorted[i];
- EBUG_ON(!(trans->paths_allocated & (1ULL << path->idx)));
-
- ret = btree_path_traverse_one(trans, path, 0, _THIS_IP_);
- if (ret)
- goto retry_all;
-
- EBUG_ON(!(trans->paths_allocated & (1ULL << path->idx)));
-
- if (path->nodes_locked ||
- !btree_path_node(path, path->level))
+ /*
+ * Traversing a path can cause another path to be added at about
+ * the same position:
+ */
+ if (path->uptodate) {
+ ret = btree_path_traverse_one(trans, path, 0, _THIS_IP_);
+ if (ret)
+ goto retry_all;
+ } else {
i++;
+ }
}
/*
const char *fn)
__acquires(&c->btree_trans_barrier)
{
+ BUG_ON(lock_class_is_held(&bch2_btree_node_lock_key));
+
memset(trans, 0, sizeof(*trans));
trans->c = c;
trans->fn = fn;
struct btree_trans *trans;
struct btree_path *path;
struct btree *b;
+ static char lock_types[] = { 'r', 'i', 'w' };
unsigned l;
mutex_lock(&c->btree_trans_lock);
b = READ_ONCE(trans->locking);
if (b) {
path = &trans->paths[trans->locking_path_idx];
- pr_buf(out, " locking path %u %c l=%u %s:",
+ pr_buf(out, " locking path %u %c l=%u %c %s:",
trans->locking_path_idx,
path->cached ? 'c' : 'b',
trans->locking_level,
+ lock_types[trans->locking_lock_type],
bch2_btree_ids[trans->locking_btree_id]);
bch2_bpos_to_text(out, trans->locking_pos);
if (!trans->restarted)
goto retry;
- trace_transaction_restart_ip(trans->fn, _THIS_IP_);
ret = -EINTR;
goto err;
}
}
}
-/*
- * wrapper around six locks that just traces lock contended time
- */
-static inline void __btree_node_lock_type(struct bch_fs *c, struct btree *b,
- enum six_lock_type type)
-{
- u64 start_time = local_clock();
-
- six_lock_type(&b->c.lock, type, NULL, NULL);
- bch2_time_stats_update(&c->times[lock_to_time_stat(type)], start_time);
-}
-
-static inline void btree_node_lock_type(struct bch_fs *c, struct btree *b,
- enum six_lock_type type)
-{
- if (!six_trylock_type(&b->c.lock, type))
- __btree_node_lock_type(c, b, type);
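+/*
+ * Take a btree node lock; if we block, record what we're blocked on in
+ * trans->locking (reported by bch2_btree_trans_to_text()) and track the
+ * time spent waiting:
+ */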
+static inline bool btree_node_lock_type(struct btree_trans *trans,
+ struct btree_path *path,
+ struct btree *b,
+ struct bpos pos, unsigned level,
+ enum six_lock_type type,
+ six_lock_should_sleep_fn should_sleep_fn, void *p)
+{
+ struct bch_fs *c = trans->c;
+ u64 start_time;
+ bool ret;
+
+ if (six_trylock_type(&b->c.lock, type))
+ return true;
+
+ start_time = local_clock();
+
+ trans->locking_path_idx = path->idx;
+ trans->locking_pos = pos;
+ trans->locking_btree_id = path->btree_id;
+ trans->locking_level = level;
+ trans->locking_lock_type = type;
+ trans->locking = b;
+ ret = six_lock_type(&b->c.lock, type, should_sleep_fn, p) == 0;
+ trans->locking = NULL;
+
+ if (ret)
+ bch2_time_stats_update(&c->times[lock_to_time_stat(type)], start_time);
+
+ return ret;
}
/*
struct bpos locking_pos;
u8 locking_btree_id;
u8 locking_level;
+ u8 locking_lock_type;
pid_t pid;
int srcu_idx;
* we're in journal error state:
*/
- btree_node_lock_type(c, b, SIX_LOCK_intent);
- btree_node_lock_type(c, b, SIX_LOCK_write);
+ six_lock_intent(&b->c.lock, NULL, NULL);
+ six_lock_write(&b->c.lock, NULL, NULL);
mutex_lock(&c->btree_interior_update_lock);
list_del(&as->write_blocked_list);
for (i = 0; i < as->nr_new_nodes; i++) {
b = as->new_nodes[i];
- btree_node_lock_type(c, b, SIX_LOCK_read);
+ six_lock_read(&b->c.lock, NULL, NULL);
btree_node_write_if_need(c, b, SIX_LOCK_read);
six_unlock_read(&b->c.lock);
}
struct btree_write *w = container_of(pin, struct btree_write, journal);
struct btree *b = container_of(w, struct btree, writes[i]);
- btree_node_lock_type(c, b, SIX_LOCK_read);
+ six_lock_read(&b->c.lock, NULL, NULL);
bch2_btree_node_write_cond(c, b,
(btree_current_write(b) == w && w->journal.seq == seq));
six_unlock_read(&b->c.lock);
if (have_conflicting_read_lock(trans, i->path))
goto fail;
- __btree_node_lock_type(trans->c, insert_l(i)->b,
- SIX_LOCK_write);
+ btree_node_lock_type(trans, i->path,
+ insert_l(i)->b,
+ i->path->pos, i->level,
+ SIX_LOCK_write, NULL, NULL);
}
bch2_btree_node_prep_for_write(trans, i->path, insert_l(i)->b);
struct bkey_s_c k,
const struct bch_extent_ptr *ptr,
s64 sectors, enum bch_data_type ptr_data_type,
- u8 bucket_gen, u8 bucket_data_type,
+ u8 b_gen, u8 bucket_data_type,
u16 dirty_sectors, u16 cached_sectors)
{
- size_t bucket_nr = PTR_BUCKET_NR(bch_dev_bkey_exists(c, ptr->dev), ptr);
+ struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
+ size_t bucket_nr = PTR_BUCKET_NR(ca, ptr);
u16 bucket_sectors = !ptr->cached
? dirty_sectors
: cached_sectors;
char buf[200];
- if (gen_after(ptr->gen, bucket_gen)) {
+ if (gen_after(ptr->gen, b_gen)) {
bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
"bucket %u:%zu gen %u data type %s: ptr gen %u newer than bucket gen\n"
"while marking %s",
- ptr->dev, bucket_nr, bucket_gen,
+ ptr->dev, bucket_nr, b_gen,
bch2_data_types[bucket_data_type ?: ptr_data_type],
ptr->gen,
(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf));
return -EIO;
}
- if (gen_cmp(bucket_gen, ptr->gen) > BUCKET_GC_GEN_MAX) {
+ if (gen_cmp(b_gen, ptr->gen) > BUCKET_GC_GEN_MAX) {
bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
"bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n"
"while marking %s",
- ptr->dev, bucket_nr, bucket_gen,
+ ptr->dev, bucket_nr, b_gen,
bch2_data_types[bucket_data_type ?: ptr_data_type],
ptr->gen,
(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf));
return -EIO;
}
- if (bucket_gen != ptr->gen && !ptr->cached) {
+ if (b_gen != ptr->gen && !ptr->cached) {
bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
"bucket %u:%zu gen %u data type %s: stale dirty ptr (gen %u)\n"
"while marking %s",
- ptr->dev, bucket_nr, bucket_gen,
+ ptr->dev, bucket_nr, b_gen,
bch2_data_types[bucket_data_type ?: ptr_data_type],
ptr->gen,
(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf));
return -EIO;
}
- if (bucket_gen != ptr->gen)
+ if (b_gen != ptr->gen)
return 1;
if (bucket_data_type && ptr_data_type &&
bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
"bucket %u:%zu gen %u different types of data in same bucket: %s, %s\n"
"while marking %s",
- ptr->dev, bucket_nr, bucket_gen,
+ ptr->dev, bucket_nr, b_gen,
bch2_data_types[bucket_data_type],
bch2_data_types[ptr_data_type],
(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf));
if ((unsigned) (bucket_sectors + sectors) > U16_MAX) {
bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
- "bucket %u:%zu gen %u data type %s sector count overflow: %u + %lli > U16_MAX\n"
+ "bucket %u:%zu gen %u (mem gen %u) data type %s sector count overflow: %u + %lli > U16_MAX\n"
"while marking %s",
- ptr->dev, bucket_nr, bucket_gen,
+ ptr->dev, bucket_nr, b_gen,
+ *bucket_gen(ca, bucket_nr),
bch2_data_types[bucket_data_type ?: ptr_data_type],
bucket_sectors, sectors,
(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf));
GFP_KERNEL|__GFP_ZERO)) ||
!(bucket_gens = kvpmalloc(sizeof(struct bucket_gens) + nbuckets,
GFP_KERNEL|__GFP_ZERO)) ||
- !(buckets_nouse = kvpmalloc(BITS_TO_LONGS(nbuckets) *
+ (c->opts.buckets_nouse &&
+ !(buckets_nouse = kvpmalloc(BITS_TO_LONGS(nbuckets) *
sizeof(unsigned long),
- GFP_KERNEL|__GFP_ZERO)) ||
+ GFP_KERNEL|__GFP_ZERO))) ||
!init_fifo(&free[RESERVE_MOVINGGC],
copygc_reserve, GFP_KERNEL) ||
!init_fifo(&free[RESERVE_NONE], reserve_none, GFP_KERNEL) ||
memcpy(bucket_gens->b,
old_bucket_gens->b,
n);
- memcpy(buckets_nouse,
- ca->buckets_nouse,
- BITS_TO_LONGS(n) * sizeof(unsigned long));
+ if (buckets_nouse)
+ memcpy(buckets_nouse,
+ ca->buckets_nouse,
+ BITS_TO_LONGS(n) * sizeof(unsigned long));
}
rcu_assign_pointer(ca->buckets[0], buckets);
sectors = min(sectors, k.k->size - offset_into_extent);
- bch2_trans_unlock(trans);
-
if (readpages_iter)
readpage_bio_extend(readpages_iter, &rbio->bio, sectors,
extent_partial_reads_expensive(k));
if (i->inode.bi_nlink == i->count)
continue;
- count2 = lockrestart_do(trans,
- bch2_count_subdirs(trans, w->cur_inum, i->snapshot));
+ count2 = bch2_count_subdirs(trans, w->cur_inum, i->snapshot);
+ if (count2 < 0)
+ return count2;
if (i->count != count2) {
bch_err(c, "fsck counted subdirectories wrong: got %llu should be %llu",
return ret;
}
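+/*
+ * Reading from a stale dirty pointer (a non-cached pointer whose bucket gen
+ * no longer matches) indicates an inconsistency: report it and dump the
+ * extent, the corresponding alloc key and the in-memory bucket gen.
+ */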
+static noinline void read_from_stale_dirty_pointer(struct btree_trans *trans,
+ struct bkey_s_c k,
+ struct bch_extent_ptr ptr)
+{
+ struct bch_fs *c = trans->c;
+ struct bch_dev *ca = bch_dev_bkey_exists(c, ptr.dev);
+ struct btree_iter iter;
+ char buf[200];
+ int ret;
+
+ bch2_bkey_val_to_text(&PBUF(buf), c, k);
+ bch2_fs_inconsistent(c, "Attempting to read from stale dirty pointer: %s", buf);
+
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc,
+ POS(ptr.dev, PTR_BUCKET_NR(ca, &ptr)),
+ BTREE_ITER_CACHED);
+
+ ret = lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_slot(&iter)));
+ if (ret)
+ goto out;
+
+ bch2_bkey_val_to_text(&PBUF(buf), c, k);
+ bch_err(c, "%s", buf);
+ bch_err(c, "memory gen: %u", *bucket_gen(ca, iter.pos.offset));
+out:
+ bch2_trans_iter_exit(trans, &iter);
+}
+
int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
struct bvec_iter iter, struct bpos read_pos,
enum btree_id data_btree, struct bkey_s_c k,
struct bch_fs *c = trans->c;
struct extent_ptr_decoded pick;
struct bch_read_bio *rbio = NULL;
- struct bch_dev *ca;
+ struct bch_dev *ca = NULL;
struct promote_op *promote = NULL;
bool bounce = false, read_full = false, narrow_crcs = false;
struct bpos data_pos = bkey_start_pos(k.k);
zero_fill_bio_iter(&orig->bio, iter);
goto out_read_done;
}
-
+retry_pick:
pick_ret = bch2_bkey_pick_read_device(c, k, failed, &pick);
/* hole or reservation - just zero fill: */
goto err;
}
- if (pick_ret > 0)
- ca = bch_dev_bkey_exists(c, pick.ptr.dev);
+ ca = bch_dev_bkey_exists(c, pick.ptr.dev);
+
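+ /*
+ * A non-cached (dirty) pointer should never be stale; if it is, report
+ * the inconsistency, mark this pointer as failed and pick again:
+ */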
+ if (!pick.ptr.cached &&
+ unlikely(ptr_stale(ca, &pick.ptr))) {
+ read_from_stale_dirty_pointer(trans, k, pick.ptr);
+ bch2_mark_io_failure(failed, &pick);
+ goto retry_pick;
+ }
+
+ /*
+ * Unlock the iterator while the btree node's lock is still in
+ * cache, before doing the IO:
+ */
+ bch2_trans_unlock(trans);
if (flags & BCH_READ_NODECODE) {
/*
*/
sectors = min(sectors, k.k->size - offset_into_extent);
- /*
- * Unlock the iterator while the btree node's lock is still in
- * cache, before doing the IO:
- */
- bch2_trans_unlock(&trans);
-
bytes = min(sectors, bvec_iter_sectors(bvec_iter)) << 9;
swap(bvec_iter.bi_size, bytes);
struct jset_entry *entry)
{
struct bkey_i *k;
+ bool first = true;
- pr_buf(out, "btree=%s l=%u ", bch2_btree_ids[entry->btree_id], entry->level);
-
- vstruct_for_each(entry, k)
+ vstruct_for_each(entry, k) {
+ if (!first) {
+ printbuf_newline(out);
+ pr_buf(out, "%s: ", bch2_jset_entry_types[entry->type]);
+ }
+ pr_buf(out, "btree=%s l=%u ", bch2_btree_ids[entry->btree_id], entry->level);
bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(k));
+ first = false;
+ }
}
static int journal_entry_btree_root_validate(struct bch_fs *c,
closure_put(&ctxt->cl);
}
-static void do_pending_writes(struct moving_context *ctxt)
+static void do_pending_writes(struct moving_context *ctxt, struct btree_trans *trans)
{
struct moving_io *io;
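+ /*
+ * The pending writes will be doing btree updates that need btree locks
+ * of their own - drop any locks @trans is holding before issuing them:
+ */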
+ if (trans)
+ bch2_trans_unlock(trans);
+
while ((io = next_pending_write(ctxt))) {
list_del(&io->list);
closure_call(&io->cl, move_write, NULL, &ctxt->cl);
}
}
-#define move_ctxt_wait_event(_ctxt, _cond) \
+#define move_ctxt_wait_event(_ctxt, _trans, _cond) \
do { \
- do_pending_writes(_ctxt); \
+ do_pending_writes(_ctxt, _trans); \
\
if (_cond) \
break; \
next_pending_write(_ctxt) || (_cond)); \
} while (1)
-static void bch2_move_ctxt_wait_for_io(struct moving_context *ctxt)
+static void bch2_move_ctxt_wait_for_io(struct moving_context *ctxt,
+ struct btree_trans *trans)
{
unsigned sectors_pending = atomic_read(&ctxt->write_sectors);
- move_ctxt_wait_event(ctxt,
+ move_ctxt_wait_event(ctxt, trans,
!atomic_read(&ctxt->write_sectors) ||
atomic_read(&ctxt->write_sectors) != sectors_pending);
}
unsigned sectors = k.k->size, pages;
int ret = -ENOMEM;
- move_ctxt_wait_event(ctxt,
- atomic_read(&ctxt->write_sectors) <
- SECTORS_IN_FLIGHT_PER_DEVICE);
-
- move_ctxt_wait_event(ctxt,
- atomic_read(&ctxt->read_sectors) <
- SECTORS_IN_FLIGHT_PER_DEVICE);
-
/* write path might have to decompress data: */
bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
sectors = max_t(unsigned, sectors, p.crc.uncompressed_size);
schedule_timeout(delay);
if (unlikely(freezing(current))) {
- bch2_trans_unlock(&trans);
- move_ctxt_wait_event(ctxt, list_empty(&ctxt->reads));
+ move_ctxt_wait_event(ctxt, &trans, list_empty(&ctxt->reads));
try_to_freeze();
}
} while (delay);
+ move_ctxt_wait_event(ctxt, &trans,
+ atomic_read(&ctxt->write_sectors) <
+ SECTORS_IN_FLIGHT_PER_DEVICE);
+
+ move_ctxt_wait_event(ctxt, &trans,
+ atomic_read(&ctxt->read_sectors) <
+ SECTORS_IN_FLIGHT_PER_DEVICE);
+
bch2_trans_begin(&trans);
k = bch2_btree_iter_peek(&iter);
BUG();
}
- /* unlock before doing IO: */
+ /*
+ * The iterator gets unlocked by __bch2_read_extent - need to
+ * save a copy of @k elsewhere:
+ */
bch2_bkey_buf_reassemble(&sk, c, k);
k = bkey_i_to_s_c(sk.k);
- bch2_trans_unlock(&trans);
ret2 = bch2_move_extent(&trans, ctxt, wp, io_opts, btree_id, k,
data_cmd, data_opts);
if (ret2 == -ENOMEM) {
/* memory allocation failure, wait for some IO to finish */
- bch2_move_ctxt_wait_for_io(ctxt);
+ bch2_move_ctxt_wait_for_io(ctxt, &trans);
continue;
}
}
- move_ctxt_wait_event(&ctxt, list_empty(&ctxt.reads));
+ move_ctxt_wait_event(&ctxt, NULL, list_empty(&ctxt.reads));
closure_sync(&ctxt.cl);
EBUG_ON(atomic_read(&ctxt.write_sectors));
NO_SB_OPT, false, \
NULL, "Set superblock to latest version,\n" \
"allowing any new features to be used") \
+ x(buckets_nouse, u8, \
+ 0, \
+ OPT_BOOL(), \
+ NO_SB_OPT, false, \
+ NULL, "Allocate the buckets_nouse bitmap") \
x(project, u8, \
OPT_INODE, \
OPT_BOOL(), \
test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags)) {
bool metadata_only = c->opts.norecovery;
- bch_info(c, "starting mark and sweep");
+ bch_info(c, "checking allocations");
err = "error in mark and sweep";
ret = bch2_gc(c, true, metadata_only);
if (ret)
goto err;
- bch_verbose(c, "mark and sweep done");
+ bch_verbose(c, "done checking allocations");
}
bch2_stripes_heap_start(c);
INIT_WORK(&c->read_only_work, bch2_fs_read_only_work);
init_rwsem(&c->gc_lock);
+ mutex_init(&c->gc_gens_lock);
for (i = 0; i < BCH_TIME_STAT_NR; i++)
bch2_time_stats_init(&c->times[i]);
}
/* return with ref on ca->ref: */
-struct bch_dev *bch2_dev_lookup(struct bch_fs *c, const char *path)
+struct bch_dev *bch2_dev_lookup(struct bch_fs *c, const char *name)
{
struct bch_dev *ca;
- dev_t dev;
unsigned i;
- int ret;
-
- ret = lookup_bdev(path, &dev);
- if (ret)
- return ERR_PTR(ret);
rcu_read_lock();
for_each_member_device_rcu(ca, c, i, NULL)
- if (ca->dev == dev)
+ if (!strcmp(name, ca->name))
goto found;
ca = ERR_PTR(-ENOENT);
found:
struct printbuf {
char *pos;
char *end;
+ unsigned indent;
};
static inline size_t printbuf_remaining(struct printbuf *buf)
__VA_ARGS__); \
} while (0)
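+/*
+ * printbuf indentation: _push emits @spaces spaces and bumps the indent
+ * level, _pop drops it, and printbuf_newline() starts a new line indented
+ * to the current level:
+ */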
+static inline void printbuf_indent_push(struct printbuf *buf, unsigned spaces)
+{
+ buf->indent += spaces;
+ while (spaces--)
+ pr_buf(buf, " ");
+}
+
+static inline void printbuf_indent_pop(struct printbuf *buf, unsigned spaces)
+{
+ buf->indent -= spaces;
+}
+
+static inline void printbuf_newline(struct printbuf *buf)
+{
+ unsigned i;
+
+ pr_buf(buf, "\n");
+ for (i = 0; i < buf->indent; i++)
+ pr_buf(buf, " ");
+}
+
void bch_scnmemcpy(struct printbuf *, const char *, size_t);
int bch2_strtoint_h(const char *, int *);