-31718a290491ef933e0bfc5fb666a197b08a4d10
+04036b491089aeb4bac5d796ae1716d019564f7a
__entry->bytes)
);
+DEFINE_EVENT(transaction_restart_iter, trans_restart_key_cache_key_realloced,
+ TP_PROTO(const char *trans_fn,
+ unsigned long caller_ip,
+ enum btree_id btree_id,
+ struct bpos *pos),
+ TP_ARGS(trans_fn, caller_ip, btree_id, pos)
+);
+
#endif /* _TRACE_BCACHE_H */
/* This part must be outside protection */
BCH_FS_NEED_ANOTHER_GC,
BCH_FS_DELETED_NODES,
BCH_FS_REBUILD_REPLICAS,
- BCH_FS_HOLD_BTREE_WRITES,
};
struct btree_debug {
unsigned id;
- struct dentry *btree;
- struct dentry *btree_format;
- struct dentry *failed;
};
struct bch_fs_pcpu {
struct bch_memquota_type quotas[QTYP_NR];
/* DEBUG JUNK */
- struct dentry *debug;
+ struct dentry *fs_debug_dir;
+ struct dentry *btree_debug_dir;
struct btree_debug btree_debug[BTREE_ID_NR];
struct btree *verify_data;
struct btree_node *verify_ondisk;
struct lock_class_key bch2_btree_node_lock_key;
+const char * const bch2_btree_node_flags[] = {
+#define x(f) #f,
+ BTREE_FLAGS()
+#undef x
+ NULL
+};
+
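Editor's note: the new bch2_btree_node_flags[] string table is built with the x-macro pattern. BTREE_FLAGS() (added to btree_types.h further down in this patch) invokes x() once per flag name, and here x(f) is temporarily defined to stringify each name. A minimal sketch of the expansion, using a hypothetical shortened flag list:

    /* Hypothetical two-entry flag list, for illustration only: */
    #define MY_FLAGS()      \
            x(dirty)        \
            x(need_write)

    #define x(f) #f,
    static const char * const my_flags[] = { MY_FLAGS() NULL };
    #undef x
    /* expands to: { "dirty", "need_write", NULL } */

Keeping the flag list in one place guarantees the string table and the enum (generated from the same list below) cannot drift out of sync.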
void bch2_recalc_btree_reserve(struct bch_fs *c)
{
unsigned i, reserve = 16;
goto wait_on_io;
}
- if (btree_node_noevict(b))
- goto out_unlock;
-
- if (!btree_node_may_write(b))
+ if (btree_node_noevict(b) ||
+ btree_node_write_blocked(b) ||
+ btree_node_will_make_reachable(b))
goto out_unlock;
if (btree_node_dirty(b)) {
- if (!flush ||
- test_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags))
+ if (!flush)
goto out_unlock;
/*
* Using the underscore version because we don't want to run the
* post write cleanup (which compacts the node):
*/
if (bch2_verify_btree_ondisk)
- bch2_btree_node_write(c, b, SIX_LOCK_intent);
+ bch2_btree_node_write(c, b, SIX_LOCK_intent, 0);
else
- __bch2_btree_node_write(c, b, false);
+ __bch2_btree_node_write(c, b, 0);
six_unlock_write(&b->c.lock);
six_unlock_intent(&b->c.lock);
if (btree_node_dirty(b))
bch2_btree_complete_write(c, b, btree_current_write(b));
- clear_btree_node_dirty(c, b);
+ clear_btree_node_dirty_acct(c, b);
btree_node_data_free(c, b);
}
six_lock_write(&b->c.lock, NULL, NULL);
if (btree_node_dirty(b)) {
- __bch2_btree_node_write(c, b, false);
+ __bch2_btree_node_write(c, b, 0);
six_unlock_write(&b->c.lock);
six_unlock_intent(&b->c.lock);
goto wait_on_io;
extern struct lock_class_key bch2_btree_node_lock_key;
+extern const char * const bch2_btree_node_flags[];
+
struct btree_iter;
void bch2_recalc_btree_reserve(struct bch_fs *);
bch2_trans_init(&trans, c, 0, 0);
+ if (initial)
+ trans.is_initial_gc = true;
+
for (i = 0; i < BTREE_ID_NR; i++)
ids[i] = i;
bubble_sort(ids, BTREE_ID_NR, btree_id_gc_phase_cmp);
};
if (log_u64s[1] >= (log_u64s[0] + log_u64s[2]) / 2) {
- bch2_btree_node_write(c, b, SIX_LOCK_write);
+ bch2_btree_node_write(c, b, SIX_LOCK_write, 0);
reinit_iter = true;
}
}
bch2_journal_pin_drop(&c->journal, &w->journal);
}
-static void btree_node_write_done(struct bch_fs *c, struct btree *b)
+static void __btree_node_write_done(struct bch_fs *c, struct btree *b)
{
struct btree_write *w = btree_prev_write(b);
unsigned long old, new, v;
bch2_btree_complete_write(c, b, w);
- v = READ_ONCE(b->flags);
- do {
- old = new = v;
-
- if (old & (1U << BTREE_NODE_need_write))
- goto do_write;
-
- new &= ~(1U << BTREE_NODE_write_in_flight);
- new &= ~(1U << BTREE_NODE_write_in_flight_inner);
- } while ((v = cmpxchg(&b->flags, old, new)) != old);
-
- wake_up_bit(&b->flags, BTREE_NODE_write_in_flight);
- return;
-
-do_write:
- six_lock_read(&b->c.lock, NULL, NULL);
v = READ_ONCE(b->flags);
do {
old = new = v;
if ((old & (1U << BTREE_NODE_dirty)) &&
(old & (1U << BTREE_NODE_need_write)) &&
!(old & (1U << BTREE_NODE_never_write)) &&
- btree_node_may_write(b)) {
+ !(old & (1U << BTREE_NODE_write_blocked)) &&
+ !(old & (1U << BTREE_NODE_will_make_reachable))) {
new &= ~(1U << BTREE_NODE_dirty);
new &= ~(1U << BTREE_NODE_need_write);
new |= (1U << BTREE_NODE_write_in_flight);
} while ((v = cmpxchg(&b->flags, old, new)) != old);
if (new & (1U << BTREE_NODE_write_in_flight))
- __bch2_btree_node_write(c, b, true);
+ __bch2_btree_node_write(c, b, BTREE_WRITE_ALREADY_STARTED);
+}
+static void btree_node_write_done(struct bch_fs *c, struct btree *b)
+{
+ six_lock_read(&b->c.lock, NULL, NULL);
+ __btree_node_write_done(c, b);
six_unlock_read(&b->c.lock);
}
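Editor's note: btree_node_write_done() is now a thin wrapper that takes the read lock and defers to __btree_node_write_done(), which performs the whole state transition in a single cmpxchg() retry loop rather than the old two-pass scheme. A sketch of the lockless transition idiom used here, with hypothetical names (MY_FLAG_A/B, do_followup_work()) standing in for the btree specifics:

    static void transition_flags(unsigned long *flags)
    {
            unsigned long old, new, v = READ_ONCE(*flags);

            do {
                    old = new = v;

                    /* decide the transition from the snapshot in 'old': */
                    if (old & (1UL << MY_FLAG_A))
                            new |= 1UL << MY_FLAG_B;
            } while ((v = cmpxchg(flags, old, new)) != old);

            /* 'old' -> 'new' is the transition that actually happened: */
            if (new & (1UL << MY_FLAG_B))
                    do_followup_work();
    }

Because cmpxchg() only succeeds while the word still equals the snapshot, any decision computed from 'old' is consistent with the update that was actually installed.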
bch2_submit_wbio_replicas(&wbio->wbio, wbio->wbio.c, BCH_DATA_btree, &tmp.k);
}
-void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, bool already_started)
+void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, unsigned flags)
{
struct btree_write_bio *wbio;
struct bset_tree *t;
void *data;
int ret;
- if (already_started)
+ if (flags & BTREE_WRITE_ALREADY_STARTED)
goto do_write;
- if (test_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags))
- return;
-
/*
* We may only have a read lock on the btree node - the dirty bit is our
* "lock" against racing with other threads that may be trying to start
if (!(old & (1 << BTREE_NODE_dirty)))
return;
- if (!btree_node_may_write(b))
+ if ((flags & BTREE_WRITE_ONLY_IF_NEED) &&
+ !(old & (1 << BTREE_NODE_need_write)))
return;
- if (old & (1 << BTREE_NODE_never_write))
+ if (old &
+ ((1 << BTREE_NODE_never_write)|
+ (1 << BTREE_NODE_write_blocked)))
return;
- BUG_ON(old & (1 << BTREE_NODE_write_in_flight));
+ if (b->written &&
+ (old & (1 << BTREE_NODE_will_make_reachable)))
+ return;
+
+ if (old & (1 << BTREE_NODE_write_in_flight))
+ return;
new &= ~(1 << BTREE_NODE_dirty);
new &= ~(1 << BTREE_NODE_need_write);
b->written += sectors_to_write;
nowrite:
btree_bounce_free(c, bytes, used_mempool, data);
- btree_node_write_done(c, b);
+ __btree_node_write_done(c, b);
}
/*
* Use this one if the node is intent locked:
*/
void bch2_btree_node_write(struct bch_fs *c, struct btree *b,
- enum six_lock_type lock_type_held)
+ enum six_lock_type lock_type_held,
+ unsigned flags)
{
if (lock_type_held == SIX_LOCK_intent ||
(lock_type_held == SIX_LOCK_read &&
six_lock_tryupgrade(&b->c.lock))) {
- __bch2_btree_node_write(c, b, false);
+ __bch2_btree_node_write(c, b, flags);
/* don't cycle lock unnecessarily: */
if (btree_node_just_written(b) &&
if (lock_type_held == SIX_LOCK_read)
six_lock_downgrade(&b->c.lock);
} else {
- __bch2_btree_node_write(c, b, false);
+ __bch2_btree_node_write(c, b, flags);
if (lock_type_held == SIX_LOCK_write &&
btree_node_just_written(b))
bch2_btree_post_write_cleanup(c, b);
{
__bch2_btree_flush_all(c, BTREE_NODE_write_in_flight);
}
-
-void bch2_dirty_btree_nodes_to_text(struct printbuf *out, struct bch_fs *c)
-{
- struct bucket_table *tbl;
- struct rhash_head *pos;
- struct btree *b;
- unsigned i;
-
- rcu_read_lock();
- for_each_cached_btree(b, c, tbl, i, pos) {
- unsigned long flags = READ_ONCE(b->flags);
-
- if (!(flags & (1 << BTREE_NODE_dirty)))
- continue;
-
- pr_buf(out, "%p d %u n %u l %u w %u b %u r %u:%lu\n",
- b,
- (flags & (1 << BTREE_NODE_dirty)) != 0,
- (flags & (1 << BTREE_NODE_need_write)) != 0,
- b->c.level,
- b->written,
- !list_empty_careful(&b->write_blocked),
- b->will_make_reachable != 0,
- b->will_make_reachable & 1);
- }
- rcu_read_unlock();
-}
struct btree_iter;
struct btree_node_read_all;
-static inline bool btree_node_dirty(struct btree *b)
-{
- return test_bit(BTREE_NODE_dirty, &b->flags);
-}
-
-static inline void set_btree_node_dirty(struct bch_fs *c, struct btree *b)
+static inline void set_btree_node_dirty_acct(struct bch_fs *c, struct btree *b)
{
if (!test_and_set_bit(BTREE_NODE_dirty, &b->flags))
atomic_inc(&c->btree_cache.dirty);
}
-static inline void clear_btree_node_dirty(struct bch_fs *c, struct btree *b)
+static inline void clear_btree_node_dirty_acct(struct bch_fs *c, struct btree *b)
{
if (test_and_clear_bit(BTREE_NODE_dirty, &b->flags))
atomic_dec(&c->btree_cache.dirty);
void bch2_btree_node_wait_on_read(struct btree *);
void bch2_btree_node_wait_on_write(struct btree *);
-static inline bool btree_node_may_write(struct btree *b)
-{
- return list_empty_careful(&b->write_blocked) &&
- (!b->written || !b->will_make_reachable);
-}
-
enum compact_mode {
COMPACT_LAZY,
COMPACT_ALL,
void bch2_btree_complete_write(struct bch_fs *, struct btree *,
struct btree_write *);
-void __bch2_btree_node_write(struct bch_fs *, struct btree *, bool);
bool bch2_btree_post_write_cleanup(struct bch_fs *, struct btree *);
+#define BTREE_WRITE_ONLY_IF_NEED (1U << 0)
+#define BTREE_WRITE_ALREADY_STARTED (1U << 1)
+
+void __bch2_btree_node_write(struct bch_fs *, struct btree *, unsigned);
void bch2_btree_node_write(struct bch_fs *, struct btree *,
- enum six_lock_type);
+ enum six_lock_type, unsigned);
static inline void btree_node_write_if_need(struct bch_fs *c, struct btree *b,
enum six_lock_type lock_held)
{
- if (b->written &&
- btree_node_need_write(b) &&
- btree_node_may_write(b) &&
- !btree_node_write_in_flight(b))
- bch2_btree_node_write(c, b, lock_held);
+ bch2_btree_node_write(c, b, lock_held, BTREE_WRITE_ONLY_IF_NEED);
}
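Editor's note: the bool parameter of __bch2_btree_node_write() has become a flags word, which also absorbs the checks the deleted btree_node_may_write() and bch2_btree_node_write_cond() used to open-code: BTREE_WRITE_ONLY_IF_NEED makes the write path bail unless the node is marked need_write, and BTREE_WRITE_ALREADY_STARTED skips the initial dirty-bit transition when the caller (the write-done path above) has already claimed the write. Typical call sites, per the conversions elsewhere in this patch:

    /* unconditional write attempt: */
    bch2_btree_node_write(c, b, SIX_LOCK_intent, 0);

    /* only write if the node was flagged as needing it: */
    bch2_btree_node_write(c, b, SIX_LOCK_read, BTREE_WRITE_ONLY_IF_NEED);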
-#define bch2_btree_node_write_cond(_c, _b, cond) \
-do { \
- unsigned long old, new, v = READ_ONCE((_b)->flags); \
- \
- do { \
- old = new = v; \
- \
- if (!(old & (1 << BTREE_NODE_dirty)) || !(cond)) \
- break; \
- \
- new |= (1 << BTREE_NODE_need_write); \
- } while ((v = cmpxchg(&(_b)->flags, old, new)) != old); \
- \
- btree_node_write_if_need(_c, _b, SIX_LOCK_read); \
-} while (0)
-
void bch2_btree_flush_all_reads(struct bch_fs *);
void bch2_btree_flush_all_writes(struct bch_fs *);
-void bch2_dirty_btree_nodes_to_text(struct printbuf *, struct bch_fs *);
static inline void compat_bformat(unsigned level, enum btree_id btree_id,
unsigned version, unsigned big_endian,
trans_for_each_path(trans, path)
__bch2_btree_path_unlock(path);
- BUG_ON(lock_class_is_held(&bch2_btree_node_lock_key));
+ /*
+ * bch2_gc_btree_init_recurse() doesn't use btree iterators for walking
+ * btree nodes, it implements its own walking:
+ */
+ BUG_ON(!trans->is_initial_gc &&
+ lock_class_is_held(&bch2_btree_node_lock_key));
}
/* Btree iterator: */
bool restarted:1;
bool memory_allocation_failure:1;
bool journal_transaction_names:1;
+ bool is_initial_gc:1;
/*
* For when bch2_trans_update notices we'll be splitting a compressed
* extent:
struct replicas_delta_list *fs_usage_deltas;
};
-#define BTREE_FLAG(flag) \
+#define BTREE_FLAGS() \
+ x(read_in_flight) \
+ x(read_error) \
+ x(dirty) \
+ x(need_write) \
+ x(write_blocked) \
+ x(will_make_reachable) \
+ x(noevict) \
+ x(write_idx) \
+ x(accessed) \
+ x(write_in_flight) \
+ x(write_in_flight_inner) \
+ x(just_written) \
+ x(dying) \
+ x(fake) \
+ x(need_rewrite) \
+ x(never_write)
+
+enum btree_flags {
+#define x(flag) BTREE_NODE_##flag,
+ BTREE_FLAGS()
+#undef x
+};
+
+#define x(flag) \
static inline bool btree_node_ ## flag(struct btree *b) \
{ return test_bit(BTREE_NODE_ ## flag, &b->flags); } \
\
static inline void clear_btree_node_ ## flag(struct btree *b) \
{ clear_bit(BTREE_NODE_ ## flag, &b->flags); }
-enum btree_flags {
- BTREE_NODE_read_in_flight,
- BTREE_NODE_read_error,
- BTREE_NODE_dirty,
- BTREE_NODE_need_write,
- BTREE_NODE_noevict,
- BTREE_NODE_write_idx,
- BTREE_NODE_accessed,
- BTREE_NODE_write_in_flight,
- BTREE_NODE_write_in_flight_inner,
- BTREE_NODE_just_written,
- BTREE_NODE_dying,
- BTREE_NODE_fake,
- BTREE_NODE_need_rewrite,
- BTREE_NODE_never_write,
-};
-
-BTREE_FLAG(read_in_flight);
-BTREE_FLAG(read_error);
-BTREE_FLAG(need_write);
-BTREE_FLAG(noevict);
-BTREE_FLAG(write_idx);
-BTREE_FLAG(accessed);
-BTREE_FLAG(write_in_flight);
-BTREE_FLAG(write_in_flight_inner);
-BTREE_FLAG(just_written);
-BTREE_FLAG(dying);
-BTREE_FLAG(fake);
-BTREE_FLAG(need_rewrite);
-BTREE_FLAG(never_write);
+BTREE_FLAGS()
+#undef x
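Editor's note: for each entry in BTREE_FLAGS(), the macro above generates the test/clear helpers that were previously emitted one-by-one by BTREE_FLAG() (and, per the set_btree_node_write_blocked() callers later in the patch, set helpers likewise). For example, x(dirty) expands to:

    static inline bool btree_node_dirty(struct btree *b)
    { return test_bit(BTREE_NODE_dirty, &b->flags); }

    static inline void clear_btree_node_dirty(struct btree *b)
    { clear_bit(BTREE_NODE_dirty, &b->flags); }

This is also why the accounting helpers were renamed to set_btree_node_dirty_acct()/clear_btree_node_dirty_acct() earlier in the patch: the bare names are now taken by these generated one-argument versions.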
static inline struct btree_write *btree_current_write(struct btree *b)
{
six_lock_write(&b->c.lock, NULL, NULL);
set_btree_node_accessed(b);
- set_btree_node_dirty(c, b);
+ set_btree_node_dirty_acct(c, b);
set_btree_node_need_write(b);
bch2_bset_init_first(b, &b->data->keys);
mutex_lock(&c->btree_interior_update_lock);
list_del(&as->write_blocked_list);
+ if (list_empty(&b->write_blocked))
+ clear_btree_node_write_blocked(b);
/*
* Node might have been freed, recheck under
BUG_ON(b->will_make_reachable != (unsigned long) as);
b->will_make_reachable = 0;
+ clear_btree_node_will_make_reachable(b);
}
mutex_unlock(&c->btree_interior_update_lock);
as->mode = BTREE_INTERIOR_UPDATING_NODE;
as->b = b;
+
+ set_btree_node_write_blocked(b);
list_add(&as->write_blocked_list, &b->write_blocked);
mutex_unlock(&c->btree_interior_update_lock);
as->new_nodes[as->nr_new_nodes++] = b;
b->will_make_reachable = 1UL|(unsigned long) as;
+ set_btree_node_will_make_reachable(b);
mutex_unlock(&c->btree_interior_update_lock);
* xchg() is for synchronization with bch2_btree_complete_write:
*/
v = xchg(&b->will_make_reachable, 0);
+ clear_btree_node_will_make_reachable(b);
as = (struct btree_update *) (v & ~1UL);
if (!as) {
closure_wake_up(&c->btree_interior_update_wait);
}
- clear_btree_node_dirty(c, b);
+ clear_btree_node_dirty_acct(c, b);
clear_btree_node_need_write(b);
/*
struct btree *old;
trace_btree_set_root(c, b);
- BUG_ON(!b->written &&
- !test_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags));
+ BUG_ON(!b->written);
old = btree_node_root(c, b);
bch2_btree_node_iter_advance(node_iter, b);
bch2_btree_bset_insert_key(trans, path, b, node_iter, insert);
- set_btree_node_dirty(c, b);
+ set_btree_node_dirty_acct(c, b);
set_btree_node_need_write(b);
}
six_unlock_write(&n2->c.lock);
six_unlock_write(&n1->c.lock);
- bch2_btree_node_write(c, n1, SIX_LOCK_intent);
- bch2_btree_node_write(c, n2, SIX_LOCK_intent);
+ bch2_btree_node_write(c, n1, SIX_LOCK_intent, 0);
+ bch2_btree_node_write(c, n2, SIX_LOCK_intent, 0);
/*
* Note that on recursive parent_keys == keys, so we
btree_split_insert_keys(as, trans, path, n3, &as->parent_keys);
- bch2_btree_node_write(c, n3, SIX_LOCK_intent);
+ bch2_btree_node_write(c, n3, SIX_LOCK_intent, 0);
}
} else {
trace_btree_compact(c, b);
bch2_btree_build_aux_trees(n1);
six_unlock_write(&n1->c.lock);
- bch2_btree_node_write(c, n1, SIX_LOCK_intent);
+ bch2_btree_node_write(c, n1, SIX_LOCK_intent, 0);
if (parent)
bch2_keylist_add(&as->parent_keys, &n1->key);
bch2_btree_build_aux_trees(n);
six_unlock_write(&n->c.lock);
- bch2_btree_node_write(c, n, SIX_LOCK_intent);
+ bch2_btree_node_write(c, n, SIX_LOCK_intent, 0);
bkey_init(&delete.k);
delete.k.p = prev->key.k.p;
trace_btree_gc_rewrite_node(c, b);
- bch2_btree_node_write(c, n, SIX_LOCK_intent);
+ bch2_btree_node_write(c, n, SIX_LOCK_intent, 0);
if (parent) {
bch2_keylist_add(&as->parent_keys, &n->key);
struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct btree_write *w = container_of(pin, struct btree_write, journal);
struct btree *b = container_of(w, struct btree, writes[i]);
+ unsigned long old, new, v;
+ unsigned idx = w - b->writes;
six_lock_read(&b->c.lock, NULL, NULL);
- bch2_btree_node_write_cond(c, b,
- (btree_current_write(b) == w && w->journal.seq == seq));
+ v = READ_ONCE(b->flags);
+
+ do {
+ old = new = v;
+
+ if (!(old & (1 << BTREE_NODE_dirty)) ||
+ !!(old & (1 << BTREE_NODE_write_idx)) != idx ||
+ w->journal.seq != seq)
+ break;
+
+ new |= 1 << BTREE_NODE_need_write;
+ } while ((v = cmpxchg(&b->flags, old, new)) != old);
+
+ btree_node_write_if_need(c, b, SIX_LOCK_read);
six_unlock_read(&b->c.lock);
return 0;
}
bch2_btree_add_journal_pin(c, b, trans->journal_res.seq);
if (unlikely(!btree_node_dirty(b)))
- set_btree_node_dirty(c, b);
+ set_btree_node_dirty_acct(c, b);
live_u64s_added = (int) b->nr.live_u64s - old_live_u64s;
u64s_added = (int) bset_u64s(t) - old_u64s;
ck->u64s = new_u64s;
ck->k = new_k;
- return BTREE_INSERT_OK;
+ /*
+ * Keys returned by peek() are no longer valid pointers, so we need a
+ * transaction restart:
+ */
+ trace_trans_restart_key_cache_key_realloced(trans->fn, _RET_IP_,
+ path->btree_id, &path->pos);
+ return btree_trans_restart(trans);
}
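Editor's note: returning btree_trans_restart() forces the whole transaction to be retried from the top, after which peek() yields pointers into the reallocated key. A hedged sketch of the caller-side pattern; the loop shape and do_btree_ops() are illustrative (bcachefs wraps this in helpers), and in this era of the code a restart surfaces as -EINTR:

    int ret;

    do {
            bch2_trans_begin(&trans);

            ret = do_btree_ops(&trans) ?:       /* hypothetical callback */
                  bch2_trans_commit(&trans, NULL, NULL, 0);
    } while (ret == -EINTR);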
static inline void do_btree_insert_one(struct btree_trans *trans,
/* XXX: bch_fs refcounting */
struct dump_iter {
- struct bpos from;
- struct bch_fs *c;
+ struct bch_fs *c;
enum btree_id id;
+ struct bpos from;
+ u64 iter;
struct printbuf buf;
file->private_data = i;
i->from = POS_MIN;
+ i->iter = 0;
i->c = container_of(bd, struct bch_fs, btree_debug[bd->id]);
i->id = bd->id;
i->buf = PRINTBUF;
.read = bch2_read_bfloat_failed,
};
+static void bch2_cached_btree_node_to_text(struct printbuf *out, struct bch_fs *c,
+ struct btree *b)
+{
+ out->tabstops[0] = 32;
+
+ pr_buf(out, "%px btree=%s l=%u ",
+ b,
+ bch2_btree_ids[b->c.btree_id],
+ b->c.level);
+ pr_newline(out);
+
+ pr_indent_push(out, 2);
+
+ bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(&b->key));
+ pr_newline(out);
+
+ pr_buf(out, "flags: ");
+ pr_tab(out);
+ bch2_flags_to_text(out, bch2_btree_node_flags, b->flags);
+ pr_newline(out);
+
+ pr_buf(out, "written:");
+ pr_tab(out);
+ pr_buf(out, "%u", b->written);
+ pr_newline(out);
+
+ pr_buf(out, "writes blocked:");
+ pr_tab(out);
+ pr_buf(out, "%u", !list_empty_careful(&b->write_blocked));
+ pr_newline(out);
+
+ pr_buf(out, "will make reachable:");
+ pr_tab(out);
+ pr_buf(out, "%lx", b->will_make_reachable);
+ pr_newline(out);
+
+ pr_buf(out, "journal pin %px:", &b->writes[0].journal);
+ pr_tab(out);
+ pr_buf(out, "%llu", b->writes[0].journal.seq);
+ pr_newline(out);
+
+ pr_buf(out, "journal pin %px:", &b->writes[1].journal);
+ pr_tab(out);
+ pr_buf(out, "%llu", b->writes[1].journal.seq);
+ pr_newline(out);
+
+ pr_indent_pop(out, 2);
+}
+
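Editor's note: with tabstops[0] = 32, every pr_tab() aligns the value column, so each cached node renders as a small indented record. Roughly, with entirely illustrative values:

    ffff8881049e8000 btree=extents l=0
      u64s 9 type btree_ptr_v2 ...
      flags:                        dirty,accessed
      written:                      24
      writes blocked:               0
      will make reachable:          0
      journal pin ffff8881049e8128: 1234
      journal pin ffff8881049e8160: 0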
+static ssize_t bch2_cached_btree_nodes_read(struct file *file, char __user *buf,
+ size_t size, loff_t *ppos)
+{
+ struct dump_iter *i = file->private_data;
+ struct bch_fs *c = i->c;
+ bool done = false;
+ int err;
+
+ i->ubuf = buf;
+ i->size = size;
+ i->ret = 0;
+
+ do {
+ struct bucket_table *tbl;
+ struct rhash_head *pos;
+ struct btree *b;
+
+ err = flush_buf(i);
+ if (err)
+ return err;
+
+ if (!i->size)
+ break;
+
+ rcu_read_lock();
+ i->buf.atomic++;
+ tbl = rht_dereference_rcu(c->btree_cache.table.tbl,
+ &c->btree_cache.table);
+ if (i->iter < tbl->size) {
+ rht_for_each_entry_rcu(b, pos, tbl, i->iter, hash)
+ bch2_cached_btree_node_to_text(&i->buf, c, b);
+ i->iter++;

+ } else {
+ done = true;
+ }
+ --i->buf.atomic;
+ rcu_read_unlock();
+ } while (!done);
+
+ if (i->buf.allocation_failure)
+ return -ENOMEM;
+
+ return i->ret;
+}
+
+static const struct file_operations cached_btree_nodes_ops = {
+ .owner = THIS_MODULE,
+ .open = bch2_dump_open,
+ .release = bch2_dump_release,
+ .read = bch2_cached_btree_nodes_read,
+};
+
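Editor's note: together with journal_pins below, this file replaces the journal_pins and dirty_btree_nodes sysfs attributes removed at the end of this patch. The debugfs versions have the advantage that output is produced incrementally, one flush_buf() chunk per read() call and one hash-table bucket (or journal sequence number) per iteration, instead of rendering the whole filesystem's state into a single buffer. Assuming debugfs is mounted in the usual location, they would be read as e.g. /sys/kernel/debug/bcachefs/<uuid>/cached_btree_nodes (path hypothetical; the root directory name isn't shown in this hunk).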
+static ssize_t bch2_journal_pins_read(struct file *file, char __user *buf,
+ size_t size, loff_t *ppos)
+{
+ struct dump_iter *i = file->private_data;
+ struct bch_fs *c = i->c;
+ bool done = false;
+ int err;
+
+ i->ubuf = buf;
+ i->size = size;
+ i->ret = 0;
+
+ do {
+ err = flush_buf(i);
+ if (err)
+ return err;
+
+ if (!i->size)
+ break;
+
+ done = bch2_journal_seq_pins_to_text(&i->buf, &c->journal, &i->iter);
+ i->iter++;
+ } while (!done);
+
+ if (i->buf.allocation_failure)
+ return -ENOMEM;
+
+ return i->ret;
+}
+
+static const struct file_operations journal_pins_ops = {
+ .owner = THIS_MODULE,
+ .open = bch2_dump_open,
+ .release = bch2_dump_release,
+ .read = bch2_journal_pins_read,
+};
+
void bch2_fs_debug_exit(struct bch_fs *c)
{
- if (!IS_ERR_OR_NULL(c->debug))
- debugfs_remove_recursive(c->debug);
+ if (!IS_ERR_OR_NULL(c->fs_debug_dir))
+ debugfs_remove_recursive(c->fs_debug_dir);
}
void bch2_fs_debug_init(struct bch_fs *c)
return;
snprintf(name, sizeof(name), "%pU", c->sb.user_uuid.b);
- c->debug = debugfs_create_dir(name, bch_debug);
- if (IS_ERR_OR_NULL(c->debug))
+ c->fs_debug_dir = debugfs_create_dir(name, bch_debug);
+ if (IS_ERR_OR_NULL(c->fs_debug_dir))
+ return;
+
+ debugfs_create_file("cached_btree_nodes", 0400, c->fs_debug_dir,
+ c->btree_debug, &cached_btree_nodes_ops);
+
+ debugfs_create_file("journal_pins", 0400, c->fs_debug_dir,
+ c->btree_debug, &journal_pins_ops);
+
+ c->btree_debug_dir = debugfs_create_dir("btrees", c->fs_debug_dir);
+ if (IS_ERR_OR_NULL(c->btree_debug_dir))
return;
for (bd = c->btree_debug;
bd < c->btree_debug + ARRAY_SIZE(c->btree_debug);
bd++) {
bd->id = bd - c->btree_debug;
- bd->btree = debugfs_create_file(bch2_btree_ids[bd->id],
- 0400, c->debug, bd,
- &btree_debug_ops);
+ debugfs_create_file(bch2_btree_ids[bd->id],
+ 0400, c->btree_debug_dir, bd,
+ &btree_debug_ops);
snprintf(name, sizeof(name), "%s-formats",
bch2_btree_ids[bd->id]);
- bd->btree_format = debugfs_create_file(name, 0400, c->debug, bd,
- &btree_format_debug_ops);
+ debugfs_create_file(name, 0400, c->btree_debug_dir, bd,
+ &btree_format_debug_ops);
snprintf(name, sizeof(name), "%s-bfloat-failed",
bch2_btree_ids[bd->id]);
- bd->failed = debugfs_create_file(name, 0400, c->debug, bd,
- &bfloat_failed_debug_ops);
+ debugfs_create_file(name, 0400, c->btree_debug_dir, bd,
+ &bfloat_failed_debug_ops);
}
}
ca = bch_dev_bkey_exists(c, pick.ptr.dev);
- if (!pick.ptr.cached &&
+ /*
+ * Stale dirty pointers are treated as IO errors, but @failed isn't
+ * allocated unless we're in the retry path - so if we're not in the
+ * retry path, don't check here, it'll be caught in bch2_read_endio()
+ * and we'll end up in the retry path:
+ */
+ if ((flags & BCH_READ_IN_RETRY) &&
+ !pick.ptr.cached &&
unlikely(ptr_stale(ca, &pick.ptr))) {
read_from_stale_dirty_pointer(trans, k, pick.ptr);
bch2_mark_io_failure(failed, &pick);
spin_unlock(&j->lock);
}
-void bch2_journal_pins_to_text(struct printbuf *out, struct journal *j)
+bool bch2_journal_seq_pins_to_text(struct printbuf *out, struct journal *j, u64 *seq)
{
struct journal_entry_pin_list *pin_list;
struct journal_entry_pin *pin;
- u64 i;
spin_lock(&j->lock);
+ *seq = max(*seq, j->pin.front);
+
+ if (*seq >= j->pin.back) {
+ spin_unlock(&j->lock);
+ return true;
+ }
+
out->atomic++;
- fifo_for_each_entry_ptr(pin_list, &j->pin, i) {
- pr_buf(out, "%llu: count %u\n",
- i, atomic_read(&pin_list->count));
+ pin_list = journal_seq_pin(j, *seq);
- list_for_each_entry(pin, &pin_list->key_cache_list, list)
- pr_buf(out, "\t%px %ps\n",
- pin, pin->flush);
+ pr_buf(out, "%llu: count %u", *seq, atomic_read(&pin_list->count));
+ pr_newline(out);
+ pr_indent_push(out, 2);
- list_for_each_entry(pin, &pin_list->list, list)
- pr_buf(out, "\t%px %ps\n",
- pin, pin->flush);
+ list_for_each_entry(pin, &pin_list->list, list) {
+ pr_buf(out, "\t%px %ps", pin, pin->flush);
+ pr_newline(out);
+ }
+
+ list_for_each_entry(pin, &pin_list->key_cache_list, list) {
+ pr_buf(out, "\t%px %ps", pin, pin->flush);
+ pr_newline(out);
+ }
- if (!list_empty(&pin_list->flushed))
- pr_buf(out, "flushed:\n");
+ if (!list_empty(&pin_list->flushed)) {
+ pr_buf(out, "flushed:");
+ pr_newline(out);
+ }
- list_for_each_entry(pin, &pin_list->flushed, list)
- pr_buf(out, "\t%px %ps\n",
- pin, pin->flush);
+ list_for_each_entry(pin, &pin_list->flushed, list) {
+ pr_buf(out, "\t%px %ps", pin, pin->flush);
+ pr_newline(out);
}
+ pr_indent_pop(out, 2);
+
--out->atomic;
spin_unlock(&j->lock);
+
+ return false;
+}
+
+void bch2_journal_pins_to_text(struct printbuf *out, struct journal *j)
+{
+ u64 seq = 0;
+
+ while (!bch2_journal_seq_pins_to_text(out, j, &seq))
+ seq++;
}
void __bch2_journal_debug_to_text(struct printbuf *, struct journal *);
void bch2_journal_debug_to_text(struct printbuf *, struct journal *);
void bch2_journal_pins_to_text(struct printbuf *, struct journal *);
+bool bch2_journal_seq_pins_to_text(struct printbuf *, struct journal *, u64 *);
int bch2_set_nr_journal_buckets(struct bch_fs *, struct bch_dev *,
unsigned nr);
};
static int bch2_sb_field_validate(struct bch_sb *sb, struct bch_sb_field *f,
- struct printbuf *orig_err)
+ struct printbuf *err)
{
unsigned type = le32_to_cpu(f->type);
- struct printbuf err = *orig_err;
+ struct printbuf field_err = PRINTBUF;
int ret;
if (type >= BCH_SB_FIELD_NR)
return 0;
- pr_buf(&err, "Invalid superblock section %s: ", bch2_sb_fields[type]);
-
- ret = bch2_sb_field_ops[type]->validate(sb, f, &err);
+ ret = bch2_sb_field_ops[type]->validate(sb, f, &field_err);
if (ret) {
- pr_newline(&err);
- bch2_sb_field_to_text(&err, sb, f);
- *orig_err = err;
+ pr_buf(err, "Invalid superblock section %s: %s",
+ bch2_sb_fields[type],
+ field_err.buf);
+ pr_newline(err);
+ bch2_sb_field_to_text(err, sb, f);
}
+ printbuf_exit(&field_err);
return ret;
}
read_attribute(btree_cache_size);
read_attribute(compression_stats);
read_attribute(journal_debug);
-read_attribute(journal_pins);
read_attribute(btree_updates);
-read_attribute(dirty_btree_nodes);
read_attribute(btree_cache);
read_attribute(btree_key_cache);
read_attribute(btree_transactions);
if (attr == &sysfs_journal_debug)
bch2_journal_debug_to_text(out, &c->journal);
- if (attr == &sysfs_journal_pins)
- bch2_journal_pins_to_text(out, &c->journal);
-
if (attr == &sysfs_btree_updates)
bch2_btree_updates_to_text(out, c);
- if (attr == &sysfs_dirty_btree_nodes)
- bch2_dirty_btree_nodes_to_text(out, c);
-
if (attr == &sysfs_btree_cache)
bch2_btree_cache_to_text(out, c);
struct attribute *bch2_fs_internal_files[] = {
&sysfs_journal_debug,
- &sysfs_journal_pins,
&sysfs_btree_updates,
- &sysfs_dirty_btree_nodes,
&sysfs_btree_cache,
&sysfs_btree_key_cache,
&sysfs_btree_transactions,
static inline void pr_indent_pop(struct printbuf *buf, unsigned spaces)
{
+ if (buf->last_newline + buf->indent == buf->pos) {
+ buf->pos -= spaces;
+ buf->buf[buf->pos] = 0;
+ }
buf->indent -= spaces;
}
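Editor's note: the new check covers popping the indent immediately after a newline. As the condition implies, pr_newline() writes the indentation right away, so if nothing has been printed since, those spaces sit at the end of the buffer and are erased along with the indent level. A worked example, assuming an indent of 2:

    pr_indent_push(&buf, 2);
    pr_newline(&buf);       /* buffer now ends "...\n  " - indent already written */
    pr_indent_pop(&buf, 2); /* last_newline + indent == pos, so the two
                             * trailing spaces are removed before the
                             * indent level is reduced */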