-6f603b8d79efa7d9ac04ea0c38ef1bbaa10fd678
+454bd4f82d85bb42a86b8eb0172b13e86e5788a7
__entry->buckets_moved, __entry->buckets_not_moved)
);
+/*
+ * Event class shared by every trans_restart_* tracepoint.
+ *
+ * @c:  filesystem whose transaction is being restarted
+ * @ip: code address recorded for the transaction; call sites in this patch
+ *      pass trans->ip, which is assigned from _RET_IP_
+ *
+ * Only @ip appears in the formatted output; the copied name is stored in
+ * the trace entry but not printed.
+ */
+DECLARE_EVENT_CLASS(transaction_restart,
+ TP_PROTO(struct bch_fs *c, unsigned long ip),
+ TP_ARGS(c, ip),
+
+ TP_STRUCT__entry(
+ __array(char, name, 16)
+ __field(unsigned long, ip )
+ ),
+
+ TP_fast_assign(
+ /*
+  * NOTE(review): copies a fixed 16 bytes and does not NUL-terminate;
+  * assumes c->name is at least 16 bytes long -- confirm.
+  */
+ memcpy(__entry->name, c->name, 16);
+ __entry->ip = ip;
+ ),
+
+ /*
+  * ip is a text (return) address, not a function pointer, so print it
+  * with %ps. %pf is the function-pointer specifier and dereferences
+  * function descriptors on architectures that use them.
+  */
+ TP_printk("%ps", (void *) __entry->ip)
+);
+
+/*
+ * Fired next to trans_restart() immediately before a call site returns
+ * ERR_PTR(-EINTR). NOTE(review): the name suggests the btree node was
+ * reused underneath the caller -- confirm at the call site.
+ */
+DEFINE_EVENT(transaction_restart, trans_restart_btree_node_reused,
+ TP_PROTO(struct bch_fs *c, unsigned long ip),
+ TP_ARGS(c, ip)
+);
+
+/*
+ * Fired on the lock path when the preceding check result is false; the
+ * call site then returns false without taking the node lock.
+ */
+DEFINE_EVENT(transaction_restart, trans_restart_would_deadlock,
+ TP_PROTO(struct bch_fs *c, unsigned long ip),
+ TP_ARGS(c, ip)
+);
+
+/*
+ * Fired when trans->iters_live is nonzero at the call site, which then
+ * returns -EINTR. NOTE(review): presumably the iterator array was just
+ * reallocated -- confirm at the call site.
+ */
+DEFINE_EVENT(transaction_restart, trans_restart_iters_realloced,
+ TP_PROTO(struct bch_fs *c, unsigned long ip),
+ TP_ARGS(c, ip)
+);
+
+/*
+ * Fired when old_bytes is nonzero at the call site, which then returns
+ * ERR_PTR(-EINTR). NOTE(review): presumably transaction memory was just
+ * reallocated -- confirm at the call site.
+ */
+DEFINE_EVENT(transaction_restart, trans_restart_mem_realloced,
+ TP_PROTO(struct bch_fs *c, unsigned long ip),
+ TP_ARGS(c, ip)
+);
+
+/*
+ * Fired together with the trans_restart() message
+ * "(iter relock after journal res get blocked)"; the call site sets
+ * ret = -EINTR.
+ */
+DEFINE_EVENT(transaction_restart, trans_restart_journal_res_get,
+ TP_PROTO(struct bch_fs *c, unsigned long ip),
+ TP_ARGS(c, ip)
+);
+
+/*
+ * Fired when bch2_btree_trans_relock() fails, together with the
+ * trans_restart() message
+ * "(iter relock after journal preres get blocked)"; the call site returns
+ * -EINTR.
+ */
+DEFINE_EVENT(transaction_restart, trans_restart_journal_preres_get,
+ TP_PROTO(struct bch_fs *c, unsigned long ip),
+ TP_ARGS(c, ip)
+);
+
+/*
+ * Fired together with the trans_restart() message
+ * "(iter relock after marking replicas)"; the call site sets
+ * ret = -EINTR.
+ */
+DEFINE_EVENT(transaction_restart, trans_restart_mark_replicas,
+ TP_PROTO(struct bch_fs *c, unsigned long ip),
+ TP_ARGS(c, ip)
+);
+
+/*
+ * Fired when race_fault() returns true at the call site, which sets
+ * ret = -EINTR and logs trans_restart(" (race)").
+ */
+DEFINE_EVENT(transaction_restart, trans_restart_fault_inject,
+ TP_PROTO(struct bch_fs *c, unsigned long ip),
+ TP_ARGS(c, ip)
+);
+
+/*
+ * Fired together with trans_restart(" (split)") when the call site's
+ * condition (!ret || (flags & BTREE_INSERT_NOUNLOCK)) holds; ret is then
+ * set to -EINTR.
+ */
+DEFINE_EVENT(transaction_restart, trans_restart_btree_node_split,
+ TP_PROTO(struct bch_fs *c, unsigned long ip),
+ TP_ARGS(c, ip)
+);
+
+/*
+ * Fired together with trans_restart(" (traverse)") when ret2 is nonzero
+ * at the call site, which then returns ret2.
+ */
+DEFINE_EVENT(transaction_restart, trans_restart_traverse,
+ TP_PROTO(struct bch_fs *c, unsigned long ip),
+ TP_ARGS(c, ip)
+);
+
+/*
+ * Fired together with trans_restart(" (atomic)") at the call site.
+ */
+DEFINE_EVENT(transaction_restart, trans_restart_atomic,
+ TP_PROTO(struct bch_fs *c, unsigned long ip),
+ TP_ARGS(c, ip)
+);
+
#endif /* _TRACE_BCACHE_H */
/* This part must be outside protection */
goto retry;
trans_restart();
+ trace_trans_restart_btree_node_reused(c,
+ iter->trans->ip);
return ERR_PTR(-EINTR);
}
}
}
}
- if (ret)
- __btree_node_lock_type(iter->trans->c, b, type);
- else
+ if (unlikely(!ret)) {
trans_restart();
+ trace_trans_restart_would_deadlock(iter->trans->c,
+ iter->trans->ip);
+ return false;
+ }
- return ret;
+ __btree_node_lock_type(iter->trans->c, b, type);
+ return true;
}
/* Btree iterator locking: */
if (trans->iters_live) {
trans_restart();
+ trace_trans_restart_iters_realloced(trans->c, trans->ip);
return -EINTR;
}
if (old_bytes) {
trans_restart();
+ trace_trans_restart_mem_realloced(trans->c, trans->ip);
return ERR_PTR(-EINTR);
}
}
memset(trans, 0, offsetof(struct btree_trans, iters_onstack));
trans->c = c;
+ trans->ip = _RET_IP_;
trans->size = ARRAY_SIZE(trans->iters_onstack);
trans->iters = trans->iters_onstack;
trans->updates = trans->updates_onstack;
struct btree_trans {
struct bch_fs *c;
+ unsigned long ip;
size_t nr_restarts;
u64 commit_start;
if (!bch2_btree_trans_relock(trans)) {
trans_restart(" (iter relock after journal preres get blocked)");
+ trace_trans_restart_journal_preres_get(c, trans->ip);
return -EINTR;
}
if (race_fault()) {
ret = -EINTR;
trans_restart(" (race)");
+ trace_trans_restart_fault_inject(c, trans->ip);
goto out;
}
*/
if (!ret || (flags & BTREE_INSERT_NOUNLOCK)) {
trans_restart(" (split)");
+ trace_trans_restart_btree_node_split(c, trans->ip);
ret = -EINTR;
}
break;
return 0;
trans_restart(" (iter relock after marking replicas)");
+ trace_trans_restart_mark_replicas(c, trans->ip);
ret = -EINTR;
break;
case BTREE_INSERT_NEED_JOURNAL_RES:
return 0;
trans_restart(" (iter relock after journal res get blocked)");
+ trace_trans_restart_journal_res_get(c, trans->ip);
ret = -EINTR;
break;
default:
if (ret2) {
trans_restart(" (traverse)");
+ trace_trans_restart_traverse(c, trans->ip);
return ret2;
}
return 0;
trans_restart(" (atomic)");
+ trace_trans_restart_atomic(c, trans->ip);
}
return ret;
return -1;
}
- if (m->alive)
+ if (!gc && m->alive)
bch2_stripes_heap_del(c, m, idx);
memset(m, 0, sizeof(*m));
struct replicas_delta {
s64 delta;
struct bch_replicas_entry r;
-};
+} __packed;
struct replicas_delta_list {
struct bch_fs_usage fs_usage;
unsigned i;
if (io->op.op.error) {
- bio_for_each_segment_all(bvec, bio, i)
+ bio_for_each_segment_all(bvec, bio, i) {
SetPageError(bvec->bv_page);
- set_bit(AS_EIO, &io->op.inode->v.i_mapping->flags);
+ mapping_set_error(bvec->bv_page->mapping, -EIO);
+ }
}
/*
if (ret)
return ret;
out:
- if (c->opts.journal_flush_disabled)
- return 0;
-
- ret = bch2_journal_flush_seq(&c->journal, inode->ei_journal_seq);
+ if (!c->opts.journal_flush_disabled)
+ ret = bch2_journal_flush_seq(&c->journal,
+ inode->ei_journal_seq);
ret2 = file_check_and_advance_wb_err(file);
return ret ?: ret2;
if (!tmpfile) {
bch2_inode_update_after_write(c, dir, &dir_u,
ATTR_MTIME|ATTR_CTIME);
- journal_seq_copy(dir, inode->ei_journal_seq);
+ journal_seq_copy(dir, journal_seq);
mutex_unlock(&dir->ei_update_lock);
}
iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
POS_MIN, BTREE_ITER_PREFETCH);
- mutex_lock(&c->replicas_gc_lock);
- bch2_replicas_gc_start(c, (1 << BCH_DATA_USER)|(1 << BCH_DATA_CACHED));
-
-
while ((k = bch2_btree_iter_peek(iter)).k &&
!(ret = bkey_err(k))) {
if (!bkey_extent_is_data(k.k) ||
break;
}
- BUG_ON(ret == -EINTR);
-
- bch2_trans_exit(&trans);
+ ret = bch2_trans_exit(&trans) ?: ret;
- bch2_replicas_gc_end(c, ret);
- mutex_unlock(&c->replicas_gc_lock);
+ BUG_ON(ret == -EINTR);
return ret;
}
bch2_trans_init(&trans, c);
closure_init_stack(&cl);
- mutex_lock(&c->replicas_gc_lock);
- bch2_replicas_gc_start(c, 1 << BCH_DATA_BTREE);
-
for (id = 0; id < BTREE_ID_NR; id++) {
for_each_btree_node(&trans, iter, id, POS_MIN,
BTREE_ITER_PREFETCH, b) {
ret = 0;
err:
- bch2_trans_exit(&trans);
+ ret = bch2_trans_exit(&trans) ?: ret;
- ret = bch2_replicas_gc_end(c, ret);
- mutex_unlock(&c->replicas_gc_lock);
+ BUG_ON(ret == -EINTR);
return ret;
}
+/*
+ * Runs bch2_dev_usrdata_drop(), then bch2_dev_metadata_drop(), then
+ * bch2_replicas_gc2(), in that order. The ?: chain stops at the first call
+ * that returns nonzero and returns that value; 0 means all three returned 0.
+ */
int bch2_dev_data_drop(struct bch_fs *c, unsigned dev_idx, int flags)
{
return bch2_dev_usrdata_drop(c, dev_idx, flags) ?:
- bch2_dev_metadata_drop(c, dev_idx, flags);
+ bch2_dev_metadata_drop(c, dev_idx, flags) ?:
+ bch2_replicas_gc2(c);
}
return ret;
}
-static int bch2_gc_data_replicas(struct bch_fs *c)
-{
- struct btree_trans trans;
- struct btree_iter *iter;
- struct bkey_s_c k;
- int ret;
-
- bch2_trans_init(&trans, c);
-
- mutex_lock(&c->replicas_gc_lock);
- bch2_replicas_gc_start(c, (1 << BCH_DATA_USER)|(1 << BCH_DATA_CACHED));
-
- for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS_MIN,
- BTREE_ITER_PREFETCH, k, ret) {
- ret = bch2_mark_bkey_replicas(c, k);
- if (ret)
- break;
- }
- ret = bch2_trans_exit(&trans) ?: ret;
-
- bch2_replicas_gc_end(c, ret);
- mutex_unlock(&c->replicas_gc_lock);
-
- return ret;
-}
-
-static int bch2_gc_btree_replicas(struct bch_fs *c)
-{
- struct btree_trans trans;
- struct btree_iter *iter;
- struct btree *b;
- unsigned id;
- int ret = 0;
-
- bch2_trans_init(&trans, c);
-
- mutex_lock(&c->replicas_gc_lock);
- bch2_replicas_gc_start(c, 1 << BCH_DATA_BTREE);
-
- for (id = 0; id < BTREE_ID_NR; id++) {
- for_each_btree_node(&trans, iter, id, POS_MIN,
- BTREE_ITER_PREFETCH, b) {
- ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(&b->key));
-
- bch2_trans_cond_resched(&trans);
- }
-
- ret = bch2_trans_iter_free(&trans, iter) ?: ret;
- }
-
- bch2_trans_exit(&trans);
-
- bch2_replicas_gc_end(c, ret);
- mutex_unlock(&c->replicas_gc_lock);
-
- return ret;
-}
-
static int bch2_move_btree(struct bch_fs *c,
move_pred_fn pred,
void *arg,
bch2_journal_meta(&c->journal);
}
- ret = bch2_gc_btree_replicas(c) ?: ret;
+ ret = bch2_replicas_gc2(c) ?: ret;
ret = bch2_move_data(c, NULL,
writepoint_hashed((unsigned long) current),
op.start,
op.end,
rereplicate_pred, c, stats) ?: ret;
- ret = bch2_gc_data_replicas(c) ?: ret;
+ ret = bch2_replicas_gc2(c) ?: ret;
break;
case BCH_DATA_OP_MIGRATE:
if (op.migrate.dev >= c->sb.nr_devices)
ret = bch2_journal_flush_device_pins(&c->journal, op.migrate.dev);
ret = bch2_move_btree(c, migrate_pred, &op, stats) ?: ret;
- ret = bch2_gc_btree_replicas(c) ?: ret;
+ ret = bch2_replicas_gc2(c) ?: ret;
ret = bch2_move_data(c, NULL,
writepoint_hashed((unsigned long) current),
op.start,
op.end,
migrate_pred, &op, stats) ?: ret;
- ret = bch2_gc_data_replicas(c) ?: ret;
+ ret = bch2_replicas_gc2(c) ?: ret;
break;
default:
ret = -EINVAL;
x(fsck, u8, \
OPT_MOUNT, \
OPT_BOOL(), \
- NO_SB_OPT, true, \
+ NO_SB_OPT, false, \
NULL, "Run fsck on mount") \
x(fix_errors, u8, \
OPT_MOUNT, \
return 0;
}
+/*
+ * bch2_replicas_gc2 - rebuild c->replicas keeping only entries still in use
+ *
+ * An entry is kept when its data type is BCH_DATA_JOURNAL, or when
+ * c->usage_base, c->usage[0] or c->usage[1] holds a nonzero counter at the
+ * entry's index. The kept entries are sorted, converted to the superblock
+ * representation and installed with replicas_table_update(); on success the
+ * superblock is written with bch2_write_super().
+ *
+ * Returns 0 on success, -ENOMEM if the scratch table cannot be allocated,
+ * -ENOSPC if bch2_cpu_replicas_to_sb_replicas() fails, or the error from
+ * replicas_table_update().
+ */
+int bch2_replicas_gc2(struct bch_fs *c)
+{
+ struct bch_replicas_cpu new = { 0 };
+ unsigned i, nr;
+ int ret = 0;
+
+ /* NOTE(review): presumably makes the usage counters read below current -- confirm */
+ bch2_journal_meta(&c->journal);
+retry:
+ /*
+  * Size the scratch table from an unlocked snapshot so the GFP_KERNEL
+  * allocation happens before sb_lock and mark_lock are taken.
+  */
+ nr = READ_ONCE(c->replicas.nr);
+ new.entry_size = READ_ONCE(c->replicas.entry_size);
+ new.entries = kcalloc(nr, new.entry_size, GFP_KERNEL);
+ if (!new.entries)
+ return -ENOMEM;
+
+ mutex_lock(&c->sb_lock);
+ percpu_down_write(&c->mark_lock);
+
+ /*
+  * The table changed shape between the snapshot and taking the locks:
+  * drop the locks, free the scratch buffer and start over.
+  */
+ if (nr != c->replicas.nr ||
+ new.entry_size != c->replicas.entry_size) {
+ percpu_up_write(&c->mark_lock);
+ mutex_unlock(&c->sb_lock);
+ kfree(new.entries);
+ goto retry;
+ }
+
+ /* Copy across every entry that is a journal entry or has nonzero usage */
+ for (i = 0; i < c->replicas.nr; i++) {
+ struct bch_replicas_entry *e =
+ cpu_replicas_entry(&c->replicas, i);
+
+ if (e->data_type == BCH_DATA_JOURNAL ||
+ c->usage_base->replicas[i] ||
+ percpu_u64_get(&c->usage[0]->replicas[i]) ||
+ percpu_u64_get(&c->usage[1]->replicas[i]))
+ memcpy(cpu_replicas_entry(&new, new.nr++),
+ e, new.entry_size);
+ }
+
+ bch2_cpu_replicas_sort(&new);
+
+ if (bch2_cpu_replicas_to_sb_replicas(c, &new)) {
+ ret = -ENOSPC;
+ goto err;
+ }
+
+ ret = replicas_table_update(c, &new);
+err:
+ /* Reached on success and failure alike: new.entries is always freed here */
+ kfree(new.entries);
+
+ percpu_up_write(&c->mark_lock);
+
+ /* The superblock is written only on success, with sb_lock still held */
+ if (!ret)
+ bch2_write_super(c);
+
+ mutex_unlock(&c->sb_lock);
+
+ return ret;
+}
+
int bch2_replicas_set_usage(struct bch_fs *c,
struct bch_replicas_entry *r,
u64 sectors)
int bch2_replicas_gc_end(struct bch_fs *, int);
int bch2_replicas_gc_start(struct bch_fs *, unsigned);
+int bch2_replicas_gc2(struct bch_fs *);
int bch2_replicas_set_usage(struct bch_fs *,
struct bch_replicas_entry *,
sizeof(struct btree_node_iter_set);
if (!(c->wq = alloc_workqueue("bcachefs",
- WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_HIGHPRI, 1)) ||
+ WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) ||
!(c->copygc_wq = alloc_workqueue("bcache_copygc",
- WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_HIGHPRI, 1)) ||
+ WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) ||
!(c->journal_reclaim_wq = alloc_workqueue("bcache_journal",
WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_HIGHPRI, 1)) ||
percpu_ref_init(&c->writes, bch2_writes_disabled,
break;
}
- BUG_ON(!ret);
+ if (ret >= 0)
+ ret = -EIO;
goto out;
}
nr++;
while (flags && (bit = __ffs(flags)) < nr) {
- pr_buf(out, "%s", list[bit]);
if (!first)
pr_buf(out, ",");
first = false;
+ pr_buf(out, "%s", list[bit]);
flags ^= 1 << bit;
}
}