#include "errcode.h"
#include "error.h"
#include "inode.h"
-#include "io.h"
+#include "io_read.h"
+#include "io_write.h"
#include "journal_reclaim.h"
+#include "keylist.h"
#include "move.h"
#include "replicas.h"
#include "super-io.h"
-#include "keylist.h"
+#include "trace.h"
#include <linux/ioprio.h>
#include <linux/kthread.h>
-#include <trace/events/bcachefs.h>
+static void trace_move_extent2(struct bch_fs *c, struct bkey_s_c k)
+{ /* Fire the move_extent tracepoint, passing key k rendered as text. */
+ if (trace_move_extent_enabled()) { /* the formatting below only runs when this check passes */
+ struct printbuf buf = PRINTBUF;
+ /* render k into buf, pass buf.buf to the tracepoint, then call printbuf_exit() on buf (presumably releases it) */
+ bch2_bkey_val_to_text(&buf, c, k);
+ trace_move_extent(c, buf.buf);
+ printbuf_exit(&buf);
+ }
+}
+
+static void trace_move_extent_read2(struct bch_fs *c, struct bkey_s_c k)
+{ /* Fire the move_extent_read tracepoint, passing key k rendered as text. */
+ if (trace_move_extent_read_enabled()) { /* the formatting below only runs when this check passes */
+ struct printbuf buf = PRINTBUF;
+ /* render k into buf, pass buf.buf to the tracepoint, then call printbuf_exit() on buf (presumably releases it) */
+ bch2_bkey_val_to_text(&buf, c, k);
+ trace_move_extent_read(c, buf.buf);
+ printbuf_exit(&buf);
+ }
+}
+
+static void trace_move_extent_alloc_mem_fail2(struct bch_fs *c, struct bkey_s_c k)
+{ /* Fire the move_extent_alloc_mem_fail tracepoint, passing key k rendered as text. */
+ if (trace_move_extent_alloc_mem_fail_enabled()) { /* the formatting below only runs when this check passes */
+ struct printbuf buf = PRINTBUF;
+ /* render k into buf, pass buf.buf to the tracepoint, then call printbuf_exit() on buf (presumably releases it) */
+ bch2_bkey_val_to_text(&buf, c, k);
+ trace_move_extent_alloc_mem_fail(c, buf.buf);
+ printbuf_exit(&buf);
+ }
+}
static void progress_list_add(struct bch_fs *c, struct bch_move_stats *stats)
{
static void move_free(struct moving_io *io)
{
struct moving_context *ctxt = io->write.ctxt;
- struct bch_fs *c = ctxt->c;
if (io->b)
atomic_dec(&io->b->count);
struct bkey_i *n;
int ret;
- n = bch2_bkey_make_mut(trans, k);
+ n = bch2_bkey_make_mut_noupdate(trans, k);
ret = PTR_ERR_OR_ZERO(n);
if (ret)
return ret;
unsigned sectors = k.k->size, pages;
int ret = -ENOMEM;
+ trace_move_extent2(c, k);
+
bch2_data_update_opts_normalize(k, &data_opts);
if (!data_opts.rewrite_ptrs &&
if (!io)
goto err;
+ INIT_LIST_HEAD(&io->io_list);
io->write.ctxt = ctxt;
io->read_sectors = k.k->size;
io->write_sectors = k.k->size;
this_cpu_add(c->counters[BCH_COUNTER_io_move], k.k->size);
this_cpu_add(c->counters[BCH_COUNTER_move_extent_read], k.k->size);
- trace_move_extent_read(k.k);
-
+ trace_move_extent_read2(c, k);
mutex_lock(&ctxt->lock);
atomic_add(io->read_sectors, &ctxt->read_sectors);
err_free:
kfree(io);
err:
- trace_and_count(c, move_extent_alloc_mem_fail, k.k);
+ this_cpu_inc(c->counters[BCH_COUNTER_move_extent_alloc_mem_fail]);
+ trace_move_extent_alloc_mem_fail2(c, k);
return ret;
}
goto err;
if (!k.k || !bkey_eq(k.k->p, pos)) {
- ret = -ENOENT;
+ ret = -BCH_ERR_ENOENT_inode;
goto err;
}
struct bch_fs *c = ctxt->c;
struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
struct bkey_buf sk;
- struct btree_trans trans;
+ struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct bkey_s_c k;
struct data_update_opts data_opts;
int ret = 0, ret2;
bch2_bkey_buf_init(&sk);
- bch2_trans_init(&trans, c, 0, 0);
if (ctxt->stats) {
ctxt->stats->data_type = BCH_DATA_user;
ctxt->stats->pos = start;
}
- bch2_trans_iter_init(&trans, &iter, btree_id, start,
+ bch2_trans_iter_init(trans, &iter, btree_id, start,
BTREE_ITER_PREFETCH|
BTREE_ITER_ALL_SNAPSHOTS);
if (ctxt->rate)
bch2_ratelimit_reset(ctxt->rate);
- while (!move_ratelimit(&trans, ctxt)) {
- bch2_trans_begin(&trans);
+ while (!move_ratelimit(trans, ctxt)) {
+ bch2_trans_begin(trans);
k = bch2_btree_iter_peek(&iter);
if (!k.k)
if (!bkey_extent_is_direct_data(k.k))
goto next_nondata;
- ret = move_get_io_opts(&trans, &io_opts, k, &cur_inum);
+ ret = move_get_io_opts(trans, &io_opts, k, &cur_inum);
if (ret)
continue;
*/
bch2_bkey_buf_reassemble(&sk, c, k);
k = bkey_i_to_s_c(sk.k);
- bch2_trans_unlock(&trans);
- ret2 = bch2_move_extent(&trans, &iter, ctxt, NULL,
+ ret2 = bch2_move_extent(trans, &iter, ctxt, NULL,
io_opts, btree_id, k, data_opts);
if (ret2) {
if (bch2_err_matches(ret2, BCH_ERR_transaction_restart))
if (ret2 == -ENOMEM) {
/* memory allocation failure, wait for some IO to finish */
- bch2_move_ctxt_wait_for_io(ctxt, &trans);
+ bch2_move_ctxt_wait_for_io(ctxt, trans);
continue;
}
bch2_btree_iter_advance(&iter);
}
- bch2_trans_iter_exit(&trans, &iter);
- bch2_trans_exit(&trans);
+ bch2_trans_iter_exit(trans, &iter);
+ bch2_trans_put(trans);
bch2_bkey_buf_exit(&sk, c);
return ret;
{
struct moving_context ctxt;
enum btree_id id;
- int ret;
+ int ret = 0;
bch2_moving_ctxt_init(&ctxt, c, rate, stats, wp, wait_on_copygc);
for (id = start_btree_id;
- id <= min_t(unsigned, end_btree_id, BTREE_ID_NR - 1);
+ id <= min_t(unsigned, end_btree_id, btree_id_nr_alive(c) - 1);
id++) {
stats->btree_id = id;
id != BTREE_ID_reflink)
continue;
+ if (!bch2_btree_id_root(c, id)->b)
+ continue;
+
ret = __bch2_move_data(&ctxt,
id == start_btree_id ? start_pos : POS_MIN,
id == end_btree_id ? end_pos : POS_MAX,
return ret;
}
-void bch2_verify_bucket_evacuated(struct btree_trans *trans, struct bpos bucket, int gen)
-{
- struct bch_fs *c = trans->c;
- struct btree_iter iter;
- struct bkey_s_c k;
- struct printbuf buf = PRINTBUF;
- struct bch_backpointer bp;
- u64 bp_offset = 0;
- int ret;
-
- bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc,
- bucket, BTREE_ITER_CACHED);
-again:
- ret = lockrestart_do(trans,
- bkey_err(k = bch2_btree_iter_peek_slot(&iter)));
-
- if (!ret && k.k->type == KEY_TYPE_alloc_v4) {
- struct bkey_s_c_alloc_v4 a = bkey_s_c_to_alloc_v4(k);
-
- if (a.v->gen == gen &&
- a.v->dirty_sectors) {
- if (a.v->data_type == BCH_DATA_btree) {
- bch2_trans_unlock(trans);
- if (bch2_btree_interior_updates_flush(c))
- goto again;
- goto failed_to_evacuate;
- }
- }
- }
-
- bch2_trans_iter_exit(trans, &iter);
- return;
-failed_to_evacuate:
- bch2_trans_iter_exit(trans, &iter);
-
- if (test_bit(BCH_FS_EMERGENCY_RO, &c->flags))
- return;
-
- prt_printf(&buf, bch2_log_msg(c, "failed to evacuate bucket "));
- bch2_bkey_val_to_text(&buf, c, k);
-
- while (1) {
- bch2_trans_begin(trans);
-
- ret = bch2_get_next_backpointer(trans, bucket, gen,
- &bp_offset, &bp,
- BTREE_ITER_CACHED);
- if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
- continue;
- if (ret)
- break;
- if (bp_offset == U64_MAX)
- break;
-
- k = bch2_backpointer_get_key(trans, &iter,
- bucket, bp_offset, bp);
- ret = bkey_err(k);
- if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
- continue;
- if (ret)
- break;
- if (!k.k)
- continue;
- prt_newline(&buf);
- bch2_bkey_val_to_text(&buf, c, k);
- bch2_trans_iter_exit(trans, &iter);
- }
-
- bch2_print_string_as_lines(KERN_ERR, buf.buf);
- printbuf_exit(&buf);
-}
-
int __bch2_evacuate_bucket(struct btree_trans *trans,
struct moving_context *ctxt,
struct move_bucket_in_flight *bucket_in_flight,
struct data_update_opts data_opts;
unsigned dirty_sectors, bucket_size;
u64 fragmentation;
- u64 bp_offset = 0, cur_inum = U64_MAX;
+ u64 cur_inum = U64_MAX;
+ struct bpos bp_pos = POS_MIN;
int ret = 0;
+ trace_bucket_evacuate(c, &bucket);
+
bch2_bkey_buf_init(&sk);
+ /*
+ * We're not run in a context that handles transaction restarts:
+ */
+ bch2_trans_begin(trans);
+
bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc,
bucket, BTREE_ITER_CACHED);
ret = lockrestart_do(trans,
bch2_trans_iter_exit(trans, &iter);
if (ret) {
- bch_err(c, "%s: error looking up alloc key: %s", __func__, bch2_err_str(ret));
+ bch_err_msg(c, ret, "looking up alloc key");
goto err;
}
ret = bch2_btree_write_buffer_flush(trans);
if (ret) {
- bch_err(c, "%s: error flushing btree write buffer: %s", __func__, bch2_err_str(ret));
+ bch_err_msg(c, ret, "flushing btree write buffer");
goto err;
}
bch2_trans_begin(trans);
ret = bch2_get_next_backpointer(trans, bucket, gen,
- &bp_offset, &bp,
+ &bp_pos, &bp,
BTREE_ITER_CACHED);
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
continue;
if (ret)
goto err;
- if (bp_offset == U64_MAX)
+ if (bkey_eq(bp_pos, POS_MAX))
break;
if (!bp.level) {
const struct bch_extent_ptr *ptr;
- struct bkey_s_c k;
unsigned i = 0;
- k = bch2_backpointer_get_key(trans, &iter,
- bucket, bp_offset, bp);
+ k = bch2_backpointer_get_key(trans, &iter, bp_pos, bp, 0);
ret = bkey_err(k);
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
continue;
} else {
struct btree *b;
- b = bch2_backpointer_get_node(trans, &iter,
- bucket, bp_offset, bp);
+ b = bch2_backpointer_get_node(trans, &iter, bp_pos, bp);
ret = PTR_ERR_OR_ZERO(b);
if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
continue;
}
}
next:
- bp_offset++;
+ bp_pos = bpos_nosnap_successor(bp_pos);
}
trace_evacuate_bucket(c, &bucket, dirty_sectors, bucket_size, fragmentation, ret);
struct write_point_specifier wp,
bool wait_on_copygc)
{
- struct btree_trans trans;
+ struct btree_trans *trans = bch2_trans_get(c);
struct moving_context ctxt;
int ret;
- bch2_trans_init(&trans, c, 0, 0);
bch2_moving_ctxt_init(&ctxt, c, rate, stats, wp, wait_on_copygc);
- ret = __bch2_evacuate_bucket(&trans, &ctxt, NULL, bucket, gen, data_opts);
+ ret = __bch2_evacuate_bucket(trans, &ctxt, NULL, bucket, gen, data_opts);
bch2_moving_ctxt_exit(&ctxt);
- bch2_trans_exit(&trans);
+ bch2_trans_put(trans);
return ret;
}
{
bool kthread = (current->flags & PF_KTHREAD) != 0;
struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
- struct btree_trans trans;
+ struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct btree *b;
enum btree_id id;
struct data_update_opts data_opts;
int ret = 0;
- bch2_trans_init(&trans, c, 0, 0);
progress_list_add(c, stats);
stats->data_type = BCH_DATA_btree;
for (id = start_btree_id;
- id <= min_t(unsigned, end_btree_id, BTREE_ID_NR - 1);
+ id <= min_t(unsigned, end_btree_id, btree_id_nr_alive(c) - 1);
id++) {
stats->btree_id = id;
- bch2_trans_node_iter_init(&trans, &iter, id, POS_MIN, 0, 0,
+ if (!bch2_btree_id_root(c, id)->b)
+ continue;
+
+ bch2_trans_node_iter_init(trans, &iter, id, POS_MIN, 0, 0,
BTREE_ITER_PREFETCH);
retry:
ret = 0;
- while (bch2_trans_begin(&trans),
+ while (bch2_trans_begin(trans),
(b = bch2_btree_iter_peek_node(&iter)) &&
!(ret = PTR_ERR_OR_ZERO(b))) {
if (kthread && kthread_should_stop())
if (!pred(c, arg, b, &io_opts, &data_opts))
goto next;
- ret = bch2_btree_node_rewrite(&trans, &iter, b, 0) ?: ret;
+ ret = bch2_btree_node_rewrite(trans, &iter, b, 0) ?: ret;
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
continue;
if (ret)
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
- bch2_trans_iter_exit(&trans, &iter);
+ bch2_trans_iter_exit(trans, &iter);
if (kthread && kthread_should_stop())
break;
}
- bch2_trans_exit(&trans);
+ bch2_trans_put(trans);
if (ret)
- bch_err(c, "error in %s(): %s", __func__, bch2_err_str(ret));
+ bch_err_fn(c, ret);
bch2_btree_interior_updates_flush(c);
mutex_unlock(&c->sb_lock);
}
+ if (ret)
+ bch_err_fn(c, ret);
return ret;
}
return ret;
}
-void bch2_data_jobs_to_text(struct printbuf *out, struct bch_fs *c)
-{
- struct bch_move_stats *stats;
-
- mutex_lock(&c->data_progress_lock);
- list_for_each_entry(stats, &c->data_progress_list, list) {
- prt_printf(out, "%s: data type %s btree_id %s position: ",
- stats->name,
- bch2_data_types[stats->data_type],
- bch2_btree_ids[stats->btree_id]);
- bch2_bpos_to_text(out, stats->pos);
- prt_printf(out, "%s", "\n");
- }
- mutex_unlock(&c->data_progress_lock);
-}
-
-static void bch2_moving_ctxt_to_text(struct printbuf *out, struct moving_context *ctxt)
+static void bch2_moving_ctxt_to_text(struct printbuf *out, struct bch_fs *c, struct moving_context *ctxt)
{
+ struct bch_move_stats *stats = ctxt->stats;
struct moving_io *io;
- prt_printf(out, "%ps:", ctxt->fn);
+ prt_printf(out, "%s (%ps):", stats->name, ctxt->fn);
+ prt_newline(out);
+
+ prt_printf(out, " data type %s btree_id %s position: ",
+ bch2_data_types[stats->data_type],
+ bch2_btree_id_str(stats->btree_id));
+ bch2_bpos_to_text(out, stats->pos);
prt_newline(out);
printbuf_indent_add(out, 2);
- prt_printf(out, "reads: %u sectors %u",
+ prt_printf(out, "reads: ios %u/%u sectors %u/%u",
atomic_read(&ctxt->read_ios),
- atomic_read(&ctxt->read_sectors));
+ c->opts.move_ios_in_flight,
+ atomic_read(&ctxt->read_sectors),
+ c->opts.move_bytes_in_flight >> 9);
prt_newline(out);
- prt_printf(out, "writes: %u sectors %u",
+ prt_printf(out, "writes: ios %u/%u sectors %u/%u",
atomic_read(&ctxt->write_ios),
- atomic_read(&ctxt->write_sectors));
+ c->opts.move_ios_in_flight,
+ atomic_read(&ctxt->write_sectors),
+ c->opts.move_bytes_in_flight >> 9);
prt_newline(out);
printbuf_indent_add(out, 2);
mutex_lock(&ctxt->lock);
- list_for_each_entry(io, &ctxt->ios, io_list) {
+ list_for_each_entry(io, &ctxt->ios, io_list)
bch2_write_op_to_text(out, &io->write.op);
- }
mutex_unlock(&ctxt->lock);
printbuf_indent_sub(out, 4);
mutex_lock(&c->moving_context_lock);
list_for_each_entry(ctxt, &c->moving_context_list, list)
- bch2_moving_ctxt_to_text(out, ctxt);
+ bch2_moving_ctxt_to_text(out, c, ctxt);
mutex_unlock(&c->moving_context_lock);
}