X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libbcachefs%2Frebalance.c;h=4df981bd96df18966481ab05f76c68f2b18eea1c;hb=9690f783569ebeb166dfc1745c0ba0f48db523d0;hp=4797d620fe7723fb8c9591fc4f9b8ee7e4f9642f;hpb=72a408f84846fe702b8db4f158b678ee20bbf835;p=bcachefs-tools-debian diff --git a/libbcachefs/rebalance.c b/libbcachefs/rebalance.c index 4797d62..4df981b 100644 --- a/libbcachefs/rebalance.c +++ b/libbcachefs/rebalance.c @@ -6,6 +6,7 @@ #include "buckets.h" #include "clock.h" #include "disk_groups.h" +#include "errcode.h" #include "extents.h" #include "io.h" #include "move.h" @@ -17,47 +18,76 @@ #include #include -static inline bool rebalance_ptr_pred(struct bch_fs *c, - struct extent_ptr_decoded p, - struct bch_io_opts *io_opts) +/* + * Check if an extent should be moved: + * sets bit i of data_opts->rewrite_ptrs for each non-cached pointer i that is + * outside the background target or has the wrong compression type, and returns true if any bit was set + */ +static bool rebalance_pred(struct bch_fs *c, void *arg, + struct bkey_s_c k, + struct bch_io_opts *io_opts, + struct data_update_opts *data_opts) { - if (io_opts->background_target && - !bch2_dev_in_target(c, p.ptr.dev, io_opts->background_target) && - !p.ptr.cached) - return true; + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + unsigned i; + + data_opts->rewrite_ptrs = 0; + data_opts->target = io_opts->background_target; + data_opts->extra_replicas = 0; + data_opts->btree_insert_flags = 0; if (io_opts->background_compression && - p.crc.compression_type != - bch2_compression_opt_to_type[io_opts->background_compression]) - return true; + !bch2_bkey_is_incompressible(k)) { + const union bch_extent_entry *entry; + struct extent_ptr_decoded p; + + i = 0; + bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { + if (!p.ptr.cached && + p.crc.compression_type != + bch2_compression_opt_to_type[io_opts->background_compression]) + data_opts->rewrite_ptrs |= 1U << i; + i++; + } + } + + if (io_opts->background_target) { + const struct bch_extent_ptr *ptr; + + i = 0; + 
bkey_for_each_ptr(ptrs, ptr) { + if (!ptr->cached && + !bch2_dev_in_target(c, ptr->dev, io_opts->background_target)) + data_opts->rewrite_ptrs |= 1U << i; + i++; + } + } - return false; + return data_opts->rewrite_ptrs != 0; } void bch2_rebalance_add_key(struct bch_fs *c, struct bkey_s_c k, struct bch_io_opts *io_opts) { - struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); - const union bch_extent_entry *entry; - struct extent_ptr_decoded p; - - if (!bkey_extent_is_data(k.k)) - return; + struct data_update_opts update_opts = { 0 }; + struct bkey_ptrs_c ptrs; + const struct bch_extent_ptr *ptr; + unsigned i; - if (!io_opts->background_target && - !io_opts->background_compression) + if (!rebalance_pred(c, NULL, k, io_opts, &update_opts)) return; - bkey_for_each_ptr_decode(k.k, ptrs, p, entry) - if (rebalance_ptr_pred(c, p, io_opts)) { - struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev); - - if (atomic64_add_return(p.crc.compressed_size, - &ca->rebalance_work) == - p.crc.compressed_size) + i = 0; + ptrs = bch2_bkey_ptrs_c(k); + bkey_for_each_ptr(ptrs, ptr) { + if ((1U << i) & update_opts.rewrite_ptrs) + if (atomic64_add_return(k.k->size, + &bch_dev_bkey_exists(c, ptr->dev)->rebalance_work) == + k.k->size) rebalance_wakeup(c); - } + i++; + } } void bch2_rebalance_add_work(struct bch_fs *c, u64 sectors) @@ -67,37 +97,6 @@ void bch2_rebalance_add_work(struct bch_fs *c, u64 sectors) rebalance_wakeup(c); } -static enum data_cmd rebalance_pred(struct bch_fs *c, void *arg, - struct bkey_s_c k, - struct bch_io_opts *io_opts, - struct data_opts *data_opts) -{ - switch (k.k->type) { - case KEY_TYPE_extent: { - struct bkey_s_c_extent e = bkey_s_c_to_extent(k); - const union bch_extent_entry *entry; - struct extent_ptr_decoded p; - - /* Make sure we have room to add a new pointer: */ - if (bkey_val_u64s(e.k) + BKEY_EXTENT_PTR_U64s_MAX > - BKEY_EXTENT_VAL_U64s_MAX) - return DATA_SKIP; - - extent_for_each_ptr_decode(e, p, entry) - if (rebalance_ptr_pred(c, p, io_opts)) - goto 
found; - - return DATA_SKIP; -found: - data_opts->target = io_opts->background_target; - data_opts->btree_insert_flags = 0; - return DATA_ADD_REPLICAS; - } - default: - return DATA_SKIP; - } -} - struct rebalance_work { int dev_most_full_idx; unsigned dev_most_full_percent; @@ -176,20 +175,24 @@ static int bch2_rebalance_thread(void *arg) struct bch_fs_rebalance *r = &c->rebalance; struct io_clock *clock = &c->io_clock[WRITE]; struct rebalance_work w, p; + struct bch_move_stats move_stats; unsigned long start, prev_start; unsigned long prev_run_time, prev_run_cputime; unsigned long cputime, prev_cputime; - unsigned long io_start; + u64 io_start; long throttle; set_freezable(); - io_start = atomic_long_read(&clock->now); + io_start = atomic64_read(&clock->now); p = rebalance_work(c); prev_start = jiffies; prev_cputime = curr_cputime(); + bch2_move_stats_init(&move_stats, "rebalance"); while (!kthread_wait_freezable(r->enabled)) { + cond_resched(); + start = jiffies; cputime = curr_cputime(); @@ -213,17 +216,21 @@ static int bch2_rebalance_thread(void *arg) prev_run_time; if (w.dev_most_full_percent < 20 && throttle > 0) { - r->state = REBALANCE_THROTTLED; r->throttled_until_iotime = io_start + div_u64(w.dev_most_full_capacity * (20 - w.dev_most_full_percent), 50); - r->throttled_until_cputime = start + throttle; - bch2_kthread_io_clock_wait(clock, - r->throttled_until_iotime, - throttle); - continue; + if (atomic64_read(&clock->now) + clock->max_slop < + r->throttled_until_iotime) { + r->throttled_until_cputime = start + throttle; + r->state = REBALANCE_THROTTLED; + + bch2_kthread_io_clock_wait(clock, + r->throttled_until_iotime, + throttle); + continue; + } } /* minimum 1 mb/sec: */ @@ -233,66 +240,75 @@ static int bch2_rebalance_thread(void *arg) max(p.dev_most_full_percent, 1U) / max(w.dev_most_full_percent, 1U)); - io_start = atomic_long_read(&clock->now); + io_start = atomic64_read(&clock->now); p = w; prev_start = start; prev_cputime = cputime; r->state = 
REBALANCE_RUNNING; - memset(&r->move_stats, 0, sizeof(r->move_stats)); + memset(&move_stats, 0, sizeof(move_stats)); rebalance_work_reset(c); bch2_move_data(c, + 0, POS_MIN, + BTREE_ID_NR, POS_MAX, /* ratelimiting disabled for now */ NULL, /* &r->pd.rate, */ + &move_stats, writepoint_ptr(&c->rebalance_write_point), - POS_MIN, POS_MAX, - rebalance_pred, NULL, - &r->move_stats); + true, + rebalance_pred, NULL); } return 0; } -ssize_t bch2_rebalance_work_show(struct bch_fs *c, char *buf) +void bch2_rebalance_work_to_text(struct printbuf *out, struct bch_fs *c) { - struct printbuf out = _PBUF(buf, PAGE_SIZE); struct bch_fs_rebalance *r = &c->rebalance; struct rebalance_work w = rebalance_work(c); - char h1[21], h2[21]; - bch2_hprint(&PBUF(h1), w.dev_most_full_work << 9); - bch2_hprint(&PBUF(h2), w.dev_most_full_capacity << 9); - pr_buf(&out, "fullest_dev (%i):\t%s/%s\n", - w.dev_most_full_idx, h1, h2); + if (!out->nr_tabstops) + printbuf_tabstop_push(out, 20); + + prt_printf(out, "fullest_dev (%i):", w.dev_most_full_idx); + prt_tab(out); + + prt_human_readable_u64(out, w.dev_most_full_work << 9); + prt_printf(out, "/"); + prt_human_readable_u64(out, w.dev_most_full_capacity << 9); + prt_newline(out); + + prt_printf(out, "total work:"); + prt_tab(out); - bch2_hprint(&PBUF(h1), w.total_work << 9); - bch2_hprint(&PBUF(h2), c->capacity << 9); - pr_buf(&out, "total work:\t\t%s/%s\n", h1, h2); + prt_human_readable_u64(out, w.total_work << 9); + prt_printf(out, "/"); + prt_human_readable_u64(out, c->capacity << 9); + prt_newline(out); - pr_buf(&out, "rate:\t\t\t%u\n", r->pd.rate.rate); + prt_printf(out, "rate:"); + prt_tab(out); + prt_printf(out, "%u", r->pd.rate.rate); + prt_newline(out); switch (r->state) { case REBALANCE_WAITING: - pr_buf(&out, "waiting\n"); + prt_printf(out, "waiting"); break; case REBALANCE_THROTTLED: - bch2_hprint(&PBUF(h1), + prt_printf(out, "throttled for %lu sec or ", + (r->throttled_until_cputime - jiffies) / HZ); + prt_human_readable_u64(out, 
(r->throttled_until_iotime - - atomic_long_read(&c->io_clock[WRITE].now)) << 9); - pr_buf(&out, "throttled for %lu sec or %s io\n", - (r->throttled_until_cputime - jiffies) / HZ, - h1); + atomic64_read(&c->io_clock[WRITE].now)) << 9); + prt_printf(out, " io"); break; case REBALANCE_RUNNING: - pr_buf(&out, "running\n"); - pr_buf(&out, "pos %llu:%llu\n", - r->move_stats.pos.inode, - r->move_stats.pos.offset); + prt_printf(out, "running"); break; } - - return out.pos - buf; + prt_newline(out); } void bch2_rebalance_stop(struct bch_fs *c) @@ -317,13 +333,20 @@ void bch2_rebalance_stop(struct bch_fs *c) int bch2_rebalance_start(struct bch_fs *c) { struct task_struct *p; + int ret; + + if (c->rebalance.thread) + return 0; if (c->opts.nochanges) return 0; - p = kthread_create(bch2_rebalance_thread, c, "bch_rebalance"); - if (IS_ERR(p)) - return PTR_ERR(p); + p = kthread_create(bch2_rebalance_thread, c, "bch-rebalance/%s", c->name); + ret = PTR_ERR_OR_ZERO(p); + if (ret) { + bch_err(c, "error creating rebalance thread: %s", bch2_err_str(ret)); + return ret; + } get_task_struct(p); rcu_assign_pointer(c->rebalance.thread, p);