X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libbcachefs%2Fmove.c;h=64e39c10e34b91623d01ea0cd84424df10822c21;hb=837b76ce9507fa2a69ddeecc1e1b6d3fabb08227;hp=6633d21f604ab00fc476b8530be18ebc5ba13606;hpb=37270fc79cbe4ab62001893eebd16b6fde4b621b;p=bcachefs-tools-debian

diff --git a/libbcachefs/move.c b/libbcachefs/move.c
index 6633d21..64e39c1 100644
--- a/libbcachefs/move.c
+++ b/libbcachefs/move.c
@@ -2,17 +2,19 @@
 #include "bcachefs.h"
 #include "alloc_foreground.h"
-#include "bkey_on_stack.h"
+#include "bkey_buf.h"
 #include "btree_gc.h"
 #include "btree_update.h"
 #include "btree_update_interior.h"
 #include "buckets.h"
 #include "disk_groups.h"
+#include "ec.h"
 #include "inode.h"
 #include "io.h"
 #include "journal_reclaim.h"
 #include "move.h"
 #include "replicas.h"
+#include "subvolume.h"
 #include "super-io.h"
 #include "keylist.h"
 
@@ -53,41 +55,120 @@ struct moving_context {
 	wait_queue_head_t	wait;
};
 
+static int insert_snapshot_whiteouts(struct btree_trans *trans,
+				     enum btree_id id,
+				     struct bpos old_pos,
+				     struct bpos new_pos)
+{
+	struct bch_fs *c = trans->c;
+	struct btree_iter iter, update_iter;
+	struct bkey_s_c k;
+	struct snapshots_seen s;
+	int ret;
+
+	if (!btree_type_has_snapshots(id))
+		return 0;
+
+	snapshots_seen_init(&s);
+
+	if (!bkey_cmp(old_pos, new_pos))
+		return 0;
+
+	if (!snapshot_t(c, old_pos.snapshot)->children[0])
+		return 0;
+
+	bch2_trans_iter_init(trans, &iter, id, old_pos,
+			     BTREE_ITER_NOT_EXTENTS|
+			     BTREE_ITER_ALL_SNAPSHOTS);
+	while (1) {
+next:
+		k = bch2_btree_iter_prev(&iter);
+		ret = bkey_err(k);
+		if (ret)
+			break;
+
+		if (bkey_cmp(old_pos, k.k->p))
+			break;
+
+		if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, old_pos.snapshot)) {
+			struct bkey_i *update;
+			size_t i;
+
+			for (i = 0; i < s.nr; i++)
+				if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, s.d[i]))
+					goto next;
+
+			update = bch2_trans_kmalloc(trans, sizeof(struct bkey_i));
+
+			ret = PTR_ERR_OR_ZERO(update);
+			if (ret)
+				break;
+
+			bkey_init(&update->k);
+			update->k.p = new_pos;
+			update->k.p.snapshot = k.k->p.snapshot;
+
+			bch2_trans_iter_init(trans, &update_iter, id, update->k.p,
+					     BTREE_ITER_NOT_EXTENTS|
+					     BTREE_ITER_ALL_SNAPSHOTS|
+					     BTREE_ITER_INTENT);
+			ret   = bch2_btree_iter_traverse(&update_iter) ?:
+				bch2_trans_update(trans, &update_iter, update,
+					BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
+			bch2_trans_iter_exit(trans, &update_iter);
+			if (ret)
+				break;
+
+			ret = snapshots_seen_add(c, &s, k.k->p.snapshot);
+			if (ret)
+				break;
+		}
+	}
+	bch2_trans_iter_exit(trans, &iter);
+	kfree(s.d);
+
+	return ret;
+}
+
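Note on insert_snapshot_whiteouts(): when the index update below moves a key from old_pos to new_pos, every descendant snapshot that has its own version of the key at old_pos must not suddenly see the moved data at new_pos, so a whiteout (deletion marker) is inserted at new_pos in each such snapshot; the snapshots_seen list suppresses whiteouts already implied by one written in an ancestor snapshot. A standalone toy model of just that filtering (plain C, not the bcachefs API; snapshot numbering and scan order are invented for illustration):

#include <stdio.h>
#include <stdbool.h>

#define NR_SNAPSHOTS 6
/* toy snapshot tree: 0 -> {1, 2}, 1 -> {3, 4}, 2 -> {5} */
static const int parent[NR_SNAPSHOTS] = { -1, 0, 0, 1, 1, 2 };

static bool is_ancestor(int id, int ancestor)
{
	while (id != -1 && id != ancestor)
		id = parent[id];
	return id == ancestor;
}

int main(void)
{
	int seen[NR_SNAPSHOTS], nr_seen = 0;
	/* snapshots holding their own key at old_pos, ancestors first: */
	int keys[] = { 1, 3, 2, 5 };
	int old_snapshot = 0;	/* snapshot the moved extent lives in */

	for (unsigned i = 0; i < sizeof(keys) / sizeof(keys[0]); i++) {
		int s = keys[i];
		bool covered = false;

		if (!is_ancestor(s, old_snapshot))
			continue;	/* unrelated branch */
		for (int j = 0; j < nr_seen; j++)
			if (is_ancestor(s, seen[j]))
				covered = true;	/* ancestor already whited out */
		if (covered)
			continue;

		printf("whiteout at new_pos, snapshot %d\n", s);
		seen[nr_seen++] = s;
	}
	return 0;
}

Running this emits whiteouts only for snapshots 1 and 2; snapshots 3 and 5 inherit them from their ancestors, mirroring the s.d[] skip loop above.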
 static int bch2_migrate_index_update(struct bch_write_op *op)
 {
 	struct bch_fs *c = op->c;
 	struct btree_trans trans;
-	struct btree_iter *iter;
+	struct btree_iter iter;
 	struct migrate_write *m =
 		container_of(op, struct migrate_write, op);
+	struct open_bucket *ec_ob = ec_open_bucket(c, &op->open_buckets);
 	struct keylist *keys = &op->insert_keys;
+	struct bkey_buf _new, _insert;
 	int ret = 0;
 
-	bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
+	bch2_bkey_buf_init(&_new);
+	bch2_bkey_buf_init(&_insert);
+	bch2_bkey_buf_realloc(&_insert, c, U8_MAX);
+
+	bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024);
 
-	iter = bch2_trans_get_iter(&trans, m->btree_id,
-				   bkey_start_pos(&bch2_keylist_front(keys)->k),
-				   BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
+	bch2_trans_iter_init(&trans, &iter, m->btree_id,
+			     bkey_start_pos(&bch2_keylist_front(keys)->k),
+			     BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
 
 	while (1) {
 		struct bkey_s_c k;
 		struct bkey_i *insert;
 		struct bkey_i_extent *new;
-		BKEY_PADDED(k) _new, _insert;
 		const union bch_extent_entry *entry;
 		struct extent_ptr_decoded p;
+		struct bpos next_pos;
 		bool did_work = false;
-		int nr;
+		bool should_check_enospc;
+		s64 i_sectors_delta = 0, disk_sectors_delta = 0;
 
-		bch2_trans_reset(&trans, 0);
+		bch2_trans_begin(&trans);
 
-		k = bch2_btree_iter_peek_slot(iter);
+		k = bch2_btree_iter_peek_slot(&iter);
 		ret = bkey_err(k);
-		if (ret) {
-			if (ret == -EINTR)
-				continue;
-			break;
-		}
+		if (ret)
+			goto err;
 
 		new = bkey_i_to_extent(bch2_keylist_front(keys));
 
@@ -95,14 +176,14 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
 		    !bch2_bkey_matches_ptr(c, k, m->ptr, m->offset))
 			goto nomatch;
 
-		bkey_reassemble(&_insert.k, k);
-		insert = &_insert.k;
+		bkey_reassemble(_insert.k, k);
+		insert = _insert.k;
 
-		bkey_copy(&_new.k, bch2_keylist_front(keys));
-		new = bkey_i_to_extent(&_new.k);
-		bch2_cut_front(iter->pos, &new->k_i);
+		bch2_bkey_buf_copy(&_new, c, bch2_keylist_front(keys));
+		new = bkey_i_to_extent(_new.k);
+		bch2_cut_front(iter.pos, &new->k_i);
 
-		bch2_cut_front(iter->pos, insert);
+		bch2_cut_front(iter.pos, insert);
 		bch2_cut_back(new->k.p, insert);
 		bch2_cut_back(insert->k.p, &new->k_i);
 
@@ -117,7 +198,7 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
 			extent_for_each_ptr(extent_i_to_s(new), new_ptr)
 				new_ptr->cached = true;
 
-			bch2_bkey_drop_ptr(bkey_i_to_s(insert), old_ptr);
+			__bch2_bkey_drop_ptr(bkey_i_to_s(insert), old_ptr);
 		}
 
 		extent_for_each_ptr_decode(extent_i_to_s(new), p, entry) {
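Note on the trimming above: the four bch2_cut_front()/bch2_cut_back() calls clamp both the existing key and its replacement to the same subrange, so the index update only ever rewrites the slice of @k that the migrated write actually covers. A minimal standalone model of that arithmetic (ranges as [start, end) sector intervals; not the real extent code, which keys extents by end position):

#include <stdio.h>
#include <stdint.h>

struct range { uint64_t start, end; };

static void cut_front(uint64_t pos, struct range *r)
{
	if (pos > r->start)
		r->start = pos < r->end ? pos : r->end;
}

static void cut_back(uint64_t pos, struct range *r)
{
	if (pos < r->end)
		r->end = pos > r->start ? pos : r->start;
}

int main(void)
{
	struct range insert = { 0, 128 };	/* existing key k */
	struct range new    = { 16, 96 };	/* replacement from the keylist */
	uint64_t iter_pos   = 32;		/* current slot position */

	cut_front(iter_pos, &new);
	cut_front(iter_pos, &insert);
	cut_back(new.end, &insert);
	cut_back(insert.end, &new);

	printf("insert: [%llu, %llu)  new: [%llu, %llu)\n",
	       (unsigned long long) insert.start, (unsigned long long) insert.end,
	       (unsigned long long) new.start, (unsigned long long) new.end);
	return 0;
}

Both ranges end up as exactly [32, 96): the overlap of the slot position, the old key, and the new write.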
@@ -144,40 +225,45 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
 					 op->opts.background_target,
 					 op->opts.data_replicas);
 
-		/*
-		 * If we're not fully overwriting @k, and it's compressed, we
-		 * need a reservation for all the pointers in @insert
-		 */
-		nr = bch2_bkey_nr_ptrs_allocated(bkey_i_to_s_c(insert)) -
-			m->nr_ptrs_reserved;
+		ret = bch2_sum_sector_overwrites(&trans, &iter, insert,
+						 &should_check_enospc,
+						 &i_sectors_delta,
+						 &disk_sectors_delta);
+		if (ret)
+			goto err;
 
-		if (insert->k.size < k.k->size &&
-		    bch2_bkey_sectors_compressed(k) &&
-		    nr > 0) {
+		if (disk_sectors_delta > (s64) op->res.sectors) {
 			ret = bch2_disk_reservation_add(c, &op->res,
-					keylist_sectors(keys) * nr, 0);
+					disk_sectors_delta - op->res.sectors,
+					!should_check_enospc
+					? BCH_DISK_RESERVATION_NOFAIL : 0);
 			if (ret)
 				goto out;
-
-			m->nr_ptrs_reserved += nr;
-			goto next;
 		}
 
-		bch2_trans_update(&trans, iter, insert, 0);
+		next_pos = insert->k.p;
 
-		ret = bch2_trans_commit(&trans, &op->res,
+		ret   = insert_snapshot_whiteouts(&trans, m->btree_id,
+						  k.k->p, insert->k.p) ?:
+			bch2_trans_update(&trans, &iter, insert,
+				BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
+			bch2_trans_commit(&trans, &op->res,
 				op_journal_seq(op),
 				BTREE_INSERT_NOFAIL|
-				BTREE_INSERT_USE_RESERVE|
 				m->data_opts.btree_insert_flags);
-		if (!ret)
+		if (!ret) {
+			bch2_btree_iter_set_pos(&iter, next_pos);
 			atomic_long_inc(&c->extent_migrate_done);
+			if (ec_ob)
+				bch2_ob_add_backpointer(c, ec_ob, &insert->k);
+		}
+err:
 		if (ret == -EINTR)
 			ret = 0;
 		if (ret)
 			break;
 next:
-		while (bkey_cmp(iter->pos, bch2_keylist_front(keys)->k.p) >= 0) {
+		while (bkey_cmp(iter.pos, bch2_keylist_front(keys)->k.p) >= 0) {
 			bch2_keylist_pop_front(keys);
 			if (bch2_keylist_empty(keys))
 				goto out;
@@ -185,18 +271,21 @@ next:
 		}
 		continue;
 nomatch:
 		if (m->ctxt) {
-			BUG_ON(k.k->p.offset <= iter->pos.offset);
+			BUG_ON(k.k->p.offset <= iter.pos.offset);
 			atomic64_inc(&m->ctxt->stats->keys_raced);
-			atomic64_add(k.k->p.offset - iter->pos.offset,
+			atomic64_add(k.k->p.offset - iter.pos.offset,
 				     &m->ctxt->stats->sectors_raced);
 		}
 		atomic_long_inc(&c->extent_migrate_raced);
 		trace_move_race(&new->k);
-		bch2_btree_iter_next_slot(iter);
+		bch2_btree_iter_advance(&iter);
 		goto next;
 	}
 out:
+	bch2_trans_iter_exit(&trans, &iter);
 	bch2_trans_exit(&trans);
+	bch2_bkey_buf_exit(&_insert, c);
+	bch2_bkey_buf_exit(&_new, c);
 	BUG_ON(ret == -EINTR);
 	return ret;
 }
@@ -207,18 +296,13 @@ void bch2_migrate_read_done(struct migrate_write *m, struct bch_read_bio *rbio)
 	BUG_ON(!m->op.wbio.bio.bi_vcnt);
 
 	m->ptr		= rbio->pick.ptr;
-	m->offset	= rbio->pos.offset - rbio->pick.crc.offset;
+	m->offset	= rbio->data_pos.offset - rbio->pick.crc.offset;
 	m->op.devs_have	= rbio->devs_have;
-	m->op.pos	= rbio->pos;
+	m->op.pos	= rbio->data_pos;
 	m->op.version	= rbio->version;
 	m->op.crc	= rbio->pick.crc;
 	m->op.wbio.bio.bi_iter.bi_size = m->op.crc.compressed_size << 9;
 
-	if (bch2_csum_type_is_encryption(m->op.crc.csum_type)) {
-		m->op.nonce	= m->op.crc.nonce + m->op.crc.offset;
-		m->op.csum_type = m->op.crc.csum_type;
-	}
-
 	if (m->data_cmd == DATA_REWRITE)
 		bch2_dev_list_drop_dev(&m->op.devs_have, m->data_opts.rewrite_dev);
 }
@@ -233,6 +317,7 @@ int bch2_migrate_write_init(struct bch_fs *c, struct migrate_write *m,
 {
 	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
 	const union bch_extent_entry *entry;
+	struct bch_extent_crc_unpacked crc;
 	struct extent_ptr_decoded p;
 	int ret;
 
@@ -253,6 +338,18 @@ int bch2_migrate_write_init(struct bch_fs *c, struct migrate_write *m,
 	m->op.target	= data_opts.target,
 	m->op.write_point	= wp;
 
+	/*
+	 * op->csum_type is normally initialized from the fs/file's current
+	 * options - but if an extent is encrypted, we require that it stays
+	 * encrypted:
+	 */
+	bkey_for_each_crc(k.k, ptrs, crc, entry)
+		if (bch2_csum_type_is_encryption(crc.csum_type)) {
+			m->op.nonce	= crc.nonce + crc.offset;
+			m->op.csum_type = crc.csum_type;
+			break;
+		}
+
 	if (m->data_opts.btree_insert_flags & BTREE_INSERT_USE_RESERVE) {
 		m->op.alloc_reserve = RESERVE_MOVINGGC;
 		m->op.flags |= BCH_WRITE_ALLOC_NOWAIT;
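Note on the reservation change above: the old heuristic (keylist sectors times leftover pointer count, only for compressed partial overwrites) is replaced by exact accounting — bch2_sum_sector_overwrites() reports how many disk sectors the commit will consume beyond what it frees, and the write's existing reservation is topped up only by the shortfall, with BCH_DISK_RESERVATION_NOFAIL when no -ENOSPC check is wanted. A toy illustration of just that arithmetic (hypothetical numbers, not the real helper):

#include <stdio.h>
#include <stdint.h>

typedef int64_t s64;

/* top up an existing reservation only by what the overwrite still needs */
static s64 reservation_topup(s64 disk_sectors_delta, s64 already_reserved)
{
	return disk_sectors_delta > already_reserved
		? disk_sectors_delta - already_reserved
		: 0;
}

int main(void)
{
	/* e.g. 64 sectors going from 1 replica to 2: delta is +64 sectors */
	s64 delta = 64 * 2 - 64 * 1;

	printf("top-up: %lld sectors\n",
	       (long long) reservation_topup(delta, 16));	/* 48 */
	printf("top-up: %lld sectors\n",
	       (long long) reservation_topup(-64, 16));		/* 0: overwrite frees space */
	return 0;
}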
@@ -491,7 +588,9 @@ static int bch2_move_extent(struct btree_trans *trans,
 	 * ctxt when doing wakeup
 	 */
 	closure_get(&ctxt->cl);
-	bch2_read_extent(trans, &io->rbio, k, 0,
+	bch2_read_extent(trans, &io->rbio,
+			 bkey_start_pos(k.k),
+			 btree_id, k, 0,
 			 BCH_READ_NODECODE|
 			 BCH_READ_LAST_FRAGMENT);
 	return 0;
@@ -504,6 +603,37 @@ err:
 	return ret;
 }
 
+static int lookup_inode(struct btree_trans *trans, struct bpos pos,
+			struct bch_inode_unpacked *inode)
+{
+	struct btree_iter iter;
+	struct bkey_s_c k;
+	int ret;
+
+	bch2_trans_iter_init(trans, &iter, BTREE_ID_inodes, pos,
+			     BTREE_ITER_ALL_SNAPSHOTS);
+	k = bch2_btree_iter_peek(&iter);
+	ret = bkey_err(k);
+	if (ret)
+		goto err;
+
+	if (!k.k || bkey_cmp(k.k->p, pos)) {
+		ret = -ENOENT;
+		goto err;
+	}
+
+	ret = bkey_is_inode(k.k) ? 0 : -EIO;
+	if (ret)
+		goto err;
+
+	ret = bch2_inode_unpack(k, inode);
+	if (ret)
+		goto err;
+err:
+	bch2_trans_iter_exit(trans, &iter);
+	return ret;
+}
+
 static int __bch2_move_data(struct bch_fs *c,
 		struct moving_context *ctxt,
 		struct bch_ratelimit *rate,
@@ -516,24 +646,25 @@ static int __bch2_move_data(struct bch_fs *c,
 {
 	bool kthread = (current->flags & PF_KTHREAD) != 0;
 	struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
-	struct bkey_on_stack sk;
+	struct bkey_buf sk;
 	struct btree_trans trans;
-	struct btree_iter *iter;
+	struct btree_iter iter;
 	struct bkey_s_c k;
 	struct data_opts data_opts;
 	enum data_cmd data_cmd;
 	u64 delay, cur_inum = U64_MAX;
 	int ret = 0, ret2;
 
-	bkey_on_stack_init(&sk);
+	bch2_bkey_buf_init(&sk);
 	bch2_trans_init(&trans, c, 0, 0);
 
 	stats->data_type = BCH_DATA_user;
 	stats->btree_id	= btree_id;
-	stats->pos	= POS_MIN;
+	stats->pos	= start;
 
-	iter = bch2_trans_get_iter(&trans, btree_id, start,
-				   BTREE_ITER_PREFETCH);
+	bch2_trans_iter_init(&trans, &iter, btree_id, start,
+			     BTREE_ITER_PREFETCH|
+			     BTREE_ITER_ALL_SNAPSHOTS);
 
 	if (rate)
 		bch2_ratelimit_reset(rate);
@@ -561,10 +692,12 @@ static int __bch2_move_data(struct bch_fs *c,
 				try_to_freeze();
 			}
 		} while (delay);
-peek:
-		k = bch2_btree_iter_peek(iter);
 
-		stats->pos = iter->pos;
+		bch2_trans_begin(&trans);
+
+		k = bch2_btree_iter_peek(&iter);
+
+		stats->pos = iter.pos;
 
 		if (!k.k)
 			break;
@@ -577,18 +710,22 @@ peek:
 		if (!bkey_extent_is_direct_data(k.k))
 			goto next_nondata;
 
-		if (btree_id == BTREE_ID_EXTENTS &&
+		if (btree_id == BTREE_ID_extents &&
 		    cur_inum != k.k->p.inode) {
 			struct bch_inode_unpacked inode;
 
-			/* don't hold btree locks while looking up inode: */
-			bch2_trans_unlock(&trans);
-
 			io_opts = bch2_opts_to_inode_opts(c->opts);
-			if (!bch2_inode_find_by_inum(c, k.k->p.inode, &inode))
+
+			ret = lookup_inode(&trans,
+					SPOS(0, k.k->p.inode, k.k->p.snapshot),
+					&inode);
+			if (ret == -EINTR)
+				continue;
+
+			if (!ret)
 				bch2_io_opts_apply(&io_opts, bch2_inode_opts_get(&inode));
+
 			cur_inum = k.k->p.inode;
-			goto peek;
 		}
 
 		switch ((data_cmd = pred(c, arg, k, &io_opts, &data_opts))) {
@@ -605,13 +742,18 @@ peek:
 		}
 
 		/* unlock before doing IO: */
-		bkey_on_stack_reassemble(&sk, c, k);
+		bch2_bkey_buf_reassemble(&sk, c, k);
 		k = bkey_i_to_s_c(sk.k);
 		bch2_trans_unlock(&trans);
 
 		ret2 = bch2_move_extent(&trans, ctxt, wp, io_opts, btree_id, k,
 					data_cmd, data_opts);
 		if (ret2) {
+			if (ret2 == -EINTR) {
+				bch2_trans_begin(&trans);
+				continue;
+			}
+
 			if (ret2 == -ENOMEM) {
 				/* memory allocation failure, wait for some IO to finish */
 				bch2_move_ctxt_wait_for_io(ctxt);
@@ -628,37 +770,77 @@ next:
 			atomic64_add(k.k->size * bch2_bkey_nr_ptrs_allocated(k),
 				     &stats->sectors_seen);
next_nondata:
-		bch2_btree_iter_next(iter);
-		bch2_trans_cond_resched(&trans);
+		bch2_btree_iter_advance(&iter);
 	}
out:
-	ret = bch2_trans_exit(&trans) ?: ret;
-	bkey_on_stack_exit(&sk, c);
+
+	bch2_trans_iter_exit(&trans, &iter);
+	bch2_trans_exit(&trans);
+	bch2_bkey_buf_exit(&sk, c);
 
 	return ret;
 }
 
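Note on the -EINTR handling that now appears throughout __bch2_move_data() and bch2_migrate_index_update(): any btree operation may fail with -EINTR when the transaction had to drop locks, and the correct response is to restart the transaction with bch2_trans_begin() and retry the step rather than treat it as a real error. The skeleton of that idiom, as a standalone sketch (the stand-in btree_op() is invented for illustration):

#include <stdio.h>
#include <errno.h>

static int attempts;

/* stand-in for a btree operation that fails with -EINTR until locks settle */
static int btree_op(void)
{
	return ++attempts < 3 ? -EINTR : 0;
}

int main(void)
{
	int ret;

	do {
		/* bch2_trans_begin() equivalent: discard state from the failed try */
		ret = btree_op();
	} while (ret == -EINTR);

	printf("succeeded after %d attempts, ret %d\n", attempts, ret);
	return ret;
}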
+inline void bch_move_stats_init(struct bch_move_stats *stats, char *name)
+{
+	memset(stats, 0, sizeof(*stats));
+
+	scnprintf(stats->name, sizeof(stats->name),
+		  "%s", name);
+}
+
+static inline void progress_list_add(struct bch_fs *c,
+				     struct bch_move_stats *stats)
+{
+	mutex_lock(&c->data_progress_lock);
+	list_add(&stats->list, &c->data_progress_list);
+	mutex_unlock(&c->data_progress_lock);
+}
+
+static inline void progress_list_del(struct bch_fs *c,
+				     struct bch_move_stats *stats)
+{
+	mutex_lock(&c->data_progress_lock);
+	list_del(&stats->list);
+	mutex_unlock(&c->data_progress_lock);
+}
+
 int bch2_move_data(struct bch_fs *c,
+		   enum btree_id start_btree_id, struct bpos start_pos,
+		   enum btree_id end_btree_id,   struct bpos end_pos,
 		   struct bch_ratelimit *rate,
 		   struct write_point_specifier wp,
-		   struct bpos start,
-		   struct bpos end,
 		   move_pred_fn pred, void *arg,
 		   struct bch_move_stats *stats)
 {
 	struct moving_context ctxt = { .stats = stats };
+	enum btree_id id;
 	int ret;
 
+	progress_list_add(c, stats);
 	closure_init_stack(&ctxt.cl);
 	INIT_LIST_HEAD(&ctxt.reads);
 	init_waitqueue_head(&ctxt.wait);
 
 	stats->data_type = BCH_DATA_user;
 
-	ret = __bch2_move_data(c, &ctxt, rate, wp, start, end,
-			       pred, arg, stats, BTREE_ID_EXTENTS) ?:
-		__bch2_move_data(c, &ctxt, rate, wp, start, end,
-				 pred, arg, stats, BTREE_ID_REFLINK);
+	for (id = start_btree_id;
+	     id <= min_t(unsigned, end_btree_id, BTREE_ID_NR - 1);
+	     id++) {
+		stats->btree_id = id;
+
+		if (id != BTREE_ID_extents &&
+		    id != BTREE_ID_reflink)
+			continue;
+
+		ret = __bch2_move_data(c, &ctxt, rate, wp,
+				       id == start_btree_id ? start_pos : POS_MIN,
+				       id == end_btree_id   ? end_pos   : POS_MAX,
+				       pred, arg, stats, id);
+		if (ret)
+			break;
+	}
 
 	move_ctxt_wait_event(&ctxt, list_empty(&ctxt.reads));
 	closure_sync(&ctxt.cl);
@@ -669,37 +851,57 @@ int bch2_move_data(struct bch_fs *c,
 		 atomic64_read(&stats->sectors_moved),
 		 atomic64_read(&stats->keys_moved));
 
+	progress_list_del(c, stats);
 	return ret;
 }
 
+typedef enum data_cmd (*move_btree_pred)(struct bch_fs *, void *,
+					 struct btree *, struct bch_io_opts *,
+					 struct data_opts *);
+
 static int bch2_move_btree(struct bch_fs *c,
-			   move_pred_fn pred,
-			   void *arg,
+			   enum btree_id start_btree_id, struct bpos start_pos,
+			   enum btree_id end_btree_id,   struct bpos end_pos,
+			   move_btree_pred pred, void *arg,
 			   struct bch_move_stats *stats)
 {
+	bool kthread = (current->flags & PF_KTHREAD) != 0;
 	struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
 	struct btree_trans trans;
-	struct btree_iter *iter;
+	struct btree_iter iter;
 	struct btree *b;
-	unsigned id;
+	enum btree_id id;
 	struct data_opts data_opts;
 	enum data_cmd cmd;
 	int ret = 0;
 
 	bch2_trans_init(&trans, c, 0, 0);
+	progress_list_add(c, stats);
 
 	stats->data_type = BCH_DATA_btree;
 
-	for (id = 0; id < BTREE_ID_NR; id++) {
+	for (id = start_btree_id;
+	     id <= min_t(unsigned, end_btree_id, BTREE_ID_NR - 1);
+	     id++) {
 		stats->btree_id = id;
 
-		for_each_btree_node(&trans, iter, id, POS_MIN,
-				    BTREE_ITER_PREFETCH, b) {
-			stats->pos = iter->pos;
+		bch2_trans_node_iter_init(&trans, &iter, id, POS_MIN, 0, 0,
+					  BTREE_ITER_PREFETCH);
retry:
+		ret = 0;
+		while (bch2_trans_begin(&trans),
+		       (b = bch2_btree_iter_peek_node(&iter)) &&
+		       !(ret = PTR_ERR_OR_ZERO(b))) {
+			if (kthread && kthread_should_stop())
+				break;
 
-			switch ((cmd = pred(c, arg,
-					    bkey_i_to_s_c(&b->key),
-					    &io_opts, &data_opts))) {
+			if ((cmp_int(id, end_btree_id) ?:
+			     bpos_cmp(b->key.k.p, end_pos)) > 0)
+				break;
+
+			stats->pos = iter.pos;
+
+			switch ((cmd = pred(c, arg, b, &io_opts, &data_opts))) {
 			case DATA_SKIP:
 				goto next;
 			case DATA_SCRUB:
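Note on the new range parameters: bch2_move_data() and bch2_move_btree() now take a (btree id, position) pair on both ends instead of a single extents range, and the loop clamps the position range per btree — only the first btree starts at start_pos and only the last ends at end_pos. A standalone sketch of how the per-btree bounds fall out (toy ids and plain integers standing in for struct bpos):

#include <stdio.h>

enum btree_id { BTREE_ID_extents, BTREE_ID_reflink, BTREE_ID_NR };

int main(void)
{
	enum btree_id start_id = BTREE_ID_extents, end_id = BTREE_ID_reflink;
	unsigned long start_pos = 4096, end_pos = 8192;
	unsigned long pos_min = 0, pos_max = ~0UL;	/* stand-ins for POS_MIN/POS_MAX */

	for (int id = start_id; id <= end_id && id < BTREE_ID_NR; id++)
		printf("btree %d: [%lu, %lu]\n", id,
		       id == start_id ? start_pos : pos_min,
		       id == end_id   ? end_pos   : pos_max);
	return 0;
}

Here the extents btree is scanned from 4096 to the maximum position and the reflink btree from the minimum position to 8192, exactly the clamping done by the `id == start_btree_id ? start_pos : POS_MIN` arguments above.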
@@ -711,17 +913,33 @@ static int bch2_move_btree(struct bch_fs *c,
 				BUG();
 			}
 
-			ret = bch2_btree_node_rewrite(c, iter,
-					b->data->keys.seq, 0) ?: ret;
+			ret = bch2_btree_node_rewrite(&trans, &iter, b, 0) ?: ret;
+			if (ret == -EINTR)
+				continue;
+			if (ret)
+				break;
next:
-			bch2_trans_cond_resched(&trans);
+			bch2_btree_iter_next_node(&iter);
 		}
+		if (ret == -EINTR)
+			goto retry;
 
-		ret = bch2_trans_iter_free(&trans, iter) ?: ret;
+		bch2_trans_iter_exit(&trans, &iter);
+
+		if (kthread && kthread_should_stop())
+			break;
 	}
 
 	bch2_trans_exit(&trans);
 
+	if (ret)
+		bch_err(c, "error %i in bch2_move_btree", ret);
+
+	/* flush relevant btree updates */
+	closure_wait_event(&c->btree_interior_update_wait,
+			   !bch2_btree_interior_updates_nr_pending(c));
+
+	progress_list_del(c, stats);
 	return ret;
 }
 
@@ -741,16 +959,9 @@ static enum data_cmd rereplicate_pred(struct bch_fs *c, void *arg,
 				      struct data_opts *data_opts)
 {
 	unsigned nr_good = bch2_bkey_durability(c, k);
-	unsigned replicas = 0;
-
-	switch (k.k->type) {
-	case KEY_TYPE_btree_ptr:
-		replicas = c->opts.metadata_replicas;
-		break;
-	case KEY_TYPE_extent:
-		replicas = io_opts->data_replicas;
-		break;
-	}
+	unsigned replicas = bkey_is_btree_ptr(k.k)
+		? c->opts.metadata_replicas
+		: io_opts->data_replicas;
 
 	if (!nr_good || nr_good >= replicas)
 		return DATA_SKIP;
@@ -778,6 +989,83 @@ static enum data_cmd migrate_pred(struct bch_fs *c, void *arg,
 	return DATA_REWRITE;
 }
 
+static enum data_cmd rereplicate_btree_pred(struct bch_fs *c, void *arg,
+					    struct btree *b,
+					    struct bch_io_opts *io_opts,
+					    struct data_opts *data_opts)
+{
+	return rereplicate_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts);
+}
+
+static enum data_cmd migrate_btree_pred(struct bch_fs *c, void *arg,
+					struct btree *b,
+					struct bch_io_opts *io_opts,
+					struct data_opts *data_opts)
+{
+	return migrate_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts);
+}
+
+static bool bformat_needs_redo(struct bkey_format *f)
+{
+	unsigned i;
+
+	for (i = 0; i < f->nr_fields; i++) {
+		unsigned unpacked_bits = bch2_bkey_format_current.bits_per_field[i];
+		u64 unpacked_mask = ~((~0ULL << 1) << (unpacked_bits - 1));
+		u64 field_offset = le64_to_cpu(f->field_offset[i]);
+
+		if (f->bits_per_field[i] > unpacked_bits)
+			return true;
+
+		if ((f->bits_per_field[i] == unpacked_bits) && field_offset)
+			return true;
+
+		if (((field_offset + ((1ULL << f->bits_per_field[i]) - 1)) &
+		     unpacked_mask) <
+		    field_offset)
+			return true;
+	}
+
+	return false;
+}
+
+static enum data_cmd rewrite_old_nodes_pred(struct bch_fs *c, void *arg,
+					    struct btree *b,
+					    struct bch_io_opts *io_opts,
+					    struct data_opts *data_opts)
+{
+	if (b->version_ondisk != c->sb.version ||
+	    btree_node_need_rewrite(b) ||
+	    bformat_needs_redo(&b->format)) {
+		data_opts->target		= 0;
+		data_opts->nr_replicas		= 1;
+		data_opts->btree_insert_flags	= 0;
+		return DATA_REWRITE;
+	}
+
+	return DATA_SKIP;
+}
+
+int bch2_scan_old_btree_nodes(struct bch_fs *c, struct bch_move_stats *stats)
+{
+	int ret;
+
+	ret = bch2_move_btree(c,
+			      0,		POS_MIN,
			      BTREE_ID_NR,	SPOS_MAX,
+			      rewrite_old_nodes_pred, c, stats);
+	if (!ret) {
+		mutex_lock(&c->sb_lock);
+		c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done);
+		c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_bformat_overflow_done);
+		c->disk_sb.sb->version_min = c->disk_sb.sb->version;
+		bch2_write_super(c);
+		mutex_unlock(&c->sb_lock);
+	}
+
+	return ret;
+}
+
 int bch2_data_job(struct bch_fs *c,
 		  struct bch_move_stats *stats,
 		  struct bch_ioctl_data op)
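Note on bformat_needs_redo(): a btree node's key format packs each bkey field as a biased (field_offset) value in a reduced number of bits, and older versions could write formats where the offset plus the largest packable value wraps past the width of the unpacked field. The predicate flags a node for rewrite when a packed field is wider than the unpacked one, exactly as wide but biased, or able to overflow the unpacked range. The same test, lifted into a standalone program with worked values (the field widths are assumed for illustration):

#include <stdio.h>
#include <stdbool.h>
#include <stdint.h>

static bool field_overflows(unsigned packed_bits, unsigned unpacked_bits,
			    uint64_t field_offset)
{
	uint64_t unpacked_mask = ~((~0ULL << 1) << (unpacked_bits - 1));

	if (packed_bits > unpacked_bits)
		return true;
	if (packed_bits == unpacked_bits && field_offset)
		return true;
	/* largest decodable value wraps past the unpacked field's range: */
	return ((field_offset + ((1ULL << packed_bits) - 1)) & unpacked_mask)
		< field_offset;
}

int main(void)
{
	printf("%d\n", field_overflows(8, 8, 250));	/* 1: equal widths + bias */
	printf("%d\n", field_overflows(4, 8, 200));	/* 0: 200 + 15 = 215 fits */
	printf("%d\n", field_overflows(4, 8, 250));	/* 1: 265 wraps to 9 */
	return 0;
}

The third case is the subtle one: offset 250 with a 4-bit packed field can decode values up to 265, which wraps to 9 in an 8-bit unpacked field. Once the scan has rewritten all such nodes, bch2_scan_old_btree_nodes() records that in the BCH_COMPAT_bformat_overflow_done superblock bit.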
@@ -786,20 +1074,20 @@ int bch2_data_job(struct bch_fs *c,
 
 	switch (op.op) {
 	case BCH_DATA_OP_REREPLICATE:
+		bch_move_stats_init(stats, "rereplicate");
 		stats->data_type = BCH_DATA_journal;
 		ret = bch2_journal_flush_device_pins(&c->journal, -1);
 
-		ret = bch2_move_btree(c, rereplicate_pred, c, stats) ?: ret;
-
-		closure_wait_event(&c->btree_interior_update_wait,
-				   !bch2_btree_interior_updates_nr_pending(c));
-
+		ret = bch2_move_btree(c,
+				      op.start_btree,	op.start_pos,
+				      op.end_btree,	op.end_pos,
+				      rereplicate_btree_pred, c, stats) ?: ret;
 		ret = bch2_replicas_gc2(c) ?: ret;
 
-		ret = bch2_move_data(c, NULL,
-				     writepoint_hashed((unsigned long) current),
-				     op.start,
-				     op.end,
+		ret = bch2_move_data(c,
+				     op.start_btree,	op.start_pos,
+				     op.end_btree,	op.end_pos,
+				     NULL, writepoint_hashed((unsigned long) current),
 				     rereplicate_pred, c, stats) ?: ret;
 		ret = bch2_replicas_gc2(c) ?: ret;
 		break;
@@ -807,19 +1095,27 @@ int bch2_data_job(struct bch_fs *c,
 		if (op.migrate.dev >= c->sb.nr_devices)
 			return -EINVAL;
 
+		bch_move_stats_init(stats, "migrate");
 		stats->data_type = BCH_DATA_journal;
 		ret = bch2_journal_flush_device_pins(&c->journal, op.migrate.dev);
 
-		ret = bch2_move_btree(c, migrate_pred, &op, stats) ?: ret;
+		ret = bch2_move_btree(c,
+				      op.start_btree,	op.start_pos,
+				      op.end_btree,	op.end_pos,
+				      migrate_btree_pred, &op, stats) ?: ret;
 		ret = bch2_replicas_gc2(c) ?: ret;
 
-		ret = bch2_move_data(c, NULL,
-				     writepoint_hashed((unsigned long) current),
-				     op.start,
-				     op.end,
+		ret = bch2_move_data(c,
+				     op.start_btree,	op.start_pos,
+				     op.end_btree,	op.end_pos,
+				     NULL, writepoint_hashed((unsigned long) current),
 				     migrate_pred, &op, stats) ?: ret;
 		ret = bch2_replicas_gc2(c) ?: ret;
 		break;
+	case BCH_DATA_OP_REWRITE_OLD_NODES:
+		bch_move_stats_init(stats, "rewrite_old_nodes");
+		ret = bch2_scan_old_btree_nodes(c, stats);
+		break;
 	default:
 		ret = -EINVAL;
 	}
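Note on the `ret = step() ?: ret;` chaining in bch2_data_job(): this is GNU C's conditional with omitted middle operand, used so that every phase (journal flush, btree move, data move, replicas GC) runs regardless of earlier failures while a recorded error is never cleared by a later success (a later failure does replace it). A minimal standalone sketch, compiled as GNU C:

#include <stdio.h>

static int step(int r)
{
	return r;	/* stand-in for a job phase returning 0 or -errno */
}

int main(void)
{
	int ret = 0;

	ret = step(0)  ?: ret;	/* success: ret stays 0 */
	ret = step(-5) ?: ret;	/* failure recorded */
	ret = step(0)  ?: ret;	/* later success keeps the earlier error */

	printf("ret = %d\n", ret);	/* prints -5 */
	return ret ? 1 : 0;
}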