-63924135a103cbf2411ef73e7ca9b1b6ebe265bd
+242d37cbd0abfa575ebf816c715e5bb9513c90a0
die("too many arguments");
return bchu_data(bcache_fs_open(fs_path), (struct bch_ioctl_data) {
- .op = BCH_DATA_OP_REREPLICATE,
- .start = POS_MIN,
- .end = POS_MAX,
+ .op = BCH_DATA_OP_REREPLICATE,
+ .start_btree = 0,
+ .start_pos = POS_MIN,
+ .end_btree = BTREE_ID_NR,
+ .end_pos = POS_MAX,
});
}
return bchu_data(fs, (struct bch_ioctl_data) {
.op = BCH_DATA_OP_MIGRATE,
- .start = POS_MIN,
- .end = POS_MAX,
+ .start_btree = 0,
+ .start_pos = POS_MIN,
+ .end_btree = BTREE_ID_NR,
+ .end_pos = POS_MAX,
.migrate.dev = dev_idx,
});
}
{
struct bch_inode_info *inode = to_bch_ei(vinode);
struct bch_fs *c = inode->v.i_sb->s_fs_info;
+ struct bch_hash_info hash = bch2_hash_info_init(c, &inode->ei_inode);
struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c_xattr xattr;
bch2_trans_begin(&trans);
iter = bch2_hash_lookup(&trans, bch2_xattr_hash_desc,
- &inode->ei_str_hash, inode->v.i_ino,
+ &hash, inode->v.i_ino,
&X_SEARCH(acl_to_xattr_type(type), "", 0),
0);
if (IS_ERR(iter)) {
struct btree_trans trans;
struct btree_iter *inode_iter;
struct bch_inode_unpacked inode_u;
+ struct bch_hash_info hash_info;
struct posix_acl *acl;
umode_t mode;
int ret;
goto err;
}
- ret = bch2_set_acl_trans(&trans, &inode_u,
- &inode->ei_str_hash,
- acl, type);
+ hash_info = bch2_hash_info_init(c, &inode_u);
+
+ ret = bch2_set_acl_trans(&trans, &inode_u, &hash_info, acl, type);
if (ret)
goto btree_err;
}
int bch2_acl_chmod(struct btree_trans *trans,
- struct bch_inode_info *inode,
+ struct bch_inode_unpacked *inode,
umode_t mode,
struct posix_acl **new_acl)
{
+ struct bch_hash_info hash_info = bch2_hash_info_init(trans->c, inode);
struct btree_iter *iter;
struct bkey_s_c_xattr xattr;
struct bkey_i_xattr *new;
int ret = 0;
iter = bch2_hash_lookup(trans, bch2_xattr_hash_desc,
- &inode->ei_str_hash, inode->v.i_ino,
+ &hash_info, inode->bi_inum,
&X_SEARCH(KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS, "", 0),
BTREE_ITER_INTENT);
if (IS_ERR(iter))
const struct bch_hash_info *,
struct posix_acl *, int);
int bch2_set_acl(struct inode *, struct posix_acl *, int);
-int bch2_acl_chmod(struct btree_trans *, struct bch_inode_info *,
+int bch2_acl_chmod(struct btree_trans *, struct bch_inode_unpacked *,
umode_t, struct posix_acl **);
#else
}
static inline int bch2_acl_chmod(struct btree_trans *trans,
- struct bch_inode_info *inode,
+ struct bch_inode_unpacked *inode,
umode_t mode,
struct posix_acl **new_acl)
{
LE64_BITMASK(BCH_SB_HAS_ERRORS, struct bch_sb, flags[0], 60, 61);
-LE64_BITMASK(BCH_SB_REFLINK, struct bch_sb, flags[0], 61, 62);
+/* bit 61 was reflink option */
LE64_BITMASK(BCH_SB_BIG_ENDIAN, struct bch_sb, flags[0], 62, 63);
/* 61-64 unused */
};
enum bch_data_ops {
- BCH_DATA_OP_SCRUB = 0,
- BCH_DATA_OP_REREPLICATE = 1,
- BCH_DATA_OP_MIGRATE = 2,
- BCH_DATA_OP_NR = 3,
+ BCH_DATA_OP_SCRUB = 0,
+ BCH_DATA_OP_REREPLICATE = 1,
+ BCH_DATA_OP_MIGRATE = 2,
+ BCH_DATA_OP_REWRITE_OLD_NODES = 3, /* rewrite btree nodes for which btree_node_need_rewrite() is true */
+ BCH_DATA_OP_NR = 4, /* number of ops defined above */
};
/*
* job. The file descriptor is O_CLOEXEC.
*/
struct bch_ioctl_data {
- __u32 op;
+ __u16 op;
+ __u8 start_btree;
+ __u8 end_btree;
__u32 flags;
- struct bpos start;
- struct bpos end;
+ struct bpos start_pos;
+ struct bpos end_pos;
union {
struct {
if (extent_entry_type(entry) == BCH_EXTENT_ENTRY_stripe_ptr) {
struct stripe *m = genradix_ptr(&c->stripes[true],
entry->stripe_ptr.idx);
+ union bch_extent_entry *next_ptr;
+
+ bkey_extent_entry_for_each_from(ptrs, next_ptr, entry)
+ if (extent_entry_type(next_ptr) == BCH_EXTENT_ENTRY_ptr)
+ goto found;
+ next_ptr = NULL;
+found:
+ if (!next_ptr) {
+ bch_err(c, "aieee, found stripe ptr with no data ptr");
+ continue;
+ }
if (!m || !m->alive ||
- !bch2_ptr_matches_stripe_m(m, p)) {
+ !__bch2_ptr_matches_stripe(&m->ptrs[entry->stripe_ptr.block],
+ &next_ptr->ptr,
+ m->sectors)) {
bch2_bkey_extent_entry_drop(new, entry);
goto again;
}
/* Iterate across keys (in leaf nodes only) */
-static void btree_iter_pos_changed(struct btree_iter *iter, int cmp)
+static void btree_iter_set_search_pos(struct btree_iter *iter, struct bpos new_pos)
{
+ int cmp = bkey_cmp(new_pos, iter->real_pos);
unsigned l = iter->level;
if (!cmp)
goto out;
+ iter->real_pos = new_pos;
+
if (unlikely(btree_iter_type(iter) == BTREE_ITER_CACHED)) {
btree_node_unlock(iter, 0);
iter->l[0].b = BTREE_ITER_NO_NODE_UP;
btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
else
btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK);
-}
-
-static void btree_iter_set_search_pos(struct btree_iter *iter, struct bpos new_pos)
-{
- int cmp = bkey_cmp(new_pos, iter->real_pos);
-
- iter->real_pos = new_pos;
-
- btree_iter_pos_changed(iter, cmp);
bch2_btree_iter_verify(iter);
}
char buf[100];
trans_for_each_iter(trans, iter)
- printk(KERN_ERR "iter: btree %s pos %s%s%s%s %ps\n",
+ printk(KERN_ERR "iter: btree %s pos %s%s%s%s %pS\n",
bch2_btree_ids[iter->btree_id],
(bch2_bpos_to_text(&PBUF(buf), iter->pos), buf),
btree_iter_live(trans, iter) ? " live" : "",
if (best &&
bkey_cmp(bpos_diff(best->pos, pos),
- bpos_diff(iter->pos, pos)) < 0)
+ bpos_diff(iter->real_pos, pos)) < 0)
continue;
best = iter;
while (1) {
struct bkey_s_c k;
unsigned bytes, sectors, offset_into_extent;
+ enum btree_id data_btree = BTREE_ID_extents;
bch2_btree_iter_set_pos(iter,
POS(inum, rbio->bio.bi_iter.bi_sector));
bch2_bkey_buf_reassemble(&sk, c, k);
- ret = bch2_read_indirect_extent(trans,
+ ret = bch2_read_indirect_extent(trans, &data_btree,
&offset_into_extent, &sk);
if (ret)
break;
if (bkey_extent_is_allocation(k.k))
bch2_add_page_sectors(&rbio->bio, k);
- bch2_read_extent(trans, rbio, k, offset_into_extent, flags);
+ bch2_read_extent(trans, rbio, iter->pos,
+ data_btree, k, offset_into_extent, flags);
if (flags & BCH_READ_LAST_FRAGMENT)
break;
u64 aligned_len;
loff_t ret = 0;
- if (!c->opts.reflink)
- return -EOPNOTSUPP;
-
if (remap_flags & ~(REMAP_FILE_DEDUP|REMAP_FILE_ADVISORY))
return -EINVAL;
struct bch_inode_info *src,
const char __user *name)
{
+ struct bch_hash_info hash = bch2_hash_info_init(c, &src->ei_inode);
struct bch_inode_info *dst;
struct inode *vinode = NULL;
char *kname = NULL;
qstr.name = kname;
ret = -ENOENT;
- inum = bch2_dirent_lookup(c, src->v.i_ino,
- &src->ei_str_hash,
+ inum = bch2_dirent_lookup(c, src->v.i_ino, &hash,
&qstr);
if (!inum)
goto err1;
{
struct bch_fs *c = vdir->i_sb->s_fs_info;
struct bch_inode_info *dir = to_bch_ei(vdir);
+ struct bch_hash_info hash = bch2_hash_info_init(c, &dir->ei_inode);
struct inode *vinode = NULL;
u64 inum;
- inum = bch2_dirent_lookup(c, dir->v.i_ino,
- &dir->ei_str_hash,
+ inum = bch2_dirent_lookup(c, dir->v.i_ino, &hash,
&dentry->d_name);
if (inum)
mutex_lock(&inode->ei_update_lock);
bch2_trans_init(&trans, c, 4, 1024);
- do {
- bch2_trans_begin(&trans);
- ret = bch2_link_trans(&trans,
+ ret = __bch2_trans_do(&trans, NULL, &inode->ei_journal_seq,
+ BTREE_INSERT_NOUNLOCK,
+ bch2_link_trans(&trans,
dir->v.i_ino,
inode->v.i_ino, &dir_u, &inode_u,
- &dentry->d_name) ?:
- bch2_trans_commit(&trans, NULL,
- &inode->ei_journal_seq,
- BTREE_INSERT_NOUNLOCK);
- } while (ret == -EINTR);
+ &dentry->d_name));
if (likely(!ret)) {
BUG_ON(inode_u.bi_inum != inode->v.i_ino);
bch2_lock_inodes(INODE_UPDATE_LOCK, dir, inode);
bch2_trans_init(&trans, c, 4, 1024);
- do {
- bch2_trans_begin(&trans);
-
- ret = bch2_unlink_trans(&trans,
+ ret = __bch2_trans_do(&trans, NULL, &dir->ei_journal_seq,
+ BTREE_INSERT_NOUNLOCK|
+ BTREE_INSERT_NOFAIL,
+ bch2_unlink_trans(&trans,
dir->v.i_ino, &dir_u,
- &inode_u, &dentry->d_name) ?:
- bch2_trans_commit(&trans, NULL,
- &dir->ei_journal_seq,
- BTREE_INSERT_NOUNLOCK|
- BTREE_INSERT_NOFAIL);
- } while (ret == -EINTR);
+ &inode_u, &dentry->d_name));
if (likely(!ret)) {
BUG_ON(inode_u.bi_inum != inode->v.i_ino);
goto err;
}
-retry:
- bch2_trans_begin(&trans);
- ret = bch2_rename_trans(&trans,
- src_dir->v.i_ino, &src_dir_u,
- dst_dir->v.i_ino, &dst_dir_u,
- &src_inode_u,
- &dst_inode_u,
- &src_dentry->d_name,
- &dst_dentry->d_name,
- mode) ?:
- bch2_trans_commit(&trans, NULL,
- &journal_seq,
- BTREE_INSERT_NOUNLOCK);
- if (ret == -EINTR)
- goto retry;
+ ret = __bch2_trans_do(&trans, NULL, &journal_seq,
+ BTREE_INSERT_NOUNLOCK,
+ bch2_rename_trans(&trans,
+ src_dir->v.i_ino, &src_dir_u,
+ dst_dir->v.i_ino, &dst_dir_u,
+ &src_inode_u,
+ &dst_inode_u,
+ &src_dentry->d_name,
+ &dst_dentry->d_name,
+ mode));
if (unlikely(ret))
goto err;
bch2_setattr_copy(inode, &inode_u, attr);
if (attr->ia_valid & ATTR_MODE) {
- ret = bch2_acl_chmod(&trans, inode, inode_u.bi_mode, &acl);
+ ret = bch2_acl_chmod(&trans, &inode_u, inode_u.bi_mode, &acl);
if (ret)
goto btree_err;
}
while ((k = bch2_btree_iter_peek(iter)).k &&
!(ret = bkey_err(k)) &&
bkey_cmp(iter->pos, end) < 0) {
+ enum btree_id data_btree = BTREE_ID_extents;
+
if (!bkey_extent_is_data(k.k) &&
k.k->type != KEY_TYPE_reservation) {
bch2_btree_iter_next(iter);
bch2_bkey_buf_reassemble(&cur, c, k);
- ret = bch2_read_indirect_extent(&trans,
+ ret = bch2_read_indirect_extent(&trans, &data_btree,
&offset_into_extent, &cur);
if (ret)
break;
inode->ei_flags = 0;
inode->ei_journal_seq = 0;
inode->ei_quota_reserved = 0;
- inode->ei_str_hash = bch2_hash_info_init(c, bi);
inode->ei_qid = bch_qid(bi);
inode->v.i_mapping->a_ops = &bch_address_space_operations;
struct mutex ei_quota_lock;
struct bch_qid ei_qid;
- struct bch_hash_info ei_str_hash;
-
/* copy of inode in btree: */
struct bch_inode_unpacked ei_inode;
};
bch2_bkey_buf_init(&sk);
bch2_trans_init(&trans, c, 0, 0);
- iter = bch2_trans_get_iter(&trans, BTREE_ID_extents,
- rbio->pos, BTREE_ITER_SLOTS);
+ iter = bch2_trans_get_iter(&trans, rbio->data_btree,
+ rbio->read_pos, BTREE_ITER_SLOTS);
retry:
rbio->bio.bi_status = 0;
if (!bch2_bkey_matches_ptr(c, k,
rbio->pick.ptr,
- rbio->pos.offset -
+ rbio->data_pos.offset -
rbio->pick.crc.offset)) {
/* extent we wanted to read no longer exists: */
rbio->hole = true;
goto out;
}
- ret = __bch2_read_extent(&trans, rbio, bvec_iter, k, 0, failed, flags);
+ ret = __bch2_read_extent(&trans, rbio, bvec_iter,
+ rbio->read_pos,
+ rbio->data_btree,
+ k, 0, failed, flags);
if (ret == READ_RETRY)
goto retry;
if (ret)
goto out;
}
-static void bch2_read_retry(struct bch_fs *c, struct bch_read_bio *rbio,
- struct bvec_iter bvec_iter, u64 inode,
- struct bch_io_failures *failed, unsigned flags)
-{
- struct btree_trans trans;
- struct btree_iter *iter;
- struct bkey_buf sk;
- struct bkey_s_c k;
- int ret;
-
- flags &= ~BCH_READ_LAST_FRAGMENT;
- flags |= BCH_READ_MUST_CLONE;
-
- bch2_bkey_buf_init(&sk);
- bch2_trans_init(&trans, c, 0, 0);
-retry:
- bch2_trans_begin(&trans);
-
- for_each_btree_key(&trans, iter, BTREE_ID_extents,
- POS(inode, bvec_iter.bi_sector),
- BTREE_ITER_SLOTS, k, ret) {
- unsigned bytes, sectors, offset_into_extent;
-
- bch2_bkey_buf_reassemble(&sk, c, k);
-
- offset_into_extent = iter->pos.offset -
- bkey_start_offset(k.k);
- sectors = k.k->size - offset_into_extent;
-
- ret = bch2_read_indirect_extent(&trans,
- &offset_into_extent, &sk);
- if (ret)
- break;
-
- k = bkey_i_to_s_c(sk.k);
-
- sectors = min(sectors, k.k->size - offset_into_extent);
-
- bch2_trans_unlock(&trans);
-
- bytes = min(sectors, bvec_iter_sectors(bvec_iter)) << 9;
- swap(bvec_iter.bi_size, bytes);
-
- ret = __bch2_read_extent(&trans, rbio, bvec_iter, k,
- offset_into_extent, failed, flags);
- switch (ret) {
- case READ_RETRY:
- goto retry;
- case READ_ERR:
- goto err;
- };
-
- if (bytes == bvec_iter.bi_size)
- goto out;
-
- swap(bvec_iter.bi_size, bytes);
- bio_advance_iter(&rbio->bio, &bvec_iter, bytes);
- }
-
- if (ret == -EINTR)
- goto retry;
- /*
- * If we get here, it better have been because there was an error
- * reading a btree node
- */
- BUG_ON(!ret);
- bch_err_inum_ratelimited(c, inode,
- "read error %i from btree lookup", ret);
-err:
- rbio->bio.bi_status = BLK_STS_IOERR;
-out:
- bch2_trans_exit(&trans);
- bch2_bkey_buf_exit(&sk, c);
- bch2_rbio_done(rbio);
-}
-
static void bch2_rbio_retry(struct work_struct *work)
{
struct bch_read_bio *rbio =
struct bch_fs *c = rbio->c;
struct bvec_iter iter = rbio->bvec_iter;
unsigned flags = rbio->flags;
- u64 inode = rbio->pos.inode;
+ u64 inode = rbio->read_pos.inode;
struct bch_io_failures failed = { .nr = 0 };
trace_read_retry(&rbio->bio);
flags |= BCH_READ_IN_RETRY;
flags &= ~BCH_READ_MAY_PROMOTE;
- if (flags & BCH_READ_NODECODE)
+ if (flags & BCH_READ_NODECODE) {
bch2_read_retry_nodecode(c, rbio, iter, inode, &failed, flags);
- else
- bch2_read_retry(c, rbio, iter, inode, &failed, flags);
+ } else {
+ flags &= ~BCH_READ_LAST_FRAGMENT;
+ flags |= BCH_READ_MUST_CLONE;
+
+ __bch2_read(c, rbio, iter, inode, &failed, flags);
+ }
}
static void bch2_rbio_error(struct bch_read_bio *rbio, int retry,
struct bch_read_bio *rbio)
{
struct bch_fs *c = rbio->c;
- u64 data_offset = rbio->pos.offset - rbio->pick.crc.offset;
+ u64 data_offset = rbio->data_pos.offset - rbio->pick.crc.offset;
struct bch_extent_crc_unpacked new_crc;
struct btree_iter *iter = NULL;
struct bkey_i *new;
if (crc_is_compressed(rbio->pick.crc))
return 0;
- iter = bch2_trans_get_iter(trans, BTREE_ID_extents, rbio->pos,
+ iter = bch2_trans_get_iter(trans, rbio->data_btree, rbio->data_pos,
BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
k = bch2_btree_iter_peek_slot(iter);
if ((ret = bkey_err(k)))
return;
}
- bch2_dev_inum_io_error(ca, rbio->pos.inode, (u64) rbio->bvec_iter.bi_sector,
+ bch2_dev_inum_io_error(ca, rbio->read_pos.inode, (u64) rbio->bvec_iter.bi_sector,
"data checksum error: expected %0llx:%0llx got %0llx:%0llx (type %u)",
rbio->pick.crc.csum.hi, rbio->pick.crc.csum.lo,
csum.hi, csum.lo, crc.csum_type);
bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR);
return;
decompression_err:
- bch_err_inum_ratelimited(c, rbio->pos.inode,
+ bch_err_inum_ratelimited(c, rbio->read_pos.inode,
"decompression error");
bch2_rbio_error(rbio, READ_ERR, BLK_STS_IOERR);
return;
if (!rbio->split)
rbio->bio.bi_end_io = rbio->end_io;
- /*
- * XXX: rbio->pos is not what we want here when reading from indirect
- * extents
- */
if (bch2_dev_inum_io_err_on(bio->bi_status, ca,
- rbio->pos.inode,
- rbio->pos.offset,
+ rbio->read_pos.inode,
+ rbio->read_pos.offset,
"data read error: %s",
bch2_blk_status_to_str(bio->bi_status))) {
bch2_rbio_error(rbio, READ_RETRY_AVOID, bio->bi_status);
}
int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
- struct bvec_iter iter, struct bkey_s_c k,
+ struct bvec_iter iter, struct bpos read_pos,
+ enum btree_id data_btree, struct bkey_s_c k,
unsigned offset_into_extent,
struct bch_io_failures *failed, unsigned flags)
{
struct bch_dev *ca;
struct promote_op *promote = NULL;
bool bounce = false, read_full = false, narrow_crcs = false;
- struct bpos pos = bkey_start_pos(k.k);
+ struct bpos data_pos = bkey_start_pos(k.k);
int pick_ret;
if (bkey_extent_is_inline_data(k.k)) {
pick.crc.offset ||
offset_into_extent));
- pos.offset += offset_into_extent;
+ data_pos.offset += offset_into_extent;
pick.ptr.offset += pick.crc.offset +
offset_into_extent;
offset_into_extent = 0;
/* XXX: only initialize this if needed */
rbio->devs_have = bch2_bkey_devs(k);
rbio->pick = pick;
- rbio->pos = pos;
+ rbio->read_pos = read_pos;
+ rbio->data_btree = data_btree;
+ rbio->data_pos = data_pos;
rbio->version = k.k->version;
rbio->promote = promote;
INIT_WORK(&rbio->work, NULL);
ret = READ_RETRY;
}
+ if (!ret)
+ goto out_read_done;
+
return ret;
}
return 0;
}
-void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode)
+void __bch2_read(struct bch_fs *c, struct bch_read_bio *rbio,
+ struct bvec_iter bvec_iter, u64 inode,
+ struct bch_io_failures *failed, unsigned flags)
{
struct btree_trans trans;
struct btree_iter *iter;
struct bkey_buf sk;
struct bkey_s_c k;
- unsigned flags = BCH_READ_RETRY_IF_STALE|
- BCH_READ_MAY_PROMOTE|
- BCH_READ_USER_MAPPED;
int ret;
- BUG_ON(rbio->_state);
BUG_ON(flags & BCH_READ_NODECODE);
- BUG_ON(flags & BCH_READ_IN_RETRY);
-
- rbio->c = c;
- rbio->start_time = local_clock();
bch2_bkey_buf_init(&sk);
bch2_trans_init(&trans, c, 0, 0);
bch2_trans_begin(&trans);
iter = bch2_trans_get_iter(&trans, BTREE_ID_extents,
- POS(inode, rbio->bio.bi_iter.bi_sector),
+ POS(inode, bvec_iter.bi_sector),
BTREE_ITER_SLOTS);
while (1) {
unsigned bytes, sectors, offset_into_extent;
+ enum btree_id data_btree = BTREE_ID_extents;
bch2_btree_iter_set_pos(iter,
- POS(inode, rbio->bio.bi_iter.bi_sector));
+ POS(inode, bvec_iter.bi_sector));
k = bch2_btree_iter_peek_slot(iter);
ret = bkey_err(k);
bch2_bkey_buf_reassemble(&sk, c, k);
- ret = bch2_read_indirect_extent(&trans,
+ ret = bch2_read_indirect_extent(&trans, &data_btree,
&offset_into_extent, &sk);
if (ret)
goto err;
*/
bch2_trans_unlock(&trans);
- bytes = min(sectors, bio_sectors(&rbio->bio)) << 9;
- swap(rbio->bio.bi_iter.bi_size, bytes);
+ bytes = min(sectors, bvec_iter_sectors(bvec_iter)) << 9;
+ swap(bvec_iter.bi_size, bytes);
- if (rbio->bio.bi_iter.bi_size == bytes)
+ if (bvec_iter.bi_size == bytes)
flags |= BCH_READ_LAST_FRAGMENT;
- bch2_read_extent(&trans, rbio, k, offset_into_extent, flags);
+ ret = __bch2_read_extent(&trans, rbio, bvec_iter, iter->pos,
+ data_btree, k,
+ offset_into_extent, failed, flags);
+ switch (ret) {
+ case READ_RETRY:
+ goto retry;
+ case READ_ERR:
+ goto err;
+ };
if (flags & BCH_READ_LAST_FRAGMENT)
break;
- swap(rbio->bio.bi_iter.bi_size, bytes);
- bio_advance(&rbio->bio, bytes);
+ swap(bvec_iter.bi_size, bytes);
+ bio_advance_iter(&rbio->bio, &bvec_iter, bytes);
}
out:
bch2_trans_exit(&trans);
struct bkey_buf *);
static inline int bch2_read_indirect_extent(struct btree_trans *trans,
+ enum btree_id *data_btree,
unsigned *offset_into_extent,
struct bkey_buf *k)
{
- return k->k->k.type == KEY_TYPE_reflink_p
- ? __bch2_read_indirect_extent(trans, offset_into_extent, k)
- : 0;
+ if (k->k->k.type != KEY_TYPE_reflink_p)
+ return 0; /* not a reflink pointer: *data_btree keeps the value the caller set */
+
+ *data_btree = BTREE_ID_reflink; /* reflink pointer: report the reflink btree as the data btree */
+ return __bch2_read_indirect_extent(trans, offset_into_extent, k); /* result is returned unchanged */
}
enum bch_read_flags {
};
int __bch2_read_extent(struct btree_trans *, struct bch_read_bio *,
- struct bvec_iter, struct bkey_s_c, unsigned,
+ struct bvec_iter, struct bpos, enum btree_id,
+ struct bkey_s_c, unsigned,
struct bch_io_failures *, unsigned);
static inline void bch2_read_extent(struct btree_trans *trans,
- struct bch_read_bio *rbio,
- struct bkey_s_c k,
- unsigned offset_into_extent,
- unsigned flags)
+ struct bch_read_bio *rbio, struct bpos read_pos,
+ enum btree_id data_btree, struct bkey_s_c k,
+ unsigned offset_into_extent, unsigned flags)
{
- __bch2_read_extent(trans, rbio, rbio->bio.bi_iter, k,
- offset_into_extent, NULL, flags);
+ __bch2_read_extent(trans, rbio, rbio->bio.bi_iter, read_pos,
+ data_btree, k, offset_into_extent, NULL, flags); /* uses the bio's own iterator, passes no failure list, discards the return value */
}
-void bch2_read(struct bch_fs *, struct bch_read_bio *, u64);
+void __bch2_read(struct bch_fs *, struct bch_read_bio *, struct bvec_iter,
+ u64, struct bch_io_failures *, unsigned flags);
+/*
+ * Entry point for a new read of @rbio's bio.
+ *
+ * Asserts that rbio->_state is zero, records @c and local_clock() in the
+ * rbio, then calls __bch2_read() over rbio->bio.bi_iter with an empty
+ * bch_io_failures list and the flags BCH_READ_RETRY_IF_STALE,
+ * BCH_READ_MAY_PROMOTE and BCH_READ_USER_MAPPED.  @inode is forwarded to
+ * __bch2_read() unchanged.
+ */
+static inline void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio,
+ u64 inode)
+{
+ struct bch_io_failures failed = { .nr = 0 };
+
+ BUG_ON(rbio->_state);
+
+ rbio->c = c;
+ rbio->start_time = local_clock();
+
+ __bch2_read(c, rbio, rbio->bio.bi_iter, inode, &failed,
+ BCH_READ_RETRY_IF_STALE|
+ BCH_READ_MAY_PROMOTE|
+ BCH_READ_USER_MAPPED);
+}
static inline struct bch_read_bio *rbio_init(struct bio *bio,
struct bch_io_opts opts)
struct bch_devs_list devs_have;
struct extent_ptr_decoded pick;
- /* start pos of data we read (may not be pos of data we want) */
- struct bpos pos;
+
+ /*
+ * pos we read from - different from data_pos for indirect extents:
+ */
+ struct bpos read_pos;
+
+ /*
+ * start pos of data we read (may not be pos of data we want) - for
+ * promote, narrow extents paths:
+ */
+ enum btree_id data_btree;
+ struct bpos data_pos;
struct bversion version;
struct promote_op *promote;
BUG_ON(!m->op.wbio.bio.bi_vcnt);
m->ptr = rbio->pick.ptr;
- m->offset = rbio->pos.offset - rbio->pick.crc.offset;
+ m->offset = rbio->data_pos.offset - rbio->pick.crc.offset;
m->op.devs_have = rbio->devs_have;
- m->op.pos = rbio->pos;
+ m->op.pos = rbio->data_pos;
m->op.version = rbio->version;
m->op.crc = rbio->pick.crc;
m->op.wbio.bio.bi_iter.bi_size = m->op.crc.compressed_size << 9;
* ctxt when doing wakeup
*/
closure_get(&ctxt->cl);
- bch2_read_extent(trans, &io->rbio, k, 0,
+ bch2_read_extent(trans, &io->rbio,
+ bkey_start_pos(k.k),
+ btree_id, k, 0,
BCH_READ_NODECODE|
BCH_READ_LAST_FRAGMENT);
return 0;
stats->data_type = BCH_DATA_user;
stats->btree_id = btree_id;
- stats->pos = POS_MIN;
+ stats->pos = start;
iter = bch2_trans_get_iter(&trans, btree_id, start,
BTREE_ITER_PREFETCH);
}
int bch2_move_data(struct bch_fs *c,
+ enum btree_id start_btree_id, struct bpos start_pos,
+ enum btree_id end_btree_id, struct bpos end_pos,
struct bch_ratelimit *rate,
struct write_point_specifier wp,
- struct bpos start,
- struct bpos end,
move_pred_fn pred, void *arg,
struct bch_move_stats *stats)
{
struct moving_context ctxt = { .stats = stats };
+ enum btree_id id;
int ret;
closure_init_stack(&ctxt.cl);
stats->data_type = BCH_DATA_user;
- ret = __bch2_move_data(c, &ctxt, rate, wp, start, end,
- pred, arg, stats, BTREE_ID_extents) ?:
- __bch2_move_data(c, &ctxt, rate, wp, start, end,
- pred, arg, stats, BTREE_ID_reflink);
+ for (id = start_btree_id;
+ id <= min_t(unsigned, end_btree_id, BTREE_ID_NR - 1);
+ id++) {
+ stats->btree_id = id;
+
+ if (id != BTREE_ID_extents &&
+ id != BTREE_ID_reflink)
+ continue;
+
+ ret = __bch2_move_data(c, &ctxt, rate, wp,
+ id == start_btree_id ? start_pos : POS_MIN,
+ id == end_btree_id ? end_pos : POS_MAX,
+ pred, arg, stats, id);
+ if (ret)
+ break;
+ }
+
move_ctxt_wait_event(&ctxt, list_empty(&ctxt.reads));
closure_sync(&ctxt.cl);
return ret;
}
+typedef enum data_cmd (*move_btree_pred)(struct bch_fs *, void *,
+ struct btree *, struct bch_io_opts *,
+ struct data_opts *);
+
static int bch2_move_btree(struct bch_fs *c,
- move_pred_fn pred,
- void *arg,
+ enum btree_id start_btree_id, struct bpos start_pos,
+ enum btree_id end_btree_id, struct bpos end_pos,
+ move_btree_pred pred, void *arg,
struct bch_move_stats *stats)
{
+ bool kthread = (current->flags & PF_KTHREAD) != 0;
struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
struct btree_trans trans;
struct btree_iter *iter;
struct btree *b;
- unsigned id;
+ enum btree_id id;
struct data_opts data_opts;
enum data_cmd cmd;
int ret = 0;
stats->data_type = BCH_DATA_btree;
- for (id = 0; id < BTREE_ID_NR; id++) {
+ for (id = start_btree_id;
+ id <= min_t(unsigned, end_btree_id, BTREE_ID_NR - 1);
+ id++) {
stats->btree_id = id;
- for_each_btree_node(&trans, iter, id, POS_MIN,
+ for_each_btree_node(&trans, iter, id,
+ id == start_btree_id ? start_pos : POS_MIN,
BTREE_ITER_PREFETCH, b) {
+ if (kthread && (ret = kthread_should_stop()))
+ goto out;
+
+ if ((cmp_int(id, end_btree_id) ?:
+ bkey_cmp(b->key.k.p, end_pos)) > 0)
+ break;
+
stats->pos = iter->pos;
- switch ((cmd = pred(c, arg,
- bkey_i_to_s_c(&b->key),
- &io_opts, &data_opts))) {
+ switch ((cmd = pred(c, arg, b, &io_opts, &data_opts))) {
case DATA_SKIP:
goto next;
case DATA_SCRUB:
ret = bch2_trans_iter_free(&trans, iter) ?: ret;
}
-
+out:
bch2_trans_exit(&trans);
return ret;
return DATA_REWRITE;
}
+static enum data_cmd rereplicate_btree_pred(struct bch_fs *c, void *arg,
+ struct btree *b,
+ struct bch_io_opts *io_opts,
+ struct data_opts *data_opts)
+{ /* btree-node adapter: applies rereplicate_pred() to the node's own key (&b->key) */
+ return rereplicate_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts);
+}
+/*
+ * Btree-node adapter for migrate_pred(): hands the node's own key
+ * (&b->key, viewed through bkey_i_to_s_c()) to migrate_pred() together with
+ * the other arguments, and returns its verdict unchanged.
+ */
+static enum data_cmd migrate_btree_pred(struct bch_fs *c, void *arg,
+ struct btree *b,
+ struct bch_io_opts *io_opts,
+ struct data_opts *data_opts)
+{
+ return migrate_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts);
+}
+/*
+ * Btree-node predicate used for BCH_DATA_OP_REWRITE_OLD_NODES.
+ *
+ * Returns DATA_SKIP when btree_node_need_rewrite(b) is false.  Otherwise
+ * fills @data_opts with target 0, nr_replicas 1 and no btree insert flags,
+ * and returns DATA_REWRITE.  @c, @arg and @io_opts are not used.
+ */
+static enum data_cmd rewrite_old_nodes_pred(struct bch_fs *c, void *arg,
+ struct btree *b,
+ struct bch_io_opts *io_opts,
+ struct data_opts *data_opts)
+{
+ if (!btree_node_need_rewrite(b))
+ return DATA_SKIP;
+
+ data_opts->target = 0;
+ data_opts->nr_replicas = 1;
+ data_opts->btree_insert_flags = 0;
+ return DATA_REWRITE;
+}
+
int bch2_data_job(struct bch_fs *c,
struct bch_move_stats *stats,
struct bch_ioctl_data op)
stats->data_type = BCH_DATA_journal;
ret = bch2_journal_flush_device_pins(&c->journal, -1);
- ret = bch2_move_btree(c, rereplicate_pred, c, stats) ?: ret;
+ ret = bch2_move_btree(c,
+ op.start_btree, op.start_pos,
+ op.end_btree, op.end_pos,
+ rereplicate_btree_pred, c, stats) ?: ret;
closure_wait_event(&c->btree_interior_update_wait,
!bch2_btree_interior_updates_nr_pending(c));
ret = bch2_replicas_gc2(c) ?: ret;
- ret = bch2_move_data(c, NULL,
- writepoint_hashed((unsigned long) current),
- op.start,
- op.end,
+ ret = bch2_move_data(c,
+ op.start_btree, op.start_pos,
+ op.end_btree, op.end_pos,
+ NULL, writepoint_hashed((unsigned long) current),
rereplicate_pred, c, stats) ?: ret;
ret = bch2_replicas_gc2(c) ?: ret;
break;
stats->data_type = BCH_DATA_journal;
ret = bch2_journal_flush_device_pins(&c->journal, op.migrate.dev);
- ret = bch2_move_btree(c, migrate_pred, &op, stats) ?: ret;
+ ret = bch2_move_btree(c,
+ op.start_btree, op.start_pos,
+ op.end_btree, op.end_pos,
+ migrate_btree_pred, &op, stats) ?: ret;
ret = bch2_replicas_gc2(c) ?: ret;
- ret = bch2_move_data(c, NULL,
- writepoint_hashed((unsigned long) current),
- op.start,
- op.end,
+ ret = bch2_move_data(c,
+ op.start_btree, op.start_pos,
+ op.end_btree, op.end_pos,
+ NULL, writepoint_hashed((unsigned long) current),
migrate_pred, &op, stats) ?: ret;
ret = bch2_replicas_gc2(c) ?: ret;
break;
+ case BCH_DATA_OP_REWRITE_OLD_NODES:
+
+ ret = bch2_move_btree(c,
+ op.start_btree, op.start_pos,
+ op.end_btree, op.end_pos,
+ rewrite_old_nodes_pred, &op, stats) ?: ret;
+ break;
default:
ret = -EINVAL;
}
struct bkey_s_c,
struct bch_io_opts *, struct data_opts *);
-int bch2_move_data(struct bch_fs *, struct bch_ratelimit *,
+int bch2_move_data(struct bch_fs *,
+ enum btree_id, struct bpos,
+ enum btree_id, struct bpos,
+ struct bch_ratelimit *,
struct write_point_specifier,
- struct bpos, struct bpos,
move_pred_fn, void *,
struct bch_move_stats *);
sizeof(h->data[0]),
bucket_offset_cmp, NULL);
- ret = bch2_move_data(c, &c->copygc_pd.rate,
+ ret = bch2_move_data(c,
+ 0, POS_MIN,
+ BTREE_ID_NR, POS_MAX,
+ &c->copygc_pd.rate,
writepoint_ptr(&c->copygc_write_point),
- POS_MIN, POS_MAX,
copygc_pred, NULL,
&move_stats);
OPT_BOOL(), \
BCH_SB_PRJQUOTA, false, \
NULL, "Enable project quotas") \
- x(reflink, u8, \
- OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
- OPT_BOOL(), \
- BCH_SB_REFLINK, true, \
- NULL, "Enable reflink support") \
x(degraded, u8, \
OPT_MOUNT, \
OPT_BOOL(), \
rebalance_work_reset(c);
bch2_move_data(c,
+ 0, POS_MIN,
+ BTREE_ID_NR, POS_MAX,
/* ratelimiting disabled for now */
NULL, /* &r->pd.rate, */
writepoint_ptr(&c->rebalance_write_point),
- POS_MIN, POS_MAX,
rebalance_pred, NULL,
&r->move_stats);
}
bch2_trans_update(trans, reflink_iter, r_v, 0);
r_p = bch2_trans_kmalloc(trans, sizeof(*r_p));
- if (IS_ERR(r_p))
- return PTR_ERR(r_p);
+ if (IS_ERR(r_p)) {
+ ret = PTR_ERR(r_p);
+ goto err;
+ }
orig->k.type = KEY_TYPE_reflink_p;
r_p = bkey_i_to_reflink_p(orig);
u64 src_done, dst_done;
int ret = 0, ret2 = 0;
- if (!c->opts.reflink)
- return -EOPNOTSUPP;
-
if (!percpu_ref_tryget(&c->writes))
return -EROFS;
int bch2_xattr_get(struct bch_fs *c, struct bch_inode_info *inode,
const char *name, void *buffer, size_t size, int type)
{
+ struct bch_hash_info hash = bch2_hash_info_init(c, &inode->ei_inode);
struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c_xattr xattr;
bch2_trans_init(&trans, c, 0, 0);
- iter = bch2_hash_lookup(&trans, bch2_xattr_hash_desc,
- &inode->ei_str_hash, inode->v.i_ino,
+ iter = bch2_hash_lookup(&trans, bch2_xattr_hash_desc, &hash,
+ inode->v.i_ino,
&X_SEARCH(type, name, strlen(name)),
0);
if (IS_ERR(iter)) {
}
static int bch2_xattr_list_bcachefs(struct bch_fs *c,
- struct bch_inode_info *inode,
+ struct bch_inode_unpacked *inode,
struct xattr_buf *buf,
bool all)
{
u64 v;
for (id = 0; id < Inode_opt_nr; id++) {
- v = bch2_inode_opt_get(&inode->ei_inode, id);
+ v = bch2_inode_opt_get(inode, id);
if (!v)
continue;
if (!all &&
- !(inode->ei_inode.bi_fields_set & (1 << id)))
+ !(inode->bi_fields_set & (1 << id)))
continue;
ret = __bch2_xattr_emit(prefix, bch2_inode_opts[id],
if (ret)
return ret;
- ret = bch2_xattr_list_bcachefs(c, inode, &buf, false);
+ ret = bch2_xattr_list_bcachefs(c, &inode->ei_inode, &buf, false);
if (ret)
return ret;
- ret = bch2_xattr_list_bcachefs(c, inode, &buf, true);
+ ret = bch2_xattr_list_bcachefs(c, &inode->ei_inode, &buf, true);
if (ret)
return ret;
{
struct bch_inode_info *inode = to_bch_ei(vinode);
struct bch_fs *c = inode->v.i_sb->s_fs_info;
+ struct bch_hash_info hash = bch2_hash_info_init(c, &inode->ei_inode);
return bch2_trans_do(c, NULL, &inode->ei_journal_seq, 0,
- bch2_xattr_set(&trans, inode->v.i_ino,
- &inode->ei_str_hash,
+ bch2_xattr_set(&trans, inode->v.i_ino, &hash,
name, value, size,
handler->flags, flags));
}