return l_len - r_len ?: memcmp(l.v->d_name, r.v->d_name, l_len);
}
+/*
+ * Visibility predicate for dirents (installed as .is_visible in
+ * bch2_dirent_hash_desc).
+ *
+ * A DT_SUBVOL dirent is visible only when its d_parent_subvol equals
+ * @inum.subvol; every other dirent type is always visible.
+ */
+static bool dirent_is_visible(subvol_inum inum, struct bkey_s_c k)
+{
+	struct bkey_s_c_dirent dirent = bkey_s_c_to_dirent(k);
+
+	/* Ordinary dirents are never filtered: */
+	if (dirent.v->d_type != DT_SUBVOL)
+		return true;
+
+	/* d_parent_subvol is stored little-endian; compare in CPU order: */
+	return le32_to_cpu(dirent.v->d_parent_subvol) == inum.subvol;
+}
+
const struct bch_hash_desc bch2_dirent_hash_desc = {
.btree_id = BTREE_ID_dirents,
.key_type = KEY_TYPE_dirent,
.hash_bkey = dirent_hash_bkey,
.cmp_key = dirent_cmp_key,
.cmp_bkey = dirent_cmp_bkey,
+ .is_visible = dirent_is_visible, /* returns false for DT_SUBVOL dirents whose d_parent_subvol differs from the given subvolume */
};
const char *bch2_dirent_invalid(const struct bch_fs *c, struct bkey_s_c k)
bch_scnmemcpy(out, d.v->d_name,
bch2_dirent_name_bytes(d));
- pr_buf(out, " -> %llu type %s", d.v->d_inum,
+ pr_buf(out, " -> %llu type %s",
+ d.v->d_type != DT_SUBVOL
+ ? le64_to_cpu(d.v->d_inum)
+ : le32_to_cpu(d.v->d_child_subvol),
d.v->d_type < BCH_DT_MAX
? bch2_d_types[d.v->d_type]
: "(bad d_type)");
}
static struct bkey_i_dirent *dirent_create_key(struct btree_trans *trans,
- u8 type, const struct qstr *name, u64 dst)
+ subvol_inum dir, u8 type,
+ const struct qstr *name, u64 dst)
{
struct bkey_i_dirent *dirent;
unsigned u64s = BKEY_U64s + dirent_val_u64s(name->len);
bkey_dirent_init(&dirent->k_i);
dirent->k.u64s = u64s;
- dirent->v.d_inum = cpu_to_le64(dst);
+
+ if (type != DT_SUBVOL) {
+ dirent->v.d_inum = cpu_to_le64(dst);
+ } else {
+ dirent->v.d_parent_subvol = cpu_to_le32(dir.subvol);
+ dirent->v.d_child_subvol = cpu_to_le32(dst);
+ }
+
dirent->v.d_type = type;
memcpy(dirent->v.d_name, name->name, name->len);
struct bkey_i_dirent *dirent;
int ret;
- dirent = dirent_create_key(trans, type, name, dst_inum);
+ dirent = dirent_create_key(trans, dir, type, name, dst_inum);
ret = PTR_ERR_OR_ZERO(dirent);
if (ret)
return ret;
dst->v.d_type = src.v->d_type;
}
-int __bch2_dirent_read_target(struct btree_trans *trans,
- struct bkey_s_c_dirent d,
- u32 *subvol, u32 *snapshot, u64 *inum,
- bool is_fsck)
+/*
+ * Resolve dirent @d, found in directory @dir, to the subvol_inum it names.
+ *
+ * For non-subvolume dirents the target lives in @dir's subvolume and the inode
+ * number is taken from the dirent itself. For DT_SUBVOL dirents the target
+ * subvolume is d_child_subvol and the inode number is read from that
+ * subvolume's record via bch2_subvolume_get().
+ *
+ * Returns 1 if @d is a DT_SUBVOL dirent whose d_parent_subvol is not
+ * @dir.subvol (callers treat this as "not visible from this directory"),
+ * otherwise the result of the subvolume lookup (0 when no lookup is needed).
+ */
+static int bch2_dirent_read_target(struct btree_trans *trans, subvol_inum dir,
+ struct bkey_s_c_dirent d, subvol_inum *target)
{
struct bch_subvolume s;
int ret = 0;
- *subvol = 0;
- *snapshot = d.k->p.snapshot;
+ /*
+ * d_parent_subvol is stored little-endian on disk; convert it before
+ * comparing against the CPU-order dir.subvol:
+ */
+ if (d.v->d_type == DT_SUBVOL &&
+ le32_to_cpu(d.v->d_parent_subvol) != dir.subvol)
+ return 1;
if (likely(d.v->d_type != DT_SUBVOL)) {
- *inum = le64_to_cpu(d.v->d_inum);
+ target->subvol = dir.subvol;
+ target->inum = le64_to_cpu(d.v->d_inum);
} else {
- *subvol = le64_to_cpu(d.v->d_inum);
+ target->subvol = le32_to_cpu(d.v->d_child_subvol);
- ret = bch2_subvolume_get(trans, *subvol, !is_fsck, BTREE_ITER_CACHED, &s);
+ ret = bch2_subvolume_get(trans, target->subvol, true, BTREE_ITER_CACHED, &s);
- *snapshot = le32_to_cpu(s.snapshot);
- *inum = le64_to_cpu(s.inode);
+ target->inum = le64_to_cpu(s.inode);
}
return ret;
}
-static int bch2_dirent_read_target(struct btree_trans *trans, subvol_inum dir,
- struct bkey_s_c_dirent d, subvol_inum *target)
-{
- u32 snapshot;
- int ret = 0;
-
- ret = __bch2_dirent_read_target(trans, d, &target->subvol, &snapshot,
- &target->inum, false);
- if (!target->subvol)
- target->subvol = dir.subvol;
-
- return ret;
-}
-
int bch2_dirent_rename(struct btree_trans *trans,
subvol_inum src_dir, struct bch_hash_info *src_hash,
subvol_inum dst_dir, struct bch_hash_info *dst_hash,
struct bkey_i_dirent *new_src = NULL, *new_dst = NULL;
struct bpos dst_pos =
POS(dst_dir.inum, bch2_dirent_hash(dst_hash, dst_name));
+ unsigned src_type = 0, dst_type = 0, src_update_flags = 0;
int ret = 0;
if (src_dir.subvol != dst_dir.subvol)
memset(src_inum, 0, sizeof(*src_inum));
memset(dst_inum, 0, sizeof(*dst_inum));
- /*
- * Lookup dst:
- *
- * Note that in BCH_RENAME mode, we're _not_ checking if
- * the target already exists - we're relying on the VFS
- * to do that check for us for correctness:
- */
- ret = mode == BCH_RENAME
- ? bch2_hash_hole(trans, &dst_iter, bch2_dirent_hash_desc,
- dst_hash, dst_dir, dst_name)
- : bch2_hash_lookup(trans, &dst_iter, bch2_dirent_hash_desc,
- dst_hash, dst_dir, dst_name,
- BTREE_ITER_INTENT);
- if (ret)
- goto out;
-
- old_dst = bch2_btree_iter_peek_slot(&dst_iter);
- ret = bkey_err(old_dst);
- if (ret)
- goto out;
-
- if (mode != BCH_RENAME) {
- ret = bch2_dirent_read_target(trans, dst_dir,
- bkey_s_c_to_dirent(old_dst), dst_inum);
- if (ret)
- goto out;
- }
- if (mode != BCH_RENAME_EXCHANGE)
- *src_offset = dst_iter.pos.offset;
-
/* Lookup src: */
ret = bch2_hash_lookup(trans, &src_iter, bch2_dirent_hash_desc,
src_hash, src_dir, src_name,
if (ret)
goto out;
+ src_type = bkey_s_c_to_dirent(old_src).v->d_type;
+
+ if (src_type == DT_SUBVOL && mode == BCH_RENAME_EXCHANGE)
+ return -EOPNOTSUPP;
+
+
+ /* Lookup dst: */
+ if (mode == BCH_RENAME) {
+ /*
+ * Note that we're _not_ checking if the target already exists -
+ * we're relying on the VFS to do that check for us for
+ * correctness:
+ */
+ ret = bch2_hash_hole(trans, &dst_iter, bch2_dirent_hash_desc,
+ dst_hash, dst_dir, dst_name);
+ if (ret)
+ goto out;
+ } else {
+ ret = bch2_hash_lookup(trans, &dst_iter, bch2_dirent_hash_desc,
+ dst_hash, dst_dir, dst_name,
+ BTREE_ITER_INTENT);
+ if (ret)
+ goto out;
+
+ old_dst = bch2_btree_iter_peek_slot(&dst_iter);
+ ret = bkey_err(old_dst);
+ if (ret)
+ goto out;
+
+ ret = bch2_dirent_read_target(trans, dst_dir,
+ bkey_s_c_to_dirent(old_dst), dst_inum);
+ if (ret)
+ goto out;
+
+ dst_type = bkey_s_c_to_dirent(old_dst).v->d_type;
+
+ if (dst_type == DT_SUBVOL)
+ return -EOPNOTSUPP;
+ }
+
+ if (mode != BCH_RENAME_EXCHANGE)
+ *src_offset = dst_iter.pos.offset;
+
/* Create new dst key: */
- new_dst = dirent_create_key(trans, 0, dst_name, 0);
+ new_dst = dirent_create_key(trans, dst_dir, 0, dst_name, 0);
ret = PTR_ERR_OR_ZERO(new_dst);
if (ret)
goto out;
/* Create new src key: */
if (mode == BCH_RENAME_EXCHANGE) {
- new_src = dirent_create_key(trans, 0, src_name, 0);
+ new_src = dirent_create_key(trans, src_dir, 0, src_name, 0);
ret = PTR_ERR_OR_ZERO(new_src);
if (ret)
goto out;
* If we're not overwriting, we can just insert
* new_dst at the src position:
*/
- new_dst->k.p = src_iter.pos;
- bch2_trans_update(trans, &src_iter,
- &new_dst->k_i, 0);
- goto out_set_offset;
+ new_src = new_dst;
+ new_src->k.p = src_iter.pos;
+ goto out_set_src;
} else {
/* If we're overwriting, we can't insert new_dst
* at a different slot because it has to
}
}
- bch2_trans_update(trans, &src_iter, &new_src->k_i, 0);
bch2_trans_update(trans, &dst_iter, &new_dst->k_i, 0);
-out_set_offset:
+out_set_src:
+
+ /*
+ * If we're deleting a subvolume, we need to really delete the dirent,
+ * not just emit a whiteout in the current snapshot:
+ */
+ if (src_type == DT_SUBVOL) {
+ bch2_btree_iter_set_snapshot(&src_iter, old_src.k->p.snapshot);
+ ret = bch2_btree_iter_traverse(&src_iter);
+ if (ret)
+ goto out;
+
+ new_src->k.p = src_iter.pos;
+ src_update_flags |= BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE;
+ }
+
+ bch2_trans_update(trans, &src_iter, &new_src->k_i, src_update_flags);
+
if (mode == BCH_RENAME_EXCHANGE)
*src_offset = new_src->k.p.offset;
*dst_offset = new_dst->k.p.offset;
d = bkey_s_c_to_dirent(k);
ret = bch2_dirent_read_target(trans, dir, d, inum);
+ if (ret > 0)
+ ret = -ENOENT;
if (ret)
bch2_trans_iter_exit(trans, iter);
struct btree_iter iter;
struct bkey_s_c k;
struct bkey_s_c_dirent dirent;
+ subvol_inum target;
u32 snapshot;
int ret;
dirent = bkey_s_c_to_dirent(k);
+ ret = bch2_dirent_read_target(&trans, inum, dirent, &target);
+ if (ret < 0)
+ break;
+ if (ret)
+ continue;
+
/*
* XXX: dir_emit() can fault and block, while we're holding
* locks
ctx->pos = dirent.k->p.offset;
if (!dir_emit(ctx, dirent.v->d_name,
bch2_dirent_name_bytes(dirent),
- le64_to_cpu(dirent.v->d_inum),
+ target.inum,
vfs_d_type(dirent.v->d_type)))
break;
ctx->pos = dirent.k->p.offset + 1;
if (ret)
goto err;
- *snapshot = iter.pos.snapshot;
ret = k.k->type == KEY_TYPE_inode
? bch2_inode_unpack(bkey_s_c_to_inode(k), inode)
: -ENOENT;
+ if (!ret)
+ *snapshot = iter.pos.snapshot;
err:
bch2_trans_iter_exit(trans, &iter);
return ret;
}
#endif
+/*
+ * Initialize @iter on the dirents btree at @pos and return the dirent found
+ * in that slot.
+ *
+ * On success the caller owns @iter and must release it with
+ * bch2_trans_iter_exit(). On failure @iter has already been released and the
+ * returned key's .k is an ERR_PTR(): the btree error, or -ENOENT when the
+ * slot does not hold a KEY_TYPE_dirent key.
+ */
+static struct bkey_s_c_dirent dirent_get_by_pos(struct btree_trans *trans,
+						struct btree_iter *iter,
+						struct bpos pos)
+{
+	struct bkey_s_c slot;
+	int err;
+
+	bch2_trans_iter_init(trans, iter, BTREE_ID_dirents, pos, 0);
+
+	slot = bch2_btree_iter_peek_slot(iter);
+	err = bkey_err(slot);
+	if (!err && slot.k->type != KEY_TYPE_dirent)
+		err = -ENOENT;
+
+	if (!err)
+		return bkey_s_c_to_dirent(slot);
+
+	/* Failure: drop the iterator here so callers need no cleanup. */
+	bch2_trans_iter_exit(trans, iter);
+	return (struct bkey_s_c_dirent) { .k = ERR_PTR(err) };
+}
+
+/*
+ * True when @inode's backpointer fields (bi_dir, bi_dir_offset) match the
+ * position (inode, offset) of dirent @d.
+ */
+static bool inode_points_to_dirent(struct bch_inode_unpacked *inode,
+				   struct bkey_s_c_dirent d)
+{
+	if (inode->bi_dir != d.k->p.inode)
+		return false;
+
+	return inode->bi_dir_offset == d.k->p.offset;
+}
+
+/*
+ * True when dirent @d refers to @inode: DT_SUBVOL dirents are matched by
+ * d_child_subvol against bi_subvol, all other dirents by d_inum against
+ * bi_inum.
+ */
+static bool dirent_points_to_inode(struct bkey_s_c_dirent d,
+				   struct bch_inode_unpacked *inode)
+{
+	if (d.v->d_type == DT_SUBVOL)
+		return le32_to_cpu(d.v->d_child_subvol) == inode->bi_subvol;
+
+	return le64_to_cpu(d.v->d_inum) == inode->bi_inum;
+}
+
+/*
+ * Check whether the dirent at the position recorded in @inode's backpointer
+ * fields (bi_dir, bi_dir_offset), in @snapshot, points back at @inode.
+ *
+ * Returns 1 if it does, 0 if it does not or if there is no dirent at that
+ * position, or a negative error code if the btree lookup failed.
+ */
static int inode_backpointer_exists(struct btree_trans *trans,
struct bch_inode_unpacked *inode,
u32 snapshot)
{
struct btree_iter iter;
- struct bkey_s_c k;
- u32 target_subvol, target_snapshot;
- u64 target_inum;
+ struct bkey_s_c_dirent d;
int ret;
- bch2_trans_iter_init(trans, &iter, BTREE_ID_dirents,
- SPOS(inode->bi_dir, inode->bi_dir_offset, snapshot), 0);
- k = bch2_btree_iter_peek_slot(&iter);
- ret = bkey_err(k);
+ /*
+ * dirent_get_by_pos() reports a slot without a dirent as -ENOENT (and
+ * has already released the iterator). That only means the backpointer
+ * does not exist, so it is mapped to 0 rather than returned as an error:
+ */
+ d = dirent_get_by_pos(trans, &iter,
+ SPOS(inode->bi_dir, inode->bi_dir_offset, snapshot));
+ ret = bkey_err(d.s_c);
if (ret)
- goto out;
- if (k.k->type != KEY_TYPE_dirent)
- goto out;
-
- ret = __bch2_dirent_read_target(trans, bkey_s_c_to_dirent(k),
- &target_subvol,
- &target_snapshot,
- &target_inum,
- true);
- if (ret)
- goto out;
+ return ret == -ENOENT ? 0 : ret;
- ret = target_inum == inode->bi_inum;
-out:
+ ret = dirent_points_to_inode(d, inode);
bch2_trans_iter_exit(trans, &iter);
return ret;
}
-static bool inode_backpointer_matches(struct bkey_s_c_dirent d,
- struct bch_inode_unpacked *inode)
-{
- return d.k->p.inode == inode->bi_dir &&
- d.k->p.offset == inode->bi_dir_offset;
-}
-
static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w)
{
struct bch_fs *c = trans->c;
goto err;
}
- if (!inode_backpointer_matches(d, target)) {
+ if (!inode_points_to_dirent(target, d)) {
ret = inode_backpointer_exists(trans, target, d.k->p.snapshot);
if (ret < 0)
goto err;
BTREE_INSERT_LAZY_RW,
bch2_trans_update(trans, iter, &n->k_i, 0));
kfree(n);
- if (ret)
+
+ return ret ?: -EINTR;
+ }
+
+ if (d.v->d_type == DT_SUBVOL &&
+ target->bi_parent_subvol != le32_to_cpu(d.v->d_parent_subvol) &&
+ (c->sb.version < bcachefs_metadata_version_subvol_dirent ||
+ fsck_err(c, "dirent has wrong d_parent_subvol field: got %u, should be %u",
+ le32_to_cpu(d.v->d_parent_subvol),
+ target->bi_parent_subvol))) {
+ struct bkey_i_dirent *n;
+
+ n = kmalloc(bkey_bytes(d.k), GFP_KERNEL);
+ if (!n) {
+ ret = -ENOMEM;
goto err;
+ }
+
+ bkey_reassemble(&n->k_i, d.s_c);
+ n->v.d_parent_subvol = cpu_to_le32(target->bi_parent_subvol);
+
+ ret = __bch2_trans_do(trans, NULL, NULL,
+ BTREE_INSERT_NOFAIL|
+ BTREE_INSERT_LAZY_RW,
+ bch2_trans_update(trans, iter, &n->k_i, 0));
+ kfree(n);
+
+ return ret ?: -EINTR;
}
err:
fsck_err:
struct bkey_s_c k;
struct bkey_s_c_dirent d;
struct inode_walker_entry *i;
- u32 target_snapshot;
- u32 target_subvol;
- u64 target_inum;
char buf[200];
int ret;
d = bkey_s_c_to_dirent(k);
- ret = __bch2_dirent_read_target(trans, d,
- &target_subvol,
- &target_snapshot,
- &target_inum,
- true);
- if (ret && ret != -ENOENT)
- return ret;
+ if (d.v->d_type == DT_SUBVOL) {
+ struct bch_inode_unpacked subvol_root;
+ u32 target_subvol = le32_to_cpu(d.v->d_child_subvol);
+ u32 target_snapshot;
+ u64 target_inum;
- if (fsck_err_on(ret, c,
- "dirent points to missing subvolume %llu",
- le64_to_cpu(d.v->d_inum)))
- return remove_dirent(trans, d.k->p);
+ ret = __subvol_lookup(trans, target_subvol,
+ &target_snapshot, &target_inum);
+ if (ret && ret != -ENOENT)
+ return ret;
- if (target_subvol) {
- struct bch_inode_unpacked subvol_root;
+ if (fsck_err_on(ret, c,
+ "dirent points to missing subvolume %llu",
+ le64_to_cpu(d.v->d_child_subvol)))
+ return remove_dirent(trans, d.k->p);
ret = __lookup_inode(trans, target_inum,
&subvol_root, &target_snapshot);
if (ret)
return ret;
} else {
- ret = __get_visible_inodes(trans, target, s, target_inum);
+ ret = __get_visible_inodes(trans, target, s, le64_to_cpu(d.v->d_inum));
if (ret)
return ret;
while (!(inode->bi_inum == BCACHEFS_ROOT_INO &&
inode->bi_subvol == BCACHEFS_ROOT_SUBVOL)) {
+ struct btree_iter dirent_iter;
+ struct bkey_s_c_dirent d;
u32 parent_snapshot = snapshot;
- if (inode->bi_parent_subvol) {
+ if (inode->bi_subvol) {
u64 inum;
ret = subvol_lookup(trans, inode->bi_parent_subvol,
}
ret = lockrestart_do(trans,
- inode_backpointer_exists(trans, inode, parent_snapshot));
- if (ret < 0)
+ PTR_ERR_OR_ZERO((d = dirent_get_by_pos(trans, &dirent_iter,
+ SPOS(inode->bi_dir, inode->bi_dir_offset,
+ parent_snapshot))).k));
+ if (ret && ret != -ENOENT)
break;
- if (!ret) {
+ if (!ret && !dirent_points_to_inode(d, inode)) {
+ bch2_trans_iter_exit(trans, &dirent_iter);
+ ret = -ENOENT;
+ }
+
+ if (ret == -ENOENT) {
if (fsck_err(c, "unreachable inode %llu:%u, type %u nlink %u backptr %llu:%llu",
inode->bi_inum, snapshot,
mode_to_type(inode->bi_mode),
ret = reattach_inode(trans, inode, snapshot);
break;
}
- ret = 0;
+
+ bch2_trans_iter_exit(trans, &dirent_iter);
if (!S_ISDIR(inode->bi_mode))
break;