]> git.sesse.net Git - bcachefs-tools-debian/blobdiff - libbcachefs/dirent.c
Disable pristine-tar option in gbp.conf, since there is no pristine-tar branch.
[bcachefs-tools-debian] / libbcachefs / dirent.c
index 4c85d3399fb4cf4b93f5467ec50c8d409c05b25d..d37bd07afbfe4088ebb9b92feb56ff86127ae1ab 100644 (file)
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 
 #include "bcachefs.h"
+#include "bkey_buf.h"
 #include "bkey_methods.h"
 #include "btree_update.h"
 #include "extents.h"
 
 #include <linux/dcache.h>
 
-unsigned bch2_dirent_name_bytes(struct bkey_s_c_dirent d)
+static unsigned bch2_dirent_name_bytes(struct bkey_s_c_dirent d)
 {
-       unsigned len = bkey_val_bytes(d.k) -
-               offsetof(struct bch_dirent, d_name);
+       unsigned bkey_u64s = bkey_val_u64s(d.k);
+       unsigned bkey_bytes = bkey_u64s * sizeof(u64);
+       u64 last_u64 = ((u64*)d.v)[bkey_u64s - 1];
+#if CPU_BIG_ENDIAN
+       unsigned trailing_nuls = last_u64 ? __builtin_ctzll(last_u64) / 8 : 64 / 8;
+#else
+       unsigned trailing_nuls = last_u64 ? __builtin_clzll(last_u64) / 8 : 64 / 8;
+#endif
+
+       return bkey_bytes -
+               offsetof(struct bch_dirent, d_name) -
+               trailing_nuls;
+}
 
-       return strnlen(d.v->d_name, len);
+struct qstr bch2_dirent_get_name(struct bkey_s_c_dirent d)
+{
+       return (struct qstr) QSTR_INIT(d.v->d_name, bch2_dirent_name_bytes(d));
 }
 
 static u64 bch2_dirent_hash(const struct bch_hash_info *info,
@@ -40,7 +54,7 @@ static u64 dirent_hash_key(const struct bch_hash_info *info, const void *key)
 static u64 dirent_hash_bkey(const struct bch_hash_info *info, struct bkey_s_c k)
 {
        struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
-       struct qstr name = QSTR_INIT(d.v->d_name, bch2_dirent_name_bytes(d));
+       struct qstr name = bch2_dirent_get_name(d);
 
        return bch2_dirent_hash(info, &name);
 }
@@ -48,20 +62,20 @@ static u64 dirent_hash_bkey(const struct bch_hash_info *info, struct bkey_s_c k)
 static bool dirent_cmp_key(struct bkey_s_c _l, const void *_r)
 {
        struct bkey_s_c_dirent l = bkey_s_c_to_dirent(_l);
-       int len = bch2_dirent_name_bytes(l);
-       const struct qstr *r = _r;
+       const struct qstr l_name = bch2_dirent_get_name(l);
+       const struct qstr *r_name = _r;
 
-       return len - r->len ?: memcmp(l.v->d_name, r->name, len);
+       return !qstr_eq(l_name, *r_name);
 }
 
 static bool dirent_cmp_bkey(struct bkey_s_c _l, struct bkey_s_c _r)
 {
        struct bkey_s_c_dirent l = bkey_s_c_to_dirent(_l);
        struct bkey_s_c_dirent r = bkey_s_c_to_dirent(_r);
-       int l_len = bch2_dirent_name_bytes(l);
-       int r_len = bch2_dirent_name_bytes(r);
+       const struct qstr l_name = bch2_dirent_get_name(l);
+       const struct qstr r_name = bch2_dirent_get_name(r);
 
-       return l_len - r_len ?: memcmp(l.v->d_name, r.v->d_name, l_len);
+       return !qstr_eq(l_name, r_name);
 }
 
 static bool dirent_is_visible(subvol_inum inum, struct bkey_s_c k)
@@ -83,72 +97,68 @@ const struct bch_hash_desc bch2_dirent_hash_desc = {
        .is_visible     = dirent_is_visible,
 };
 
-int bch2_dirent_invalid(const struct bch_fs *c, struct bkey_s_c k,
-                       unsigned flags, struct printbuf *err)
+int bch2_dirent_invalid(struct bch_fs *c, struct bkey_s_c k,
+                       enum bkey_invalid_flags flags,
+                       struct printbuf *err)
 {
        struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
-       unsigned len;
-
-       if (bkey_val_bytes(k.k) < sizeof(struct bch_dirent)) {
-               prt_printf(err, "incorrect value size (%zu < %zu)",
-                      bkey_val_bytes(k.k), sizeof(*d.v));
-               return -BCH_ERR_invalid_bkey;
-       }
-
-       len = bch2_dirent_name_bytes(d);
-       if (!len) {
-               prt_printf(err, "empty name");
-               return -BCH_ERR_invalid_bkey;
-       }
-
-       if (bkey_val_u64s(k.k) > dirent_val_u64s(len)) {
-               prt_printf(err, "value too big (%zu > %u)",
-                      bkey_val_u64s(k.k), dirent_val_u64s(len));
-               return -BCH_ERR_invalid_bkey;
-       }
-
-       if (len > BCH_NAME_MAX) {
-               prt_printf(err, "dirent name too big (%u > %u)",
-                      len, BCH_NAME_MAX);
-               return -BCH_ERR_invalid_bkey;
-       }
-
-       if (len == 1 && !memcmp(d.v->d_name, ".", 1)) {
-               prt_printf(err, "invalid name");
-               return -BCH_ERR_invalid_bkey;
-       }
-
-       if (len == 2 && !memcmp(d.v->d_name, "..", 2)) {
-               prt_printf(err, "invalid name");
-               return -BCH_ERR_invalid_bkey;
-       }
+       struct qstr d_name = bch2_dirent_get_name(d);
+       int ret = 0;
 
-       if (memchr(d.v->d_name, '/', len)) {
-               prt_printf(err, "invalid name");
-               return -BCH_ERR_invalid_bkey;
-       }
+       bkey_fsck_err_on(!d_name.len, c, err,
+                        dirent_empty_name,
+                        "empty name");
 
-       if (d.v->d_type != DT_SUBVOL &&
-           le64_to_cpu(d.v->d_inum) == d.k->p.inode) {
-               prt_printf(err, "dirent points to own directory");
-               return -BCH_ERR_invalid_bkey;
-       }
+       bkey_fsck_err_on(bkey_val_u64s(k.k) > dirent_val_u64s(d_name.len), c, err,
+                        dirent_val_too_big,
+                        "value too big (%zu > %u)",
+                        bkey_val_u64s(k.k), dirent_val_u64s(d_name.len));
 
-       return 0;
+       /*
+        * Check new keys don't exceed the max length
+        * (older keys may be larger.)
+        */
+       bkey_fsck_err_on((flags & BKEY_INVALID_COMMIT) && d_name.len > BCH_NAME_MAX, c, err,
+                        dirent_name_too_long,
+                        "dirent name too big (%u > %u)",
+                        d_name.len, BCH_NAME_MAX);
+
+       bkey_fsck_err_on(d_name.len != strnlen(d_name.name, d_name.len), c, err,
+                        dirent_name_embedded_nul,
+                        "dirent has stray data after name's NUL");
+
+       bkey_fsck_err_on((d_name.len == 1 && !memcmp(d_name.name, ".", 1)) ||
+                        (d_name.len == 2 && !memcmp(d_name.name, "..", 2)), c, err,
+                        dirent_name_dot_or_dotdot,
+                        "invalid name");
+
+       bkey_fsck_err_on(memchr(d_name.name, '/', d_name.len), c, err,
+                        dirent_name_has_slash,
+                        "name with /");
+
+       bkey_fsck_err_on(d.v->d_type != DT_SUBVOL &&
+                        le64_to_cpu(d.v->d_inum) == d.k->p.inode, c, err,
+                        dirent_to_itself,
+                        "dirent points to own directory");
+fsck_err:
+       return ret;
 }
 
-void bch2_dirent_to_text(struct printbuf *out, struct bch_fs *c,
-                        struct bkey_s_c k)
+void bch2_dirent_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
 {
        struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
+       struct qstr d_name = bch2_dirent_get_name(d);
+
+       prt_printf(out, "%.*s -> ", d_name.len, d_name.name);
 
-       prt_printf(out, "%.*s -> %llu type %s",
-              bch2_dirent_name_bytes(d),
-              d.v->d_name,
-              d.v->d_type != DT_SUBVOL
-              ? le64_to_cpu(d.v->d_inum)
-              : le32_to_cpu(d.v->d_child_subvol),
-              bch2_d_type_str(d.v->d_type));
+       if (d.v->d_type != DT_SUBVOL)
+               prt_printf(out, "%llu", le64_to_cpu(d.v->d_inum));
+       else
+               prt_printf(out, "%u -> %u",
+                          le32_to_cpu(d.v->d_parent_subvol),
+                          le32_to_cpu(d.v->d_child_subvol));
+
+       prt_printf(out, " type %s", bch2_d_type_str(d.v->d_type));
 }
 
 static struct bkey_i_dirent *dirent_create_key(struct btree_trans *trans,
@@ -190,10 +200,39 @@ static struct bkey_i_dirent *dirent_create_key(struct btree_trans *trans,
        return dirent;
 }
 
+int bch2_dirent_create_snapshot(struct btree_trans *trans,
+                       u32 dir_subvol, u64 dir, u32 snapshot,
+                       const struct bch_hash_info *hash_info,
+                       u8 type, const struct qstr *name, u64 dst_inum,
+                       u64 *dir_offset,
+                       bch_str_hash_flags_t str_hash_flags)
+{
+       subvol_inum dir_inum = { .subvol = dir_subvol, .inum = dir };
+       struct bkey_i_dirent *dirent;
+       int ret;
+
+       dirent = dirent_create_key(trans, dir_inum, type, name, dst_inum);
+       ret = PTR_ERR_OR_ZERO(dirent);
+       if (ret)
+               return ret;
+
+       dirent->k.p.inode       = dir;
+       dirent->k.p.snapshot    = snapshot;
+
+       ret = bch2_hash_set_in_snapshot(trans, bch2_dirent_hash_desc, hash_info,
+                                       dir_inum, snapshot,
+                                       &dirent->k_i, str_hash_flags,
+                                       BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
+       *dir_offset = dirent->k.p.offset;
+
+       return ret;
+}
+
 int bch2_dirent_create(struct btree_trans *trans, subvol_inum dir,
                       const struct bch_hash_info *hash_info,
                       u8 type, const struct qstr *name, u64 dst_inum,
-                      u64 *dir_offset, int flags)
+                      u64 *dir_offset,
+                      bch_str_hash_flags_t str_hash_flags)
 {
        struct bkey_i_dirent *dirent;
        int ret;
@@ -204,7 +243,7 @@ int bch2_dirent_create(struct btree_trans *trans, subvol_inum dir,
                return ret;
 
        ret = bch2_hash_set(trans, bch2_dirent_hash_desc, hash_info,
-                           dir, &dirent->k_i, flags);
+                           dir, &dirent->k_i, str_hash_flags);
        *dir_offset = dirent->k.p.offset;
 
        return ret;
@@ -224,7 +263,7 @@ int bch2_dirent_read_target(struct btree_trans *trans, subvol_inum dir,
        int ret = 0;
 
        if (d.v->d_type == DT_SUBVOL &&
-           d.v->d_parent_subvol != dir.subvol)
+           le32_to_cpu(d.v->d_parent_subvol) != dir.subvol)
                return 1;
 
        if (likely(d.v->d_type != DT_SUBVOL)) {
@@ -254,12 +293,10 @@ int bch2_dirent_rename(struct btree_trans *trans,
        struct bkey_i_dirent *new_src = NULL, *new_dst = NULL;
        struct bpos dst_pos =
                POS(dst_dir.inum, bch2_dirent_hash(dst_hash, dst_name));
-       unsigned src_type = 0, dst_type = 0, src_update_flags = 0;
+       unsigned src_update_flags = 0;
+       bool delete_src, delete_dst;
        int ret = 0;
 
-       if (src_dir.subvol != dst_dir.subvol)
-               return -EXDEV;
-
        memset(src_inum, 0, sizeof(*src_inum));
        memset(dst_inum, 0, sizeof(*dst_inum));
 
@@ -280,12 +317,6 @@ int bch2_dirent_rename(struct btree_trans *trans,
        if (ret)
                goto out;
 
-       src_type = bkey_s_c_to_dirent(old_src).v->d_type;
-
-       if (src_type == DT_SUBVOL && mode == BCH_RENAME_EXCHANGE)
-               return -EOPNOTSUPP;
-
-
        /* Lookup dst: */
        if (mode == BCH_RENAME) {
                /*
@@ -313,11 +344,6 @@ int bch2_dirent_rename(struct btree_trans *trans,
                                bkey_s_c_to_dirent(old_dst), dst_inum);
                if (ret)
                        goto out;
-
-               dst_type = bkey_s_c_to_dirent(old_dst).v->d_type;
-
-               if (dst_type == DT_SUBVOL)
-                       return -EOPNOTSUPP;
        }
 
        if (mode != BCH_RENAME_EXCHANGE)
@@ -387,28 +413,55 @@ int bch2_dirent_rename(struct btree_trans *trans,
                }
        }
 
+       if (new_dst->v.d_type == DT_SUBVOL)
+               new_dst->v.d_parent_subvol = cpu_to_le32(dst_dir.subvol);
+
+       if ((mode == BCH_RENAME_EXCHANGE) &&
+           new_src->v.d_type == DT_SUBVOL)
+               new_src->v.d_parent_subvol = cpu_to_le32(src_dir.subvol);
+
        ret = bch2_trans_update(trans, &dst_iter, &new_dst->k_i, 0);
        if (ret)
                goto out;
 out_set_src:
-
        /*
-        * If we're deleting a subvolume, we need to really delete the dirent,
-        * not just emit a whiteout in the current snapshot:
+        * If we're deleting a subvolume we need to really delete the dirent,
+        * not just emit a whiteout in the current snapshot - there can only be
+        * single dirent that points to a given subvolume.
+        *
+        * IOW, we don't maintain multiple versions in different snapshots of
+        * dirents that point to subvolumes - dirents that point to subvolumes
+        * are only visible in one particular subvolume so it's not necessary,
+        * and it would be particularly confusing for fsck to have to deal with.
         */
-       if (src_type == DT_SUBVOL) {
-               bch2_btree_iter_set_snapshot(&src_iter, old_src.k->p.snapshot);
-               ret = bch2_btree_iter_traverse(&src_iter);
+       delete_src = bkey_s_c_to_dirent(old_src).v->d_type == DT_SUBVOL &&
+               new_src->k.p.snapshot != old_src.k->p.snapshot;
+
+       delete_dst = old_dst.k &&
+               bkey_s_c_to_dirent(old_dst).v->d_type == DT_SUBVOL &&
+               new_dst->k.p.snapshot != old_dst.k->p.snapshot;
+
+       if (!delete_src || !bkey_deleted(&new_src->k)) {
+               ret = bch2_trans_update(trans, &src_iter, &new_src->k_i, src_update_flags);
                if (ret)
                        goto out;
+       }
 
-               new_src->k.p = src_iter.pos;
-               src_update_flags |= BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE;
+       if (delete_src) {
+               bch2_btree_iter_set_snapshot(&src_iter, old_src.k->p.snapshot);
+               ret =   bch2_btree_iter_traverse(&src_iter) ?:
+                       bch2_btree_delete_at(trans, &src_iter, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
+               if (ret)
+                       goto out;
        }
 
-       ret = bch2_trans_update(trans, &src_iter, &new_src->k_i, src_update_flags);
-       if (ret)
-               goto out;
+       if (delete_dst) {
+               bch2_btree_iter_set_snapshot(&dst_iter, old_dst.k->p.snapshot);
+               ret =   bch2_btree_iter_traverse(&dst_iter) ?:
+                       bch2_btree_delete_at(trans, &dst_iter, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
+               if (ret)
+                       goto out;
+       }
 
        if (mode == BCH_RENAME_EXCHANGE)
                *src_offset = new_src->k.p.offset;
@@ -419,41 +472,29 @@ out:
        return ret;
 }
 
-int __bch2_dirent_lookup_trans(struct btree_trans *trans,
-                              struct btree_iter *iter,
-                              subvol_inum dir,
-                              const struct bch_hash_info *hash_info,
-                              const struct qstr *name, subvol_inum *inum,
-                              unsigned flags)
+int bch2_dirent_lookup_trans(struct btree_trans *trans,
+                            struct btree_iter *iter,
+                            subvol_inum dir,
+                            const struct bch_hash_info *hash_info,
+                            const struct qstr *name, subvol_inum *inum,
+                            unsigned flags)
 {
-       struct bkey_s_c k;
-       struct bkey_s_c_dirent d;
-       u32 snapshot;
-       int ret;
-
-       ret = bch2_subvolume_get_snapshot(trans, dir.subvol, &snapshot);
+       int ret = bch2_hash_lookup(trans, iter, bch2_dirent_hash_desc,
+                                  hash_info, dir, name, flags);
        if (ret)
                return ret;
 
-       ret = bch2_hash_lookup(trans, iter, bch2_dirent_hash_desc,
-                              hash_info, dir, name, flags);
-       if (ret)
-               return ret;
-
-       k = bch2_btree_iter_peek_slot(iter);
+       struct bkey_s_c k = bch2_btree_iter_peek_slot(iter);
        ret = bkey_err(k);
        if (ret)
                goto err;
 
-       d = bkey_s_c_to_dirent(k);
-
-       ret = bch2_dirent_read_target(trans, dir, d, inum);
+       ret = bch2_dirent_read_target(trans, dir, bkey_s_c_to_dirent(k), inum);
        if (ret > 0)
                ret = -ENOENT;
 err:
        if (ret)
                bch2_trans_iter_exit(trans, iter);
-
        return ret;
 }
 
@@ -461,40 +502,30 @@ u64 bch2_dirent_lookup(struct bch_fs *c, subvol_inum dir,
                       const struct bch_hash_info *hash_info,
                       const struct qstr *name, subvol_inum *inum)
 {
-       struct btree_trans trans;
-       struct btree_iter iter;
-       int ret;
+       struct btree_trans *trans = bch2_trans_get(c);
+       struct btree_iter iter = { NULL };
 
-       bch2_trans_init(&trans, c, 0, 0);
-retry:
-       bch2_trans_begin(&trans);
-
-       ret = __bch2_dirent_lookup_trans(&trans, &iter, dir, hash_info,
-                                         name, inum, 0);
-       if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-               goto retry;
-       if (!ret)
-               bch2_trans_iter_exit(&trans, &iter);
-       bch2_trans_exit(&trans);
+       int ret = lockrestart_do(trans,
+               bch2_dirent_lookup_trans(trans, &iter, dir, hash_info, name, inum, 0));
+       bch2_trans_iter_exit(trans, &iter);
+       bch2_trans_put(trans);
        return ret;
 }
 
-int bch2_empty_dir_trans(struct btree_trans *trans, subvol_inum dir)
+int bch2_empty_dir_snapshot(struct btree_trans *trans, u64 dir, u32 subvol, u32 snapshot)
 {
        struct btree_iter iter;
        struct bkey_s_c k;
-       u32 snapshot;
        int ret;
 
-       ret = bch2_subvolume_get_snapshot(trans, dir.subvol, &snapshot);
-       if (ret)
-               return ret;
-
        for_each_btree_key_upto_norestart(trans, iter, BTREE_ID_dirents,
-                          SPOS(dir.inum, 0, snapshot),
-                          POS(dir.inum, U64_MAX), 0, k, ret)
+                          SPOS(dir, 0, snapshot),
+                          POS(dir, U64_MAX), 0, k, ret)
                if (k.k->type == KEY_TYPE_dirent) {
-                       ret = -ENOTEMPTY;
+                       struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
+                       if (d.v->d_type == DT_SUBVOL && le32_to_cpu(d.v->d_parent_subvol) != subvol)
+                               continue;
+                       ret = -BCH_ERR_ENOTEMPTY_dir_not_empty;
                        break;
                }
        bch2_trans_iter_exit(trans, &iter);
@@ -502,25 +533,35 @@ int bch2_empty_dir_trans(struct btree_trans *trans, subvol_inum dir)
        return ret;
 }
 
+int bch2_empty_dir_trans(struct btree_trans *trans, subvol_inum dir)
+{
+       u32 snapshot;
+
+       return bch2_subvolume_get_snapshot(trans, dir.subvol, &snapshot) ?:
+               bch2_empty_dir_snapshot(trans, dir.inum, dir.subvol, snapshot);
+}
+
 int bch2_readdir(struct bch_fs *c, subvol_inum inum, struct dir_context *ctx)
 {
-       struct btree_trans trans;
+       struct btree_trans *trans = bch2_trans_get(c);
        struct btree_iter iter;
        struct bkey_s_c k;
        struct bkey_s_c_dirent dirent;
        subvol_inum target;
        u32 snapshot;
+       struct bkey_buf sk;
+       struct qstr name;
        int ret;
 
-       bch2_trans_init(&trans, c, 0, 0);
+       bch2_bkey_buf_init(&sk);
 retry:
-       bch2_trans_begin(&trans);
+       bch2_trans_begin(trans);
 
-       ret = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot);
+       ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
        if (ret)
                goto err;
 
-       for_each_btree_key_upto_norestart(&trans, iter, BTREE_ID_dirents,
+       for_each_btree_key_upto_norestart(trans, iter, BTREE_ID_dirents,
                           SPOS(inum.inum, ctx->pos, snapshot),
                           POS(inum.inum, U64_MAX), 0, k, ret) {
                if (k.k->type != KEY_TYPE_dirent)
@@ -528,19 +569,22 @@ retry:
 
                dirent = bkey_s_c_to_dirent(k);
 
-               ret = bch2_dirent_read_target(&trans, inum, dirent, &target);
+               ret = bch2_dirent_read_target(trans, inum, dirent, &target);
                if (ret < 0)
                        break;
                if (ret)
                        continue;
 
-               /*
-                * XXX: dir_emit() can fault and block, while we're holding
-                * locks
-                */
+               /* dir_emit() can fault and block: */
+               bch2_bkey_buf_reassemble(&sk, c, k);
+               dirent = bkey_i_to_s_c_dirent(sk.k);
+               bch2_trans_unlock(trans);
+
+               name = bch2_dirent_get_name(dirent);
+
                ctx->pos = dirent.k->p.offset;
-               if (!dir_emit(ctx, dirent.v->d_name,
-                             bch2_dirent_name_bytes(dirent),
+               if (!dir_emit(ctx, name.name,
+                             name.len,
                              target.inum,
                              vfs_d_type(dirent.v->d_type)))
                        break;
@@ -550,16 +594,17 @@ retry:
                 * read_target looks up subvolumes, we can overflow paths if the
                 * directory has many subvolumes in it
                 */
-               ret = btree_trans_too_many_iters(&trans);
+               ret = btree_trans_too_many_iters(trans);
                if (ret)
                        break;
        }
-       bch2_trans_iter_exit(&trans, &iter);
+       bch2_trans_iter_exit(trans, &iter);
 err:
        if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
                goto retry;
 
-       bch2_trans_exit(&trans);
+       bch2_trans_put(trans);
+       bch2_bkey_buf_exit(&sk, c);
 
        return ret;
 }