git.sesse.net Git - bcachefs-tools-debian/blobdiff - libbcachefs/dirent.c
Update bcachefs sources to a8115093df bcachefs: Fix divide by zero in rebalance_work()
[bcachefs-tools-debian] / libbcachefs / dirent.c
index 00dac68701f54a21d185da64b5ad657db1688193..a7559ab03802cb3a39a6dc17d563cb9f3dced5a4 100644 (file)
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 
 #include "bcachefs.h"
+#include "bkey_buf.h"
 #include "bkey_methods.h"
 #include "btree_update.h"
 #include "extents.h"
 
 #include <linux/dcache.h>
 
-unsigned bch2_dirent_name_bytes(struct bkey_s_c_dirent d)
+static unsigned bch2_dirent_name_bytes(struct bkey_s_c_dirent d)
 {
-       unsigned len = bkey_val_bytes(d.k) -
-               offsetof(struct bch_dirent, d_name);
+       unsigned bkey_u64s = bkey_val_u64s(d.k);
+       unsigned bkey_bytes = bkey_u64s * sizeof(u64);
+       u64 last_u64 = ((u64*)d.v)[bkey_u64s - 1];
+#if CPU_BIG_ENDIAN
+       unsigned trailing_nuls = last_u64 ? __builtin_ctzll(last_u64) / 8 : 64 / 8;
+#else
+       unsigned trailing_nuls = last_u64 ? __builtin_clzll(last_u64) / 8 : 64 / 8;
+#endif
+
+       return bkey_bytes -
+               offsetof(struct bch_dirent, d_name) -
+               trailing_nuls;
+}
 
-       return strnlen(d.v->d_name, len);
+struct qstr bch2_dirent_get_name(struct bkey_s_c_dirent d)
+{
+       return (struct qstr) QSTR_INIT(d.v->d_name, bch2_dirent_name_bytes(d));
 }
 
 static u64 bch2_dirent_hash(const struct bch_hash_info *info,
@@ -40,7 +54,7 @@ static u64 dirent_hash_key(const struct bch_hash_info *info, const void *key)
 static u64 dirent_hash_bkey(const struct bch_hash_info *info, struct bkey_s_c k)
 {
        struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
-       struct qstr name = QSTR_INIT(d.v->d_name, bch2_dirent_name_bytes(d));
+       struct qstr name = bch2_dirent_get_name(d);
 
        return bch2_dirent_hash(info, &name);
 }
@@ -48,20 +62,29 @@ static u64 dirent_hash_bkey(const struct bch_hash_info *info, struct bkey_s_c k)
 static bool dirent_cmp_key(struct bkey_s_c _l, const void *_r)
 {
        struct bkey_s_c_dirent l = bkey_s_c_to_dirent(_l);
-       int len = bch2_dirent_name_bytes(l);
-       const struct qstr *r = _r;
+       const struct qstr l_name = bch2_dirent_get_name(l);
+       const struct qstr *r_name = _r;
 
-       return len - r->len ?: memcmp(l.v->d_name, r->name, len);
+       return l_name.len - r_name->len ?: memcmp(l_name.name, r_name->name, l_name.len);
 }
 
 static bool dirent_cmp_bkey(struct bkey_s_c _l, struct bkey_s_c _r)
 {
        struct bkey_s_c_dirent l = bkey_s_c_to_dirent(_l);
        struct bkey_s_c_dirent r = bkey_s_c_to_dirent(_r);
-       int l_len = bch2_dirent_name_bytes(l);
-       int r_len = bch2_dirent_name_bytes(r);
+       const struct qstr l_name = bch2_dirent_get_name(l);
+       const struct qstr r_name = bch2_dirent_get_name(r);
+
+       return l_name.len - r_name.len ?: memcmp(l_name.name, r_name.name, l_name.len);
+}
+
+static bool dirent_is_visible(subvol_inum inum, struct bkey_s_c k)
+{
+       struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
 
-       return l_len - r_len ?: memcmp(l.v->d_name, r.v->d_name, l_len);
+       if (d.v->d_type == DT_SUBVOL)
+               return le32_to_cpu(d.v->d_parent_subvol) == inum.subvol;
+       return true;
 }
 
 const struct bch_hash_desc bch2_dirent_hash_desc = {
@@ -71,57 +94,84 @@ const struct bch_hash_desc bch2_dirent_hash_desc = {
        .hash_bkey      = dirent_hash_bkey,
        .cmp_key        = dirent_cmp_key,
        .cmp_bkey       = dirent_cmp_bkey,
+       .is_visible     = dirent_is_visible,
 };
 
-const char *bch2_dirent_invalid(const struct bch_fs *c, struct bkey_s_c k)
+int bch2_dirent_invalid(const struct bch_fs *c, struct bkey_s_c k,
+                       enum bkey_invalid_flags flags,
+                       struct printbuf *err)
 {
        struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
-       unsigned len;
+       struct qstr d_name = bch2_dirent_get_name(d);
 
-       if (bkey_val_bytes(k.k) < sizeof(struct bch_dirent))
-               return "value too small";
+       if (!d_name.len) {
+               prt_printf(err, "empty name");
+               return -BCH_ERR_invalid_bkey;
+       }
 
-       len = bch2_dirent_name_bytes(d);
-       if (!len)
-               return "empty name";
+       if (bkey_val_u64s(k.k) > dirent_val_u64s(d_name.len)) {
+               prt_printf(err, "value too big (%zu > %u)",
+                      bkey_val_u64s(k.k), dirent_val_u64s(d_name.len));
+               return -BCH_ERR_invalid_bkey;
+       }
 
-       if (bkey_val_u64s(k.k) > dirent_val_u64s(len))
-               return "value too big";
+       /*
+        * Check new keys don't exceed the max length
+        * (older keys may be larger.)
+        */
+       if ((flags & BKEY_INVALID_COMMIT) && d_name.len > BCH_NAME_MAX) {
+               prt_printf(err, "dirent name too big (%u > %u)",
+                      d_name.len, BCH_NAME_MAX);
+               return -BCH_ERR_invalid_bkey;
+       }
 
-       if (len > BCH_NAME_MAX)
-               return "dirent name too big";
+       if (d_name.len != strnlen(d_name.name, d_name.len)) {
+               prt_printf(err, "dirent has stray data after name's NUL");
+               return -BCH_ERR_invalid_bkey;
+       }
 
-       if (len == 1 && !memcmp(d.v->d_name, ".", 1))
-               return "invalid name";
+       if (d_name.len == 1 && !memcmp(d_name.name, ".", 1)) {
+               prt_printf(err, "invalid name");
+               return -BCH_ERR_invalid_bkey;
+       }
 
-       if (len == 2 && !memcmp(d.v->d_name, "..", 2))
-               return "invalid name";
+       if (d_name.len == 2 && !memcmp(d_name.name, "..", 2)) {
+               prt_printf(err, "invalid name");
+               return -BCH_ERR_invalid_bkey;
+       }
 
-       if (memchr(d.v->d_name, '/', len))
-               return "invalid name";
+       if (memchr(d_name.name, '/', d_name.len)) {
+               prt_printf(err, "invalid name");
+               return -BCH_ERR_invalid_bkey;
+       }
 
        if (d.v->d_type != DT_SUBVOL &&
-           le64_to_cpu(d.v->d_inum) == d.k->p.inode)
-               return "dirent points to own directory";
+           le64_to_cpu(d.v->d_inum) == d.k->p.inode) {
+               prt_printf(err, "dirent points to own directory");
+               return -BCH_ERR_invalid_bkey;
+       }
 
-       return NULL;
+       return 0;
 }
 
 void bch2_dirent_to_text(struct printbuf *out, struct bch_fs *c,
                         struct bkey_s_c k)
 {
        struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
-
-       bch_scnmemcpy(out, d.v->d_name,
-                     bch2_dirent_name_bytes(d));
-       pr_buf(out, " -> %llu type %s", d.v->d_inum,
-              d.v->d_type < BCH_DT_MAX
-              ? bch2_d_types[d.v->d_type]
-              : "(bad d_type)");
+       struct qstr d_name = bch2_dirent_get_name(d);
+
+       prt_printf(out, "%.*s -> %llu type %s",
+              d_name.len,
+              d_name.name,
+              d.v->d_type != DT_SUBVOL
+              ? le64_to_cpu(d.v->d_inum)
+              : le32_to_cpu(d.v->d_child_subvol),
+              bch2_d_type_str(d.v->d_type));
 }
 
 static struct bkey_i_dirent *dirent_create_key(struct btree_trans *trans,
-                               u8 type, const struct qstr *name, u64 dst)
+                               subvol_inum dir, u8 type,
+                               const struct qstr *name, u64 dst)
 {
        struct bkey_i_dirent *dirent;
        unsigned u64s = BKEY_U64s + dirent_val_u64s(name->len);
@@ -137,7 +187,14 @@ static struct bkey_i_dirent *dirent_create_key(struct btree_trans *trans,
 
        bkey_dirent_init(&dirent->k_i);
        dirent->k.u64s = u64s;
-       dirent->v.d_inum = cpu_to_le64(dst);
+
+       if (type != DT_SUBVOL) {
+               dirent->v.d_inum = cpu_to_le64(dst);
+       } else {
+               dirent->v.d_parent_subvol = cpu_to_le32(dir.subvol);
+               dirent->v.d_child_subvol = cpu_to_le32(dst);
+       }
+
        dirent->v.d_type = type;
 
        memcpy(dirent->v.d_name, name->name, name->len);
@@ -159,7 +216,7 @@ int bch2_dirent_create(struct btree_trans *trans, subvol_inum dir,
        struct bkey_i_dirent *dirent;
        int ret;
 
-       dirent = dirent_create_key(trans, type, name, dst_inum);
+       dirent = dirent_create_key(trans, dir, type, name, dst_inum);
        ret = PTR_ERR_OR_ZERO(dirent);
        if (ret)
                return ret;
@@ -178,45 +235,30 @@ static void dirent_copy_target(struct bkey_i_dirent *dst,
        dst->v.d_type = src.v->d_type;
 }
 
-int __bch2_dirent_read_target(struct btree_trans *trans,
-                             struct bkey_s_c_dirent d,
-                             u32 *subvol, u32 *snapshot, u64 *inum,
-                             bool is_fsck)
+int bch2_dirent_read_target(struct btree_trans *trans, subvol_inum dir,
+                           struct bkey_s_c_dirent d, subvol_inum *target)
 {
        struct bch_subvolume s;
        int ret = 0;
 
-       *subvol         = 0;
-       *snapshot       = d.k->p.snapshot;
+       if (d.v->d_type == DT_SUBVOL &&
+           le32_to_cpu(d.v->d_parent_subvol) != dir.subvol)
+               return 1;
 
        if (likely(d.v->d_type != DT_SUBVOL)) {
-               *inum = le64_to_cpu(d.v->d_inum);
+               target->subvol  = dir.subvol;
+               target->inum    = le64_to_cpu(d.v->d_inum);
        } else {
-               *subvol = le64_to_cpu(d.v->d_inum);
+               target->subvol  = le32_to_cpu(d.v->d_child_subvol);
 
-               ret = bch2_subvolume_get(trans, *subvol, !is_fsck, BTREE_ITER_CACHED, &s);
+               ret = bch2_subvolume_get(trans, target->subvol, true, BTREE_ITER_CACHED, &s);
 
-               *snapshot       = le32_to_cpu(s.snapshot);
-               *inum           = le64_to_cpu(s.inode);
+               target->inum    = le64_to_cpu(s.inode);
        }
 
        return ret;
 }
 
-static int bch2_dirent_read_target(struct btree_trans *trans, subvol_inum dir,
-                                  struct bkey_s_c_dirent d, subvol_inum *target)
-{
-       u32 snapshot;
-       int ret = 0;
-
-       ret = __bch2_dirent_read_target(trans, d, &target->subvol, &snapshot,
-                                       &target->inum, false);
-       if (!target->subvol)
-               target->subvol = dir.subvol;
-
-       return ret;
-}
-
 int bch2_dirent_rename(struct btree_trans *trans,
                subvol_inum src_dir, struct bch_hash_info *src_hash,
                subvol_inum dst_dir, struct bch_hash_info *dst_hash,
@@ -226,10 +268,11 @@ int bch2_dirent_rename(struct btree_trans *trans,
 {
        struct btree_iter src_iter = { NULL };
        struct btree_iter dst_iter = { NULL };
-       struct bkey_s_c old_src, old_dst;
+       struct bkey_s_c old_src, old_dst = bkey_s_c_null;
        struct bkey_i_dirent *new_src = NULL, *new_dst = NULL;
        struct bpos dst_pos =
                POS(dst_dir.inum, bch2_dirent_hash(dst_hash, dst_name));
+       unsigned src_type = 0, dst_type = 0, src_update_flags = 0;
        int ret = 0;
 
        if (src_dir.subvol != dst_dir.subvol)
@@ -238,36 +281,6 @@ int bch2_dirent_rename(struct btree_trans *trans,
        memset(src_inum, 0, sizeof(*src_inum));
        memset(dst_inum, 0, sizeof(*dst_inum));
 
-       /*
-        * Lookup dst:
-        *
-        * Note that in BCH_RENAME mode, we're _not_ checking if
-        * the target already exists - we're relying on the VFS
-        * to do that check for us for correctness:
-        */
-       ret = mode == BCH_RENAME
-               ? bch2_hash_hole(trans, &dst_iter, bch2_dirent_hash_desc,
-                                dst_hash, dst_dir, dst_name)
-               : bch2_hash_lookup(trans, &dst_iter, bch2_dirent_hash_desc,
-                                  dst_hash, dst_dir, dst_name,
-                                  BTREE_ITER_INTENT);
-       if (ret)
-               goto out;
-
-       old_dst = bch2_btree_iter_peek_slot(&dst_iter);
-       ret = bkey_err(old_dst);
-       if (ret)
-               goto out;
-
-       if (mode != BCH_RENAME) {
-               ret = bch2_dirent_read_target(trans, dst_dir,
-                               bkey_s_c_to_dirent(old_dst), dst_inum);
-               if (ret)
-                       goto out;
-       }
-       if (mode != BCH_RENAME_EXCHANGE)
-               *src_offset = dst_iter.pos.offset;
-
        /* Lookup src: */
        ret = bch2_hash_lookup(trans, &src_iter, bch2_dirent_hash_desc,
                               src_hash, src_dir, src_name,
@@ -285,8 +298,51 @@ int bch2_dirent_rename(struct btree_trans *trans,
        if (ret)
                goto out;
 
+       src_type = bkey_s_c_to_dirent(old_src).v->d_type;
+
+       if (src_type == DT_SUBVOL && mode == BCH_RENAME_EXCHANGE)
+               return -EOPNOTSUPP;
+
+
+       /* Lookup dst: */
+       if (mode == BCH_RENAME) {
+               /*
+                * Note that we're _not_ checking if the target already exists -
+                * we're relying on the VFS to do that check for us for
+                * correctness:
+                */
+               ret = bch2_hash_hole(trans, &dst_iter, bch2_dirent_hash_desc,
+                                    dst_hash, dst_dir, dst_name);
+               if (ret)
+                       goto out;
+       } else {
+               ret = bch2_hash_lookup(trans, &dst_iter, bch2_dirent_hash_desc,
+                                      dst_hash, dst_dir, dst_name,
+                                      BTREE_ITER_INTENT);
+               if (ret)
+                       goto out;
+
+               old_dst = bch2_btree_iter_peek_slot(&dst_iter);
+               ret = bkey_err(old_dst);
+               if (ret)
+                       goto out;
+
+               ret = bch2_dirent_read_target(trans, dst_dir,
+                               bkey_s_c_to_dirent(old_dst), dst_inum);
+               if (ret)
+                       goto out;
+
+               dst_type = bkey_s_c_to_dirent(old_dst).v->d_type;
+
+               if (dst_type == DT_SUBVOL)
+                       return -EOPNOTSUPP;
+       }
+
+       if (mode != BCH_RENAME_EXCHANGE)
+               *src_offset = dst_iter.pos.offset;
+
        /* Create new dst key: */
-       new_dst = dirent_create_key(trans, 0, dst_name, 0);
+       new_dst = dirent_create_key(trans, dst_dir, 0, dst_name, 0);
        ret = PTR_ERR_OR_ZERO(new_dst);
        if (ret)
                goto out;
@@ -296,7 +352,7 @@ int bch2_dirent_rename(struct btree_trans *trans,
 
        /* Create new src key: */
        if (mode == BCH_RENAME_EXCHANGE) {
-               new_src = dirent_create_key(trans, 0, src_name, 0);
+               new_src = dirent_create_key(trans, src_dir, 0, src_name, 0);
                ret = PTR_ERR_OR_ZERO(new_src);
                if (ret)
                        goto out;
@@ -312,8 +368,8 @@ int bch2_dirent_rename(struct btree_trans *trans,
                bkey_init(&new_src->k);
                new_src->k.p = src_iter.pos;
 
-               if (bkey_cmp(dst_pos, src_iter.pos) <= 0 &&
-                   bkey_cmp(src_iter.pos, dst_iter.pos) < 0) {
+               if (bkey_le(dst_pos, src_iter.pos) &&
+                   bkey_lt(src_iter.pos, dst_iter.pos)) {
                        /*
                         * We have a hash collision for the new dst key,
                         * and new_src - the key we're deleting - is between
@@ -326,10 +382,9 @@ int bch2_dirent_rename(struct btree_trans *trans,
                                 * If we're not overwriting, we can just insert
                                 * new_dst at the src position:
                                 */
-                               new_dst->k.p = src_iter.pos;
-                               bch2_trans_update(trans, &src_iter,
-                                                 &new_dst->k_i, 0);
-                               goto out_set_offset;
+                               new_src = new_dst;
+                               new_src->k.p = src_iter.pos;
+                               goto out_set_src;
                        } else {
                                /* If we're overwriting, we can't insert new_dst
                                 * at a different slot because it has to
@@ -350,9 +405,29 @@ int bch2_dirent_rename(struct btree_trans *trans,
                }
        }
 
-       bch2_trans_update(trans, &src_iter, &new_src->k_i, 0);
-       bch2_trans_update(trans, &dst_iter, &new_dst->k_i, 0);
-out_set_offset:
+       ret = bch2_trans_update(trans, &dst_iter, &new_dst->k_i, 0);
+       if (ret)
+               goto out;
+out_set_src:
+
+       /*
+        * If we're deleting a subvolume, we need to really delete the dirent,
+        * not just emit a whiteout in the current snapshot:
+        */
+       if (src_type == DT_SUBVOL) {
+               bch2_btree_iter_set_snapshot(&src_iter, old_src.k->p.snapshot);
+               ret = bch2_btree_iter_traverse(&src_iter);
+               if (ret)
+                       goto out;
+
+               new_src->k.p = src_iter.pos;
+               src_update_flags |= BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE;
+       }
+
+       ret = bch2_trans_update(trans, &src_iter, &new_src->k_i, src_update_flags);
+       if (ret)
+               goto out;
+
        if (mode == BCH_RENAME_EXCHANGE)
                *src_offset = new_src->k.p.offset;
        *dst_offset = new_dst->k.p.offset;
@@ -385,14 +460,15 @@ int __bch2_dirent_lookup_trans(struct btree_trans *trans,
 
        k = bch2_btree_iter_peek_slot(iter);
        ret = bkey_err(k);
-       if (ret) {
-               bch2_trans_iter_exit(trans, iter);
-               return ret;
-       }
+       if (ret)
+               goto err;
 
        d = bkey_s_c_to_dirent(k);
 
        ret = bch2_dirent_read_target(trans, dir, d, inum);
+       if (ret > 0)
+               ret = -ENOENT;
+err:
        if (ret)
                bch2_trans_iter_exit(trans, iter);
 
@@ -413,10 +489,10 @@ retry:
 
        ret = __bch2_dirent_lookup_trans(&trans, &iter, dir, hash_info,
                                          name, inum, 0);
-
-       bch2_trans_iter_exit(&trans, &iter);
-       if (ret == -EINTR)
+       if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
                goto retry;
+       if (!ret)
+               bch2_trans_iter_exit(&trans, &iter);
        bch2_trans_exit(&trans);
        return ret;
 }
@@ -432,16 +508,13 @@ int bch2_empty_dir_trans(struct btree_trans *trans, subvol_inum dir)
        if (ret)
                return ret;
 
-       for_each_btree_key_norestart(trans, iter, BTREE_ID_dirents,
-                          SPOS(dir.inum, 0, snapshot), 0, k, ret) {
-               if (k.k->p.inode > dir.inum)
-                       break;
-
+       for_each_btree_key_upto_norestart(trans, iter, BTREE_ID_dirents,
+                          SPOS(dir.inum, 0, snapshot),
+                          POS(dir.inum, U64_MAX), 0, k, ret)
                if (k.k->type == KEY_TYPE_dirent) {
                        ret = -ENOTEMPTY;
                        break;
                }
-       }
        bch2_trans_iter_exit(trans, &iter);
 
        return ret;
@@ -453,9 +526,13 @@ int bch2_readdir(struct bch_fs *c, subvol_inum inum, struct dir_context *ctx)
        struct btree_iter iter;
        struct bkey_s_c k;
        struct bkey_s_c_dirent dirent;
+       subvol_inum target;
        u32 snapshot;
+       struct bkey_buf sk;
+       struct qstr name;
        int ret;
 
+       bch2_bkey_buf_init(&sk);
        bch2_trans_init(&trans, c, 0, 0);
 retry:
        bch2_trans_begin(&trans);
@@ -464,34 +541,50 @@ retry:
        if (ret)
                goto err;
 
-       for_each_btree_key_norestart(&trans, iter, BTREE_ID_dirents,
-                          SPOS(inum.inum, ctx->pos, snapshot), 0, k, ret) {
-               if (k.k->p.inode > inum.inum)
-                       break;
-
+       for_each_btree_key_upto_norestart(&trans, iter, BTREE_ID_dirents,
+                          SPOS(inum.inum, ctx->pos, snapshot),
+                          POS(inum.inum, U64_MAX), 0, k, ret) {
                if (k.k->type != KEY_TYPE_dirent)
                        continue;
 
                dirent = bkey_s_c_to_dirent(k);
 
-               /*
-                * XXX: dir_emit() can fault and block, while we're holding
-                * locks
-                */
+               ret = bch2_dirent_read_target(&trans, inum, dirent, &target);
+               if (ret < 0)
+                       break;
+               if (ret)
+                       continue;
+
+               /* dir_emit() can fault and block: */
+               bch2_bkey_buf_reassemble(&sk, c, k);
+               dirent = bkey_i_to_s_c_dirent(sk.k);
+               bch2_trans_unlock(&trans);
+
+               name = bch2_dirent_get_name(dirent);
+
                ctx->pos = dirent.k->p.offset;
-               if (!dir_emit(ctx, dirent.v->d_name,
-                             bch2_dirent_name_bytes(dirent),
-                             le64_to_cpu(dirent.v->d_inum),
+               if (!dir_emit(ctx, name.name,
+                             name.len,
+                             target.inum,
                              vfs_d_type(dirent.v->d_type)))
                        break;
                ctx->pos = dirent.k->p.offset + 1;
+
+               /*
+                * read_target looks up subvolumes, we can overflow paths if the
+                * directory has many subvolumes in it
+                */
+               ret = btree_trans_too_many_iters(&trans);
+               if (ret)
+                       break;
        }
        bch2_trans_iter_exit(&trans, &iter);
 err:
-       if (ret == -EINTR)
+       if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
                goto retry;
 
        bch2_trans_exit(&trans);
+       bch2_bkey_buf_exit(&sk, c);
 
        return ret;
 }