Update bcachefs sources to 940d6ca657 bcachefs: acl code improvements
author    Kent Overstreet <kent.overstreet@gmail.com>
          Fri, 13 Jul 2018 04:43:23 +0000 (00:43 -0400)
committer Kent Overstreet <kent.overstreet@gmail.com>
          Fri, 13 Jul 2018 04:45:28 +0000 (00:45 -0400)
27 files changed:
.bcachefs_revision
cmd_migrate.c
include/linux/dcache.h
include/linux/kernel.h
libbcachefs/acl.c
libbcachefs/acl.h
libbcachefs/bkey.h
libbcachefs/bset.c
libbcachefs/bset.h
libbcachefs/btree_cache.c
libbcachefs/btree_cache.h
libbcachefs/btree_io.c
libbcachefs/btree_iter.c
libbcachefs/btree_iter.h
libbcachefs/btree_locking.h
libbcachefs/btree_types.h
libbcachefs/btree_update_interior.c
libbcachefs/btree_update_interior.h
libbcachefs/btree_update_leaf.c
libbcachefs/dirent.c
libbcachefs/extents.c
libbcachefs/fs-io.c
libbcachefs/fsck.c
libbcachefs/journal_seq_blacklist.c
libbcachefs/tests.c
libbcachefs/xattr.c
libbcachefs/xattr.h

diff --git a/.bcachefs_revision b/.bcachefs_revision
index a8916efbd2e823b87b069b898bff4326ce5aae95..f1807172b5f87fde0f5116607fa728cb1b60b0ae 100644
--- a/.bcachefs_revision
+++ b/.bcachefs_revision
@@ -1 +1 @@
-2cb70a82bc0ca05d8c3cf666d221badd5724e339
+940d6ca657ea70758f3f43323bfd531019a40d3c
diff --git a/cmd_migrate.c b/cmd_migrate.c
index db20b71c15dfe811547bec097f60430c129b450e..6186653427e7449a18cce66d9c3e77b7aef31297 100644
--- a/cmd_migrate.c
+++ b/cmd_migrate.c
@@ -239,8 +239,8 @@ static void copy_xattrs(struct bch_fs *c, struct bch_inode_unpacked *dst,
 
                const struct xattr_handler *h = xattr_resolve_name(&attr);
 
-               int ret = __bch2_xattr_set(c, dst->bi_inum, &hash_info, attr,
-                                          val, val_size, 0, h->flags, NULL);
+               int ret = bch2_xattr_set(c, dst->bi_inum, &hash_info, attr,
+                                        val, val_size, 0, h->flags, NULL);
                if (ret < 0)
                        die("error creating xattr: %s", strerror(-ret));
        }
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 15b803eadc72c64815ac15f7e8679d71d1672fde..7637854db3e4a26d9ee479902a22ffeb04038509 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -4,25 +4,6 @@
 struct super_block;
 struct inode;
 
-/* The hash is always the low bits of hash_len */
-#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
- #define HASH_LEN_DECLARE u32 hash; u32 len
-#else
- #define HASH_LEN_DECLARE u32 len; u32 hash
-#endif
-
-struct qstr {
-       union {
-               struct {
-                       HASH_LEN_DECLARE;
-               };
-               u64 hash_len;
-       };
-       const unsigned char *name;
-};
-
-#define QSTR_INIT(n,l) { { { .len = l } }, .name = n }
-
 struct dentry {
        struct super_block *d_sb;
        struct inode *d_inode;
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index b6afea43750e00b96a8b55ec81b56cf688e854a7..a4c8149eb2f3826d142a8c4b0172609889eb07db 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -222,4 +222,23 @@ static inline int __must_check kstrtos32(const char *s, unsigned int base, s32 *
         BUILD_BUG_ON_ZERO((perms) & 2) +                                       \
         (perms))
 
+/* The hash is always the low bits of hash_len */
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ #define HASH_LEN_DECLARE u32 hash; u32 len
+#else
+ #define HASH_LEN_DECLARE u32 len; u32 hash
+#endif
+
+struct qstr {
+       union {
+               struct {
+                       HASH_LEN_DECLARE;
+               };
+               u64 hash_len;
+       };
+       const unsigned char *name;
+};
+
+#define QSTR_INIT(n,l) { { { .len = l } }, .name = n }
+
 #endif
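
For context: the qstr union moved here packs hash and len into one 64-bit hash_len word, and the byte-order-dependent member order is what keeps the hash in the low bits on either endianness. A minimal userspace sketch of the little-endian case (standard headers standing in for kernel types):

    #include <assert.h>
    #include <stdint.h>

    typedef uint32_t u32;
    typedef uint64_t u64;

    /* little-endian variant: hash declared first, so it takes the low bits */
    #define HASH_LEN_DECLARE u32 hash; u32 len

    struct qstr {
        union {
            struct { HASH_LEN_DECLARE; };
            u64 hash_len;
        };
        const unsigned char *name;
    };

    #define QSTR_INIT(n, l) { { { .len = l } }, .name = n }

    int main(void)
    {
        struct qstr q = QSTR_INIT((const unsigned char *)"hello", 5);

        q.hash = 0xdeadbeef;
        /* on little-endian: hash_len == (len << 32) | hash */
        assert(q.hash_len == 0x5deadbeefULL);
        return 0;
    }
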
diff --git a/libbcachefs/acl.c b/libbcachefs/acl.c
index 29774e5d94f9fd4c04c4fd8ff46276996c1755bb..a8735bc04b4d04e4c3508d4e93792f47de2d4123 100644
--- a/libbcachefs/acl.c
+++ b/libbcachefs/acl.c
 #include "fs.h"
 #include "xattr.h"
 
+static inline size_t bch2_acl_size(unsigned nr_short, unsigned nr_long)
+{
+       return sizeof(bch_acl_header) +
+               sizeof(bch_acl_entry_short) * nr_short +
+               sizeof(bch_acl_entry) * nr_long;
+}
+
+static inline int acl_to_xattr_type(int type)
+{
+       switch (type) {
+       case ACL_TYPE_ACCESS:
+               return BCH_XATTR_INDEX_POSIX_ACL_ACCESS;
+       case ACL_TYPE_DEFAULT:
+               return BCH_XATTR_INDEX_POSIX_ACL_DEFAULT;
+       default:
+               BUG();
+       }
+}
+
 /*
  * Convert from filesystem to in-memory representation.
  */
 static struct posix_acl *bch2_acl_from_disk(const void *value, size_t size)
 {
-       const char *end = (char *)value + size;
-       int n, count;
+       const void *p, *end = value + size;
        struct posix_acl *acl;
+       struct posix_acl_entry *out;
+       unsigned count = 0;
 
        if (!value)
                return NULL;
        if (size < sizeof(bch_acl_header))
-               return ERR_PTR(-EINVAL);
+               goto invalid;
        if (((bch_acl_header *)value)->a_version !=
            cpu_to_le32(BCH_ACL_VERSION))
-               return ERR_PTR(-EINVAL);
-       value = (char *)value + sizeof(bch_acl_header);
-       count = bch2_acl_count(size);
-       if (count < 0)
-               return ERR_PTR(-EINVAL);
-       if (count == 0)
+               goto invalid;
+
+       p = value + sizeof(bch_acl_header);
+       while (p < end) {
+               const bch_acl_entry *entry = p;
+
+               if (p + sizeof(bch_acl_entry_short) > end)
+                       goto invalid;
+
+               switch (le16_to_cpu(entry->e_tag)) {
+               case ACL_USER_OBJ:
+               case ACL_GROUP_OBJ:
+               case ACL_MASK:
+               case ACL_OTHER:
+                       p += sizeof(bch_acl_entry_short);
+                       break;
+               case ACL_USER:
+               case ACL_GROUP:
+                       p += sizeof(bch_acl_entry);
+                       break;
+               default:
+                       goto invalid;
+               }
+
+               count++;
+       }
+
+       if (p > end)
+               goto invalid;
+
+       if (!count)
                return NULL;
+
        acl = posix_acl_alloc(count, GFP_KERNEL);
        if (!acl)
                return ERR_PTR(-ENOMEM);
-       for (n = 0; n < count; n++) {
-               bch_acl_entry *entry =
-                       (bch_acl_entry *)value;
-               if ((char *)value + sizeof(bch_acl_entry_short) > end)
-                       goto fail;
-               acl->a_entries[n].e_tag  = le16_to_cpu(entry->e_tag);
-               acl->a_entries[n].e_perm = le16_to_cpu(entry->e_perm);
-               switch (acl->a_entries[n].e_tag) {
+
+       out = acl->a_entries;
+
+       p = value + sizeof(bch_acl_header);
+       while (p < end) {
+               const bch_acl_entry *in = p;
+
+               out->e_tag  = le16_to_cpu(in->e_tag);
+               out->e_perm = le16_to_cpu(in->e_perm);
+
+               switch (out->e_tag) {
                case ACL_USER_OBJ:
                case ACL_GROUP_OBJ:
                case ACL_MASK:
                case ACL_OTHER:
-                       value = (char *)value +
-                               sizeof(bch_acl_entry_short);
+                       p += sizeof(bch_acl_entry_short);
                        break;
-
                case ACL_USER:
-                       value = (char *)value + sizeof(bch_acl_entry);
-                       if ((char *)value > end)
-                               goto fail;
-                       acl->a_entries[n].e_uid =
-                               make_kuid(&init_user_ns,
-                                         le32_to_cpu(entry->e_id));
+                       out->e_uid = make_kuid(&init_user_ns,
+                                              le32_to_cpu(in->e_id));
+                       p += sizeof(bch_acl_entry);
                        break;
                case ACL_GROUP:
-                       value = (char *)value + sizeof(bch_acl_entry);
-                       if ((char *)value > end)
-                               goto fail;
-                       acl->a_entries[n].e_gid =
-                               make_kgid(&init_user_ns,
-                                         le32_to_cpu(entry->e_id));
+                       out->e_gid = make_kgid(&init_user_ns,
+                                              le32_to_cpu(in->e_id));
+                       p += sizeof(bch_acl_entry);
                        break;
-
-               default:
-                       goto fail;
                }
+
+               out++;
        }
-       if (value != end)
-               goto fail;
-       return acl;
 
-fail:
-       posix_acl_release(acl);
+       BUG_ON(out != acl->a_entries + acl->a_count);
+
+       return acl;
+invalid:
+       pr_err("invalid acl entry");
        return ERR_PTR(-EINVAL);
 }
 
+#define acl_for_each_entry(acl, acl_e)                 \
+       for (acl_e = acl->a_entries;                    \
+            acl_e < acl->a_entries + acl->a_count;     \
+            acl_e++)
+
 /*
  * Convert from in-memory to filesystem representation.
  */
-static void *bch2_acl_to_disk(const struct posix_acl *acl, size_t *size)
+static struct bkey_i_xattr *
+bch2_acl_to_xattr(const struct posix_acl *acl,
+                 int type)
 {
-       bch_acl_header *ext_acl;
-       char *e;
-       size_t n;
-
-       *size = bch2_acl_size(acl->a_count);
-       ext_acl = kmalloc(sizeof(bch_acl_header) + acl->a_count *
-                       sizeof(bch_acl_entry), GFP_KERNEL);
-       if (!ext_acl)
-               return ERR_PTR(-ENOMEM);
-       ext_acl->a_version = cpu_to_le32(BCH_ACL_VERSION);
-       e = (char *)ext_acl + sizeof(bch_acl_header);
-       for (n = 0; n < acl->a_count; n++) {
-               const struct posix_acl_entry *acl_e = &acl->a_entries[n];
-               bch_acl_entry *entry = (bch_acl_entry *)e;
+       struct bkey_i_xattr *xattr;
+       bch_acl_header *acl_header;
+       const struct posix_acl_entry *acl_e;
+       void *outptr;
+       unsigned nr_short = 0, nr_long = 0, acl_len, u64s;
+
+       acl_for_each_entry(acl, acl_e) {
+               switch (acl_e->e_tag) {
+               case ACL_USER:
+               case ACL_GROUP:
+                       nr_long++;
+                       break;
+               case ACL_USER_OBJ:
+               case ACL_GROUP_OBJ:
+               case ACL_MASK:
+               case ACL_OTHER:
+                       nr_short++;
+                       break;
+               default:
+                       return ERR_PTR(-EINVAL);
+               }
+       }
+
+       acl_len = bch2_acl_size(nr_short, nr_long);
+       u64s = BKEY_U64s + xattr_val_u64s(0, acl_len);
+
+       if (u64s > U8_MAX)
+               return ERR_PTR(-E2BIG);
+
+       xattr = kmalloc(u64s * sizeof(u64), GFP_KERNEL);
+       if (!xattr)
+               return ERR_PTR(-ENOMEM);
+
+       bkey_xattr_init(&xattr->k_i);
+       xattr->k.u64s           = u64s;
+       xattr->v.x_type         = acl_to_xattr_type(type);
+       xattr->v.x_name_len     = 0;
+       xattr->v.x_val_len      = cpu_to_le16(acl_len);
+
+       acl_header = xattr_val(&xattr->v);
+       acl_header->a_version = cpu_to_le32(BCH_ACL_VERSION);
+
+       outptr = (void *) acl_header + sizeof(*acl_header);
+
+       acl_for_each_entry(acl, acl_e) {
+               bch_acl_entry *entry = outptr;
 
                entry->e_tag = cpu_to_le16(acl_e->e_tag);
                entry->e_perm = cpu_to_le16(acl_e->e_perm);
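
The rewritten bch2_acl_from_disk() above now walks the buffer twice: pass one validates and counts every entry before anything is allocated, pass two converts, which is why the old mid-conversion failure path and its posix_acl_release() are gone. A standalone sketch of that validate-then-convert pattern, using hypothetical record types (native-endian tags for brevity) rather than the real bch_acl_entry layout; like the kernel code above it relies on GNU C void-pointer arithmetic:

    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>

    struct rec_short { uint16_t tag; uint16_t perm; };                /* 4 bytes */
    struct rec_long  { uint16_t tag; uint16_t perm; uint32_t id; };   /* 8 bytes */

    enum { TAG_SHORT = 1, TAG_LONG = 2 };

    /*
     * Pass 1: validate and count. Returns -1 on a truncated or unknown
     * entry; pass 2 (conversion) can then assume the buffer is well formed.
     */
    static int count_entries(const void *buf, size_t size)
    {
        const void *p = buf, *end = buf + size;
        int count = 0;

        while (p < end) {
            const struct rec_short *r = p;

            if (p + sizeof(*r) > end)
                return -1;                          /* truncated header */

            switch (r->tag) {
            case TAG_SHORT: p += sizeof(struct rec_short); break;
            case TAG_LONG:  p += sizeof(struct rec_long);  break;
            default:        return -1;              /* unknown tag */
            }
            count++;
        }

        return p == end ? count : -1;   /* long entry ran past the end */
    }

    int main(void)
    {
        struct rec_long recs[2] = {
            { .tag = TAG_LONG, .perm = 6, .id = 42 },
            { .tag = TAG_LONG, .perm = 4, .id = 43 },
        };

        assert(count_entries(recs, sizeof(recs)) == 2);
        assert(count_entries(recs, sizeof(recs) - 1) == -1);  /* truncated */
        return 0;
    }
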
@@ -109,70 +188,54 @@ static void *bch2_acl_to_disk(const struct posix_acl *acl, size_t *size)
                case ACL_USER:
                        entry->e_id = cpu_to_le32(
                                from_kuid(&init_user_ns, acl_e->e_uid));
-                       e += sizeof(bch_acl_entry);
+                       outptr += sizeof(bch_acl_entry);
                        break;
                case ACL_GROUP:
                        entry->e_id = cpu_to_le32(
                                from_kgid(&init_user_ns, acl_e->e_gid));
-                       e += sizeof(bch_acl_entry);
+                       outptr += sizeof(bch_acl_entry);
                        break;
 
                case ACL_USER_OBJ:
                case ACL_GROUP_OBJ:
                case ACL_MASK:
                case ACL_OTHER:
-                       e += sizeof(bch_acl_entry_short);
+                       outptr += sizeof(bch_acl_entry_short);
                        break;
-
-               default:
-                       goto fail;
                }
        }
-       return (char *)ext_acl;
 
-fail:
-       kfree(ext_acl);
-       return ERR_PTR(-EINVAL);
+       BUG_ON(outptr != xattr_val(&xattr->v) + acl_len);
+
+       return xattr;
 }
 
 struct posix_acl *bch2_get_acl(struct inode *vinode, int type)
 {
        struct bch_inode_info *inode = to_bch_ei(vinode);
        struct bch_fs *c = inode->v.i_sb->s_fs_info;
-       int name_index;
-       char *value = NULL;
-       struct posix_acl *acl;
-       int ret;
-
-       switch (type) {
-       case ACL_TYPE_ACCESS:
-               name_index = BCH_XATTR_INDEX_POSIX_ACL_ACCESS;
-               break;
-       case ACL_TYPE_DEFAULT:
-               name_index = BCH_XATTR_INDEX_POSIX_ACL_DEFAULT;
-               break;
-       default:
-               BUG();
+       struct btree_iter iter;
+       struct bkey_s_c_xattr xattr;
+       struct bkey_s_c k;
+       struct posix_acl *acl = NULL;
+       int name_index = acl_to_xattr_type(type);
+
+       k = bch2_xattr_get_iter(c, &iter, inode, "", name_index);
+       if (IS_ERR(k.k)) {
+               if (PTR_ERR(k.k) != -ENOENT)
+                       acl = ERR_CAST(k.k);
+               goto out;
        }
-       ret = bch2_xattr_get(c, inode, "", NULL, 0, name_index);
-       if (ret > 0) {
-               value = kmalloc(ret, GFP_KERNEL);
-               if (!value)
-                       return ERR_PTR(-ENOMEM);
-               ret = bch2_xattr_get(c, inode, "", value,
-                                   ret, name_index);
-       }
-       if (ret > 0)
-               acl = bch2_acl_from_disk(value, ret);
-       else if (ret == -ENODATA || ret == -ENOSYS)
-               acl = NULL;
-       else
-               acl = ERR_PTR(ret);
-       kfree(value);
+
+       xattr = bkey_s_c_to_xattr(k);
+
+       acl = bch2_acl_from_disk(xattr_val(xattr.v),
+                       le16_to_cpu(xattr.v->x_val_len));
 
        if (!IS_ERR(acl))
                set_cached_acl(&inode->v, type, acl);
-
+out:
+       bch2_btree_iter_unlock(&iter);
        return acl;
 }
 
@@ -180,37 +243,31 @@ int __bch2_set_acl(struct inode *vinode, struct posix_acl *acl, int type)
 {
        struct bch_inode_info *inode = to_bch_ei(vinode);
        struct bch_fs *c = inode->v.i_sb->s_fs_info;
-       int name_index;
-       void *value = NULL;
-       size_t size = 0;
        int ret;
 
-       switch (type) {
-       case ACL_TYPE_ACCESS:
-               name_index = BCH_XATTR_INDEX_POSIX_ACL_ACCESS;
-               break;
-       case ACL_TYPE_DEFAULT:
-               name_index = BCH_XATTR_INDEX_POSIX_ACL_DEFAULT;
-               if (!S_ISDIR(inode->v.i_mode))
-                       return acl ? -EACCES : 0;
-               break;
-
-       default:
-               return -EINVAL;
-       }
+       if (type == ACL_TYPE_DEFAULT &&
+           !S_ISDIR(inode->v.i_mode))
+               return acl ? -EACCES : 0;
 
        if (acl) {
-               value = bch2_acl_to_disk(acl, &size);
-               if (IS_ERR(value))
-                       return (int)PTR_ERR(value);
+               struct bkey_i_xattr *xattr =
+                       bch2_acl_to_xattr(acl, type);
+               if (IS_ERR(xattr))
+                       return PTR_ERR(xattr);
+
+               ret = bch2_hash_set(bch2_xattr_hash_desc, &inode->ei_str_hash,
+                                   c, inode->v.i_ino, &inode->ei_journal_seq,
+                                   &xattr->k_i, 0);
+               kfree(xattr);
+       } else {
+               struct xattr_search_key search =
+                       X_SEARCH(acl_to_xattr_type(type), "", 0);
+
+               ret = bch2_hash_delete(bch2_xattr_hash_desc, &inode->ei_str_hash,
+                                      c, inode->v.i_ino, &inode->ei_journal_seq,
+                                      &search);
        }
 
-       ret = bch2_xattr_set(c, inode, "", value, size, 0, name_index);
-       kfree(value);
-
-       if (ret == -ERANGE)
-               ret = -E2BIG;
-
        if (!ret)
                set_cached_acl(&inode->v, type, acl);
 
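A worked example of the sizing the new bch2_acl_size() helper performs, as a userspace sketch using the on-disk struct sizes from acl.h (4-byte header, 4-byte short entry, 8-byte long entry):

    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>

    typedef struct { uint16_t e_tag, e_perm; } acl_entry_short;              /* 4 bytes */
    typedef struct { uint16_t e_tag, e_perm; uint32_t e_id; } acl_entry;     /* 8 bytes */
    typedef struct { uint32_t a_version; } acl_header;                       /* 4 bytes */

    static size_t acl_size(unsigned nr_short, unsigned nr_long)
    {
        return sizeof(acl_header) +
            sizeof(acl_entry_short) * nr_short +
            sizeof(acl_entry) * nr_long;
    }

    int main(void)
    {
        /*
         * USER_OBJ, GROUP_OBJ, MASK and OTHER are short entries; one
         * named ACL_USER entry is long: 4 + 4*4 + 1*8 = 28 bytes.
         */
        assert(acl_size(4, 1) == 28);
        return 0;
    }
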
diff --git a/libbcachefs/acl.h b/libbcachefs/acl.h
index a66338d4171ee8303ab651fbc3c8a9650a0afa7c..0be31ee9e59d3270f0e563d403b66e242954c3ab 100644
--- a/libbcachefs/acl.h
+++ b/libbcachefs/acl.h
@@ -20,35 +20,6 @@ typedef struct {
        __le32          a_version;
 } bch_acl_header;
 
-static inline size_t bch2_acl_size(int count)
-{
-       if (count <= 4) {
-               return sizeof(bch_acl_header) +
-                      count * sizeof(bch_acl_entry_short);
-       } else {
-               return sizeof(bch_acl_header) +
-                      4 * sizeof(bch_acl_entry_short) +
-                      (count - 4) * sizeof(bch_acl_entry);
-       }
-}
-
-static inline int bch2_acl_count(size_t size)
-{
-       ssize_t s;
-
-       size -= sizeof(bch_acl_header);
-       s = size - 4 * sizeof(bch_acl_entry_short);
-       if (s < 0) {
-               if (size % sizeof(bch_acl_entry_short))
-                       return -1;
-               return size / sizeof(bch_acl_entry_short);
-       } else {
-               if (s % sizeof(bch_acl_entry))
-                       return -1;
-               return s / sizeof(bch_acl_entry) + 4;
-       }
-}
-
 struct posix_acl;
 
 extern struct posix_acl *bch2_get_acl(struct inode *, int);
diff --git a/libbcachefs/bkey.h b/libbcachefs/bkey.h
index 2d6c8a230a735c84e05dff7598f6c93bac601f8a..2f62bd8e32582c570df0bd4fcb87d4cd9494ed09 100644
--- a/libbcachefs/bkey.h
+++ b/libbcachefs/bkey.h
@@ -52,21 +52,6 @@ static inline void set_bkey_val_bytes(struct bkey *k, unsigned bytes)
        k->u64s = BKEY_U64s + DIV_ROUND_UP(bytes, sizeof(u64));
 }
 
-/*
- * Mark a key as deleted without changing the size of the value (i.e. modifying
- * keys in the btree in place)
- */
-static inline void __set_bkey_deleted(struct bkey *k)
-{
-       k->type = KEY_TYPE_DELETED;
-}
-
-static inline void set_bkey_deleted(struct bkey *k)
-{
-       __set_bkey_deleted(k);
-       set_bkey_val_u64s(k, 0);
-}
-
 #define bkey_deleted(_k)       ((_k)->type == KEY_TYPE_DELETED)
 
 #define bkey_whiteout(_k)                              \
@@ -284,6 +269,16 @@ static inline struct bpos bkey_successor(struct bpos p)
        return ret;
 }
 
+static inline struct bpos bkey_predecessor(struct bpos p)
+{
+       struct bpos ret = p;
+
+       if (!ret.offset--)
+               BUG_ON(!ret.inode--);
+
+       return ret;
+}
+
 static inline u64 bkey_start_offset(const struct bkey *k)
 {
        return k->p.offset - k->size;
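
The new bkey_predecessor() mirrors bkey_successor() directly above it: decrementing offset past zero borrows from inode, treating the position as one 128-bit integer with inode in the high bits. A toy sketch of the borrow, with a plain assert standing in for BUG_ON():

    #include <assert.h>
    #include <stdint.h>

    struct pos { uint64_t inode, offset; };

    static struct pos predecessor(struct pos p)
    {
        if (!p.offset--) {          /* offset was 0: wraps to ~0 */
            assert(p.inode);        /* kernel: BUG_ON(!ret.inode--) */
            p.inode--;
        }
        return p;
    }

    int main(void)
    {
        struct pos p = predecessor((struct pos) { .inode = 1, .offset = 0 });

        assert(p.inode == 0 && p.offset == UINT64_MAX);
        return 0;
    }
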
diff --git a/libbcachefs/bset.c b/libbcachefs/bset.c
index 9a27477409bad42ff5c70e214819f11c9f896d38..5c77787214c77e145ce1a62d6c6e00d1c81171ee 100644
--- a/libbcachefs/bset.c
+++ b/libbcachefs/bset.c
@@ -987,6 +987,10 @@ void bch2_bset_init_next(struct bch_fs *c, struct btree *b,
        set_btree_bset(b, t, i);
 }
 
+/*
+ * find _some_ key in the same bset as @k that precedes @k - not necessarily the
+ * immediate predecessor:
+ */
 static struct bkey_packed *__bkey_prev(struct btree *b, struct bset_tree *t,
                                       struct bkey_packed *k)
 {
@@ -1025,40 +1029,31 @@ static struct bkey_packed *__bkey_prev(struct btree *b, struct bset_tree *t,
        return p;
 }
 
-struct bkey_packed *bch2_bkey_prev_all(struct btree *b, struct bset_tree *t,
-                                      struct bkey_packed *k)
-{
-       struct bkey_packed *p;
-
-       p = __bkey_prev(b, t, k);
-       if (!p)
-               return NULL;
-
-       while (bkey_next(p) != k)
-               p = bkey_next(p);
-
-       return p;
-}
-
-struct bkey_packed *bch2_bkey_prev(struct btree *b, struct bset_tree *t,
-                                  struct bkey_packed *k)
+struct bkey_packed *bch2_bkey_prev_filter(struct btree *b,
+                                         struct bset_tree *t,
+                                         struct bkey_packed *k,
+                                         unsigned min_key_type)
 {
-       while (1) {
-               struct bkey_packed *p, *i, *ret = NULL;
-
-               p = __bkey_prev(b, t, k);
-               if (!p)
-                       return NULL;
+       struct bkey_packed *p, *i, *ret = NULL, *orig_k = k;
 
+       while ((p = __bkey_prev(b, t, k)) && !ret) {
                for (i = p; i != k; i = bkey_next(i))
-                       if (!bkey_deleted(i))
+                       if (i->type >= min_key_type)
                                ret = i;
 
-               if (ret)
-                       return ret;
-
                k = p;
        }
+
+       if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) {
+               BUG_ON(ret >= orig_k);
+
+               for (i = ret ? bkey_next(ret) : btree_bkey_first(b, t);
+                    i != orig_k;
+                    i = bkey_next(i))
+                       BUG_ON(i->type >= min_key_type);
+       }
+
+       return ret;
 }
 
 /* Insert */
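
The new _filter variants collapse the old _all/non-deleted function pairs into one threshold parameter: key types are ordered with KEY_TYPE_DELETED and KEY_TYPE_DISCARD lowest, so min_key_type == 0 keeps every key while KEY_TYPE_DISCARD + 1 keeps only live keys. A simplified sketch, with an array standing in for a bset:

    #include <assert.h>
    #include <stddef.h>

    enum { KEY_DELETED = 0, KEY_DISCARD = 1, KEY_LIVE = 2 };  /* same ordering idea */

    struct key { unsigned type; };

    /* last key before k whose type is >= min_key_type, or NULL */
    static const struct key *prev_filter(const struct key *first,
                                         const struct key *k,
                                         unsigned min_key_type)
    {
        const struct key *i, *ret = NULL;

        for (i = first; i != k; i++)
            if (i->type >= min_key_type)
                ret = i;
        return ret;
    }

    int main(void)
    {
        struct key keys[] = { { KEY_LIVE }, { KEY_DELETED }, { KEY_LIVE } };

        /* skipping whiteouts, the predecessor of keys[2] is keys[0]... */
        assert(prev_filter(keys, &keys[2], KEY_DISCARD + 1) == &keys[0]);
        /* ...while min_key_type 0 returns the deleted key at keys[1] */
        assert(prev_filter(keys, &keys[2], 0) == &keys[1]);
        return 0;
    }
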
@@ -1677,7 +1672,7 @@ void bch2_btree_node_iter_advance(struct btree_node_iter *iter,
 #endif
 }
 
-static inline bool __btree_node_iter_used(struct btree_node_iter *iter)
+static inline unsigned __btree_node_iter_used(struct btree_node_iter *iter)
 {
        unsigned n = ARRAY_SIZE(iter->data);
 
@@ -1690,67 +1685,66 @@ static inline bool __btree_node_iter_used(struct btree_node_iter *iter)
 /*
  * Expensive:
  */
-struct bkey_packed *bch2_btree_node_iter_prev_all(struct btree_node_iter *iter,
-                                                 struct btree *b)
+struct bkey_packed *bch2_btree_node_iter_prev_filter(struct btree_node_iter *iter,
+                                                    struct btree *b,
+                                                    unsigned min_key_type)
 {
        struct bkey_packed *k, *prev = NULL;
+       struct bkey_packed *orig_pos = bch2_btree_node_iter_peek_all(iter, b);
        struct btree_node_iter_set *set;
        struct bset_tree *t;
-       struct bset_tree *prev_t;
-       unsigned end, used;
+       unsigned end;
 
        bch2_btree_node_iter_verify(iter, b);
 
        for_each_bset(b, t) {
-               k = bch2_bkey_prev_all(b, t,
-                       bch2_btree_node_iter_bset_pos(iter, b, t));
+               k = bch2_bkey_prev_filter(b, t,
+                       bch2_btree_node_iter_bset_pos(iter, b, t),
+                       min_key_type);
                if (k &&
                    (!prev || __btree_node_iter_cmp(iter->is_extents, b,
                                                    k, prev) > 0)) {
                        prev = k;
-                       prev_t = t;
+                       end = t->end_offset;
                }
        }
 
        if (!prev)
-               return NULL;
+               goto out;
 
        /*
         * We're manually memmoving instead of just calling sort() to ensure the
         * prev we picked ends up in slot 0 - sort won't necessarily put it
         * there because of duplicate deleted keys:
         */
-       end = __btree_node_key_to_offset(b, btree_bkey_last(b, prev_t));
        btree_node_iter_for_each(iter, set)
-               if (set->end == end) {
-                       memmove(&iter->data[1],
-                               &iter->data[0],
-                               (void *) set - (void *) &iter->data[0]);
-                       goto out;
-               }
+               if (set->end == end)
+                       goto found;
 
-       used = __btree_node_iter_used(iter);
-       BUG_ON(used >= ARRAY_SIZE(iter->data));
+       BUG_ON(set != &iter->data[__btree_node_iter_used(iter)]);
+found:
+       BUG_ON(set >= iter->data + ARRAY_SIZE(iter->data));
 
        memmove(&iter->data[1],
                &iter->data[0],
-               (void *) &iter->data[used] - (void *) &iter->data[0]);
-out:
+               (void *) set - (void *) &iter->data[0]);
+
        iter->data[0].k = __btree_node_key_to_offset(b, prev);
        iter->data[0].end = end;
-       return prev;
-}
+out:
+       if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) {
+               struct btree_node_iter iter2 = *iter;
 
-struct bkey_packed *bch2_btree_node_iter_prev(struct btree_node_iter *iter,
-                                             struct btree *b)
-{
-       struct bkey_packed *k;
+               if (prev)
+                       bch2_btree_node_iter_advance(&iter2, b);
 
-       do {
-               k = bch2_btree_node_iter_prev_all(iter, b);
-       } while (k && bkey_deleted(k));
+               while ((k = bch2_btree_node_iter_peek_all(&iter2, b)) != orig_pos) {
+                       BUG_ON(k->type >= min_key_type);
+                       bch2_btree_node_iter_advance(&iter2, b);
+               }
+       }
 
-       return k;
+       return prev;
 }
 
 struct bkey_s_c bch2_btree_node_iter_peek_unpack(struct btree_node_iter *iter,
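
The memmove in bch2_btree_node_iter_prev_filter() rotates the chosen iterator set into slot 0 instead of re-sorting, because sort() makes no guarantee about which of several duplicate keys lands first. The same rotate-to-front move on a plain array:

    #include <assert.h>
    #include <string.h>

    int main(void)
    {
        int data[4] = { 10, 20, 30, 40 };
        int *set = &data[2];        /* entry that must end up in slot 0 */
        int chosen = *set;

        /* slide data[0..set) up one slot, then drop the pick into slot 0 */
        memmove(&data[1], &data[0], (char *)set - (char *)&data[0]);
        data[0] = chosen;

        assert(data[0] == 30 && data[1] == 10 &&
               data[2] == 20 && data[3] == 40);
        return 0;
    }
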
diff --git a/libbcachefs/bset.h b/libbcachefs/bset.h
index 153e2b3f787f8263cfeb3cbe7cd9f2e77fd60f80..296c05b4f07a1fd675014f21987144e2a35d5e61 100644
--- a/libbcachefs/bset.h
+++ b/libbcachefs/bset.h
@@ -393,10 +393,21 @@ static inline bool btree_iter_pos_cmp_p_or_unp(const struct btree *b,
 }
 
 struct bset_tree *bch2_bkey_to_bset(struct btree *, struct bkey_packed *);
-struct bkey_packed *bch2_bkey_prev_all(struct btree *, struct bset_tree *,
-                                 struct bkey_packed *);
-struct bkey_packed *bch2_bkey_prev(struct btree *, struct bset_tree *,
-                                  struct bkey_packed *);
+
+struct bkey_packed *bch2_bkey_prev_filter(struct btree *, struct bset_tree *,
+                                         struct bkey_packed *, unsigned);
+
+static inline struct bkey_packed *
+bch2_bkey_prev_all(struct btree *b, struct bset_tree *t, struct bkey_packed *k)
+{
+       return bch2_bkey_prev_filter(b, t, k, 0);
+}
+
+static inline struct bkey_packed *
+bch2_bkey_prev(struct btree *b, struct bset_tree *t, struct bkey_packed *k)
+{
+       return bch2_bkey_prev_filter(b, t, k, KEY_TYPE_DISCARD + 1);
+}
 
 enum bch_extent_overlap {
        BCH_EXTENT_OVERLAP_ALL          = 0,
@@ -471,9 +482,11 @@ static inline int __btree_node_iter_cmp(bool is_extents,
         * For extents, bkey_deleted() is used as a proxy for k->size == 0, so
         * deleted keys have to sort last.
         */
-       return bkey_cmp_packed(b, l, r) ?: is_extents
-               ? (int) bkey_deleted(l) - (int) bkey_deleted(r)
-               : (int) bkey_deleted(r) - (int) bkey_deleted(l);
+       return bkey_cmp_packed(b, l, r)
+               ?: (is_extents
+                   ? (int) bkey_deleted(l) - (int) bkey_deleted(r)
+                   : (int) bkey_deleted(r) - (int) bkey_deleted(l))
+               ?: (l > r) - (l < r);
 }
 
 static inline int btree_node_iter_cmp(struct btree_node_iter *iter,
@@ -512,25 +525,34 @@ __bch2_btree_node_iter_peek_all(struct btree_node_iter *iter,
        return __btree_node_offset_to_key(b, iter->data->k);
 }
 
+static inline struct bkey_packed *
+bch2_btree_node_iter_peek_filter(struct btree_node_iter *iter,
+                                struct btree *b,
+                                unsigned min_key_type)
+{
+       while (!bch2_btree_node_iter_end(iter)) {
+               struct bkey_packed *k = __bch2_btree_node_iter_peek_all(iter, b);
+
+               if (k->type >= min_key_type)
+                       return k;
+
+               bch2_btree_node_iter_advance(iter, b);
+       }
+
+       return NULL;
+}
+
 static inline struct bkey_packed *
 bch2_btree_node_iter_peek_all(struct btree_node_iter *iter,
                              struct btree *b)
 {
-       return bch2_btree_node_iter_end(iter)
-               ? NULL
-               : __bch2_btree_node_iter_peek_all(iter, b);
+       return bch2_btree_node_iter_peek_filter(iter, b, 0);
 }
 
 static inline struct bkey_packed *
 bch2_btree_node_iter_peek(struct btree_node_iter *iter, struct btree *b)
 {
-       struct bkey_packed *ret;
-
-       while ((ret = bch2_btree_node_iter_peek_all(iter, b)) &&
-              bkey_deleted(ret))
-               bch2_btree_node_iter_advance(iter, b);
-
-       return ret;
+       return bch2_btree_node_iter_peek_filter(iter, b, KEY_TYPE_DISCARD + 1);
 }
 
 static inline struct bkey_packed *
@@ -544,10 +566,20 @@ bch2_btree_node_iter_next_all(struct btree_node_iter *iter, struct btree *b)
        return ret;
 }
 
-struct bkey_packed *bch2_btree_node_iter_prev_all(struct btree_node_iter *,
-                                                struct btree *);
-struct bkey_packed *bch2_btree_node_iter_prev(struct btree_node_iter *,
-                                            struct btree *);
+struct bkey_packed *bch2_btree_node_iter_prev_filter(struct btree_node_iter *,
+                                                    struct btree *, unsigned);
+
+static inline struct bkey_packed *
+bch2_btree_node_iter_prev_all(struct btree_node_iter *iter, struct btree *b)
+{
+       return bch2_btree_node_iter_prev_filter(iter, b, 0);
+}
+
+static inline struct bkey_packed *
+bch2_btree_node_iter_prev(struct btree_node_iter *iter, struct btree *b)
+{
+       return bch2_btree_node_iter_prev_filter(iter, b, KEY_TYPE_DISCARD + 1);
+}
 
 /*
  * Iterates over all _live_ keys - skipping deleted (and potentially
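
The added `?: (l > r) - (l < r)` term in __btree_node_iter_cmp() breaks ties between equal keys by memory address: `(l > r) - (l < r)` is the usual branch-free three-way compare yielding -1, 0 or +1, which makes the iterator's sort order total and deterministic. In isolation:

    #include <assert.h>

    static int cmp3(const int *l, const int *r)
    {
        return (l > r) - (l < r);   /* -1, 0 or +1 */
    }

    int main(void)
    {
        int a[2];

        assert(cmp3(&a[0], &a[1]) == -1);
        assert(cmp3(&a[1], &a[0]) ==  1);
        assert(cmp3(&a[0], &a[0]) ==  0);
        return 0;
    }
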
diff --git a/libbcachefs/btree_cache.c b/libbcachefs/btree_cache.c
index b0dc4c8a85cb0edf3360a02934857e403422d5e2..f15a415e37e359951c64649d4bfdde335ec0b161 100644
--- a/libbcachefs/btree_cache.c
+++ b/libbcachefs/btree_cache.c
@@ -577,10 +577,11 @@ err:
 
 /* Slowpath, don't want it inlined into btree_iter_traverse() */
 static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c,
-                                                  struct btree_iter *iter,
-                                                  const struct bkey_i *k,
-                                                  unsigned level,
-                                                  enum six_lock_type lock_type)
+                               struct btree_iter *iter,
+                               const struct bkey_i *k,
+                               unsigned level,
+                               enum six_lock_type lock_type,
+                               bool sync)
 {
        struct btree_cache *bc = &c->btree_cache;
        struct btree *b;
@@ -590,6 +591,7 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c,
         * been freed:
         */
        BUG_ON(!btree_node_locked(iter, level + 1));
+       BUG_ON(level >= BTREE_MAX_DEPTH);
 
        b = bch2_btree_node_mem_alloc(c);
        if (IS_ERR(b))
@@ -623,9 +625,15 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c,
        if (btree_node_read_locked(iter, level + 1))
                btree_node_unlock(iter, level + 1);
 
-       bch2_btree_node_read(c, b, true);
+       bch2_btree_node_read(c, b, sync);
+
        six_unlock_write(&b->lock);
 
+       if (!sync) {
+               six_unlock_intent(&b->lock);
+               return NULL;
+       }
+
        if (lock_type == SIX_LOCK_read)
                six_lock_downgrade(&b->lock);
 
@@ -643,7 +651,8 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c,
  */
 struct btree *bch2_btree_node_get(struct bch_fs *c, struct btree_iter *iter,
                                  const struct bkey_i *k, unsigned level,
-                                 enum six_lock_type lock_type)
+                                 enum six_lock_type lock_type,
+                                 bool may_drop_locks)
 {
        struct btree_cache *bc = &c->btree_cache;
        struct btree *b;
@@ -670,7 +679,7 @@ retry:
                 * else we could read in a btree node from disk that's been
                 * freed:
                 */
-               b = bch2_btree_node_fill(c, iter, k, level, lock_type);
+               b = bch2_btree_node_fill(c, iter, k, level, lock_type, true);
 
                /* We raced and found the btree node in the cache */
                if (!b)
@@ -710,7 +719,8 @@ retry:
                if (btree_node_read_locked(iter, level + 1))
                        btree_node_unlock(iter, level + 1);
 
-               if (!btree_node_lock(b, k->k.p, level, iter, lock_type))
+               if (!btree_node_lock(b, k->k.p, level, iter,
+                                    lock_type, may_drop_locks))
                        return ERR_PTR(-EINTR);
 
                if (unlikely(PTR_HASH(&b->key) != PTR_HASH(k) ||
@@ -778,18 +788,17 @@ struct btree *bch2_btree_node_get_sibling(struct bch_fs *c,
        k = bch2_btree_node_iter_peek_all(&node_iter, parent);
        BUG_ON(bkey_cmp_left_packed(parent, k, &b->key.k.p));
 
-       do {
-               k = sib == btree_prev_sib
-                       ? bch2_btree_node_iter_prev_all(&node_iter, parent)
-                       : (bch2_btree_node_iter_advance(&node_iter, parent),
-                          bch2_btree_node_iter_peek_all(&node_iter, parent));
-               if (!k)
-                       goto out;
-       } while (bkey_deleted(k));
+       k = sib == btree_prev_sib
+               ? bch2_btree_node_iter_prev(&node_iter, parent)
+               : (bch2_btree_node_iter_advance(&node_iter, parent),
+                  bch2_btree_node_iter_peek(&node_iter, parent));
+       if (!k)
+               goto out;
 
        bch2_bkey_unpack(parent, &tmp.k, k);
 
-       ret = bch2_btree_node_get(c, iter, &tmp.k, level, SIX_LOCK_intent);
+       ret = bch2_btree_node_get(c, iter, &tmp.k, level,
+                                 SIX_LOCK_intent, may_drop_locks);
 
        if (PTR_ERR_OR_ZERO(ret) == -EINTR && may_drop_locks) {
                struct btree_iter *linked;
@@ -809,7 +818,7 @@ struct btree *bch2_btree_node_get_sibling(struct bch_fs *c,
                        btree_node_unlock(iter, level);
 
                ret = bch2_btree_node_get(c, iter, &tmp.k, level,
-                                         SIX_LOCK_intent);
+                                         SIX_LOCK_intent, may_drop_locks);
 
                /*
                 * before btree_iter_relock() calls btree_iter_verify_locks():
@@ -838,20 +847,32 @@ out:
               (iter->uptodate >= BTREE_ITER_NEED_RELOCK ||
                !btree_node_locked(iter, level)));
 
+       if (!IS_ERR_OR_NULL(ret)) {
+               struct btree *n1 = ret, *n2 = b;
+
+               if (sib != btree_prev_sib)
+                       swap(n1, n2);
+
+               BUG_ON(bkey_cmp(btree_type_successor(n1->btree_id,
+                                                    n1->key.k.p),
+                               n2->data->min_key));
+       }
+
        return ret;
 out_upgrade:
        if (may_drop_locks)
-               bch2_btree_iter_upgrade(iter, level + 2);
+               bch2_btree_iter_upgrade(iter, level + 2, true);
        ret = ERR_PTR(-EINTR);
        goto out;
 }
 
-void bch2_btree_node_prefetch(struct bch_fs *c, const struct bkey_i *k,
-                             unsigned level, enum btree_id btree_id)
+void bch2_btree_node_prefetch(struct bch_fs *c, struct btree_iter *iter,
+                             const struct bkey_i *k, unsigned level)
 {
        struct btree_cache *bc = &c->btree_cache;
        struct btree *b;
 
+       BUG_ON(!btree_node_locked(iter, level + 1));
        BUG_ON(level >= BTREE_MAX_DEPTH);
 
        rcu_read_lock();
@@ -861,27 +882,7 @@ void bch2_btree_node_prefetch(struct bch_fs *c, const struct bkey_i *k,
        if (b)
                return;
 
-       b = bch2_btree_node_mem_alloc(c);
-       if (IS_ERR(b))
-               return;
-
-       bkey_copy(&b->key, k);
-       if (bch2_btree_node_hash_insert(bc, b, level, btree_id)) {
-               /* raced with another fill: */
-
-               /* mark as unhashed... */
-               bkey_i_to_extent(&b->key)->v._data[0] = 0;
-
-               mutex_lock(&bc->lock);
-               list_add(&b->list, &bc->freeable);
-               mutex_unlock(&bc->lock);
-               goto out;
-       }
-
-       bch2_btree_node_read(c, b, false);
-out:
-       six_unlock_write(&b->lock);
-       six_unlock_intent(&b->lock);
+       bch2_btree_node_fill(c, iter, k, level, SIX_LOCK_read, false);
 }
 
 int bch2_print_btree_node(struct bch_fs *c, struct btree *b,
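
The assertion added to bch2_btree_node_get_sibling() checks that the node handed back really is adjacent: the successor of the left sibling's max key must equal the right sibling's min key, i.e. node ranges tile the key space with no gap or overlap. A toy version with flat 64-bit keys standing in for struct bpos and btree_type_successor():

    #include <assert.h>
    #include <stdint.h>

    struct toy_node { uint64_t min_key, max_key; };

    static void check_adjacent(const struct toy_node *n1, const struct toy_node *n2)
    {
        /* kernel: BUG_ON(bkey_cmp(successor(n1->key.k.p), n2->data->min_key)) */
        assert(n1->max_key + 1 == n2->min_key);
    }

    int main(void)
    {
        struct toy_node left  = { .min_key = 0,   .max_key = 99  };
        struct toy_node right = { .min_key = 100, .max_key = 199 };

        check_adjacent(&left, &right);
        return 0;
    }
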
diff --git a/libbcachefs/btree_cache.h b/libbcachefs/btree_cache.h
index 43109d086479d3775e5fd91947747b434bdda788..96d134f4d0fc5ae4b1652fc024f66711f5f3651f 100644
--- a/libbcachefs/btree_cache.h
+++ b/libbcachefs/btree_cache.h
@@ -23,14 +23,14 @@ struct btree *bch2_btree_node_mem_alloc(struct bch_fs *);
 
 struct btree *bch2_btree_node_get(struct bch_fs *, struct btree_iter *,
                                  const struct bkey_i *, unsigned,
-                                 enum six_lock_type);
+                                 enum six_lock_type, bool);
 
 struct btree *bch2_btree_node_get_sibling(struct bch_fs *, struct btree_iter *,
                                          struct btree *, bool,
                                          enum btree_node_sibling);
 
-void bch2_btree_node_prefetch(struct bch_fs *, const struct bkey_i *,
-                             unsigned, enum btree_id);
+void bch2_btree_node_prefetch(struct bch_fs *, struct btree_iter *,
+                             const struct bkey_i *, unsigned);
 
 void bch2_fs_btree_cache_exit(struct bch_fs *);
 int bch2_fs_btree_cache_init(struct bch_fs *);
diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c
index 0c825bcbc45ca1331b41fb487b92561bd727f5ea..847dfd685eacda5af224a19b98ac7f10c272b767 100644
--- a/libbcachefs/btree_io.c
+++ b/libbcachefs/btree_io.c
@@ -1547,7 +1547,7 @@ static void bch2_btree_node_write_error(struct bch_fs *c,
 
        __bch2_btree_iter_init(&iter, c, b->btree_id, b->key.k.p,
                               BTREE_MAX_DEPTH,
-                              b->level, 0);
+                              b->level, BTREE_ITER_NODES);
 retry:
        ret = bch2_btree_iter_traverse(&iter);
        if (ret)
diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c
index 682a91434775a5150d4a0d1562b5bb42020e67ec..097b68e073992dc7887508bf98f21e2e657e8d09 100644
--- a/libbcachefs/btree_iter.c
+++ b/libbcachefs/btree_iter.c
@@ -18,7 +18,9 @@ static inline struct bkey_s_c __btree_iter_peek_all(struct btree_iter *,
 
 static inline bool is_btree_node(struct btree_iter *iter, unsigned l)
 {
-       return iter->l[l].b && iter->l[l].b != BTREE_ITER_NOT_END;
+       return l < BTREE_MAX_DEPTH &&
+               iter->l[l].b &&
+               iter->l[l].b != BTREE_ITER_NOT_END;
 }
 
 /* Btree node locking: */
@@ -88,10 +90,10 @@ static inline bool btree_node_lock_increment(struct btree_iter *iter,
 
 bool __bch2_btree_node_relock(struct btree_iter *iter, unsigned level)
 {
-       struct btree *b = iter->l[level].b;
+       struct btree *b = btree_iter_node(iter, level);
        int want = __btree_lock_want(iter, level);
 
-       if (!is_btree_node(iter, level))
+       if (!b || b == BTREE_ITER_NOT_END)
                return false;
 
        if (race_fault())
@@ -115,12 +117,12 @@ static bool bch2_btree_node_upgrade(struct btree_iter *iter, unsigned level)
        if (!is_btree_node(iter, level))
                return false;
 
-       if (race_fault())
-               return false;
-
        if (btree_node_intent_locked(iter, level))
                return true;
 
+       if (race_fault())
+               return false;
+
        if (btree_node_locked(iter, level)
            ? six_lock_tryupgrade(&b->lock)
            : six_relock_type(&b->lock, SIX_LOCK_intent, iter->lock_seq[level]))
@@ -180,7 +182,8 @@ static inline bool btree_iter_get_locks(struct btree_iter *iter,
 bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
                           unsigned level,
                           struct btree_iter *iter,
-                          enum six_lock_type type)
+                          enum six_lock_type type,
+                          bool may_drop_locks)
 {
        struct bch_fs *c = iter->c;
        struct btree_iter *linked;
@@ -231,10 +234,12 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
                 */
                if (type == SIX_LOCK_intent &&
                    linked->nodes_locked != linked->nodes_intent_locked) {
-                       linked->locks_want = max_t(unsigned,
-                                       linked->locks_want,
-                                       __fls(linked->nodes_locked) + 1);
-                       btree_iter_get_locks(linked, true);
+                       if (may_drop_locks) {
+                               linked->locks_want = max_t(unsigned,
+                                               linked->locks_want,
+                                               __fls(linked->nodes_locked) + 1);
+                               btree_iter_get_locks(linked, true);
+                       }
                        ret = false;
                }
 
@@ -245,10 +250,12 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
                 */
                if (linked->btree_id == iter->btree_id &&
                    level > __fls(linked->nodes_locked)) {
-                       linked->locks_want = max_t(unsigned,
-                                                  linked->locks_want,
-                                                  iter->locks_want);
-                       btree_iter_get_locks(linked, true);
+                       if (may_drop_locks) {
+                               linked->locks_want = max_t(unsigned,
+                                                          linked->locks_want,
+                                                          iter->locks_want);
+                               btree_iter_get_locks(linked, true);
+                       }
                        ret = false;
                }
        }
@@ -265,11 +272,6 @@ void bch2_btree_iter_verify_locks(struct btree_iter *iter)
 {
        unsigned l;
 
-       if (iter->uptodate == BTREE_ITER_END) {
-               BUG_ON(iter->nodes_locked);
-               return;
-       }
-
        for (l = 0; btree_iter_node(iter, l); l++) {
                if (iter->uptodate >= BTREE_ITER_NEED_RELOCK &&
                    !btree_node_locked(iter, l))
@@ -284,13 +286,9 @@ void bch2_btree_iter_verify_locks(struct btree_iter *iter)
 __flatten
 static bool __bch2_btree_iter_relock(struct btree_iter *iter)
 {
-       if (iter->uptodate < BTREE_ITER_NEED_RELOCK)
-               return true;
-
-       if (iter->uptodate > BTREE_ITER_NEED_TRAVERSE)
-               return false;
-
-       return btree_iter_get_locks(iter, false);
+       return iter->uptodate >= BTREE_ITER_NEED_RELOCK
+               ? btree_iter_get_locks(iter, false)
+               : true;
 }
 
 bool bch2_btree_iter_relock(struct btree_iter *iter)
@@ -332,6 +330,30 @@ bool __bch2_btree_iter_upgrade(struct btree_iter *iter,
        return false;
 }
 
+bool __bch2_btree_iter_upgrade_nounlock(struct btree_iter *iter,
+                                       unsigned new_locks_want)
+{
+       unsigned l = iter->level;
+
+       EBUG_ON(iter->locks_want >= new_locks_want);
+
+       iter->locks_want = new_locks_want;
+
+       do {
+               if (!btree_iter_node(iter, l))
+                       break;
+
+               if (!bch2_btree_node_upgrade(iter, l)) {
+                       iter->locks_want = l;
+                       return false;
+               }
+
+               l++;
+       } while (l < iter->locks_want);
+
+       return true;
+}
+
 void __bch2_btree_iter_downgrade(struct btree_iter *iter,
                                 unsigned downgrade_to)
 {
@@ -419,6 +441,12 @@ static void __bch2_btree_iter_verify(struct btree_iter *iter,
                panic("next key should be before iter pos:\n%llu:%llu\n%s\n",
                      iter->pos.inode, iter->pos.offset, buf);
        }
+
+       if (iter->uptodate == BTREE_ITER_UPTODATE &&
+           (iter->flags & BTREE_ITER_TYPE) != BTREE_ITER_NODES) {
+               BUG_ON(!bkey_whiteout(&iter->k) &&
+                      bch2_btree_node_iter_end(&l->iter));
+       }
 }
 
 void bch2_btree_iter_verify(struct btree_iter *iter, struct btree *b)
@@ -453,6 +481,8 @@ static void __bch2_btree_node_iter_fix(struct btree_iter *iter,
        if (new_u64s &&
            btree_iter_pos_cmp_packed(b, &iter->pos, where,
                                      iter->flags & BTREE_ITER_IS_EXTENTS)) {
+               btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK);
+
                bch2_btree_node_iter_push(node_iter, b, where, end);
 
                if (!b->level &&
@@ -482,6 +512,8 @@ found:
                goto iter_current_key_not_modified;
        }
 
+       btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK);
+
        bch2_btree_node_iter_sort(node_iter, b);
        if (!b->level && node_iter == &iter->l[0].iter)
                __btree_iter_peek_all(iter, &iter->l[0], &iter->k);
@@ -666,7 +698,8 @@ static inline bool btree_iter_pos_cmp(struct btree_iter *iter,
 static inline bool btree_iter_pos_after_node(struct btree_iter *iter,
                                             struct btree *b)
 {
-       return !btree_iter_pos_cmp(iter, &b->key.k);
+       return !btree_iter_pos_cmp(iter, &b->key.k) &&
+               bkey_cmp(b->key.k.p, POS_MAX);
 }
 
 static inline bool btree_iter_pos_in_node(struct btree_iter *iter,
@@ -788,7 +821,7 @@ static inline int btree_iter_lock_root(struct btree_iter *iter,
 
                lock_type = __btree_lock_want(iter, iter->level);
                if (unlikely(!btree_node_lock(b, POS_MAX, iter->level,
-                                             iter, lock_type)))
+                                             iter, lock_type, true)))
                        return -EINTR;
 
                if (likely(b == c->btree_roots[iter->btree_id].b &&
@@ -830,9 +863,8 @@ static void btree_iter_prefetch(struct btree_iter *iter)
                        break;
 
                bch2_bkey_unpack(l->b, &tmp.k, k);
-               bch2_btree_node_prefetch(iter->c, &tmp.k,
-                                        iter->level - 1,
-                                        iter->btree_id);
+               bch2_btree_node_prefetch(iter->c, iter, &tmp.k,
+                                        iter->level - 1);
        }
 
        if (!was_locked)
@@ -852,7 +884,7 @@ static inline int btree_iter_down(struct btree_iter *iter)
        bch2_bkey_unpack(l->b, &tmp.k,
                         bch2_btree_node_iter_peek(&l->iter, l->b));
 
-       b = bch2_btree_node_get(iter->c, iter, &tmp.k, level, lock_type);
+       b = bch2_btree_node_get(iter->c, iter, &tmp.k, level, lock_type, true);
        if (unlikely(IS_ERR(b)))
                return PTR_ERR(b);
 
@@ -872,12 +904,6 @@ static void btree_iter_up(struct btree_iter *iter)
        btree_node_unlock(iter, iter->level++);
 }
 
-static void btree_iter_set_end(struct btree_iter *iter)
-{
-       iter->uptodate = BTREE_ITER_END;
-       __bch2_btree_iter_unlock(iter);
-}
-
 int __must_check __bch2_btree_iter_traverse(struct btree_iter *);
 
 static int btree_iter_traverse_error(struct btree_iter *iter, int ret)
@@ -954,6 +980,24 @@ io_error:
        goto out;
 }
 
+static unsigned btree_iter_up_until_locked(struct btree_iter *iter,
+                                          bool check_pos)
+{
+       unsigned l = iter->level;
+
+       while (btree_iter_node(iter, l) &&
+              !(is_btree_node(iter, l) &&
+                bch2_btree_node_relock(iter, l) &&
+                (!check_pos ||
+                 btree_iter_pos_in_node(iter, iter->l[l].b)))) {
+               btree_node_unlock(iter, l);
+               iter->l[l].b = BTREE_ITER_NOT_END;
+               l++;
+       }
+
+       return l;
+}
+
 /*
  * This is the main state machine for walking down the btree - walks down to a
  * specified depth
@@ -967,45 +1011,19 @@ int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
 {
        unsigned depth_want = iter->level;
 
-       if (unlikely(iter->uptodate == BTREE_ITER_END))
+       if (unlikely(iter->level >= BTREE_MAX_DEPTH))
                return 0;
 
-       BUG_ON(iter->level >= BTREE_MAX_DEPTH);
-       BUG_ON(!iter->l[iter->level].b);
+       if (__bch2_btree_iter_relock(iter))
+               return 0;
 
        iter->flags &= ~BTREE_ITER_AT_END_OF_LEAF;
 
-       /* make sure we have all the intent locks we need - ugh */
-       if (unlikely(iter->l[iter->level].b &&
-                    iter->level + 1 < iter->locks_want)) {
-               unsigned i;
-
-               for (i = iter->level + 1;
-                    i < iter->locks_want && iter->l[i].b;
-                    i++)
-                       if (!bch2_btree_node_relock(iter, i)) {
-                               while (iter->level < BTREE_MAX_DEPTH &&
-                                      iter->l[iter->level].b &&
-                                      iter->level + 1 < iter->locks_want)
-                                       btree_iter_up(iter);
-                               break;
-                       }
-       }
-
        /*
-        * If the current node isn't locked, go up until we have a locked node
-        * or run out of nodes:
+        * XXX: correctly using BTREE_ITER_UPTODATE should make using check_pos
+        * here unnecessary
         */
-       while (btree_iter_node(iter, iter->level) &&
-              !(is_btree_node(iter, iter->level) &&
-                bch2_btree_node_relock(iter, iter->level) &&
-
-                /*
-                 * XXX: correctly using BTREE_ITER_UPTODATE should make
-                 * comparing iter->pos against node's key unnecessary
-                 */
-                btree_iter_pos_in_node(iter, iter->l[iter->level].b)))
-               btree_iter_up(iter);
+       iter->level = btree_iter_up_until_locked(iter, true);
 
        /*
         * If we've got a btree node locked (i.e. we aren't about to relock the
@@ -1049,9 +1067,6 @@ int __must_check bch2_btree_iter_traverse(struct btree_iter *iter)
 {
        int ret;
 
-       if (__bch2_btree_iter_relock(iter))
-               return 0;
-
        ret = __bch2_btree_iter_traverse(iter);
        if (unlikely(ret))
                ret = btree_iter_traverse_error(iter, ret);
@@ -1061,6 +1076,18 @@ int __must_check bch2_btree_iter_traverse(struct btree_iter *iter)
        return ret;
 }
 
+static inline void bch2_btree_iter_checks(struct btree_iter *iter,
+                                         enum btree_iter_type type)
+{
+       EBUG_ON(iter->btree_id >= BTREE_ID_NR);
+       EBUG_ON((iter->flags & BTREE_ITER_TYPE) != type);
+       EBUG_ON(!!(iter->flags & BTREE_ITER_IS_EXTENTS) !=
+               (iter->btree_id == BTREE_ID_EXTENTS &&
+                type != BTREE_ITER_NODES));
+
+       bch2_btree_iter_verify_locks(iter);
+}
+
 /* Iterate across nodes (leaf and interior nodes) */
 
 struct btree *bch2_btree_iter_peek_node(struct btree_iter *iter)
@@ -1068,24 +1095,18 @@ struct btree *bch2_btree_iter_peek_node(struct btree_iter *iter)
        struct btree *b;
        int ret;
 
-       EBUG_ON(iter->flags & BTREE_ITER_IS_EXTENTS);
-       bch2_btree_iter_verify_locks(iter);
+       bch2_btree_iter_checks(iter, BTREE_ITER_NODES);
 
        if (iter->uptodate == BTREE_ITER_UPTODATE)
                return iter->l[iter->level].b;
 
-       if (unlikely(iter->uptodate == BTREE_ITER_END))
-               return NULL;
-
        ret = bch2_btree_iter_traverse(iter);
        if (ret)
-               return ERR_PTR(ret);
+               return NULL;
 
-       b = iter->l[iter->level].b;
-       if (!b) {
-               btree_iter_set_end(iter);
+       b = btree_iter_node(iter, iter->level);
+       if (!b)
                return NULL;
-       }
 
        BUG_ON(bkey_cmp(b->key.k.p, iter->pos) < 0);
 
@@ -1100,25 +1121,25 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter, unsigned depth)
        struct btree *b;
        int ret;
 
-       EBUG_ON(iter->flags & BTREE_ITER_IS_EXTENTS);
-       bch2_btree_iter_verify_locks(iter);
+       bch2_btree_iter_checks(iter, BTREE_ITER_NODES);
+
+       /* already got to end? */
+       if (!btree_iter_node(iter, iter->level))
+               return NULL;
 
        btree_iter_up(iter);
 
-       if (!btree_iter_node(iter, iter->level)) {
-               btree_iter_set_end(iter);
-               return NULL;
-       }
+       if (!bch2_btree_node_relock(iter, iter->level))
+               btree_iter_set_dirty(iter, BTREE_ITER_NEED_RELOCK);
 
-       if (!bch2_btree_node_relock(iter, iter->level)) {
-               btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
-               ret = bch2_btree_iter_traverse(iter);
-               if (ret)
-                       return NULL;
-       }
+       ret = bch2_btree_iter_traverse(iter);
+       if (ret)
+               return NULL;
 
-       b = iter->l[iter->level].b;
-       BUG_ON(!b);
+       /* got to end? */
+       b = btree_iter_node(iter, iter->level);
+       if (!b)
+               return NULL;
 
        if (bkey_cmp(iter->pos, b->key.k.p) < 0) {
                /*
@@ -1150,6 +1171,7 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter, unsigned depth)
        }
 
        iter->pos = b->key.k.p;
+       iter->uptodate = BTREE_ITER_UPTODATE;
 
        return b;
 }
@@ -1182,10 +1204,68 @@ void bch2_btree_iter_set_pos_same_leaf(struct btree_iter *iter, struct bpos new_
 
 void bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos)
 {
-       EBUG_ON(bkey_cmp(new_pos, iter->pos) < 0); /* XXX handle this */
+       int cmp = bkey_cmp(new_pos, iter->pos);
+       unsigned level;
+
+       if (!cmp)
+               return;
+
        iter->pos = new_pos;
 
-       btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
+       level = btree_iter_up_until_locked(iter, true);
+
+       if (btree_iter_node(iter, level)) {
+               unsigned nr_advanced = 0;
+               struct btree_iter_level *l = &iter->l[level];
+               struct bkey_s_c k;
+               struct bkey u;
+
+               /*
+                * We might have to skip over many keys, or just a few: try
+                * advancing the node iterator, and if we have to skip over too
+                * many keys just reinit it (or if we're rewinding, since that
+                * is expensive).
+                */
+               if (cmp > 0) {
+                       while ((k = __btree_iter_peek_all(iter, l, &u)).k &&
+                              !btree_iter_pos_cmp(iter, k.k)) {
+                               if (nr_advanced > 8)
+                                       goto reinit_node;
+
+                               __btree_iter_advance(l);
+                               nr_advanced++;
+                       }
+               } else {
+reinit_node:
+                       __btree_iter_init(iter, iter->l[level].b);
+               }
+
+               /* Don't leave it locked if we're not supposed to: */
+               if (btree_lock_want(iter, level) == BTREE_NODE_UNLOCKED)
+                       btree_node_unlock(iter, level);
+       }
+
+       if (level != iter->level)
+               btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
+       else
+               btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK);
+}
+
+static inline struct bkey_s_c btree_iter_peek_uptodate(struct btree_iter *iter)
+{
+       struct btree_iter_level *l = &iter->l[0];
+       struct bkey_s_c ret = { .k = &iter->k };
+
+       if (!bkey_deleted(&iter->k)) {
+               EBUG_ON(bch2_btree_node_iter_end(&l->iter));
+               ret.v = bkeyp_val(&l->b->format,
+                       __bch2_btree_node_iter_peek_all(&l->iter, l->b));
+       }
+
+       if (debug_check_bkeys(iter->c) &&
+           !bkey_deleted(ret.k))
+               bch2_bkey_debugcheck(iter->c, l->b, ret);
+       return ret;
 }
 
 struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
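
bch2_btree_iter_set_pos() above now tries to reuse the current position: for a short forward move it simply advances the node iterator, but past 8 steps, or when rewinding, it re-initializes, since a fresh search beats a long linear walk. A toy version of that heuristic over a sorted array (linear re-init standing in for the real binary search):

    #include <assert.h>
    #include <stddef.h>

    #define REINIT_THRESHOLD 8

    /* return index of the first key >= new_pos, starting from index cur */
    static size_t reposition(const int *keys, size_t nr, size_t cur, int new_pos)
    {
        size_t nr_advanced = 0;

        if (cur < nr && keys[cur] <= new_pos) {     /* moving forward */
            while (cur < nr && keys[cur] < new_pos) {
                if (++nr_advanced > REINIT_THRESHOLD)
                    goto reinit;
                cur++;
            }
            return cur;
        }
    reinit:
        /* rewinding, or too far ahead: start over */
        for (cur = 0; cur < nr && keys[cur] < new_pos; cur++)
            ;
        return cur;
    }

    int main(void)
    {
        int keys[] = { 1, 3, 5, 7, 9 };

        assert(reposition(keys, 5, 0, 6) == 3);     /* short forward step */
        assert(reposition(keys, 5, 4, 2) == 1);     /* rewind: re-init */
        return 0;
    }
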
@@ -1194,26 +1274,10 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
        struct bkey_s_c k;
        int ret;
 
-       EBUG_ON(!!(iter->flags & BTREE_ITER_IS_EXTENTS) !=
-               (iter->btree_id == BTREE_ID_EXTENTS));
-       EBUG_ON(iter->flags & BTREE_ITER_SLOTS);
-       bch2_btree_iter_verify_locks(iter);
+       bch2_btree_iter_checks(iter, BTREE_ITER_KEYS);
 
-       if (iter->uptodate == BTREE_ITER_UPTODATE) {
-               struct bkey_packed *k =
-                       __bch2_btree_node_iter_peek_all(&l->iter, l->b);
-               struct bkey_s_c ret = {
-                       .k = &iter->k,
-                       .v = bkeyp_val(&l->b->format, k)
-               };
-
-               if (debug_check_bkeys(iter->c))
-                       bch2_bkey_debugcheck(iter->c, l->b, ret);
-               return ret;
-       }
-
-       if (iter->uptodate == BTREE_ITER_END)
-               return bkey_s_c_null;
+       if (iter->uptodate == BTREE_ITER_UPTODATE)
+               return btree_iter_peek_uptodate(iter);
 
        while (1) {
                ret = bch2_btree_iter_traverse(iter);
@@ -1225,14 +1289,13 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
                        break;
 
                /* got to the end of the leaf, iterator needs to be traversed: */
-               iter->pos = l->b->key.k.p;
-               if (!bkey_cmp(iter->pos, POS_MAX)) {
-                       btree_iter_set_end(iter);
+               iter->pos       = l->b->key.k.p;
+               iter->uptodate  = BTREE_ITER_NEED_TRAVERSE;
+
+               if (!bkey_cmp(iter->pos, POS_MAX))
                        return bkey_s_c_null;
-               }
 
                iter->pos = btree_type_successor(iter->btree_id, iter->pos);
-               iter->uptodate = BTREE_ITER_NEED_TRAVERSE;
        }
 
        /*
@@ -1252,14 +1315,13 @@ struct bkey_s_c bch2_btree_iter_peek_next_leaf(struct btree_iter *iter)
 {
        struct btree_iter_level *l = &iter->l[0];
 
-       iter->pos = l->b->key.k.p;
-       if (!bkey_cmp(iter->pos, POS_MAX)) {
-               btree_iter_set_end(iter);
+       iter->pos       = l->b->key.k.p;
+       iter->uptodate  = BTREE_ITER_NEED_TRAVERSE;
+
+       if (!bkey_cmp(iter->pos, POS_MAX))
                return bkey_s_c_null;
-       }
 
        iter->pos = btree_type_successor(iter->btree_id, iter->pos);
-       iter->uptodate = BTREE_ITER_NEED_TRAVERSE;
 
        return bch2_btree_iter_peek(iter);
 }
@@ -1270,10 +1332,7 @@ struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter)
        struct bkey_packed *p;
        struct bkey_s_c k;
 
-       EBUG_ON(!!(iter->flags & BTREE_ITER_IS_EXTENTS) !=
-               (iter->btree_id == BTREE_ID_EXTENTS));
-       EBUG_ON(iter->flags & BTREE_ITER_SLOTS);
-       bch2_btree_iter_verify_locks(iter);
+       bch2_btree_iter_checks(iter, BTREE_ITER_KEYS);
 
        if (unlikely(iter->uptodate != BTREE_ITER_UPTODATE)) {
                k = bch2_btree_iter_peek(iter);
@@ -1286,7 +1345,7 @@ struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter)
                p = bch2_btree_node_iter_peek_all(&l->iter, l->b);
                if (unlikely(!p))
                        return bch2_btree_iter_peek_next_leaf(iter);
-       } while (bkey_deleted(p));
+       } while (bkey_whiteout(p));
 
        k = __btree_iter_unpack(iter, l, &iter->k, p);
 
@@ -1295,6 +1354,51 @@ struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter)
        return k;
 }
 
+struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *iter)
+{
+       struct btree_iter_level *l = &iter->l[0];
+       struct bkey_packed *p;
+       struct bkey_s_c k;
+       int ret;
+
+       bch2_btree_iter_checks(iter, BTREE_ITER_KEYS);
+
+       if (unlikely(iter->uptodate != BTREE_ITER_UPTODATE)) {
+               k = bch2_btree_iter_peek(iter);
+               if (IS_ERR(k.k))
+                       return k;
+       }
+
+       while (1) {
+               p = bch2_btree_node_iter_prev(&l->iter, l->b);
+               if (likely(p))
+                       break;
+
+               iter->pos = l->b->data->min_key;
+               if (!bkey_cmp(iter->pos, POS_MIN))
+                       return bkey_s_c_null;
+
+               bch2_btree_iter_set_pos(iter,
+                       btree_type_predecessor(iter->btree_id, iter->pos));
+
+               ret = bch2_btree_iter_traverse(iter);
+               if (unlikely(ret))
+                       return bkey_s_c_err(ret);
+
+               p = bch2_btree_node_iter_peek(&l->iter, l->b);
+               if (p)
+                       break;
+       }
+
+       k = __btree_iter_unpack(iter, l, &iter->k, p);
+
+       EBUG_ON(bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0);
+
+       iter->pos       = bkey_start_pos(k.k);
+       iter->uptodate  = BTREE_ITER_UPTODATE;
+       return k;
+}
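
A usage sketch for the new bch2_btree_iter_prev(), modeled on the test_iterate() unit test added to tests.c later in this patch (c is assumed to be a struct bch_fs *): after a forward pass exhausts the keys, the same iterator walks back over each of them in reverse:

    struct btree_iter iter;
    struct bkey_s_c k;
    u64 nr_seen = 0;

    for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, POS(0, 0), 0, k)
            nr_seen++;                      /* forward pass */
    bch2_btree_iter_unlock(&iter);

    while (!IS_ERR_OR_NULL((k = bch2_btree_iter_prev(&iter)).k))
            --nr_seen;                      /* same keys, reverse order */
    bch2_btree_iter_unlock(&iter);          /* nr_seen is 0 again */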
+
 static inline struct bkey_s_c
 __bch2_btree_iter_peek_slot(struct btree_iter *iter)
 {
@@ -1309,13 +1413,6 @@ recheck:
               bkey_cmp(bkey_start_pos(k.k), iter->pos) == 0)
                __btree_iter_advance(l);
 
-       if (k.k && bkey_cmp(bkey_start_pos(k.k), iter->pos) <= 0) {
-               EBUG_ON(bkey_cmp(k.k->p, iter->pos) < 0);
-               EBUG_ON(bkey_deleted(k.k));
-               iter->uptodate = BTREE_ITER_UPTODATE;
-               return k;
-       }
-
        /*
         * If we got to the end of the node, check if we need to traverse to the
         * next node:
@@ -1329,21 +1426,35 @@ recheck:
                goto recheck;
        }
 
+       if (k.k &&
+           !bkey_whiteout(k.k) &&
+           bkey_cmp(bkey_start_pos(k.k), iter->pos) <= 0) {
+               EBUG_ON(bkey_cmp(k.k->p, iter->pos) < 0);
+               EBUG_ON(bkey_deleted(k.k));
+               iter->uptodate = BTREE_ITER_UPTODATE;
+               return k;
+       }
+
        /* hole */
        bkey_init(&n);
        n.p = iter->pos;
 
        if (iter->flags & BTREE_ITER_IS_EXTENTS) {
                if (n.p.offset == KEY_OFFSET_MAX) {
-                       if (n.p.inode == KEY_INODE_MAX) {
-                               btree_iter_set_end(iter);
+                       if (n.p.inode == KEY_INODE_MAX)
                                return bkey_s_c_null;
-                       }
 
                        iter->pos = bkey_successor(iter->pos);
                        goto recheck;
                }
 
+               if (k.k && bkey_whiteout(k.k)) {
+                       struct btree_node_iter node_iter = l->iter;
+
+                       k = __btree_iter_unpack(iter, l, &iter->k,
+                               bch2_btree_node_iter_peek(&node_iter, l->b));
+               }
+
                if (!k.k)
                        k.k = &l->b->key.k;
 
@@ -1357,35 +1468,19 @@ recheck:
                EBUG_ON(!n.size);
        }
 
-       iter->k = n;
+       iter->k = n;
        iter->uptodate = BTREE_ITER_UPTODATE;
        return (struct bkey_s_c) { &iter->k, NULL };
 }
 
 struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
 {
-       struct btree_iter_level *l = &iter->l[0];
        int ret;
 
-       EBUG_ON(!!(iter->flags & BTREE_ITER_IS_EXTENTS) !=
-               (iter->btree_id == BTREE_ID_EXTENTS));
-       EBUG_ON(!(iter->flags & BTREE_ITER_SLOTS));
-       bch2_btree_iter_verify_locks(iter);
+       bch2_btree_iter_checks(iter, BTREE_ITER_SLOTS);
 
-       if (iter->uptodate == BTREE_ITER_UPTODATE) {
-               struct bkey_s_c ret = { .k = &iter->k };
-
-               if (!bkey_deleted(&iter->k))
-                       ret.v = bkeyp_val(&l->b->format,
-                               __bch2_btree_node_iter_peek_all(&l->iter, l->b));
-
-               if (debug_check_bkeys(iter->c))
-                       bch2_bkey_debugcheck(iter->c, l->b, ret);
-               return ret;
-       }
-
-       if (iter->uptodate == BTREE_ITER_END)
-               return bkey_s_c_null;
+       if (iter->uptodate == BTREE_ITER_UPTODATE)
+               return btree_iter_peek_uptodate(iter);
 
        ret = bch2_btree_iter_traverse(iter);
        if (unlikely(ret))
@@ -1396,10 +1491,7 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
 
 struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *iter)
 {
-       EBUG_ON(!!(iter->flags & BTREE_ITER_IS_EXTENTS) !=
-               (iter->btree_id == BTREE_ID_EXTENTS));
-       EBUG_ON(!(iter->flags & BTREE_ITER_SLOTS));
-       bch2_btree_iter_verify_locks(iter);
+       bch2_btree_iter_checks(iter, BTREE_ITER_SLOTS);
 
        iter->pos = btree_type_successor(iter->btree_id, iter->k.p);
 
@@ -1417,6 +1509,8 @@ struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *iter)
        if (!bkey_deleted(&iter->k))
                __btree_iter_advance(&iter->l[0]);
 
+       btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK);
+
        return __bch2_btree_iter_peek_slot(iter);
 }
 
@@ -1446,10 +1540,6 @@ void __bch2_btree_iter_init(struct btree_iter *iter, struct bch_fs *c,
        iter->l[iter->level].b          = BTREE_ITER_NOT_END;
        iter->next                      = iter;
 
-       if (unlikely((flags & BTREE_ITER_IS_EXTENTS) &&
-                    !bkey_cmp(pos, POS_MAX)))
-               iter->uptodate = BTREE_ITER_END;
-
        prefetch(c->btree_roots[btree_id].b);
 }
 
index 99e51b27675dc62f422fb76dc4acfc49971eb7b9..5db1cc581f56a91f79bf2009c5c17ff5e18b9c60 100644 (file)
@@ -106,14 +106,18 @@ void bch2_btree_node_iter_fix(struct btree_iter *, struct btree *,
 int bch2_btree_iter_unlock(struct btree_iter *);
 
 bool __bch2_btree_iter_upgrade(struct btree_iter *, unsigned);
+bool __bch2_btree_iter_upgrade_nounlock(struct btree_iter *, unsigned);
 
 static inline bool bch2_btree_iter_upgrade(struct btree_iter *iter,
-                                          unsigned new_locks_want)
+                                          unsigned new_locks_want,
+                                          bool may_drop_locks)
 {
        new_locks_want = min(new_locks_want, BTREE_MAX_DEPTH);
 
        return iter->locks_want < new_locks_want
-               ?  __bch2_btree_iter_upgrade(iter, new_locks_want)
+               ? (may_drop_locks
+                  ? __bch2_btree_iter_upgrade(iter, new_locks_want)
+                  : __bch2_btree_iter_upgrade_nounlock(iter, new_locks_want))
                : iter->uptodate <= BTREE_ITER_NEED_PEEK;
 }
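
The new may_drop_locks argument lets callers that must not lose their locks (see the BTREE_INSERT_NOUNLOCK callers in btree_update_interior.c below) opt into the _nounlock variant, which presumably upgrades only when it can take the stronger locks without first releasing anything already held.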
 
@@ -137,6 +141,7 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *, unsigned);
 
 struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *);
 struct bkey_s_c bch2_btree_iter_next(struct btree_iter *);
+struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *);
 
 struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *);
 struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *);
@@ -175,6 +180,19 @@ static inline struct bpos btree_type_successor(enum btree_id id,
        return pos;
 }
 
+static inline struct bpos btree_type_predecessor(enum btree_id id,
+                                              struct bpos pos)
+{
+       if (id == BTREE_ID_INODES) {
+               --pos.inode;
+               pos.offset = 0;
+       } else /* if (id != BTREE_ID_EXTENTS) */ {
+               pos = bkey_predecessor(pos);
+       }
+
+       return pos;
+}
+
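For illustration, with hypothetical values (this usage is not part of the patch): in the inodes btree every key lives at (inode, 0), so stepping back moves to the previous inode; elsewhere the position steps back by one offset. The commented-out guard mirrors btree_type_successor() above, and suggests extents intentionally share the generic branch here.

    struct bpos p1 = btree_type_predecessor(BTREE_ID_INODES, POS(7, 0));
    /* p1 == POS(6, 0) */
    struct bpos p2 = btree_type_predecessor(BTREE_ID_DIRENTS, POS(7, 5));
    /* p2 == POS(7, 4), assuming bkey_predecessor() decrements the offset
       with a borrow into the inode field at offset 0 */
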
 static inline int __btree_iter_cmp(enum btree_id id,
                                   struct bpos pos,
                                   const struct btree_iter *r)
@@ -207,7 +225,8 @@ static inline void bch2_btree_iter_cond_resched(struct btree_iter *iter)
 #define __for_each_btree_node(_iter, _c, _btree_id, _start,            \
                              _locks_want, _depth, _flags, _b)          \
        for (__bch2_btree_iter_init((_iter), (_c), (_btree_id), _start, \
-                                   _locks_want, _depth, _flags),       \
+                                   _locks_want, _depth,                \
+                                   _flags|BTREE_ITER_NODES),           \
             _b = bch2_btree_iter_peek_node(_iter);                     \
             (_b);                                                      \
             (_b) = bch2_btree_iter_next_node(_iter, _depth))
index 1d975207a16353b5db507cec7070decf3e1fd9cf..419d0e815a2527d77cf3dcab10b35d1b5413cf9a 100644 (file)
@@ -147,17 +147,19 @@ static inline void btree_node_lock_type(struct bch_fs *c, struct btree *b,
 }
 
 bool __bch2_btree_node_lock(struct btree *, struct bpos, unsigned,
-                          struct btree_iter *, enum six_lock_type);
+                           struct btree_iter *, enum six_lock_type, bool);
 
 static inline bool btree_node_lock(struct btree *b, struct bpos pos,
                                   unsigned level,
                                   struct btree_iter *iter,
-                                  enum six_lock_type type)
+                                  enum six_lock_type type,
+                                  bool may_drop_locks)
 {
        EBUG_ON(level >= BTREE_MAX_DEPTH);
 
        return likely(six_trylock_type(&b->lock, type)) ||
-               __bch2_btree_node_lock(b, pos, level, iter, type);
+               __bch2_btree_node_lock(b, pos, level, iter,
+                                      type, may_drop_locks);
 }
 
 bool __bch2_btree_node_relock(struct btree_iter *, unsigned);
index aed8d69347a5c9e0bbbea6b7581bbbd0b6b541cc..daa648c639d359fa3cd12727adbbed765f51cbc5 100644 (file)
@@ -182,26 +182,32 @@ struct btree_node_iter {
        } data[MAX_BSETS];
 };
 
-#define BTREE_ITER_SLOTS               (1 << 0)
-#define BTREE_ITER_INTENT              (1 << 1)
-#define BTREE_ITER_PREFETCH            (1 << 2)
+enum btree_iter_type {
+       BTREE_ITER_KEYS,
+       BTREE_ITER_SLOTS,
+       BTREE_ITER_NODES,
+};
+
+#define BTREE_ITER_TYPE                        ((1 << 2) - 1)
+
+#define BTREE_ITER_INTENT              (1 << 2)
+#define BTREE_ITER_PREFETCH            (1 << 3)
 /*
  * Used in bch2_btree_iter_traverse(), to indicate whether we're searching for
  * @pos or the first key strictly greater than @pos
  */
-#define BTREE_ITER_IS_EXTENTS          (1 << 3)
+#define BTREE_ITER_IS_EXTENTS          (1 << 4)
 /*
  * indicates we need to call bch2_btree_iter_traverse() to revalidate iterator:
  */
-#define BTREE_ITER_AT_END_OF_LEAF      (1 << 4)
-#define BTREE_ITER_ERROR               (1 << 5)
+#define BTREE_ITER_AT_END_OF_LEAF      (1 << 5)
+#define BTREE_ITER_ERROR               (1 << 6)
 
 enum btree_iter_uptodate {
        BTREE_ITER_UPTODATE             = 0,
        BTREE_ITER_NEED_PEEK            = 1,
        BTREE_ITER_NEED_RELOCK          = 2,
        BTREE_ITER_NEED_TRAVERSE        = 3,
-       BTREE_ITER_END                  = 4,
 };
 
 /*
@@ -216,7 +222,7 @@ struct btree_iter {
        struct bpos             pos;
 
        u8                      flags;
-       unsigned                uptodate:4;
+       enum btree_iter_uptodate uptodate:4;
        enum btree_id           btree_id:4;
        unsigned                level:4,
                                locks_want:4,
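
With the iterator type now packed into the low two flag bits, BTREE_ITER_TYPE masks it back out, which is what lets the repeated EBUG_ON() triples elsewhere in this patch collapse into single bch2_btree_iter_checks(iter, type) calls. The helper's body is not part of this diff; a plausible reconstruction from the assertions it replaces:

    /* Hypothetical sketch; the real definition is not shown in this patch: */
    static inline void bch2_btree_iter_checks(struct btree_iter *iter,
                                              enum btree_iter_type type)
    {
            EBUG_ON((iter->flags & BTREE_ITER_TYPE) != type);
            EBUG_ON(!!(iter->flags & BTREE_ITER_IS_EXTENTS) !=
                    (iter->btree_id == BTREE_ID_EXTENTS));
            bch2_btree_iter_verify_locks(iter);
    }
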
index 3e13f78476a29eadc16f32a785488083d9fdcaaf..392ee0a0659294720f1b75f276fc1e615abee787 100644 (file)
@@ -1586,7 +1586,8 @@ int bch2_btree_split_leaf(struct bch_fs *c, struct btree_iter *iter,
         * XXX: figure out how far we might need to split,
         * instead of locking/reserving all the way to the root:
         */
-       if (!bch2_btree_iter_upgrade(iter, U8_MAX)) {
+       if (!bch2_btree_iter_upgrade(iter, U8_MAX,
+                       !(flags & BTREE_INSERT_NOUNLOCK))) {
                ret = -EINTR;
                goto out;
        }
@@ -1694,7 +1695,8 @@ retry:
        if (!down_read_trylock(&c->gc_lock))
                goto err_cycle_gc_lock;
 
-       if (!bch2_btree_iter_upgrade(iter, U8_MAX)) {
+       if (!bch2_btree_iter_upgrade(iter, U8_MAX,
+                       !(flags & BTREE_INSERT_NOUNLOCK))) {
                ret = -EINTR;
                goto err_unlock;
        }
@@ -1857,7 +1859,7 @@ int bch2_btree_node_rewrite(struct bch_fs *c, struct btree_iter *iter,
 
        closure_init_stack(&cl);
 
-       bch2_btree_iter_upgrade(iter, U8_MAX);
+       bch2_btree_iter_upgrade(iter, U8_MAX, true);
 
        if (!(flags & BTREE_INSERT_GC_LOCK_HELD)) {
                if (!down_read_trylock(&c->gc_lock)) {
@@ -2000,7 +2002,7 @@ int bch2_btree_node_update_key(struct bch_fs *c, struct btree_iter *iter,
 
        closure_init_stack(&cl);
 
-       if (!bch2_btree_iter_upgrade(iter, U8_MAX))
+       if (!bch2_btree_iter_upgrade(iter, U8_MAX, true))
                return -EINTR;
 
        if (!down_read_trylock(&c->gc_lock)) {
index 3a17de5ca43e49433e263c08f54048795d48a6d0..e6f050718586b1dcd9a143863c3d725c86f60cad 100644 (file)
@@ -199,14 +199,17 @@ void bch2_btree_root_alloc(struct bch_fs *, enum btree_id);
 static inline unsigned btree_update_reserve_required(struct bch_fs *c,
                                                     struct btree *b)
 {
-       unsigned depth = btree_node_root(c, b)->level - b->level + 1;
+       unsigned depth = btree_node_root(c, b)->level + 1;
 
        /*
         * Number of nodes we might have to allocate in a worst case btree
         * split operation - we split all the way up to the root, then allocate
-        * a new root.
+        * a new root, unless we're already at max depth:
         */
-       return depth * 2 + 1;
+       if (depth < BTREE_MAX_DEPTH)
+               return (depth - b->level) * 2 + 1;
+       else
+               return (depth - b->level) * 2 - 1;
 }
 
 static inline void btree_node_reset_sib_u64s(struct btree *b)
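
A worked example, assuming BTREE_MAX_DEPTH is 4 as defined in btree_types.h: for a leaf (b->level == 0) under a root at level 1, depth == 2 < 4, so the reserve is (2 - 0) * 2 + 1 = 5 nodes: two halves per split at each of the two levels, plus one brand-new root. With the root already at level 3, depth == 4 == BTREE_MAX_DEPTH, no new root can be added, and the reserve becomes (4 - 0) * 2 - 1 = 7; the old formula would have reserved 9 here.
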
index a62d8307036790c8ab930a54ac7335d52e969a23..588a1997e5eee1c9ad441034ccad782ad9ff211c 100644 (file)
@@ -205,8 +205,6 @@ btree_insert_key_leaf(struct btree_insert *trans,
        int old_live_u64s = b->nr.live_u64s;
        int live_u64s_added, u64s_added;
 
-       btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK);
-
        ret = !btree_node_is_extents(b)
                ? bch2_insert_fixup_key(trans, insert)
                : bch2_insert_fixup_extent(trans, insert);
@@ -430,9 +428,9 @@ int __bch2_btree_insert_at(struct btree_insert *trans)
                BUG_ON(i->iter->level);
                BUG_ON(bkey_cmp(bkey_start_pos(&i->k->k), i->iter->pos));
                BUG_ON(debug_check_bkeys(c) &&
+                      !bkey_deleted(&i->k->k) &&
                       bch2_bkey_invalid(c, i->iter->btree_id,
                                         bkey_i_to_s_c(i->k)));
-               BUG_ON(i->iter->uptodate == BTREE_ITER_END);
        }
 
        bubble_sort(trans->entries, trans->nr, btree_trans_cmp);
@@ -444,7 +442,7 @@ retry:
        cycle_gc_lock = false;
 
        trans_for_each_entry(trans, i) {
-               if (!bch2_btree_iter_upgrade(i->iter, 1)) {
+               if (!bch2_btree_iter_upgrade(i->iter, 1, true)) {
                        ret = -EINTR;
                        goto err;
                }
@@ -647,11 +645,6 @@ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id,
                if (bkey_cmp(iter.pos, end) >= 0)
                        break;
 
-               if (k.k->type == KEY_TYPE_DISCARD) {
-                       bch2_btree_iter_next(&iter);
-                       continue;
-               }
-
                bkey_init(&delete.k);
 
                /*
@@ -668,15 +661,6 @@ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id,
                delete.k.version = version;
 
                if (iter.flags & BTREE_ITER_IS_EXTENTS) {
-                       /*
-                        * The extents btree is special - KEY_TYPE_DISCARD is
-                        * used for deletions, not KEY_TYPE_DELETED. This is an
-                        * internal implementation detail that probably
-                        * shouldn't be exposed (internally, KEY_TYPE_DELETED is
-                        * used as a proxy for k->size == 0):
-                        */
-                       delete.k.type = KEY_TYPE_DISCARD;
-
                        /* create the biggest key we can */
                        bch2_key_resize(&delete.k, max_sectors);
                        bch2_cut_back(end, &delete.k);
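
The special case can go because the extent insert path now owns the whiteout representation: __bch2_delete_fixup_extent() (in the extents.c hunks below) sets KEY_TYPE_DISCARD on the whiteout itself, so callers such as bch2_btree_delete_range() insert ordinary deleted keys for extents as well.
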
index 36dca6b22a9144fac247ccd1a22bdac7734c5c1a..d3dd3eb71837062b9d2f44e1b3d026efe35e540c 100644 (file)
@@ -97,7 +97,11 @@ const char *bch2_dirent_invalid(const struct bch_fs *c, struct bkey_s_c k)
                if (!len)
                        return "empty name";
 
-               if (bkey_val_u64s(k.k) > dirent_val_u64s(len))
+               /*
+                * older versions of bcachefs were buggy and creating dirent
+                * older versions of bcachefs were buggy and created dirent
+                */
+               if (bkey_val_u64s(k.k) > dirent_val_u64s(len + 7))
                        return "value too big";
 
                if (len > BCH_NAME_MAX)
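
Since dirent_val_u64s() rounds the value size up to whole u64s, checking against dirent_val_u64s(len + 7) admits keys at most one u64 larger than strictly necessary. For example, assuming the name starts at byte offset 9 of the value, a 5-byte name needs DIV_ROUND_UP(9 + 5, 8) = 2 u64s, and the relaxed check also tolerates DIV_ROUND_UP(9 + 5 + 7, 8) = 3, enough for the over-rounded keys older versions wrote while still rejecting grossly oversized values.
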
index b85af711b9f92ed9aec3bcb51490ae5a87ab61ea..fe4bb52717cf9f518d2fa7f361e2f143438639e8 100644 (file)
@@ -778,7 +778,7 @@ static bool __bch2_cut_front(struct bpos where, struct bkey_s k)
         * cause offset to point to the next bucket:
         */
        if (!len)
-               __set_bkey_deleted(k.k);
+               k.k->type = KEY_TYPE_DELETED;
        else if (bkey_extent_is_data(k.k)) {
                struct bkey_s_extent e = bkey_s_to_extent(k);
                union bch_extent_entry *entry;
@@ -833,7 +833,7 @@ bool bch2_cut_back(struct bpos where, struct bkey *k)
        k->size = len;
 
        if (!len)
-               __set_bkey_deleted(k);
+               k->type = KEY_TYPE_DELETED;
 
        return true;
 }
@@ -1103,7 +1103,7 @@ static void bch2_drop_subtract(struct extent_insert_state *s, struct bkey_s k)
                bch2_subtract_sectors(s, k.s_c,
                                     bkey_start_offset(k.k), k.k->size);
        k.k->size = 0;
-       __set_bkey_deleted(k.k);
+       k.k->type = KEY_TYPE_DELETED;
 }
 
 static bool bch2_extent_merge_inline(struct bch_fs *,
@@ -1143,10 +1143,13 @@ static void extent_bset_insert(struct bch_fs *c, struct btree_iter *iter,
        struct bset_tree *t = bset_tree_last(l->b);
        struct bkey_packed *where =
                bch2_btree_node_iter_bset_pos(&l->iter, l->b, t);
-       struct bkey_packed *prev = bch2_bkey_prev(l->b, t, where);
+       struct bkey_packed *prev = bch2_bkey_prev_filter(l->b, t, where,
+                                                        KEY_TYPE_DISCARD);
        struct bkey_packed *next_live_key = where;
        unsigned clobber_u64s;
 
+       EBUG_ON(bkey_deleted(&insert->k) || !insert->k.size);
+
        if (prev)
                where = bkey_next(prev);
 
@@ -1188,6 +1191,7 @@ static void extent_insert_committed(struct extent_insert_state *s)
                : &s->whiteout;
        BKEY_PADDED(k) split;
 
+       EBUG_ON(bkey_deleted(&insert->k) || !insert->k.size);
        EBUG_ON(bkey_cmp(insert->k.p, s->committed) < 0);
        EBUG_ON(bkey_cmp(s->committed, bkey_start_pos(&insert->k)) < 0);
 
@@ -1246,8 +1250,6 @@ __extent_insert_advance_pos(struct extent_insert_state *s,
        else
                ret = BTREE_INSERT_OK;
 
-       EBUG_ON(bkey_deleted(&s->insert->k->k) || !s->insert->k->k.size);
-
        if (ret == BTREE_INSERT_OK)
                s->committed = next_pos;
 
@@ -1446,6 +1448,7 @@ __bch2_delete_fixup_extent(struct extent_insert_state *s)
        EBUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&insert->k)));
 
        s->whiteout = *insert;
+       s->whiteout.k.type = KEY_TYPE_DISCARD;
 
        while (bkey_cmp(s->committed, insert->k.p) < 0 &&
               (ret = extent_insert_should_stop(s)) == BTREE_INSERT_OK &&
@@ -1488,6 +1491,8 @@ __bch2_delete_fixup_extent(struct extent_insert_state *s)
                           bset_written(b, bset(b, t))) {
                        struct bkey_i discard = *insert;
 
+                       discard.k.type = KEY_TYPE_DISCARD;
+
                        switch (overlap) {
                        case BCH_EXTENT_OVERLAP_FRONT:
                                bch2_cut_front(bkey_start_pos(k.k), &discard);
@@ -1634,7 +1639,7 @@ bch2_insert_fixup_extent(struct btree_insert *trans,
        };
 
        EBUG_ON(iter->level);
-       EBUG_ON(bkey_deleted(&insert->k->k) || !insert->k->k.size);
+       EBUG_ON(!insert->k->k.size);
 
        /*
         * As we process overlapping extents, we advance @iter->pos both to
@@ -1979,11 +1984,11 @@ bool bch2_extent_normalize(struct bch_fs *c, struct bkey_s k)
                return false;
 
        case KEY_TYPE_DELETED:
-       case KEY_TYPE_COOKIE:
                return true;
-
        case KEY_TYPE_DISCARD:
                return bversion_zero(k.k->version);
+       case KEY_TYPE_COOKIE:
+               return false;
 
        case BCH_EXTENT:
        case BCH_EXTENT_CACHED:
@@ -2051,11 +2056,6 @@ int bch2_extent_pick_ptr(struct bch_fs *c, struct bkey_s_c k,
        int ret;
 
        switch (k.k->type) {
-       case KEY_TYPE_DELETED:
-       case KEY_TYPE_DISCARD:
-       case KEY_TYPE_COOKIE:
-               return 0;
-
        case KEY_TYPE_ERROR:
                return -EIO;
 
@@ -2069,11 +2069,8 @@ int bch2_extent_pick_ptr(struct bch_fs *c, struct bkey_s_c k,
 
                return ret;
 
-       case BCH_RESERVATION:
-               return 0;
-
        default:
-               BUG();
+               return 0;
        }
 }
 
@@ -2099,7 +2096,6 @@ enum merge_result bch2_extent_merge(struct bch_fs *c, struct btree *b,
                return BCH_MERGE_NOMERGE;
 
        switch (l->k.type) {
-       case KEY_TYPE_DELETED:
        case KEY_TYPE_DISCARD:
        case KEY_TYPE_ERROR:
                /* These types are mergeable, and no val to check */
index 737b9be33e0af3b8ffcc5f89baa10789064404c2..9e78798a4d08de7f7b58eb3d52723fff80e6e8e6 100644 (file)
@@ -2078,6 +2078,29 @@ out:
 
 /* truncate: */
 
+static inline int range_has_data(struct bch_fs *c,
+                                 struct bpos start,
+                                 struct bpos end)
+{
+       struct btree_iter iter;
+       struct bkey_s_c k;
+       int ret = 0;
+
+       for_each_btree_key(&iter, c, BTREE_ID_EXTENTS,
+                          start, 0, k) {
+               if (bkey_cmp(bkey_start_pos(k.k), end) >= 0)
+                       break;
+
+               if (bkey_extent_is_data(k.k)) {
+                       ret = 1;
+                       break;
+               }
+       }
+
+       return bch2_btree_iter_unlock(&iter) ?: ret;
+}
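
Note the return convention: negative error, 1 if any extent in [start, end) carries data, 0 for a pure hole, with an error from bch2_btree_iter_unlock() taking precedence via the ?: chaining. That is what lets the caller below bail with a single ret <= 0 test.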
+
 static int __bch2_truncate_page(struct bch_inode_info *inode,
                                pgoff_t index, loff_t start, loff_t end)
 {
@@ -2099,30 +2122,16 @@ static int __bch2_truncate_page(struct bch_inode_info *inode,
 
        page = find_lock_page(mapping, index);
        if (!page) {
-               struct btree_iter iter;
-               struct bkey_s_c k = bkey_s_c_null;
-
                /*
                 * XXX: we're doing two index lookups when we end up reading the
                 * page
                 */
-               for_each_btree_key(&iter, c, BTREE_ID_EXTENTS,
-                                  POS(inode->v.i_ino,
-                                      index << PAGE_SECTOR_SHIFT), 0, k) {
-                       if (bkey_cmp(bkey_start_pos(k.k),
-                                    POS(inode->v.i_ino,
-                                        (index + 1) << PAGE_SECTOR_SHIFT)) >= 0)
-                               break;
+               ret = range_has_data(c,
+                               POS(inode->v.i_ino, index << PAGE_SECTOR_SHIFT),
+                               POS(inode->v.i_ino, (index + 1) << PAGE_SECTOR_SHIFT));
+               if (ret <= 0)
+                       return ret;
 
-                       if (k.k->type != KEY_TYPE_DISCARD &&
-                           k.k->type != BCH_RESERVATION) {
-                               bch2_btree_iter_unlock(&iter);
-                               goto create;
-                       }
-               }
-               bch2_btree_iter_unlock(&iter);
-               return 0;
-create:
                page = find_or_create_page(mapping, index, GFP_KERNEL);
                if (unlikely(!page)) {
                        ret = -ENOMEM;
@@ -2389,9 +2398,6 @@ static long bch2_fcollapse(struct bch_inode_info *inode,
 
                bkey_reassemble(&copy.k, k);
 
-               if (bkey_deleted(&copy.k.k))
-                       copy.k.k.type = KEY_TYPE_DISCARD;
-
                bch2_cut_front(src.pos, &copy.k);
                copy.k.k.p.offset -= len >> 9;
 
index 048b5c103eb085858124518723dfdaf6de53a079..edf714f7b98d864698c2c59b76dbdcd4c8217044 100644 (file)
@@ -252,9 +252,6 @@ static int check_extents(struct bch_fs *c)
 
        for_each_btree_key(&iter, c, BTREE_ID_EXTENTS,
                           POS(BCACHEFS_ROOT_INO, 0), 0, k) {
-               if (k.k->type == KEY_TYPE_DISCARD)
-                       continue;
-
                ret = walk_inode(c, &w, k.k->p.inode);
                if (ret)
                        break;
index 567289e22ca0b73f7f92b02521b9191d381a6128..dd0e8d2fce991d88876459cfeab1930ec128c30b 100644 (file)
@@ -72,7 +72,8 @@ static void journal_seq_blacklist_flush(struct journal *j,
                n = bl->entries[i];
                mutex_unlock(&j->blacklist_lock);
 
-               __bch2_btree_iter_init(&iter, c, n.btree_id, n.pos, 0, 0, 0);
+               __bch2_btree_iter_init(&iter, c, n.btree_id, n.pos,
+                                      0, 0, BTREE_ITER_NODES);
 
                b = bch2_btree_iter_peek_node(&iter);
 
index 9dcadd20f8ce4a96897728171821f83e65f82207..31847a94a2c70fdc205b65800dfe3de47eb13151 100644 (file)
@@ -2,11 +2,29 @@
 
 #include "bcachefs.h"
 #include "btree_update.h"
+#include "journal_reclaim.h"
 #include "tests.h"
 
 #include "linux/kthread.h"
 #include "linux/random.h"
 
+static void delete_test_keys(struct bch_fs *c)
+{
+       int ret;
+
+       ret = bch2_btree_delete_range(c, BTREE_ID_EXTENTS,
+                                     POS(0, 0), POS(0, U64_MAX),
+                                     ZERO_VERSION, NULL, NULL, NULL);
+       BUG_ON(ret);
+
+       ret = bch2_btree_delete_range(c, BTREE_ID_DIRENTS,
+                                     POS(0, 0), POS(0, U64_MAX),
+                                     ZERO_VERSION, NULL, NULL, NULL);
+       BUG_ON(ret);
+}
+
+/* unit tests */
+
 static void test_delete(struct bch_fs *c, u64 nr)
 {
        struct btree_iter iter;
@@ -36,6 +54,224 @@ static void test_delete(struct bch_fs *c, u64 nr)
        bch2_btree_iter_unlock(&iter);
 }
 
+static void test_delete_written(struct bch_fs *c, u64 nr)
+{
+       struct btree_iter iter;
+       struct bkey_i_cookie k;
+       int ret;
+
+       bkey_cookie_init(&k.k_i);
+
+       bch2_btree_iter_init(&iter, c, BTREE_ID_DIRENTS, k.k.p,
+                            BTREE_ITER_INTENT);
+
+       ret = bch2_btree_iter_traverse(&iter);
+       BUG_ON(ret);
+
+       ret = bch2_btree_insert_at(c, NULL, NULL, NULL, 0,
+                                  BTREE_INSERT_ENTRY(&iter, &k.k_i));
+       BUG_ON(ret);
+
+       bch2_journal_flush_all_pins(&c->journal);
+
+       ret = bch2_btree_delete_at(&iter, 0);
+       BUG_ON(ret);
+
+       bch2_btree_iter_unlock(&iter);
+}
+
+static void test_iterate(struct bch_fs *c, u64 nr)
+{
+       struct btree_iter iter;
+       struct bkey_s_c k;
+       u64 i;
+       int ret;
+
+       delete_test_keys(c);
+
+       pr_info("inserting test keys");
+
+       for (i = 0; i < nr; i++) {
+               struct bkey_i_cookie k;
+
+               bkey_cookie_init(&k.k_i);
+               k.k.p.offset = i;
+
+               ret = bch2_btree_insert(c, BTREE_ID_DIRENTS, &k.k_i,
+                                       NULL, NULL, NULL, 0);
+               BUG_ON(ret);
+       }
+
+       pr_info("iterating forwards");
+
+       i = 0;
+
+       for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, POS(0, 0), 0, k)
+               BUG_ON(k.k->p.offset != i++);
+       bch2_btree_iter_unlock(&iter);
+
+       BUG_ON(i != nr);
+
+       pr_info("iterating backwards");
+
+       while (!IS_ERR_OR_NULL((k = bch2_btree_iter_prev(&iter)).k))
+               BUG_ON(k.k->p.offset != --i);
+       bch2_btree_iter_unlock(&iter);
+
+       BUG_ON(i);
+}
+
+static void test_iterate_extents(struct bch_fs *c, u64 nr)
+{
+       struct btree_iter iter;
+       struct bkey_s_c k;
+       u64 i;
+       int ret;
+
+       delete_test_keys(c);
+
+       pr_info("inserting test extents");
+
+       for (i = 0; i < nr; i += 8) {
+               struct bkey_i_cookie k;
+
+               bkey_cookie_init(&k.k_i);
+               k.k.p.offset = i + 8;
+               k.k.size = 8;
+
+               ret = bch2_btree_insert(c, BTREE_ID_EXTENTS, &k.k_i,
+                                       NULL, NULL, NULL, 0);
+               BUG_ON(ret);
+       }
+
+       pr_info("iterating forwards");
+
+       i = 0;
+
+       for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS(0, 0), 0, k) {
+               BUG_ON(bkey_start_offset(k.k) != i);
+               i = k.k->p.offset;
+       }
+       bch2_btree_iter_unlock(&iter);
+
+       BUG_ON(i != nr);
+
+       pr_info("iterating backwards");
+
+       while (!IS_ERR_OR_NULL((k = bch2_btree_iter_prev(&iter)).k)) {
+               BUG_ON(k.k->p.offset != i);
+               i = bkey_start_offset(k.k);
+       }
+       bch2_btree_iter_unlock(&iter);
+
+       BUG_ON(i);
+}
+
+static void test_iterate_slots(struct bch_fs *c, u64 nr)
+{
+       struct btree_iter iter;
+       struct bkey_s_c k;
+       u64 i;
+       int ret;
+
+       delete_test_keys(c);
+
+       pr_info("inserting test keys");
+
+       for (i = 0; i < nr; i++) {
+               struct bkey_i_cookie k;
+
+               bkey_cookie_init(&k.k_i);
+               k.k.p.offset = i * 2;
+
+               ret = bch2_btree_insert(c, BTREE_ID_DIRENTS, &k.k_i,
+                                       NULL, NULL, NULL, 0);
+               BUG_ON(ret);
+       }
+
+       pr_info("iterating forwards");
+
+       i = 0;
+
+       for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, POS(0, 0), 0, k) {
+               BUG_ON(k.k->p.offset != i);
+               i += 2;
+       }
+       bch2_btree_iter_unlock(&iter);
+
+       BUG_ON(i != nr * 2);
+
+       pr_info("iterating forwards by slots");
+
+       i = 0;
+
+       for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, POS(0, 0),
+                          BTREE_ITER_SLOTS, k) {
+               BUG_ON(bkey_deleted(k.k) != (i & 1));
+               BUG_ON(k.k->p.offset != i++);
+
+               if (i == nr * 2)
+                       break;
+       }
+       bch2_btree_iter_unlock(&iter);
+}
+
+static void test_iterate_slots_extents(struct bch_fs *c, u64 nr)
+{
+       struct btree_iter iter;
+       struct bkey_s_c k;
+       u64 i;
+       int ret;
+
+       delete_test_keys(c);
+
+       pr_info("inserting test keys");
+
+       for (i = 0; i < nr; i += 16) {
+               struct bkey_i_cookie k;
+
+               bkey_cookie_init(&k.k_i);
+               k.k.p.offset = i + 16;
+               k.k.size = 8;
+
+               ret = bch2_btree_insert(c, BTREE_ID_EXTENTS, &k.k_i,
+                                       NULL, NULL, NULL, 0);
+               BUG_ON(ret);
+       }
+
+       pr_info("iterating forwards");
+
+       i = 0;
+
+       for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS(0, 0), 0, k) {
+               BUG_ON(bkey_start_offset(k.k) != i + 8);
+               BUG_ON(k.k->size != 8);
+               i += 16;
+       }
+       bch2_btree_iter_unlock(&iter);
+
+       BUG_ON(i != nr);
+
+       pr_info("iterating forwards by slots");
+
+       i = 0;
+
+       for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS(0, 0),
+                          BTREE_ITER_SLOTS, k) {
+               BUG_ON(bkey_deleted(k.k) != !(i % 16));
+
+               BUG_ON(bkey_start_offset(k.k) != i);
+               BUG_ON(k.k->size != 8);
+               i = k.k->p.offset;
+
+               if (i == nr)
+                       break;
+       }
+       bch2_btree_iter_unlock(&iter);
+}
+
+/* perf tests */
+
 static u64 test_rand(void)
 {
        u64 v;
@@ -183,7 +419,7 @@ static void seq_delete(struct bch_fs *c, u64 nr)
        int ret;
 
        ret = bch2_btree_delete_range(c, BTREE_ID_DIRENTS,
-                                     POS_MIN, POS_MAX,
+                                     POS(0, 0), POS(0, U64_MAX),
                                      ZERO_VERSION, NULL, NULL, NULL);
        BUG_ON(ret);
 }
@@ -256,6 +492,11 @@ void bch2_btree_perf_test(struct bch_fs *c, const char *testname,
 
        /* a unit test, not a perf test: */
        perf_test(test_delete);
+       perf_test(test_delete_written);
+       perf_test(test_iterate);
+       perf_test(test_iterate_extents);
+       perf_test(test_iterate_slots);
+       perf_test(test_iterate_slots_extents);
 
        if (!j.fn) {
                pr_err("unknown test %s", testname);
index de95480c8b088aa15e448a4fd9492def4c031a7a..c6b5015a0087c145227365bad2e0b4667eaf101e 100644 (file)
 #include <linux/posix_acl_xattr.h>
 #include <linux/xattr.h>
 
-static unsigned xattr_val_u64s(unsigned name_len, unsigned val_len)
-{
-       return DIV_ROUND_UP(offsetof(struct bch_xattr, x_name) +
-                           name_len + val_len, sizeof(u64));
-}
-
-#define xattr_val(_xattr)      ((_xattr)->x_name + (_xattr)->x_name_len)
-
 static const struct xattr_handler *bch2_xattr_type_to_handler(unsigned);
 
-struct xattr_search_key {
-       u8              type;
-       struct qstr     name;
-};
-
-#define X_SEARCH(_type, _name, _len) ((struct xattr_search_key)        \
-       { .type = _type, .name = QSTR_INIT(_name, _len) })
-
 static u64 bch2_xattr_hash(const struct bch_hash_info *info,
                          const struct xattr_search_key *key)
 {
@@ -158,6 +142,17 @@ void bch2_xattr_to_text(struct bch_fs *c, char *buf,
        }
 }
 
+struct bkey_s_c bch2_xattr_get_iter(struct bch_fs *c,
+                                   struct btree_iter *iter,
+                                   struct bch_inode_info *inode,
+                                   const char *name, int type)
+{
+       return bch2_hash_lookup(bch2_xattr_hash_desc,
+                               &inode->ei_str_hash,
+                               c, inode->v.i_ino, iter,
+                               &X_SEARCH(type, name, strlen(name)));
+}
+
 int bch2_xattr_get(struct bch_fs *c, struct bch_inode_info *inode,
                  const char *name, void *buffer, size_t size, int type)
 {
@@ -185,19 +180,15 @@ int bch2_xattr_get(struct bch_fs *c, struct bch_inode_info *inode,
        return ret;
 }
 
-int __bch2_xattr_set(struct bch_fs *c, u64 inum,
-                   const struct bch_hash_info *hash_info,
-                   const char *name, const void *value, size_t size,
-                   int flags, int type, u64 *journal_seq)
+int bch2_xattr_set(struct bch_fs *c, u64 inum,
+                  const struct bch_hash_info *hash_info,
+                  const char *name, const void *value, size_t size,
+                  int flags, int type, u64 *journal_seq)
 {
        struct xattr_search_key search = X_SEARCH(type, name, strlen(name));
        int ret;
 
-       if (!value) {
-               ret = bch2_hash_delete(bch2_xattr_hash_desc, hash_info,
-                                     c, inum,
-                                     journal_seq, &search);
-       } else {
+       if (value) {
                struct bkey_i_xattr *xattr;
                unsigned u64s = BKEY_U64s +
                        xattr_val_u64s(search.name.len, size);
@@ -223,6 +214,9 @@ int __bch2_xattr_set(struct bch_fs *c, u64 inum,
                                (flags & XATTR_CREATE ? BCH_HASH_SET_MUST_CREATE : 0)|
                                (flags & XATTR_REPLACE ? BCH_HASH_SET_MUST_REPLACE : 0));
                kfree(xattr);
+       } else {
+               ret = bch2_hash_delete(bch2_xattr_hash_desc, hash_info,
+                                      c, inum, journal_seq, &search);
        }
 
        if (ret == -ENOENT)
@@ -231,15 +225,6 @@ int __bch2_xattr_set(struct bch_fs *c, u64 inum,
        return ret;
 }
 
-int bch2_xattr_set(struct bch_fs *c, struct bch_inode_info *inode,
-                  const char *name, const void *value, size_t size,
-                  int flags, int type)
-{
-       return __bch2_xattr_set(c, inode->v.i_ino, &inode->ei_str_hash,
-                               name, value, size, flags, type,
-                               &inode->ei_journal_seq);
-}
-
 static size_t bch2_xattr_emit(struct dentry *dentry,
                             const struct bch_xattr *xattr,
                             char *buffer, size_t buffer_size)
@@ -323,8 +308,9 @@ static int bch2_xattr_set_handler(const struct xattr_handler *handler,
        struct bch_inode_info *inode = to_bch_ei(vinode);
        struct bch_fs *c = inode->v.i_sb->s_fs_info;
 
-       return bch2_xattr_set(c, inode, name, value, size, flags,
-                             handler->flags);
+       return bch2_xattr_set(c, inode->v.i_ino, &inode->ei_str_hash,
+                             name, value, size, flags, handler->flags,
+                             &inode->ei_journal_seq);
 }
 
 static const struct xattr_handler bch_xattr_user_handler = {
index a58e7e30342181b0442392879d6f33cc6eb5a808..1365032d56c39aa19e298d48e78f06562ffbdb7d 100644 (file)
@@ -13,17 +13,37 @@ void bch2_xattr_to_text(struct bch_fs *, char *, size_t, struct bkey_s_c);
        .val_to_text    = bch2_xattr_to_text,           \
 }
 
+static inline unsigned xattr_val_u64s(unsigned name_len, unsigned val_len)
+{
+       return DIV_ROUND_UP(offsetof(struct bch_xattr, x_name) +
+                           name_len + val_len, sizeof(u64));
+}
+
+#define xattr_val(_xattr)                                      \
+       ((void *) (_xattr)->x_name + (_xattr)->x_name_len)
+
+struct xattr_search_key {
+       u8              type;
+       struct qstr     name;
+};
+
+#define X_SEARCH(_type, _name, _len) ((struct xattr_search_key)        \
+       { .type = _type, .name = QSTR_INIT(_name, _len) })
+
 struct dentry;
 struct xattr_handler;
 struct bch_hash_info;
 struct bch_inode_info;
 
+struct bkey_s_c bch2_xattr_get_iter(struct bch_fs *,
+                                   struct btree_iter *,
+                                   struct bch_inode_info *,
+                                   const char *, int);
 int bch2_xattr_get(struct bch_fs *, struct bch_inode_info *,
                  const char *, void *, size_t, int);
-int __bch2_xattr_set(struct bch_fs *, u64, const struct bch_hash_info *,
-                 const char *, const void *, size_t, int, int, u64 *);
-int bch2_xattr_set(struct bch_fs *, struct bch_inode_info *,
-                 const char *, const void *, size_t, int, int);
+
+int bch2_xattr_set(struct bch_fs *, u64, const struct bch_hash_info *,
+                  const char *, const void *, size_t, int, int, u64 *);
 ssize_t bch2_xattr_list(struct dentry *, char *, size_t);
 
 extern const struct xattr_handler *bch2_xattr_handlers[];
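
A worked example for the helpers now exposed in xattr.h, assuming x_name begins at byte offset 4 of struct bch_xattr (a zero-size bch_val followed by x_type, x_name_len and a 16-bit x_val_len):

    /* 9-byte name, 16-byte value: DIV_ROUND_UP(4 + 9 + 16, 8) u64s */
    unsigned u64s = xattr_val_u64s(9, 16);          /* == 4 */

    /* xattr points at some struct bch_xattr (hypothetical variable);
       the new (void *) cast makes the macro yield void * instead of u8 *: */
    void *val = xattr_val(xattr);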