git.sesse.net Git - bcachefs-tools-debian/commitdiff
Update bcachefs sources to 69be0dae31 bcachefs: Always zero memory from bch2_trans_kmalloc()
author Kent Overstreet <kent.overstreet@gmail.com>
Fri, 11 Jun 2021 00:21:18 +0000 (20:21 -0400)
committer Kent Overstreet <kent.overstreet@gmail.com>
Fri, 11 Jun 2021 00:25:55 +0000 (20:25 -0400)
25 files changed:
.bcachefs_revision
include/linux/blkdev.h
include/linux/xattr.h
include/trace/events/bcachefs.h
libbcachefs/acl.c
libbcachefs/acl.h
libbcachefs/bkey_methods.c
libbcachefs/bkey_methods.h
libbcachefs/btree_cache.c
libbcachefs/btree_gc.c
libbcachefs/btree_iter.c
libbcachefs/btree_iter.h
libbcachefs/btree_types.h
libbcachefs/btree_update.h
libbcachefs/btree_update_leaf.c
libbcachefs/buckets.c
libbcachefs/ec.c
libbcachefs/extents.c
libbcachefs/extents.h
libbcachefs/fs-io.c
libbcachefs/fs-ioctl.c
libbcachefs/fs.c
libbcachefs/reflink.c
libbcachefs/reflink.h
libbcachefs/xattr.c

index 45b79dea3e133ad4d465453a05141cab8bae81ac..193e424145c24012dfe6558473a5009fcbff2e10 100644 (file)
@@ -1 +1 @@
-e3a7cee5034f0f218f593a0a970e8ccd8bf99565
+69be0dae3162e1651a5d5fcce08562e6e2af971a
index 4300c4da3d1253d0ff3a1a065cdaec60534257dd..f60972c76b14488770eb1068d9133d04e946edbe 100644 (file)
@@ -6,7 +6,7 @@
 #include <linux/kobject.h>
 #include <linux/types.h>
 
-#define BIO_MAX_PAGES  256
+#define BIO_MAX_VECS   256
 
 typedef unsigned fmode_t;
 
index fbc1e1f5597fc3e658d5e5be9d6e8bc4b5a98a60..222c72fecb227087b57d3b7ad3e603e6f2c3c17c 100644 (file)
@@ -26,6 +26,7 @@
 
 struct inode;
 struct dentry;
+struct user_namespace;
 
 /*
  * struct xattr_handler: When @name is set, match attributes with exactly that
@@ -40,7 +41,8 @@ struct xattr_handler {
        int (*get)(const struct xattr_handler *, struct dentry *dentry,
                   struct inode *inode, const char *name, void *buffer,
                   size_t size);
-       int (*set)(const struct xattr_handler *, struct dentry *dentry,
+       int (*set)(const struct xattr_handler *,
+                  struct user_namespace *mnt_userns, struct dentry *dentry,
                   struct inode *inode, const char *name, const void *buffer,
                   size_t size, int flags);
 };
index b5fcda9e65d8f41710b1054b52370e91aa7206a0..4c0d9b7660eea7c4d80ebb5c35d58bebb7b1700e 100644 (file)
@@ -71,10 +71,10 @@ DECLARE_EVENT_CLASS(bio,
        ),
 
        TP_fast_assign(
-               __entry->dev            = bio->bi_disk ? bio_dev(bio) : 0;
+               __entry->dev            = bio->bi_bdev ? bio_dev(bio) : 0;
                __entry->sector         = bio->bi_iter.bi_sector;
                __entry->nr_sector      = bio->bi_iter.bi_size >> 9;
-               blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size);
+               blk_fill_rwbs(__entry->rwbs, bio->bi_opf);
        ),
 
        TP_printk("%d,%d  %s %llu + %u",
index 594e1f1a12915477c83db5c242b787a6f3a90913..74cb188f74c603e59ce6a565e9f11bf3fd713b11 100644 (file)
@@ -281,7 +281,8 @@ int bch2_set_acl_trans(struct btree_trans *trans,
        return ret == -ENOENT ? 0 : ret;
 }
 
-int bch2_set_acl(struct inode *vinode, struct posix_acl *_acl, int type)
+int bch2_set_acl(struct user_namespace *mnt_userns,
+                struct inode *vinode, struct posix_acl *_acl, int type)
 {
        struct bch_inode_info *inode = to_bch_ei(vinode);
        struct bch_fs *c = inode->v.i_sb->s_fs_info;
@@ -308,7 +309,7 @@ retry:
        mode = inode_u.bi_mode;
 
        if (type == ACL_TYPE_ACCESS) {
-               ret = posix_acl_update_mode(&inode->v, &mode, &acl);
+               ret = posix_acl_update_mode(mnt_userns, &inode->v, &mode, &acl);
                if (ret)
                        goto btree_err;
        }
index ba210c26d5c13b3a2110103831c90396c6fa57c7..25fc54dd08845884dd0b8e0fad064920ae230741 100644 (file)
@@ -32,7 +32,7 @@ int bch2_set_acl_trans(struct btree_trans *,
                       struct bch_inode_unpacked *,
                       const struct bch_hash_info *,
                       struct posix_acl *, int);
-int bch2_set_acl(struct inode *, struct posix_acl *, int);
+int bch2_set_acl(struct user_namespace *, struct inode *, struct posix_acl *, int);
 int bch2_acl_chmod(struct btree_trans *, struct bch_inode_unpacked *,
                   umode_t, struct posix_acl **);
 
index 9f869bed9f1c1458449af5ef8dfb8514607785ad..09e5dbf112c29f984e07cfc43e1805651c45bd79 100644 (file)
@@ -84,7 +84,7 @@ static void key_type_inline_data_to_text(struct printbuf *out, struct bch_fs *c,
        .val_to_text    = key_type_inline_data_to_text, \
 }
 
-static const struct bkey_ops bch2_bkey_ops[] = {
+const struct bkey_ops bch2_bkey_ops[] = {
 #define x(name, nr) [KEY_TYPE_##name]  = bch2_bkey_ops_##name,
        BCH_BKEY_TYPES()
 #undef x
@@ -290,24 +290,11 @@ bool bch2_bkey_normalize(struct bch_fs *c, struct bkey_s k)
                : false;
 }
 
-enum merge_result bch2_bkey_merge(struct bch_fs *c,
-                                 struct bkey_s l, struct bkey_s r)
+bool bch2_bkey_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r)
 {
        const struct bkey_ops *ops = &bch2_bkey_ops[l.k->type];
-       enum merge_result ret;
 
-       if (bch2_key_merging_disabled ||
-           !ops->key_merge ||
-           l.k->type != r.k->type ||
-           bversion_cmp(l.k->version, r.k->version) ||
-           bpos_cmp(l.k->p, bkey_start_pos(r.k)))
-               return BCH_MERGE_NOMERGE;
-
-       ret = ops->key_merge(c, l, r);
-
-       if (ret != BCH_MERGE_NOMERGE)
-               l.k->needs_whiteout |= r.k->needs_whiteout;
-       return ret;
+       return bch2_bkey_maybe_mergable(l.k, r.k) && ops->key_merge(c, l, r);
 }
 
 static const struct old_bkey_type {
index bfa6f112aeed17519677d96bc41ca9177f91abe4..3012035db1a33b3acd902563c0a7ea70c2718108 100644 (file)
@@ -11,17 +11,6 @@ enum btree_node_type;
 
 extern const char * const bch2_bkey_types[];
 
-enum merge_result {
-       BCH_MERGE_NOMERGE,
-
-       /*
-        * The keys were mergeable, but would have overflowed size - so instead
-        * l was changed to the maximum size, and both keys were modified:
-        */
-       BCH_MERGE_PARTIAL,
-       BCH_MERGE_MERGE,
-};
-
 struct bkey_ops {
        /* Returns reason for being invalid if invalid, else NULL: */
        const char *    (*key_invalid)(const struct bch_fs *,
@@ -30,13 +19,14 @@ struct bkey_ops {
                                       struct bkey_s_c);
        void            (*swab)(struct bkey_s);
        bool            (*key_normalize)(struct bch_fs *, struct bkey_s);
-       enum merge_result (*key_merge)(struct bch_fs *,
-                                      struct bkey_s, struct bkey_s);
+       bool            (*key_merge)(struct bch_fs *, struct bkey_s, struct bkey_s_c);
        void            (*compat)(enum btree_id id, unsigned version,
                                  unsigned big_endian, int write,
                                  struct bkey_s);
 };
 
+extern const struct bkey_ops bch2_bkey_ops[];
+
 const char *bch2_bkey_val_invalid(struct bch_fs *, struct bkey_s_c);
 const char *__bch2_bkey_invalid(struct bch_fs *, struct bkey_s_c,
                                enum btree_node_type);
@@ -57,8 +47,17 @@ void bch2_bkey_swab_val(struct bkey_s);
 
 bool bch2_bkey_normalize(struct bch_fs *, struct bkey_s);
 
-enum merge_result bch2_bkey_merge(struct bch_fs *,
-                                 struct bkey_s, struct bkey_s);
+static inline bool bch2_bkey_maybe_mergable(const struct bkey *l, const struct bkey *r)
+{
+       return l->type == r->type &&
+               !bversion_cmp(l->version, r->version) &&
+               !bpos_cmp(l->p, bkey_start_pos(r)) &&
+               (u64) l->size + r->size <= KEY_SIZE_MAX &&
+               bch2_bkey_ops[l->type].key_merge &&
+               !bch2_key_merging_disabled;
+}
+
+bool bch2_bkey_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c);
 
 void bch2_bkey_renumber(enum btree_node_type, struct bkey_packed *, int);
 
index 013cf0b53857db8b543679e61d8f0a79e48a844c..12bc294654a13b82ef66259d769d79ff4fb9ffe6 100644 (file)
@@ -654,13 +654,9 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c,
                return NULL;
        }
 
-       /*
-        * Unlock before doing IO:
-        *
-        * XXX: ideally should be dropping all btree node locks here
-        */
-       if (iter && btree_node_read_locked(iter, level + 1))
-               btree_node_unlock(iter, level + 1);
+       /* Unlock before doing IO: */
+       if (iter && sync)
+               bch2_trans_unlock(iter->trans);
 
        bch2_btree_node_read(c, b, sync);
 
@@ -671,6 +667,16 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c,
                return NULL;
        }
 
+       /*
+        * XXX: this will probably always fail because btree_iter_relock()
+        * currently fails for iterators that aren't pointed at a valid btree
+        * node
+        */
+       if (iter && !bch2_trans_relock(iter->trans)) {
+               six_unlock_intent(&b->c.lock);
+               return ERR_PTR(-EINTR);
+       }
+
        if (lock_type == SIX_LOCK_read)
                six_lock_downgrade(&b->c.lock);
 
@@ -815,9 +821,22 @@ lock_node:
                }
        }
 
-       /* XXX: waiting on IO with btree locks held: */
-       wait_on_bit_io(&b->flags, BTREE_NODE_read_in_flight,
-                      TASK_UNINTERRUPTIBLE);
+       if (unlikely(btree_node_read_in_flight(b))) {
+               six_unlock_type(&b->c.lock, lock_type);
+               bch2_trans_unlock(iter->trans);
+
+               wait_on_bit_io(&b->flags, BTREE_NODE_read_in_flight,
+                              TASK_UNINTERRUPTIBLE);
+
+               /*
+                * XXX: check if this always fails - btree_iter_relock()
+                * currently fails for iterators that aren't pointed at a valid
+                * btree node
+                */
+               if (iter && !bch2_trans_relock(iter->trans))
+                       return ERR_PTR(-EINTR);
+               goto retry;
+       }
 
        prefetch(b->aux_data);
 
index ba560fbd5f36c2e2a4848ee21108a8c753e18b36..911196f0587c529065a165d59a769d38ae1c0a37 100644 (file)
@@ -36,6 +36,9 @@
 #include <linux/sched/task.h>
 #include <trace/events/bcachefs.h>
 
+#define DROP_THIS_NODE         10
+#define DROP_PREV_NODE         11
+
 static inline void __gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
 {
        preempt_disable();
@@ -229,11 +232,19 @@ static int btree_repair_node_start(struct bch_fs *c, struct btree *b,
                        (bch2_bkey_val_to_text(&PBUF(buf2), c, bkey_i_to_s_c(&cur->key)), buf2))) {
                if (prev &&
                    bpos_cmp(expected_start, cur->data->min_key) > 0 &&
-                   BTREE_NODE_SEQ(cur->data) > BTREE_NODE_SEQ(prev->data))
+                   BTREE_NODE_SEQ(cur->data) > BTREE_NODE_SEQ(prev->data)) {
+                       if (bkey_cmp(prev->data->min_key,
+                                    cur->data->min_key) <= 0)
+                               return DROP_PREV_NODE;
+
                        ret = set_node_max(c, prev,
-                               bpos_predecessor(cur->data->min_key));
-               else
+                                          bpos_predecessor(cur->data->min_key));
+               } else {
+                       if (bkey_cmp(expected_start, b->data->max_key) >= 0)
+                               return DROP_THIS_NODE;
+
                        ret = set_node_min(c, cur, expected_start);
+               }
                if (ret)
                        return ret;
        }
@@ -262,13 +273,11 @@ fsck_err:
        return ret;
 }
 
-#define DROP_THIS_NODE         10
-
 static int bch2_btree_repair_topology_recurse(struct bch_fs *c, struct btree *b)
 {
        struct btree_and_journal_iter iter;
        struct bkey_s_c k;
-       struct bkey_buf tmp;
+       struct bkey_buf prev_k, cur_k;
        struct btree *prev = NULL, *cur = NULL;
        bool have_child, dropped_children = false;
        char buf[200];
@@ -278,14 +287,15 @@ static int bch2_btree_repair_topology_recurse(struct bch_fs *c, struct btree *b)
                return 0;
 again:
        have_child = dropped_children = false;
-       bch2_bkey_buf_init(&tmp);
+       bch2_bkey_buf_init(&prev_k);
+       bch2_bkey_buf_init(&cur_k);
        bch2_btree_and_journal_iter_init_node_iter(&iter, c, b);
 
        while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
                bch2_btree_and_journal_iter_advance(&iter);
-               bch2_bkey_buf_reassemble(&tmp, c, k);
+               bch2_bkey_buf_reassemble(&cur_k, c, k);
 
-               cur = bch2_btree_node_get_noiter(c, tmp.k,
+               cur = bch2_btree_node_get_noiter(c, cur_k.k,
                                        b->c.btree_id, b->c.level - 1,
                                        false);
                ret = PTR_ERR_OR_ZERO(cur);
@@ -295,10 +305,10 @@ again:
                                "  %s",
                                bch2_btree_ids[b->c.btree_id],
                                b->c.level - 1,
-                               (bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(tmp.k)), buf))) {
-                       bch2_btree_node_evict(c, tmp.k);
+                               (bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(cur_k.k)), buf))) {
+                       bch2_btree_node_evict(c, cur_k.k);
                        ret = bch2_journal_key_delete(c, b->c.btree_id,
-                                                     b->c.level, tmp.k->k.p);
+                                                     b->c.level, cur_k.k->k.p);
                        if (ret)
                                goto err;
                        continue;
@@ -313,11 +323,27 @@ again:
                ret = btree_repair_node_start(c, b, prev, cur);
                if (prev)
                        six_unlock_read(&prev->c.lock);
-               prev = cur;
-               cur = NULL;
 
-               if (ret)
+               if (ret == DROP_PREV_NODE) {
+                       bch2_btree_node_evict(c, prev_k.k);
+                       ret = bch2_journal_key_delete(c, b->c.btree_id,
+                                                     b->c.level, prev_k.k->k.p);
+                       if (ret)
+                               goto err;
+                       goto again;
+               } else if (ret == DROP_THIS_NODE) {
+                       bch2_btree_node_evict(c, cur_k.k);
+                       ret = bch2_journal_key_delete(c, b->c.btree_id,
+                                                     b->c.level, cur_k.k->k.p);
+                       if (ret)
+                               goto err;
+                       continue;
+               } else if (ret)
                        break;
+
+               prev = cur;
+               cur = NULL;
+               bch2_bkey_buf_copy(&prev_k, c, cur_k.k);
        }
 
        if (!ret && !IS_ERR_OR_NULL(prev)) {
@@ -339,10 +365,10 @@ again:
        bch2_btree_and_journal_iter_init_node_iter(&iter, c, b);
 
        while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
-               bch2_bkey_buf_reassemble(&tmp, c, k);
+               bch2_bkey_buf_reassemble(&cur_k, c, k);
                bch2_btree_and_journal_iter_advance(&iter);
 
-               cur = bch2_btree_node_get_noiter(c, tmp.k,
+               cur = bch2_btree_node_get_noiter(c, cur_k.k,
                                        b->c.btree_id, b->c.level - 1,
                                        false);
                ret = PTR_ERR_OR_ZERO(cur);
@@ -358,9 +384,9 @@ again:
                cur = NULL;
 
                if (ret == DROP_THIS_NODE) {
-                       bch2_btree_node_evict(c, tmp.k);
+                       bch2_btree_node_evict(c, cur_k.k);
                        ret = bch2_journal_key_delete(c, b->c.btree_id,
-                                                     b->c.level, tmp.k->k.p);
+                                                     b->c.level, cur_k.k->k.p);
                        dropped_children = true;
                }
 
@@ -385,7 +411,8 @@ fsck_err:
                six_unlock_read(&cur->c.lock);
 
        bch2_btree_and_journal_iter_exit(&iter);
-       bch2_bkey_buf_exit(&tmp, c);
+       bch2_bkey_buf_exit(&prev_k, c);
+       bch2_bkey_buf_exit(&cur_k, c);
 
        if (!ret && dropped_children)
                goto again;
index cd714dc2df3cb6391bc17c237211cdb165ba0eee..38609422ec210a7435aab9ce1a562c83b2789a17 100644 (file)
@@ -18,6 +18,9 @@
 #include <trace/events/bcachefs.h>
 
 static void btree_iter_set_search_pos(struct btree_iter *, struct bpos);
+static struct btree_iter *btree_iter_child_alloc(struct btree_iter *, unsigned long);
+static struct btree_iter *btree_trans_iter_alloc(struct btree_trans *);
+static void btree_iter_copy(struct btree_iter *, struct btree_iter *);
 
 static inline struct bpos bkey_successor(struct btree_iter *iter, struct bpos p)
 {
@@ -854,10 +857,9 @@ static inline struct bkey_s_c __btree_iter_unpack(struct btree_iter *iter,
 
 /* peek_all() doesn't skip deleted keys */
 static inline struct bkey_s_c btree_iter_level_peek_all(struct btree_iter *iter,
-                                                       struct btree_iter_level *l,
-                                                       struct bkey *u)
+                                                       struct btree_iter_level *l)
 {
-       return __btree_iter_unpack(iter, l, u,
+       return __btree_iter_unpack(iter, l, &iter->k,
                        bch2_btree_node_iter_peek_all(&l->iter, l->b));
 }
 
@@ -1184,7 +1186,11 @@ static __always_inline int btree_iter_down(struct btree_iter *iter,
        if (iter->flags & BTREE_ITER_PREFETCH)
                btree_iter_prefetch(iter);
 
+       if (btree_node_read_locked(iter, level + 1))
+               btree_node_unlock(iter, level + 1);
        iter->level = level;
+
+       bch2_btree_iter_verify_locks(iter);
 err:
        bch2_bkey_buf_exit(&tmp, c);
        return ret;
@@ -1637,15 +1643,18 @@ static inline bool btree_iter_set_pos_to_prev_leaf(struct btree_iter *iter)
        return ret;
 }
 
-static struct bkey_i *btree_trans_peek_updates(struct btree_trans *trans,
-                                              enum btree_id btree_id, struct bpos pos)
+static inline struct bkey_i *btree_trans_peek_updates(struct btree_iter *iter,
+                                                     struct bpos pos)
 {
        struct btree_insert_entry *i;
 
-       trans_for_each_update2(trans, i)
-               if ((cmp_int(btree_id,  i->iter->btree_id) ?:
-                    bkey_cmp(pos,      i->k->k.p)) <= 0) {
-                       if (btree_id == i->iter->btree_id)
+       if (!(iter->flags & BTREE_ITER_WITH_UPDATES))
+               return NULL;
+
+       trans_for_each_update(iter->trans, i)
+               if ((cmp_int(iter->btree_id,    i->iter->btree_id) ?:
+                    bkey_cmp(pos,              i->k->k.p)) <= 0) {
+                       if (iter->btree_id ==   i->iter->btree_id)
                                return i->k;
                        break;
                }
@@ -1653,7 +1662,11 @@ static struct bkey_i *btree_trans_peek_updates(struct btree_trans *trans,
        return NULL;
 }
 
-static inline struct bkey_s_c __btree_iter_peek(struct btree_iter *iter, bool with_updates)
+/**
+ * bch2_btree_iter_peek: returns first key greater than or equal to iterator's
+ * current position
+ */
+struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
 {
        struct bpos search_key = btree_iter_search_key(iter);
        struct bkey_i *next_update;
@@ -1664,9 +1677,7 @@ static inline struct bkey_s_c __btree_iter_peek(struct btree_iter *iter, bool wi
        bch2_btree_iter_verify(iter);
        bch2_btree_iter_verify_entry_exit(iter);
 start:
-       next_update = with_updates
-               ? btree_trans_peek_updates(iter->trans, iter->btree_id, search_key)
-               : NULL;
+       next_update = btree_trans_peek_updates(iter, search_key);
        btree_iter_set_search_pos(iter, search_key);
 
        while (1) {
@@ -1677,8 +1688,10 @@ start:
                k = btree_iter_level_peek(iter, &iter->l[0]);
 
                if (next_update &&
-                   bpos_cmp(next_update->k.p, iter->real_pos) <= 0)
+                   bpos_cmp(next_update->k.p, iter->real_pos) <= 0) {
+                       iter->k = next_update->k;
                        k = bkey_i_to_s_c(next_update);
+               }
 
                if (likely(k.k)) {
                        if (bkey_deleted(k.k)) {
@@ -1708,15 +1721,6 @@ start:
        return k;
 }
 
-/**
- * bch2_btree_iter_peek: returns first key greater than or equal to iterator's
- * current position
- */
-struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
-{
-       return __btree_iter_peek(iter, false);
-}
-
 /**
  * bch2_btree_iter_next: returns first key greater than iterator's current
  * position
@@ -1729,19 +1733,6 @@ struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter)
        return bch2_btree_iter_peek(iter);
 }
 
-struct bkey_s_c bch2_btree_iter_peek_with_updates(struct btree_iter *iter)
-{
-       return __btree_iter_peek(iter, true);
-}
-
-struct bkey_s_c bch2_btree_iter_next_with_updates(struct btree_iter *iter)
-{
-       if (!bch2_btree_iter_advance(iter))
-               return bkey_s_c_null;
-
-       return bch2_btree_iter_peek_with_updates(iter);
-}
-
 /**
  * bch2_btree_iter_peek_prev: returns first key less than or equal to
  * iterator's current position
@@ -1753,6 +1744,7 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter)
        int ret;
 
        EBUG_ON(btree_iter_type(iter) != BTREE_ITER_KEYS);
+       EBUG_ON(iter->flags & BTREE_ITER_WITH_UPDATES);
        bch2_btree_iter_verify(iter);
        bch2_btree_iter_verify_entry_exit(iter);
 
@@ -1814,52 +1806,9 @@ struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *iter)
        return bch2_btree_iter_peek_prev(iter);
 }
 
-static inline struct bkey_s_c
-__bch2_btree_iter_peek_slot_extents(struct btree_iter *iter)
-{
-       struct bkey_s_c k;
-       struct bpos pos, next_start;
-
-       /* keys & holes can't span inode numbers: */
-       if (iter->pos.offset == KEY_OFFSET_MAX) {
-               if (iter->pos.inode == KEY_INODE_MAX)
-                       return bkey_s_c_null;
-
-               bch2_btree_iter_set_pos(iter, bkey_successor(iter, iter->pos));
-       }
-
-       pos = iter->pos;
-       k = bch2_btree_iter_peek(iter);
-       iter->pos = pos;
-
-       if (bkey_err(k))
-               return k;
-
-       if (k.k && bkey_cmp(bkey_start_pos(k.k), iter->pos) <= 0)
-               return k;
-
-       next_start = k.k ? bkey_start_pos(k.k) : POS_MAX;
-
-       bkey_init(&iter->k);
-       iter->k.p = iter->pos;
-       bch2_key_resize(&iter->k,
-                       min_t(u64, KEY_SIZE_MAX,
-                             (next_start.inode == iter->pos.inode
-                              ? next_start.offset
-                              : KEY_OFFSET_MAX) -
-                             iter->pos.offset));
-
-       EBUG_ON(!iter->k.size);
-
-       bch2_btree_iter_verify_entry_exit(iter);
-       bch2_btree_iter_verify(iter);
-
-       return (struct bkey_s_c) { &iter->k, NULL };
-}
-
 struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
 {
-       struct btree_iter_level *l = &iter->l[0];
+       struct bpos search_key = btree_iter_search_key(iter);
        struct bkey_s_c k;
        int ret;
 
@@ -1867,24 +1816,78 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
        bch2_btree_iter_verify(iter);
        bch2_btree_iter_verify_entry_exit(iter);
 
-       btree_iter_set_search_pos(iter, btree_iter_search_key(iter));
+       btree_iter_set_search_pos(iter, search_key);
 
-       if (iter->flags & BTREE_ITER_IS_EXTENTS)
-               return __bch2_btree_iter_peek_slot_extents(iter);
+       /* extents can't span inode numbers: */
+       if ((iter->flags & BTREE_ITER_IS_EXTENTS) &&
+           iter->pos.offset == KEY_OFFSET_MAX) {
+               if (iter->pos.inode == KEY_INODE_MAX)
+                       return bkey_s_c_null;
+
+               bch2_btree_iter_set_pos(iter, bpos_nosnap_successor(iter->pos));
+       }
 
        ret = btree_iter_traverse(iter);
        if (unlikely(ret))
                return bkey_s_c_err(ret);
 
-       k = btree_iter_level_peek_all(iter, l, &iter->k);
+       if (!(iter->flags & BTREE_ITER_IS_EXTENTS)) {
+               struct bkey_i *next_update = btree_trans_peek_updates(iter, search_key);
 
-       EBUG_ON(k.k && bkey_deleted(k.k) && bkey_cmp(k.k->p, iter->pos) == 0);
+               k = btree_iter_level_peek_all(iter, &iter->l[0]);
+               EBUG_ON(k.k && bkey_deleted(k.k) && bkey_cmp(k.k->p, iter->pos) == 0);
 
-       if (!k.k || bkey_cmp(iter->pos, k.k->p)) {
-               /* hole */
-               bkey_init(&iter->k);
-               iter->k.p = iter->pos;
-               k = (struct bkey_s_c) { &iter->k, NULL };
+               if (next_update &&
+                   (!k.k || bpos_cmp(next_update->k.p, k.k->p) <= 0)) {
+                       iter->k = next_update->k;
+                       k = bkey_i_to_s_c(next_update);
+               }
+       } else {
+               if ((iter->flags & BTREE_ITER_INTENT)) {
+                       struct btree_iter *child =
+                               btree_iter_child_alloc(iter, _THIS_IP_);
+
+                       btree_iter_copy(child, iter);
+                       k = bch2_btree_iter_peek(child);
+
+                       if (k.k && !bkey_err(k))
+                               iter->k = child->k;
+               } else {
+                       struct bpos pos = iter->pos;
+
+                       k = bch2_btree_iter_peek(iter);
+                       iter->pos = pos;
+               }
+
+               if (unlikely(bkey_err(k)))
+                       return k;
+       }
+
+       if (!(iter->flags & BTREE_ITER_IS_EXTENTS)) {
+               if (!k.k ||
+                   ((iter->flags & BTREE_ITER_ALL_SNAPSHOTS)
+                    ? bpos_cmp(iter->pos, k.k->p)
+                    : bkey_cmp(iter->pos, k.k->p))) {
+                       bkey_init(&iter->k);
+                       iter->k.p = iter->pos;
+                       k = (struct bkey_s_c) { &iter->k, NULL };
+               }
+       } else {
+               struct bpos next = k.k ? bkey_start_pos(k.k) : POS_MAX;
+
+               if (bkey_cmp(iter->pos, next) < 0) {
+                       bkey_init(&iter->k);
+                       iter->k.p = iter->pos;
+                       bch2_key_resize(&iter->k,
+                                       min_t(u64, KEY_SIZE_MAX,
+                                             (next.inode == iter->pos.inode
+                                              ? next.offset
+                                              : KEY_OFFSET_MAX) -
+                                             iter->pos.offset));
+
+                       k = (struct bkey_s_c) { &iter->k, NULL };
+                       EBUG_ON(!k.k->size);
+               }
        }
 
        bch2_btree_iter_verify_entry_exit(iter);
@@ -1912,12 +1915,17 @@ struct bkey_s_c bch2_btree_iter_prev_slot(struct btree_iter *iter)
 
 struct bkey_s_c bch2_btree_iter_peek_cached(struct btree_iter *iter)
 {
+       struct bkey_i *next_update;
        struct bkey_cached *ck;
        int ret;
 
        EBUG_ON(btree_iter_type(iter) != BTREE_ITER_CACHED);
        bch2_btree_iter_verify(iter);
 
+       next_update = btree_trans_peek_updates(iter, iter->pos);
+       if (next_update && !bpos_cmp(next_update->k.p, iter->pos))
+               return bkey_i_to_s_c(next_update);
+
        ret = btree_iter_traverse(iter);
        if (unlikely(ret))
                return bkey_s_c_err(ret);
@@ -1956,9 +1964,39 @@ static inline void bch2_btree_iter_init(struct btree_trans *trans,
 
 /* new transactional stuff: */
 
+static void btree_iter_child_free(struct btree_iter *iter)
+{
+       struct btree_iter *child = btree_iter_child(iter);
+
+       if (child) {
+               bch2_trans_iter_free(iter->trans, child);
+               iter->child_idx = U8_MAX;
+       }
+}
+
+static struct btree_iter *btree_iter_child_alloc(struct btree_iter *iter,
+                                                unsigned long ip)
+{
+       struct btree_trans *trans = iter->trans;
+       struct btree_iter *child = btree_iter_child(iter);
+
+       if (!child) {
+               child = btree_trans_iter_alloc(trans);
+               child->ip_allocated     = ip;
+               iter->child_idx         = child->idx;
+
+               trans->iters_live       |= 1ULL << child->idx;
+               trans->iters_touched    |= 1ULL << child->idx;
+       }
+
+       return child;
+}
+
 static inline void __bch2_trans_iter_free(struct btree_trans *trans,
                                          unsigned idx)
 {
+       btree_iter_child_free(&trans->iters[idx]);
+
        __bch2_btree_iter_unlock(&trans->iters[idx]);
        trans->iters_linked             &= ~(1ULL << idx);
        trans->iters_live               &= ~(1ULL << idx);
@@ -2026,6 +2064,7 @@ static void btree_trans_iter_alloc_fail(struct btree_trans *trans)
 
 static struct btree_iter *btree_trans_iter_alloc(struct btree_trans *trans)
 {
+       struct btree_iter *iter;
        unsigned idx;
 
        if (unlikely(trans->iters_linked ==
@@ -2033,21 +2072,27 @@ static struct btree_iter *btree_trans_iter_alloc(struct btree_trans *trans)
                btree_trans_iter_alloc_fail(trans);
 
        idx = __ffs64(~trans->iters_linked);
-
+       iter = &trans->iters[idx];
+
+       iter->trans             = trans;
+       iter->idx               = idx;
+       iter->child_idx         = U8_MAX;
+       iter->flags             = 0;
+       iter->nodes_locked      = 0;
+       iter->nodes_intent_locked = 0;
        trans->iters_linked     |= 1ULL << idx;
-       trans->iters[idx].idx    = idx;
-       trans->iters[idx].flags  = 0;
-       return &trans->iters[idx];
+       return iter;
 }
 
-static inline void btree_iter_copy(struct btree_iter *dst,
-                                  struct btree_iter *src)
+static void btree_iter_copy(struct btree_iter *dst, struct btree_iter *src)
 {
-       unsigned i, idx = dst->idx;
+       unsigned i;
 
-       *dst = *src;
-       dst->idx = idx;
-       dst->flags &= ~BTREE_ITER_KEEP_UNTIL_COMMIT;
+       __bch2_btree_iter_unlock(dst);
+       btree_iter_child_free(dst);
+
+       memcpy(&dst->flags, &src->flags,
+              sizeof(struct btree_iter) - offsetof(struct btree_iter, flags));
 
        for (i = 0; i < BTREE_MAX_DEPTH; i++)
                if (btree_node_locked(dst, i))
@@ -2237,6 +2282,7 @@ void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
 
        p = trans->mem + trans->mem_top;
        trans->mem_top += size;
+       memset(p, 0, size);
        return p;
 }
 
@@ -2267,7 +2313,6 @@ void bch2_trans_reset(struct btree_trans *trans, unsigned flags)
        trans->iters_touched &= trans->iters_live;
 
        trans->nr_updates               = 0;
-       trans->nr_updates2              = 0;
        trans->mem_top                  = 0;
 
        trans->hooks                    = NULL;
@@ -2305,7 +2350,6 @@ static void bch2_trans_alloc_iters(struct btree_trans *trans, struct bch_fs *c)
 
        trans->iters            = p; p += iters_bytes;
        trans->updates          = p; p += updates_bytes;
-       trans->updates2         = p; p += updates_bytes;
 }
 
 void bch2_trans_init(struct btree_trans *trans, struct bch_fs *c,
@@ -2351,6 +2395,13 @@ int bch2_trans_exit(struct btree_trans *trans)
        bch2_trans_unlock(trans);
 
 #ifdef CONFIG_BCACHEFS_DEBUG
+       if (trans->iters_live) {
+               struct btree_iter *iter;
+
+               trans_for_each_iter(trans, iter)
+                       btree_iter_child_free(iter);
+       }
+
        if (trans->iters_live) {
                struct btree_iter *iter;
 
@@ -2502,7 +2553,6 @@ int bch2_fs_btree_iter_init(struct bch_fs *c)
        return  init_srcu_struct(&c->btree_trans_barrier) ?:
                mempool_init_kmalloc_pool(&c->btree_iters_pool, 1,
                        sizeof(struct btree_iter) * nr +
-                       sizeof(struct btree_insert_entry) * nr +
                        sizeof(struct btree_insert_entry) * nr) ?:
                mempool_init_kmalloc_pool(&c->btree_trans_mem_pool, 1,
                                          BTREE_TRANS_MEM_MAX);
index a2ce711fd61f0b981df5c940cfb3eb0c01420c7e..ba98cfea4d6086785ce044d10d0a506d9d728cc6 100644 (file)
@@ -153,9 +153,6 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *);
 struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *);
 struct bkey_s_c bch2_btree_iter_next(struct btree_iter *);
 
-struct bkey_s_c bch2_btree_iter_peek_with_updates(struct btree_iter *);
-struct bkey_s_c bch2_btree_iter_next_with_updates(struct btree_iter *);
-
 struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *);
 struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *);
 
@@ -181,6 +178,12 @@ static inline void bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos
        iter->should_be_locked = false;
 }
 
+static inline struct btree_iter *btree_iter_child(struct btree_iter *iter)
+{
+       return iter->child_idx == U8_MAX ? NULL
+               : iter->trans->iters + iter->child_idx;
+}
+
 /* Sort order for locking btree iterators: */
 static inline int btree_iter_lock_cmp(const struct btree_iter *l,
                                      const struct btree_iter *r)
index bc0f482b53d2a1dd2b6d9f11e76911ba3086f878..b37096f740892de42185c3bdfa639db5defdf1b1 100644 (file)
@@ -209,12 +209,13 @@ enum btree_iter_type {
  * @pos or the first key strictly greater than @pos
  */
 #define BTREE_ITER_IS_EXTENTS          (1 << 6)
-#define BTREE_ITER_ERROR               (1 << 7)
-#define BTREE_ITER_SET_POS_AFTER_COMMIT        (1 << 8)
-#define BTREE_ITER_CACHED_NOFILL       (1 << 9)
-#define BTREE_ITER_CACHED_NOCREATE     (1 << 10)
-#define BTREE_ITER_NOT_EXTENTS         (1 << 11)
-#define BTREE_ITER_ALL_SNAPSHOTS       (1 << 12)
+#define BTREE_ITER_NOT_EXTENTS         (1 << 7)
+#define BTREE_ITER_ERROR               (1 << 8)
+#define BTREE_ITER_SET_POS_AFTER_COMMIT        (1 << 9)
+#define BTREE_ITER_CACHED_NOFILL       (1 << 10)
+#define BTREE_ITER_CACHED_NOCREATE     (1 << 11)
+#define BTREE_ITER_WITH_UPDATES                (1 << 12)
+#define BTREE_ITER_ALL_SNAPSHOTS       (1 << 13)
 
 enum btree_iter_uptodate {
        BTREE_ITER_UPTODATE             = 0,
@@ -241,15 +242,20 @@ enum btree_iter_uptodate {
  */
 struct btree_iter {
        struct btree_trans      *trans;
-       struct bpos             pos;
-       /* what we're searching for/what the iterator actually points to: */
-       struct bpos             real_pos;
-       struct bpos             pos_after_commit;
+       unsigned long           ip_allocated;
+
+       u8                      idx;
+       u8                      child_idx;
+
+       /* btree_iter_copy starts here: */
+       u16                     flags;
+
        /* When we're filtering by snapshot, the snapshot ID we're looking for: */
        unsigned                snapshot;
 
-       u16                     flags;
-       u8                      idx;
+       struct bpos             pos;
+       struct bpos             real_pos;
+       struct bpos             pos_after_commit;
 
        enum btree_id           btree_id:4;
        enum btree_iter_uptodate uptodate:3;
@@ -276,7 +282,6 @@ struct btree_iter {
         * bch2_btree_iter_next_slot() can correctly advance pos.
         */
        struct bkey             k;
-       unsigned long           ip_allocated;
 };
 
 static inline enum btree_iter_type
@@ -340,7 +345,6 @@ struct btree_insert_entry {
        enum btree_id           btree_id:8;
        u8                      level;
        unsigned                trans_triggers_run:1;
-       unsigned                is_extent:1;
        struct bkey_i           *k;
        struct btree_iter       *iter;
 };
@@ -376,7 +380,6 @@ struct btree_trans {
        int                     srcu_idx;
 
        u8                      nr_updates;
-       u8                      nr_updates2;
        unsigned                used_mempool:1;
        unsigned                error:1;
        unsigned                in_traverse_all:1;
@@ -391,7 +394,6 @@ struct btree_trans {
 
        struct btree_iter       *iters;
        struct btree_insert_entry *updates;
-       struct btree_insert_entry *updates2;
 
        /* update path: */
        struct btree_trans_commit_hook *hooks;
index 56131ac516ce4b74e5f357f1f923b4ab7cdad7fd..cbfc8544def490df821d986c8dbd9c669c5adf78 100644 (file)
@@ -140,9 +140,4 @@ static inline int bch2_trans_commit(struct btree_trans *trans,
             (_i) < (_trans)->updates + (_trans)->nr_updates;           \
             (_i)++)
 
-#define trans_for_each_update2(_trans, _i)                             \
-       for ((_i) = (_trans)->updates2;                                 \
-            (_i) < (_trans)->updates2 + (_trans)->nr_updates2;         \
-            (_i)++)
-
 #endif /* _BCACHEFS_BTREE_UPDATE_H */
index 0d566be7455e1c3cb8b459d761b762c0968ce5be..482d583e9b6ecf79c829e89945a0a8833a21a625 100644 (file)
@@ -32,7 +32,7 @@ static inline int btree_insert_entry_cmp(const struct btree_insert_entry *l,
 static inline bool same_leaf_as_prev(struct btree_trans *trans,
                                     struct btree_insert_entry *i)
 {
-       return i != trans->updates2 &&
+       return i != trans->updates &&
                iter_l(i[0].iter)->b == iter_l(i[-1].iter)->b;
 }
 
@@ -222,7 +222,7 @@ static bool btree_insert_key_leaf(struct btree_trans *trans,
 static inline void btree_insert_entry_checks(struct btree_trans *trans,
                                             struct btree_insert_entry *i)
 {
-       BUG_ON(!i->is_extent && bpos_cmp(i->k->k.p, i->iter->real_pos));
+       BUG_ON(bpos_cmp(i->k->k.p, i->iter->real_pos));
        BUG_ON(i->level         != i->iter->level);
        BUG_ON(i->btree_id      != i->iter->btree_id);
 }
@@ -400,7 +400,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,
                h = h->next;
        }
 
-       trans_for_each_update2(trans, i) {
+       trans_for_each_update(trans, i) {
                /* Multiple inserts might go to same leaf: */
                if (!same_leaf_as_prev(trans, i))
                        u64s = 0;
@@ -458,10 +458,10 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,
 
        if (!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY)) {
                if (bch2_journal_seq_verify)
-                       trans_for_each_update2(trans, i)
+                       trans_for_each_update(trans, i)
                                i->k->k.version.lo = trans->journal_res.seq;
                else if (bch2_inject_invalid_keys)
-                       trans_for_each_update2(trans, i)
+                       trans_for_each_update(trans, i)
                                i->k->k.version = MAX_VERSION;
        }
 
@@ -476,7 +476,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,
        if (unlikely(c->gc_pos.phase))
                bch2_trans_mark_gc(trans);
 
-       trans_for_each_update2(trans, i)
+       trans_for_each_update(trans, i)
                do_btree_insert_one(trans, i);
 err:
        if (marking) {
@@ -504,7 +504,7 @@ static noinline int maybe_do_btree_merge(struct btree_trans *trans, struct btree
 
        BUG_ON(iter->level);
 
-       trans_for_each_update2(trans, i) {
+       trans_for_each_update(trans, i) {
                if (iter_l(i->iter)->b != b)
                        continue;
 
@@ -535,7 +535,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans,
        struct btree_iter *iter;
        int ret;
 
-       trans_for_each_update2(trans, i) {
+       trans_for_each_update(trans, i) {
                struct btree *b;
 
                BUG_ON(!btree_node_intent_locked(i->iter, i->level));
@@ -552,7 +552,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans,
                }
        }
 
-       trans_for_each_update2(trans, i)
+       trans_for_each_update(trans, i)
                BUG_ON(!btree_node_intent_locked(i->iter, i->level));
 
        ret = bch2_journal_preres_get(&c->journal,
@@ -592,7 +592,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans,
                }
        }
 
-       trans_for_each_update2(trans, i) {
+       trans_for_each_update(trans, i) {
                const char *invalid = bch2_bkey_invalid(c,
                                bkey_i_to_s_c(i->k), i->bkey_type);
                if (invalid) {
@@ -606,14 +606,14 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans,
        }
        bch2_btree_trans_verify_locks(trans);
 
-       trans_for_each_update2(trans, i)
+       trans_for_each_update(trans, i)
                if (!same_leaf_as_prev(trans, i))
                        bch2_btree_node_lock_for_insert(c,
                                        iter_l(i->iter)->b, i->iter);
 
        ret = bch2_trans_commit_write_locked(trans, stopped_at, trace_ip);
 
-       trans_for_each_update2(trans, i)
+       trans_for_each_update(trans, i)
                if (!same_leaf_as_prev(trans, i))
                        bch2_btree_node_unlock_write_inlined(iter_l(i->iter)->b,
                                                             i->iter);
@@ -775,132 +775,117 @@ bch2_trans_commit_get_rw_cold(struct btree_trans *trans)
        return 0;
 }
 
-static void __bch2_trans_update2(struct btree_trans *trans,
-                                struct btree_insert_entry n)
-{
-       struct btree_insert_entry *i;
-
-       btree_insert_entry_checks(trans, &n);
-
-       EBUG_ON(trans->nr_updates2 >= BTREE_ITER_MAX);
-
-       n.iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT;
-
-       trans_for_each_update2(trans, i)
-               if (btree_insert_entry_cmp(&n, i) <= 0)
-                       break;
-
-       if (i < trans->updates2 + trans->nr_updates2 &&
-           !btree_insert_entry_cmp(&n, i))
-               *i = n;
-       else
-               array_insert_item(trans->updates2, trans->nr_updates2,
-                                 i - trans->updates2, n);
-}
-
-static void bch2_trans_update2(struct btree_trans *trans,
-                              struct btree_iter *iter,
-                              struct bkey_i *insert)
-{
-       __bch2_trans_update2(trans, (struct btree_insert_entry) {
-               .bkey_type      = __btree_node_type(iter->level, iter->btree_id),
-               .btree_id       = iter->btree_id,
-               .level          = iter->level,
-               .iter           = iter,
-               .k              = insert,
-       });
-}
-
-static int extent_update_to_keys(struct btree_trans *trans,
-                                struct btree_insert_entry n)
+static int __btree_delete_at(struct btree_trans *trans, enum btree_id btree_id,
+                            struct bpos pos, unsigned trigger_flags)
 {
+       struct btree_iter *iter;
+       struct bkey_i *update;
        int ret;
 
-       ret = bch2_extent_can_insert(trans, n.iter, n.k);
-       if (ret)
+       update = bch2_trans_kmalloc(trans, sizeof(struct bkey));
+       if ((ret = PTR_ERR_OR_ZERO(update)))
                return ret;
 
-       if (bkey_deleted(&n.k->k))
-               return 0;
-
-       n.iter = bch2_trans_get_iter(trans, n.iter->btree_id, n.k->k.p,
-                                    BTREE_ITER_INTENT|
-                                    BTREE_ITER_NOT_EXTENTS);
-       n.is_extent = false;
+       bkey_init(&update->k);
+       update->k.p = pos;
 
-       __bch2_trans_update2(trans, n);
-       bch2_trans_iter_put(trans, n.iter);
+       iter = bch2_trans_get_iter(trans, btree_id, pos,
+                                  BTREE_ITER_NOT_EXTENTS|
+                                  BTREE_ITER_INTENT);
+       bch2_trans_update(trans, iter, update, trigger_flags);
+       bch2_trans_iter_put(trans, iter);
        return 0;
 }
 
 static int extent_handle_overwrites(struct btree_trans *trans,
-                                   enum btree_id btree_id,
-                                   struct bkey_i *insert)
+                                   struct btree_insert_entry *i)
 {
+       struct bch_fs *c = trans->c;
        struct btree_iter *iter, *update_iter;
-       struct bpos start = bkey_start_pos(&insert->k);
+       struct bpos start = bkey_start_pos(&i->k->k);
        struct bkey_i *update;
        struct bkey_s_c k;
        int ret = 0;
 
-       iter = bch2_trans_get_iter(trans, btree_id, start,
-                                  BTREE_ITER_INTENT);
-       k = bch2_btree_iter_peek_with_updates(iter);
+       iter = bch2_trans_get_iter(trans, i->btree_id, start,
+                                  BTREE_ITER_INTENT|
+                                  BTREE_ITER_WITH_UPDATES|
+                                  BTREE_ITER_NOT_EXTENTS);
+       k = bch2_btree_iter_peek(iter);
+       if (!k.k || (ret = bkey_err(k)))
+               goto out;
 
-       while (k.k && !(ret = bkey_err(k))) {
-               if (bkey_cmp(insert->k.p, bkey_start_pos(k.k)) <= 0)
-                       break;
+       if (bch2_bkey_maybe_mergable(k.k, &i->k->k)) {
+               struct bpos l_pos = k.k->p;
+
+               update = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
+               if ((ret = PTR_ERR_OR_ZERO(update)))
+                       goto out;
+
+               bkey_reassemble(update, k);
 
+               if (bch2_bkey_merge(c, bkey_i_to_s(update), bkey_i_to_s_c(i->k))) {
+                       ret = __btree_delete_at(trans, i->btree_id, l_pos,
+                                               i->trigger_flags);
+                       if (ret)
+                               goto out;
+
+                       i->k = update;
+                       goto next;
+               }
+       }
+
+       if (!bkey_cmp(k.k->p, bkey_start_pos(&i->k->k)))
+               goto next;
+
+       while (bkey_cmp(i->k->k.p, bkey_start_pos(k.k)) > 0) {
                if (bkey_cmp(bkey_start_pos(k.k), start) < 0) {
                        update = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
                        if ((ret = PTR_ERR_OR_ZERO(update)))
-                               break;
+                               goto out;
 
                        bkey_reassemble(update, k);
 
                        bch2_cut_back(start, update);
 
-                       update_iter = bch2_trans_get_iter(trans, btree_id, update->k.p,
+                       update_iter = bch2_trans_get_iter(trans, i->btree_id, update->k.p,
                                                          BTREE_ITER_NOT_EXTENTS|
                                                          BTREE_ITER_INTENT);
-                       bch2_trans_update2(trans, update_iter, update);
+                       bch2_trans_update(trans, update_iter, update, i->trigger_flags);
                        bch2_trans_iter_put(trans, update_iter);
                }
 
-               if (bkey_cmp(k.k->p, insert->k.p) < 0 ||
-                   (!bkey_cmp(k.k->p, insert->k.p) && bkey_deleted(&insert->k))) {
-                       update = bch2_trans_kmalloc(trans, sizeof(struct bkey));
-                       if ((ret = PTR_ERR_OR_ZERO(update)))
-                               break;
-
-                       bkey_init(&update->k);
-                       update->k.p = k.k->p;
-
-                       update_iter = bch2_trans_get_iter(trans, btree_id, update->k.p,
-                                                         BTREE_ITER_NOT_EXTENTS|
-                                                         BTREE_ITER_INTENT);
-                       bch2_trans_update2(trans, update_iter, update);
-                       bch2_trans_iter_put(trans, update_iter);
+               if (bkey_cmp(k.k->p, i->k->k.p) <= 0) {
+                       ret = __btree_delete_at(trans, i->btree_id, k.k->p,
+                                               i->trigger_flags);
+                       if (ret)
+                               goto out;
                }
 
-               if (bkey_cmp(k.k->p, insert->k.p) > 0) {
+               if (bkey_cmp(k.k->p, i->k->k.p) > 0) {
                        update = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
                        if ((ret = PTR_ERR_OR_ZERO(update)))
-                               break;
+                               goto out;
 
                        bkey_reassemble(update, k);
-                       bch2_cut_front(insert->k.p, update);
+                       bch2_cut_front(i->k->k.p, update);
 
-                       update_iter = bch2_trans_get_iter(trans, btree_id, update->k.p,
+                       update_iter = bch2_trans_get_iter(trans, i->btree_id, update->k.p,
                                                          BTREE_ITER_NOT_EXTENTS|
                                                          BTREE_ITER_INTENT);
-                       bch2_trans_update2(trans, update_iter, update);
+                       bch2_trans_update(trans, update_iter, update,
+                                         i->trigger_flags);
                        bch2_trans_iter_put(trans, update_iter);
-                       break;
+                       goto out;
                }
-
-               k = bch2_btree_iter_next_with_updates(iter);
+next:
+               k = bch2_btree_iter_next(iter);
+               if (!k.k || (ret = bkey_err(k)))
+                       goto out;
        }
+
+       bch2_bkey_merge(c, bkey_i_to_s(i->k), k);
+out:
        bch2_trans_iter_put(trans, iter);
 
        return ret;
@@ -966,23 +951,7 @@ int __bch2_trans_commit(struct btree_trans *trans)
                }
        } while (trans_trigger_run);
 
-       /* Turn extents updates into keys: */
-       trans_for_each_update(trans, i)
-               if (i->is_extent) {
-                       ret = extent_handle_overwrites(trans, i->btree_id, i->k);
-                       if (unlikely(ret))
-                               goto out;
-               }
-
        trans_for_each_update(trans, i) {
-               ret = i->is_extent
-                       ? extent_update_to_keys(trans, *i)
-                       : (__bch2_trans_update2(trans, *i), 0);
-               if (unlikely(ret))
-                       goto out;
-       }
-
-       trans_for_each_update2(trans, i) {
                ret = bch2_btree_iter_traverse(i->iter);
                if (unlikely(ret)) {
                        trace_trans_restart_traverse(trans->ip, _RET_IP_,
@@ -1051,117 +1020,66 @@ int bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter,
                .bkey_type      = __btree_node_type(iter->level, iter->btree_id),
                .btree_id       = iter->btree_id,
                .level          = iter->level,
-               .is_extent      = (iter->flags & BTREE_ITER_IS_EXTENTS) != 0,
                .iter           = iter,
                .k              = k
        };
+       bool is_extent = (iter->flags & BTREE_ITER_IS_EXTENTS) != 0;
+       int ret = 0;
 
        BUG_ON(trans->nr_updates >= BTREE_ITER_MAX);
 
 #ifdef CONFIG_BCACHEFS_DEBUG
        BUG_ON(bkey_cmp(iter->pos,
-                       n.is_extent ? bkey_start_pos(&k->k) : k->k.p));
+                       is_extent ? bkey_start_pos(&k->k) : k->k.p));
 
        trans_for_each_update(trans, i) {
-               BUG_ON(bkey_cmp(i->iter->pos,
-                               i->is_extent ? bkey_start_pos(&i->k->k) : i->k->k.p));
+               BUG_ON(bkey_cmp(i->iter->pos, i->k->k.p));
 
                BUG_ON(i != trans->updates &&
                       btree_insert_entry_cmp(i - 1, i) >= 0);
        }
 #endif
 
-       iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT;
+       if (is_extent) {
+               ret = bch2_extent_can_insert(trans, n.iter, n.k);
+               if (ret)
+                       return ret;
+
+               ret = extent_handle_overwrites(trans, &n);
+               if (ret)
+                       return ret;
 
-       if (n.is_extent) {
                iter->pos_after_commit = k->k.p;
                iter->flags |= BTREE_ITER_SET_POS_AFTER_COMMIT;
+
+               if (bkey_deleted(&n.k->k))
+                       return 0;
+
+               n.iter = bch2_trans_get_iter(trans, n.btree_id, n.k->k.p,
+                                            BTREE_ITER_INTENT|
+                                            BTREE_ITER_NOT_EXTENTS);
+               bch2_trans_iter_put(trans, n.iter);
        }
 
+       BUG_ON(n.iter->flags & BTREE_ITER_IS_EXTENTS);
+
+       n.iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT;
+
        /*
         * Pending updates are kept sorted: first, find position of new update,
         * then delete/trim any updates the new update overwrites:
         */
-       if (!n.is_extent) {
-               trans_for_each_update(trans, i)
-                       if (btree_insert_entry_cmp(&n, i) <= 0)
-                               break;
-
-               if (i < trans->updates + trans->nr_updates &&
-                   !btree_insert_entry_cmp(&n, i))
-                       *i = n;
-               else
-                       array_insert_item(trans->updates, trans->nr_updates,
-                                         i - trans->updates, n);
-       } else {
-               trans_for_each_update(trans, i)
-                       if (btree_insert_entry_cmp(&n, i) < 0)
-                               break;
-
-               while (i > trans->updates &&
-                      i[-1].btree_id == n.btree_id &&
-                      bkey_cmp(bkey_start_pos(&n.k->k),
-                               bkey_start_pos(&i[-1].k->k)) <= 0) {
-                       --i;
-                       array_remove_item(trans->updates, trans->nr_updates,
-                                         i - trans->updates);
-               }
-
-               if (i > trans->updates &&
-                   i[-1].btree_id == n.btree_id &&
-                   bkey_cmp(bkey_start_pos(&n.k->k), i[-1].k->k.p) < 0)
-                       bch2_cut_back(bkey_start_pos(&n.k->k), i[-1].k);
-
-               if (i < trans->updates + trans->nr_updates &&
-                   i->btree_id == n.btree_id &&
-                   bkey_cmp(n.k->k.p, bkey_start_pos(&i->k->k)) > 0) {
-                       if (bkey_cmp(bkey_start_pos(&n.k->k),
-                                    bkey_start_pos(&i->k->k)) > 0) {
-                               struct btree_insert_entry split = *i;
-                               int ret;
-
-                               BUG_ON(trans->nr_updates + 1 >= BTREE_ITER_MAX);
-
-                               split.k = bch2_trans_kmalloc(trans, bkey_bytes(&i->k->k));
-                               ret = PTR_ERR_OR_ZERO(split.k);
-                               if (ret)
-                                       return ret;
-
-                               bkey_copy(split.k, i->k);
-                               bch2_cut_back(bkey_start_pos(&n.k->k), split.k);
-
-                               split.iter = bch2_trans_get_iter(trans, split.btree_id,
-                                                                bkey_start_pos(&split.k->k),
-                                                                BTREE_ITER_INTENT);
-                               split.iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT;
-                               bch2_trans_iter_put(trans, split.iter);
-                               array_insert_item(trans->updates, trans->nr_updates,
-                                                 i - trans->updates, split);
-                               i++;
-                       }
-
-                       /*
-                        * When we have an extent that overwrites the start of another
-                        * update, trimming that extent will mean the iterator's
-                        * position has to change since the iterator position has to
-                        * match the extent's start pos - but we don't want to change
-                        * the iterator pos if some other code is using it, so we may
-                        * need to clone it:
-                        */
-                       if (btree_iter_live(trans, i->iter)) {
-                               i->iter = bch2_trans_copy_iter(trans, i->iter);
-
-                               i->iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT;
-                               bch2_trans_iter_put(trans, i->iter);
-                       }
-
-                       bch2_cut_front(n.k->k.p, i->k);
-                       bch2_btree_iter_set_pos(i->iter, n.k->k.p);
-               }
+       trans_for_each_update(trans, i)
+               if (btree_insert_entry_cmp(&n, i) <= 0)
+                       break;
 
+       if (i < trans->updates + trans->nr_updates &&
+           !btree_insert_entry_cmp(&n, i)) {
+               BUG_ON(i->trans_triggers_run);
+               *i = n;
+       } else
                array_insert_item(trans->updates, trans->nr_updates,
                                  i - trans->updates, n);
-       }
 
        return 0;
 }
index 76d15a5dc62fba066cbe9ba83c99c231e8e8b3ad..20862a4a77f251f4a66c188d160a06f269a8efaf 100644 (file)
@@ -1079,32 +1079,6 @@ static int bch2_mark_stripe(struct bch_fs *c,
        return 0;
 }
 
-static int __reflink_p_frag_references(struct bkey_s_c_reflink_p p,
-                                      u64 p_start, u64 p_end,
-                                      u64 v_start, u64 v_end)
-{
-       if (p_start == p_end)
-               return false;
-
-       p_start += le64_to_cpu(p.v->idx);
-       p_end   += le64_to_cpu(p.v->idx);
-
-       if (p_end <= v_start)
-               return false;
-       if (p_start >= v_end)
-               return false;
-       return true;
-}
-
-static int reflink_p_frag_references(struct bkey_s_c_reflink_p p,
-                                    u64 start, u64 end,
-                                    struct bkey_s_c k)
-{
-       return __reflink_p_frag_references(p, start, end,
-                                          bkey_start_offset(k.k),
-                                          k.k->p.offset);
-}
-
 static int __bch2_mark_reflink_p(struct bch_fs *c,
                        struct bkey_s_c_reflink_p p,
                        u64 idx, unsigned sectors,
@@ -1115,7 +1089,6 @@ static int __bch2_mark_reflink_p(struct bch_fs *c,
 {
        struct reflink_gc *r;
        int add = !(flags & BTREE_TRIGGER_OVERWRITE) ? 1 : -1;
-       int frags_referenced;
 
        while (1) {
                if (*r_idx >= c->reflink_gc_nr)
@@ -1128,20 +1101,6 @@ static int __bch2_mark_reflink_p(struct bch_fs *c,
                (*r_idx)++;
        }
 
-       frags_referenced =
-               __reflink_p_frag_references(p, 0, front_frag,
-                                           r->offset - r->size, r->offset) +
-               __reflink_p_frag_references(p, back_frag, p.k->size,
-                                           r->offset - r->size, r->offset);
-
-       if (frags_referenced == 2) {
-               BUG_ON(!(flags & BTREE_TRIGGER_OVERWRITE_SPLIT));
-               add = -add;
-       } else if (frags_referenced == 1) {
-               BUG_ON(!(flags & BTREE_TRIGGER_OVERWRITE));
-               add = 0;
-       }
-
        BUG_ON((s64) r->refcount + add < 0);
 
        r->refcount += add;
@@ -1515,29 +1474,6 @@ static struct btree_iter *trans_get_update(struct btree_trans *trans,
        return NULL;
 }
 
-static int trans_get_key(struct btree_trans *trans,
-                        enum btree_id btree_id, struct bpos pos,
-                        struct btree_iter **iter,
-                        struct bkey_s_c *k)
-{
-       unsigned flags = btree_id != BTREE_ID_alloc
-               ? BTREE_ITER_SLOTS
-               : BTREE_ITER_CACHED;
-       int ret;
-
-       *iter = trans_get_update(trans, btree_id, pos, k);
-       if (*iter)
-               return 1;
-
-       *iter = bch2_trans_get_iter(trans, btree_id, pos,
-                                   flags|BTREE_ITER_INTENT);
-       *k = __bch2_btree_iter_peek(*iter, flags);
-       ret = bkey_err(*k);
-       if (ret)
-               bch2_trans_iter_put(trans, *iter);
-       return ret;
-}
-
 static struct bkey_alloc_buf *
 bch2_trans_start_alloc_update(struct btree_trans *trans, struct btree_iter **_iter,
                              const struct bch_extent_ptr *ptr,
@@ -1617,9 +1553,13 @@ static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans,
        struct bch_replicas_padded r;
        int ret = 0;
 
-       ret = trans_get_key(trans, BTREE_ID_stripes, POS(0, p.ec.idx), &iter, &k);
-       if (ret < 0)
-               return ret;
+       iter = bch2_trans_get_iter(trans, BTREE_ID_stripes, POS(0, p.ec.idx),
+                                  BTREE_ITER_INTENT|
+                                  BTREE_ITER_WITH_UPDATES);
+       k = bch2_btree_iter_peek_slot(iter);
+       ret = bkey_err(k);
+       if (ret)
+               goto err;
 
        if (k.k->type != KEY_TYPE_stripe) {
                bch2_fs_inconsistent(c,
@@ -1627,7 +1567,7 @@ static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans,
                        (u64) p.ec.idx);
                bch2_inconsistent_error(c);
                ret = -EIO;
-               goto out;
+               goto err;
        }
 
        if (!bch2_ptr_matches_stripe(bkey_s_c_to_stripe(k).v, p)) {
@@ -1635,13 +1575,13 @@ static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans,
                        "stripe pointer doesn't match stripe %llu",
                        (u64) p.ec.idx);
                ret = -EIO;
-               goto out;
+               goto err;
        }
 
        s = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
        ret = PTR_ERR_OR_ZERO(s);
        if (ret)
-               goto out;
+               goto err;
 
        bkey_reassemble(&s->k_i, k);
        stripe_blockcount_set(&s->v, p.ec.block,
@@ -1652,7 +1592,7 @@ static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans,
        bch2_bkey_to_replicas(&r.e, bkey_i_to_s_c(&s->k_i));
        r.e.data_type = data_type;
        update_replicas_list(trans, &r.e, sectors);
-out:
+err:
        bch2_trans_iter_put(trans, iter);
        return ret;
 }
@@ -1821,8 +1761,6 @@ static int bch2_trans_mark_stripe(struct btree_trans *trans,
 static int __bch2_trans_mark_reflink_p(struct btree_trans *trans,
                        struct bkey_s_c_reflink_p p,
                        u64 idx, unsigned sectors,
-                       unsigned front_frag,
-                       unsigned back_frag,
                        unsigned flags)
 {
        struct bch_fs *c = trans->c;
@@ -1831,28 +1769,18 @@ static int __bch2_trans_mark_reflink_p(struct btree_trans *trans,
        struct bkey_i *n;
        __le64 *refcount;
        int add = !(flags & BTREE_TRIGGER_OVERWRITE) ? 1 : -1;
-       int frags_referenced;
        s64 ret;
 
-       ret = trans_get_key(trans, BTREE_ID_reflink,
-                           POS(0, idx), &iter, &k);
-       if (ret < 0)
-               return ret;
+       iter = bch2_trans_get_iter(trans, BTREE_ID_reflink, POS(0, idx),
+                                  BTREE_ITER_INTENT|
+                                  BTREE_ITER_WITH_UPDATES);
+       k = bch2_btree_iter_peek_slot(iter);
+       ret = bkey_err(k);
+       if (ret)
+               goto err;
 
        sectors = min_t(u64, sectors, k.k->p.offset - idx);
 
-       frags_referenced =
-               reflink_p_frag_references(p, 0, front_frag, k) +
-               reflink_p_frag_references(p, back_frag, p.k->size, k);
-
-       if (frags_referenced == 2) {
-               BUG_ON(!(flags & BTREE_TRIGGER_OVERWRITE_SPLIT));
-               add = -add;
-       } else if (frags_referenced == 1) {
-               BUG_ON(!(flags & BTREE_TRIGGER_OVERWRITE));
-               goto out;
-       }
-
        n = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
        ret = PTR_ERR_OR_ZERO(n);
        if (ret)
@@ -1882,7 +1810,7 @@ static int __bch2_trans_mark_reflink_p(struct btree_trans *trans,
        ret = bch2_trans_update(trans, iter, n, 0);
        if (ret)
                goto err;
-out:
+
        ret = sectors;
 err:
        bch2_trans_iter_put(trans, iter);
@@ -1894,20 +1822,15 @@ static int bch2_trans_mark_reflink_p(struct btree_trans *trans,
                        s64 sectors, unsigned flags)
 {
        u64 idx = le64_to_cpu(p.v->idx) + offset;
-       unsigned front_frag, back_frag;
        s64 ret = 0;
 
        if (sectors < 0)
                sectors = -sectors;
 
-       BUG_ON(offset + sectors > p.k->size);
-
-       front_frag = offset;
-       back_frag = offset + sectors;
+       BUG_ON(offset || sectors != p.k->size);
 
        while (sectors) {
-               ret = __bch2_trans_mark_reflink_p(trans, p, idx, sectors,
-                                       front_frag, back_frag, flags);
+               ret = __bch2_trans_mark_reflink_p(trans, p, idx, sectors, flags);
                if (ret < 0)
                        return ret;
 
@@ -1990,86 +1913,27 @@ int bch2_trans_mark_update(struct btree_trans *trans,
        if (!btree_node_type_needs_gc(iter->btree_id))
                return 0;
 
-       if (!btree_node_type_is_extents(iter->btree_id)) {
-               if (btree_iter_type(iter) != BTREE_ITER_CACHED) {
-                       old = bch2_btree_iter_peek_slot(iter);
-                       ret = bkey_err(old);
-                       if (ret)
-                               return ret;
-               } else {
-                       struct bkey_cached *ck = (void *) iter->l[0].b;
-
-                       BUG_ON(!ck->valid);
-                       old = bkey_i_to_s_c(ck->k);
-               }
-
-               if (old.k->type == new->k.type) {
-                       ret   = bch2_trans_mark_key(trans, old, bkey_i_to_s_c(new), 0, 0,
-                                       BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE|flags);
-               } else {
-                       ret   = bch2_trans_mark_key(trans, old, bkey_i_to_s_c(new), 0, 0,
-                                       BTREE_TRIGGER_INSERT|flags) ?:
-                               bch2_trans_mark_key(trans, old, bkey_i_to_s_c(new), 0, 0,
-                                       BTREE_TRIGGER_OVERWRITE|flags);
-               }
-       } else {
-               struct btree_iter *copy;
-               struct bkey _old;
-
-               EBUG_ON(btree_iter_type(iter) == BTREE_ITER_CACHED);
-
-               bkey_init(&_old);
-               old = (struct bkey_s_c) { &_old, NULL };
-
-               ret = bch2_trans_mark_key(trans, old, bkey_i_to_s_c(new),
-                                         0, new->k.size,
-                                         BTREE_TRIGGER_INSERT);
+       if (btree_iter_type(iter) != BTREE_ITER_CACHED) {
+               old = bch2_btree_iter_peek_slot(iter);
+               ret = bkey_err(old);
                if (ret)
                        return ret;
+       } else {
+               struct bkey_cached *ck = (void *) iter->l[0].b;
 
-               copy = bch2_trans_copy_iter(trans, iter);
-
-               for_each_btree_key_continue(copy, 0, old, ret) {
-                       unsigned offset = 0;
-                       s64 sectors = -((s64) old.k->size);
-
-                       flags |= BTREE_TRIGGER_OVERWRITE;
-
-                       if (bkey_cmp(new->k.p, bkey_start_pos(old.k)) <= 0)
-                               break;
-
-                       switch (bch2_extent_overlap(&new->k, old.k)) {
-                       case BCH_EXTENT_OVERLAP_ALL:
-                               offset = 0;
-                               sectors = -((s64) old.k->size);
-                               break;
-                       case BCH_EXTENT_OVERLAP_BACK:
-                               offset = bkey_start_offset(&new->k) -
-                                       bkey_start_offset(old.k);
-                               sectors = bkey_start_offset(&new->k) -
-                                       old.k->p.offset;
-                               break;
-                       case BCH_EXTENT_OVERLAP_FRONT:
-                               offset = 0;
-                               sectors = bkey_start_offset(old.k) -
-                                       new->k.p.offset;
-                               break;
-                       case BCH_EXTENT_OVERLAP_MIDDLE:
-                               offset = bkey_start_offset(&new->k) -
-                                       bkey_start_offset(old.k);
-                               sectors = -((s64) new->k.size);
-                               flags |= BTREE_TRIGGER_OVERWRITE_SPLIT;
-                               break;
-                       }
-
-                       BUG_ON(sectors >= 0);
+               BUG_ON(!ck->valid);
+               old = bkey_i_to_s_c(ck->k);
+       }
 
-                       ret = bch2_trans_mark_key(trans, old, bkey_i_to_s_c(new),
-                                       offset, sectors, flags);
-                       if (ret)
-                               break;
-               }
-               bch2_trans_iter_put(trans, copy);
+       if (old.k->type == new->k.type &&
+           !btree_node_type_is_extents(iter->btree_id)) {
+               ret   = bch2_trans_mark_key(trans, old, bkey_i_to_s_c(new), 0, 0,
+                               BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE|flags);
+       } else {
+               ret   = bch2_trans_mark_key(trans, old, bkey_i_to_s_c(new), 0, new->k.size,
+                               BTREE_TRIGGER_INSERT|flags) ?:
+                       bch2_trans_mark_key(trans, old, bkey_i_to_s_c(new), 0, -((s64) old.k->size),
+                               BTREE_TRIGGER_OVERWRITE|flags);
        }
 
        return ret;
index db6e4f6cac371254607d14362b0398c86bd9f778..48f9232e61ebfcd6b6255d108098ddc43a939abb 100644 (file)
@@ -392,7 +392,7 @@ static void ec_block_io(struct bch_fs *c, struct ec_stripe_buf *buf,
        this_cpu_add(ca->io_done->sectors[rw][data_type], buf->size);
 
        while (offset < bytes) {
-               unsigned nr_iovecs = min_t(size_t, BIO_MAX_PAGES,
+               unsigned nr_iovecs = min_t(size_t, BIO_MAX_VECS,
                                           DIV_ROUND_UP(bytes, PAGE_SIZE));
                unsigned b = min_t(size_t, bytes - offset,
                                   nr_iovecs << PAGE_SHIFT);
index b07d39555eb6e1fe92c1d5233ef3013aae359e14..3968f1fd7d27fd5b4834dc4a575029ff4a4e4989 100644 (file)
@@ -230,112 +230,134 @@ void bch2_extent_to_text(struct printbuf *out, struct bch_fs *c,
        bch2_bkey_ptrs_to_text(out, c, k);
 }
 
-enum merge_result bch2_extent_merge(struct bch_fs *c,
-                                   struct bkey_s _l, struct bkey_s _r)
+bool bch2_extent_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r)
 {
-       struct bkey_s_extent l = bkey_s_to_extent(_l);
-       struct bkey_s_extent r = bkey_s_to_extent(_r);
-       union bch_extent_entry *en_l = l.v->start;
-       union bch_extent_entry *en_r = r.v->start;
-       struct bch_extent_crc_unpacked crc_l, crc_r;
-
-       if (bkey_val_u64s(l.k) != bkey_val_u64s(r.k))
-               return BCH_MERGE_NOMERGE;
-
-       crc_l = bch2_extent_crc_unpack(l.k, NULL);
-
-       extent_for_each_entry(l, en_l) {
-               en_r = vstruct_idx(r.v, (u64 *) en_l - l.v->_data);
+       struct bkey_ptrs   l_ptrs = bch2_bkey_ptrs(l);
+       struct bkey_ptrs_c r_ptrs = bch2_bkey_ptrs_c(r);
+       union bch_extent_entry *en_l;
+       const union bch_extent_entry *en_r;
+       struct extent_ptr_decoded lp, rp;
+       bool use_right_ptr;
+       struct bch_dev *ca;
 
+       en_l = l_ptrs.start;
+       en_r = r_ptrs.start;
+       while (en_l < l_ptrs.end && en_r < r_ptrs.end) {
                if (extent_entry_type(en_l) != extent_entry_type(en_r))
-                       return BCH_MERGE_NOMERGE;
-
-               switch (extent_entry_type(en_l)) {
-               case BCH_EXTENT_ENTRY_ptr: {
-                       const struct bch_extent_ptr *lp = &en_l->ptr;
-                       const struct bch_extent_ptr *rp = &en_r->ptr;
-                       struct bch_dev *ca;
-
-                       if (lp->offset + crc_l.compressed_size != rp->offset ||
-                           lp->dev                     != rp->dev ||
-                           lp->gen                     != rp->gen)
-                               return BCH_MERGE_NOMERGE;
-
-                       /* We don't allow extents to straddle buckets: */
-                       ca = bch_dev_bkey_exists(c, lp->dev);
-
-                       if (PTR_BUCKET_NR(ca, lp) != PTR_BUCKET_NR(ca, rp))
-                               return BCH_MERGE_NOMERGE;
-
-                       break;
-               }
-               case BCH_EXTENT_ENTRY_stripe_ptr:
-                       if (en_l->stripe_ptr.block      != en_r->stripe_ptr.block ||
-                           en_l->stripe_ptr.idx        != en_r->stripe_ptr.idx)
-                               return BCH_MERGE_NOMERGE;
-                       break;
-               case BCH_EXTENT_ENTRY_crc32:
-               case BCH_EXTENT_ENTRY_crc64:
-               case BCH_EXTENT_ENTRY_crc128:
-                       crc_l = bch2_extent_crc_unpack(l.k, entry_to_crc(en_l));
-                       crc_r = bch2_extent_crc_unpack(r.k, entry_to_crc(en_r));
-
-                       if (crc_l.csum_type             != crc_r.csum_type ||
-                           crc_l.compression_type      != crc_r.compression_type ||
-                           crc_l.nonce                 != crc_r.nonce)
-                               return BCH_MERGE_NOMERGE;
-
-                       if (crc_l.offset + crc_l.live_size != crc_l.compressed_size ||
-                           crc_r.offset)
-                               return BCH_MERGE_NOMERGE;
-
-                       if (!bch2_checksum_mergeable(crc_l.csum_type))
-                               return BCH_MERGE_NOMERGE;
+                       return false;
 
-                       if (crc_is_compressed(crc_l))
-                               return BCH_MERGE_NOMERGE;
+               en_l = extent_entry_next(en_l);
+               en_r = extent_entry_next(en_r);
+       }
 
-                       if (crc_l.csum_type &&
-                           crc_l.uncompressed_size +
-                           crc_r.uncompressed_size > c->sb.encoded_extent_max)
-                               return BCH_MERGE_NOMERGE;
+       if (en_l < l_ptrs.end || en_r < r_ptrs.end)
+               return false;
 
-                       if (crc_l.uncompressed_size + crc_r.uncompressed_size >
+       en_l = l_ptrs.start;
+       en_r = r_ptrs.start;
+       lp.crc = bch2_extent_crc_unpack(l.k, NULL);
+       rp.crc = bch2_extent_crc_unpack(r.k, NULL);
+
+       while (__bkey_ptr_next_decode(l.k, l_ptrs.end, lp, en_l) &&
+              __bkey_ptr_next_decode(r.k, r_ptrs.end, rp, en_r)) {
+               if (lp.ptr.offset + lp.crc.offset + lp.crc.live_size !=
+                   rp.ptr.offset + rp.crc.offset ||
+                   lp.ptr.dev                  != rp.ptr.dev ||
+                   lp.ptr.gen                  != rp.ptr.gen ||
+                   lp.has_ec                   != rp.has_ec)
+                       return false;
+
+               /* Extents may not straddle buckets: */
+               ca = bch_dev_bkey_exists(c, lp.ptr.dev);
+               if (PTR_BUCKET_NR(ca, &lp.ptr) != PTR_BUCKET_NR(ca, &rp.ptr))
+                       return false;
+
+               if (lp.has_ec                   != rp.has_ec ||
+                   (lp.has_ec &&
+                    (lp.ec.block               != rp.ec.block ||
+                     lp.ec.redundancy          != rp.ec.redundancy ||
+                     lp.ec.idx                 != rp.ec.idx)))
+                       return false;
+
+               if (lp.crc.compression_type     != rp.crc.compression_type ||
+                   lp.crc.nonce                != rp.crc.nonce)
+                       return false;
+
+               if (lp.crc.offset + lp.crc.live_size + rp.crc.live_size <=
+                   lp.crc.uncompressed_size) {
+                       /* can use left extent's crc entry */
+               } else if (lp.crc.live_size <= rp.crc.offset ) {
+                       /* can use right extent's crc entry */
+               } else {
+                       /* check if checksums can be merged: */
+                       if (lp.crc.csum_type            != rp.crc.csum_type ||
+                           lp.crc.nonce                != rp.crc.nonce ||
+                           crc_is_compressed(lp.crc) ||
+                           !bch2_checksum_mergeable(lp.crc.csum_type))
+                               return false;
+
+                       if (lp.crc.offset + lp.crc.live_size != lp.crc.compressed_size ||
+                           rp.crc.offset)
+                               return false;
+
+                       if (lp.crc.csum_type &&
+                           lp.crc.uncompressed_size +
+                           rp.crc.uncompressed_size > c->sb.encoded_extent_max)
+                               return false;
+
+                       if (lp.crc.uncompressed_size + rp.crc.uncompressed_size >
                            bch2_crc_field_size_max[extent_entry_type(en_l)])
-                               return BCH_MERGE_NOMERGE;
-
-                       break;
-               default:
-                       return BCH_MERGE_NOMERGE;
+                               return false;
                }
-       }
-
-       extent_for_each_entry(l, en_l) {
-               struct bch_extent_crc_unpacked crc_l, crc_r;
-
-               en_r = vstruct_idx(r.v, (u64 *) en_l - l.v->_data);
-
-               if (!extent_entry_is_crc(en_l))
-                       continue;
 
-               crc_l = bch2_extent_crc_unpack(l.k, entry_to_crc(en_l));
-               crc_r = bch2_extent_crc_unpack(r.k, entry_to_crc(en_r));
-
-               crc_l.csum = bch2_checksum_merge(crc_l.csum_type,
-                                                crc_l.csum,
-                                                crc_r.csum,
-                                                crc_r.uncompressed_size << 9);
+               en_l = extent_entry_next(en_l);
+               en_r = extent_entry_next(en_r);
+       }
 
-               crc_l.uncompressed_size += crc_r.uncompressed_size;
-               crc_l.compressed_size   += crc_r.compressed_size;
+       use_right_ptr = false;
+       en_l = l_ptrs.start;
+       en_r = r_ptrs.start;
+       while (en_l < l_ptrs.end) {
+               if (extent_entry_type(en_l) == BCH_EXTENT_ENTRY_ptr &&
+                   use_right_ptr)
+                       en_l->ptr = en_r->ptr;
+
+               if (extent_entry_is_crc(en_l)) {
+                       struct bch_extent_crc_unpacked crc_l =
+                               bch2_extent_crc_unpack(l.k, entry_to_crc(en_l));
+                       struct bch_extent_crc_unpacked crc_r =
+                               bch2_extent_crc_unpack(r.k, entry_to_crc(en_r));
+
+                       use_right_ptr = false;
+
+                       if (crc_l.offset + crc_l.live_size + crc_r.live_size <=
+                           crc_l.uncompressed_size) {
+                               /* can use left extent's crc entry */
+                       } else if (crc_l.live_size <= crc_r.offset ) {
+                               /* can use right extent's crc entry */
+                               crc_r.offset -= crc_l.live_size;
+                               bch2_extent_crc_pack(entry_to_crc(en_l), crc_r,
+                                                    extent_entry_type(en_l));
+                               use_right_ptr = true;
+                       } else {
+                               crc_l.csum = bch2_checksum_merge(crc_l.csum_type,
+                                                                crc_l.csum,
+                                                                crc_r.csum,
+                                                                crc_r.uncompressed_size << 9);
+
+                               crc_l.uncompressed_size += crc_r.uncompressed_size;
+                               crc_l.compressed_size   += crc_r.compressed_size;
+                               bch2_extent_crc_pack(entry_to_crc(en_l), crc_l,
+                                                    extent_entry_type(en_l));
+                       }
+               }
 
-               bch2_extent_crc_pack(entry_to_crc(en_l), crc_l,
-                                    extent_entry_type(en_l));
+               en_l = extent_entry_next(en_l);
+               en_r = extent_entry_next(en_r);
        }
 
        bch2_key_resize(l.k, l.k->size + r.k->size);
-
-       return BCH_MERGE_MERGE;
+       return true;
 }
 
 /* KEY_TYPE_reservation: */
@@ -363,25 +385,17 @@ void bch2_reservation_to_text(struct printbuf *out, struct bch_fs *c,
               r.v->nr_replicas);
 }
 
-enum merge_result bch2_reservation_merge(struct bch_fs *c,
-                                        struct bkey_s _l, struct bkey_s _r)
+bool bch2_reservation_merge(struct bch_fs *c, struct bkey_s _l, struct bkey_s_c _r)
 {
        struct bkey_s_reservation l = bkey_s_to_reservation(_l);
-       struct bkey_s_reservation r = bkey_s_to_reservation(_r);
+       struct bkey_s_c_reservation r = bkey_s_c_to_reservation(_r);
 
        if (l.v->generation != r.v->generation ||
            l.v->nr_replicas != r.v->nr_replicas)
-               return BCH_MERGE_NOMERGE;
-
-       if ((u64) l.k->size + r.k->size > KEY_SIZE_MAX) {
-               bch2_key_resize(l.k, KEY_SIZE_MAX);
-               bch2_cut_front_s(l.k->p, r.s);
-               return BCH_MERGE_PARTIAL;
-       }
+               return false;
 
        bch2_key_resize(l.k, l.k->size + r.k->size);
-
-       return BCH_MERGE_MERGE;
+       return true;
 }
 
 /* Extent checksum entries: */
index 9999805f955ebfff87cf066aff9457c976d9c3db..3f6224f75ce82f7a8a25fe75796758e524af968f 100644 (file)
@@ -394,8 +394,7 @@ void bch2_btree_ptr_v2_compat(enum btree_id, unsigned, unsigned,
 
 const char *bch2_extent_invalid(const struct bch_fs *, struct bkey_s_c);
 void bch2_extent_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
-enum merge_result bch2_extent_merge(struct bch_fs *,
-                                   struct bkey_s, struct bkey_s);
+bool bch2_extent_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c);
 
 #define bch2_bkey_ops_extent (struct bkey_ops) {               \
        .key_invalid    = bch2_extent_invalid,                  \
@@ -409,8 +408,7 @@ enum merge_result bch2_extent_merge(struct bch_fs *,
 
 const char *bch2_reservation_invalid(const struct bch_fs *, struct bkey_s_c);
 void bch2_reservation_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
-enum merge_result bch2_reservation_merge(struct bch_fs *,
-                                        struct bkey_s, struct bkey_s);
+bool bch2_reservation_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c);
 
 #define bch2_bkey_ops_reservation (struct bkey_ops) {          \
        .key_invalid    = bch2_reservation_invalid,             \
index ef28995525c0cc955114536d65b61010acec4883..4ec3360b6703ce835de6efda4f5439874b8994fa 100644 (file)
@@ -893,7 +893,7 @@ void bch2_readahead(struct readahead_control *ractl)
                unsigned n = min_t(unsigned,
                                   readpages_iter.nr_pages -
                                   readpages_iter.idx,
-                                  BIO_MAX_PAGES);
+                                  BIO_MAX_VECS);
                struct bch_read_bio *rbio =
                        rbio_init(bio_alloc_bioset(GFP_NOFS, n, &c->bio_read),
                                  opts);
@@ -1102,8 +1102,7 @@ static void bch2_writepage_io_alloc(struct bch_fs *c,
 {
        struct bch_write_op *op;
 
-       w->io = container_of(bio_alloc_bioset(GFP_NOFS,
-                                             BIO_MAX_PAGES,
+       w->io = container_of(bio_alloc_bioset(GFP_NOFS, BIO_MAX_VECS,
                                              &c->writepage_bioset),
                             struct bch_writepage_io, op.wbio.bio);
 
@@ -1226,7 +1225,7 @@ do_io:
                    (w->io->op.res.nr_replicas != nr_replicas_this_write ||
                     bio_full(&w->io->op.wbio.bio, PAGE_SIZE) ||
                     w->io->op.wbio.bio.bi_iter.bi_size + (sectors << 9) >=
-                    (BIO_MAX_PAGES * PAGE_SIZE) ||
+                    (BIO_MAX_VECS * PAGE_SIZE) ||
                     bio_end_sector(&w->io->op.wbio.bio) != sector))
                        bch2_writepage_do_io(w);
 
@@ -1690,7 +1689,7 @@ static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter)
        iter->count -= shorten;
 
        bio = bio_alloc_bioset(GFP_KERNEL,
-                              iov_iter_npages(iter, BIO_MAX_PAGES),
+                              iov_iter_npages(iter, BIO_MAX_VECS),
                               &c->dio_read_bioset);
 
        bio->bi_end_io = bch2_direct_IO_read_endio;
@@ -1725,7 +1724,7 @@ static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter)
        goto start;
        while (iter->count) {
                bio = bio_alloc_bioset(GFP_KERNEL,
-                                      iov_iter_npages(iter, BIO_MAX_PAGES),
+                                      iov_iter_npages(iter, BIO_MAX_VECS),
                                       &c->bio_read);
                bio->bi_end_io          = bch2_direct_IO_read_split_endio;
 start:
@@ -2029,7 +2028,7 @@ ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter)
        }
 
        bio = bio_alloc_bioset(GFP_KERNEL,
-                              iov_iter_npages(iter, BIO_MAX_PAGES),
+                              iov_iter_npages(iter, BIO_MAX_VECS),
                               &c->dio_write_bioset);
        dio = container_of(bio, struct dio_write, op.wbio.bio);
        init_completion(&dio->done);
@@ -2271,7 +2270,8 @@ static int bch2_extend(struct bch_inode_info *inode,
                return ret;
 
        truncate_setsize(&inode->v, iattr->ia_size);
-       setattr_copy(&inode->v, iattr);
+       /* ATTR_MODE will never be set here, ns argument isn't needed: */
+       setattr_copy(NULL, &inode->v, iattr);
 
        mutex_lock(&inode->ei_update_lock);
        ret = bch2_write_inode_size(c, inode, inode->v.i_size,
@@ -2389,7 +2389,8 @@ int bch2_truncate(struct bch_inode_info *inode, struct iattr *iattr)
        if (unlikely(ret))
                goto err;
 
-       setattr_copy(&inode->v, iattr);
+       /* ATTR_MODE will never be set here, ns argument isn't needed: */
+       setattr_copy(NULL, &inode->v, iattr);
 
        mutex_lock(&inode->ei_update_lock);
        ret = bch2_write_inode(c, inode, bch2_truncate_finish_fn, NULL,
index d8cc32e043df83d3d22d1ae55aded1160e3585d2..91a0e761c8e70d5179d42a29fade38ed61666350 100644 (file)
@@ -81,7 +81,7 @@ static int bch2_ioc_setflags(struct bch_fs *c,
                return ret;
 
        inode_lock(&inode->v);
-       if (!inode_owner_or_capable(&inode->v)) {
+       if (!inode_owner_or_capable(file_mnt_user_ns(file), &inode->v)) {
                ret = -EACCES;
                goto setflags_out;
        }
@@ -152,7 +152,7 @@ static int bch2_ioc_fssetxattr(struct bch_fs *c,
                return ret;
 
        inode_lock(&inode->v);
-       if (!inode_owner_or_capable(&inode->v)) {
+       if (!inode_owner_or_capable(file_mnt_user_ns(file), &inode->v)) {
                ret = -EACCES;
                goto err;
        }
index c567e17694720a13a409b026163d0ed1a1b1df3d..a95358ddefa52dd5df55c563c9e8d367ed3b2218 100644 (file)
@@ -244,11 +244,11 @@ static int inum_test(struct inode *inode, void *p)
 }
 
 static struct bch_inode_info *
-__bch2_create(struct bch_inode_info *dir, struct dentry *dentry,
+__bch2_create(struct user_namespace *mnt_userns,
+             struct bch_inode_info *dir, struct dentry *dentry,
              umode_t mode, dev_t rdev, bool tmpfile)
 {
        struct bch_fs *c = dir->v.i_sb->s_fs_info;
-       struct user_namespace *ns = dir->v.i_sb->s_user_ns;
        struct btree_trans trans;
        struct bch_inode_unpacked dir_u;
        struct bch_inode_info *inode, *old;
@@ -284,8 +284,8 @@ retry:
 
        ret   = bch2_create_trans(&trans, dir->v.i_ino, &dir_u, &inode_u,
                                  !tmpfile ? &dentry->d_name : NULL,
-                                 from_kuid(ns, current_fsuid()),
-                                 from_kgid(ns, current_fsgid()),
+                                 from_kuid(mnt_userns, current_fsuid()),
+                                 from_kgid(mnt_userns, current_fsgid()),
                                  mode, rdev,
                                  default_acl, acl) ?:
                bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, 1,
@@ -382,11 +382,12 @@ static struct dentry *bch2_lookup(struct inode *vdir, struct dentry *dentry,
        return d_splice_alias(vinode, dentry);
 }
 
-static int bch2_mknod(struct inode *vdir, struct dentry *dentry,
+static int bch2_mknod(struct user_namespace *mnt_userns,
+                     struct inode *vdir, struct dentry *dentry,
                      umode_t mode, dev_t rdev)
 {
        struct bch_inode_info *inode =
-               __bch2_create(to_bch_ei(vdir), dentry, mode, rdev, false);
+               __bch2_create(mnt_userns, to_bch_ei(vdir), dentry, mode, rdev, false);
 
        if (IS_ERR(inode))
                return PTR_ERR(inode);
@@ -395,10 +396,11 @@ static int bch2_mknod(struct inode *vdir, struct dentry *dentry,
        return 0;
 }
 
-static int bch2_create(struct inode *vdir, struct dentry *dentry,
+static int bch2_create(struct user_namespace *mnt_userns,
+                      struct inode *vdir, struct dentry *dentry,
                       umode_t mode, bool excl)
 {
-       return bch2_mknod(vdir, dentry, mode|S_IFREG, 0);
+       return bch2_mknod(mnt_userns, vdir, dentry, mode|S_IFREG, 0);
 }
 
 static int __bch2_link(struct bch_fs *c,
@@ -488,14 +490,15 @@ static int bch2_unlink(struct inode *vdir, struct dentry *dentry)
        return ret;
 }
 
-static int bch2_symlink(struct inode *vdir, struct dentry *dentry,
+static int bch2_symlink(struct user_namespace *mnt_userns,
+                       struct inode *vdir, struct dentry *dentry,
                        const char *symname)
 {
        struct bch_fs *c = vdir->i_sb->s_fs_info;
        struct bch_inode_info *dir = to_bch_ei(vdir), *inode;
        int ret;
 
-       inode = __bch2_create(dir, dentry, S_IFLNK|S_IRWXUGO, 0, true);
+       inode = __bch2_create(mnt_userns, dir, dentry, S_IFLNK|S_IRWXUGO, 0, true);
        if (unlikely(IS_ERR(inode)))
                return PTR_ERR(inode);
 
@@ -523,12 +526,14 @@ err:
        return ret;
 }
 
-static int bch2_mkdir(struct inode *vdir, struct dentry *dentry, umode_t mode)
+static int bch2_mkdir(struct user_namespace *mnt_userns,
+                     struct inode *vdir, struct dentry *dentry, umode_t mode)
 {
-       return bch2_mknod(vdir, dentry, mode|S_IFDIR, 0);
+       return bch2_mknod(mnt_userns, vdir, dentry, mode|S_IFDIR, 0);
 }
 
-static int bch2_rename2(struct inode *src_vdir, struct dentry *src_dentry,
+static int bch2_rename2(struct user_namespace *mnt_userns,
+                       struct inode *src_vdir, struct dentry *src_dentry,
                        struct inode *dst_vdir, struct dentry *dst_dentry,
                        unsigned flags)
 {
@@ -642,7 +647,8 @@ err:
        return ret;
 }
 
-void bch2_setattr_copy(struct bch_inode_info *inode,
+void bch2_setattr_copy(struct user_namespace *mnt_userns,
+                      struct bch_inode_info *inode,
                       struct bch_inode_unpacked *bi,
                       struct iattr *attr)
 {
@@ -650,9 +656,9 @@ void bch2_setattr_copy(struct bch_inode_info *inode,
        unsigned int ia_valid = attr->ia_valid;
 
        if (ia_valid & ATTR_UID)
-               bi->bi_uid = from_kuid(c->vfs_sb->s_user_ns, attr->ia_uid);
+               bi->bi_uid = from_kuid(mnt_userns, attr->ia_uid);
        if (ia_valid & ATTR_GID)
-               bi->bi_gid = from_kgid(c->vfs_sb->s_user_ns, attr->ia_gid);
+               bi->bi_gid = from_kgid(mnt_userns, attr->ia_gid);
 
        if (ia_valid & ATTR_ATIME)
                bi->bi_atime = timespec_to_bch2_time(c, attr->ia_atime);
@@ -668,13 +674,14 @@ void bch2_setattr_copy(struct bch_inode_info *inode,
                        : inode->v.i_gid;
 
                if (!in_group_p(gid) &&
-                   !capable_wrt_inode_uidgid(&inode->v, CAP_FSETID))
+                   !capable_wrt_inode_uidgid(mnt_userns, &inode->v, CAP_FSETID))
                        mode &= ~S_ISGID;
                bi->bi_mode = mode;
        }
 }
 
-static int bch2_setattr_nonsize(struct bch_inode_info *inode,
+static int bch2_setattr_nonsize(struct user_namespace *mnt_userns,
+                               struct bch_inode_info *inode,
                                struct iattr *attr)
 {
        struct bch_fs *c = inode->v.i_sb->s_fs_info;
@@ -712,7 +719,7 @@ retry:
        if (ret)
                goto btree_err;
 
-       bch2_setattr_copy(inode, &inode_u, attr);
+       bch2_setattr_copy(mnt_userns, inode, &inode_u, attr);
 
        if (attr->ia_valid & ATTR_MODE) {
                ret = bch2_acl_chmod(&trans, &inode_u, inode_u.bi_mode, &acl);
@@ -745,7 +752,8 @@ err:
        return ret;
 }
 
-static int bch2_getattr(const struct path *path, struct kstat *stat,
+static int bch2_getattr(struct user_namespace *mnt_userns,
+                       const struct path *path, struct kstat *stat,
                        u32 request_mask, unsigned query_flags)
 {
        struct bch_inode_info *inode = to_bch_ei(d_inode(path->dentry));
@@ -785,26 +793,28 @@ static int bch2_getattr(const struct path *path, struct kstat *stat,
        return 0;
 }
 
-static int bch2_setattr(struct dentry *dentry, struct iattr *iattr)
+static int bch2_setattr(struct user_namespace *mnt_userns,
+                       struct dentry *dentry, struct iattr *iattr)
 {
        struct bch_inode_info *inode = to_bch_ei(dentry->d_inode);
        int ret;
 
        lockdep_assert_held(&inode->v.i_rwsem);
 
-       ret = setattr_prepare(dentry, iattr);
+       ret = setattr_prepare(mnt_userns, dentry, iattr);
        if (ret)
                return ret;
 
        return iattr->ia_valid & ATTR_SIZE
                ? bch2_truncate(inode, iattr)
-               : bch2_setattr_nonsize(inode, iattr);
+               : bch2_setattr_nonsize(mnt_userns, inode, iattr);
 }
 
-static int bch2_tmpfile(struct inode *vdir, struct dentry *dentry, umode_t mode)
+static int bch2_tmpfile(struct user_namespace *mnt_userns,
+                       struct inode *vdir, struct dentry *dentry, umode_t mode)
 {
        struct bch_inode_info *inode =
-               __bch2_create(to_bch_ei(vdir), dentry, mode, 0, true);
+               __bch2_create(mnt_userns, to_bch_ei(vdir), dentry, mode, 0, true);
 
        if (IS_ERR(inode))
                return PTR_ERR(inode);
index a420729288d4200a4ba002038e8348412ce69e55..ba700810a4bee67223264c01a877011ef1a16676 100644 (file)
@@ -42,24 +42,22 @@ void bch2_reflink_p_to_text(struct printbuf *out, struct bch_fs *c,
        pr_buf(out, "idx %llu", le64_to_cpu(p.v->idx));
 }
 
-enum merge_result bch2_reflink_p_merge(struct bch_fs *c,
-                                      struct bkey_s _l, struct bkey_s _r)
+bool bch2_reflink_p_merge(struct bch_fs *c, struct bkey_s _l, struct bkey_s_c _r)
 {
        struct bkey_s_reflink_p l = bkey_s_to_reflink_p(_l);
-       struct bkey_s_reflink_p r = bkey_s_to_reflink_p(_r);
+       struct bkey_s_c_reflink_p r = bkey_s_c_to_reflink_p(_r);
 
-       if (le64_to_cpu(l.v->idx) + l.k->size != le64_to_cpu(r.v->idx))
-               return BCH_MERGE_NOMERGE;
+       /*
+        * Disabled for now, the triggers code needs to be reworked for merging
+        * of reflink pointers to work:
+        */
+       return false;
 
-       if ((u64) l.k->size + r.k->size > KEY_SIZE_MAX) {
-               bch2_key_resize(l.k, KEY_SIZE_MAX);
-               bch2_cut_front_s(l.k->p, _r);
-               return BCH_MERGE_PARTIAL;
-       }
+       if (le64_to_cpu(l.v->idx) + l.k->size != le64_to_cpu(r.v->idx))
+               return false;
 
        bch2_key_resize(l.k, l.k->size + r.k->size);
-
-       return BCH_MERGE_MERGE;
+       return true;
 }
 
 /* indirect extents */
@@ -84,6 +82,14 @@ void bch2_reflink_v_to_text(struct printbuf *out, struct bch_fs *c,
        bch2_bkey_ptrs_to_text(out, c, k);
 }
 
+bool bch2_reflink_v_merge(struct bch_fs *c, struct bkey_s _l, struct bkey_s_c _r)
+{
+       struct bkey_s_reflink_v   l = bkey_s_to_reflink_v(_l);
+       struct bkey_s_c_reflink_v r = bkey_s_c_to_reflink_v(_r);
+
+       return l.v->refcount == r.v->refcount && bch2_extent_merge(c, _l, _r);
+}
+
 /* indirect inline data */
 
 const char *bch2_indirect_inline_data_invalid(const struct bch_fs *c,
@@ -138,7 +144,7 @@ static int bch2_make_extent_indirect(struct btree_trans *trans,
        /* rewind iter to start of hole, if necessary: */
        bch2_btree_iter_set_pos(reflink_iter, bkey_start_pos(k.k));
 
-       r_v = bch2_trans_kmalloc(trans, sizeof(__le64) + bkey_val_bytes(&orig->k));
+       r_v = bch2_trans_kmalloc(trans, sizeof(__le64) + bkey_bytes(&orig->k));
        ret = PTR_ERR_OR_ZERO(r_v);
        if (ret)
                goto err;
@@ -159,12 +165,6 @@ static int bch2_make_extent_indirect(struct btree_trans *trans,
        if (ret)
                goto err;
 
-       r_p = bch2_trans_kmalloc(trans, sizeof(*r_p));
-       if (IS_ERR(r_p)) {
-               ret = PTR_ERR(r_p);
-               goto err;
-       }
-
        orig->k.type = KEY_TYPE_reflink_p;
        r_p = bkey_i_to_reflink_p(orig);
        set_bkey_val_bytes(&r_p->k, sizeof(r_p->v));
index bfc785619ee89d17270fa75342c85fe4f2712d54..68c5cb5a2780ddd1552d41d03229e145f1d14d3c 100644 (file)
@@ -5,8 +5,7 @@
 const char *bch2_reflink_p_invalid(const struct bch_fs *, struct bkey_s_c);
 void bch2_reflink_p_to_text(struct printbuf *, struct bch_fs *,
                            struct bkey_s_c);
-enum merge_result bch2_reflink_p_merge(struct bch_fs *,
-                                      struct bkey_s, struct bkey_s);
+bool bch2_reflink_p_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c);
 
 #define bch2_bkey_ops_reflink_p (struct bkey_ops) {            \
        .key_invalid    = bch2_reflink_p_invalid,               \
index e7b40b3ca4aa551bd8e5710672e632419fb95338..8bd7553b9ebd913ab51b59928323da3302ee05da 100644 (file)
@@ -323,6 +323,7 @@ static int bch2_xattr_get_handler(const struct xattr_handler *handler,
 }
 
 static int bch2_xattr_set_handler(const struct xattr_handler *handler,
+                                 struct user_namespace *mnt_userns,
                                  struct dentry *dentry, struct inode *vinode,
                                  const char *name, const void *value,
                                  size_t size, int flags)
@@ -455,6 +456,7 @@ static int inode_opt_set_fn(struct bch_inode_info *inode,
 }
 
 static int bch2_xattr_bcachefs_set(const struct xattr_handler *handler,
+                                  struct user_namespace *mnt_userns,
                                   struct dentry *dentry, struct inode *vinode,
                                   const char *name, const void *value,
                                   size_t size, int flags)