Update bcachefs sources to f38382c574 bcachefs: Improve key marking interface
author    Kent Overstreet <kent.overstreet@gmail.com>
          Fri, 24 May 2019 15:57:29 +0000 (11:57 -0400)
committer Kent Overstreet <kent.overstreet@gmail.com>
          Fri, 24 May 2019 15:58:42 +0000 (11:58 -0400)
42 files changed:
.bcachefs_revision
cmd_debug.c
include/trace/events/bcachefs.h
libbcachefs/acl.c
libbcachefs/alloc_background.c
libbcachefs/bcachefs.h
libbcachefs/bkey_methods.c
libbcachefs/btree_cache.c
libbcachefs/btree_cache.h
libbcachefs/btree_gc.c
libbcachefs/btree_io.c
libbcachefs/btree_iter.c
libbcachefs/btree_iter.h
libbcachefs/btree_locking.h
libbcachefs/btree_types.h
libbcachefs/btree_update.h
libbcachefs/btree_update_interior.c
libbcachefs/btree_update_leaf.c
libbcachefs/buckets.c
libbcachefs/buckets.h
libbcachefs/buckets_types.h
libbcachefs/checksum.c
libbcachefs/checksum.h
libbcachefs/debug.c
libbcachefs/dirent.c
libbcachefs/ec.c
libbcachefs/extents.c
libbcachefs/extents.h
libbcachefs/fs-io.c
libbcachefs/fs.c
libbcachefs/fsck.c
libbcachefs/inode.c
libbcachefs/io.c
libbcachefs/journal.c
libbcachefs/journal_seq_blacklist.c
libbcachefs/migrate.c
libbcachefs/move.c
libbcachefs/quota.c
libbcachefs/recovery.c
libbcachefs/sysfs.c
libbcachefs/tests.c
libbcachefs/xattr.c
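
The upstream change being pulled in here reworks the key-marking interface: bch2_mark_key() and bch2_mark_key_locked() lose their bare bool argument (true for insert, false for overwrite) and express that choice through the flags word instead, via new BCH_BUCKET_MARK_INSERT, BCH_BUCKET_MARK_OVERWRITE and BCH_BUCKET_MARK_ALLOC_READ bits, as the hunks below show. A minimal standalone model of the pattern — the flag names mirror the diff, but the function is illustrative, not the bcachefs implementation:

#include <stdio.h>

/*
 * Toy model of the marking-interface change: a bool parameter becomes
 * explicit flag bits, so every call site names INSERT vs OVERWRITE
 * alongside the pre-existing GC/NOATOMIC-style flags.
 */
#define MARK_INSERT	(1U << 0)
#define MARK_OVERWRITE	(1U << 1)
#define MARK_GC		(1U << 2)
#define MARK_NOATOMIC	(1U << 3)

static void mark_key(const char *key, unsigned flags)
{
	printf("%s: %s%s\n", key,
	       (flags & MARK_INSERT) ? "insert" : "overwrite",
	       (flags & MARK_GC) ? " [gc]" : "");
}

int main(void)
{
	mark_key("btree root", MARK_INSERT);			/* was: ..., true,  ... */
	mark_key("pending free", MARK_OVERWRITE | MARK_GC);	/* was: ..., false, ... */
	return 0;
}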

diff --git a/.bcachefs_revision b/.bcachefs_revision
index def87375399990b1c6d54ac8a8da8cffb86c38fc..713b43465ec5349868852cf18f2fbcb2be0a9768 100644
@@ -1 +1 @@
-454bd4f82d85bb42a86b8eb0172b13e86e5788a7
+f38382c5747090ac9160e6d5fa1386954cb1f23c
diff --git a/cmd_debug.c b/cmd_debug.c
index 637da1c51fed8e71aae908850c018e767f6181d0..808226d9a352477ced91ad022bbaaf48680c1c84 100644
@@ -63,7 +63,7 @@ static void dump_one_device(struct bch_fs *c, struct bch_dev *ca, int fd)
                struct btree_iter *iter;
                struct btree *b;
 
-               bch2_trans_init(&trans, c);
+               bch2_trans_init(&trans, c, 0, 0);
 
                for_each_btree_node(&trans, iter, i, POS_MIN, 0, b) {
                        struct bkey_s_c_extent e = bkey_i_to_s_c_extent(&b->key);
@@ -160,7 +160,7 @@ static void list_keys(struct bch_fs *c, enum btree_id btree_id,
        char buf[512];
        int ret;
 
-       bch2_trans_init(&trans, c);
+       bch2_trans_init(&trans, c, 0, 0);
 
        for_each_btree_key(&trans, iter, btree_id, start,
                           BTREE_ITER_PREFETCH, k, ret) {
@@ -181,7 +181,7 @@ static void list_btree_formats(struct bch_fs *c, enum btree_id btree_id,
        struct btree *b;
        char buf[4096];
 
-       bch2_trans_init(&trans, c);
+       bch2_trans_init(&trans, c, 0, 0);
 
        for_each_btree_node(&trans, iter, btree_id, start, 0, b) {
                if (bkey_cmp(b->key.k.p, end) > 0)
@@ -204,7 +204,7 @@ static void list_nodes_keys(struct bch_fs *c, enum btree_id btree_id,
        struct btree *b;
        char buf[4096];
 
-       bch2_trans_init(&trans, c);
+       bch2_trans_init(&trans, c, 0, 0);
 
        for_each_btree_node(&trans, iter, btree_id, start, 0, b) {
                if (bkey_cmp(b->key.k.p, end) > 0)
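
Every change in this file is the same mechanical update: bch2_trans_init() grows two arguments, expected_nr_iters and expected_mem_bytes (the new prototype is in the btree_iter.c hunks below), and passing 0, 0 keeps the previous behaviour. A hedged sketch of the resulting idiom, assuming the bcachefs headers rather than standalone code:

struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k;
int ret;

/* 0, 0: no preallocation hints; a caller that knows it will need many
 * iterators or lots of scratch memory can pass estimates up front so
 * the transaction never has to realloc-and-restart mid-flight */
bch2_trans_init(&trans, c, 0, 0);

for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS_MIN,
		   BTREE_ITER_PREFETCH, k, ret) {
	/* ... inspect k ... */
}

ret = bch2_trans_exit(&trans) ?: ret;
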
diff --git a/include/trace/events/bcachefs.h b/include/trace/events/bcachefs.h
index 5fa570a516a70ca8928da22f43d2282ded0b4b8f..76673d9ab5bc00d1a8353a0679b4d0b5d8a7b825 100644
@@ -499,16 +499,14 @@ TRACE_EVENT(copygc,
 );
 
 DECLARE_EVENT_CLASS(transaction_restart,
-       TP_PROTO(struct bch_fs *c, unsigned long ip),
-       TP_ARGS(c, ip),
+       TP_PROTO(unsigned long ip),
+       TP_ARGS(ip),
 
        TP_STRUCT__entry(
-               __array(char,                   name,   16)
                __field(unsigned long,          ip      )
        ),
 
        TP_fast_assign(
-               memcpy(__entry->name, c->name, 16);
                __entry->ip = ip;
        ),
 
@@ -516,58 +514,130 @@ DECLARE_EVENT_CLASS(transaction_restart,
 );
 
 DEFINE_EVENT(transaction_restart,      trans_restart_btree_node_reused,
-       TP_PROTO(struct bch_fs *c, unsigned long ip),
-       TP_ARGS(c, ip)
+       TP_PROTO(unsigned long ip),
+       TP_ARGS(ip)
 );
 
 DEFINE_EVENT(transaction_restart,      trans_restart_would_deadlock,
-       TP_PROTO(struct bch_fs *c, unsigned long ip),
-       TP_ARGS(c, ip)
+       TP_PROTO(unsigned long ip),
+       TP_ARGS(ip)
 );
 
-DEFINE_EVENT(transaction_restart,      trans_restart_iters_realloced,
-       TP_PROTO(struct bch_fs *c, unsigned long ip),
-       TP_ARGS(c, ip)
+TRACE_EVENT(trans_restart_iters_realloced,
+       TP_PROTO(unsigned long ip, unsigned nr),
+       TP_ARGS(ip, nr),
+
+       TP_STRUCT__entry(
+               __field(unsigned long,          ip      )
+               __field(unsigned,               nr      )
+       ),
+
+       TP_fast_assign(
+               __entry->ip     = ip;
+               __entry->nr     = nr;
+       ),
+
+       TP_printk("%pf nr %u", (void *) __entry->ip, __entry->nr)
 );
 
-DEFINE_EVENT(transaction_restart,      trans_restart_mem_realloced,
-       TP_PROTO(struct bch_fs *c, unsigned long ip),
-       TP_ARGS(c, ip)
+TRACE_EVENT(trans_restart_mem_realloced,
+       TP_PROTO(unsigned long ip, unsigned long bytes),
+       TP_ARGS(ip, bytes),
+
+       TP_STRUCT__entry(
+               __field(unsigned long,          ip      )
+               __field(unsigned long,          bytes   )
+       ),
+
+       TP_fast_assign(
+               __entry->ip     = ip;
+               __entry->bytes  = bytes;
+       ),
+
+       TP_printk("%pf bytes %lu", (void *) __entry->ip, __entry->bytes)
 );
 
 DEFINE_EVENT(transaction_restart,      trans_restart_journal_res_get,
-       TP_PROTO(struct bch_fs *c, unsigned long ip),
-       TP_ARGS(c, ip)
+       TP_PROTO(unsigned long ip),
+       TP_ARGS(ip)
 );
 
 DEFINE_EVENT(transaction_restart,      trans_restart_journal_preres_get,
-       TP_PROTO(struct bch_fs *c, unsigned long ip),
-       TP_ARGS(c, ip)
+       TP_PROTO(unsigned long ip),
+       TP_ARGS(ip)
 );
 
 DEFINE_EVENT(transaction_restart,      trans_restart_mark_replicas,
-       TP_PROTO(struct bch_fs *c, unsigned long ip),
-       TP_ARGS(c, ip)
+       TP_PROTO(unsigned long ip),
+       TP_ARGS(ip)
 );
 
 DEFINE_EVENT(transaction_restart,      trans_restart_fault_inject,
-       TP_PROTO(struct bch_fs *c, unsigned long ip),
-       TP_ARGS(c, ip)
+       TP_PROTO(unsigned long ip),
+       TP_ARGS(ip)
 );
 
 DEFINE_EVENT(transaction_restart,      trans_restart_btree_node_split,
-       TP_PROTO(struct bch_fs *c, unsigned long ip),
-       TP_ARGS(c, ip)
+       TP_PROTO(unsigned long ip),
+       TP_ARGS(ip)
+);
+
+DEFINE_EVENT(transaction_restart,      trans_restart_mark,
+       TP_PROTO(unsigned long ip),
+       TP_ARGS(ip)
+);
+
+DEFINE_EVENT(transaction_restart,      trans_restart_upgrade,
+       TP_PROTO(unsigned long ip),
+       TP_ARGS(ip)
+);
+
+DEFINE_EVENT(transaction_restart,      trans_restart_iter_upgrade,
+       TP_PROTO(unsigned long ip),
+       TP_ARGS(ip)
 );
 
 DEFINE_EVENT(transaction_restart,      trans_restart_traverse,
-       TP_PROTO(struct bch_fs *c, unsigned long ip),
-       TP_ARGS(c, ip)
+       TP_PROTO(unsigned long ip),
+       TP_ARGS(ip)
 );
 
 DEFINE_EVENT(transaction_restart,      trans_restart_atomic,
-       TP_PROTO(struct bch_fs *c, unsigned long ip),
-       TP_ARGS(c, ip)
+       TP_PROTO(unsigned long ip),
+       TP_ARGS(ip)
+);
+
+DECLARE_EVENT_CLASS(node_lock_fail,
+       TP_PROTO(unsigned level, u32 iter_seq, unsigned node, u32 node_seq),
+       TP_ARGS(level, iter_seq, node, node_seq),
+
+       TP_STRUCT__entry(
+               __field(u32,            level)
+               __field(u32,            iter_seq)
+               __field(u32,            node)
+               __field(u32,            node_seq)
+       ),
+
+       TP_fast_assign(
+               __entry->level          = level;
+               __entry->iter_seq       = iter_seq;
+               __entry->node           = node;
+               __entry->node_seq       = node_seq;
+       ),
+
+       TP_printk("level %u iter seq %u node %u node seq %u",
+                 __entry->level, __entry->iter_seq,
+                 __entry->node, __entry->node_seq)
+);
+
+DEFINE_EVENT(node_lock_fail, node_upgrade_fail,
+       TP_PROTO(unsigned level, u32 iter_seq, unsigned node, u32 node_seq),
+       TP_ARGS(level, iter_seq, node, node_seq)
+);
+
+DEFINE_EVENT(node_lock_fail, node_relock_fail,
+       TP_PROTO(unsigned level, u32 iter_seq, unsigned node, u32 node_seq),
+       TP_ARGS(level, iter_seq, node, node_seq)
 );
 
 #endif /* _TRACE_BCACHE_H */
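
Two effects of the tracepoint rework: dropping the struct bch_fs argument (and its 16-byte name copy) makes every restart event cheaper, and the two realloc events are promoted from the shared class to standalone TRACE_EVENTs so they can record a payload (iterator count, byte count) instead of just a caller address. The matching call sites, as they appear in the btree_iter.c hunks later in this diff:

if (trans->iters_live) {
	trace_trans_restart_iters_realloced(trans->ip, trans->size);
	return -EINTR;
}

if (old_bytes) {
	trace_trans_restart_mem_realloced(trans->ip, new_bytes);
	return -EINTR;
}
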
diff --git a/libbcachefs/acl.c b/libbcachefs/acl.c
index e1c7b87d61897364a66757bf74069c382ea7cf50..cdcccaad916d1b228f674c2123e4af1008a09181 100644
@@ -220,7 +220,7 @@ struct posix_acl *bch2_get_acl(struct inode *vinode, int type)
        struct bkey_s_c_xattr xattr;
        struct posix_acl *acl = NULL;
 
-       bch2_trans_init(&trans, c);
+       bch2_trans_init(&trans, c, 0, 0);
 retry:
        bch2_trans_begin(&trans);
 
@@ -301,7 +301,7 @@ int bch2_set_acl(struct inode *vinode, struct posix_acl *acl, int type)
        int ret;
 
        mutex_lock(&inode->ei_update_lock);
-       bch2_trans_init(&trans, c);
+       bch2_trans_init(&trans, c, 0, 0);
 
        if (type == ACL_TYPE_ACCESS && acl) {
                ret = posix_acl_update_mode(&inode->v, &mode, &acl);
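
bch2_get_acl() above shows the retry idiom these transaction helpers plug into; the same loop appears in the trans-wrapper macro in btree_update.h later in this diff, and bch2_trans_begin() is now a plain function rather than a TRACE_TRANSACTION_RESTARTS macro wrapper. In sketch form, assuming the bcachefs headers:

bch2_trans_init(&trans, c, 0, 0);
retry:
	bch2_trans_begin(&trans);

	/* ... lookups/updates set ret; -EINTR means the transaction
	 * was restarted and every iterator must be re-traversed ... */
	if (ret == -EINTR)
		goto retry;

bch2_trans_exit(&trans);
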
diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c
index a61b25cc719c6cf26ede177fc936df259ad10f60..744addb0019d3c12539cd173cfe960b455d07730 100644
@@ -228,10 +228,12 @@ int bch2_alloc_read(struct bch_fs *c, struct journal_keys *journal_keys)
        unsigned i;
        int ret;
 
-       bch2_trans_init(&trans, c);
+       bch2_trans_init(&trans, c, 0, 0);
 
        for_each_btree_key(&trans, iter, BTREE_ID_ALLOC, POS_MIN, 0, k, ret)
-               bch2_mark_key(c, k, true, 0, NULL, 0, 0);
+               bch2_mark_key(c, k, 0, NULL, 0,
+                             BCH_BUCKET_MARK_ALLOC_READ|
+                             BCH_BUCKET_MARK_NOATOMIC);
 
        ret = bch2_trans_exit(&trans) ?: ret;
        if (ret) {
@@ -241,8 +243,9 @@ int bch2_alloc_read(struct bch_fs *c, struct journal_keys *journal_keys)
 
        for_each_journal_key(*journal_keys, j)
                if (j->btree_id == BTREE_ID_ALLOC)
-                       bch2_mark_key(c, bkey_i_to_s_c(j->k),
-                                     true, 0, NULL, 0, 0);
+                       bch2_mark_key(c, bkey_i_to_s_c(j->k), 0, NULL, 0,
+                                     BCH_BUCKET_MARK_ALLOC_READ|
+                                     BCH_BUCKET_MARK_NOATOMIC);
 
        percpu_down_write(&c->mark_lock);
        bch2_dev_usage_from_buckets(c);
@@ -283,7 +286,7 @@ int bch2_alloc_replay_key(struct bch_fs *c, struct bkey_i *k)
        if (k->k.p.offset >= ca->mi.nbuckets)
                return 0;
 
-       bch2_trans_init(&trans, c);
+       bch2_trans_init(&trans, c, 0, 0);
 
        iter = bch2_trans_get_iter(&trans, BTREE_ID_ALLOC, k->k.p,
                                   BTREE_ITER_INTENT);
@@ -328,7 +331,7 @@ int bch2_alloc_write(struct bch_fs *c, unsigned flags, bool *wrote)
 
        BUG_ON(BKEY_ALLOC_VAL_U64s_MAX > 8);
 
-       bch2_trans_init(&trans, c);
+       bch2_trans_init(&trans, c, 0, 0);
 
        iter = bch2_trans_get_iter(&trans, BTREE_ID_ALLOC, POS_MIN,
                                   BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
@@ -948,6 +951,7 @@ retry:
                                BTREE_INSERT_NOFAIL|
                                BTREE_INSERT_USE_RESERVE|
                                BTREE_INSERT_USE_ALLOC_RESERVE|
+                               BTREE_INSERT_BUCKET_INVALIDATE|
                                flags);
        if (ret == -EINTR)
                goto retry;
@@ -1027,7 +1031,7 @@ static int bch2_invalidate_buckets(struct bch_fs *c, struct bch_dev *ca)
        u64 journal_seq = 0;
        int ret = 0;
 
-       bch2_trans_init(&trans, c);
+       bch2_trans_init(&trans, c, 0, 0);
 
        iter = bch2_trans_get_iter(&trans, BTREE_ID_ALLOC,
                                   POS(ca->dev_idx, 0),
diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h
index d6dc3bd457d33c5c36e7734df717d15fb69b6d7b..09afbed9511f09c8aff91663abf1fa978e70352e 100644
@@ -842,4 +842,9 @@ static inline s64 bch2_current_time(struct bch_fs *c)
        return timespec_to_bch2_time(c, now);
 }
 
+static inline bool bch2_dev_exists2(const struct bch_fs *c, unsigned dev)
+{
+       return dev < c->sb.nr_devices && c->devs[dev];
+}
+
 #endif /* _BCACHEFS_H */
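
The new bch2_dev_exists2() helper bundles the two checks needed before using a device index: it is below the superblock's device count, and the slot is actually populated. A hedged usage sketch (the surrounding caller is hypothetical):

/* e.g. validating a device index decoded from an on-disk pointer */
if (!bch2_dev_exists2(c, dev_idx))
	return -EINVAL;	/* index out of range, or an empty slot */

/* safe to look up c->devs[dev_idx] from here */
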
diff --git a/libbcachefs/bkey_methods.c b/libbcachefs/bkey_methods.c
index 48c86e52e058b6928a4a7e12ebff58ee21121295..711bc88fd95f074510233d528aaca80693dc3b8b 100644
@@ -201,15 +201,20 @@ enum merge_result bch2_bkey_merge(struct bch_fs *c,
                                  struct bkey_i *l, struct bkey_i *r)
 {
        const struct bkey_ops *ops = &bch2_bkey_ops[l->k.type];
+       enum merge_result ret;
 
-       if (!key_merging_disabled(c) &&
-           ops->key_merge &&
-           l->k.type == r->k.type &&
-           !bversion_cmp(l->k.version, r->k.version) &&
-           !bkey_cmp(l->k.p, bkey_start_pos(&r->k)))
-               return ops->key_merge(c, l, r);
+       if (key_merging_disabled(c) ||
+           !ops->key_merge ||
+           l->k.type != r->k.type ||
+           bversion_cmp(l->k.version, r->k.version) ||
+           bkey_cmp(l->k.p, bkey_start_pos(&r->k)))
+               return BCH_MERGE_NOMERGE;
 
-       return BCH_MERGE_NOMERGE;
+       ret = ops->key_merge(c, l, r);
+
+       if (ret != BCH_MERGE_NOMERGE)
+               l->k.needs_whiteout |= r->k.needs_whiteout;
+       return ret;
 }
 
 static const struct old_bkey_type {
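
Two things change in bch2_bkey_merge(): the eligibility tests flatten into an early-return guard, and a successful merge now ORs the right key's needs_whiteout bit into the left key. A hedged reading of the second part (the commit itself doesn't spell it out): if r still owed a whiteout, i.e. an older on-disk version of its range must stay suppressed, the merged key that now covers that range has to inherit the obligation, or compaction could resurrect the overwritten data. The shape of the rewrite, with mergeable() standing in for the real chain of checks:

if (!mergeable(l, r))	/* illustrative; the real test is the chain above */
	return BCH_MERGE_NOMERGE;

ret = ops->key_merge(c, l, r);
if (ret != BCH_MERGE_NOMERGE)
	l->k.needs_whiteout |= r->k.needs_whiteout;
return ret;
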
diff --git a/libbcachefs/btree_cache.c b/libbcachefs/btree_cache.c
index 2a20bdef8fd090c624ef6548d10b40f26d0f8033..587a04f56b84e6d3c6a2851efdabba831ee06784 100644
@@ -652,8 +652,7 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c,
  */
 struct btree *bch2_btree_node_get(struct bch_fs *c, struct btree_iter *iter,
                                  const struct bkey_i *k, unsigned level,
-                                 enum six_lock_type lock_type,
-                                 bool may_drop_locks)
+                                 enum six_lock_type lock_type)
 {
        struct btree_cache *bc = &c->btree_cache;
        struct btree *b;
@@ -720,8 +719,7 @@ retry:
                if (btree_node_read_locked(iter, level + 1))
                        btree_node_unlock(iter, level + 1);
 
-               if (!btree_node_lock(b, k->k.p, level, iter,
-                                    lock_type, may_drop_locks))
+               if (!btree_node_lock(b, k->k.p, level, iter, lock_type))
                        return ERR_PTR(-EINTR);
 
                if (unlikely(PTR_HASH(&b->key) != PTR_HASH(k) ||
@@ -731,9 +729,7 @@ retry:
                        if (bch2_btree_node_relock(iter, level + 1))
                                goto retry;
 
-                       trans_restart();
-                       trace_trans_restart_btree_node_reused(c,
-                                               iter->trans->ip);
+                       trace_trans_restart_btree_node_reused(iter->trans->ip);
                        return ERR_PTR(-EINTR);
                }
        }
@@ -770,9 +766,9 @@ retry:
 struct btree *bch2_btree_node_get_sibling(struct bch_fs *c,
                                          struct btree_iter *iter,
                                          struct btree *b,
-                                         bool may_drop_locks,
                                          enum btree_node_sibling sib)
 {
+       struct btree_trans *trans = iter->trans;
        struct btree *parent;
        struct btree_node_iter node_iter;
        struct bkey_packed *k;
@@ -784,8 +780,10 @@ struct btree *bch2_btree_node_get_sibling(struct bch_fs *c,
        if (!parent)
                return NULL;
 
-       if (!bch2_btree_node_relock(iter, level + 1))
-               goto out_upgrade;
+       if (!bch2_btree_node_relock(iter, level + 1)) {
+               ret = ERR_PTR(-EINTR);
+               goto out;
+       }
 
        node_iter = iter->l[parent->level].iter;
 
@@ -802,19 +800,19 @@ struct btree *bch2_btree_node_get_sibling(struct bch_fs *c,
        bch2_bkey_unpack(parent, &tmp.k, k);
 
        ret = bch2_btree_node_get(c, iter, &tmp.k, level,
-                                 SIX_LOCK_intent, may_drop_locks);
+                                 SIX_LOCK_intent);
 
-       if (PTR_ERR_OR_ZERO(ret) == -EINTR && may_drop_locks) {
+       if (PTR_ERR_OR_ZERO(ret) == -EINTR && !trans->nounlock) {
                struct btree_iter *linked;
 
                if (!bch2_btree_node_relock(iter, level + 1))
-                       goto out_upgrade;
+                       goto out;
 
                /*
                 * We might have got -EINTR because trylock failed, and we're
                 * holding other locks that would cause us to deadlock:
                 */
-               trans_for_each_iter(iter->trans, linked)
+               trans_for_each_iter(trans, linked)
                        if (btree_iter_cmp(iter, linked) < 0)
                                __bch2_btree_iter_unlock(linked);
 
@@ -822,7 +820,7 @@ struct btree *bch2_btree_node_get_sibling(struct bch_fs *c,
                        btree_node_unlock(iter, level);
 
                ret = bch2_btree_node_get(c, iter, &tmp.k, level,
-                                         SIX_LOCK_intent, may_drop_locks);
+                                         SIX_LOCK_intent);
 
                /*
                 * before btree_iter_relock() calls btree_iter_verify_locks():
@@ -839,17 +837,16 @@ struct btree *bch2_btree_node_get_sibling(struct bch_fs *c,
                        }
                }
 
-               bch2_btree_trans_relock(iter->trans);
+               bch2_trans_relock(trans);
        }
 out:
        if (btree_lock_want(iter, level + 1) == BTREE_NODE_UNLOCKED)
                btree_node_unlock(iter, level + 1);
 
-       bch2_btree_trans_verify_locks(iter->trans);
+       if (PTR_ERR_OR_ZERO(ret) == -EINTR)
+               bch2_btree_iter_upgrade(iter, level + 2);
 
-       BUG_ON((!may_drop_locks || !IS_ERR(ret)) &&
-              (iter->uptodate >= BTREE_ITER_NEED_RELOCK ||
-               !btree_node_locked(iter, level)));
+       BUG_ON(!IS_ERR(ret) && !btree_node_locked(iter, level));
 
        if (!IS_ERR_OR_NULL(ret)) {
                struct btree *n1 = ret, *n2 = b;
@@ -862,12 +859,9 @@ out:
                                n2->data->min_key));
        }
 
+       bch2_btree_trans_verify_locks(trans);
+
        return ret;
-out_upgrade:
-       if (may_drop_locks)
-               bch2_btree_iter_upgrade(iter, level + 2, true);
-       ret = ERR_PTR(-EINTR);
-       goto out;
 }
 
 void bch2_btree_node_prefetch(struct bch_fs *c, struct btree_iter *iter,
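
The through-line of the btree_cache.c changes: the may_drop_locks bool that was threaded through bch2_btree_node_get(), bch2_btree_node_get_sibling() and btree_node_lock() is gone, replaced by a single nounlock:1 bit on struct btree_trans (added in the btree_types.h hunk below). A hedged before/after sketch — where the bit gets set is outside the hunks shown here:

/* before: every caller carried the policy */
b = bch2_btree_node_get(c, iter, &tmp.k, level, SIX_LOCK_intent, true);

/* after: the "may we drop locks to resolve lock ordering?" question
 * is answered by the transaction itself */
b = bch2_btree_node_get(c, iter, &tmp.k, level, SIX_LOCK_intent);

if (!trans->nounlock) {
	/* safe to drop and retake locks to avoid deadlock */
}
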
diff --git a/libbcachefs/btree_cache.h b/libbcachefs/btree_cache.h
index 08e6f2a68ecbc684aade46c114f15169b78f47ae..19e14d32cf2d2f2ecab405e71a3e5004bbf6b0c3 100644
@@ -22,11 +22,10 @@ struct btree *bch2_btree_node_mem_alloc(struct bch_fs *);
 
 struct btree *bch2_btree_node_get(struct bch_fs *, struct btree_iter *,
                                  const struct bkey_i *, unsigned,
-                                 enum six_lock_type, bool);
+                                 enum six_lock_type);
 
 struct btree *bch2_btree_node_get_sibling(struct bch_fs *, struct btree_iter *,
-                                         struct btree *, bool,
-                                         enum btree_node_sibling);
+                               struct btree *, enum btree_node_sibling);
 
 void bch2_btree_node_prefetch(struct bch_fs *, struct btree_iter *,
                              const struct bkey_i *, unsigned);
diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c
index 9f0de5cd25ab31093f5fc321a8f04ef561110ab9..c2b893a9cb0f31173777ee3d08c0fe75516f5afd 100644
@@ -170,7 +170,7 @@ static int bch2_gc_mark_key(struct bch_fs *c, struct bkey_s_c k,
                *max_stale = max(*max_stale, ptr_stale(ca, ptr));
        }
 
-       bch2_mark_key(c, k, true, k.k->size, NULL, 0, flags);
+       bch2_mark_key(c, k, k.k->size, NULL, 0, flags);
 fsck_err:
        return ret;
 }
@@ -214,7 +214,7 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id,
        u8 max_stale;
        int ret = 0;
 
-       bch2_trans_init(&trans, c);
+       bch2_trans_init(&trans, c, 0, 0);
 
        gc_pos_set(c, gc_pos_btree(btree_id, POS_MIN, 0));
 
@@ -283,7 +283,7 @@ static int mark_journal_key(struct bch_fs *c, enum btree_id id,
        if (ret)
                return ret;
 
-       bch2_trans_init(&trans, c);
+       bch2_trans_init(&trans, c, 0, 0);
 
        for_each_btree_key(&trans, iter, id, bkey_start_pos(&insert->k),
                           BTREE_ITER_SLOTS, k, ret) {
@@ -422,8 +422,7 @@ static void bch2_mark_pending_btree_node_frees(struct bch_fs *c)
 
        for_each_pending_btree_node_free(c, as, d)
                if (d->index_update_done)
-                       bch2_mark_key(c, bkey_i_to_s_c(&d->key),
-                                     true, 0, NULL, 0,
+                       bch2_mark_key(c, bkey_i_to_s_c(&d->key), 0, NULL, 0,
                                      BCH_BUCKET_MARK_GC);
 
        mutex_unlock(&c->btree_interior_update_lock);
@@ -1057,7 +1056,7 @@ static int bch2_coalesce_btree(struct bch_fs *c, enum btree_id btree_id)
        struct btree *merge[GC_MERGE_NODES];
        u32 lock_seq[GC_MERGE_NODES];
 
-       bch2_trans_init(&trans, c);
+       bch2_trans_init(&trans, c, 0, 0);
 
        /*
         * XXX: We don't have a good way of positively matching on sibling nodes
diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c
index fe888c57522a28a35cb5f78221409ad16647b956..8b7e05ed066af7abe84c98ba4c5a43f5163f3757 100644
@@ -1151,7 +1151,7 @@ static void bch2_btree_node_write_error(struct bch_fs *c,
        struct btree_iter *iter;
        int ret;
 
-       bch2_trans_init(&trans, c);
+       bch2_trans_init(&trans, c, 0, 0);
 
        iter = bch2_trans_get_node_iter(&trans, b->btree_id, b->key.k.p,
                                        BTREE_MAX_DEPTH, b->level, 0);
diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c
index 5631f98f38456df739b8e28852da2b9e212c694b..e78c6cadeb9657ef27c275e1f5e694bc5cd3c627 100644
@@ -14,13 +14,18 @@ static inline struct bkey_s_c __btree_iter_peek_all(struct btree_iter *,
                                                    struct btree_iter_level *,
                                                    struct bkey *);
 
-#define BTREE_ITER_NOT_END     ((struct btree *) 1)
+#define BTREE_ITER_NO_NODE_GET_LOCKS   ((struct btree *) 1)
+#define BTREE_ITER_NO_NODE_DROP                ((struct btree *) 2)
+#define BTREE_ITER_NO_NODE_LOCK_ROOT   ((struct btree *) 3)
+#define BTREE_ITER_NO_NODE_UP          ((struct btree *) 4)
+#define BTREE_ITER_NO_NODE_DOWN                ((struct btree *) 5)
+#define BTREE_ITER_NO_NODE_INIT                ((struct btree *) 6)
+#define BTREE_ITER_NO_NODE_ERROR       ((struct btree *) 7)
 
 static inline bool is_btree_node(struct btree_iter *iter, unsigned l)
 {
        return l < BTREE_MAX_DEPTH &&
-               iter->l[l].b &&
-               iter->l[l].b != BTREE_ITER_NOT_END;
+               (unsigned long) iter->l[l].b >= 128;
 }
 
 /* Returns < 0 if @k is before iter pos, > 0 if @k is after */
@@ -105,19 +110,20 @@ bool __bch2_btree_node_relock(struct btree_iter *iter, unsigned level)
        struct btree *b = btree_iter_node(iter, level);
        int want = __btree_lock_want(iter, level);
 
-       if (!b || b == BTREE_ITER_NOT_END)
+       if (!is_btree_node(iter, level))
                return false;
 
        if (race_fault())
                return false;
 
-       if (!six_relock_type(&b->lock, want, iter->l[level].lock_seq) &&
-           !(iter->l[level].lock_seq >> 1 == b->lock.state.seq >> 1 &&
-             btree_node_lock_increment(iter, b, level, want)))
+       if (six_relock_type(&b->lock, want, iter->l[level].lock_seq) ||
+           (btree_node_lock_seq_matches(iter, b, level) &&
+            btree_node_lock_increment(iter, b, level, want))) {
+               mark_btree_node_locked(iter, level, want);
+               return true;
+       } else {
                return false;
-
-       mark_btree_node_locked(iter, level, want);
-       return true;
+       }
 }
 
 static bool bch2_btree_node_upgrade(struct btree_iter *iter, unsigned level)
@@ -140,7 +146,7 @@ static bool bch2_btree_node_upgrade(struct btree_iter *iter, unsigned level)
            : six_relock_type(&b->lock, SIX_LOCK_intent, iter->l[level].lock_seq))
                goto success;
 
-       if (iter->l[level].lock_seq >> 1 == b->lock.state.seq >> 1 &&
+       if (btree_node_lock_seq_matches(iter, b, level) &&
            btree_node_lock_increment(iter, b, level, BTREE_NODE_INTENT_LOCKED)) {
                btree_node_unlock(iter, level);
                goto success;
@@ -153,7 +159,7 @@ success:
 }
 
 static inline bool btree_iter_get_locks(struct btree_iter *iter,
-                                       bool upgrade)
+                                       bool upgrade, bool trace)
 {
        unsigned l = iter->level;
        int fail_idx = -1;
@@ -165,6 +171,17 @@ static inline bool btree_iter_get_locks(struct btree_iter *iter,
                if (!(upgrade
                      ? bch2_btree_node_upgrade(iter, l)
                      : bch2_btree_node_relock(iter, l))) {
+                       if (trace)
+                               (upgrade
+                                ? trace_node_upgrade_fail
+                                : trace_node_relock_fail)(l, iter->l[l].lock_seq,
+                                               is_btree_node(iter, l)
+                                               ? 0
+                                               : (unsigned long) iter->l[l].b,
+                                               is_btree_node(iter, l)
+                                               ? iter->l[l].b->lock.state.seq
+                                               : 0);
+
                        fail_idx = l;
                        btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
                }
@@ -179,7 +196,7 @@ static inline bool btree_iter_get_locks(struct btree_iter *iter,
         */
        while (fail_idx >= 0) {
                btree_node_unlock(iter, fail_idx);
-               iter->l[fail_idx].b = BTREE_ITER_NOT_END;
+               iter->l[fail_idx].b = BTREE_ITER_NO_NODE_GET_LOCKS;
                --fail_idx;
        }
 
@@ -195,8 +212,7 @@ static inline bool btree_iter_get_locks(struct btree_iter *iter,
 bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
                           unsigned level,
                           struct btree_iter *iter,
-                          enum six_lock_type type,
-                          bool may_drop_locks)
+                          enum six_lock_type type)
 {
        struct btree_iter *linked;
        bool ret = true;
@@ -224,11 +240,11 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
                 */
                if (type == SIX_LOCK_intent &&
                    linked->nodes_locked != linked->nodes_intent_locked) {
-                       if (may_drop_locks) {
+                       if (!(iter->trans->nounlock)) {
                                linked->locks_want = max_t(unsigned,
                                                linked->locks_want,
                                                __fls(linked->nodes_locked) + 1);
-                               btree_iter_get_locks(linked, true);
+                               btree_iter_get_locks(linked, true, false);
                        }
                        ret = false;
                }
@@ -240,21 +256,19 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
                 */
                if (linked->btree_id == iter->btree_id &&
                    level > __fls(linked->nodes_locked)) {
-                       if (may_drop_locks) {
+                       if (!(iter->trans->nounlock)) {
                                linked->locks_want =
                                        max(level + 1, max_t(unsigned,
                                            linked->locks_want,
                                            iter->locks_want));
-                               btree_iter_get_locks(linked, true);
+                               btree_iter_get_locks(linked, true, false);
                        }
                        ret = false;
                }
        }
 
        if (unlikely(!ret)) {
-               trans_restart();
-               trace_trans_restart_would_deadlock(iter->trans->c,
-                                                  iter->trans->ip);
+               trace_trans_restart_would_deadlock(iter->trans->ip);
                return false;
        }
 
@@ -269,9 +283,6 @@ void bch2_btree_iter_verify_locks(struct btree_iter *iter)
 {
        unsigned l;
 
-       BUG_ON((iter->flags & BTREE_ITER_NOUNLOCK) &&
-              !btree_node_locked(iter, 0));
-
        for (l = 0; btree_iter_node(iter, l); l++) {
                if (iter->uptodate >= BTREE_ITER_NEED_RELOCK &&
                    !btree_node_locked(iter, l))
@@ -292,10 +303,10 @@ void bch2_btree_trans_verify_locks(struct btree_trans *trans)
 #endif
 
 __flatten
-static bool bch2_btree_iter_relock(struct btree_iter *iter)
+static bool bch2_btree_iter_relock(struct btree_iter *iter, bool trace)
 {
        return iter->uptodate >= BTREE_ITER_NEED_RELOCK
-               ? btree_iter_get_locks(iter, false)
+               ? btree_iter_get_locks(iter, false, trace)
                : true;
 }
 
@@ -308,7 +319,7 @@ bool __bch2_btree_iter_upgrade(struct btree_iter *iter,
 
        iter->locks_want = new_locks_want;
 
-       if (btree_iter_get_locks(iter, true))
+       if (btree_iter_get_locks(iter, true, true))
                return true;
 
        /*
@@ -319,10 +330,9 @@ bool __bch2_btree_iter_upgrade(struct btree_iter *iter,
        trans_for_each_iter(iter->trans, linked)
                if (linked != iter &&
                    linked->btree_id == iter->btree_id &&
-                   btree_iter_cmp(linked, iter) <= 0 &&
                    linked->locks_want < new_locks_want) {
                        linked->locks_want = new_locks_want;
-                       btree_iter_get_locks(linked, true);
+                       btree_iter_get_locks(linked, true, false);
                }
 
        return false;
@@ -389,28 +399,21 @@ void __bch2_btree_iter_downgrade(struct btree_iter *iter,
        bch2_btree_trans_verify_locks(iter->trans);
 }
 
-int bch2_btree_iter_unlock(struct btree_iter *iter)
-{
-       struct btree_iter *linked;
-
-       trans_for_each_iter(iter->trans, linked)
-               __bch2_btree_iter_unlock(linked);
-
-       return btree_iter_err(iter);
-}
+/* Btree transaction locking: */
 
-bool bch2_btree_trans_relock(struct btree_trans *trans)
+bool bch2_trans_relock(struct btree_trans *trans)
 {
        struct btree_iter *iter;
        bool ret = true;
 
        trans_for_each_iter(trans, iter)
-               ret &= bch2_btree_iter_relock(iter);
+               if (iter->uptodate == BTREE_ITER_NEED_RELOCK)
+                       ret &= bch2_btree_iter_relock(iter, true);
 
        return ret;
 }
 
-void bch2_btree_trans_unlock(struct btree_trans *trans)
+void bch2_trans_unlock(struct btree_trans *trans)
 {
        struct btree_iter *iter;
 
@@ -418,8 +421,6 @@ void bch2_btree_trans_unlock(struct btree_trans *trans)
                __bch2_btree_iter_unlock(iter);
 }
 
-/* Btree transaction locking: */
-
 /* Btree iterator: */
 
 #ifdef CONFIG_BCACHEFS_DEBUG
@@ -824,7 +825,7 @@ void bch2_btree_iter_node_drop(struct btree_iter *iter, struct btree *b)
        trans_for_each_iter(iter->trans, linked)
                if (linked->l[level].b == b) {
                        __btree_node_unlock(linked, level);
-                       linked->l[level].b = BTREE_ITER_NOT_END;
+                       linked->l[level].b = BTREE_ITER_NO_NODE_DROP;
                }
 }
 
@@ -862,26 +863,28 @@ static inline int btree_iter_lock_root(struct btree_iter *iter,
                         * that depth
                         */
                        iter->level = depth_want;
-                       iter->l[iter->level].b = NULL;
+                       for (i = iter->level; i < BTREE_MAX_DEPTH; i++)
+                               iter->l[i].b = NULL;
                        return 1;
                }
 
                lock_type = __btree_lock_want(iter, iter->level);
                if (unlikely(!btree_node_lock(b, POS_MAX, iter->level,
-                                             iter, lock_type, true)))
+                                             iter, lock_type)))
                        return -EINTR;
 
                if (likely(b == c->btree_roots[iter->btree_id].b &&
                           b->level == iter->level &&
                           !race_fault())) {
                        for (i = 0; i < iter->level; i++)
-                               iter->l[i].b = BTREE_ITER_NOT_END;
+                               iter->l[i].b = BTREE_ITER_NO_NODE_LOCK_ROOT;
                        iter->l[iter->level].b = b;
+                       for (i = iter->level + 1; i < BTREE_MAX_DEPTH; i++)
+                               iter->l[i].b = NULL;
 
                        mark_btree_node_locked(iter, iter->level, lock_type);
                        btree_iter_node_set(iter, b);
                        return 0;
-
                }
 
                six_unlock_type(&b->lock, lock_type);
@@ -932,7 +935,7 @@ static inline int btree_iter_down(struct btree_iter *iter)
        bch2_bkey_unpack(l->b, &tmp.k,
                         bch2_btree_node_iter_peek(&l->iter, l->b));
 
-       b = bch2_btree_node_get(c, iter, &tmp.k, level, lock_type, true);
+       b = bch2_btree_node_get(c, iter, &tmp.k, level, lock_type);
        if (unlikely(IS_ERR(b)))
                return PTR_ERR(b);
 
@@ -971,7 +974,7 @@ static int __btree_iter_traverse_all(struct btree_trans *trans,
 #undef btree_iter_cmp_by_idx
 
 retry_all:
-       bch2_btree_trans_unlock(trans);
+       bch2_trans_unlock(trans);
 
        if (unlikely(ret == -ENOMEM)) {
                struct closure cl;
@@ -987,7 +990,7 @@ retry_all:
        if (unlikely(ret == -EIO)) {
                trans->error = true;
                iter->flags |= BTREE_ITER_ERROR;
-               iter->l[iter->level].b = BTREE_ITER_NOT_END;
+               iter->l[iter->level].b = BTREE_ITER_NO_NODE_ERROR;
                goto out;
        }
 
@@ -1022,12 +1025,12 @@ static unsigned btree_iter_up_until_locked(struct btree_iter *iter,
        unsigned l = iter->level;
 
        while (btree_iter_node(iter, l) &&
-              !(is_btree_node(iter, l) &&
-                bch2_btree_node_relock(iter, l) &&
-                (!check_pos ||
-                 btree_iter_pos_in_node(iter, iter->l[l].b)))) {
+              (!is_btree_node(iter, l) ||
+               !bch2_btree_node_relock(iter, l) ||
+                (check_pos &&
+                 !btree_iter_pos_in_node(iter, iter->l[l].b)))) {
                btree_node_unlock(iter, l);
-               iter->l[l].b = BTREE_ITER_NOT_END;
+               iter->l[l].b = BTREE_ITER_NO_NODE_UP;
                l++;
        }
 
@@ -1041,7 +1044,7 @@ static unsigned btree_iter_up_until_locked(struct btree_iter *iter,
  * Returns 0 on success, -EIO on error (error reading in a btree node).
  *
  * On error, caller (peek_node()/peek_key()) must return NULL; the error is
- * stashed in the iterator and returned from bch2_btree_iter_unlock().
+ * stashed in the iterator and returned from bch2_trans_exit().
  */
 int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
 {
@@ -1050,7 +1053,7 @@ int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
        if (unlikely(iter->level >= BTREE_MAX_DEPTH))
                return 0;
 
-       if (bch2_btree_iter_relock(iter))
+       if (bch2_btree_iter_relock(iter, false))
                return 0;
 
        /*
@@ -1083,7 +1086,7 @@ int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
                                return 0;
 
                        iter->level = depth_want;
-                       iter->l[iter->level].b = BTREE_ITER_NOT_END;
+                       iter->l[iter->level].b = BTREE_ITER_NO_NODE_DOWN;
                        return ret;
                }
        }
@@ -1099,7 +1102,8 @@ int __must_check bch2_btree_iter_traverse(struct btree_iter *iter)
 {
        int ret;
 
-       ret = __bch2_btree_iter_traverse(iter);
+       ret =   bch2_trans_cond_resched(iter->trans) ?:
+               __bch2_btree_iter_traverse(iter);
        if (unlikely(ret))
                ret = __btree_iter_traverse_all(iter->trans, iter, ret);
 
@@ -1111,7 +1115,7 @@ static inline void bch2_btree_iter_checks(struct btree_iter *iter,
 {
        EBUG_ON(iter->btree_id >= BTREE_ID_NR);
        EBUG_ON(!!(iter->flags & BTREE_ITER_IS_EXTENTS) !=
-               (iter->btree_id == BTREE_ID_EXTENTS &&
+               (btree_node_type_is_extents(iter->btree_id) &&
                 type != BTREE_ITER_NODES));
 
        bch2_btree_trans_verify_locks(iter->trans);
@@ -1291,9 +1295,11 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
                return btree_iter_peek_uptodate(iter);
 
        while (1) {
-               ret = bch2_btree_iter_traverse(iter);
-               if (unlikely(ret))
-                       return bkey_s_c_err(ret);
+               if (iter->uptodate >= BTREE_ITER_NEED_RELOCK) {
+                       ret = bch2_btree_iter_traverse(iter);
+                       if (unlikely(ret))
+                               return bkey_s_c_err(ret);
+               }
 
                k = __btree_iter_peek(iter, l);
                if (likely(k.k))
@@ -1345,10 +1351,17 @@ struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter)
 
        bch2_btree_iter_checks(iter, BTREE_ITER_KEYS);
 
+       iter->pos = btree_type_successor(iter->btree_id, iter->k.p);
+
        if (unlikely(iter->uptodate != BTREE_ITER_UPTODATE)) {
-               k = bch2_btree_iter_peek(iter);
-               if (IS_ERR_OR_NULL(k.k))
-                       return k;
+               /*
+                * XXX: when we just need to relock we should be able to avoid
+                * calling traverse, but we need to kill BTREE_ITER_NEED_PEEK
+                * for that to work
+                */
+               btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
+
+               return bch2_btree_iter_peek(iter);
        }
 
        do {
@@ -1548,9 +1561,11 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
        if (iter->uptodate == BTREE_ITER_UPTODATE)
                return btree_iter_peek_uptodate(iter);
 
-       ret = bch2_btree_iter_traverse(iter);
-       if (unlikely(ret))
-               return bkey_s_c_err(ret);
+       if (iter->uptodate >= BTREE_ITER_NEED_RELOCK) {
+               ret = bch2_btree_iter_traverse(iter);
+               if (unlikely(ret))
+                       return bkey_s_c_err(ret);
+       }
 
        return __bch2_btree_iter_peek_slot(iter);
 }
@@ -1587,7 +1602,7 @@ static inline void bch2_btree_iter_init(struct btree_trans *trans,
        struct bch_fs *c = trans->c;
        unsigned i;
 
-       if (btree_id == BTREE_ID_EXTENTS &&
+       if (btree_node_type_is_extents(btree_id) &&
            !(flags & BTREE_ITER_NODES))
                flags |= BTREE_ITER_IS_EXTENTS;
 
@@ -1604,7 +1619,7 @@ static inline void bch2_btree_iter_init(struct btree_trans *trans,
        iter->nodes_intent_locked       = 0;
        for (i = 0; i < ARRAY_SIZE(iter->l); i++)
                iter->l[i].b            = NULL;
-       iter->l[iter->level].b          = BTREE_ITER_NOT_END;
+       iter->l[iter->level].b          = BTREE_ITER_NO_NODE_INIT;
 
        prefetch(c->btree_roots[btree_id].b);
 }
@@ -1649,11 +1664,13 @@ int bch2_trans_iter_free_on_commit(struct btree_trans *trans,
        return ret;
 }
 
-static int btree_trans_realloc_iters(struct btree_trans *trans,
-                                    unsigned new_size)
+static int bch2_trans_realloc_iters(struct btree_trans *trans,
+                                   unsigned new_size)
 {
        void *new_iters, *new_updates;
 
+       new_size = roundup_pow_of_two(new_size);
+
        BUG_ON(new_size > BTREE_ITER_MAX);
 
        if (new_size <= trans->size)
@@ -1694,19 +1711,13 @@ success:
        trans->size     = new_size;
 
        if (trans->iters_live) {
-               trans_restart();
-               trace_trans_restart_iters_realloced(trans->c, trans->ip);
+               trace_trans_restart_iters_realloced(trans->ip, trans->size);
                return -EINTR;
        }
 
        return 0;
 }
 
-void bch2_trans_preload_iters(struct btree_trans *trans)
-{
-       btree_trans_realloc_iters(trans, BTREE_ITER_MAX);
-}
-
 static int btree_trans_iter_alloc(struct btree_trans *trans)
 {
        unsigned idx = __ffs64(~trans->iters_linked);
@@ -1715,7 +1726,7 @@ static int btree_trans_iter_alloc(struct btree_trans *trans)
                goto got_slot;
 
        if (trans->nr_iters == trans->size) {
-               int ret = btree_trans_realloc_iters(trans, trans->size * 2);
+               int ret = bch2_trans_realloc_iters(trans, trans->size * 2);
                if (ret)
                        return ret;
        }
@@ -1812,7 +1823,7 @@ struct btree_iter *bch2_trans_get_node_iter(struct btree_trans *trans,
 
        for (i = 0; i < ARRAY_SIZE(iter->l); i++)
                iter->l[i].b            = NULL;
-       iter->l[iter->level].b          = BTREE_ITER_NOT_END;
+       iter->l[iter->level].b          = BTREE_ITER_NO_NODE_INIT;
 
        return iter;
 }
@@ -1845,50 +1856,40 @@ struct btree_iter *bch2_trans_copy_iter(struct btree_trans *trans,
        return &trans->iters[idx];
 }
 
-void *bch2_trans_kmalloc(struct btree_trans *trans,
-                        size_t size)
+static int bch2_trans_preload_mem(struct btree_trans *trans, size_t size)
 {
-       void *ret;
-
-       if (trans->mem_top + size > trans->mem_bytes) {
+       if (size > trans->mem_bytes) {
                size_t old_bytes = trans->mem_bytes;
-               size_t new_bytes = roundup_pow_of_two(trans->mem_top + size);
+               size_t new_bytes = roundup_pow_of_two(size);
                void *new_mem = krealloc(trans->mem, new_bytes, GFP_NOFS);
 
                if (!new_mem)
-                       return ERR_PTR(-ENOMEM);
+                       return -ENOMEM;
 
                trans->mem = new_mem;
                trans->mem_bytes = new_bytes;
 
                if (old_bytes) {
-                       trans_restart();
-                       trace_trans_restart_mem_realloced(trans->c, trans->ip);
-                       return ERR_PTR(-EINTR);
+                       trace_trans_restart_mem_realloced(trans->ip, new_bytes);
+                       return -EINTR;
                }
        }
 
-       ret = trans->mem + trans->mem_top;
-       trans->mem_top += size;
-       return ret;
+       return 0;
 }
 
-int bch2_trans_unlock(struct btree_trans *trans)
+void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
 {
-       u64 iters = trans->iters_linked;
-       int ret = 0;
-
-       while (iters) {
-               unsigned idx = __ffs64(iters);
-               struct btree_iter *iter = &trans->iters[idx];
-
-               ret = ret ?: btree_iter_err(iter);
+       void *p;
+       int ret;
 
-               __bch2_btree_iter_unlock(iter);
-               iters ^= 1ULL << idx;
-       }
+       ret = bch2_trans_preload_mem(trans, trans->mem_top + size);
+       if (ret)
+               return ERR_PTR(ret);
 
-       return ret;
+       p = trans->mem + trans->mem_top;
+       trans->mem_top += size;
+       return p;
 }
 
 inline void bch2_trans_unlink_iters(struct btree_trans *trans, u64 iters)
@@ -1904,7 +1905,7 @@ inline void bch2_trans_unlink_iters(struct btree_trans *trans, u64 iters)
        }
 }
 
-void __bch2_trans_begin(struct btree_trans *trans)
+void bch2_trans_begin(struct btree_trans *trans)
 {
        u64 iters_to_unlink;
 
@@ -1935,7 +1936,9 @@ void __bch2_trans_begin(struct btree_trans *trans)
        bch2_btree_iter_traverse_all(trans);
 }
 
-void bch2_trans_init(struct btree_trans *trans, struct bch_fs *c)
+void bch2_trans_init(struct btree_trans *trans, struct bch_fs *c,
+                    unsigned expected_nr_iters,
+                    size_t expected_mem_bytes)
 {
        memset(trans, 0, offsetof(struct btree_trans, iters_onstack));
 
@@ -1944,12 +1947,20 @@ void bch2_trans_init(struct btree_trans *trans, struct bch_fs *c)
        trans->size             = ARRAY_SIZE(trans->iters_onstack);
        trans->iters            = trans->iters_onstack;
        trans->updates          = trans->updates_onstack;
+       trans->fs_usage_deltas  = NULL;
+
+       if (expected_nr_iters > trans->size)
+               bch2_trans_realloc_iters(trans, expected_nr_iters);
+
+       if (expected_mem_bytes)
+               bch2_trans_preload_mem(trans, expected_mem_bytes);
 }
 
 int bch2_trans_exit(struct btree_trans *trans)
 {
        bch2_trans_unlock(trans);
 
+       kfree(trans->fs_usage_deltas);
        kfree(trans->mem);
        if (trans->used_mempool)
                mempool_free(trans->iters, &trans->c->btree_iters_pool);
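
bch2_trans_kmalloc() is now a thin wrapper around the new bch2_trans_preload_mem(), and it shares an idiom with bch2_trans_realloc_iters(): round the requested size up to a power of two, grow, and if anything was already allocated (so callers may hold pointers into the old buffer) trace the event and return -EINTR to force a transaction restart. A standalone model of that grow-or-restart idiom, using nothing beyond libc:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

struct trans_mem {
	void	*mem;
	size_t	mem_top;	/* bump-allocator watermark */
	size_t	mem_bytes;	/* current capacity */
};

static size_t roundup_pow_of_two(size_t n)
{
	size_t r = 1;

	while (r < n)
		r <<= 1;
	return r;
}

/* Grow capacity; demand a restart (-EINTR) when an existing buffer was
 * reallocated, mirroring the kernel code's old_bytes check. */
static int preload_mem(struct trans_mem *t, size_t size)
{
	if (size > t->mem_bytes) {
		size_t old_bytes = t->mem_bytes;
		size_t new_bytes = roundup_pow_of_two(size);
		void *new_mem = realloc(t->mem, new_bytes);

		if (!new_mem)
			return -ENOMEM;

		t->mem = new_mem;
		t->mem_bytes = new_bytes;

		if (old_bytes)
			return -EINTR;	/* old pointers may be stale */
	}
	return 0;
}

int main(void)
{
	struct trans_mem t = { NULL, 0, 0 };

	printf("first preload:  %d\n", preload_mem(&t, 100));	/* 0 */
	printf("second preload: %d\n", preload_mem(&t, 300));	/* -EINTR */
	free(t.mem);
	return 0;
}
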
diff --git a/libbcachefs/btree_iter.h b/libbcachefs/btree_iter.h
index a46a6a4ed5eecbd04fc1b7857e7ea03b6a1bf0bd..177cc314f3581f8e30eee6b33644b15d2cd25277 100644
@@ -18,6 +18,19 @@ static inline struct btree *btree_iter_node(struct btree_iter *iter,
        return level < BTREE_MAX_DEPTH ? iter->l[level].b : NULL;
 }
 
+static inline bool btree_node_lock_seq_matches(const struct btree_iter *iter,
+                                       const struct btree *b, unsigned level)
+{
+       /*
+        * We don't compare the low bits of the lock sequence numbers because
+        * @iter might have taken a write lock on @b, and we don't want to skip
+        * the linked iterator if the sequence numbers were equal before taking
+        * that write lock. The lock sequence number is incremented by taking
+        * and releasing write locks and is even when unlocked:
+        */
+       return iter->l[level].lock_seq >> 1 == b->lock.state.seq >> 1;
+}
+
 static inline struct btree *btree_node_parent(struct btree_iter *iter,
                                              struct btree *b)
 {
@@ -56,30 +69,20 @@ __trans_next_iter(struct btree_trans *trans, unsigned idx)
 static inline bool __iter_has_node(const struct btree_iter *iter,
                                   const struct btree *b)
 {
-       /*
-        * We don't compare the low bits of the lock sequence numbers because
-        * @iter might have taken a write lock on @b, and we don't want to skip
-        * the linked iterator if the sequence numbers were equal before taking
-        * that write lock. The lock sequence number is incremented by taking
-        * and releasing write locks and is even when unlocked:
-        */
-
        return iter->l[b->level].b == b &&
-               iter->l[b->level].lock_seq >> 1 == b->lock.state.seq >> 1;
+               btree_node_lock_seq_matches(iter, b, b->level);
 }
 
 static inline struct btree_iter *
 __trans_next_iter_with_node(struct btree_trans *trans, struct btree *b,
                            unsigned idx)
 {
-       EBUG_ON(idx < trans->nr_iters && trans->iters[idx].idx != idx);
+       struct btree_iter *iter = __trans_next_iter(trans, idx);
 
-       for (; idx < trans->nr_iters; idx++)
-               if ((trans->iters_linked & (1ULL << idx)) &&
-                   __iter_has_node(&trans->iters[idx], b))
-                       return &trans->iters[idx];
+       while (iter && !__iter_has_node(iter, b))
+               iter = __trans_next_iter(trans, iter->idx + 1);
 
-       return NULL;
+       return iter;
 }
 
 #define trans_for_each_iter_with_node(_trans, _b, _iter)               \
@@ -101,22 +104,19 @@ void bch2_btree_node_iter_fix(struct btree_iter *, struct btree *,
                              struct btree_node_iter *, struct bkey_packed *,
                              unsigned, unsigned);
 
-int bch2_btree_iter_unlock(struct btree_iter *);
-
-bool bch2_btree_trans_relock(struct btree_trans *);
-void bch2_btree_trans_unlock(struct btree_trans *);
+bool bch2_trans_relock(struct btree_trans *);
+void bch2_trans_unlock(struct btree_trans *);
 
 bool __bch2_btree_iter_upgrade(struct btree_iter *, unsigned);
 bool __bch2_btree_iter_upgrade_nounlock(struct btree_iter *, unsigned);
 
 static inline bool bch2_btree_iter_upgrade(struct btree_iter *iter,
-                                          unsigned new_locks_want,
-                                          bool may_drop_locks)
+                                          unsigned new_locks_want)
 {
        new_locks_want = min(new_locks_want, BTREE_MAX_DEPTH);
 
        return iter->locks_want < new_locks_want
-               ? (may_drop_locks
+               ? (!iter->trans->nounlock
                   ? __bch2_btree_iter_upgrade(iter, new_locks_want)
                   : __bch2_btree_iter_upgrade_nounlock(iter, new_locks_want))
                : iter->uptodate <= BTREE_ITER_NEED_PEEK;
@@ -157,7 +157,7 @@ static inline struct bpos btree_type_successor(enum btree_id id,
        if (id == BTREE_ID_INODES) {
                pos.inode++;
                pos.offset = 0;
-       } else if (id != BTREE_ID_EXTENTS) {
+       } else if (!btree_node_type_is_extents(id)) {
                pos = bkey_successor(pos);
        }
 
@@ -170,7 +170,7 @@ static inline struct bpos btree_type_predecessor(enum btree_id id,
        if (id == BTREE_ID_INODES) {
                --pos.inode;
                pos.offset = 0;
-       } else /* if (id != BTREE_ID_EXTENTS) */ {
+       } else {
                pos = bkey_predecessor(pos);
        }
 
@@ -192,19 +192,18 @@ static inline int btree_iter_cmp(const struct btree_iter *l,
        return __btree_iter_cmp(l->btree_id, l->pos, r);
 }
 
-int bch2_trans_unlock(struct btree_trans *);
-
 /*
  * Unlocks before scheduling
  * Note: does not revalidate iterator
  */
-static inline void bch2_trans_cond_resched(struct btree_trans *trans)
+static inline int bch2_trans_cond_resched(struct btree_trans *trans)
 {
-       if (need_resched()) {
+       if (need_resched() || race_fault()) {
                bch2_trans_unlock(trans);
                schedule();
-       } else if (race_fault()) {
-               bch2_trans_unlock(trans);
+               return bch2_trans_relock(trans) ? 0 : -EINTR;
+       } else {
+               return 0;
        }
 }
 
@@ -232,8 +231,6 @@ static inline struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter,
 static inline struct bkey_s_c __bch2_btree_iter_next(struct btree_iter *iter,
                                                     unsigned flags)
 {
-       bch2_trans_cond_resched(iter->trans);
-
        return flags & BTREE_ITER_SLOTS
                ? bch2_btree_iter_next_slot(iter)
                : bch2_btree_iter_next(iter);
@@ -262,7 +259,6 @@ static inline int bkey_err(struct bkey_s_c k)
 
 /* new multiple iterator interface: */
 
-void bch2_trans_preload_iters(struct btree_trans *);
 int bch2_trans_iter_put(struct btree_trans *, struct btree_iter *);
 int bch2_trans_iter_free(struct btree_trans *, struct btree_iter *);
 int bch2_trans_iter_free_on_commit(struct btree_trans *, struct btree_iter *);
@@ -297,7 +293,7 @@ struct btree_iter *bch2_trans_get_node_iter(struct btree_trans *,
                                enum btree_id, struct bpos,
                                unsigned, unsigned, unsigned);
 
-void __bch2_trans_begin(struct btree_trans *);
+void bch2_trans_begin(struct btree_trans *);
 
 static inline void bch2_trans_begin_updates(struct btree_trans *trans)
 {
@@ -305,27 +301,7 @@ static inline void bch2_trans_begin_updates(struct btree_trans *trans)
 }
 
 void *bch2_trans_kmalloc(struct btree_trans *, size_t);
-void bch2_trans_init(struct btree_trans *, struct bch_fs *);
+void bch2_trans_init(struct btree_trans *, struct bch_fs *, unsigned, size_t);
 int bch2_trans_exit(struct btree_trans *);
 
-#ifdef TRACE_TRANSACTION_RESTARTS
-#define bch2_trans_begin(_trans)                                       \
-do {                                                                   \
-       if (is_power_of_2((_trans)->nr_restarts) &&                     \
-           (_trans)->nr_restarts >= 8)                                 \
-               pr_info("nr restarts: %zu", (_trans)->nr_restarts);     \
-                                                                       \
-       (_trans)->nr_restarts++;                                        \
-       __bch2_trans_begin(_trans);                                     \
-} while (0)
-#else
-#define bch2_trans_begin(_trans)       __bch2_trans_begin(_trans)
-#endif
-
-#ifdef TRACE_TRANSACTION_RESTARTS_ALL
-#define trans_restart(...) pr_info("transaction restart" __VA_ARGS__)
-#else
-#define trans_restart(...) no_printk("transaction restart" __VA_ARGS__)
-#endif
-
 #endif /* _BCACHEFS_BTREE_ITER_H */
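
btree_node_lock_seq_matches() is hoisted out of __iter_has_node() (comment and all) so the relock and upgrade paths in btree_iter.c can share it. The rule in the comment — write locks bump the sequence number on both acquire and release, so it is even when unlocked — is why the low bit is discarded. A standalone model of the check:

#include <stdbool.h>
#include <stdio.h>

/* seq is even when unlocked, odd while write-locked; comparing seq >> 1
 * treats "write-locked by us right now" and "as last seen unlocked" as
 * the same generation. */
static bool lock_seq_matches(unsigned iter_seq, unsigned node_seq)
{
	return iter_seq >> 1 == node_seq >> 1;
}

int main(void)
{
	unsigned seq = 4;		/* node unlocked, generation 2 */
	unsigned iter_seq = seq;	/* iterator records what it saw */

	seq++;	/* we take the write lock: seq = 5, same generation */
	printf("locked by us: match=%d\n", lock_seq_matches(iter_seq, seq));

	seq++;	/* released: seq = 6, a new generation */
	printf("after release: match=%d\n", lock_seq_matches(iter_seq, seq));
	return 0;
}
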
diff --git a/libbcachefs/btree_locking.h b/libbcachefs/btree_locking.h
index e9686197a90817aa4473f90f371a2e2aafeb2ac7..35289b0c3586c6826f08871f69a61a301c282553 100644
@@ -107,7 +107,7 @@ static inline void __btree_node_unlock(struct btree_iter *iter, unsigned level)
 
 static inline void btree_node_unlock(struct btree_iter *iter, unsigned level)
 {
-       BUG_ON(!level && iter->flags & BTREE_ITER_NOUNLOCK);
+       EBUG_ON(!level && iter->trans->nounlock);
 
        __btree_node_unlock(iter, level);
 }
@@ -175,20 +175,18 @@ static inline bool btree_node_lock_increment(struct btree_iter *iter,
 }
 
 bool __bch2_btree_node_lock(struct btree *, struct bpos, unsigned,
-                           struct btree_iter *, enum six_lock_type, bool);
+                           struct btree_iter *, enum six_lock_type);
 
 static inline bool btree_node_lock(struct btree *b, struct bpos pos,
                                   unsigned level,
                                   struct btree_iter *iter,
-                                  enum six_lock_type type,
-                                  bool may_drop_locks)
+                                  enum six_lock_type type)
 {
        EBUG_ON(level >= BTREE_MAX_DEPTH);
 
        return likely(six_trylock_type(&b->lock, type)) ||
                btree_node_lock_increment(iter, b, level, type) ||
-               __bch2_btree_node_lock(b, pos, level, iter,
-                                      type, may_drop_locks);
+               __bch2_btree_node_lock(b, pos, level, iter, type);
 }
 
 bool __bch2_btree_node_relock(struct btree_iter *, unsigned);
diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h
index 57ef50142ee11395f27d30ba4043049a64c1bdd4..f2641d564e49faa4558b3c53ef986d44d3cb0d5e 100644
@@ -193,7 +193,6 @@ enum btree_iter_type {
  */
 #define BTREE_ITER_IS_EXTENTS          (1 << 4)
 #define BTREE_ITER_ERROR               (1 << 5)
-#define BTREE_ITER_NOUNLOCK            (1 << 6)
 
 enum btree_iter_uptodate {
        BTREE_ITER_UPTODATE             = 0,
@@ -269,7 +268,6 @@ struct btree_insert_entry {
 struct btree_trans {
        struct bch_fs           *c;
        unsigned long           ip;
-       size_t                  nr_restarts;
        u64                     commit_start;
 
        u64                     iters_linked;
@@ -283,6 +281,7 @@ struct btree_trans {
        u8                      size;
        unsigned                used_mempool:1;
        unsigned                error:1;
+       unsigned                nounlock:1;
 
        unsigned                mem_top;
        unsigned                mem_bytes;
@@ -297,11 +296,12 @@ struct btree_trans {
        u64                     *journal_seq;
        struct disk_reservation *disk_res;
        unsigned                flags;
+       unsigned                journal_u64s;
 
        struct btree_iter       iters_onstack[2];
        struct btree_insert_entry updates_onstack[6];
 
-       struct replicas_delta_list fs_usage_deltas;
+       struct replicas_delta_list *fs_usage_deltas;
 };
 
 #define BTREE_FLAG(flag)                                               \
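
struct btree_trans itself slims down: nr_restarts goes away with the restart-counting macro, a nounlock:1 bit replaces the old BTREE_ITER_NOUNLOCK iterator flag, journal_u64s is added, and the embedded replicas_delta_list becomes a pointer that bch2_trans_init() NULLs and bch2_trans_exit() kfrees (per the btree_iter.c hunks above). A toy model of that embedded-struct-to-pointer move, with illustrative names — where bcachefs actually allocates the list is outside the hunks shown:

#include <stdlib.h>

struct deltas { long d[64]; };		/* stand-in for replicas_delta_list */

struct trans {
	struct deltas	*fs_usage_deltas;	/* was: struct deltas fs_usage_deltas; */
};

/* keep the common case cheap: the struct only exists once it's needed */
static struct deltas *trans_deltas(struct trans *t)
{
	if (!t->fs_usage_deltas)
		t->fs_usage_deltas = calloc(1, sizeof(*t->fs_usage_deltas));
	return t->fs_usage_deltas;
}

int main(void)
{
	struct trans t = { NULL };

	if (!trans_deltas(&t))
		return 1;
	free(t.fs_usage_deltas);	/* mirrors the kfree in bch2_trans_exit() */
	return 0;
}
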
diff --git a/libbcachefs/btree_update.h b/libbcachefs/btree_update.h
index be11efdcbe041c76d452a6ad6d8d7d62cca35b92..32e30f75ed22e38e9e9fe04f2867bc9b4aa171f3 100644
@@ -47,6 +47,7 @@ enum {
        __BTREE_INSERT_NOMARK,
        __BTREE_INSERT_MARK_INMEM,
        __BTREE_INSERT_NO_CLEAR_REPLICAS,
+       __BTREE_INSERT_BUCKET_INVALIDATE,
        __BTREE_INSERT_NOWAIT,
        __BTREE_INSERT_GC_LOCK_HELD,
        __BCH_HASH_SET_MUST_CREATE,
@@ -93,6 +94,8 @@ enum {
 
 #define BTREE_INSERT_NO_CLEAR_REPLICAS (1 << __BTREE_INSERT_NO_CLEAR_REPLICAS)
 
+#define BTREE_INSERT_BUCKET_INVALIDATE (1 << __BTREE_INSERT_BUCKET_INVALIDATE)
+
 /* Don't block on allocation failure (for new btree nodes: */
 #define BTREE_INSERT_NOWAIT            (1 << __BTREE_INSERT_NOWAIT)
 #define BTREE_INSERT_GC_LOCK_HELD      (1 << __BTREE_INSERT_GC_LOCK_HELD)
@@ -105,6 +108,8 @@ int bch2_btree_delete_at(struct btree_trans *, struct btree_iter *, unsigned);
 int bch2_btree_insert(struct bch_fs *, enum btree_id, struct bkey_i *,
                     struct disk_reservation *, u64 *, int flags);
 
+int bch2_btree_delete_at_range(struct btree_trans *, struct btree_iter *,
+                              struct bpos, u64 *);
 int bch2_btree_delete_range(struct bch_fs *, enum btree_id,
                            struct bpos, struct bpos, u64 *);
 
@@ -125,7 +130,7 @@ struct btree_insert_entry *bch2_trans_update(struct btree_trans *,
        struct btree_trans trans;                                       \
        int _ret;                                                       \
                                                                        \
-       bch2_trans_init(&trans, (_c));                                  \
+       bch2_trans_init(&trans, (_c), 0, 0);                            \
                                                                        \
        do {                                                            \
                bch2_trans_begin(&trans);                               \
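The two new bch2_trans_init() arguments are sizing hints; judging by the call sites this patch converts, they are the expected number of iterators and the number of bytes of transaction memory to preallocate, with (0, 0) keeping the small on-stack defaults. A hedged sketch of the resulting idiom, pieced together from the bch2_btree_insert() changes later in this patch (the wrapper name is illustrative):

static int example_insert(struct bch_fs *c, enum btree_id id,
                          struct bkey_i *k)
{
        struct btree_trans trans;
        struct btree_iter *iter;
        int ret;

        /* new signature: fs, expected nr of iters, trans memory hint */
        bch2_trans_init(&trans, c, 0, 0);
retry:
        bch2_trans_begin(&trans);

        iter = bch2_trans_get_iter(&trans, id, bkey_start_pos(&k->k),
                                   BTREE_ITER_INTENT);
        bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, k));

        ret = bch2_trans_commit(&trans, NULL, NULL, BTREE_INSERT_NOFAIL);
        if (ret == -EINTR)
                goto retry;

        return bch2_trans_exit(&trans) ?: ret;
}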
index fb6bf79a1a8ab9eeed09d5dda162c8603b70b7d8..c6920b63fa858ed3b319c17d4550072b5969c0f7 100644 (file)
@@ -193,7 +193,9 @@ found:
                       : gc_pos_btree_root(as->btree_id)) >= 0 &&
            gc_pos_cmp(c->gc_pos, gc_phase(GC_PHASE_PENDING_DELETE)) < 0)
                bch2_mark_key_locked(c, bkey_i_to_s_c(&d->key),
-                             false, 0, NULL, 0, BCH_BUCKET_MARK_GC);
+                             0, NULL, 0,
+                             BCH_BUCKET_MARK_OVERWRITE|
+                             BCH_BUCKET_MARK_GC);
 }
 
 static void __btree_node_free(struct bch_fs *c, struct btree *b)
@@ -263,13 +265,13 @@ static void bch2_btree_node_free_ondisk(struct bch_fs *c,
 {
        BUG_ON(!pending->index_update_done);
 
-       bch2_mark_key(c, bkey_i_to_s_c(&pending->key),
-                     false, 0,
-                     NULL, 0, 0);
+       bch2_mark_key(c, bkey_i_to_s_c(&pending->key), 0, NULL, 0,
+                     BCH_BUCKET_MARK_OVERWRITE);
 
        if (gc_visited(c, gc_phase(GC_PHASE_PENDING_DELETE)))
-               bch2_mark_key(c, bkey_i_to_s_c(&pending->key),
-                             false, 0, NULL, 0, BCH_BUCKET_MARK_GC);
+               bch2_mark_key(c, bkey_i_to_s_c(&pending->key), 0, NULL, 0,
+                             BCH_BUCKET_MARK_OVERWRITE|
+                             BCH_BUCKET_MARK_GC);
 }
 
 static struct btree *__bch2_btree_node_alloc(struct bch_fs *c,
@@ -1074,10 +1076,12 @@ static void bch2_btree_set_root_inmem(struct btree_update *as, struct btree *b)
        fs_usage = bch2_fs_usage_scratch_get(c);
 
        bch2_mark_key_locked(c, bkey_i_to_s_c(&b->key),
-                     true, 0, fs_usage, 0, 0);
+                     0, fs_usage, 0,
+                     BCH_BUCKET_MARK_INSERT);
        if (gc_visited(c, gc_pos_btree_root(b->btree_id)))
                bch2_mark_key_locked(c, bkey_i_to_s_c(&b->key),
-                                    true, 0, NULL, 0,
+                                    0, NULL, 0,
+                                    BCH_BUCKET_MARK_INSERT|
                                     BCH_BUCKET_MARK_GC);
 
        if (old && !btree_node_fake(old))
@@ -1170,11 +1174,14 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b
        fs_usage = bch2_fs_usage_scratch_get(c);
 
        bch2_mark_key_locked(c, bkey_i_to_s_c(insert),
-                            true, 0, fs_usage, 0, 0);
+                            0, fs_usage, 0,
+                            BCH_BUCKET_MARK_INSERT);
 
        if (gc_visited(c, gc_pos_btree_node(b)))
                bch2_mark_key_locked(c, bkey_i_to_s_c(insert),
-                                    true, 0, NULL, 0, BCH_BUCKET_MARK_GC);
+                                    0, NULL, 0,
+                                    BCH_BUCKET_MARK_INSERT|
+                                    BCH_BUCKET_MARK_GC);
 
        while ((k = bch2_btree_node_iter_peek_all(node_iter, b)) &&
               bkey_iter_pos_cmp(b, &insert->k.p, k) > 0)
@@ -1550,6 +1557,7 @@ split:
 int bch2_btree_split_leaf(struct bch_fs *c, struct btree_iter *iter,
                          unsigned flags)
 {
+       struct btree_trans *trans = iter->trans;
        struct btree *b = iter->l[0].b;
        struct btree_update *as;
        struct closure cl;
@@ -1560,7 +1568,7 @@ int bch2_btree_split_leaf(struct bch_fs *c, struct btree_iter *iter,
         * We already have a disk reservation and open buckets pinned; this
         * allocation must not block:
         */
-       trans_for_each_iter(iter->trans, linked)
+       trans_for_each_iter(trans, linked)
                if (linked->btree_id == BTREE_ID_EXTENTS)
                        flags |= BTREE_INSERT_USE_RESERVE;
 
@@ -1572,10 +1580,10 @@ int bch2_btree_split_leaf(struct bch_fs *c, struct btree_iter *iter,
                if (flags & BTREE_INSERT_NOUNLOCK)
                        return -EINTR;
 
-               bch2_btree_trans_unlock(iter->trans);
+               bch2_trans_unlock(trans);
                down_read(&c->gc_lock);
 
-               if (!bch2_btree_trans_relock(iter->trans))
+               if (!bch2_trans_relock(trans))
                        ret = -EINTR;
        }
 
@@ -1583,8 +1591,8 @@ int bch2_btree_split_leaf(struct bch_fs *c, struct btree_iter *iter,
         * XXX: figure out how far we might need to split,
         * instead of locking/reserving all the way to the root:
         */
-       if (!bch2_btree_iter_upgrade(iter, U8_MAX,
-                       !(flags & BTREE_INSERT_NOUNLOCK))) {
+       if (!bch2_btree_iter_upgrade(iter, U8_MAX)) {
+               trace_trans_restart_iter_upgrade(trans->ip);
                ret = -EINTR;
                goto out;
        }
@@ -1596,7 +1604,7 @@ int bch2_btree_split_leaf(struct bch_fs *c, struct btree_iter *iter,
                ret = PTR_ERR(as);
                if (ret == -EAGAIN) {
                        BUG_ON(flags & BTREE_INSERT_NOUNLOCK);
-                       bch2_btree_iter_unlock(iter);
+                       bch2_trans_unlock(trans);
                        ret = -EINTR;
                }
                goto out;
@@ -1623,6 +1631,7 @@ void __bch2_foreground_maybe_merge(struct bch_fs *c,
                                   unsigned flags,
                                   enum btree_node_sibling sib)
 {
+       struct btree_trans *trans = iter->trans;
        struct btree_update *as;
        struct bkey_format_state new_s;
        struct bkey_format new_f;
@@ -1646,8 +1655,7 @@ retry:
                goto out;
 
        /* XXX: can't be holding read locks */
-       m = bch2_btree_node_get_sibling(c, iter, b,
-                       !(flags & BTREE_INSERT_NOUNLOCK), sib);
+       m = bch2_btree_node_get_sibling(c, iter, b, sib);
        if (IS_ERR(m)) {
                ret = PTR_ERR(m);
                goto err;
@@ -1694,8 +1702,7 @@ retry:
            !down_read_trylock(&c->gc_lock))
                goto err_cycle_gc_lock;
 
-       if (!bch2_btree_iter_upgrade(iter, U8_MAX,
-                       !(flags & BTREE_INSERT_NOUNLOCK))) {
+       if (!bch2_btree_iter_upgrade(iter, U8_MAX)) {
                ret = -EINTR;
                goto err_unlock;
        }
@@ -1757,7 +1764,7 @@ retry:
        if (!(flags & BTREE_INSERT_GC_LOCK_HELD))
                up_read(&c->gc_lock);
 out:
-       bch2_btree_trans_verify_locks(iter->trans);
+       bch2_btree_trans_verify_locks(trans);
 
        /*
         * Don't downgrade locks here: we're called after successful insert,
@@ -1777,7 +1784,7 @@ err_cycle_gc_lock:
        if (flags & BTREE_INSERT_NOUNLOCK)
                goto out;
 
-       bch2_btree_iter_unlock(iter);
+       bch2_trans_unlock(trans);
 
        down_read(&c->gc_lock);
        up_read(&c->gc_lock);
@@ -1793,7 +1800,7 @@ err:
 
        if ((ret == -EAGAIN || ret == -EINTR) &&
            !(flags & BTREE_INSERT_NOUNLOCK)) {
-               bch2_btree_iter_unlock(iter);
+               bch2_trans_unlock(trans);
                closure_sync(&cl);
                ret = bch2_btree_iter_traverse(iter);
                if (ret)
@@ -1860,6 +1867,7 @@ static int __btree_node_rewrite(struct bch_fs *c, struct btree_iter *iter,
 int bch2_btree_node_rewrite(struct bch_fs *c, struct btree_iter *iter,
                            __le64 seq, unsigned flags)
 {
+       struct btree_trans *trans = iter->trans;
        struct closure cl;
        struct btree *b;
        int ret;
@@ -1868,11 +1876,11 @@ int bch2_btree_node_rewrite(struct bch_fs *c, struct btree_iter *iter,
 
        closure_init_stack(&cl);
 
-       bch2_btree_iter_upgrade(iter, U8_MAX, true);
+       bch2_btree_iter_upgrade(iter, U8_MAX);
 
        if (!(flags & BTREE_INSERT_GC_LOCK_HELD)) {
                if (!down_read_trylock(&c->gc_lock)) {
-                       bch2_btree_iter_unlock(iter);
+                       bch2_trans_unlock(trans);
                        down_read(&c->gc_lock);
                }
        }
@@ -1891,7 +1899,7 @@ int bch2_btree_node_rewrite(struct bch_fs *c, struct btree_iter *iter,
                    ret != -EINTR)
                        break;
 
-               bch2_btree_iter_unlock(iter);
+               bch2_trans_unlock(trans);
                closure_sync(&cl);
        }
 
@@ -1994,10 +2002,12 @@ static void __bch2_btree_node_update_key(struct bch_fs *c,
                fs_usage = bch2_fs_usage_scratch_get(c);
 
                bch2_mark_key_locked(c, bkey_i_to_s_c(&new_key->k_i),
-                             true, 0, fs_usage, 0, 0);
+                             0, fs_usage, 0,
+                             BCH_BUCKET_MARK_INSERT);
                if (gc_visited(c, gc_pos_btree_root(b->btree_id)))
                        bch2_mark_key_locked(c, bkey_i_to_s_c(&new_key->k_i),
-                                            true, 0, NULL, 0,
+                                            0, NULL, 0,
+                                            BCH_BUCKET_MARK_INSERT|
                                             BCH_BUCKET_MARK_GC);
 
                bch2_btree_node_free_index(as, NULL,
@@ -2040,14 +2050,14 @@ int bch2_btree_node_update_key(struct bch_fs *c, struct btree_iter *iter,
 
        closure_init_stack(&cl);
 
-       if (!bch2_btree_iter_upgrade(iter, U8_MAX, true))
+       if (!bch2_btree_iter_upgrade(iter, U8_MAX))
                return -EINTR;
 
        if (!down_read_trylock(&c->gc_lock)) {
-               bch2_btree_trans_unlock(iter->trans);
+               bch2_trans_unlock(iter->trans);
                down_read(&c->gc_lock);
 
-               if (!bch2_btree_trans_relock(iter->trans)) {
+               if (!bch2_trans_relock(iter->trans)) {
                        ret = -EINTR;
                        goto err;
                }
@@ -2058,12 +2068,12 @@ int bch2_btree_node_update_key(struct bch_fs *c, struct btree_iter *iter,
                /* bch2_btree_reserve_get will unlock */
                ret = bch2_btree_cache_cannibalize_lock(c, &cl);
                if (ret) {
-                       bch2_btree_trans_unlock(iter->trans);
+                       bch2_trans_unlock(iter->trans);
                        up_read(&c->gc_lock);
                        closure_sync(&cl);
                        down_read(&c->gc_lock);
 
-                       if (!bch2_btree_trans_relock(iter->trans)) {
+                       if (!bch2_trans_relock(iter->trans)) {
                                ret = -EINTR;
                                goto err;
                        }
@@ -2087,12 +2097,12 @@ int bch2_btree_node_update_key(struct bch_fs *c, struct btree_iter *iter,
                if (ret != -EINTR)
                        goto err;
 
-               bch2_btree_trans_unlock(iter->trans);
+               bch2_trans_unlock(iter->trans);
                up_read(&c->gc_lock);
                closure_sync(&cl);
                down_read(&c->gc_lock);
 
-               if (!bch2_btree_trans_relock(iter->trans))
+               if (!bch2_trans_relock(iter->trans))
                        goto err;
        }
 
index dde1fc1f97a2f9cecf2438671e621040a5443198..250aae47ad4d85afe6137156616eb4eca8f2fe1a 100644 (file)
@@ -430,16 +430,15 @@ static int bch2_trans_journal_preres_get(struct btree_trans *trans)
        if (ret != -EAGAIN)
                return ret;
 
-       bch2_btree_trans_unlock(trans);
+       bch2_trans_unlock(trans);
 
        ret = bch2_journal_preres_get(&c->journal,
                        &trans->journal_preres, u64s, 0);
        if (ret)
                return ret;
 
-       if (!bch2_btree_trans_relock(trans)) {
-               trans_restart(" (iter relock after journal preres get blocked)");
-               trace_trans_restart_journal_preres_get(c, trans->ip);
+       if (!bch2_trans_relock(trans)) {
+               trace_trans_restart_journal_preres_get(trans->ip);
                return -EINTR;
        }
 
@@ -450,21 +449,13 @@ static int bch2_trans_journal_res_get(struct btree_trans *trans,
                                      unsigned flags)
 {
        struct bch_fs *c = trans->c;
-       struct btree_insert_entry *i;
-       unsigned u64s = 0;
        int ret;
 
-       if (unlikely(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))
-               return 0;
-
        if (trans->flags & BTREE_INSERT_JOURNAL_RESERVED)
                flags |= JOURNAL_RES_GET_RESERVED;
 
-       trans_for_each_update(trans, i)
-               u64s += jset_u64s(i->k->k.u64s);
-
        ret = bch2_journal_res_get(&c->journal, &trans->journal_res,
-                                  u64s, flags);
+                                  trans->journal_u64s, flags);
 
        return ret == -EAGAIN ? BTREE_INSERT_NEED_JOURNAL_RES : ret;
 }
@@ -550,33 +541,29 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
        struct bch_fs *c = trans->c;
        struct bch_fs_usage *fs_usage = NULL;
        struct btree_insert_entry *i;
-       struct btree_iter *linked;
+       unsigned mark_flags = trans->flags & BTREE_INSERT_BUCKET_INVALIDATE
+               ? BCH_BUCKET_MARK_BUCKET_INVALIDATE
+               : 0;
        int ret;
 
-       if (likely(!(trans->flags & BTREE_INSERT_NO_CLEAR_REPLICAS))) {
-               memset(&trans->fs_usage_deltas.fs_usage, 0,
-                      sizeof(trans->fs_usage_deltas.fs_usage));
-               trans->fs_usage_deltas.top = trans->fs_usage_deltas.d;
-       }
-
        trans_for_each_update_iter(trans, i)
                BUG_ON(i->iter->uptodate >= BTREE_ITER_NEED_RELOCK);
 
        trans_for_each_update_iter(trans, i)
                if (update_has_triggers(trans, i) &&
                    update_triggers_transactional(trans, i)) {
-                       ret = bch2_trans_mark_update(trans, i,
-                                               &trans->fs_usage_deltas);
+                       ret = bch2_trans_mark_update(trans, i);
+                       if (ret == -EINTR)
+                               trace_trans_restart_mark(trans->ip);
                        if (ret)
-                               return ret;
+                               goto out_clear_replicas;
                }
 
        btree_trans_lock_write(c, trans);
 
        if (race_fault()) {
                ret = -EINTR;
-               trans_restart(" (race)");
-               trace_trans_restart_fault_inject(c, trans->ip);
+               trace_trans_restart_fault_inject(trans->ip);
                goto out;
        }
 
@@ -610,9 +597,16 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
         * Don't get journal reservation until after we know insert will
         * succeed:
         */
-       ret = bch2_trans_journal_res_get(trans, JOURNAL_RES_GET_NONBLOCK);
-       if (ret)
-               goto out;
+       if (likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))) {
+               trans->journal_u64s = 0;
+
+               trans_for_each_update(trans, i)
+                       trans->journal_u64s += jset_u64s(i->k->k.u64s);
+
+               ret = bch2_trans_journal_res_get(trans, JOURNAL_RES_GET_NONBLOCK);
+               if (ret)
+                       goto out;
+       }
 
        if (!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY)) {
                if (journal_seq_verify(c))
@@ -623,33 +617,24 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
                                i->k->k.version = MAX_VERSION;
        }
 
-       if (trans->flags & BTREE_INSERT_NOUNLOCK) {
-               /*
-                * linked iterators that weren't being updated may or may not
-                * have been traversed/locked, depending on what the caller was
-                * doing:
-                */
-               trans_for_each_iter(trans, linked)
-                       if (linked->uptodate < BTREE_ITER_NEED_RELOCK)
-                               linked->flags |= BTREE_ITER_NOUNLOCK;
-       }
-
        trans_for_each_update_iter(trans, i)
                if (update_has_triggers(trans, i) &&
                    !update_triggers_transactional(trans, i))
-                       bch2_mark_update(trans, i, fs_usage, 0);
+                       bch2_mark_update(trans, i, fs_usage, mark_flags);
 
-       if (fs_usage) {
+       if (fs_usage && trans->fs_usage_deltas)
                bch2_replicas_delta_list_apply(c, fs_usage,
-                                              &trans->fs_usage_deltas);
+                                              trans->fs_usage_deltas);
+
+       if (fs_usage)
                bch2_trans_fs_usage_apply(trans, fs_usage);
-       }
 
        if (likely(!(trans->flags & BTREE_INSERT_NOMARK)) &&
            unlikely(c->gc_pos.phase))
                trans_for_each_update_iter(trans, i)
                        if (gc_visited(c, gc_pos_btree_node(i->iter->l[0].b)))
                                bch2_mark_update(trans, i, NULL,
+                                                mark_flags|
                                                 BCH_BUCKET_MARK_GC);
 
        trans_for_each_update(trans, i)
@@ -667,6 +652,12 @@ out:
        }
 
        bch2_journal_res_put(&c->journal, &trans->journal_res);
+out_clear_replicas:
+       if (trans->fs_usage_deltas) {
+               memset(&trans->fs_usage_deltas->fs_usage, 0,
+                      sizeof(trans->fs_usage_deltas->fs_usage));
+               trans->fs_usage_deltas->used = 0;
+       }
 
        return ret;
 }
@@ -725,9 +716,10 @@ int bch2_trans_commit_error(struct btree_trans *trans,
                 * don't care if we got ENOSPC because we told split it
                 * couldn't block:
                 */
-               if (!ret || (flags & BTREE_INSERT_NOUNLOCK)) {
-                       trans_restart(" (split)");
-                       trace_trans_restart_btree_node_split(c, trans->ip);
+               if (!ret ||
+                   ret == -EINTR ||
+                   (flags & BTREE_INSERT_NOUNLOCK)) {
+                       trace_trans_restart_btree_node_split(trans->ip);
                        ret = -EINTR;
                }
                break;
@@ -743,25 +735,23 @@ int bch2_trans_commit_error(struct btree_trans *trans,
                                return ret;
                }
 
-               if (bch2_btree_trans_relock(trans))
+               if (bch2_trans_relock(trans))
                        return 0;
 
-               trans_restart(" (iter relock after marking replicas)");
-               trace_trans_restart_mark_replicas(c, trans->ip);
+               trace_trans_restart_mark_replicas(trans->ip);
                ret = -EINTR;
                break;
        case BTREE_INSERT_NEED_JOURNAL_RES:
-               bch2_btree_trans_unlock(trans);
+               bch2_trans_unlock(trans);
 
                ret = bch2_trans_journal_res_get(trans, JOURNAL_RES_GET_CHECK);
                if (ret)
                        return ret;
 
-               if (bch2_btree_trans_relock(trans))
+               if (bch2_trans_relock(trans))
                        return 0;
 
-               trans_restart(" (iter relock after journal res get blocked)");
-               trace_trans_restart_journal_res_get(c, trans->ip);
+               trace_trans_restart_journal_res_get(trans->ip);
                ret = -EINTR;
                break;
        default:
@@ -773,8 +763,7 @@ int bch2_trans_commit_error(struct btree_trans *trans,
                int ret2 = bch2_btree_iter_traverse_all(trans);
 
                if (ret2) {
-                       trans_restart(" (traverse)");
-                       trace_trans_restart_traverse(c, trans->ip);
+                       trace_trans_restart_traverse(trans->ip);
                        return ret2;
                }
 
@@ -785,8 +774,7 @@ int bch2_trans_commit_error(struct btree_trans *trans,
                if (!(flags & BTREE_INSERT_ATOMIC))
                        return 0;
 
-               trans_restart(" (atomic)");
-               trace_trans_restart_atomic(c, trans->ip);
+               trace_trans_restart_atomic(trans->ip);
        }
 
        return ret;
@@ -808,16 +796,11 @@ static int __bch2_trans_commit(struct btree_trans *trans,
 {
        struct bch_fs *c = trans->c;
        struct btree_insert_entry *i;
-       struct btree_iter *linked;
        int ret;
 
        trans_for_each_update_iter(trans, i) {
-               unsigned old_locks_want = i->iter->locks_want;
-               unsigned old_uptodate = i->iter->uptodate;
-
-               if (!bch2_btree_iter_upgrade(i->iter, 1, true)) {
-                       trans_restart(" (failed upgrade, locks_want %u uptodate %u)",
-                                     old_locks_want, old_uptodate);
+               if (!bch2_btree_iter_upgrade(i->iter, 1)) {
+                       trace_trans_restart_upgrade(trans->ip);
                        ret = -EINTR;
                        goto err;
                }
@@ -831,18 +814,20 @@ static int __bch2_trans_commit(struct btree_trans *trans,
        if (unlikely(ret))
                goto err;
 
+       if (trans->flags & BTREE_INSERT_NOUNLOCK)
+               trans->nounlock = true;
+
        trans_for_each_update_leaf(trans, i)
                bch2_foreground_maybe_merge(c, i->iter, 0, trans->flags);
 
+       trans->nounlock = false;
+
        trans_for_each_update_iter(trans, i)
                bch2_btree_iter_downgrade(i->iter);
 err:
        /* make sure we didn't drop or screw up locks: */
        bch2_btree_trans_verify_locks(trans);
 
-       trans_for_each_iter(trans, linked)
-               linked->flags &= ~BTREE_ITER_NOUNLOCK;
-
        return ret;
 }
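With BTREE_ITER_NOUNLOCK gone, "may not drop locks" is no longer tracked per linked iterator; it is the single trans->nounlock bit, set only for the window around the post-commit merge above. A hypothetical helper (not from this patch) illustrates what code that used to test the iterator flag would consult instead:

/* Illustration only: */
static inline bool example_may_drop_locks(struct btree_trans *trans)
{
        return !trans->nounlock;
}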
 
@@ -883,7 +868,7 @@ int bch2_trans_commit(struct btree_trans *trans,
                if (likely(!(trans->flags & BTREE_INSERT_LAZY_RW)))
                        return -EROFS;
 
-               bch2_btree_trans_unlock(trans);
+               bch2_trans_unlock(trans);
 
                ret = bch2_fs_read_write_early(c);
                if (ret)
@@ -891,7 +876,7 @@ int bch2_trans_commit(struct btree_trans *trans,
 
                percpu_ref_get(&c->writes);
 
-               if (!bch2_btree_trans_relock(trans)) {
+               if (!bch2_trans_relock(trans)) {
                        ret = -EINTR;
                        goto err;
                }
@@ -965,20 +950,6 @@ struct btree_insert_entry *bch2_trans_update(struct btree_trans *trans,
        return i;
 }
 
-int bch2_btree_delete_at(struct btree_trans *trans,
-                        struct btree_iter *iter, unsigned flags)
-{
-       struct bkey_i k;
-
-       bkey_init(&k.k);
-       k.k.p = iter->pos;
-
-       bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, &k));
-       return bch2_trans_commit(trans, NULL, NULL,
-                                BTREE_INSERT_NOFAIL|
-                                BTREE_INSERT_USE_RESERVE|flags);
-}
-
 /**
  * bch2_btree_insert - insert keys into a given btree
  * @c:                 pointer to struct bch_fs
@@ -995,7 +966,9 @@ int bch2_btree_insert(struct bch_fs *c, enum btree_id id,
        struct btree_iter *iter;
        int ret;
 
-       bch2_trans_init(&trans, c);
+       bch2_trans_init(&trans, c, 0, 0);
+retry:
+       bch2_trans_begin(&trans);
 
        iter = bch2_trans_get_iter(&trans, id, bkey_start_pos(&k->k),
                                   BTREE_ITER_INTENT);
@@ -1003,35 +976,24 @@ int bch2_btree_insert(struct bch_fs *c, enum btree_id id,
        bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, k));
 
        ret = bch2_trans_commit(&trans, disk_res, journal_seq, flags);
+       if (ret == -EINTR)
+               goto retry;
        bch2_trans_exit(&trans);
 
        return ret;
 }
 
-/*
- * bch_btree_delete_range - delete everything within a given range
- *
- * Range is a half open interval - [start, end)
- */
-int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id,
-                           struct bpos start, struct bpos end,
-                           u64 *journal_seq)
+int bch2_btree_delete_at_range(struct btree_trans *trans,
+                              struct btree_iter *iter,
+                              struct bpos end,
+                              u64 *journal_seq)
 {
-       struct btree_trans trans;
-       struct btree_iter *iter;
        struct bkey_s_c k;
        int ret = 0;
-
-       bch2_trans_init(&trans, c);
-       bch2_trans_preload_iters(&trans);
-
-       iter = bch2_trans_get_iter(&trans, id, start, BTREE_ITER_INTENT);
-
+retry:
        while ((k = bch2_btree_iter_peek(iter)).k &&
               !(ret = bkey_err(k)) &&
               bkey_cmp(iter->pos, end) < 0) {
-               unsigned max_sectors = KEY_SIZE_MAX & (~0 << c->block_bits);
-               /* really shouldn't be using a bare, unpadded bkey_i */
                struct bkey_i delete;
 
                bkey_init(&delete.k);
@@ -1049,26 +1011,72 @@ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id,
                delete.k.p = iter->pos;
 
                if (iter->flags & BTREE_ITER_IS_EXTENTS) {
+                       unsigned max_sectors =
+                               KEY_SIZE_MAX & (~0 << trans->c->block_bits);
+
                        /* create the biggest key we can */
                        bch2_key_resize(&delete.k, max_sectors);
                        bch2_cut_back(end, &delete.k);
                        bch2_extent_trim_atomic(&delete, iter);
                }
 
-               bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, &delete));
-
-               ret = bch2_trans_commit(&trans, NULL, journal_seq,
+               bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, &delete));
+               ret = bch2_trans_commit(trans, NULL, journal_seq,
                                        BTREE_INSERT_ATOMIC|
                                        BTREE_INSERT_NOFAIL);
-               if (ret == -EINTR)
-                       ret = 0;
                if (ret)
                        break;
 
-               bch2_trans_cond_resched(&trans);
+               bch2_trans_cond_resched(trans);
        }
 
-       bch2_trans_exit(&trans);
+       if (ret == -EINTR) {
+               ret = 0;
+               goto retry;
+       }
+
+       return ret;
+
+}
+
+int bch2_btree_delete_at(struct btree_trans *trans,
+                        struct btree_iter *iter, unsigned flags)
+{
+       struct bkey_i k;
+
+       bkey_init(&k.k);
+       k.k.p = iter->pos;
+
+       bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, &k));
+       return bch2_trans_commit(trans, NULL, NULL,
+                                BTREE_INSERT_NOFAIL|
+                                BTREE_INSERT_USE_RESERVE|flags);
+}
+
+/*
+ * bch2_btree_delete_range - delete everything within a given range
+ *
+ * Range is a half-open interval - [start, end)
+ */
+int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id,
+                           struct bpos start, struct bpos end,
+                           u64 *journal_seq)
+{
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       int ret = 0;
+
+       /*
+        * XXX: whether we need mem/more iters depends on whether this btree id
+        * has triggers
+        */
+       bch2_trans_init(&trans, c, BTREE_ITER_MAX, 512);
+
+       iter = bch2_trans_get_iter(&trans, id, start, BTREE_ITER_INTENT);
+
+       ret = bch2_btree_delete_at_range(&trans, iter, end, journal_seq);
+       ret = bch2_trans_exit(&trans) ?: ret;
+
        BUG_ON(ret == -EINTR);
        return ret;
 }
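Since the range is half-open and bch2_btree_delete_at_range() now absorbs -EINTR restarts itself, callers can treat this as one blocking call. A hedged usage sketch; the wrapper and the inode number are illustrative:

/* Sketch, not from this patch: remove every key in directory
 * inode 42's dirent range, i.e. [POS(42, 0), POS(43, 0)). */
static int example_empty_dir(struct bch_fs *c)
{
        u64 journal_seq = 0;

        return bch2_btree_delete_range(c, BTREE_ID_DIRENTS,
                                       POS(42, 0), POS(43, 0),
                                       &journal_seq);
}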
index 9f09e5be09378958332d10f4b2a6bc5ccc5dd193..3cfe684a604f35b596300c041ba95f49bf5a0210 100644 (file)
@@ -495,9 +495,11 @@ void bch2_dev_usage_from_buckets(struct bch_fs *c)
 
                buckets = bucket_array(ca);
 
+               preempt_disable();
                for_each_bucket(g, buckets)
                        bch2_dev_usage_update(c, ca, c->usage_base,
                                              old, g->mark, false);
+               preempt_enable();
        }
 }
 
@@ -544,6 +546,67 @@ static inline void update_cached_sectors(struct bch_fs *c,
        update_replicas(c, fs_usage, &r.e, sectors);
 }
 
+static struct replicas_delta_list *
+replicas_deltas_realloc(struct btree_trans *trans, unsigned more)
+{
+       struct replicas_delta_list *d = trans->fs_usage_deltas;
+       unsigned new_size = d ? (d->size + more) * 2 : 128;
+
+       if (!d || d->used + more > d->size) {
+               d = krealloc(d, sizeof(*d) + new_size, GFP_NOIO|__GFP_ZERO);
+               BUG_ON(!d);
+
+               d->size = new_size;
+               trans->fs_usage_deltas = d;
+       }
+       return d;
+}
+
+static inline void update_replicas_list(struct btree_trans *trans,
+                                       struct bch_replicas_entry *r,
+                                       s64 sectors)
+{
+       struct replicas_delta_list *d;
+       struct replicas_delta *n;
+       unsigned b = replicas_entry_bytes(r) + 8;
+
+       d = replicas_deltas_realloc(trans, b);
+
+       n = (void *) d->d + d->used;
+       n->delta = sectors;
+       memcpy(&n->r, r, replicas_entry_bytes(r));
+       d->used += b;
+}
+
+static inline void update_cached_sectors_list(struct btree_trans *trans,
+                                             unsigned dev, s64 sectors)
+{
+       struct bch_replicas_padded r;
+
+       bch2_replicas_entry_cached(&r.e, dev);
+
+       update_replicas_list(trans, &r.e, sectors);
+}
+
+void bch2_replicas_delta_list_apply(struct bch_fs *c,
+                                   struct bch_fs_usage *fs_usage,
+                                   struct replicas_delta_list *r)
+{
+       struct replicas_delta *d = r->d;
+       struct replicas_delta *top = (void *) r->d + r->used;
+
+       acc_u64s((u64 *) fs_usage,
+                (u64 *) &r->fs_usage, sizeof(*fs_usage) / sizeof(u64));
+
+       while (d != top) {
+               BUG_ON((void *) d > (void *) top);
+
+               update_replicas(c, fs_usage, &d->r, d->delta);
+
+               d = (void *) d + replicas_entry_bytes(&d->r) + 8;
+       }
+}
+
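The delta list is now a single heap-allocated, doubling buffer instead of a fixed 256-byte pad: each record is eight bytes of delta followed immediately by its variable-length bch_replicas_entry, and both the append path above and the apply loop step by replicas_entry_bytes(&d->r) + 8. A sketch of that traversal, with an illustrative helper name:

/* Illustration only, not part of the patch: advance to the next
 * packed record in a replicas_delta_list. */
static inline struct replicas_delta *
example_replicas_delta_next(struct replicas_delta *d)
{
        return (void *) d + replicas_entry_bytes(&d->r) + 8;
}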
 #define do_mark_fn(fn, c, pos, flags, ...)                             \
 ({                                                                     \
        int gc, ret = 0;                                                \
@@ -623,23 +686,20 @@ void bch2_mark_alloc_bucket(struct bch_fs *c, struct bch_dev *ca,
 }
 
 static int bch2_mark_alloc(struct bch_fs *c, struct bkey_s_c k,
-                          bool inserting,
                           struct bch_fs_usage *fs_usage,
-                          unsigned journal_seq, unsigned flags,
-                          bool gc)
+                          u64 journal_seq, unsigned flags)
 {
+       bool gc = flags & BCH_BUCKET_MARK_GC;
        struct bkey_alloc_unpacked u;
        struct bch_dev *ca;
        struct bucket *g;
        struct bucket_mark old, m;
 
-       if (!inserting)
-               return 0;
-
        /*
         * alloc btree is read in by bch2_alloc_read, not gc:
         */
-       if (flags & BCH_BUCKET_MARK_GC)
+       if ((flags & BCH_BUCKET_MARK_GC) &&
+           !(flags & BCH_BUCKET_MARK_BUCKET_INVALIDATE))
                return 0;
 
        ca = bch_dev_bkey_exists(c, k.k->p.inode);
@@ -650,18 +710,21 @@ static int bch2_mark_alloc(struct bch_fs *c, struct bkey_s_c k,
        g = __bucket(ca, k.k->p.offset, gc);
        u = bch2_alloc_unpack(k);
 
-       old = bucket_data_cmpxchg(c, ca, fs_usage, g, m, ({
+       old = bucket_cmpxchg(g, m, ({
                m.gen                   = u.gen;
                m.data_type             = u.data_type;
                m.dirty_sectors         = u.dirty_sectors;
                m.cached_sectors        = u.cached_sectors;
 
-               if (!(flags & BCH_BUCKET_MARK_GC)) {
+               if (journal_seq) {
                        m.journal_seq_valid     = 1;
                        m.journal_seq           = journal_seq;
                }
        }));
 
+       if (!(flags & BCH_BUCKET_MARK_ALLOC_READ))
+               bch2_dev_usage_update(c, ca, fs_usage, old, m, gc);
+
        g->io_time[READ]        = u.read_time;
        g->io_time[WRITE]       = u.write_time;
        g->oldest_gen           = u.oldest_gen;
@@ -672,7 +735,8 @@ static int bch2_mark_alloc(struct bch_fs *c, struct bkey_s_c k,
         * not:
         */
 
-       if (old.cached_sectors) {
+       if ((flags & BCH_BUCKET_MARK_BUCKET_INVALIDATE) &&
+           old.cached_sectors) {
                update_cached_sectors(c, fs_usage, ca->dev_idx,
                                      -old.cached_sectors);
                trace_invalidate(ca, bucket_to_sector(ca, k.k->p.offset),
@@ -759,11 +823,12 @@ static s64 ptr_disk_sectors_delta(struct extent_ptr_decoded p,
 
 static void bucket_set_stripe(struct bch_fs *c,
                              const struct bch_stripe *v,
-                             bool enabled,
                              struct bch_fs_usage *fs_usage,
                              u64 journal_seq,
-                             bool gc)
+                             unsigned flags)
 {
+       bool enabled = !(flags & BCH_BUCKET_MARK_OVERWRITE);
+       bool gc = flags & BCH_BUCKET_MARK_GC;
        unsigned i;
 
        for (i = 0; i < v->nr_blocks; i++) {
@@ -789,9 +854,9 @@ static bool bch2_mark_pointer(struct bch_fs *c,
                              struct extent_ptr_decoded p,
                              s64 sectors, enum bch_data_type data_type,
                              struct bch_fs_usage *fs_usage,
-                             unsigned journal_seq, unsigned flags,
-                             bool gc)
+                             u64 journal_seq, unsigned flags)
 {
+       bool gc = flags & BCH_BUCKET_MARK_GC;
        struct bucket_mark old, new;
        struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
        struct bucket *g = PTR_BUCKET(ca, &p.ptr, gc);
@@ -858,9 +923,9 @@ static int bch2_mark_stripe_ptr(struct bch_fs *c,
                                struct bch_extent_stripe_ptr p,
                                enum bch_data_type data_type,
                                struct bch_fs_usage *fs_usage,
-                               s64 sectors, unsigned flags,
-                               bool gc)
+                               s64 sectors, unsigned flags)
 {
+       bool gc = flags & BCH_BUCKET_MARK_GC;
        struct stripe *m;
        unsigned old, new, nr_data;
        int blocks_nonempty_delta;
@@ -913,8 +978,7 @@ static int bch2_mark_stripe_ptr(struct bch_fs *c,
 static int bch2_mark_extent(struct bch_fs *c, struct bkey_s_c k,
                            s64 sectors, enum bch_data_type data_type,
                            struct bch_fs_usage *fs_usage,
-                           unsigned journal_seq, unsigned flags,
-                           bool gc)
+                           unsigned journal_seq, unsigned flags)
 {
        struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
        const union bch_extent_entry *entry;
@@ -935,7 +999,7 @@ static int bch2_mark_extent(struct bch_fs *c, struct bkey_s_c k,
                        ? sectors
                        : ptr_disk_sectors_delta(p, sectors);
                bool stale = bch2_mark_pointer(c, p, disk_sectors, data_type,
-                                       fs_usage, journal_seq, flags, gc);
+                                       fs_usage, journal_seq, flags);
 
                if (p.ptr.cached) {
                        if (disk_sectors && !stale)
@@ -948,7 +1012,7 @@ static int bch2_mark_extent(struct bch_fs *c, struct bkey_s_c k,
                        for (i = 0; i < p.ec_nr; i++) {
                                ret = bch2_mark_stripe_ptr(c, p.ec[i],
                                                data_type, fs_usage,
-                                               disk_sectors, flags, gc);
+                                               disk_sectors, flags);
                                if (ret)
                                        return ret;
                        }
@@ -964,11 +1028,10 @@ static int bch2_mark_extent(struct bch_fs *c, struct bkey_s_c k,
 }
 
 static int bch2_mark_stripe(struct bch_fs *c, struct bkey_s_c k,
-                           bool inserting,
                            struct bch_fs_usage *fs_usage,
-                           u64 journal_seq, unsigned flags,
-                           bool gc)
+                           u64 journal_seq, unsigned flags)
 {
+       bool gc = flags & BCH_BUCKET_MARK_GC;
        struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);
        size_t idx = s.k->p.offset;
        struct stripe *m = genradix_ptr(&c->stripes[gc], idx);
@@ -976,19 +1039,14 @@ static int bch2_mark_stripe(struct bch_fs *c, struct bkey_s_c k,
 
        spin_lock(&c->ec_stripes_heap_lock);
 
-       if (!m || (!inserting && !m->alive)) {
+       if (!m || ((flags & BCH_BUCKET_MARK_OVERWRITE) && !m->alive)) {
                spin_unlock(&c->ec_stripes_heap_lock);
                bch_err_ratelimited(c, "error marking nonexistent stripe %zu",
                                    idx);
                return -1;
        }
 
-       if (!gc && m->alive)
-               bch2_stripes_heap_del(c, m, idx);
-
-       memset(m, 0, sizeof(*m));
-
-       if (inserting) {
+       if (!(flags & BCH_BUCKET_MARK_OVERWRITE)) {
                m->sectors      = le16_to_cpu(s.v->sectors);
                m->algorithm    = s.v->algorithm;
                m->nr_blocks    = s.v->nr_blocks;
@@ -996,11 +1054,11 @@ static int bch2_mark_stripe(struct bch_fs *c, struct bkey_s_c k,
 
                bch2_bkey_to_replicas(&m->r.e, k);
 
-       /*
-        * XXX: account for stripes somehow here
-        */
+               /*
+                * XXX: account for stripes somehow here
+                */
 #if 0
-       update_replicas(c, fs_usage, &m->r.e, stripe_sectors);
+               update_replicas(c, fs_usage, &m->r.e, stripe_sectors);
 #endif
 
                /* gc recalculates these fields: */
@@ -1013,53 +1071,54 @@ static int bch2_mark_stripe(struct bch_fs *c, struct bkey_s_c k,
                }
 
                if (!gc)
-                       bch2_stripes_heap_insert(c, m, idx);
-               else
-                       m->alive = true;
+                       bch2_stripes_heap_update(c, m, idx);
+               m->alive        = true;
+       } else {
+               if (!gc)
+                       bch2_stripes_heap_del(c, m, idx);
+               memset(m, 0, sizeof(*m));
        }
 
        spin_unlock(&c->ec_stripes_heap_lock);
 
-       bucket_set_stripe(c, s.v, inserting, fs_usage, 0, gc);
+       bucket_set_stripe(c, s.v, fs_usage, 0, flags);
        return 0;
 }
 
 int bch2_mark_key_locked(struct bch_fs *c,
-                  struct bkey_s_c k,
-                  bool inserting, s64 sectors,
+                  struct bkey_s_c k, s64 sectors,
                   struct bch_fs_usage *fs_usage,
                   u64 journal_seq, unsigned flags)
 {
-       bool gc = flags & BCH_BUCKET_MARK_GC;
        int ret = 0;
 
        preempt_disable();
 
-       if (!fs_usage || gc)
-               fs_usage = fs_usage_ptr(c, journal_seq, gc);
+       if (!fs_usage || (flags & BCH_BUCKET_MARK_GC))
+               fs_usage = fs_usage_ptr(c, journal_seq,
+                                       flags & BCH_BUCKET_MARK_GC);
 
        switch (k.k->type) {
        case KEY_TYPE_alloc:
-               ret = bch2_mark_alloc(c, k, inserting,
-                               fs_usage, journal_seq, flags, gc);
+               ret = bch2_mark_alloc(c, k, fs_usage, journal_seq, flags);
                break;
        case KEY_TYPE_btree_ptr:
-               ret = bch2_mark_extent(c, k, inserting
-                               ?  c->opts.btree_node_size
-                               : -c->opts.btree_node_size,
-                               BCH_DATA_BTREE,
-                               fs_usage, journal_seq, flags, gc);
+               sectors = !(flags & BCH_BUCKET_MARK_OVERWRITE)
+                       ?  c->opts.btree_node_size
+                       : -c->opts.btree_node_size;
+
+               ret = bch2_mark_extent(c, k, sectors, BCH_DATA_BTREE,
+                               fs_usage, journal_seq, flags);
                break;
        case KEY_TYPE_extent:
                ret = bch2_mark_extent(c, k, sectors, BCH_DATA_USER,
-                               fs_usage, journal_seq, flags, gc);
+                               fs_usage, journal_seq, flags);
                break;
        case KEY_TYPE_stripe:
-               ret = bch2_mark_stripe(c, k, inserting,
-                               fs_usage, journal_seq, flags, gc);
+               ret = bch2_mark_stripe(c, k, fs_usage, journal_seq, flags);
                break;
        case KEY_TYPE_inode:
-               if (inserting)
+               if (!(flags & BCH_BUCKET_MARK_OVERWRITE))
                        fs_usage->nr_inodes++;
                else
                        fs_usage->nr_inodes--;
@@ -1083,14 +1142,14 @@ int bch2_mark_key_locked(struct bch_fs *c,
 }
 
 int bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
-                 bool inserting, s64 sectors,
+                 s64 sectors,
                  struct bch_fs_usage *fs_usage,
                  u64 journal_seq, unsigned flags)
 {
        int ret;
 
        percpu_down_read_preempt_disable(&c->mark_lock);
-       ret = bch2_mark_key_locked(c, k, inserting, sectors,
+       ret = bch2_mark_key_locked(c, k, sectors,
                                   fs_usage, journal_seq, flags);
        percpu_up_read_preempt_enable(&c->mark_lock);
 
@@ -1130,9 +1189,9 @@ inline int bch2_mark_overwrite(struct btree_trans *trans,
                        sectors = old.k->p.offset - new->k.p.offset;
                        BUG_ON(sectors <= 0);
 
-                       bch2_mark_key_locked(c, old, true, sectors,
+                       bch2_mark_key_locked(c, old, sectors,
                                fs_usage, trans->journal_res.seq,
-                               flags);
+                               BCH_BUCKET_MARK_INSERT|flags);
 
                        sectors = bkey_start_offset(&new->k) -
                                old.k->p.offset;
@@ -1142,8 +1201,9 @@ inline int bch2_mark_overwrite(struct btree_trans *trans,
                BUG_ON(sectors >= 0);
        }
 
-       return bch2_mark_key_locked(c, old, false, sectors, fs_usage,
-                                   trans->journal_res.seq, flags) ?: 1;
+       return bch2_mark_key_locked(c, old, sectors, fs_usage,
+                                   trans->journal_res.seq,
+                                   BCH_BUCKET_MARK_OVERWRITE|flags) ?: 1;
 }
 
 int bch2_mark_update(struct btree_trans *trans,
@@ -1162,10 +1222,11 @@ int bch2_mark_update(struct btree_trans *trans,
                return 0;
 
        if (!(trans->flags & BTREE_INSERT_NOMARK_INSERT))
-               bch2_mark_key_locked(c, bkey_i_to_s_c(insert->k), true,
+               bch2_mark_key_locked(c, bkey_i_to_s_c(insert->k),
                        bpos_min(insert->k->k.p, b->key.k.p).offset -
                        bkey_start_offset(&insert->k->k),
-                       fs_usage, trans->journal_res.seq, flags);
+                       fs_usage, trans->journal_res.seq,
+                       BCH_BUCKET_MARK_INSERT|flags);
 
        if (unlikely(trans->flags & BTREE_INSERT_NOMARK_OVERWRITES))
                return 0;
@@ -1246,46 +1307,6 @@ void bch2_trans_fs_usage_apply(struct btree_trans *trans,
 
 /* trans_mark: */
 
-static inline void update_replicas_list(struct replicas_delta_list *d,
-                                       struct bch_replicas_entry *r,
-                                       s64 sectors)
-{
-       d->top->delta = sectors;
-       memcpy(&d->top->r, r, replicas_entry_bytes(r));
-
-       d->top = (void *) d->top + replicas_entry_bytes(r) + 8;
-
-       BUG_ON((void *) d->top > (void *) d->d + sizeof(d->pad));
-}
-
-static inline void update_cached_sectors_list(struct replicas_delta_list *d,
-                                             unsigned dev, s64 sectors)
-{
-       struct bch_replicas_padded r;
-
-       bch2_replicas_entry_cached(&r.e, dev);
-
-       update_replicas_list(d, &r.e, sectors);
-}
-
-void bch2_replicas_delta_list_apply(struct bch_fs *c,
-                                   struct bch_fs_usage *fs_usage,
-                                   struct replicas_delta_list *r)
-{
-       struct replicas_delta *d = r->d;
-
-       acc_u64s((u64 *) fs_usage,
-                (u64 *) &r->fs_usage, sizeof(*fs_usage) / sizeof(u64));
-
-       while (d != r->top) {
-               BUG_ON((void *) d > (void *) r->top);
-
-               update_replicas(c, fs_usage, &d->r, d->delta);
-
-               d = (void *) d + replicas_entry_bytes(&d->r) + 8;
-       }
-}
-
 static int trans_get_key(struct btree_trans *trans,
                         enum btree_id btree_id, struct bpos pos,
                         struct btree_insert_entry **insert,
@@ -1347,8 +1368,7 @@ static int trans_update_key(struct btree_trans *trans,
 
 static int bch2_trans_mark_pointer(struct btree_trans *trans,
                        struct extent_ptr_decoded p,
-                       s64 sectors, enum bch_data_type data_type,
-                       struct replicas_delta_list *d)
+                       s64 sectors, enum bch_data_type data_type)
 {
        struct bch_fs *c = trans->c;
        struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
@@ -1409,8 +1429,7 @@ out:
 
 static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans,
                        struct bch_extent_stripe_ptr p,
-                       s64 sectors, enum bch_data_type data_type,
-                       struct replicas_delta_list *d)
+                       s64 sectors, enum bch_data_type data_type)
 {
        struct bch_replicas_padded r;
        struct btree_insert_entry *insert;
@@ -1455,7 +1474,7 @@ static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans,
 
        bch2_bkey_to_replicas(&r.e, s.s_c);
 
-       update_replicas_list(d, &r.e, sectors);
+       update_replicas_list(trans, &r.e, sectors);
 out:
        bch2_trans_iter_put(trans, iter);
        return ret;
@@ -1463,8 +1482,7 @@ out:
 
 static int bch2_trans_mark_extent(struct btree_trans *trans,
                        struct bkey_s_c k,
-                       s64 sectors, enum bch_data_type data_type,
-                       struct replicas_delta_list *d)
+                       s64 sectors, enum bch_data_type data_type)
 {
        struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
        const union bch_extent_entry *entry;
@@ -1487,7 +1505,7 @@ static int bch2_trans_mark_extent(struct btree_trans *trans,
                        : ptr_disk_sectors_delta(p, sectors);
 
                ret = bch2_trans_mark_pointer(trans, p, disk_sectors,
-                                             data_type, d);
+                                             data_type);
                if (ret < 0)
                        return ret;
 
@@ -1495,7 +1513,7 @@ static int bch2_trans_mark_extent(struct btree_trans *trans,
 
                if (p.ptr.cached) {
                        if (disk_sectors && !stale)
-                               update_cached_sectors_list(d, p.ptr.dev,
+                               update_cached_sectors_list(trans, p.ptr.dev,
                                                           disk_sectors);
                } else if (!p.ec_nr) {
                        dirty_sectors          += disk_sectors;
@@ -1503,7 +1521,7 @@ static int bch2_trans_mark_extent(struct btree_trans *trans,
                } else {
                        for (i = 0; i < p.ec_nr; i++) {
                                ret = bch2_trans_mark_stripe_ptr(trans, p.ec[i],
-                                               disk_sectors, data_type, d);
+                                               disk_sectors, data_type);
                                if (ret)
                                        return ret;
                        }
@@ -1513,29 +1531,32 @@ static int bch2_trans_mark_extent(struct btree_trans *trans,
        }
 
        if (dirty_sectors)
-               update_replicas_list(d, &r.e, dirty_sectors);
+               update_replicas_list(trans, &r.e, dirty_sectors);
 
        return 0;
 }
 
-int bch2_trans_mark_key(struct btree_trans *trans,
-                       struct bkey_s_c k,
-                       bool inserting, s64 sectors,
-                       struct replicas_delta_list *d)
+int bch2_trans_mark_key(struct btree_trans *trans, struct bkey_s_c k,
+                       s64 sectors, unsigned flags)
 {
+       struct replicas_delta_list *d;
        struct bch_fs *c = trans->c;
 
        switch (k.k->type) {
        case KEY_TYPE_btree_ptr:
-               return bch2_trans_mark_extent(trans, k, inserting
-                               ?  c->opts.btree_node_size
-                               : -c->opts.btree_node_size,
-                               BCH_DATA_BTREE, d);
+               sectors = !(flags & BCH_BUCKET_MARK_OVERWRITE)
+                       ?  c->opts.btree_node_size
+                       : -c->opts.btree_node_size;
+
+               return bch2_trans_mark_extent(trans, k, sectors,
+                                             BCH_DATA_BTREE);
        case KEY_TYPE_extent:
-               return bch2_trans_mark_extent(trans, k,
-                               sectors, BCH_DATA_USER, d);
+               return bch2_trans_mark_extent(trans, k, sectors,
+                                             BCH_DATA_USER);
        case KEY_TYPE_inode:
-               if (inserting)
+               d = replicas_deltas_realloc(trans, 0);
+
+               if (!(flags & BCH_BUCKET_MARK_OVERWRITE))
                        d->fs_usage.nr_inodes++;
                else
                        d->fs_usage.nr_inodes--;
@@ -1543,6 +1564,8 @@ int bch2_trans_mark_key(struct btree_trans *trans,
        case KEY_TYPE_reservation: {
                unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas;
 
+               d = replicas_deltas_realloc(trans, 0);
+
                sectors *= replicas;
                replicas = clamp_t(unsigned, replicas, 1,
                                   ARRAY_SIZE(d->fs_usage.persistent_reserved));
@@ -1557,8 +1580,7 @@ int bch2_trans_mark_key(struct btree_trans *trans,
 }
 
 int bch2_trans_mark_update(struct btree_trans *trans,
-                          struct btree_insert_entry *insert,
-                          struct replicas_delta_list *d)
+                          struct btree_insert_entry *insert)
 {
        struct btree_iter       *iter = insert->iter;
        struct btree            *b = iter->l[0].b;
@@ -1570,9 +1592,10 @@ int bch2_trans_mark_update(struct btree_trans *trans,
                return 0;
 
        ret = bch2_trans_mark_key(trans,
-                       bkey_i_to_s_c(insert->k), true,
+                       bkey_i_to_s_c(insert->k),
                        bpos_min(insert->k->k.p, b->key.k.p).offset -
-                       bkey_start_offset(&insert->k->k), d);
+                       bkey_start_offset(&insert->k->k),
+                       BCH_BUCKET_MARK_INSERT);
        if (ret)
                return ret;
 
@@ -1606,8 +1629,8 @@ int bch2_trans_mark_update(struct btree_trans *trans,
                                sectors = k.k->p.offset - insert->k->k.p.offset;
                                BUG_ON(sectors <= 0);
 
-                               ret = bch2_trans_mark_key(trans, k, true,
-                                                         sectors, d);
+                               ret = bch2_trans_mark_key(trans, k, sectors,
+                                               BCH_BUCKET_MARK_INSERT);
                                if (ret)
                                        return ret;
 
@@ -1619,7 +1642,8 @@ int bch2_trans_mark_update(struct btree_trans *trans,
                        BUG_ON(sectors >= 0);
                }
 
-               ret = bch2_trans_mark_key(trans, k, false, sectors, d);
+               ret = bch2_trans_mark_key(trans, k, sectors,
+                                         BCH_BUCKET_MARK_OVERWRITE);
                if (ret)
                        return ret;
 
index a32c25d8f2980b6b7e3aeb284efa547a34b58c10..65a934f88781e79b326533adf2695eda8bfb5e33 100644 (file)
@@ -248,15 +248,17 @@ void bch2_mark_metadata_bucket(struct bch_fs *, struct bch_dev *,
                               size_t, enum bch_data_type, unsigned,
                               struct gc_pos, unsigned);
 
-#define BCH_BUCKET_MARK_GC                     (1 << 0)
-#define BCH_BUCKET_MARK_NOATOMIC               (1 << 1)
-
-int bch2_mark_key_locked(struct bch_fs *, struct bkey_s_c,
-                 bool, s64, struct bch_fs_usage *,
-                 u64, unsigned);
-int bch2_mark_key(struct bch_fs *, struct bkey_s_c,
-                 bool, s64, struct bch_fs_usage *,
-                 u64, unsigned);
+#define BCH_BUCKET_MARK_INSERT                 (1 << 0)
+#define BCH_BUCKET_MARK_OVERWRITE              (1 << 1)
+#define BCH_BUCKET_MARK_BUCKET_INVALIDATE      (1 << 2)
+#define BCH_BUCKET_MARK_GC                     (1 << 3)
+#define BCH_BUCKET_MARK_ALLOC_READ             (1 << 4)
+#define BCH_BUCKET_MARK_NOATOMIC               (1 << 5)
+
+int bch2_mark_key_locked(struct bch_fs *, struct bkey_s_c, s64,
+                        struct bch_fs_usage *, u64, unsigned);
+int bch2_mark_key(struct bch_fs *, struct bkey_s_c, s64,
+                 struct bch_fs_usage *, u64, unsigned);
 int bch2_fs_usage_apply(struct bch_fs *, struct bch_fs_usage *,
                        struct disk_reservation *, unsigned);
 
@@ -269,11 +271,9 @@ int bch2_mark_update(struct btree_trans *, struct btree_insert_entry *,
 void bch2_replicas_delta_list_apply(struct bch_fs *,
                                    struct bch_fs_usage *,
                                    struct replicas_delta_list *);
-int bch2_trans_mark_key(struct btree_trans *, struct bkey_s_c,
-                       bool, s64, struct replicas_delta_list *);
+int bch2_trans_mark_key(struct btree_trans *, struct bkey_s_c, s64, unsigned);
 int bch2_trans_mark_update(struct btree_trans *,
-                          struct btree_insert_entry *,
-                          struct replicas_delta_list *);
+                          struct btree_insert_entry *);
 void bch2_trans_fs_usage_apply(struct btree_trans *, struct bch_fs_usage *);
 
 /* disk reservations: */
index a333b9ec930277e8c8eaa4a634f16ab6af4909cc..309a5fb61e989031929cab1ee3dd51cc6fbaa855 100644 (file)
@@ -100,11 +100,10 @@ struct replicas_delta {
 } __packed;
 
 struct replicas_delta_list {
+       unsigned                size;
+       unsigned                used;
        struct bch_fs_usage     fs_usage;
-
-       struct replicas_delta   *top;
        struct replicas_delta   d[0];
-       u8                      pad[256];
 };
 
 /*
index b6cf6801e55288c10f1c4f679d691aa37facd13c..a4c1b8ada3e89977532ecad8eba438fb7bfe93ff 100644 (file)
@@ -280,22 +280,8 @@ void bch2_encrypt_bio(struct bch_fs *c, unsigned type,
        do_encrypt_sg(c->chacha20, nonce, sgl, bytes);
 }
 
-static inline bool bch2_checksum_mergeable(unsigned type)
-{
-
-       switch (type) {
-       case BCH_CSUM_NONE:
-       case BCH_CSUM_CRC32C:
-       case BCH_CSUM_CRC64:
-               return true;
-       default:
-               return false;
-       }
-}
-
-static struct bch_csum bch2_checksum_merge(unsigned type,
-                                          struct bch_csum a,
-                                          struct bch_csum b, size_t b_len)
+struct bch_csum bch2_checksum_merge(unsigned type, struct bch_csum a,
+                                   struct bch_csum b, size_t b_len)
 {
        BUG_ON(!bch2_checksum_mergeable(type));
 
index 580eff6677fdaea8f6cf43536430822779d56eac..2c0fbbb83273b47d7fd4e2355baf9ea844094432 100644 (file)
@@ -8,6 +8,22 @@
 #include <linux/crc64.h>
 #include <crypto/chacha.h>
 
+static inline bool bch2_checksum_mergeable(unsigned type)
+{
+
+       switch (type) {
+       case BCH_CSUM_NONE:
+       case BCH_CSUM_CRC32C:
+       case BCH_CSUM_CRC64:
+               return true;
+       default:
+               return false;
+       }
+}
+
+struct bch_csum bch2_checksum_merge(unsigned, struct bch_csum,
+                                   struct bch_csum, size_t);
+
 static inline u64 bch2_crc64_update(u64 crc, const void *p, size_t len)
 {
        return crc64_be(crc, p, len);
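
bch2_checksum_merge() loses its static qualifier in checksum.c and gains the declaration above, while bch2_checksum_mergeable() moves into checksum.h as an inline, so extent code can combine the stored checksums of two adjacent extents without rereading their data. A hedged usage sketch (variables assumed; the byte length is the right-hand extent's size, and bcachefs sizes are in 512-byte sectors, hence the `<< 9`):

	if (bch2_checksum_mergeable(crc_l.csum_type))
		crc_l.csum = bch2_checksum_merge(crc_l.csum_type,
						 crc_l.csum, crc_r.csum,
						 crc_r.uncompressed_size << 9);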
diff --git a/libbcachefs/debug.c b/libbcachefs/debug.c
index a22ac8d60bb012f67df4c25d1bbac77d2a748205..47b8dd74dc62fd3b5fbce70ea299253a4cbd3fbc 100644
@@ -220,7 +220,7 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf,
        if (!i->size)
                return i->ret;
 
-       bch2_trans_init(&trans, i->c);
+       bch2_trans_init(&trans, i->c, 0, 0);
 
        iter = bch2_trans_get_iter(&trans, i->id, i->from, BTREE_ITER_PREFETCH);
        k = bch2_btree_iter_peek(iter);
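
This is the first of the many call sites converted to the new bch2_trans_init() signature, which also absorbs the old bch2_trans_preload_iters() step. Judging from the values used across this commit (0, BTREE_ITER_MAX, 8, and byte counts like 1024), the two new arguments appear to be the number of iterators and the amount of transaction memory to reserve up front, with 0 meaning defaults; that reading is inferred from the call sites, not stated in the diff:

	struct btree_trans trans;

	bch2_trans_init(&trans, c, 0, 0);	/* defaults */

	/* heavy update paths reserve up front instead of preloading: */
	bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024);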
@@ -274,7 +274,7 @@ static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf,
        if (!i->size || !bkey_cmp(POS_MAX, i->from))
                return i->ret;
 
-       bch2_trans_init(&trans, i->c);
+       bch2_trans_init(&trans, i->c, 0, 0);
 
        for_each_btree_node(&trans, iter, i->id, i->from, 0, b) {
                bch2_btree_node_to_text(&PBUF(i->buf), i->c, b);
@@ -327,7 +327,7 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf,
        if (!i->size)
                return i->ret;
 
-       bch2_trans_init(&trans, i->c);
+       bch2_trans_init(&trans, i->c, 0, 0);
 
        iter = bch2_trans_get_iter(&trans, i->id, i->from, BTREE_ITER_PREFETCH);
 
diff --git a/libbcachefs/dirent.c b/libbcachefs/dirent.c
index b379780e703f4ff817efaf89cf01b45f4715dded..11e628876fffbb600974c9b97016a4dff19531f1 100644
@@ -312,7 +312,7 @@ u64 bch2_dirent_lookup(struct bch_fs *c, u64 dir_inum,
        struct bkey_s_c k;
        u64 inum = 0;
 
-       bch2_trans_init(&trans, c);
+       bch2_trans_init(&trans, c, 0, 0);
 
        iter = bch2_hash_lookup(&trans, bch2_dirent_hash_desc,
                                hash_info, dir_inum, name, 0);
@@ -369,7 +369,7 @@ int bch2_readdir(struct bch_fs *c, struct file *file,
        if (!dir_emit_dots(file, ctx))
                return 0;
 
-       bch2_trans_init(&trans, c);
+       bch2_trans_init(&trans, c, 0, 0);
 
        for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS,
                           POS(inode->v.i_ino, ctx->pos), 0, k, ret) {
diff --git a/libbcachefs/ec.c b/libbcachefs/ec.c
index a31d6cb23f6ddb6f80dfff2c5eb4af37f70bdc38..43cceb02955516be136ec0950d3c2cd552e67903 100644
@@ -113,7 +113,7 @@ const char *bch2_stripe_invalid(const struct bch_fs *c, struct bkey_s_c k)
            bkey_val_u64s(k.k) < stripe_val_u64s(s))
                return "incorrect value size";
 
-       return NULL;
+       return bch2_bkey_ptrs_invalid(c, k);
 }
 
 void bch2_stripe_to_text(struct printbuf *out, struct bch_fs *c,
@@ -134,6 +134,8 @@ void bch2_stripe_to_text(struct printbuf *out, struct bch_fs *c,
                pr_buf(out, " %u:%llu:%u", s->ptrs[i].dev,
                       (u64) s->ptrs[i].offset,
                       stripe_blockcount_get(s, i));
+
+       bch2_bkey_ptrs_to_text(out, c, k);
 }
 
 static int ptr_matches_stripe(struct bch_fs *c,
@@ -177,6 +179,25 @@ static int extent_matches_stripe(struct bch_fs *c,
        return -1;
 }
 
+static bool extent_has_stripe_ptr(struct bkey_s_c k, u64 idx)
+{
+       struct bkey_s_c_extent e;
+       const union bch_extent_entry *entry;
+
+       if (!bkey_extent_is_data(k.k))
+               return false;
+
+       e = bkey_s_c_to_extent(k);
+
+       extent_for_each_entry(e, entry)
+               if (extent_entry_type(entry) ==
+                   BCH_EXTENT_ENTRY_stripe_ptr &&
+                   entry->stripe_ptr.idx == idx)
+                       return true;
+
+       return false;
+}
+
 static void ec_stripe_key_init(struct bch_fs *c,
                               struct bkey_i_stripe *s,
                               struct open_buckets *blocks,
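
The new extent_has_stripe_ptr() helper above lets ec_stripe_update_ptrs() (further down in this file) skip extents that already carry a pointer into the stripe being built, instead of re-tagging them. Abridged, the call site takes the form:

	if (extent_has_stripe_ptr(k, s->key.k.p.offset)) {
		/* already points into this stripe, nothing to do */
		bch2_btree_iter_next(iter);
		continue;
	}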
@@ -419,7 +440,7 @@ int bch2_ec_read_extent(struct bch_fs *c, struct bch_read_bio *rbio)
        if (!buf)
                return -ENOMEM;
 
-       bch2_trans_init(&trans, c);
+       bch2_trans_init(&trans, c, 0, 0);
 
        iter = bch2_trans_get_iter(&trans, BTREE_ID_EC,
                                   POS(0, stripe_idx),
@@ -541,7 +562,7 @@ static int ec_stripe_mem_alloc(struct bch_fs *c,
        if (!__ec_stripe_mem_alloc(c, idx, GFP_NOWAIT))
                return ret;
 
-       bch2_btree_trans_unlock(iter->trans);
+       bch2_trans_unlock(iter->trans);
        ret = -EINTR;
 
        if (!__ec_stripe_mem_alloc(c, idx, GFP_KERNEL))
@@ -589,17 +610,21 @@ void bch2_stripes_heap_update(struct bch_fs *c,
        ec_stripes_heap *h = &c->ec_stripes_heap;
        size_t i;
 
-       heap_verify_backpointer(c, idx);
+       if (m->alive) {
+               heap_verify_backpointer(c, idx);
 
-       h->data[m->heap_idx].blocks_nonempty = m->blocks_nonempty;
+               h->data[m->heap_idx].blocks_nonempty = m->blocks_nonempty;
 
-       i = m->heap_idx;
-       heap_sift_up(h,   i, ec_stripes_heap_cmp,
-                    ec_stripes_heap_set_backpointer);
-       heap_sift_down(h, i, ec_stripes_heap_cmp,
-                      ec_stripes_heap_set_backpointer);
+               i = m->heap_idx;
+               heap_sift_up(h,   i, ec_stripes_heap_cmp,
+                            ec_stripes_heap_set_backpointer);
+               heap_sift_down(h, i, ec_stripes_heap_cmp,
+                              ec_stripes_heap_set_backpointer);
 
-       heap_verify_backpointer(c, idx);
+               heap_verify_backpointer(c, idx);
+       } else {
+               bch2_stripes_heap_insert(c, m, idx);
+       }
 
        if (stripe_idx_to_delete(c) >= 0)
                schedule_work(&c->ec_stripe_delete_work);
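
bch2_stripes_heap_update() used to assume the stripe already had a heap entry; it now checks m->alive and falls back to inserting, so callers no longer need to distinguish new stripes from known ones. Condensed, the new shape is:

	if (!m->alive) {
		bch2_stripes_heap_insert(c, m, idx);
	} else {
		/* resift the existing entry at m->heap_idx */
	}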
@@ -676,7 +701,7 @@ static int ec_stripe_bkey_insert(struct bch_fs *c,
        struct bkey_s_c k;
        int ret;
 
-       bch2_trans_init(&trans, c);
+       bch2_trans_init(&trans, c, 0, 0);
 retry:
        bch2_trans_begin(&trans);
 
@@ -743,8 +768,7 @@ static int ec_stripe_update_ptrs(struct bch_fs *c,
        BKEY_PADDED(k) tmp;
        int ret = 0, dev, idx;
 
-       bch2_trans_init(&trans, c);
-       bch2_trans_preload_iters(&trans);
+       bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
 
        iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
                                   bkey_start_pos(pos),
@@ -753,12 +777,19 @@ static int ec_stripe_update_ptrs(struct bch_fs *c,
        while ((k = bch2_btree_iter_peek(iter)).k &&
               !(ret = bkey_err(k)) &&
               bkey_cmp(bkey_start_pos(k.k), pos->p) < 0) {
+               if (extent_has_stripe_ptr(k, s->key.k.p.offset)) {
+                       bch2_btree_iter_next(iter);
+                       continue;
+               }
+
                idx = extent_matches_stripe(c, &s->key.v, k);
                if (idx < 0) {
                        bch2_btree_iter_next(iter);
                        continue;
                }
 
+               bch2_btree_iter_set_pos(iter, bkey_start_pos(k.k));
+
                dev = s->key.v.ptrs[idx].dev;
 
                bkey_reassemble(&tmp.k, k);
@@ -1207,7 +1238,7 @@ int bch2_stripes_write(struct bch_fs *c, unsigned flags, bool *wrote)
        new_key = kmalloc(255 * sizeof(u64), GFP_KERNEL);
        BUG_ON(!new_key);
 
-       bch2_trans_init(&trans, c);
+       bch2_trans_init(&trans, c, 0, 0);
 
        iter = bch2_trans_get_iter(&trans, BTREE_ID_EC, POS_MIN,
                                   BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
@@ -1243,10 +1274,12 @@ int bch2_stripes_read(struct bch_fs *c, struct journal_keys *journal_keys)
        if (ret)
                return ret;
 
-       bch2_trans_init(&trans, c);
+       bch2_trans_init(&trans, c, 0, 0);
 
        for_each_btree_key(&trans, iter, BTREE_ID_EC, POS_MIN, 0, k, ret)
-               bch2_mark_key(c, k, true, 0, NULL, 0, 0);
+               bch2_mark_key(c, k, 0, NULL, 0,
+                             BCH_BUCKET_MARK_ALLOC_READ|
+                             BCH_BUCKET_MARK_NOATOMIC);
 
        ret = bch2_trans_exit(&trans) ?: ret;
        if (ret) {
@@ -1257,7 +1290,9 @@ int bch2_stripes_read(struct bch_fs *c, struct journal_keys *journal_keys)
        for_each_journal_key(*journal_keys, i)
                if (i->btree_id == BTREE_ID_EC)
                        bch2_mark_key(c, bkey_i_to_s_c(i->k),
-                                     true, 0, NULL, 0, 0);
+                                     0, NULL, 0,
+                                     BCH_BUCKET_MARK_ALLOC_READ|
+                                     BCH_BUCKET_MARK_NOATOMIC);
 
        return 0;
 }
@@ -1270,7 +1305,7 @@ int bch2_ec_mem_alloc(struct bch_fs *c, bool gc)
        size_t i, idx = 0;
        int ret = 0;
 
-       bch2_trans_init(&trans, c);
+       bch2_trans_init(&trans, c, 0, 0);
 
        iter = bch2_trans_get_iter(&trans, BTREE_ID_EC, POS(0, U64_MAX), 0);
 
diff --git a/libbcachefs/extents.c b/libbcachefs/extents.c
index f8f29251a5e7564d5f67adedcda1d8f87bc6fec8..dffcc1445cea03ff42437e24fe38d04cc54a8f9a 100644
@@ -500,43 +500,8 @@ void bch2_ptr_swab(const struct bkey_format *f, struct bkey_packed *k)
        }
 }
 
-static const char *extent_ptr_invalid(const struct bch_fs *c,
-                                     struct bkey_s_c k,
-                                     const struct bch_extent_ptr *ptr,
-                                     unsigned size_ondisk,
-                                     bool metadata)
-{
-       struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
-       const struct bch_extent_ptr *ptr2;
-       struct bch_dev *ca;
-
-       if (ptr->dev >= c->sb.nr_devices ||
-           !c->devs[ptr->dev])
-               return "pointer to invalid device";
-
-       ca = bch_dev_bkey_exists(c, ptr->dev);
-       if (!ca)
-               return "pointer to invalid device";
-
-       bkey_for_each_ptr(ptrs, ptr2)
-               if (ptr != ptr2 && ptr->dev == ptr2->dev)
-                       return "multiple pointers to same device";
-
-       if (ptr->offset + size_ondisk > bucket_to_sector(ca, ca->mi.nbuckets))
-               return "offset past end of device";
-
-       if (ptr->offset < bucket_to_sector(ca, ca->mi.first_bucket))
-               return "offset before first bucket";
-
-       if (bucket_remainder(ca, ptr->offset) +
-           size_ondisk > ca->mi.bucket_size)
-               return "spans multiple buckets";
-
-       return NULL;
-}
-
-static void bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
-                             struct bkey_s_c k)
+void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
+                           struct bkey_s_c k)
 {
        struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
        const union bch_extent_entry *entry;
@@ -590,37 +555,109 @@ static void bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
        }
 }
 
-/* Btree ptrs */
+static const char *extent_ptr_invalid(const struct bch_fs *c,
+                                     struct bkey_s_c k,
+                                     const struct bch_extent_ptr *ptr,
+                                     unsigned size_ondisk,
+                                     bool metadata)
+{
+       struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+       const struct bch_extent_ptr *ptr2;
+       struct bch_dev *ca;
 
-const char *bch2_btree_ptr_invalid(const struct bch_fs *c, struct bkey_s_c k)
+       if (!bch2_dev_exists2(c, ptr->dev))
+               return "pointer to invalid device";
+
+       ca = bch_dev_bkey_exists(c, ptr->dev);
+       if (!ca)
+               return "pointer to invalid device";
+
+       bkey_for_each_ptr(ptrs, ptr2)
+               if (ptr != ptr2 && ptr->dev == ptr2->dev)
+                       return "multiple pointers to same device";
+
+       if (ptr->offset + size_ondisk > bucket_to_sector(ca, ca->mi.nbuckets))
+               return "offset past end of device";
+
+       if (ptr->offset < bucket_to_sector(ca, ca->mi.first_bucket))
+               return "offset before first bucket";
+
+       if (bucket_remainder(ca, ptr->offset) +
+           size_ondisk > ca->mi.bucket_size)
+               return "spans multiple buckets";
+
+       return NULL;
+}
+
+const char *bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k)
 {
        struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
        const union bch_extent_entry *entry;
-       const struct bch_extent_ptr *ptr;
+       struct bch_extent_crc_unpacked crc;
+       unsigned size_ondisk = k.k->size;
        const char *reason;
+       unsigned nonce = UINT_MAX;
 
-       if (bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX)
-               return "value too big";
+       if (k.k->type == KEY_TYPE_btree_ptr)
+               size_ondisk = c->opts.btree_node_size;
 
        bkey_extent_entry_for_each(ptrs, entry) {
                if (__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX)
                        return "invalid extent entry type";
 
-               if (!extent_entry_is_ptr(entry))
+               if (k.k->type == KEY_TYPE_btree_ptr &&
+                   !extent_entry_is_ptr(entry))
                        return "has non ptr field";
-       }
 
-       bkey_for_each_ptr(ptrs, ptr) {
-               reason = extent_ptr_invalid(c, k, ptr,
-                                           c->opts.btree_node_size,
-                                           true);
-               if (reason)
-                       return reason;
+               switch (extent_entry_type(entry)) {
+               case BCH_EXTENT_ENTRY_ptr:
+                       reason = extent_ptr_invalid(c, k, &entry->ptr,
+                                                   size_ondisk, false);
+                       if (reason)
+                               return reason;
+                       break;
+               case BCH_EXTENT_ENTRY_crc32:
+               case BCH_EXTENT_ENTRY_crc64:
+               case BCH_EXTENT_ENTRY_crc128:
+                       crc = bch2_extent_crc_unpack(k.k, entry_to_crc(entry));
+
+                       if (crc.offset + crc.live_size >
+                           crc.uncompressed_size)
+                               return "checksum offset + key size > uncompressed size";
+
+                       size_ondisk = crc.compressed_size;
+
+                       if (!bch2_checksum_type_valid(c, crc.csum_type))
+                               return "invalid checksum type";
+
+                       if (crc.compression_type >= BCH_COMPRESSION_NR)
+                               return "invalid compression type";
+
+                       if (bch2_csum_type_is_encryption(crc.csum_type)) {
+                               if (nonce == UINT_MAX)
+                                       nonce = crc.offset + crc.nonce;
+                               else if (nonce != crc.offset + crc.nonce)
+                                       return "incorrect nonce";
+                       }
+                       break;
+               case BCH_EXTENT_ENTRY_stripe_ptr:
+                       break;
+               }
        }
 
        return NULL;
 }
 
+/* Btree ptrs */
+
+const char *bch2_btree_ptr_invalid(const struct bch_fs *c, struct bkey_s_c k)
+{
+       if (bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX)
+               return "value too big";
+
+       return bch2_bkey_ptrs_invalid(c, k);
+}
+
 void bch2_btree_ptr_debugcheck(struct bch_fs *c, struct btree *b,
                               struct bkey_s_c k)
 {
@@ -665,13 +702,7 @@ err:
 void bch2_btree_ptr_to_text(struct printbuf *out, struct bch_fs *c,
                            struct bkey_s_c k)
 {
-       const char *invalid;
-
-       bkey_ptrs_to_text(out, c, k);
-
-       invalid = bch2_btree_ptr_invalid(c, k);
-       if (invalid)
-               pr_buf(out, " invalid: %s", invalid);
+       bch2_bkey_ptrs_to_text(out, c, k);
 }
 
 /* Extents */
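
Pointer validation and printing are now centralized: bch2_btree_ptr_invalid() above, bch2_extent_invalid() below, and bch2_stripe_invalid() in ec.c all reduce to a value-size check plus a call into the shared bch2_bkey_ptrs_invalid(), which understands ptr, crc32/64/128 and stripe_ptr entries; the *_to_text hooks likewise call the newly exported bch2_bkey_ptrs_to_text(). A hypothetical new pointer-carrying key type (not part of this commit, names invented) would follow the same pattern:

	const char *bch2_widget_ptr_invalid(const struct bch_fs *c,
					    struct bkey_s_c k)
	{
		if (bkey_val_u64s(k.k) > BKEY_WIDGET_PTR_VAL_U64s_MAX)
			return "value too big";

		return bch2_bkey_ptrs_invalid(c, k);
	}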
@@ -1260,60 +1291,10 @@ void bch2_insert_fixup_extent(struct btree_trans *trans,
 
 const char *bch2_extent_invalid(const struct bch_fs *c, struct bkey_s_c k)
 {
-       struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
-       const union bch_extent_entry *entry;
-       struct bch_extent_crc_unpacked crc;
-       const struct bch_extent_ptr *ptr;
-       unsigned size_ondisk = e.k->size;
-       const char *reason;
-       unsigned nonce = UINT_MAX;
-
-       if (bkey_val_u64s(e.k) > BKEY_EXTENT_VAL_U64s_MAX)
+       if (bkey_val_u64s(k.k) > BKEY_EXTENT_VAL_U64s_MAX)
                return "value too big";
 
-       extent_for_each_entry(e, entry) {
-               if (__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX)
-                       return "invalid extent entry type";
-
-               switch (extent_entry_type(entry)) {
-               case BCH_EXTENT_ENTRY_ptr:
-                       ptr = entry_to_ptr(entry);
-
-                       reason = extent_ptr_invalid(c, e.s_c, &entry->ptr,
-                                                   size_ondisk, false);
-                       if (reason)
-                               return reason;
-                       break;
-               case BCH_EXTENT_ENTRY_crc32:
-               case BCH_EXTENT_ENTRY_crc64:
-               case BCH_EXTENT_ENTRY_crc128:
-                       crc = bch2_extent_crc_unpack(e.k, entry_to_crc(entry));
-
-                       if (crc.offset + e.k->size >
-                           crc.uncompressed_size)
-                               return "checksum offset + key size > uncompressed size";
-
-                       size_ondisk = crc.compressed_size;
-
-                       if (!bch2_checksum_type_valid(c, crc.csum_type))
-                               return "invalid checksum type";
-
-                       if (crc.compression_type >= BCH_COMPRESSION_NR)
-                               return "invalid compression type";
-
-                       if (bch2_csum_type_is_encryption(crc.csum_type)) {
-                               if (nonce == UINT_MAX)
-                                       nonce = crc.offset + crc.nonce;
-                               else if (nonce != crc.offset + crc.nonce)
-                                       return "incorrect nonce";
-                       }
-                       break;
-               case BCH_EXTENT_ENTRY_stripe_ptr:
-                       break;
-               }
-       }
-
-       return NULL;
+       return bch2_bkey_ptrs_invalid(c, k);
 }
 
 void bch2_extent_debugcheck(struct bch_fs *c, struct btree *b,
@@ -1374,62 +1355,66 @@ void bch2_extent_debugcheck(struct bch_fs *c, struct btree *b,
 void bch2_extent_to_text(struct printbuf *out, struct bch_fs *c,
                         struct bkey_s_c k)
 {
-       const char *invalid;
+       bch2_bkey_ptrs_to_text(out, c, k);
+}
 
-       bkey_ptrs_to_text(out, c, k);
+static unsigned bch2_crc_field_size_max[] = {
+       [BCH_EXTENT_ENTRY_crc32] = CRC32_SIZE_MAX,
+       [BCH_EXTENT_ENTRY_crc64] = CRC64_SIZE_MAX,
+       [BCH_EXTENT_ENTRY_crc128] = CRC128_SIZE_MAX,
+};
 
-       invalid = bch2_extent_invalid(c, k);
-       if (invalid)
-               pr_buf(out, " invalid: %s", invalid);
+static void bch2_extent_crc_pack(union bch_extent_crc *dst,
+                                struct bch_extent_crc_unpacked src)
+{
+#define set_common_fields(_dst, _src)                                  \
+               _dst.csum_type          = _src.csum_type,               \
+               _dst.compression_type   = _src.compression_type,        \
+               _dst._compressed_size   = _src.compressed_size - 1,     \
+               _dst._uncompressed_size = _src.uncompressed_size - 1,   \
+               _dst.offset             = _src.offset
+
+       switch (extent_entry_type(to_entry(dst))) {
+       case BCH_EXTENT_ENTRY_crc32:
+               set_common_fields(dst->crc32, src);
+               dst->crc32.csum  = *((__le32 *) &src.csum.lo);
+               break;
+       case BCH_EXTENT_ENTRY_crc64:
+               set_common_fields(dst->crc64, src);
+               dst->crc64.nonce        = src.nonce;
+               dst->crc64.csum_lo      = src.csum.lo;
+               dst->crc64.csum_hi      = *((__le16 *) &src.csum.hi);
+               break;
+       case BCH_EXTENT_ENTRY_crc128:
+               set_common_fields(dst->crc128, src);
+               dst->crc128.nonce       = src.nonce;
+               dst->crc128.csum        = src.csum;
+               break;
+       default:
+               BUG();
+       }
+#undef set_common_fields
 }
 
 static void bch2_extent_crc_init(union bch_extent_crc *crc,
                                 struct bch_extent_crc_unpacked new)
 {
-#define common_fields(_crc)                                            \
-               .csum_type              = _crc.csum_type,               \
-               .compression_type       = _crc.compression_type,        \
-               ._compressed_size       = _crc.compressed_size - 1,     \
-               ._uncompressed_size     = _crc.uncompressed_size - 1,   \
-               .offset                 = _crc.offset
-
        if (bch_crc_bytes[new.csum_type]        <= 4 &&
-           new.uncompressed_size               <= CRC32_SIZE_MAX &&
-           new.nonce                           <= CRC32_NONCE_MAX) {
-               crc->crc32 = (struct bch_extent_crc32) {
-                       .type = 1 << BCH_EXTENT_ENTRY_crc32,
-                       common_fields(new),
-                       .csum                   = *((__le32 *) &new.csum.lo),
-               };
-               return;
-       }
-
-       if (bch_crc_bytes[new.csum_type]        <= 10 &&
-           new.uncompressed_size               <= CRC64_SIZE_MAX &&
-           new.nonce                           <= CRC64_NONCE_MAX) {
-               crc->crc64 = (struct bch_extent_crc64) {
-                       .type = 1 << BCH_EXTENT_ENTRY_crc64,
-                       common_fields(new),
-                       .nonce                  = new.nonce,
-                       .csum_lo                = new.csum.lo,
-                       .csum_hi                = *((__le16 *) &new.csum.hi),
-               };
-               return;
-       }
+           new.uncompressed_size - 1           <= CRC32_SIZE_MAX &&
+           new.nonce                           <= CRC32_NONCE_MAX)
+               crc->type = 1 << BCH_EXTENT_ENTRY_crc32;
+       else if (bch_crc_bytes[new.csum_type]   <= 10 &&
+                  new.uncompressed_size - 1    <= CRC64_SIZE_MAX &&
+                  new.nonce                    <= CRC64_NONCE_MAX)
+               crc->type = 1 << BCH_EXTENT_ENTRY_crc64;
+       else if (bch_crc_bytes[new.csum_type]   <= 16 &&
+                  new.uncompressed_size - 1    <= CRC128_SIZE_MAX &&
+                  new.nonce                    <= CRC128_NONCE_MAX)
+               crc->type = 1 << BCH_EXTENT_ENTRY_crc128;
+       else
+               BUG();
 
-       if (bch_crc_bytes[new.csum_type]        <= 16 &&
-           new.uncompressed_size               <= CRC128_SIZE_MAX &&
-           new.nonce                           <= CRC128_NONCE_MAX) {
-               crc->crc128 = (struct bch_extent_crc128) {
-                       .type = 1 << BCH_EXTENT_ENTRY_crc128,
-                       common_fields(new),
-                       .nonce                  = new.nonce,
-                       .csum                   = new.csum,
-               };
-               return;
-       }
-#undef common_fields
-       BUG();
+       bch2_extent_crc_pack(crc, new);
 }
 
 void bch2_extent_crc_append(struct bkey_i_extent *e,
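
The CRC-entry construction above is split in two: bch2_extent_crc_init() now only picks the smallest on-disk format whose checksum width, size field and nonce field can hold the values (note the bound is now `uncompressed_size - 1`, matching the `_uncompressed_size = size - 1` encoding), and the shared bch2_extent_crc_pack() fills in whichever format crc->type selects. The new bch2_crc_field_size_max[] table exposes the per-format size limits to the merge path below. A condensed restatement of the selection rule, for reference:

	static unsigned crc_entry_type_for(struct bch_extent_crc_unpacked u)
	{
		if (bch_crc_bytes[u.csum_type] <= 4 &&
		    u.uncompressed_size - 1 <= CRC32_SIZE_MAX &&
		    u.nonce <= CRC32_NONCE_MAX)
			return BCH_EXTENT_ENTRY_crc32;
		if (bch_crc_bytes[u.csum_type] <= 10 &&
		    u.uncompressed_size - 1 <= CRC64_SIZE_MAX &&
		    u.nonce <= CRC64_NONCE_MAX)
			return BCH_EXTENT_ENTRY_crc64;
		/* crc_init() BUG()s if even crc128 can't hold the values */
		return BCH_EXTENT_ENTRY_crc128;
	}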
@@ -1454,10 +1439,15 @@ static inline void __extent_entry_insert(struct bkey_i_extent *e,
 void bch2_extent_ptr_decoded_append(struct bkey_i_extent *e,
                                    struct extent_ptr_decoded *p)
 {
-       struct bch_extent_crc_unpacked crc;
+       struct bch_extent_crc_unpacked crc = bch2_extent_crc_unpack(&e->k, NULL);
        union bch_extent_entry *pos;
        unsigned i;
 
+       if (!bch2_crc_unpacked_cmp(crc, p->crc)) {
+               pos = e->v.start;
+               goto found;
+       }
+
        extent_for_each_crc(extent_i_to_s(e), crc, pos)
                if (!bch2_crc_unpacked_cmp(crc, p->crc)) {
                        pos = extent_entry_next(pos);
@@ -1535,46 +1525,101 @@ enum merge_result bch2_extent_merge(struct bch_fs *c,
 {
        struct bkey_s_extent el = bkey_i_to_s_extent(l);
        struct bkey_s_extent er = bkey_i_to_s_extent(r);
-       union bch_extent_entry *en_l, *en_r;
+       union bch_extent_entry *en_l = el.v->start;
+       union bch_extent_entry *en_r = er.v->start;
+       struct bch_extent_crc_unpacked crc_l, crc_r;
 
        if (bkey_val_u64s(&l->k) != bkey_val_u64s(&r->k))
                return BCH_MERGE_NOMERGE;
 
-       extent_for_each_entry(el, en_l) {
-               struct bch_extent_ptr *lp, *rp;
-               struct bch_dev *ca;
+       crc_l = bch2_extent_crc_unpack(el.k, NULL);
 
+       extent_for_each_entry(el, en_l) {
                en_r = vstruct_idx(er.v, (u64 *) en_l - el.v->_data);
 
-               if ((extent_entry_type(en_l) !=
-                    extent_entry_type(en_r)) ||
-                   !extent_entry_is_ptr(en_l))
+               if (extent_entry_type(en_l) != extent_entry_type(en_r))
                        return BCH_MERGE_NOMERGE;
 
-               lp = &en_l->ptr;
-               rp = &en_r->ptr;
+               switch (extent_entry_type(en_l)) {
+               case BCH_EXTENT_ENTRY_ptr: {
+                       const struct bch_extent_ptr *lp = &en_l->ptr;
+                       const struct bch_extent_ptr *rp = &en_r->ptr;
+                       struct bch_dev *ca;
 
-               if (lp->offset + el.k->size     != rp->offset ||
-                   lp->dev                     != rp->dev ||
-                   lp->gen                     != rp->gen)
-                       return BCH_MERGE_NOMERGE;
+                       if (lp->offset + crc_l.compressed_size != rp->offset ||
+                           lp->dev                     != rp->dev ||
+                           lp->gen                     != rp->gen)
+                               return BCH_MERGE_NOMERGE;
+
+                       /* We don't allow extents to straddle buckets: */
+                       ca = bch_dev_bkey_exists(c, lp->dev);
 
-               /* We don't allow extents to straddle buckets: */
-               ca = bch_dev_bkey_exists(c, lp->dev);
+                       if (PTR_BUCKET_NR(ca, lp) != PTR_BUCKET_NR(ca, rp))
+                               return BCH_MERGE_NOMERGE;
 
-               if (PTR_BUCKET_NR(ca, lp) != PTR_BUCKET_NR(ca, rp))
+                       break;
+               }
+               case BCH_EXTENT_ENTRY_stripe_ptr:
+                       if (en_l->stripe_ptr.block      != en_r->stripe_ptr.block ||
+                           en_l->stripe_ptr.idx        != en_r->stripe_ptr.idx)
+                               return BCH_MERGE_NOMERGE;
+                       break;
+               case BCH_EXTENT_ENTRY_crc32:
+               case BCH_EXTENT_ENTRY_crc64:
+               case BCH_EXTENT_ENTRY_crc128:
+                       crc_l = bch2_extent_crc_unpack(el.k, entry_to_crc(en_l));
+                       crc_r = bch2_extent_crc_unpack(er.k, entry_to_crc(en_r));
+
+                       if (crc_l.csum_type             != crc_r.csum_type ||
+                           crc_l.compression_type      != crc_r.compression_type ||
+                           crc_l.nonce                 != crc_r.nonce)
+                               return BCH_MERGE_NOMERGE;
+
+                       if (crc_l.offset + crc_l.live_size != crc_l.compressed_size ||
+                           crc_r.offset)
+                               return BCH_MERGE_NOMERGE;
+
+                       if (!bch2_checksum_mergeable(crc_l.csum_type))
+                               return BCH_MERGE_NOMERGE;
+
+                       if (crc_l.compression_type)
+                               return BCH_MERGE_NOMERGE;
+
+                       if (crc_l.csum_type &&
+                           crc_l.uncompressed_size +
+                           crc_r.uncompressed_size > c->sb.encoded_extent_max)
+                               return BCH_MERGE_NOMERGE;
+
+                       if (crc_l.uncompressed_size + crc_r.uncompressed_size - 1 >
+                           bch2_crc_field_size_max[extent_entry_type(en_l)])
+                               return BCH_MERGE_NOMERGE;
+
+                       break;
+               default:
                        return BCH_MERGE_NOMERGE;
+               }
        }
 
-       l->k.needs_whiteout |= r->k.needs_whiteout;
+       extent_for_each_entry(el, en_l) {
+               struct bch_extent_crc_unpacked crc_l, crc_r;
 
-       /* Keys with no pointers aren't restricted to one bucket and could
-        * overflow KEY_SIZE
-        */
-       if ((u64) l->k.size + r->k.size > KEY_SIZE_MAX) {
-               bch2_key_resize(&l->k, KEY_SIZE_MAX);
-               bch2_cut_front(l->k.p, r);
-               return BCH_MERGE_PARTIAL;
+               en_r = vstruct_idx(er.v, (u64 *) en_l - el.v->_data);
+
+               if (!extent_entry_is_crc(en_l))
+                       continue;
+
+               crc_l = bch2_extent_crc_unpack(el.k, entry_to_crc(en_l));
+               crc_r = bch2_extent_crc_unpack(er.k, entry_to_crc(en_r));
+
+               crc_l.csum = bch2_checksum_merge(crc_l.csum_type,
+                                                crc_l.csum,
+                                                crc_r.csum,
+                                                crc_r.uncompressed_size << 9);
+
+               crc_l.uncompressed_size += crc_r.uncompressed_size;
+               crc_l.compressed_size   += crc_r.compressed_size;
+
+               bch2_extent_crc_pack(entry_to_crc(en_l), crc_l);
        }
 
        bch2_key_resize(&l->k, l->k.size + r->k.size);
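
bch2_extent_merge() previously refused to merge anything with non-pointer entries; it now makes two passes over the entries. The first pass checks that every pair is mergeable: pointers must be contiguous within one bucket, stripe pointers identical, and CRC entries must agree on checksum/compression type and nonce, be uncompressed, use a mergeable checksum, and produce a merged size that still fits the format (bch2_crc_field_size_max) and encoded_extent_max. The second pass then combines the checksums without touching the data; for example, for two adjacent 8-sector crc32c extents:

	/* 8 sectors << 9 = 4096 bytes covered by the right-hand checksum */
	crc_l.csum = bch2_checksum_merge(BCH_CSUM_CRC32C,
					 crc_l.csum, crc_r.csum,
					 8 << 9);

The KEY_SIZE_MAX partial-merge escape hatch is dropped here (extents with pointers are bucket-limited anyway) but kept for reservations below, which carry no pointers.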
@@ -1670,7 +1715,7 @@ bool bch2_check_range_allocated(struct bch_fs *c, struct bpos pos, u64 size,
 
        end.offset += size;
 
-       bch2_trans_init(&trans, c);
+       bch2_trans_init(&trans, c, 0, 0);
 
        for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, pos,
                           BTREE_ITER_SLOTS, k, err) {
@@ -1745,11 +1790,6 @@ enum merge_result bch2_reservation_merge(struct bch_fs *c,
            li->v.nr_replicas != ri->v.nr_replicas)
                return BCH_MERGE_NOMERGE;
 
-       l->k.needs_whiteout |= r->k.needs_whiteout;
-
-       /* Keys with no pointers aren't restricted to one bucket and could
-        * overflow KEY_SIZE
-        */
        if ((u64) l->k.size + r->k.size > KEY_SIZE_MAX) {
                bch2_key_resize(&l->k, KEY_SIZE_MAX);
                bch2_cut_front(l->k.p, r);
diff --git a/libbcachefs/extents.h b/libbcachefs/extents.h
index 77d698418fb58bc1c07187a66e95c33aa4b2a2ed..9bf156d0a4326dff1448bb756683f3227f594da6 100644
@@ -358,6 +358,10 @@ int bch2_bkey_pick_read_device(struct bch_fs *, struct bkey_s_c,
                               struct bch_io_failures *,
                               struct extent_ptr_decoded *);
 
+void bch2_bkey_ptrs_to_text(struct printbuf *, struct bch_fs *,
+                           struct bkey_s_c);
+const char *bch2_bkey_ptrs_invalid(const struct bch_fs *, struct bkey_s_c);
+
 /* bch_btree_ptr: */
 
 const char *bch2_btree_ptr_invalid(const struct bch_fs *, struct bkey_s_c);
diff --git a/libbcachefs/fs-io.c b/libbcachefs/fs-io.c
index 7133482eeab6b1f75db098c5701e8cd8564aa4b6..81a86664c99e572d01e2eb533d101a7d56e598ec 100644
@@ -322,10 +322,10 @@ static int bch2_extent_update(struct btree_trans *trans,
        if (i_sectors_delta ||
            new_i_size > inode->ei_inode.bi_size) {
                if (c->opts.new_inode_updates) {
-                       bch2_btree_trans_unlock(trans);
+                       bch2_trans_unlock(trans);
                        mutex_lock(&inode->ei_update_lock);
 
-                       if (!bch2_btree_trans_relock(trans)) {
+                       if (!bch2_trans_relock(trans)) {
                                mutex_unlock(&inode->ei_update_lock);
                                return -EINTR;
                        }
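
fs-io.c adopts the shorter lock-management names (bch2_trans_unlock()/bch2_trans_relock() replacing the bch2_btree_trans_* spellings; the same rename runs through ec.c, fsck.c and io.c in this commit) along with the new trans-init signature. The hunk above shows the standard pattern for taking a sleeping lock from inside a btree transaction:

	bch2_trans_unlock(trans);		/* drop btree node locks first */
	mutex_lock(&inode->ei_update_lock);

	if (!bch2_trans_relock(trans)) {	/* locks lost: caller restarts */
		mutex_unlock(&inode->ei_update_lock);
		return -EINTR;
	}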
@@ -435,8 +435,7 @@ static int bchfs_write_index_update(struct bch_write_op *wop)
 
        BUG_ON(k->k.p.inode != inode->v.i_ino);
 
-       bch2_trans_init(&trans, c);
-       bch2_trans_preload_iters(&trans);
+       bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024);
 
        iter = bch2_trans_get_iter(&trans,
                                BTREE_ID_EXTENTS,
@@ -998,7 +997,7 @@ static void bchfs_read(struct btree_trans *trans, struct btree_iter *iter,
                }
 
                bkey_reassemble(&tmp.k, k);
-               bch2_btree_trans_unlock(trans);
+               bch2_trans_unlock(trans);
                k = bkey_i_to_s_c(&tmp.k);
 
                if (readpages_iter) {
@@ -1054,7 +1053,7 @@ int bch2_readpages(struct file *file, struct address_space *mapping,
        ret = readpages_iter_init(&readpages_iter, mapping, pages, nr_pages);
        BUG_ON(ret);
 
-       bch2_trans_init(&trans, c);
+       bch2_trans_init(&trans, c, 0, 0);
 
        iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, POS_MIN,
                                   BTREE_ITER_SLOTS);
@@ -1103,7 +1102,7 @@ static void __bchfs_readpage(struct bch_fs *c, struct bch_read_bio *rbio,
        bio_set_op_attrs(&rbio->bio, REQ_OP_READ, REQ_SYNC);
        bio_add_page_contig(&rbio->bio, page);
 
-       bch2_trans_init(&trans, c);
+       bch2_trans_init(&trans, c, 0, 0);
        iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, POS_MIN,
                                   BTREE_ITER_SLOTS);
 
@@ -2101,8 +2100,7 @@ static int __bch2_fpunch(struct bch_fs *c, struct bch_inode_info *inode,
        struct bkey_s_c k;
        int ret = 0;
 
-       bch2_trans_init(&trans, c);
-       bch2_trans_preload_iters(&trans);
+       bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024);
 
        iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, start,
                                   BTREE_ITER_INTENT);
@@ -2148,7 +2146,7 @@ static inline int range_has_data(struct bch_fs *c,
        struct bkey_s_c k;
        int ret = 0;
 
-       bch2_trans_init(&trans, c);
+       bch2_trans_init(&trans, c, 0, 0);
 
        for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, start, 0, k, ret) {
                if (bkey_cmp(bkey_start_pos(k.k), end) >= 0)
@@ -2404,8 +2402,7 @@ static long bch2_fcollapse(struct bch_inode_info *inode,
        if ((offset | len) & (block_bytes(c) - 1))
                return -EINVAL;
 
-       bch2_trans_init(&trans, c);
-       bch2_trans_preload_iters(&trans);
+       bch2_trans_init(&trans, c, BTREE_ITER_MAX, 256);
 
        /*
         * We need i_mutex to keep the page cache consistent with the extents
@@ -2520,8 +2517,7 @@ static long bch2_fallocate(struct bch_inode_info *inode, int mode,
        unsigned replicas = io_opts(c, inode).data_replicas;
        int ret;
 
-       bch2_trans_init(&trans, c);
-       bch2_trans_preload_iters(&trans);
+       bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
 
        inode_lock(&inode->v);
        inode_dio_wait(&inode->v);
@@ -2732,7 +2728,7 @@ static loff_t bch2_seek_data(struct file *file, u64 offset)
        if (offset >= isize)
                return -ENXIO;
 
-       bch2_trans_init(&trans, c);
+       bch2_trans_init(&trans, c, 0, 0);
 
        for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS,
                           POS(inode->v.i_ino, offset >> 9), 0, k, ret) {
@@ -2805,7 +2801,7 @@ static loff_t bch2_seek_hole(struct file *file, u64 offset)
        if (offset >= isize)
                return -ENXIO;
 
-       bch2_trans_init(&trans, c);
+       bch2_trans_init(&trans, c, 0, 0);
 
        for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS,
                           POS(inode->v.i_ino, offset >> 9),
diff --git a/libbcachefs/fs.c b/libbcachefs/fs.c
index dc6c7dfbfcf68040c648f9281604fa2b5eb97f5c..a324278b6f43d1df3dcf0d67664c2b52d0458ec3 100644
@@ -164,7 +164,7 @@ int __must_check bch2_write_inode(struct bch_fs *c,
        struct bch_inode_unpacked inode_u;
        int ret;
 
-       bch2_trans_init(&trans, c);
+       bch2_trans_init(&trans, c, 0, 0);
 retry:
        bch2_trans_begin(&trans);
 
@@ -355,7 +355,7 @@ __bch2_create(struct bch_inode_info *dir, struct dentry *dentry,
        if (!tmpfile)
                mutex_lock(&dir->ei_update_lock);
 
-       bch2_trans_init(&trans, c);
+       bch2_trans_init(&trans, c, 8, 1024);
 retry:
        bch2_trans_begin(&trans);
 
@@ -507,7 +507,7 @@ static int __bch2_link(struct bch_fs *c,
        int ret;
 
        mutex_lock(&inode->ei_update_lock);
-       bch2_trans_init(&trans, c);
+       bch2_trans_init(&trans, c, 4, 1024);
 retry:
        bch2_trans_begin(&trans);
 
@@ -594,7 +594,7 @@ static int bch2_unlink(struct inode *vdir, struct dentry *dentry)
        int ret;
 
        bch2_lock_inodes(dir, inode);
-       bch2_trans_init(&trans, c);
+       bch2_trans_init(&trans, c, 4, 1024);
 retry:
        bch2_trans_begin(&trans);
 
@@ -801,13 +801,13 @@ static int bch2_rename2(struct inode *src_vdir, struct dentry *src_dentry,
                        return ret;
        }
 
+       bch2_trans_init(&trans, c, 8, 2048);
+
        bch2_lock_inodes(i.src_dir,
                         i.dst_dir,
                         i.src_inode,
                         i.dst_inode);
 
-       bch2_trans_init(&trans, c);
-
        if (S_ISDIR(i.src_inode->v.i_mode) &&
            inode_attrs_changing(i.dst_dir, i.src_inode)) {
                ret = -EXDEV;
@@ -968,7 +968,7 @@ static int bch2_setattr_nonsize(struct bch_inode_info *inode, struct iattr *iatt
        if (ret)
                goto err;
 
-       bch2_trans_init(&trans, c);
+       bch2_trans_init(&trans, c, 0, 0);
 retry:
        bch2_trans_begin(&trans);
        kfree(acl);
@@ -1123,7 +1123,7 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
        if (start + len < start)
                return -EINVAL;
 
-       bch2_trans_init(&trans, c);
+       bch2_trans_init(&trans, c, 0, 0);
 
        for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS,
                           POS(ei->v.i_ino, start >> 9), 0, k, ret)
@@ -1511,7 +1511,7 @@ static struct bch_fs *__bch2_open_as_blockdevs(const char *dev_name, char * cons
                 */
 
                c1 = bch2_path_to_fs(devs[0]);
-               if (!c1)
+               if (IS_ERR(c1))
                        return c;
 
                for (i = 1; i < nr_devs; i++) {
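
Also notable in fs.c: __bch2_open_as_blockdevs() tested bch2_path_to_fs() against NULL, but failure is evidently reported via ERR_PTR(), so the old check could never fire; the fixed check reads:

	c1 = bch2_path_to_fs(devs[0]);
	if (IS_ERR(c1))		/* not mounted there: open the devices */
		return c;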
diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c
index 998c10ab5ec60ce778ab6afd73c670103c9497c7..433552df9049a700ec3ec33bb9efcc51f1288c52 100644
@@ -57,7 +57,7 @@ static int remove_dirent(struct btree_trans *trans,
        name.name = buf;
 
        /* Unlock so we don't deadlock, after copying name: */
-       bch2_btree_trans_unlock(trans);
+       bch2_trans_unlock(trans);
 
        ret = bch2_inode_find_by_inum(c, dir_inum, &dir_inode);
        if (ret) {
@@ -450,8 +450,7 @@ static int check_extents(struct bch_fs *c)
        u64 i_sectors;
        int ret = 0;
 
-       bch2_trans_init(&trans, c);
-       bch2_trans_preload_iters(&trans);
+       bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
 
        bch_verbose(c, "checking extents");
 
@@ -546,8 +545,7 @@ static int check_dirents(struct bch_fs *c)
 
        bch_verbose(c, "checking dirents");
 
-       bch2_trans_init(&trans, c);
-       bch2_trans_preload_iters(&trans);
+       bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
 
        hash_check_init(&h);
 
@@ -703,8 +701,7 @@ static int check_xattrs(struct bch_fs *c)
 
        hash_check_init(&h);
 
-       bch2_trans_init(&trans, c);
-       bch2_trans_preload_iters(&trans);
+       bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
 
        iter = bch2_trans_get_iter(&trans, BTREE_ID_XATTRS,
                                   POS(BCACHEFS_ROOT_INO, 0), 0);
@@ -917,8 +914,7 @@ static int check_directory_structure(struct bch_fs *c,
        u64 d_inum;
        int ret = 0;
 
-       bch2_trans_init(&trans, c);
-       bch2_trans_preload_iters(&trans);
+       bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
 
        bch_verbose(c, "checking directory structure");
 
@@ -1014,7 +1010,7 @@ retry:
                if (fsck_err_on(!inode_bitmap_test(&dirs_done, k.k->p.inode), c,
                                "unreachable directory found (inum %llu)",
                                k.k->p.inode)) {
-                       bch2_btree_trans_unlock(&trans);
+                       bch2_trans_unlock(&trans);
 
                        ret = reattach_inode(c, lostfound_inode, k.k->p.inode);
                        if (ret) {
@@ -1084,8 +1080,7 @@ static int bch2_gc_walk_dirents(struct bch_fs *c, nlink_table *links,
        u64 d_inum;
        int ret;
 
-       bch2_trans_init(&trans, c);
-       bch2_trans_preload_iters(&trans);
+       bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
 
        inc_link(c, links, range_start, range_end, BCACHEFS_ROOT_INO, false);
 
@@ -1228,7 +1223,7 @@ static int check_inode(struct btree_trans *trans,
 
        ret = bch2_inode_unpack(inode, &u);
 
-       bch2_btree_trans_unlock(trans);
+       bch2_trans_unlock(trans);
 
        if (bch2_fs_inconsistent_on(ret, c,
                         "error unpacking inode %llu in fsck",
@@ -1333,8 +1328,7 @@ static int bch2_gc_walk_inodes(struct bch_fs *c,
        int ret = 0, ret2 = 0;
        u64 nlinks_pos;
 
-       bch2_trans_init(&trans, c);
-       bch2_trans_preload_iters(&trans);
+       bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
 
        iter = bch2_trans_get_iter(&trans, BTREE_ID_INODES,
                                   POS(range_start, 0), 0);
@@ -1458,8 +1452,7 @@ int bch2_fsck_walk_inodes_only(struct bch_fs *c)
        struct bkey_s_c_inode inode;
        int ret;
 
-       bch2_trans_init(&trans, c);
-       bch2_trans_preload_iters(&trans);
+       bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
 
        for_each_btree_key(&trans, iter, BTREE_ID_INODES, POS_MIN, 0, k, ret) {
                if (k.k->type != KEY_TYPE_inode)
diff --git a/libbcachefs/inode.c b/libbcachefs/inode.c
index d2748e7000283d484b033c62fa8c9ff25dc10558..59ae6d073a3336b319eb548f7d604e352818ead9 100644
@@ -390,7 +390,7 @@ int bch2_inode_rm(struct bch_fs *c, u64 inode_nr)
        if (ret)
                return ret;
 
-       bch2_trans_init(&trans, c);
+       bch2_trans_init(&trans, c, 0, 0);
 
        iter = bch2_trans_get_iter(&trans, BTREE_ID_INODES, POS(inode_nr, 0),
                                   BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
diff --git a/libbcachefs/io.c b/libbcachefs/io.c
index 5df690f9afe4c3e3718f4a80af347a33d6f9fbae..dc922a9140b1fbc94f36bdc804e806f4a43cfecf 100644
@@ -285,7 +285,7 @@ int bch2_write_index_default(struct bch_write_op *op)
        BUG_ON(bch2_keylist_empty(keys));
        bch2_verify_keylist_sorted(keys);
 
-       bch2_trans_init(&trans, c);
+       bch2_trans_init(&trans, c, BTREE_ITER_MAX, 256);
 
        iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
                                   bkey_start_pos(&bch2_keylist_front(keys)->k),
@@ -432,21 +432,32 @@ static void init_append_extent(struct bch_write_op *op,
                               struct bversion version,
                               struct bch_extent_crc_unpacked crc)
 {
+       struct bch_fs *c = op->c;
        struct bkey_i_extent *e = bkey_extent_init(op->insert_keys.top);
-       struct bch_extent_ptr *ptr;
+       struct extent_ptr_decoded p = { .crc = crc };
+       struct open_bucket *ob;
+       unsigned i;
 
        op->pos.offset += crc.uncompressed_size;
-       e->k.p = op->pos;
-       e->k.size = crc.uncompressed_size;
-       e->k.version = version;
+       e->k.p          = op->pos;
+       e->k.size       = crc.uncompressed_size;
+       e->k.version    = version;
 
-       bch2_extent_crc_append(e, crc);
-       bch2_alloc_sectors_append_ptrs(op->c, wp, &e->k_i,
-                                      crc.compressed_size);
+       BUG_ON(crc.compressed_size > wp->sectors_free);
+       wp->sectors_free -= crc.compressed_size;
 
-       if (op->flags & BCH_WRITE_CACHED)
-               extent_for_each_ptr(extent_i_to_s(e), ptr)
-                       ptr->cached = true;
+       open_bucket_for_each(c, &wp->ptrs, ob, i) {
+               struct bch_dev *ca = bch_dev_bkey_exists(c, ob->ptr.dev);
+
+               p.ptr = ob->ptr;
+               p.ptr.cached = !ca->mi.durability ||
+                       (op->flags & BCH_WRITE_CACHED) != 0;
+               p.ptr.offset += ca->mi.bucket_size - ob->sectors_free;
+               bch2_extent_ptr_decoded_append(e, &p);
+
+               BUG_ON(crc.compressed_size > ob->sectors_free);
+               ob->sectors_free -= crc.compressed_size;
+       }
 
        bch2_keylist_push(&op->insert_keys);
 }
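
init_append_extent() stops appending bare pointers via bch2_alloc_sectors_append_ptrs() and patching the cached bits afterwards; it now builds a struct extent_ptr_decoded per open bucket and appends it with bch2_extent_ptr_decoded_append(), which (per the extents.c hunk above) can reuse an existing matching CRC entry. Pointers are additionally marked cached when the target device has zero durability, not only for BCH_WRITE_CACHED writes:

	/* cached pointers don't count toward the extent's durability: */
	p.ptr.cached = !ca->mi.durability ||
		       (op->flags & BCH_WRITE_CACHED) != 0;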
@@ -1253,7 +1264,7 @@ static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio
 
        flags &= ~BCH_READ_LAST_FRAGMENT;
 
-       bch2_trans_init(&trans, c);
+       bch2_trans_init(&trans, c, 0, 0);
 
        iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
                                   rbio->pos, BTREE_ITER_SLOTS);
@@ -1301,7 +1312,7 @@ static void bch2_read_retry(struct bch_fs *c, struct bch_read_bio *rbio,
        struct bkey_s_c k;
        int ret;
 
-       bch2_trans_init(&trans, c);
+       bch2_trans_init(&trans, c, 0, 0);
 
        flags &= ~BCH_READ_LAST_FRAGMENT;
        flags |= BCH_READ_MUST_CLONE;
@@ -1314,7 +1325,7 @@ retry:
 
                bkey_reassemble(&tmp.k, k);
                k = bkey_i_to_s_c(&tmp.k);
-               bch2_btree_trans_unlock(&trans);
+               bch2_trans_unlock(&trans);
 
                bytes = min_t(unsigned, bvec_iter.bi_size,
                              (k.k->p.offset - bvec_iter.bi_sector) << 9);
@@ -1404,13 +1415,13 @@ static void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio)
        struct bkey_i_extent *e;
        BKEY_PADDED(k) new;
        struct bch_extent_crc_unpacked new_crc;
-       unsigned offset;
+       u64 data_offset = rbio->pos.offset - rbio->pick.crc.offset;
        int ret;
 
        if (rbio->pick.crc.compression_type)
                return;
 
-       bch2_trans_init(&trans, c);
+       bch2_trans_init(&trans, c, 0, 0);
 retry:
        bch2_trans_begin(&trans);
 
@@ -1427,24 +1438,19 @@ retry:
        e = bkey_i_to_extent(&new.k);
 
        if (!bch2_extent_matches_ptr(c, extent_i_to_s_c(e),
-                                    rbio->pick.ptr,
-                                    rbio->pos.offset -
-                                    rbio->pick.crc.offset) ||
+                                    rbio->pick.ptr, data_offset) ||
            bversion_cmp(e->k.version, rbio->version))
                goto out;
 
        /* Extent was merged? */
-       if (bkey_start_offset(&e->k) < rbio->pos.offset ||
-           e->k.p.offset > rbio->pos.offset + rbio->pick.crc.uncompressed_size)
+       if (bkey_start_offset(&e->k) < data_offset ||
+           e->k.p.offset > data_offset + rbio->pick.crc.uncompressed_size)
                goto out;
 
-       /* The extent might have been partially overwritten since we read it: */
-       offset = rbio->pick.crc.offset + (bkey_start_offset(&e->k) - rbio->pos.offset);
-
        if (bch2_rechecksum_bio(c, &rbio->bio, rbio->version,
-                               rbio->pick.crc, NULL, &new_crc,
-                               offset, e->k.size,
-                               rbio->pick.crc.csum_type)) {
+                       rbio->pick.crc, NULL, &new_crc,
+                       bkey_start_offset(&e->k) - data_offset, e->k.size,
+                       rbio->pick.crc.csum_type)) {
                bch_err(c, "error verifying existing checksum while narrowing checksum (memory corruption?)");
                goto out;
        }
@@ -1848,7 +1854,7 @@ void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode)
                BCH_READ_USER_MAPPED;
        int ret;
 
-       bch2_trans_init(&trans, c);
+       bch2_trans_init(&trans, c, 0, 0);
 
        BUG_ON(rbio->_state);
        BUG_ON(flags & BCH_READ_NODECODE);
@@ -1869,7 +1875,7 @@ void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode)
                 */
                bkey_reassemble(&tmp.k, k);
                k = bkey_i_to_s_c(&tmp.k);
-               bch2_btree_trans_unlock(&trans);
+               bch2_trans_unlock(&trans);
 
                bytes = min_t(unsigned, rbio->bio.bi_iter.bi_size,
                              (k.k->p.offset - rbio->bio.bi_iter.bi_sector) << 9);
diff --git a/libbcachefs/journal.c b/libbcachefs/journal.c
index 3ec80437504ee8348c83f99a86fb5a3fbba9c516..0a174dffe76e92bdc146fb3aff08d4ea7ad9fcb5 100644
@@ -963,6 +963,8 @@ void bch2_fs_journal_stop(struct journal *j)
 {
        struct bch_fs *c = container_of(j, struct bch_fs, journal);
 
+       bch2_journal_flush_all_pins(j);
+
        wait_event(j->wait, journal_entry_close(j));
 
        /* do we need to write another journal entry? */
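
bch2_fs_journal_stop() now flushes all journal pins before waiting for the final entry to close, so btree writes driven by pin flushing finish before shutdown proceeds:

	bch2_journal_flush_all_pins(j);			/* drain pinned work */
	wait_event(j->wait, journal_entry_close(j));	/* then close the entry */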
diff --git a/libbcachefs/journal_seq_blacklist.c b/libbcachefs/journal_seq_blacklist.c
index 93ee5e889389786f2a1b34aaaea57e574f545854..231f5da22f45cee83ab98a239539dc932eb5f593 100644
@@ -257,7 +257,7 @@ void bch2_blacklist_entries_gc(struct work_struct *work)
        unsigned i, nr, new_nr;
        int ret;
 
-       bch2_trans_init(&trans, c);
+       bch2_trans_init(&trans, c, 0, 0);
 
        for (i = 0; i < BTREE_ID_NR; i++) {
                struct btree_iter *iter;
diff --git a/libbcachefs/migrate.c b/libbcachefs/migrate.c
index 190b545b5e456d92748ae11322dc0dab197de6c1..74e17fa92c782f1531a6726809cc89479485037c 100644
@@ -41,8 +41,7 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
        BKEY_PADDED(key) tmp;
        int ret = 0;
 
-       bch2_trans_init(&trans, c);
-       bch2_trans_preload_iters(&trans);
+       bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
 
        iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
                                   POS_MIN, BTREE_ITER_PREFETCH);
@@ -112,7 +111,7 @@ static int bch2_dev_metadata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
        if (flags & BCH_FORCE_IF_METADATA_LOST)
                return -EINVAL;
 
-       bch2_trans_init(&trans, c);
+       bch2_trans_init(&trans, c, 0, 0);
        closure_init_stack(&cl);
 
        for (id = 0; id < BTREE_ID_NR; id++) {
diff --git a/libbcachefs/move.c b/libbcachefs/move.c
index d39f5633a8245517028431c68026197df12797f5..97890918b829aef3a206f5e848d615cb57db2a7b 100644
@@ -61,8 +61,7 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
        struct keylist *keys = &op->insert_keys;
        int ret = 0;
 
-       bch2_trans_init(&trans, c);
-       bch2_trans_preload_iters(&trans);
+       bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
 
        iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
                                   bkey_start_pos(&bch2_keylist_front(keys)->k),
@@ -500,7 +499,7 @@ int bch2_move_data(struct bch_fs *c,
        INIT_LIST_HEAD(&ctxt.reads);
        init_waitqueue_head(&ctxt.wait);
 
-       bch2_trans_init(&trans, c);
+       bch2_trans_init(&trans, c, 0, 0);
 
        stats->data_type = BCH_DATA_USER;
        stats->btree_id = BTREE_ID_EXTENTS;
@@ -634,7 +633,7 @@ static int bch2_move_btree(struct bch_fs *c,
        enum data_cmd cmd;
        int ret = 0;
 
-       bch2_trans_init(&trans, c);
+       bch2_trans_init(&trans, c, 0, 0);
 
        stats->data_type = BCH_DATA_BTREE;
 
diff --git a/libbcachefs/quota.c b/libbcachefs/quota.c
index 2b0edb6826d1be58457ca3e437e653625ae0e8df..8a42660cff08a8e54400075cc6927f0a65ead9b4 100644
@@ -360,7 +360,7 @@ static int bch2_quota_init_type(struct bch_fs *c, enum quota_types type)
        struct bkey_s_c k;
        int ret = 0;
 
-       bch2_trans_init(&trans, c);
+       bch2_trans_init(&trans, c, 0, 0);
 
        for_each_btree_key(&trans, iter, BTREE_ID_QUOTAS, POS(type, 0),
                           BTREE_ITER_PREFETCH, k, ret) {
@@ -432,7 +432,7 @@ int bch2_fs_quota_read(struct bch_fs *c)
                        return ret;
        }
 
-       bch2_trans_init(&trans, c);
+       bch2_trans_init(&trans, c, 0, 0);
 
        for_each_btree_key(&trans, iter, BTREE_ID_INODES, POS_MIN,
                           BTREE_ITER_PREFETCH, k, ret) {
@@ -725,7 +725,7 @@ static int bch2_set_quota(struct super_block *sb, struct kqid qid,
        bkey_quota_init(&new_quota.k_i);
        new_quota.k.p = POS(qid.type, from_kqid(&init_user_ns, qid));
 
-       bch2_trans_init(&trans, c);
+       bch2_trans_init(&trans, c, 0, 0);
 
        iter = bch2_trans_get_iter(&trans, BTREE_ID_QUOTAS, new_quota.k.p,
                                   BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c
index 70fd9a27ae3d4d0b134d37aa1832541e4e3662c7..535e2b6a15fbbbc7727077078aa999aeb6eaa4a3 100644
@@ -213,8 +213,7 @@ static int bch2_extent_replay_key(struct bch_fs *c, struct bkey_i *k)
        bool split_compressed = false;
        int ret;
 
-       bch2_trans_init(&trans, c);
-       bch2_trans_preload_iters(&trans);
+       bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
 retry:
        bch2_trans_begin(&trans);
 
@@ -258,13 +257,9 @@ retry:
        } while (bkey_cmp(iter->pos, k->k.p) < 0);
 
        if (split_compressed) {
-               memset(&trans.fs_usage_deltas.fs_usage, 0,
-                      sizeof(trans.fs_usage_deltas.fs_usage));
-               trans.fs_usage_deltas.top = trans.fs_usage_deltas.d;
-
-               ret = bch2_trans_mark_key(&trans, bkey_i_to_s_c(k), false,
+               ret = bch2_trans_mark_key(&trans, bkey_i_to_s_c(k),
                                          -((s64) k->k.size),
-                                         &trans.fs_usage_deltas) ?:
+                                         BCH_BUCKET_MARK_OVERWRITE) ?:
                      bch2_trans_commit(&trans, &disk_res, NULL,
                                        BTREE_INSERT_ATOMIC|
                                        BTREE_INSERT_NOFAIL|
diff --git a/libbcachefs/sysfs.c b/libbcachefs/sysfs.c
index c2744c7dd2baccb8d846f6bbbfb88db8336b7ef6..675706761b12672eb8f2ec8aa788ec45a8a9d5db 100644
@@ -262,7 +262,7 @@ static ssize_t bch2_compression_stats(struct bch_fs *c, char *buf)
        if (!test_bit(BCH_FS_STARTED, &c->flags))
                return -EPERM;
 
-       bch2_trans_init(&trans, c);
+       bch2_trans_init(&trans, c, 0, 0);
 
        for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS_MIN, 0, k, ret)
                if (k.k->type == KEY_TYPE_extent) {
diff --git a/libbcachefs/tests.c b/libbcachefs/tests.c
index 265db89af47b390e54b0a66d065ba2ac3d3b7e8e..96bca8009da70f7f51e361f29dee8c023dd56169 100644
@@ -34,7 +34,7 @@ static void test_delete(struct bch_fs *c, u64 nr)
 
        bkey_cookie_init(&k.k_i);
 
-       bch2_trans_init(&trans, c);
+       bch2_trans_init(&trans, c, 0, 0);
 
        iter = bch2_trans_get_iter(&trans, BTREE_ID_DIRENTS, k.k.p,
                                   BTREE_ITER_INTENT);
@@ -66,7 +66,7 @@ static void test_delete_written(struct bch_fs *c, u64 nr)
 
        bkey_cookie_init(&k.k_i);
 
-       bch2_trans_init(&trans, c);
+       bch2_trans_init(&trans, c, 0, 0);
 
        iter = bch2_trans_get_iter(&trans, BTREE_ID_DIRENTS, k.k.p,
                                   BTREE_ITER_INTENT);
@@ -94,7 +94,7 @@ static void test_iterate(struct bch_fs *c, u64 nr)
        u64 i;
        int ret;
 
-       bch2_trans_init(&trans, c);
+       bch2_trans_init(&trans, c, 0, 0);
 
        delete_test_keys(c);
 
@@ -139,7 +139,7 @@ static void test_iterate_extents(struct bch_fs *c, u64 nr)
        u64 i;
        int ret;
 
-       bch2_trans_init(&trans, c);
+       bch2_trans_init(&trans, c, 0, 0);
 
        delete_test_keys(c);
 
@@ -189,7 +189,7 @@ static void test_iterate_slots(struct bch_fs *c, u64 nr)
        u64 i;
        int ret;
 
-       bch2_trans_init(&trans, c);
+       bch2_trans_init(&trans, c, 0, 0);
 
        delete_test_keys(c);
 
@@ -243,7 +243,7 @@ static void test_iterate_slots_extents(struct bch_fs *c, u64 nr)
        u64 i;
        int ret;
 
-       bch2_trans_init(&trans, c);
+       bch2_trans_init(&trans, c, 0, 0);
 
        delete_test_keys(c);
 
@@ -304,7 +304,7 @@ static void test_peek_end(struct bch_fs *c, u64 nr)
        struct btree_iter *iter;
        struct bkey_s_c k;
 
-       bch2_trans_init(&trans, c);
+       bch2_trans_init(&trans, c, 0, 0);
 
        iter = bch2_trans_get_iter(&trans, BTREE_ID_DIRENTS, POS_MIN, 0);
 
@@ -323,7 +323,7 @@ static void test_peek_end_extents(struct bch_fs *c, u64 nr)
        struct btree_iter *iter;
        struct bkey_s_c k;
 
-       bch2_trans_init(&trans, c);
+       bch2_trans_init(&trans, c, 0, 0);
 
        iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, POS_MIN, 0);
 
@@ -429,7 +429,7 @@ static void rand_lookup(struct bch_fs *c, u64 nr)
        struct bkey_s_c k;
        u64 i;
 
-       bch2_trans_init(&trans, c);
+       bch2_trans_init(&trans, c, 0, 0);
 
        for (i = 0; i < nr; i++) {
                iter = bch2_trans_get_iter(&trans, BTREE_ID_DIRENTS,
@@ -450,7 +450,7 @@ static void rand_mixed(struct bch_fs *c, u64 nr)
        int ret;
        u64 i;
 
-       bch2_trans_init(&trans, c);
+       bch2_trans_init(&trans, c, 0, 0);
 
        for (i = 0; i < nr; i++) {
                iter = bch2_trans_get_iter(&trans, BTREE_ID_DIRENTS,
@@ -502,10 +502,10 @@ static void seq_insert(struct bch_fs *c, u64 nr)
 
        bkey_cookie_init(&insert.k_i);
 
-       bch2_trans_init(&trans, c);
+       bch2_trans_init(&trans, c, 0, 0);
 
        for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, POS_MIN,
-                          BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k) {
+                          BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) {
                insert.k.p = iter->pos;
 
                bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, &insert.k_i));
@@ -523,10 +523,11 @@ static void seq_lookup(struct bch_fs *c, u64 nr)
        struct btree_trans trans;
        struct btree_iter *iter;
        struct bkey_s_c k;
+       int ret;
 
-       bch2_trans_init(&trans, c);
+       bch2_trans_init(&trans, c, 0, 0);
 
-       for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, POS_MIN, 0, k)
+       for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, POS_MIN, 0, k, ret)
                ;
        bch2_trans_exit(&trans);
 }
@@ -538,10 +539,10 @@ static void seq_overwrite(struct bch_fs *c, u64 nr)
        struct bkey_s_c k;
        int ret;
 
-       bch2_trans_init(&trans, c);
+       bch2_trans_init(&trans, c, 0, 0);
 
        for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, POS_MIN,
-                          BTREE_ITER_INTENT, k) {
+                          BTREE_ITER_INTENT, k, ret) {
                struct bkey_i_cookie u;
 
                bkey_reassemble(&u.k_i, k);
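
The tests.c hunks fix latent bugs from an earlier macro change: for_each_btree_key() takes a trailing `ret` argument for surfacing iteration errors, and seq_insert()/seq_lookup()/seq_overwrite() were still using the old arity (seq_lookup lacked the `ret` variable entirely). Usage under the current macro, following the converted call sites:

	struct btree_trans trans;
	struct btree_iter *iter;
	struct bkey_s_c k;
	int ret;

	bch2_trans_init(&trans, c, 0, 0);

	for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, POS_MIN, 0, k, ret) {
		/* use k; ret is set if the iterator hits an error */
	}

	ret = bch2_trans_exit(&trans) ?: ret;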
diff --git a/libbcachefs/xattr.c b/libbcachefs/xattr.c
index fd58829a2007aa357cbe4c06c86fc4610e97014b..41a9753e919c65b9b79053ce12b7145cfd241c92 100644
@@ -125,7 +125,7 @@ int bch2_xattr_get(struct bch_fs *c, struct bch_inode_info *inode,
        struct bkey_s_c_xattr xattr;
        int ret;
 
-       bch2_trans_init(&trans, c);
+       bch2_trans_init(&trans, c, 0, 0);
 
        iter = bch2_hash_lookup(&trans, bch2_xattr_hash_desc,
                                &inode->ei_str_hash, inode->v.i_ino,
@@ -276,7 +276,7 @@ ssize_t bch2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
        u64 inum = dentry->d_inode->i_ino;
        int ret;
 
-       bch2_trans_init(&trans, c);
+       bch2_trans_init(&trans, c, 0, 0);
 
        for_each_btree_key(&trans, iter, BTREE_ID_XATTRS,
                           POS(inum, 0), 0, k, ret) {