btree_write_buffer: ensure atomic64_sub_return_release availability
diff --git a/libbcachefs/btree_write_buffer.c b/libbcachefs/btree_write_buffer.c
index 3a3e36c16bc030eae7ea5779ca69413969aa5ec8..76b6f2dcaa4fb7889b2e3de3b5a48d57b7723868 100644
--- a/libbcachefs/btree_write_buffer.c
+++ b/libbcachefs/btree_write_buffer.c
@@ -9,6 +9,7 @@
 #include "journal.h"
 #include "journal_reclaim.h"
 
+#include <linux/atomic.h>
 #include <linux/sort.h>
 
 static int btree_write_buffered_key_cmp(const void *_l, const void *_r)
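Per the subject line, this file relies on atomic64_sub_return_release(); including <linux/atomic.h> directly, rather than depending on an indirect include, keeps that declaration visible in both the kernel build and the bcachefs-tools userspace build. Purely as an illustration of the operation's semantics, and not the actual shim, a userspace equivalent in C11 atomics could look like this (all toy_* names are hypothetical):

/*
 * Illustration only: atomic64_sub_return_release() subtracts i and returns
 * the new value with release memory ordering.
 */
#include <stdatomic.h>
#include <stdint.h>

typedef struct { _Atomic int64_t counter; } toy_atomic64_t;

static inline int64_t toy_atomic64_sub_return_release(int64_t i, toy_atomic64_t *v)
{
	/* fetch_sub returns the old value; subtract i again for the new one */
	return atomic_fetch_sub_explicit(&v->counter, i, memory_order_release) - i;
}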
@@ -75,7 +76,8 @@ static int bch2_btree_write_buffer_flush_one(struct btree_trans *trans,
        }
        return 0;
 trans_commit:
-       return  bch2_trans_update(trans, iter, &wb->k, 0) ?:
+       return  bch2_trans_update_seq(trans, wb->journal_seq, iter, &wb->k,
+                                     BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
                bch2_trans_commit(trans, NULL, NULL,
                                  commit_flags|
                                  BTREE_INSERT_NOCHECK_RW|
@@ -103,6 +105,33 @@ static union btree_write_buffer_state btree_write_buffer_switch(struct btree_wri
        return old;
 }
 
+/*
+ * Update a btree with a write buffered key using the journal seq of the
+ * original write buffer insert.
+ *
+ * It is not safe to rejournal the key once it has been inserted into the write
+ * buffer because that may break recovery ordering. For example, the key may
+ * have already been modified in the active write buffer in a seq that comes
+ * before the current transaction. If we were to journal this key again and
+ * crash, recovery would process updates in the wrong order.
+ */
+static int
+btree_write_buffered_insert(struct btree_trans *trans,
+                         struct btree_write_buffered_key *wb)
+{
+       struct btree_iter iter;
+       int ret;
+
+       bch2_trans_iter_init(trans, &iter, wb->btree, bkey_start_pos(&wb->k.k),
+                            BTREE_ITER_CACHED|BTREE_ITER_INTENT);
+
+       ret   = bch2_btree_iter_traverse(&iter) ?:
+               bch2_trans_update_seq(trans, wb->journal_seq, &iter, &wb->k,
+                                     BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
+       bch2_trans_iter_exit(trans, &iter);
+       return ret;
+}
+
 int __bch2_btree_write_buffer_flush(struct btree_trans *trans, unsigned commit_flags,
                                    bool locked)
 {
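The comment above states the constraint that drives the rest of this patch: recovery replays journal entries in seq order, so re-journalling an already-buffered key at a newer seq could let a stale value overwrite a later update to the same key. A toy model of that failure, with entirely hypothetical names and no bcachefs code:

/*
 * Toy journal replay: the key was updated to 2 at seq 11, and the older
 * update to 1 was (incorrectly) re-journalled at seq 12. Replay applies
 * entries in seq order, so the stale value wins.
 */
#include <stdio.h>
#include <stdlib.h>

struct toy_entry {
	unsigned long long	seq;
	int			val;
};

static int toy_entry_cmp(const void *_l, const void *_r)
{
	const struct toy_entry *l = _l, *r = _r;

	return (l->seq > r->seq) - (l->seq < r->seq);
}

int main(void)
{
	struct toy_entry log[] = {
		{ .seq = 11, .val = 2 },
		{ .seq = 12, .val = 1 },	/* re-journalled stale update */
	};
	size_t nr = sizeof(log) / sizeof(log[0]);
	int key = 0;

	qsort(log, nr, sizeof(log[0]), toy_entry_cmp);
	for (size_t i = 0; i < nr; i++)
		key = log[i].val;

	printf("key after replay: %d (a correct replay would give 2)\n", key);
	return 0;
}

This is why both the fast path and the slowpath now go through bch2_trans_update_seq() with wb->journal_seq instead of taking a new journal reservation for the key.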
@@ -129,6 +158,9 @@ int __bch2_btree_write_buffer_flush(struct btree_trans *trans, unsigned commit_f
        keys = wb->keys[s.idx];
        nr = s.nr;
 
+       if (race_fault())
+               goto slowpath;
+
        /*
         * We first sort so that we can detect and skip redundant updates, and
         * then we attempt to flush in sorted btree order, as this is most
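race_fault() is a fault-injection hook: it normally evaluates to false, but test configurations can enable it, and here it forces the flush down the slowpath so the rarely taken path gets regular exercise. A generic sketch of that kind of injection point, using a hypothetical toy_race_fault() macro and an assumed TOY_FAULT_INJECTION build flag rather than the actual bcachefs definition:

#include <stdbool.h>
#include <stdlib.h>

/*
 * Hypothetical stand-in for a race_fault()-style hook: disabled builds get
 * a constant false, test builds fire randomly so fallback paths (like the
 * slowpath goto above) are exercised.
 */
#ifdef TOY_FAULT_INJECTION
#define toy_race_fault()	(rand() % 64 == 0)
#else
#define toy_race_fault()	false
#endif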
@@ -164,7 +196,8 @@ int __bch2_btree_write_buffer_flush(struct btree_trans *trans, unsigned commit_f
 
                if (!iter.path || iter.path->btree_id != i->btree) {
                        bch2_trans_iter_exit(trans, &iter);
-                       bch2_trans_iter_init(trans, &iter, i->btree, i->k.k.p, BTREE_ITER_INTENT);
+                       bch2_trans_iter_init(trans, &iter, i->btree, i->k.k.p,
+                                            BTREE_ITER_INTENT|BTREE_ITER_ALL_SNAPSHOTS);
                }
 
                bch2_btree_iter_set_pos(&iter, i->k.k.p);
@@ -235,7 +268,7 @@ slowpath:
                                commit_flags|
                                BTREE_INSERT_NOFAIL|
                                BTREE_INSERT_JOURNAL_RECLAIM,
-                               __bch2_btree_insert(trans, i->btree, &i->k, 0));
+                               btree_write_buffered_insert(trans, i));
                if (bch2_fs_fatal_err_on(ret, c, "%s: insert error %s", __func__, bch2_err_str(ret)))
                        break;
        }
@@ -264,7 +297,7 @@ static int bch2_btree_write_buffer_journal_flush(struct journal *j,
        mutex_lock(&wb->flush_lock);
 
        return bch2_trans_run(c,
-                       __bch2_btree_write_buffer_flush(&trans, BTREE_INSERT_NOCHECK_RW, true));
+                       __bch2_btree_write_buffer_flush(trans, BTREE_INSERT_NOCHECK_RW, true));
 }
 
 static inline u64 btree_write_buffer_ref(int idx)
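The &trans to trans change in the hunk above follows from bch2_trans_run() now handing its body expression a struct btree_trans pointer rather than an on-stack transaction. A sketch of the general run-wrapper pattern under that assumption, with toy_* stand-ins and not the actual bcachefs macro:

#include <stdio.h>
#include <stdlib.h>

/* Hypothetical stand-ins; not bcachefs types or functions. */
struct toy_trans { int nr_iters; };

static struct toy_trans *toy_trans_get(void *c)
{
	(void)c;
	return calloc(1, sizeof(struct toy_trans));
}

static void toy_trans_put(struct toy_trans *trans)
{
	free(trans);
}

/*
 * Run-wrapper pattern: set up a transaction, evaluate the body expression
 * with a pointer named trans in scope, tear the transaction down, and
 * return the body's result.
 */
#define toy_trans_run(_c, _do)						\
({									\
	struct toy_trans *trans = toy_trans_get(_c);			\
	int _ret = (_do);						\
	toy_trans_put(trans);						\
	_ret;								\
})

static int toy_flush(struct toy_trans *trans)
{
	return trans ? 0 : -1;
}

int main(void)
{
	/* the body references "trans", which the wrapper provides */
	printf("ret = %d\n", toy_trans_run(NULL, toy_flush(trans)));
	return 0;
}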
@@ -281,7 +314,6 @@ int bch2_btree_insert_keys_write_buffer(struct btree_trans *trans)
        struct btree_write_buffer *wb = &c->btree_write_buffer;
        struct btree_write_buffered_key *i;
        union btree_write_buffer_state old, new;
-       unsigned offset = 0;
        int ret = 0;
        u64 v;
 
@@ -289,8 +321,7 @@ int bch2_btree_insert_keys_write_buffer(struct btree_trans *trans)
                EBUG_ON(i->k.k.u64s > BTREE_WRITE_BUFERED_U64s_MAX);
 
                i->journal_seq          = trans->journal_res.seq;
-               i->journal_offset       = trans->journal_res.offset + offset;
-               offset++;
+               i->journal_offset       = trans->journal_res.offset;
        }
 
        preempt_disable();