Update bcachefs sources to 3ca08ab51ec9 bcachefs: six locks: Simplify optimistic...
diff --git a/libbcachefs/btree_write_buffer.c b/libbcachefs/btree_write_buffer.c
index b50226313a47f1c42c16b7baa780a07d5d737f9e..a6bf6ed37ced60cfee4bb61c15c47c06d5ace9c7 100644
--- a/libbcachefs/btree_write_buffer.c
+++ b/libbcachefs/btree_write_buffer.c
@@ -11,6 +11,9 @@
 
 #include <linux/sort.h>
 
+static int bch2_btree_write_buffer_journal_flush(struct journal *,
+                               struct journal_entry_pin *, u64);
+
 static int btree_write_buffered_key_cmp(const void *_l, const void *_r)
 {
        const struct btree_write_buffered_key *l = _l;
@@ -45,6 +48,13 @@ static int bch2_btree_write_buffer_flush_one(struct btree_trans *trans,
        if (ret)
                return ret;
 
+       /*
+        * We can't clone a path that has write locks: unshare it now, before
+        * set_pos and traverse():
+        */
+       if (iter->path->ref > 1)
+               iter->path = __bch2_btree_path_make_mut(trans, iter->path, true, _THIS_IP_);
+
        path = iter->path;
 
        if (!*write_locked) {
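
The hunk above moves the copy-on-write handling of shared paths to before any
write locks are taken. A minimal sketch of the rule it enforces, using toy
types rather than bcachefs's real ones (__bch2_btree_path_make_mut and
iter->path->ref are the real names; everything below is illustrative):

	#include <stdlib.h>

	/* Toy stand-in for a btree path shared between iterators: */
	struct toy_path {
		int ref;	/* number of iterators referencing this path */
	};

	/*
	 * A path referenced by more than one iterator must be cloned before
	 * it is repositioned or mutated, and cloning a path that holds write
	 * locks is not allowed -- so the clone has to happen up front:
	 */
	static struct toy_path *toy_path_make_mut(struct toy_path *p)
	{
		struct toy_path *copy;

		if (p->ref == 1)
			return p;		/* sole owner: mutate in place */

		copy = malloc(sizeof(*copy));	/* shared: clone first */
		if (!copy)
			return NULL;
		copy->ref = 1;
		p->ref--;
		return copy;
	}
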
@@ -64,23 +74,18 @@ static int bch2_btree_write_buffer_flush_one(struct btree_trans *trans,
 
        bch2_btree_insert_key_leaf(trans, path, &wb->k, wb->journal_seq);
        (*fast)++;
-
-       if (path->ref > 1) {
-               /*
-                * We can't clone a path that has write locks: if the path is
-                * shared, unlock before set_pos(), traverse():
-                */
-               bch2_btree_node_unlock_write(trans, path, path->l[0].b);
-               *write_locked = false;
-       }
        return 0;
 trans_commit:
-       return  bch2_trans_update(trans, iter, &wb->k, 0) ?:
+       trans->journal_res.seq = wb->journal_seq;
+
+       return  bch2_trans_update(trans, iter, &wb->k,
+                                 BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
                bch2_trans_commit(trans, NULL, NULL,
                                  commit_flags|
-                                 BTREE_INSERT_NOCHECK_RW|
-                                 BTREE_INSERT_NOFAIL|
-                                 BTREE_INSERT_JOURNAL_RECLAIM);
+                                 BCH_TRANS_COMMIT_no_check_rw|
+                                 BCH_TRANS_COMMIT_no_enospc|
+                                 BCH_TRANS_COMMIT_no_journal_res|
+                                 BCH_TRANS_COMMIT_journal_reclaim);
 }
 
 static union btree_write_buffer_state btree_write_buffer_switch(struct btree_write_buffer *wb)
@@ -103,6 +108,35 @@ static union btree_write_buffer_state btree_write_buffer_switch(struct btree_write_buffer *wb)
        return old;
 }
 
+/*
+ * Update a btree with a write buffered key using the journal seq of the
+ * original write buffer insert.
+ *
+ * It is not safe to rejournal the key once it has been inserted into the write
+ * buffer because that may break recovery ordering. For example, the key may
+ * have already been modified in the active write buffer in a seq that comes
+ * before the current transaction. If we were to journal this key again and
+ * crash, recovery would process updates in the wrong order.
+ */
+static int
+btree_write_buffered_insert(struct btree_trans *trans,
+                         struct btree_write_buffered_key *wb)
+{
+       struct btree_iter iter;
+       int ret;
+
+       bch2_trans_iter_init(trans, &iter, wb->btree, bkey_start_pos(&wb->k.k),
+                            BTREE_ITER_CACHED|BTREE_ITER_INTENT);
+
+       trans->journal_res.seq = wb->journal_seq;
+
+       ret   = bch2_btree_iter_traverse(&iter) ?:
+               bch2_trans_update(trans, &iter, &wb->k,
+                                 BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
+       bch2_trans_iter_exit(trans, &iter);
+       return ret;
+}
+
 int __bch2_btree_write_buffer_flush(struct btree_trans *trans, unsigned commit_flags,
                                    bool locked)
 {
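
btree_write_buffered_insert() above is the heart of the slowpath: it replays a
buffered key at the key's original journal sequence number instead of
journalling it again. Combined with the commit flags used further down, the
per-key replay amounts to the following (a condensed restatement of code
appearing elsewhere in this diff; replay_one is a hypothetical wrapper and the
snippet builds only against bcachefs-internal headers):

	static int replay_one(struct btree_trans *trans,
			      struct btree_write_buffered_key *i,
			      unsigned commit_flags)
	{
		/*
		 * BCH_TRANS_COMMIT_no_journal_res: don't take a new journal
		 * reservation -- the commit reuses trans->journal_res.seq,
		 * which btree_write_buffered_insert() set to the seq of the
		 * journal entry that originally carried this key.
		 */
		return commit_do(trans, NULL, NULL,
				 commit_flags|
				 BCH_TRANS_COMMIT_no_enospc|
				 BCH_TRANS_COMMIT_no_journal_res|
				 BCH_TRANS_COMMIT_journal_reclaim,
				 btree_write_buffered_insert(trans, i));
	}
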
@@ -122,13 +156,17 @@ int __bch2_btree_write_buffer_flush(struct btree_trans *trans, unsigned commit_flags,
        if (!locked && !mutex_trylock(&wb->flush_lock))
                return 0;
 
-       bch2_journal_pin_copy(j, &pin, &wb->journal_pin, NULL);
+       bch2_journal_pin_copy(j, &pin, &wb->journal_pin,
+                             bch2_btree_write_buffer_journal_flush);
        bch2_journal_pin_drop(j, &wb->journal_pin);
 
        s = btree_write_buffer_switch(wb);
        keys = wb->keys[s.idx];
        nr = s.nr;
 
+       if (race_fault())
+               goto slowpath;
+
        /*
         * We first sort so that we can detect and skip redundant updates, and
         * then we attempt to flush in sorted btree order, as this is most
@@ -137,7 +175,7 @@ int __bch2_btree_write_buffer_flush(struct btree_trans *trans, unsigned commit_flags,
         * However, since we're not flushing in the order they appear in the
         * journal we won't be able to drop our journal pin until everything is
         * flushed - which means this could deadlock the journal if we weren't
-        * passing BTREE_INSERT_JOURNAL_RECLAIM. This causes the update to fail
+        * passing BCH_TRANS_COMMIT_journal_reclaim. This causes the update to fail
         * if it would block taking a journal reservation.
         *
         * If that happens, simply skip the key so we can optimistically insert
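
Concretely, the deadlock this comment guards against: journal space is
reclaimed only up to the oldest pinned sequence, and the flusher itself holds
that pin until every buffered key is written. A toy model (not bcachefs code)
of why a commit that blocked on a journal reservation here could never make
progress:

	#include <stdbool.h>
	#include <stdio.h>

	/*
	 * Toy journal: live entries occupy [front, back); space is reclaimed
	 * by advancing front, but never past the oldest pinned seq.
	 */
	struct toy_journal {
		unsigned long front, back, capacity, oldest_pin;
	};

	static bool toy_journal_res(struct toy_journal *j)
	{
		/* Reclaim whatever the oldest pin allows: */
		if (j->front < j->oldest_pin)
			j->front = j->oldest_pin;

		/*
		 * Fail -- rather than block -- when the journal is full and
		 * no further reclaim is possible:
		 */
		if (j->back - j->front >= j->capacity)
			return false;
		j->back++;
		return true;
	}

	int main(void)
	{
		/*
		 * The write buffer's own pin at seq 0 blocks all reclaim, so
		 * blocking here instead of failing would deadlock:
		 */
		struct toy_journal j = { .front = 0, .back = 4,
					 .capacity = 4, .oldest_pin = 0 };

		printf("reservation: %s\n",
		       toy_journal_res(&j) ? "ok" : "would deadlock");
		return 0;
	}
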
@@ -164,7 +202,8 @@ int __bch2_btree_write_buffer_flush(struct btree_trans *trans, unsigned commit_flags,
 
                if (!iter.path || iter.path->btree_id != i->btree) {
                        bch2_trans_iter_exit(trans, &iter);
-                       bch2_trans_iter_init(trans, &iter, i->btree, i->k.k.p, BTREE_ITER_INTENT);
+                       bch2_trans_iter_init(trans, &iter, i->btree, i->k.k.p,
+                                            BTREE_ITER_INTENT|BTREE_ITER_ALL_SNAPSHOTS);
                }
 
                bch2_btree_iter_set_pos(&iter, i->k.k.p);
@@ -220,22 +259,15 @@ slowpath:
                if (!i->journal_seq)
                        continue;
 
-               if (i->journal_seq > pin.seq) {
-                       struct journal_entry_pin pin2;
-
-                       memset(&pin2, 0, sizeof(pin2));
-
-                       bch2_journal_pin_add(j, i->journal_seq, &pin2, NULL);
-                       bch2_journal_pin_drop(j, &pin);
-                       bch2_journal_pin_copy(j, &pin, &pin2, NULL);
-                       bch2_journal_pin_drop(j, &pin2);
-               }
+               bch2_journal_pin_update(j, i->journal_seq, &pin,
+                             bch2_btree_write_buffer_journal_flush);
 
                ret = commit_do(trans, NULL, NULL,
                                commit_flags|
-                               BTREE_INSERT_NOFAIL|
-                               BTREE_INSERT_JOURNAL_RECLAIM,
-                               __bch2_btree_insert(trans, i->btree, &i->k, 0));
+                               BCH_TRANS_COMMIT_no_enospc|
+                               BCH_TRANS_COMMIT_no_journal_res|
+                               BCH_TRANS_COMMIT_journal_reclaim,
+                               btree_write_buffered_insert(trans, i));
                if (bch2_fs_fatal_err_on(ret, c, "%s: insert error %s", __func__, bch2_err_str(ret)))
                        break;
        }
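
bch2_journal_pin_update() collapses the removed four-call shuffle into a
single helper that moves an existing pin forward to a new seq without ever
leaving the range unpinned, and takes the flush callback that the old NULL
arguments omitted. In terms of the calls already visible in this diff, it
behaves roughly like the sketch below (the i->journal_seq > pin.seq guard from
the old code is elided, journal_pin_flush_fn is assumed to be the journal's
flush-callback typedef, and this is a sketch of the equivalence, not the
helper's real body):

	static void pin_update_sketch(struct journal *j, u64 seq,
				      struct journal_entry_pin *pin,
				      journal_pin_flush_fn flush_fn)
	{
		struct journal_entry_pin tmp;

		memset(&tmp, 0, sizeof(tmp));

		/* Pin the new seq first, so nothing is ever unpinned: */
		bch2_journal_pin_add(j, seq, &tmp, flush_fn);
		bch2_journal_pin_drop(j, pin);
		bch2_journal_pin_copy(j, pin, &tmp, flush_fn);
		bch2_journal_pin_drop(j, &tmp);
	}
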
@@ -264,7 +296,7 @@ static int bch2_btree_write_buffer_journal_flush(struct journal *j,
        mutex_lock(&wb->flush_lock);
 
        return bch2_trans_run(c,
-                       __bch2_btree_write_buffer_flush(&trans, BTREE_INSERT_NOCHECK_RW, true));
+                       __bch2_btree_write_buffer_flush(trans, BCH_TRANS_COMMIT_no_check_rw, true));
 }
 
 static inline u64 btree_write_buffer_ref(int idx)