git.sesse.net Git - bcachefs-tools-debian/blobdiff - libbcachefs/recovery.c
Update bcachefs sources to 3ca08ab51ec9 bcachefs: six locks: Simplify optimistic...
[bcachefs-tools-debian] / libbcachefs / recovery.c
index 9c30500ce9200af8be8f71a50f5fa02c356e4400..130274b195e21621c391ad89d965440cf077816c 100644 (file)
@@ -98,6 +98,11 @@ static int bch2_journal_replay_key(struct btree_trans *trans,
        unsigned update_flags = BTREE_TRIGGER_NORUN;
        int ret;
 
+       if (k->overwritten)
+               return 0;
+
+       trans->journal_res.seq = k->journal_seq;
+
        /*
         * BTREE_UPDATE_KEY_CACHE_RECLAIM disables key cache lookup/update to
         * keep the key cache coherent with the underlying btree. Nothing
@@ -139,27 +144,14 @@ static int journal_sort_seq_cmp(const void *_l, const void *_r)
 static int bch2_journal_replay(struct bch_fs *c)
 {
        struct journal_keys *keys = &c->journal_keys;
-       struct journal_key **keys_sorted, *k;
+       DARRAY(struct journal_key *) keys_sorted = { 0 };
+       struct journal_key **kp;
        struct journal *j = &c->journal;
        u64 start_seq   = c->journal_replay_seq_start;
        u64 end_seq     = c->journal_replay_seq_start;
-       size_t i;
+       struct btree_trans *trans = bch2_trans_get(c);
        int ret;
 
-       move_gap(keys->d, keys->nr, keys->size, keys->gap, keys->nr);
-       keys->gap = keys->nr;
-
-       keys_sorted = kvmalloc_array(keys->nr, sizeof(*keys_sorted), GFP_KERNEL);
-       if (!keys_sorted)
-               return -BCH_ERR_ENOMEM_journal_replay;
-
-       for (i = 0; i < keys->nr; i++)
-               keys_sorted[i] = &keys->d[i];
-
-       sort(keys_sorted, keys->nr,
-            sizeof(keys_sorted[0]),
-            journal_sort_seq_cmp, NULL);
-
        if (keys->nr) {
                ret = bch2_journal_log_msg(c, "Starting journal replay (%zu keys in entries %llu-%llu)",
                                           keys->nr, start_seq, end_seq);
@@ -167,27 +159,61 @@ static int bch2_journal_replay(struct bch_fs *c)
                        goto err;
        }
 
-       for (i = 0; i < keys->nr; i++) {
-               k = keys_sorted[i];
+       /*
+        * First, attempt to replay keys in sorted order. This is more
+        * efficient, but some might fail if that would cause a journal
+        * deadlock.
+        */
+       for (size_t i = 0; i < keys->nr; i++) {
+               cond_resched();
+
+               struct journal_key *k = keys->d + i;
+
+               ret = commit_do(trans, NULL, NULL,
+                               BCH_TRANS_COMMIT_no_enospc|
+                               BCH_TRANS_COMMIT_journal_reclaim|
+                               (!k->allocated ? BCH_TRANS_COMMIT_no_journal_res : 0),
+                            bch2_journal_replay_key(trans, k));
+               BUG_ON(!ret && !k->overwritten);
+               if (ret) {
+                       ret = darray_push(&keys_sorted, k);
+                       if (ret)
+                               goto err;
+               }
+       }
 
+       /*
+        * Now, replay any remaining keys in the order in which they appear in
+        * the journal, unpinning those journal entries as we go:
+        */
+       sort(keys_sorted.data, keys_sorted.nr,
+            sizeof(keys_sorted.data[0]),
+            journal_sort_seq_cmp, NULL);
+
+       darray_for_each(keys_sorted, kp) {
                cond_resched();
 
+               struct journal_key *k = *kp;
+
                replay_now_at(j, k->journal_seq);
 
-               ret = bch2_trans_do(c, NULL, NULL,
-                                   BTREE_INSERT_LAZY_RW|
-                                   BTREE_INSERT_NOFAIL|
-                                   (!k->allocated
-                                    ? BTREE_INSERT_JOURNAL_REPLAY|BCH_WATERMARK_reclaim
-                                    : 0),
+               ret = commit_do(trans, NULL, NULL,
+                               BCH_TRANS_COMMIT_no_enospc|
+                               (!k->allocated
+                                ? BCH_TRANS_COMMIT_no_journal_res|BCH_WATERMARK_reclaim
+                                : 0),
                             bch2_journal_replay_key(trans, k));
-               if (ret) {
-                       bch_err(c, "journal replay: error while replaying key at btree %s level %u: %s",
-                               bch2_btree_id_str(k->btree_id), k->level, bch2_err_str(ret));
+               bch_err_msg(c, ret, "while replaying key at btree %s level %u:",
+                           bch2_btree_id_str(k->btree_id), k->level);
+               if (ret)
                        goto err;
-               }
+
+               BUG_ON(!k->overwritten);
        }
 
+       bch2_trans_put(trans);
+       trans = NULL;
+
        replay_now_at(j, j->replay_journal_seq_end);
        j->replay_journal_seq = 0;
 
@@ -198,10 +224,10 @@ static int bch2_journal_replay(struct bch_fs *c)
        if (keys->nr && !ret)
                bch2_journal_log_msg(c, "journal replay finished");
 err:
-       kvfree(keys_sorted);
-
-       if (ret)
-               bch_err_fn(c, ret);
+       if (trans)
+               bch2_trans_put(trans);
+       darray_exit(&keys_sorted);
+       bch_err_fn(c, ret);
        return ret;
 }
 
@@ -468,7 +494,7 @@ err:
 noinline_for_stack
 static int bch2_fs_upgrade_for_subvolumes(struct bch_fs *c)
 {
-       int ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_LAZY_RW,
+       int ret = bch2_trans_do(c, NULL, NULL, BCH_TRANS_COMMIT_lazy_rw,
                                __bch2_fs_upgrade_for_subvolumes(trans));
        if (ret)
                bch_err_fn(c, ret);
@@ -489,7 +515,19 @@ static int bch2_check_allocations(struct bch_fs *c)
 
 static int bch2_set_may_go_rw(struct bch_fs *c)
 {
+       struct journal_keys *keys = &c->journal_keys;
+
+       /*
+        * After we go RW, the journal keys buffer can't be modified (except for
+        * setting journal_key->overwritten): it will be accessed by multiple
+        * threads. So move the gap to the end (gap == nr) now, before going RW.
+        */
+       move_gap(keys->d, keys->nr, keys->size, keys->gap, keys->nr);
+       keys->gap = keys->nr;
+
        set_bit(BCH_FS_MAY_GO_RW, &c->flags);
+       if (keys->nr)
+               return bch2_fs_read_write_early(c);
        return 0;
 }