Update bcachefs sources to e7f6215768 bcachefs: Fix snapshot_skiplist_good()

diff --git a/libbcachefs/journal_reclaim.c b/libbcachefs/journal_reclaim.c
index e873ce2a3f03a5e9c2ba4d4cfc2ff87c30065ad2..10e1860dad79acba08a5ff904dd92f7e3aa3f1ce 100644
--- a/libbcachefs/journal_reclaim.c
+++ b/libbcachefs/journal_reclaim.c
@@ -2,17 +2,19 @@
 
 #include "bcachefs.h"
 #include "btree_key_cache.h"
+#include "btree_update.h"
+#include "buckets.h"
 #include "errcode.h"
 #include "error.h"
 #include "journal.h"
 #include "journal_io.h"
 #include "journal_reclaim.h"
 #include "replicas.h"
-#include "super.h"
+#include "sb-members.h"
+#include "trace.h"
 
 #include <linux/kthread.h>
 #include <linux/sched/mm.h>
-#include <trace/events/bcachefs.h>
 
 /* Free space calculations: */
 
@@ -209,24 +211,7 @@ void bch2_journal_space_available(struct journal *j)
        clean           = j->space[journal_space_clean].total;
        total           = j->space[journal_space_total].total;
 
-       if (!clean_ondisk &&
-           journal_cur_seq(j) == j->seq_ondisk) {
-               struct printbuf buf = PRINTBUF;
-
-               __bch2_journal_debug_to_text(&buf, j);
-               bch_err(c, "journal stuck\n%s", buf.buf);
-               printbuf_exit(&buf);
-
-               /*
-                * Hack: bch2_fatal_error() calls bch2_journal_halt() which
-                * takes journal lock:
-                */
-               spin_unlock(&j->lock);
-               bch2_fatal_error(c);
-               spin_lock(&j->lock);
-
-               ret = JOURNAL_ERR_journal_stuck;
-       } else if (!j->space[journal_space_discarded].next_entry)
+       if (!j->space[journal_space_discarded].next_entry)
                ret = JOURNAL_ERR_journal_full;
 
        if ((j->space[journal_space_clean_ondisk].next_entry <
@@ -287,7 +272,7 @@ void bch2_journal_do_discards(struct journal *j)
                                blkdev_issue_discard(ca->disk_sb.bdev,
                                        bucket_to_sector(ca,
                                                ja->buckets[ja->discard_idx]),
-                                       ca->mi.bucket_size, GFP_NOIO);
+                                       ca->mi.bucket_size, GFP_NOFS);
 
                        spin_lock(&j->lock);
                        ja->discard_idx = (ja->discard_idx + 1) % ja->nr;
@@ -318,9 +303,7 @@ static void bch2_journal_reclaim_fast(struct journal *j)
         */
        while (!fifo_empty(&j->pin) &&
               !atomic_read(&fifo_peek_front(&j->pin).count)) {
-               BUG_ON(!list_empty(&fifo_peek_front(&j->pin).list));
-               BUG_ON(!list_empty(&fifo_peek_front(&j->pin).flushed));
-               BUG_ON(!fifo_pop(&j->pin, temp));
+               fifo_pop(&j->pin, temp);
                popped = true;
        }
 
@@ -347,13 +330,13 @@ void bch2_journal_pin_put(struct journal *j, u64 seq)
        }
 }
 
-static inline void __journal_pin_drop(struct journal *j,
+static inline bool __journal_pin_drop(struct journal *j,
                                      struct journal_entry_pin *pin)
 {
        struct journal_entry_pin_list *pin_list;
 
        if (!journal_pin_active(pin))
-               return;
+               return false;
 
        if (j->flush_in_progress == pin)
                j->flush_in_progress_dropped = true;
@@ -363,27 +346,39 @@ static inline void __journal_pin_drop(struct journal *j,
        list_del_init(&pin->list);
 
        /*
-        * Unpinning a journal entry may make journal_next_bucket() succeed if
+        * Unpinning a journal entry may make journal_next_bucket() succeed, if
         * writing a new last_seq will now make another bucket available:
         */
-       if (atomic_dec_and_test(&pin_list->count) &&
-           pin_list == &fifo_peek_front(&j->pin))
-               bch2_journal_reclaim_fast(j);
+       return atomic_dec_and_test(&pin_list->count) &&
+               pin_list == &fifo_peek_front(&j->pin);
 }
 
 void bch2_journal_pin_drop(struct journal *j,
                           struct journal_entry_pin *pin)
 {
        spin_lock(&j->lock);
-       __journal_pin_drop(j, pin);
+       if (__journal_pin_drop(j, pin))
+               bch2_journal_reclaim_fast(j);
        spin_unlock(&j->lock);
 }
 
+static enum journal_pin_type journal_pin_type(journal_pin_flush_fn fn)
+{
+       if (fn == bch2_btree_node_flush0 ||
+           fn == bch2_btree_node_flush1)
+               return JOURNAL_PIN_btree;
+       else if (fn == bch2_btree_key_cache_journal_flush)
+               return JOURNAL_PIN_key_cache;
+       else
+               return JOURNAL_PIN_other;
+}
+
 void bch2_journal_pin_set(struct journal *j, u64 seq,
                          struct journal_entry_pin *pin,
                          journal_pin_flush_fn flush_fn)
 {
        struct journal_entry_pin_list *pin_list;
+       bool reclaim;
 
        spin_lock(&j->lock);
 
@@ -400,18 +395,19 @@ void bch2_journal_pin_set(struct journal *j, u64 seq,
 
        pin_list = journal_seq_pin(j, seq);
 
-       __journal_pin_drop(j, pin);
+       reclaim = __journal_pin_drop(j, pin);
 
        atomic_inc(&pin_list->count);
        pin->seq        = seq;
        pin->flush      = flush_fn;
 
-       if (flush_fn == bch2_btree_key_cache_journal_flush)
-               list_add(&pin->list, &pin_list->key_cache_list);
-       else if (flush_fn)
-               list_add(&pin->list, &pin_list->list);
+       if (flush_fn)
+               list_add(&pin->list, &pin_list->list[journal_pin_type(flush_fn)]);
        else
                list_add(&pin->list, &pin_list->flushed);
+
+       if (reclaim)
+               bch2_journal_reclaim_fast(j);
        spin_unlock(&j->lock);
 
        /*
@@ -442,37 +438,37 @@ void bch2_journal_pin_flush(struct journal *j, struct journal_entry_pin *pin)
 
 static struct journal_entry_pin *
 journal_get_next_pin(struct journal *j,
-                    bool get_any,
-                    bool get_key_cache,
-                    u64 max_seq, u64 *seq)
+                    u64 seq_to_flush,
+                    unsigned allowed_below_seq,
+                    unsigned allowed_above_seq,
+                    u64 *seq)
 {
        struct journal_entry_pin_list *pin_list;
        struct journal_entry_pin *ret = NULL;
+       unsigned i;
 
        fifo_for_each_entry_ptr(pin_list, &j->pin, *seq) {
-               if (*seq > max_seq && !get_any && !get_key_cache)
+               if (*seq > seq_to_flush && !allowed_above_seq)
                        break;
 
-               if (*seq <= max_seq || get_any) {
-                       ret = list_first_entry_or_null(&pin_list->list,
-                               struct journal_entry_pin, list);
-                       if (ret)
-                               return ret;
-               }
-
-               if (*seq <= max_seq || get_any || get_key_cache) {
-                       ret = list_first_entry_or_null(&pin_list->key_cache_list,
-                               struct journal_entry_pin, list);
-                       if (ret)
-                               return ret;
-               }
+               for (i = 0; i < JOURNAL_PIN_NR; i++)
+                       if ((((1U << i) & allowed_below_seq) && *seq <= seq_to_flush) ||
+                           ((1U << i) & allowed_above_seq)) {
+                               ret = list_first_entry_or_null(&pin_list->list[i],
+                                       struct journal_entry_pin, list);
+                               if (ret)
+                                       return ret;
+                       }
        }
 
        return NULL;
 }
 
 /* returns true if we did work */
-static size_t journal_flush_pins(struct journal *j, u64 seq_to_flush,
+static size_t journal_flush_pins(struct journal *j,
+                                u64 seq_to_flush,
+                                unsigned allowed_below_seq,
+                                unsigned allowed_above_seq,
                                 unsigned min_any,
                                 unsigned min_key_cache)
 {
@@ -485,15 +481,25 @@ static size_t journal_flush_pins(struct journal *j, u64 seq_to_flush,
        lockdep_assert_held(&j->reclaim_lock);
 
        while (1) {
+               unsigned allowed_above = allowed_above_seq;
+               unsigned allowed_below = allowed_below_seq;
+
+               if (min_any) {
+                       allowed_above |= ~0;
+                       allowed_below |= ~0;
+               }
+
+               if (min_key_cache) {
+                       allowed_above |= 1U << JOURNAL_PIN_key_cache;
+                       allowed_below |= 1U << JOURNAL_PIN_key_cache;
+               }
+
                cond_resched();
 
                j->last_flushed = jiffies;
 
                spin_lock(&j->lock);
-               pin = journal_get_next_pin(j,
-                                          min_any != 0,
-                                          min_key_cache != 0,
-                                          seq_to_flush, &seq);
+               pin = journal_get_next_pin(j, seq_to_flush, allowed_below, allowed_above, &seq);
                if (pin) {
                        BUG_ON(j->flush_in_progress);
                        j->flush_in_progress = pin;
@@ -652,6 +658,7 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct, bool kicked)
                                atomic_long_read(&c->btree_key_cache.nr_keys));
 
                nr_flushed = journal_flush_pins(j, seq_to_flush,
+                                               ~0, 0,
                                                min_nr, min_key_cache);
 
                if (direct)
@@ -703,7 +710,7 @@ static int bch2_journal_reclaim_thread(void *arg)
                        j->next_reclaim = now + delay;
 
                while (1) {
-                       set_current_state(TASK_INTERRUPTIBLE);
+                       set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE);
                        if (kthread_should_stop())
                                break;
                        if (j->reclaim_kicked)
@@ -714,9 +721,9 @@ static int bch2_journal_reclaim_thread(void *arg)
                        spin_unlock(&j->lock);
 
                        if (journal_empty)
-                               freezable_schedule();
+                               schedule();
                        else if (time_after(j->next_reclaim, jiffies))
-                               freezable_schedule_timeout(j->next_reclaim - jiffies);
+                               schedule_timeout(j->next_reclaim - jiffies);
                        else
                                break;
                }
@@ -772,7 +779,11 @@ static int journal_flush_done(struct journal *j, u64 seq_to_flush,
 
        mutex_lock(&j->reclaim_lock);
 
-       if (journal_flush_pins(j, seq_to_flush, 0, 0))
+       if (journal_flush_pins(j, seq_to_flush,
+                              (1U << JOURNAL_PIN_key_cache)|
+                              (1U << JOURNAL_PIN_other), 0, 0, 0) ||
+           journal_flush_pins(j, seq_to_flush,
+                              (1U << JOURNAL_PIN_btree), 0, 0, 0))
                *did_work = true;
 
        spin_lock(&j->lock);
@@ -827,8 +838,18 @@ int bch2_journal_flush_device_pins(struct journal *j, int dev_idx)
        mutex_lock(&c->replicas_gc_lock);
        bch2_replicas_gc_start(c, 1 << BCH_DATA_journal);
 
-       seq = 0;
+       /*
+        * Now that we've populated replicas_gc, write to the journal to mark
+        * active journal devices. This handles the case where the journal might
+        * be empty. Otherwise we could clear all journal replicas and
+        * temporarily put the fs into an unrecoverable state. Journal recovery
+        * expects to find devices marked for journal data on unclean mount.
+        */
+       ret = bch2_journal_meta(&c->journal);
+       if (ret)
+               goto err;
 
+       seq = 0;
        spin_lock(&j->lock);
        while (!ret) {
                struct bch_replicas_padded replicas;
@@ -845,7 +866,7 @@ int bch2_journal_flush_device_pins(struct journal *j, int dev_idx)
                spin_lock(&j->lock);
        }
        spin_unlock(&j->lock);
-
+err:
        ret = bch2_replicas_gc_end(c, ret);
        mutex_unlock(&c->replicas_gc_lock);
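
Note: as context for the pin-flushing changes above, here is a minimal standalone sketch (not part of the patch) of the per-class selection rule used by the reworked journal_get_next_pin() and by journal_flush_done()'s two-pass flush. The JOURNAL_PIN_* names mirror the ones the patch relies on (defined elsewhere in the bcachefs tree); the surrounding scaffolding is purely illustrative.

/*
 * Standalone illustration of the pin-class selection rule: class i is
 * eligible either because its bit is set in allowed_below_seq and the
 * entry's sequence number is <= seq_to_flush, or because its bit is set
 * in allowed_above_seq regardless of sequence number.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

enum journal_pin_type {
	JOURNAL_PIN_btree,
	JOURNAL_PIN_key_cache,
	JOURNAL_PIN_other,
	JOURNAL_PIN_NR,
};

static bool pin_class_eligible(unsigned i, uint64_t seq, uint64_t seq_to_flush,
			       unsigned allowed_below_seq, unsigned allowed_above_seq)
{
	return (((1U << i) & allowed_below_seq) && seq <= seq_to_flush) ||
		((1U << i) & allowed_above_seq);
}

int main(void)
{
	/*
	 * Mirror journal_flush_done() in the patch: the first pass flushes
	 * key cache and "other" pins up to seq_to_flush, the second pass
	 * flushes btree node pins.
	 */
	unsigned pass1 = (1U << JOURNAL_PIN_key_cache) | (1U << JOURNAL_PIN_other);
	unsigned pass2 = 1U << JOURNAL_PIN_btree;
	uint64_t seq_to_flush = 100;

	for (unsigned i = 0; i < JOURNAL_PIN_NR; i++)
		printf("class %u at seq 50: pass1 %d, pass2 %d\n", i,
		       pin_class_eligible(i, 50, seq_to_flush, pass1, 0),
		       pin_class_eligible(i, 50, seq_to_flush, pass2, 0));
	return 0;
}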