]> git.sesse.net Git - bcachefs-tools-debian/blobdiff - libbcachefs/journal_types.h
Disable pristine-tar option in gbp.conf, since there is no pristine-tar branch.
[bcachefs-tools-debian] / libbcachefs / journal_types.h
index 8772e53fb64cf1251c2e19b2a7190ed41017290d..011f7a0d4ebd8cd1b88c2a9c483d33138aaff592 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_JOURNAL_TYPES_H
 #define _BCACHEFS_JOURNAL_TYPES_H
 
@@ -8,26 +9,39 @@
 #include "super_types.h"
 #include "fifo.h"
 
-struct journal_res;
+#define JOURNAL_BUF_BITS       2
+#define JOURNAL_BUF_NR         (1U << JOURNAL_BUF_BITS)
+#define JOURNAL_BUF_MASK       (JOURNAL_BUF_NR - 1)
 
 /*
- * We put two of these in struct journal; we used them for writes to the
- * journal that are being staged or in flight.
+ * We put JOURNAL_BUF_NR of these in struct journal; we use them for writes to
+ * the journal that are being staged or in flight.
  */
 struct journal_buf {
+       struct closure          io;
        struct jset             *data;
 
-       BKEY_PADDED(key);
+       __BKEY_PADDED(key, BCH_REPLICAS_MAX);
+       struct bch_devs_list    devs_written;
 
        struct closure_waitlist wait;
+       u64                     last_seq;       /* copy of data->last_seq */
+       long                    expires;
+       u64                     flush_time;
 
        unsigned                buf_size;       /* size in bytes of @data */
        unsigned                sectors;        /* maximum size for current entry */
        unsigned                disk_sectors;   /* maximum size entry could have been, if
                                                   buf_size was bigger */
        unsigned                u64s_reserved;
-       /* bloom filter: */
-       unsigned long           has_inode[1024 / sizeof(unsigned long)];
+       bool                    noflush:1;      /* write has already been kicked off, and was noflush */
+       bool                    must_flush:1;   /* something wants a flush */
+       bool                    separate_flush:1;
+       bool                    need_flush_to_write_buffer:1;
+       bool                    write_started:1;
+       bool                    write_allocated:1;
+       bool                    write_done:1;
+       u8                      idx;
 };
 
 /*
@@ -35,8 +49,15 @@ struct journal_buf {
  * flushed:
  */
 
+enum journal_pin_type {
+       JOURNAL_PIN_btree,
+       JOURNAL_PIN_key_cache,
+       JOURNAL_PIN_other,
+       JOURNAL_PIN_NR,
+};
+
 struct journal_entry_pin_list {
-       struct list_head                list;
+       struct list_head                list[JOURNAL_PIN_NR];
        struct list_head                flushed;
        atomic_t                        count;
        struct bch_devs_list            devs;
@@ -44,7 +65,7 @@ struct journal_entry_pin_list {
 
 struct journal;
 struct journal_entry_pin;
-typedef void (*journal_pin_flush_fn)(struct journal *j,
+typedef int (*journal_pin_flush_fn)(struct journal *j,
                                struct journal_entry_pin *, u64);
 
 struct journal_entry_pin {
@@ -53,24 +74,6 @@ struct journal_entry_pin {
        u64                             seq;
 };
 
-/* corresponds to a btree node with a blacklisted bset: */
-struct blacklisted_node {
-       __le64                  seq;
-       enum btree_id           btree_id;
-       struct bpos             pos;
-};
-
-struct journal_seq_blacklist {
-       struct list_head        list;
-       u64                     start;
-       u64                     end;
-
-       struct journal_entry_pin pin;
-
-       struct blacklisted_node *entries;
-       size_t                  nr_entries;
-};
-
 struct journal_res {
        bool                    ref;
        u8                      idx;
@@ -90,10 +93,12 @@ union journal_res_state {
 
        struct {
                u64             cur_entry_offset:20,
-                               idx:1,
-                               prev_buf_unwritten:1,
-                               buf0_count:21,
-                               buf1_count:21;
+                               idx:2,
+                               unwritten_idx:2,
+                               buf0_count:10,
+                               buf1_count:10,
+                               buf2_count:10,
+                               buf3_count:10;
        };
 };
 
@@ -110,47 +115,97 @@ union journal_res_state {
 #define JOURNAL_ENTRY_CLOSED_VAL       (JOURNAL_ENTRY_OFFSET_MAX - 1)
 #define JOURNAL_ENTRY_ERROR_VAL                (JOURNAL_ENTRY_OFFSET_MAX)
 
-/*
- * JOURNAL_NEED_WRITE - current (pending) journal entry should be written ASAP,
- * either because something's waiting on the write to complete or because it's
- * been dirty too long and the timer's expired.
- */
+struct journal_space {
+       /* Units of 512-byte sectors: */
+       unsigned        next_entry; /* How big the next journal entry can be */
+       unsigned        total;
+};
 
-enum {
+enum journal_space_from {
+       journal_space_discarded,
+       journal_space_clean_ondisk,
+       journal_space_clean,
+       journal_space_total,
+       journal_space_nr,
+};
+
+enum journal_flags {
        JOURNAL_REPLAY_DONE,
        JOURNAL_STARTED,
-       JOURNAL_NEED_WRITE,
-       JOURNAL_NOT_EMPTY,
+       JOURNAL_MAY_SKIP_FLUSH,
+       JOURNAL_NEED_FLUSH_WRITE,
+};
+
+/* Reasons we may fail to get a journal reservation: */
+#define JOURNAL_ERRORS()               \
+       x(ok)                           \
+       x(retry)                        \
+       x(blocked)                      \
+       x(max_in_flight)                \
+       x(journal_full)                 \
+       x(journal_pin_full)             \
+       x(journal_stuck)                \
+       x(insufficient_devices)
+
+enum journal_errors {
+#define x(n)   JOURNAL_ERR_##n,
+       JOURNAL_ERRORS()
+#undef x
+};
+
+typedef DARRAY(u64)            darray_u64;
+
+struct journal_bio {
+       struct bch_dev          *ca;
+       unsigned                buf_idx;
+
+       struct bio              bio;
 };
 
 /* Embedded in struct bch_fs */
 struct journal {
        /* Fastpath stuff up front: */
-
-       unsigned long           flags;
+       struct {
 
        union journal_res_state reservations;
+       enum bch_watermark      watermark;
+
+       } __aligned(SMP_CACHE_BYTES);
+
+       unsigned long           flags;
 
        /* Max size of current journal entry */
        unsigned                cur_entry_u64s;
        unsigned                cur_entry_sectors;
 
+       /* Reserved space in journal entry to be used just prior to write */
+       unsigned                entry_u64s_reserved;
+
+
        /*
         * 0, or -ENOSPC if waiting on journal reclaim, or -EROFS if
         * insufficient devices:
         */
-       int                     cur_entry_error;
-
-       /* Reserved space in journal entry to be used just prior to write */
-       unsigned                entry_u64s_reserved;
+       enum journal_errors     cur_entry_error;
 
        unsigned                buf_size_want;
+       /*
+        * We may queue up some things to be journalled (log messages) before
+        * the journal has actually started - stash them here:
+        */
+       darray_u64              early_journal_entries;
 
+       /*
+        * Protects journal_buf->data, when accessing without a journal
+        * reservation: for synchronization between the btree write buffer code
+        * and the journal write path:
+        */
+       struct mutex            buf_lock;
        /*
         * Two journal entries -- one is currently open for new entries, the
         * other is possibly being written out.
         */
-       struct journal_buf      buf[2];
+       struct journal_buf      buf[JOURNAL_BUF_NR];
 
        spinlock_t              lock;
 
@@ -161,15 +216,18 @@ struct journal {
        wait_queue_head_t       wait;
        struct closure_waitlist async_wait;
 
-       struct closure          io;
        struct delayed_work     write_work;
+       struct workqueue_struct *wq;
 
        /* Sequence number of most recent journal entry (last entry in @pin) */
        atomic64_t              seq;
 
        /* seq, last_seq from the most recent journal entry successfully written */
        u64                     seq_ondisk;
+       u64                     flushed_seq_ondisk;
        u64                     last_seq_ondisk;
+       u64                     err_seq;
+       u64                     last_empty_seq;
 
        /*
         * FIFO of journal entries whose btree updates have not yet been
@@ -192,39 +250,51 @@ struct journal {
                struct journal_entry_pin_list *data;
        }                       pin;
 
-       struct journal_entry_pin *flush_in_progress;
-       wait_queue_head_t       pin_flush_wait;
+       struct journal_space    space[journal_space_nr];
 
        u64                     replay_journal_seq;
-
-       struct mutex            blacklist_lock;
-       struct list_head        seq_blacklist;
-       struct journal_seq_blacklist *new_blacklist;
+       u64                     replay_journal_seq_end;
 
        struct write_point      wp;
        spinlock_t              err_lock;
 
-       struct delayed_work     reclaim_work;
+       struct mutex            reclaim_lock;
+       /*
+        * Used for waiting until journal reclaim has freed up space in the
+        * journal:
+        */
+       wait_queue_head_t       reclaim_wait;
+       struct task_struct      *reclaim_thread;
+       bool                    reclaim_kicked;
+       unsigned long           next_reclaim;
+       u64                     nr_direct_reclaim;
+       u64                     nr_background_reclaim;
+
        unsigned long           last_flushed;
+       struct journal_entry_pin *flush_in_progress;
+       bool                    flush_in_progress_dropped;
+       wait_queue_head_t       pin_flush_wait;
 
-       /* protects advancing ja->last_idx: */
-       struct mutex            reclaim_lock;
-       unsigned                write_delay_ms;
-       unsigned                reclaim_delay_ms;
+       /* protects advancing ja->discard_idx: */
+       struct mutex            discard_lock;
+       bool                    can_discard;
+
+       unsigned long           last_flush_write;
 
-       u64                     res_get_blocked_start;
-       u64                     need_write_time;
        u64                     write_start_time;
 
-       struct time_stats       *write_time;
-       struct time_stats       *delay_time;
-       struct time_stats       *blocked_time;
+       u64                     nr_flush_writes;
+       u64                     nr_noflush_writes;
+       u64                     entry_bytes_written;
+
+       struct time_stats       *flush_write_time;
+       struct time_stats       *noflush_write_time;
        struct time_stats       *flush_seq_time;
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
        struct lockdep_map      res_map;
 #endif
-};
+} __aligned(SMP_CACHE_BYTES);
 
 /*
  * Embedded in struct bch_dev. First three fields refer to the array of journal
@@ -239,21 +309,19 @@ struct journal_device {
 
        unsigned                sectors_free;
 
-       /* Journal bucket we're currently writing to */
-       unsigned                cur_idx;
-
-       /* Last journal bucket that still contains an open journal entry */
-
        /*
-        * j->lock and j->reclaim_lock must both be held to modify, j->lock
-        * sufficient to read:
+        * discard_idx <= dirty_idx_ondisk <= dirty_idx <= cur_idx:
         */
-       unsigned                last_idx;
+       unsigned                discard_idx;            /* Next bucket to discard */
+       unsigned                dirty_idx_ondisk;
+       unsigned                dirty_idx;
+       unsigned                cur_idx;                /* Journal bucket we're currently writing to */
        unsigned                nr;
+
        u64                     *buckets;
 
        /* Bio for journal reads/writes to this device */
-       struct bio              *bio;
+       struct journal_bio      *bio[JOURNAL_BUF_NR];
 
        /* for bch_journal_read_device */
        struct closure          read;