]> git.sesse.net Git - bcachefs-tools-debian/blobdiff - libbcachefs/journal_types.h
Disable pristine-tar option in gbp.conf, since there is no pristine-tar branch.
[bcachefs-tools-debian] / libbcachefs / journal_types.h
index 9757e3d55991ed8b29b2ca7cb9c745326ef4c6cc..011f7a0d4ebd8cd1b88c2a9c483d33138aaff592 100644 (file)
@@ -9,26 +9,39 @@
 #include "super_types.h"
 #include "fifo.h"
 
-struct journal_res;
+#define JOURNAL_BUF_BITS       2
+#define JOURNAL_BUF_NR         (1U << JOURNAL_BUF_BITS)
+#define JOURNAL_BUF_MASK       (JOURNAL_BUF_NR - 1)
 
 /*
- * We put two of these in struct journal; we used them for writes to the
- * journal that are being staged or in flight.
+ * We put JOURNAL_BUF_NR of these in struct journal; we use them for writes to
+ * the journal that are being staged or in flight.
  */
 struct journal_buf {
+       struct closure          io;
        struct jset             *data;
 
-       BKEY_PADDED(key);
+       __BKEY_PADDED(key, BCH_REPLICAS_MAX);
+       struct bch_devs_list    devs_written;
 
        struct closure_waitlist wait;
+       u64                     last_seq;       /* copy of data->last_seq */
+       long                    expires;
+       u64                     flush_time;
 
        unsigned                buf_size;       /* size in bytes of @data */
        unsigned                sectors;        /* maximum size for current entry */
        unsigned                disk_sectors;   /* maximum size entry could have been, if
                                                   buf_size was bigger */
        unsigned                u64s_reserved;
-       /* bloom filter: */
-       unsigned long           has_inode[1024 / sizeof(unsigned long)];
+       bool                    noflush:1;      /* write has already been kicked off, and was noflush */
+       bool                    must_flush:1;   /* something wants a flush */
+       bool                    separate_flush:1;
+       bool                    need_flush_to_write_buffer:1;
+       bool                    write_started:1;
+       bool                    write_allocated:1;
+       bool                    write_done:1;
+       u8                      idx;
 };
 
 /*
@@ -36,8 +49,15 @@ struct journal_buf {
  * flushed:
  */
 
+enum journal_pin_type {
+       JOURNAL_PIN_btree,
+       JOURNAL_PIN_key_cache,
+       JOURNAL_PIN_other,
+       JOURNAL_PIN_NR,
+};
+
 struct journal_entry_pin_list {
-       struct list_head                list;
+       struct list_head                list[JOURNAL_PIN_NR];
        struct list_head                flushed;
        atomic_t                        count;
        struct bch_devs_list            devs;
@@ -45,7 +65,7 @@ struct journal_entry_pin_list {
 
 struct journal;
 struct journal_entry_pin;
-typedef void (*journal_pin_flush_fn)(struct journal *j,
+typedef int (*journal_pin_flush_fn)(struct journal *j,
                                struct journal_entry_pin *, u64);
 
 struct journal_entry_pin {
@@ -62,14 +82,6 @@ struct journal_res {
        u64                     seq;
 };
 
-/*
- * For reserving space in the journal prior to getting a reservation on a
- * particular journal entry:
- */
-struct journal_preres {
-       unsigned                u64s;
-};
-
 union journal_res_state {
        struct {
                atomic64_t      counter;
@@ -81,25 +93,12 @@ union journal_res_state {
 
        struct {
                u64             cur_entry_offset:20,
-                               idx:1,
-                               prev_buf_unwritten:1,
-                               buf0_count:21,
-                               buf1_count:21;
-       };
-};
-
-union journal_preres_state {
-       struct {
-               atomic64_t      counter;
-       };
-
-       struct {
-               u64             v;
-       };
-
-       struct {
-               u32             reserved;
-               u32             remaining;
+                               idx:2,
+                               unwritten_idx:2,
+                               buf0_count:10,
+                               buf1_count:10,
+                               buf2_count:10,
+                               buf3_count:10;
        };
 };
 
@@ -116,50 +115,97 @@ union journal_preres_state {
 #define JOURNAL_ENTRY_CLOSED_VAL       (JOURNAL_ENTRY_OFFSET_MAX - 1)
 #define JOURNAL_ENTRY_ERROR_VAL                (JOURNAL_ENTRY_OFFSET_MAX)
 
-/*
- * JOURNAL_NEED_WRITE - current (pending) journal entry should be written ASAP,
- * either because something's waiting on the write to complete or because it's
- * been dirty too long and the timer's expired.
- */
+struct journal_space {
+       /* Units of 512-byte sectors: */
+       unsigned        next_entry; /* How big the next journal entry can be */
+       unsigned        total;
+};
 
-enum {
+enum journal_space_from {
+       journal_space_discarded,
+       journal_space_clean_ondisk,
+       journal_space_clean,
+       journal_space_total,
+       journal_space_nr,
+};
+
+enum journal_flags {
        JOURNAL_REPLAY_DONE,
        JOURNAL_STARTED,
-       JOURNAL_RECLAIM_STARTED,
-       JOURNAL_NEED_WRITE,
-       JOURNAL_MAY_GET_UNRESERVED,
+       JOURNAL_MAY_SKIP_FLUSH,
+       JOURNAL_NEED_FLUSH_WRITE,
+};
+
+/* Reasons we may fail to get a journal reservation: */
+#define JOURNAL_ERRORS()               \
+       x(ok)                           \
+       x(retry)                        \
+       x(blocked)                      \
+       x(max_in_flight)                \
+       x(journal_full)                 \
+       x(journal_pin_full)             \
+       x(journal_stuck)                \
+       x(insufficient_devices)
+
+enum journal_errors {
+#define x(n)   JOURNAL_ERR_##n,
+       JOURNAL_ERRORS()
+#undef x
+};
+
+typedef DARRAY(u64)            darray_u64;
+
+struct journal_bio {
+       struct bch_dev          *ca;
+       unsigned                buf_idx;
+
+       struct bio              bio;
 };
 
 /* Embedded in struct bch_fs */
 struct journal {
        /* Fastpath stuff up front: */
-
-       unsigned long           flags;
+       struct {
 
        union journal_res_state reservations;
+       enum bch_watermark      watermark;
+
+       } __aligned(SMP_CACHE_BYTES);
+
+       unsigned long           flags;
 
        /* Max size of current journal entry */
        unsigned                cur_entry_u64s;
        unsigned                cur_entry_sectors;
 
+       /* Reserved space in journal entry to be used just prior to write */
+       unsigned                entry_u64s_reserved;
+
+
        /*
         * 0, or -ENOSPC if waiting on journal reclaim, or -EROFS if
         * insufficient devices:
         */
-       int                     cur_entry_error;
-
-       union journal_preres_state prereserved;
-
-       /* Reserved space in journal entry to be used just prior to write */
-       unsigned                entry_u64s_reserved;
+       enum journal_errors     cur_entry_error;
 
        unsigned                buf_size_want;
+       /*
+        * We may queue up some things to be journalled (log messages) before
+        * the journal has actually started - stash them here:
+        */
+       darray_u64              early_journal_entries;
 
+       /*
+        * Protects journal_buf->data, when accessing without a journal
+        * reservation: for synchronization between the btree write buffer code
+        * and the journal write path:
+        */
+       struct mutex            buf_lock;
        /*
         * Two journal entries -- one is currently open for new entries, the
         * other is possibly being written out.
         */
-       struct journal_buf      buf[2];
+       struct journal_buf      buf[JOURNAL_BUF_NR];
 
        spinlock_t              lock;
 
@@ -169,16 +215,16 @@ struct journal {
        /* Used when waiting because the journal was full */
        wait_queue_head_t       wait;
        struct closure_waitlist async_wait;
-       struct closure_waitlist preres_wait;
 
-       struct closure          io;
        struct delayed_work     write_work;
+       struct workqueue_struct *wq;
 
        /* Sequence number of most recent journal entry (last entry in @pin) */
        atomic64_t              seq;
 
        /* seq, last_seq from the most recent journal entry successfully written */
        u64                     seq_ondisk;
+       u64                     flushed_seq_ondisk;
        u64                     last_seq_ondisk;
        u64                     err_seq;
        u64                     last_empty_seq;
@@ -204,38 +250,51 @@ struct journal {
                struct journal_entry_pin_list *data;
        }                       pin;
 
+       struct journal_space    space[journal_space_nr];
+
        u64                     replay_journal_seq;
        u64                     replay_journal_seq_end;
 
        struct write_point      wp;
        spinlock_t              err_lock;
 
-       struct delayed_work     reclaim_work;
        struct mutex            reclaim_lock;
+       /*
+        * Used for waiting until journal reclaim has freed up space in the
+        * journal:
+        */
+       wait_queue_head_t       reclaim_wait;
+       struct task_struct      *reclaim_thread;
+       bool                    reclaim_kicked;
+       unsigned long           next_reclaim;
+       u64                     nr_direct_reclaim;
+       u64                     nr_background_reclaim;
+
        unsigned long           last_flushed;
        struct journal_entry_pin *flush_in_progress;
+       bool                    flush_in_progress_dropped;
        wait_queue_head_t       pin_flush_wait;
 
        /* protects advancing ja->discard_idx: */
        struct mutex            discard_lock;
        bool                    can_discard;
 
-       unsigned                write_delay_ms;
-       unsigned                reclaim_delay_ms;
+       unsigned long           last_flush_write;
 
-       u64                     res_get_blocked_start;
-       u64                     need_write_time;
        u64                     write_start_time;
 
-       struct time_stats       *write_time;
-       struct time_stats       *delay_time;
-       struct time_stats       *blocked_time;
+       u64                     nr_flush_writes;
+       u64                     nr_noflush_writes;
+       u64                     entry_bytes_written;
+
+       struct time_stats       *flush_write_time;
+       struct time_stats       *noflush_write_time;
        struct time_stats       *flush_seq_time;
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
        struct lockdep_map      res_map;
 #endif
-};
+} __aligned(SMP_CACHE_BYTES);
 
 /*
  * Embedded in struct bch_dev. First three fields refer to the array of journal
@@ -262,7 +321,7 @@ struct journal_device {
        u64                     *buckets;
 
        /* Bio for journal reads/writes to this device */
-       struct bio              *bio;
+       struct journal_bio      *bio[JOURNAL_BUF_NR];
 
        /* for bch_journal_read_device */
        struct closure          read;