git.sesse.net Git - bcachefs-tools-debian/blobdiff - libbcachefs/bcachefs.h
Update bcachefs sources to feaca6edbd24 mean and variance: Promote to lib/math
[bcachefs-tools-debian] / libbcachefs / bcachefs.h
index 25a32fd6c8f2a594b7fca3cd8dc0eaba4d003a8f..2e9f4af3ad58072a7e199462b2c7b3dbcfebea1f 100644 (file)
 #include "fifo.h"
 #include "nocow_locking_types.h"
 #include "opts.h"
+#include "recovery_types.h"
+#include "sb-errors_types.h"
+#include "seqmutex.h"
 #include "util.h"
 
 #ifdef CONFIG_BCACHEFS_DEBUG
 #define BCH_WRITE_REF_DEBUG
 #endif
 
+#ifndef dynamic_fault
 #define dynamic_fault(...)             0
-#define race_fault(...)                        0
+#endif
+
+#define race_fault(...)                        dynamic_fault("bcachefs:race")
 
 #define trace_and_count(_c, _name, ...)                                        \
 do {                                                                   \
@@ -287,6 +293,19 @@ do {                                                                       \
 #define bch_err_inum_offset_ratelimited(c, _inum, _offset, fmt, ...) \
        printk_ratelimited(KERN_ERR bch2_fmt_inum_offset(c, _inum, _offset, fmt), ##__VA_ARGS__)
 
+#define bch_err_fn(_c, _ret)                                           \
+do {   /* logs "<func>(): error <errstr>" unless _ret is 0 or matches BCH_ERR_transaction_restart; _ret is expanded more than once, so pass a side-effect-free expression */ \
+       if ((_ret) && !bch2_err_matches(_ret, BCH_ERR_transaction_restart))\
+               bch_err(_c, "%s(): error %s", __func__, bch2_err_str(_ret));\
+} while (0)
+
+#define bch_err_msg(_c, _ret, _msg, ...)                               \
+do {   /* logs "<func>(): error <_msg> <errstr>" unless _ret is 0 or matches BCH_ERR_transaction_restart; _msg must be a string literal (it is pasted into the format), varargs feed its conversions */ \
+       if ((_ret) && !bch2_err_matches(_ret, BCH_ERR_transaction_restart))\
+               bch_err(_c, "%s(): error " _msg " %s", __func__,        \
+                       ##__VA_ARGS__, bch2_err_str(_ret));             \
+} while (0)
+
 #define bch_verbose(c, fmt, ...)                                       \
 do {                                                                   \
        if ((c)->opts.verbose)                                          \
@@ -361,7 +380,7 @@ BCH_DEBUG_PARAMS()
 #undef BCH_DEBUG_PARAM
 
 #ifndef CONFIG_BCACHEFS_DEBUG
-#define BCH_DEBUG_PARAM(name, description) static const bool bch2_##name;
+#define BCH_DEBUG_PARAM(name, description) static const __maybe_unused bool bch2_##name;
 BCH_DEBUG_PARAMS_DEBUG()
 #undef BCH_DEBUG_PARAM
 #endif
@@ -382,7 +401,9 @@ BCH_DEBUG_PARAMS_DEBUG()
        x(journal_flush_write)                  \
        x(journal_noflush_write)                \
        x(journal_flush_seq)                    \
-       x(blocked_journal)                      \
+       x(blocked_journal_low_on_space)         \
+       x(blocked_journal_low_on_pin)           \
+       x(blocked_journal_max_in_flight)        \
        x(blocked_allocate)                     \
        x(blocked_allocate_open_bucket)         \
        x(nocow_lock_contended)
@@ -400,6 +421,7 @@ enum bch_time_stats {
 #include "buckets_types.h"
 #include "buckets_waiting_for_journal_types.h"
 #include "clock_types.h"
+#include "disk_groups_types.h"
 #include "ec_types.h"
 #include "journal_types.h"
 #include "keylist_types.h"
@@ -442,6 +464,10 @@ enum gc_phase {
        GC_PHASE_BTREE_need_discard,
        GC_PHASE_BTREE_backpointers,
        GC_PHASE_BTREE_bucket_gens,
+       GC_PHASE_BTREE_snapshot_trees,
+       GC_PHASE_BTREE_deleted_inodes,
+       GC_PHASE_BTREE_logged_ops,
+       GC_PHASE_BTREE_rebalance_work,
 
        GC_PHASE_PENDING_DELETE,
 };
@@ -479,7 +505,9 @@ struct bch_dev {
         * Committed by bch2_write_super() -> bch_fs_mi_update()
         */
        struct bch_member_cpu   mi;
-       uuid_le                 uuid;
+       atomic64_t              errors[BCH_MEMBER_ERROR_NR];
+
+       __uuid_t                uuid;
        char                    name[BDEVNAME_SIZE];
 
        struct bch_sb_handle    disk_sb;
@@ -538,38 +566,38 @@ struct bch_dev {
        struct io_count __percpu *io_done;
 };
 
-enum {
-       /* startup: */
-       BCH_FS_STARTED,
-       BCH_FS_MAY_GO_RW,
-       BCH_FS_RW,
-       BCH_FS_WAS_RW,
-
-       /* shutdown: */
-       BCH_FS_STOPPING,
-       BCH_FS_EMERGENCY_RO,
-       BCH_FS_GOING_RO,
-       BCH_FS_WRITE_DISABLE_COMPLETE,
-       BCH_FS_CLEAN_SHUTDOWN,
-
-       /* fsck passes: */
-       BCH_FS_TOPOLOGY_REPAIR_DONE,
-       BCH_FS_INITIAL_GC_DONE,         /* kill when we enumerate fsck passes */
-       BCH_FS_CHECK_ALLOC_DONE,
-       BCH_FS_CHECK_LRUS_DONE,
-       BCH_FS_CHECK_BACKPOINTERS_DONE,
-       BCH_FS_CHECK_ALLOC_TO_LRU_REFS_DONE,
-       BCH_FS_FSCK_DONE,
-       BCH_FS_INITIAL_GC_UNFIXED,      /* kill when we enumerate fsck errors */
-       BCH_FS_NEED_ANOTHER_GC,
-
-       BCH_FS_HAVE_DELETED_SNAPSHOTS,
-
-       /* errors: */
-       BCH_FS_ERROR,
-       BCH_FS_TOPOLOGY_ERROR,
-       BCH_FS_ERRORS_FIXED,
-       BCH_FS_ERRORS_NOT_FIXED,
+/*
+ * fsck_done - kill?
+ *
+ * replace with something more general from enumerated fsck passes/errors:
+ * initial_gc_unfixed
+ * error
+ * topology error
+ */
+
+#define BCH_FS_FLAGS()                 /* x-macro list: invokes x(name) once per filesystem flag, in this order */ \
+       x(started)                      \
+       x(may_go_rw)                    \
+       x(rw)                           \
+       x(was_rw)                       \
+       x(stopping)                     \
+       x(emergency_ro)                 \
+       x(going_ro)                     \
+       x(write_disable_complete)       \
+       x(clean_shutdown)               \
+       x(fsck_done)                    \
+       x(initial_gc_unfixed)           \
+       x(need_another_gc)              \
+       x(need_delete_dead_snapshots)   \
+       x(error)                        \
+       x(topology_error)               \
+       x(errors_fixed)                 \
+       x(errors_not_fixed)
+
+enum bch_fs_flags {    /* one BCH_FS_<name> enumerator per x(name) entry of BCH_FS_FLAGS(), in list order; used as bit numbers with test_bit()/set_bit() on c->flags */
+#define x(n)           BCH_FS_##n,
+       BCH_FS_FLAGS()
+#undef x
 };
 
 struct btree_debug {
@@ -597,7 +625,7 @@ struct journal_seq_blacklist_table {
                u64             start;
                u64             end;
                bool            dirty;
-       }                       entries[0];
+       }                       entries[];
 };
 
 struct journal_keys {
@@ -618,26 +646,16 @@ struct journal_keys {
        size_t                  gap;
        size_t                  nr;
        size_t                  size;
+       atomic_t                ref;
+       bool                    initial_ref_held;
 };
 
-struct btree_path_buf {
-       struct btree_path       *path;
+struct btree_trans_buf {
+       struct btree_trans      *trans;
 };
 
 #define REPLICAS_DELTA_LIST_MAX        (1U << 16)
 
-struct snapshot_t {
-       u32                     parent;
-       u32                     children[2];
-       u32                     subvol; /* Nonzero only if a subvolume points to this node: */
-       u32                     equiv;
-};
-
-typedef struct {
-       u32             subvol;
-       u64             inum;
-} subvol_inum;
-
 #define BCACHEFS_ROOT_SUBVOL_INUM                                      \
        ((subvol_inum) { BCACHEFS_ROOT_SUBVOL,  BCACHEFS_ROOT_INO })
 
@@ -652,10 +670,10 @@ typedef struct {
        x(fallocate)                                                    \
        x(discard)                                                      \
        x(invalidate)                                                   \
-       x(move)                                                         \
        x(delete_dead_snapshots)                                        \
        x(snapshot_delete_pagecache)                                    \
-       x(sysfs)
+       x(sysfs)                                                        \
+       x(btree_write_buffer)
 
 enum bch_write_ref {
 #define x(n) BCH_WRITE_REF_##n,
@@ -710,11 +728,12 @@ struct bch_fs {
 
        /* Updated by bch2_sb_update():*/
        struct {
-               uuid_le         uuid;
-               uuid_le         user_uuid;
+               __uuid_t        uuid;
+               __uuid_t        user_uuid;
 
                u16             version;
                u16             version_min;
+               u16             version_upgrade_complete;
 
                u8              nr_devices;
                u8              clean;
@@ -740,9 +759,11 @@ struct bch_fs {
        struct mutex            sb_lock;
 
        /* snapshot.c: */
-       GENRADIX(struct snapshot_t) snapshots;
-       struct bch_snapshot_table __rcu *snapshot_table;
+       struct snapshot_table __rcu *snapshots;
+       size_t                  snapshot_table_size;
        struct mutex            snapshot_table_lock;
+       struct rw_semaphore     snapshot_create_lock;
+
        struct work_struct      snapshot_delete_work;
        struct work_struct      snapshot_wait_for_pagecache_and_delete_work;
        snapshot_id_list        snapshots_unlinked;
@@ -752,7 +773,8 @@ struct bch_fs {
        struct bio_set          btree_bio;
        struct workqueue_struct *io_complete_wq;
 
-       struct btree_root       btree_roots[BTREE_ID_NR];
+       struct btree_root       btree_roots_known[BTREE_ID_NR];
+       DARRAY(struct btree_root) btree_roots_extra;
        struct mutex            btree_root_lock;
 
        struct btree_cache      btree_cache;
@@ -788,11 +810,11 @@ struct bch_fs {
        }                       btree_write_stats[BTREE_WRITE_TYPE_NR];
 
        /* btree_iter.c: */
-       struct mutex            btree_trans_lock;
+       struct seqmutex         btree_trans_lock;
        struct list_head        btree_trans_list;
-       mempool_t               btree_paths_pool;
+       mempool_t               btree_trans_pool;
        mempool_t               btree_trans_mem_pool;
-       struct btree_path_buf  __percpu *btree_paths_bufs;
+       struct btree_trans_buf  __percpu        *btree_trans_bufs;
 
        struct srcu_struct      btree_trans_barrier;
        bool                    btree_trans_barrier_initialized;
@@ -806,6 +828,12 @@ struct bch_fs {
        struct workqueue_struct *btree_io_complete_wq;
        /* copygc needs its own workqueue for index updates.. */
        struct workqueue_struct *copygc_wq;
+       /*
+        * Use a dedicated wq for write ref holder tasks. Required to avoid
+        * dependency problems with other wq tasks that can block on ref
+        * draining, such as read-only transition.
+        */
+       struct workqueue_struct *write_ref_wq;
 
        /* ALLOCATION */
        struct bch_devs_mask    rw_devs[BCH_DATA_NR];
@@ -912,7 +940,7 @@ struct bch_fs {
        mempool_t               compression_bounce[2];
        mempool_t               compress_workspace[BCH_COMPRESSION_TYPE_NR];
        mempool_t               decompress_workspace;
-       ZSTD_parameters         zstd_params;
+       size_t                  zstd_workspace_size;
 
        struct crypto_shash     *sha256;
        struct crypto_sync_skcipher *chacha20;
@@ -922,20 +950,21 @@ struct bch_fs {
 
        mempool_t               large_bkey_pool;
 
+       /* MOVE.C */
+       struct list_head        moving_context_list;
+       struct mutex            moving_context_lock;
+
        /* REBALANCE */
        struct bch_fs_rebalance rebalance;
 
        /* COPYGC */
        struct task_struct      *copygc_thread;
        struct write_point      copygc_write_point;
+       s64                     copygc_wait_at;
        s64                     copygc_wait;
        bool                    copygc_running;
        wait_queue_head_t       copygc_running_wq;
 
-       /* DATA PROGRESS STATS */
-       struct list_head        data_progress_list;
-       struct mutex            data_progress_lock;
-
        /* STRIPES: */
        GENRADIX(struct stripe) stripes;
        GENRADIX(struct gc_stripe) gc_stripes;
@@ -952,34 +981,40 @@ struct bch_fs {
 
        struct list_head        ec_stripe_new_list;
        struct mutex            ec_stripe_new_lock;
+       wait_queue_head_t       ec_stripe_new_wait;
 
        struct work_struct      ec_stripe_create_work;
        u64                     ec_stripe_hint;
 
-       struct bio_set          ec_bioset;
-
        struct work_struct      ec_stripe_delete_work;
-       struct llist_head       ec_stripe_delete_list;
+
+       struct bio_set          ec_bioset;
 
        /* REFLINK */
-       u64                     reflink_hint;
        reflink_gc_table        reflink_gc_table;
        size_t                  reflink_gc_nr;
 
+       /* fs.c */
+       struct list_head        vfs_inodes_list;
+       struct mutex            vfs_inodes_lock;
+
        /* VFS IO PATH - fs-io.c */
        struct bio_set          writepage_bioset;
        struct bio_set          dio_write_bioset;
        struct bio_set          dio_read_bioset;
        struct bio_set          nocow_flush_bioset;
 
-       /* ERRORS */
-       struct list_head        fsck_errors;
-       struct mutex            fsck_error_lock;
-       bool                    fsck_alloc_err;
-
        /* QUOTAS */
        struct bch_memquota_type quotas[QTYP_NR];
 
+       /* RECOVERY */
+       u64                     journal_replay_seq_start;
+       u64                     journal_replay_seq_end;
+       enum bch_recovery_pass  curr_recovery_pass;
+       /* bitmap of explicitly enabled recovery passes: */
+       u64                     recovery_passes_explicit;
+       u64                     recovery_passes_complete;
+
        /* DEBUG JUNK */
        struct dentry           *fs_debug_dir;
        struct dentry           *btree_debug_dir;
@@ -1017,6 +1052,14 @@ struct bch_fs {
        struct bch2_time_stats  times[BCH_TIME_STAT_NR];
 
        struct btree_transaction_stats btree_transaction_stats[BCH_TRANSACTIONS_NR];
+
+       /* ERRORS */
+       struct list_head        fsck_error_msgs;
+       struct mutex            fsck_error_msgs_lock;
+       bool                    fsck_alloc_msgs_err;
+
+       bch_sb_errors_cpu       fsck_error_counts;
+       struct mutex            fsck_error_counts_lock;
 };
 
 extern struct wait_queue_head bch2_read_only_wait;
@@ -1033,7 +1076,7 @@ static inline void bch2_write_ref_get(struct bch_fs *c, enum bch_write_ref ref)
 static inline bool bch2_write_ref_tryget(struct bch_fs *c, enum bch_write_ref ref)
 {
 #ifdef BCH_WRITE_REF_DEBUG
-       return !test_bit(BCH_FS_GOING_RO, &c->flags) &&
+       return !test_bit(BCH_FS_going_ro, &c->flags) &&
                atomic_long_inc_not_zero(&c->writes[ref]);
 #else
        return percpu_ref_tryget_live(&c->writes);
@@ -1052,7 +1095,7 @@ static inline void bch2_write_ref_put(struct bch_fs *c, enum bch_write_ref ref)
                if (atomic_long_read(&c->writes[i]))
                        return;
 
-       set_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags);
+       set_bit(BCH_FS_write_disable_complete, &c->flags);
        wake_up(&bch2_read_only_wait);
 #else
        percpu_ref_put(&c->writes);