+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_H
#define _BCACHEFS_H
#include <linux/bio.h>
#include <linux/closure.h>
#include <linux/kobject.h>
-#include <linux/lglock.h>
#include <linux/list.h>
+#include <linux/math64.h>
#include <linux/mutex.h>
#include <linux/percpu-refcount.h>
-#include <linux/radix-tree.h>
-#include <linux/rbtree.h>
+#include <linux/percpu-rwsem.h>
#include <linux/rhashtable.h>
#include <linux/rwsem.h>
#include <linux/seqlock.h>
#include <linux/shrinker.h>
#include <linux/types.h>
#include <linux/workqueue.h>
+#include <linux/zstd.h>
#include "bcachefs_format.h"
-#include "bset.h"
#include "fifo.h"
#include "opts.h"
#include "util.h"
#include <linux/dynamic_fault.h>
-#define bch2_fs_init_fault(name) \
+#define bch2_fs_init_fault(name) \
dynamic_fault("bcachefs:bch_fs_init:" name)
#define bch2_meta_read_fault(name) \
dynamic_fault("bcachefs:meta:read:" name)
#define bch_notice(c, fmt, ...) \
	printk(KERN_NOTICE bch2_fmt(c, fmt), ##__VA_ARGS__)
#define bch_warn(c, fmt, ...) \
printk(KERN_WARNING bch2_fmt(c, fmt), ##__VA_ARGS__)
+#define bch_warn_ratelimited(c, fmt, ...) \
+ printk_ratelimited(KERN_WARNING bch2_fmt(c, fmt), ##__VA_ARGS__)
#define bch_err(c, fmt, ...) \
printk(KERN_ERR bch2_fmt(c, fmt), ##__VA_ARGS__)
+#define bch_err_ratelimited(c, fmt, ...) \
+ printk_ratelimited(KERN_ERR bch2_fmt(c, fmt), ##__VA_ARGS__)
#define bch_verbose(c, fmt, ...) \
do { \
- if ((c)->opts.verbose_recovery) \
+ if ((c)->opts.verbose) \
bch_info(c, fmt, ##__VA_ARGS__); \
} while (0)
+#define pr_verbose_init(opts, fmt, ...) \
+do { \
+ if (opt_get(opts, verbose)) \
+ pr_info(fmt, ##__VA_ARGS__); \
+} while (0)
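+
+/*
+ * pr_verbose_init() can be used in early init paths before a struct bch_fs
+ * exists, since it takes a bch_opts rather than a filesystem pointer. A
+ * typical (illustrative) use at the end of an init function:
+ *
+ *	pr_verbose_init(opts, "ret %i", ret);
+ */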
+
/* Parameters that are useful for debugging, but should always be compiled in: */
#define BCH_DEBUG_PARAMS_ALWAYS() \
	BCH_DEBUG_PARAM(key_merging_disabled,				\
		"Disables merging of extents")				\
BCH_DEBUG_PARAM(expensive_debug_checks, \
"Enables various runtime debugging checks that " \
"significantly affect performance") \
+ BCH_DEBUG_PARAM(debug_check_iterators, \
+ "Enables extra verification for btree iterators") \
BCH_DEBUG_PARAM(debug_check_bkeys, \
"Run bkey_debugcheck (primarily checking GC/allocation "\
"information) when iterating over keys") \
- BCH_DEBUG_PARAM(version_stress_test, \
- "Assigns random version numbers to newly written " \
- "extents, to test overlapping extent cases") \
BCH_DEBUG_PARAM(verify_btree_ondisk, \
"Reread btree nodes at various points to verify the " \
"mergesort in the read path against modifications " \
"done in memory") \
+ BCH_DEBUG_PARAM(journal_seq_verify, \
+ "Store the journal sequence number in the version " \
+ "number of every btree key, and verify that btree " \
+ "update ordering is preserved during recovery") \
+ BCH_DEBUG_PARAM(inject_invalid_keys, \
+		"Inserts invalid keys into the btree, to test "		\
+		"error handling paths")					\
+ BCH_DEBUG_PARAM(test_alloc_startup, \
+		"Force allocator startup to use the slowpath where it "\
+		"can't find enough free buckets without invalidating "	\
+		"cached data")						\
+ BCH_DEBUG_PARAM(force_reconstruct_read, \
+		"Force reads to use the reconstruct path, when reading "\
+		"from erasure coded extents")				\
+ BCH_DEBUG_PARAM(test_restart_gc, \
+ "Test restarting mark and sweep gc when bucket gens change")
#define BCH_DEBUG_PARAMS_ALL() BCH_DEBUG_PARAMS_ALWAYS() BCH_DEBUG_PARAMS_DEBUG()
#ifdef CONFIG_BCACHEFS_DEBUG
#define BCH_DEBUG_PARAMS() BCH_DEBUG_PARAMS_ALL()
#else
#define BCH_DEBUG_PARAMS() BCH_DEBUG_PARAMS_ALWAYS()
#endif
-/* name, frequency_units, duration_units */
-#define BCH_TIME_STATS() \
- BCH_TIME_STAT(btree_node_mem_alloc, sec, us) \
- BCH_TIME_STAT(btree_gc, sec, ms) \
- BCH_TIME_STAT(btree_split, sec, us) \
- BCH_TIME_STAT(btree_sort, ms, us) \
- BCH_TIME_STAT(btree_read, ms, us) \
- BCH_TIME_STAT(journal_write, us, us) \
- BCH_TIME_STAT(journal_delay, ms, us) \
- BCH_TIME_STAT(journal_blocked, sec, ms) \
- BCH_TIME_STAT(journal_flush_seq, us, us)
+#define BCH_TIME_STATS() \
+ x(btree_node_mem_alloc) \
+ x(btree_node_split) \
+ x(btree_node_sort) \
+ x(btree_node_read) \
+ x(btree_gc) \
+ x(btree_lock_contended_read) \
+ x(btree_lock_contended_intent) \
+ x(btree_lock_contended_write) \
+ x(data_write) \
+ x(data_read) \
+ x(data_promote) \
+ x(journal_write) \
+ x(journal_delay) \
+ x(journal_flush_seq) \
+ x(blocked_journal) \
+ x(blocked_allocate) \
+ x(blocked_allocate_open_bucket)
+
+enum bch_time_stats {
+#define x(name) BCH_TIME_##name,
+ BCH_TIME_STATS()
+#undef x
+ BCH_TIME_STAT_NR
+};
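+
+/*
+ * BCH_TIME_STATS() is an x-macro list: the enum above comes from expanding it
+ * with a local definition of x(). Other consumers can expand the same list to
+ * build parallel tables; an illustrative sketch of a name table:
+ *
+ *	static const char * const time_stat_names[] = {
+ *	#define x(name)	#name,
+ *		BCH_TIME_STATS()
+ *	#undef x
+ *	};
+ */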
#include "alloc_types.h"
+#include "btree_types.h"
#include "buckets_types.h"
#include "clock_types.h"
-#include "io_types.h"
+#include "ec_types.h"
#include "journal_types.h"
#include "keylist_types.h"
-#include "move_types.h"
+#include "quota_types.h"
+#include "rebalance_types.h"
+#include "replicas_types.h"
#include "super_types.h"
-/* 256k, in sectors */
-#define BTREE_NODE_SIZE_MAX 512
-
-/*
- * Number of nodes we might have to allocate in a worst case btree split
- * operation - we split all the way up to the root, then allocate a new root.
- */
-#define btree_reserve_required_nodes(depth) (((depth) + 1) * 2 + 1)
-
/* Number of nodes btree coalesce will try to coalesce at once */
#define GC_MERGE_NODES 4U
/* Maximum number of nodes we might need to allocate atomically: */
-#define BTREE_RESERVE_MAX \
- (btree_reserve_required_nodes(BTREE_MAX_DEPTH) + GC_MERGE_NODES)
+#define BTREE_RESERVE_MAX (BTREE_MAX_DEPTH + (BTREE_MAX_DEPTH - 1))
/* Size of the freelist we allocate btree nodes from: */
-#define BTREE_NODE_RESERVE (BTREE_RESERVE_MAX * 4)
+#define BTREE_NODE_RESERVE BTREE_RESERVE_MAX
+
+#define BTREE_NODE_OPEN_BUCKET_RESERVE (BTREE_RESERVE_MAX * BCH_REPLICAS_MAX)
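+
+/*
+ * Worked example, assuming BTREE_MAX_DEPTH == 4 and BCH_REPLICAS_MAX == 4
+ * (their values elsewhere in the tree): BTREE_RESERVE_MAX = 4 + 3 = 7,
+ * BTREE_NODE_RESERVE = 7, and BTREE_NODE_OPEN_BUCKET_RESERVE = 7 * 4 = 28
+ * open buckets reserved for btree node allocation.
+ */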
struct btree;
-struct crypto_blkcipher;
-struct crypto_ahash;
enum gc_phase {
- GC_PHASE_SB_METADATA = BTREE_ID_NR + 1,
+ GC_PHASE_NOT_RUNNING,
+ GC_PHASE_START,
+ GC_PHASE_SB,
+
+ GC_PHASE_BTREE_EC,
+ GC_PHASE_BTREE_EXTENTS,
+ GC_PHASE_BTREE_INODES,
+ GC_PHASE_BTREE_DIRENTS,
+ GC_PHASE_BTREE_XATTRS,
+ GC_PHASE_BTREE_ALLOC,
+ GC_PHASE_BTREE_QUOTAS,
+ GC_PHASE_BTREE_REFLINK,
+
GC_PHASE_PENDING_DELETE,
- GC_PHASE_DONE
+ GC_PHASE_ALLOC,
};
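+
+/*
+ * The order of these phases is significant: GC progress is tracked as a
+ * gc_pos (phase, pos, level below), and positions are compared numerically by
+ * the gc_pos helpers in btree_gc.h, so each phase must sort after every phase
+ * GC completes before it.
+ */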
struct gc_pos {
	enum gc_phase		phase;
	struct bpos		pos;
	unsigned		level;
};
-struct bch_member_cpu {
- u64 nbuckets; /* device size */
- u16 first_bucket; /* index of first bucket used */
- u16 bucket_size; /* sectors */
- u8 state;
- u8 tier;
- u8 replacement;
- u8 discard;
- u8 valid;
-};
-
-struct bch_replicas_cpu_entry {
- u8 data_type;
- u8 devs[BCH_SB_MEMBERS_MAX / 8];
-};
-
-struct bch_replicas_cpu {
- struct rcu_head rcu;
- unsigned nr;
- unsigned entry_size;
- struct bch_replicas_cpu_entry entries[];
+struct io_count {
+ u64 sectors[2][BCH_DATA_NR];
};
struct bch_dev {
struct kobject kobj;
struct percpu_ref ref;
+ struct completion ref_completion;
struct percpu_ref io_ref;
- struct completion stop_complete;
- struct completion offline_complete;
+ struct completion io_ref_completion;
struct bch_fs *fs;
uuid_le uuid;
char name[BDEVNAME_SIZE];
- struct bcache_superblock disk_sb;
+ struct bch_sb_handle disk_sb;
+ struct bch_sb *sb_read_scratch;
+ int sb_write_error;
- struct dev_group self;
+ struct bch_devs_mask self;
- /* biosets used in cloned bios for replicas and moving_gc */
+ /* biosets used in cloned bios for writing multiple replicas */
struct bio_set replica_set;
- struct task_struct *alloc_thread;
+	/*
+	 * Buckets:
+	 * Per-bucket arrays are protected against device resize by
+	 * c->mark_lock, bucket_lock and gc_lock; holding any one of them is
+	 * sufficient for access. Or rcu_read_lock(), but only for ptr_stale():
+	 */
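+	/*
+	 * Illustrative access pattern under the rules above (sketch only):
+	 *
+	 *	rcu_read_lock();
+	 *	buckets = rcu_dereference(ca->buckets[0]);
+	 *	... ptr_stale()-style gen checks only ...
+	 *	rcu_read_unlock();
+	 *
+	 * Anything beyond gen checks requires bucket_lock, mark_lock or
+	 * gc_lock.
+	 */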
+ struct bucket_array __rcu *buckets[2];
+ unsigned long *buckets_nouse;
+ struct rw_semaphore bucket_lock;
+
+ struct bch_dev_usage __percpu *usage[2];
+
+ /* Allocator: */
+ struct task_struct __rcu *alloc_thread;
/*
	 * free: Buckets that are ready to be used
	 *
	 * free_inc: Incoming buckets - these are buckets that have been
	 * invalidated but whose new generation numbers haven't been written
	 * out yet; once the new gens are persisted they'll be moved to the
	 * free lists (and possibly discarded in the process)
*/
- DECLARE_FIFO(long, free)[RESERVE_NR];
- DECLARE_FIFO(long, free_inc);
- spinlock_t freelist_lock;
- unsigned nr_invalidated;
- bool alloc_thread_started;
- bool need_alloc_write;
-
- size_t fifo_last_bucket;
+ alloc_fifo free[RESERVE_NR];
+ alloc_fifo free_inc;
- /* Allocation stuff: */
+ u8 open_buckets_partial[OPEN_BUCKETS_COUNT];
+ unsigned open_buckets_partial_nr;
- /* most out of date gen in the btree */
- u8 *oldest_gens;
- struct bucket *buckets;
- unsigned short bucket_bits; /* ilog2(bucket_size) */
+ size_t fifo_last_bucket;
	/* last calculated max bucket last-IO age (per read/write clock): */
- u16 min_prio[2];
+ u16 max_last_bucket_io[2];
- /*
- * Bucket book keeping. The first element is updated by GC, the
- * second contains a saved copy of the stats from the beginning
- * of GC.
- */
- struct bch_dev_usage __percpu *usage_percpu;
- struct bch_dev_usage usage_cached;
-
- atomic_long_t saturated_count;
size_t inc_gen_needs_gc;
size_t inc_gen_really_needs_gc;
- u64 allocator_journal_seq_flush;
- bool allocator_invalidating_data;
-
- alloc_heap alloc_heap;
- bucket_heap copygc_heap;
- /* Moving GC: */
- struct task_struct *moving_gc_read;
-
- struct bch_pd_controller moving_gc_pd;
+ /*
+	 * XXX: this allocator state enum should also include an error state
+ */
+ enum {
+ ALLOCATOR_STOPPED,
+ ALLOCATOR_RUNNING,
+ ALLOCATOR_BLOCKED,
+ ALLOCATOR_BLOCKED_FULL,
+ } allocator_state;
- /* Tiering: */
- struct write_point tiering_write_point;
+ alloc_heap alloc_heap;
+ /* Copying GC: */
+ struct task_struct *copygc_thread;
+ copygc_heap copygc_heap;
+ struct bch_pd_controller copygc_pd;
struct write_point copygc_write_point;
+ u64 copygc_threshold;
+
+ atomic64_t rebalance_work;
struct journal_device journal;
struct work_struct io_error_work;
/* The rest of this all shows up in sysfs */
- atomic64_t meta_sectors_written;
- atomic64_t btree_sectors_written;
- u64 __percpu *sectors_written;
+ atomic64_t cur_latency[2];
+ struct time_stats io_latency[2];
+
+#define CONGESTED_MAX 1024
+ atomic_t congested;
+ u64 congested_last;
+
+ struct io_count __percpu *io_done;
};
-/*
- * Flag bits for what phase of startup/shutdown the cache set is at, how we're
- * shutting down, etc.:
- *
- * BCH_FS_UNREGISTERING means we're not just shutting down, we're detaching
- * all the backing devices first (their cached data gets invalidated, and they
- * won't automatically reattach).
- */
enum {
+ /* startup: */
+ BCH_FS_ALLOC_READ_DONE,
+ BCH_FS_ALLOC_CLEAN,
+ BCH_FS_ALLOCATOR_STARTED,
+ BCH_FS_ALLOCATOR_RUNNING,
+ BCH_FS_ALLOCATOR_STOPPING,
BCH_FS_INITIAL_GC_DONE,
+ BCH_FS_FSCK_DONE,
+ BCH_FS_STARTED,
+ BCH_FS_RW,
+
+ /* shutdown: */
+ BCH_FS_STOPPING,
BCH_FS_EMERGENCY_RO,
BCH_FS_WRITE_DISABLE_COMPLETE,
- BCH_FS_GC_STOPPING,
- BCH_FS_GC_FAILURE,
- BCH_FS_BDEV_MOUNTED,
+
+ /* errors: */
BCH_FS_ERROR,
- BCH_FS_FSCK_FIXED_ERRORS,
- BCH_FS_FSCK_DONE,
+ BCH_FS_ERRORS_FIXED,
+
+ /* misc: */
+ BCH_FS_BDEV_MOUNTED,
BCH_FS_FIXED_GENS,
+ BCH_FS_ALLOC_WRITTEN,
BCH_FS_REBUILD_REPLICAS,
+ BCH_FS_HOLD_BTREE_WRITES,
};
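+
+/*
+ * These are bit numbers, not masks: they are used with set_bit()/test_bit()
+ * on the flags word in struct bch_fs, e.g. test_bit(BCH_FS_RW, &c->flags).
+ */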
struct btree_debug {
struct dentry *failed;
};
-struct bch_tier {
- unsigned idx;
- struct task_struct *migrate;
- struct bch_pd_controller pd;
+struct bch_fs_pcpu {
+ u64 sectors_available;
+};
- struct dev_group devs;
+struct journal_seq_blacklist_table {
+ size_t nr;
+ struct journal_seq_blacklist_table_entry {
+ u64 start;
+ u64 end;
+ bool dirty;
+ } entries[0];
};
-enum bch_fs_state {
- BCH_FS_STARTING = 0,
- BCH_FS_STOPPING,
- BCH_FS_RO,
- BCH_FS_RW,
+struct journal_keys {
+ struct journal_key {
+ enum btree_id btree_id:8;
+ unsigned level:8;
+ struct bkey_i *k;
+ u32 journal_seq;
+ u32 journal_offset;
+ } *d;
+ size_t nr;
+ u64 journal_seq_base;
};
struct bch_fs {
/* ro/rw, add/remove devices: */
struct mutex state_lock;
- enum bch_fs_state state;
/* Counts outstanding writes, for clean transition to read-only */
struct percpu_ref writes;
struct bch_dev __rcu *devs[BCH_SB_MEMBERS_MAX];
- struct bch_replicas_cpu __rcu *replicas;
- struct bch_replicas_cpu __rcu *replicas_gc;
+ struct bch_replicas_cpu replicas;
+ struct bch_replicas_cpu replicas_gc;
struct mutex replicas_gc_lock;
+ struct journal_entry_res replicas_journal_res;
+
+ struct bch_disk_groups_cpu __rcu *disk_groups;
+
struct bch_opts opts;
/* Updated by bch2_sb_update():*/
uuid_le uuid;
uuid_le user_uuid;
- u16 block_size;
- u16 btree_node_size;
+ u16 version;
+ u16 encoded_extent_max;
u8 nr_devices;
u8 clean;
- u8 str_hash_type;
u8 encryption_type;
u64 time_base_lo;
u32 time_base_hi;
u32 time_precision;
+ u64 features;
+ u64 compat;
} sb;
- struct bch_sb *disk_sb;
- unsigned disk_sb_order;
+ struct bch_sb_handle disk_sb;
unsigned short block_bits; /* ilog2(block_size) */
+ u16 btree_foreground_merge_threshold;
+
struct closure sb_write;
struct mutex sb_lock;
- struct backing_dev_info bdi;
-
/* BTREE CACHE */
- struct bio_set btree_read_bio;
+ struct bio_set btree_bio;
struct btree_root btree_roots[BTREE_ID_NR];
struct mutex btree_root_lock;
- bool btree_cache_table_init_done;
- struct rhashtable btree_cache_table;
-
- /*
- * We never free a struct btree, except on shutdown - we just put it on
- * the btree_cache_freed list and reuse it later. This simplifies the
- * code, and it doesn't cost us much memory as the memory usage is
- * dominated by buffers that hold the actual btree node data and those
- * can be freed - and the number of struct btrees allocated is
- * effectively bounded.
- *
- * btree_cache_freeable effectively is a small cache - we use it because
- * high order page allocations can be rather expensive, and it's quite
- * common to delete and allocate btree nodes in quick succession. It
- * should never grow past ~2-3 nodes in practice.
- */
- struct mutex btree_cache_lock;
- struct list_head btree_cache;
- struct list_head btree_cache_freeable;
- struct list_head btree_cache_freed;
-
- /* Number of elements in btree_cache + btree_cache_freeable lists */
- unsigned btree_cache_used;
- unsigned btree_cache_reserve;
- struct shrinker btree_cache_shrink;
-
- /*
- * If we need to allocate memory for a new btree node and that
- * allocation fails, we can cannibalize another node in the btree cache
- * to satisfy the allocation - lock to guarantee only one thread does
- * this at a time:
- */
- struct closure_waitlist mca_wait;
- struct task_struct *btree_cache_alloc_lock;
-
- mempool_t btree_reserve_pool;
+ struct btree_cache btree_cache;
/*
	 * Cache of allocated btree nodes - if we allocate a btree node and
	 * don't use it, freeing it means that space can't be reused until we
	 * go all the way back through the allocator (which exposes us to a
	 * livelock when allocating btree reserves fails halfway through) -
	 * instead, we can stick them here:
*/
- struct btree_alloc {
- struct open_bucket *ob;
- BKEY_PADDED(k);
- } btree_reserve_cache[BTREE_NODE_RESERVE * 2];
+ struct btree_alloc btree_reserve_cache[BTREE_NODE_RESERVE * 2];
unsigned btree_reserve_cache_nr;
struct mutex btree_reserve_cache_lock;
mempool_t btree_interior_update_pool;
struct list_head btree_interior_update_list;
+ struct list_head btree_interior_updates_unwritten;
struct mutex btree_interior_update_lock;
+ struct closure_waitlist btree_interior_update_wait;
+
+ struct workqueue_struct *btree_interior_update_worker;
+ struct work_struct btree_interior_update_work;
+
+ /* btree_iter.c: */
+ struct mutex btree_trans_lock;
+ struct list_head btree_trans_list;
+ mempool_t btree_iters_pool;
struct workqueue_struct *wq;
/* copygc needs its own workqueue for index updates.. */
struct workqueue_struct *copygc_wq;
+ struct workqueue_struct *journal_reclaim_wq;
/* ALLOCATION */
- struct bch_pd_controller foreground_write_pd;
struct delayed_work pd_controllers_update;
unsigned pd_controllers_update_seconds;
- spinlock_t foreground_write_pd_lock;
- struct bch_write_op *write_wait_head;
- struct bch_write_op *write_wait_tail;
- struct timer_list foreground_write_wakeup;
-
- /*
- * These contain all r/w devices - i.e. devices we can currently
- * allocate from:
- */
- struct dev_group all_devs;
- struct bch_tier tiers[BCH_TIER_MAX];
- /* NULL if we only have devices in one tier: */
- struct bch_tier *fastest_tier;
+ struct bch_devs_mask rw_devs[BCH_DATA_NR];
u64 capacity; /* sectors */

	/*
	 * When capacity _decreases_ (due to a device being removed), we
	 * increment capacity_gen - this invalidates outstanding reservations
	 * and forces them to be revalidated
	 */
u32 capacity_gen;
+ unsigned bucket_size_max;
atomic64_t sectors_available;
- struct bch_fs_usage __percpu *usage_percpu;
- struct bch_fs_usage usage_cached;
- struct lglock usage_lock;
+ struct bch_fs_pcpu __percpu *pcpu;
- struct mutex bucket_lock;
+ struct percpu_rw_semaphore mark_lock;
- struct closure_waitlist freelist_wait;
+ seqcount_t usage_lock;
+ struct bch_fs_usage *usage_base;
+ struct bch_fs_usage __percpu *usage[2];
+ struct bch_fs_usage __percpu *usage_gc;
+
+ /* single element mempool: */
+ struct mutex usage_scratch_lock;
+ struct bch_fs_usage *usage_scratch;
/*
	 * When we invalidate buckets, we use both the priority and the amount
	 * of good data to determine which buckets to reuse first - to weight
	 * those together consistently we keep track of the smallest nonzero
* priority of any bucket.
*/
- struct prio_clock prio_clock[2];
+ struct bucket_clock bucket_clock[2];
struct io_clock io_clock[2];
- /* SECTOR ALLOCATOR */
- struct list_head open_buckets_open;
- struct list_head open_buckets_free;
- unsigned open_buckets_nr_free;
+ /* JOURNAL SEQ BLACKLIST */
+ struct journal_seq_blacklist_table *
+ journal_seq_blacklist_table;
+ struct work_struct journal_seq_blacklist_gc_work;
+
+ /* ALLOCATOR */
+ spinlock_t freelist_lock;
+ struct closure_waitlist freelist_wait;
+ u64 blocked_allocate;
+ u64 blocked_allocate_open_bucket;
+ u8 open_buckets_freelist;
+ u8 open_buckets_nr_free;
struct closure_waitlist open_buckets_wait;
- spinlock_t open_buckets_lock;
struct open_bucket open_buckets[OPEN_BUCKETS_COUNT];
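+	/*
+	 * open_buckets_freelist above is, as best this header shows, the head
+	 * of a free list threaded through open_buckets[] by array index (see
+	 * the freelist member of struct open_bucket in alloc_types.h).
+	 */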
struct write_point btree_write_point;
+ struct write_point rebalance_write_point;
- struct write_point write_points[WRITE_POINT_COUNT];
- struct write_point promote_write_point;
-
- /*
- * This write point is used for migrating data off a device
- * and can point to any other device.
- * We can't use the normal write points because those will
- * gang up n replicas, and for migration we want only one new
- * replica.
- */
- struct write_point migration_write_point;
+ struct write_point write_points[WRITE_POINT_MAX];
+ struct hlist_head write_points_hash[WRITE_POINT_HASH_NR];
+ struct mutex write_points_hash_lock;
+ unsigned write_points_nr;
/* GARBAGE COLLECTION */
struct task_struct *gc_thread;
	/*
	 * Tracks GC's progress:
	 *
	 * gc_cur_phase is a superset of btree_ids (BTREE_ID_EXTENTS etc.)
*
- * gc_cur_phase == GC_PHASE_DONE indicates that gc is finished/not
- * currently running, and gc marks are currently valid
- *
* Protected by gc_pos_lock. Only written to by GC thread, so GC thread
* can read without a lock.
*/
struct bio_set bio_write;
struct mutex bio_bounce_pages_lock;
mempool_t bio_bounce_pages;
+ struct rhashtable promote_table;
- mempool_t lz4_workspace_pool;
- void *zlib_workspace;
- struct mutex zlib_workspace_lock;
mempool_t compression_bounce[2];
+ mempool_t compress_workspace[BCH_COMPRESSION_TYPE_NR];
+ mempool_t decompress_workspace;
+ ZSTD_parameters zstd_params;
struct crypto_shash *sha256;
- struct crypto_skcipher *chacha20;
+ struct crypto_sync_skcipher *chacha20;
struct crypto_shash *poly1305;
atomic64_t key_version;
- struct bio_list read_retry_list;
- struct work_struct read_retry_work;
- spinlock_t read_retry_lock;
+ mempool_t large_bkey_pool;
+
+ /* REBALANCE */
+ struct bch_fs_rebalance rebalance;
+
+ /* STRIPES: */
+ GENRADIX(struct stripe) stripes[2];
+ struct mutex ec_stripe_create_lock;
+
+ ec_stripes_heap ec_stripes_heap;
+ spinlock_t ec_stripes_heap_lock;
+
+ /* ERASURE CODING */
+ struct list_head ec_new_stripe_list;
+ struct mutex ec_new_stripe_lock;
+ u64 ec_stripe_hint;
+
+ struct bio_set ec_bioset;
+
+ struct work_struct ec_stripe_delete_work;
+ struct llist_head ec_stripe_delete_list;
+
+ /* REFLINK */
+ u64 reflink_hint;
+
+ /* VFS IO PATH - fs-io.c */
+ struct bio_set writepage_bioset;
+ struct bio_set dio_write_bioset;
+ struct bio_set dio_read_bioset;
+
+ struct bio_list btree_write_error_list;
+ struct work_struct btree_write_error_work;
+ spinlock_t btree_write_error_lock;
/* ERRORS */
struct list_head fsck_errors;
struct mutex fsck_error_lock;
bool fsck_alloc_err;
- /* FILESYSTEM */
- wait_queue_head_t writeback_wait;
- atomic_t writeback_pages;
- unsigned writeback_pages_max;
- atomic_long_t nr_inodes;
+ /* QUOTAS */
+ struct bch_memquota_type quotas[QTYP_NR];
/* DEBUG JUNK */
struct dentry *debug;
mempool_t btree_bounce_pool;
struct journal journal;
+ struct list_head journal_entries;
+ struct journal_keys journal_keys;
- unsigned bucket_journal_seq;
+ u64 last_bucket_seq_cleanup;
/* The rest of this all shows up in sysfs */
atomic_long_t read_realloc_races;
+ atomic_long_t extent_migrate_done;
+ atomic_long_t extent_migrate_raced;
unsigned btree_gc_periodic:1;
- unsigned foreground_write_ratelimit_enabled:1;
unsigned copy_gc_enabled:1;
- unsigned tiering_enabled:1;
- unsigned tiering_percent;
-
- /*
- * foreground writes will be throttled when the number of free
- * buckets is below this percentage
- */
- unsigned foreground_target_percent;
+ bool promote_whole_extents;
#define BCH_DEBUG_PARAM(name, description) bool name;
BCH_DEBUG_PARAMS_ALL()
#undef BCH_DEBUG_PARAM
-#define BCH_TIME_STAT(name, frequency_units, duration_units) \
- struct time_stats name##_time;
- BCH_TIME_STATS()
-#undef BCH_TIME_STAT
+ struct time_stats times[BCH_TIME_STAT_NR];
};
-static inline bool bch2_fs_running(struct bch_fs *c)
-{
- return c->state == BCH_FS_RO || c->state == BCH_FS_RW;
-}
-
-static inline unsigned bucket_pages(const struct bch_dev *ca)
+static inline void bch2_set_ra_pages(struct bch_fs *c, unsigned ra_pages)
{
- return ca->mi.bucket_size / PAGE_SECTORS;
+#ifndef NO_BCACHEFS_FS
+ if (c->vfs_sb)
+ c->vfs_sb->s_bdi->ra_pages = ra_pages;
+#endif
}
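+
+/*
+ * Readahead lives in the VFS layer: when built with the filesystem interface
+ * (NO_BCACHEFS_FS not defined), c->vfs_sb is the VFS super_block and we adjust
+ * its backing_dev_info; without it this helper is a no-op.
+ */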
static inline unsigned bucket_bytes(const struct bch_dev *ca)
{
	return ca->mi.bucket_size << 9;
}

static inline unsigned block_bytes(const struct bch_fs *c)
{
- return c->sb.block_size << 9;
+ return c->opts.block_size << 9;
+}
+
+static inline struct timespec64 bch2_time_to_timespec(struct bch_fs *c, u64 time)
+{
+ return ns_to_timespec64(time * c->sb.time_precision + c->sb.time_base_lo);
+}
+
+static inline s64 timespec_to_bch2_time(struct bch_fs *c, struct timespec64 ts)
+{
+ s64 ns = timespec64_to_ns(&ts) - c->sb.time_base_lo;
+
+ if (c->sb.time_precision == 1)
+ return ns;
+
+ return div_s64(ns, c->sb.time_precision);
+}
+
+static inline s64 bch2_current_time(struct bch_fs *c)
+{
+ struct timespec64 now;
+
+ ktime_get_coarse_real_ts64(&now);
+ return timespec_to_bch2_time(c, now);
+}
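+
+/*
+ * On-disk times are stored as (ns - time_base_lo) / time_precision - with the
+ * usual time_precision of 1 they are plain nanosecond offsets from
+ * time_base_lo. The helpers above convert to/from struct timespec64 and fetch
+ * the current time in this representation.
+ */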
+
+static inline bool bch2_dev_exists2(const struct bch_fs *c, unsigned dev)
+{
+ return dev < c->sb.nr_devices && c->devs[dev];
}
#endif /* _BCACHEFS_H */