From: Kent Overstreet Date: Sat, 4 Mar 2017 06:09:52 +0000 (-0900) Subject: Delete more unused shim code, update bcache code X-Git-Url: https://git.sesse.net/?a=commitdiff_plain;h=171ee48e57be78f4e95954c99851553fa523bf91;p=bcachefs-tools-debian Delete more unused shim code, update bcache code --- diff --git a/.bcache_revision b/.bcache_revision index 8fb728e..58bdf2d 100644 --- a/.bcache_revision +++ b/.bcache_revision @@ -1 +1 @@ -BCACHE_REVISION=561f3067172cbfc63a680cfb670d558724441123 +BCACHE_REVISION=aa4471ac314a1f117957f9fc59c1bfbdf965a28c diff --git a/Makefile b/Makefile index bc0402c..2defed0 100644 --- a/Makefile +++ b/Makefile @@ -10,6 +10,12 @@ CFLAGS+=-std=gnu99 -O2 -g -MMD -Wall \ -D_GNU_SOURCE \ -D_LGPL_SOURCE \ -DRCU_MEMBARRIER \ + -DNO_BCACHE_ACCOUNTING \ + -DNO_BCACHE_BLOCKDEV \ + -DNO_BCACHE_CHARDEV \ + -DNO_BCACHE_FS \ + -DNO_BCACHE_NOTIFY \ + -DNO_BCACHE_WRITEBACK \ $(EXTRA_CFLAGS) LDFLAGS+=-O2 -g @@ -20,7 +26,7 @@ else LDFLAGS+=-flto endif -PKGCONFIG_LIBS="blkid uuid liburcu libsodium" +PKGCONFIG_LIBS="blkid uuid liburcu libsodium zlib" CFLAGS+=`pkg-config --cflags ${PKGCONFIG_LIBS}` LDLIBS+=`pkg-config --libs ${PKGCONFIG_LIBS}` \ -lm -lpthread -lrt -lscrypt -lkeyutils diff --git a/bcache-userspace-shim.c b/bcache-userspace-shim.c index 8634d8f..021664f 100644 --- a/bcache-userspace-shim.c +++ b/bcache-userspace-shim.c @@ -5,56 +5,6 @@ #include "libbcache.h" #include "tools-util.h" -/* stub out the bcache code we aren't building: */ - -struct block_device; -struct bcache_superblock; -struct cache; -struct cache_accounting; -struct cache_set; -struct closure; -struct file; -struct kobject; - -struct kmem_cache *bch_search_cache; - -const char *bch_backing_dev_register(struct bcache_superblock *sb) -{ - return "not implemented"; -} -void bch_blockdevs_stop(struct cache_set *c) {} -int bch_blockdev_volumes_start(struct cache_set *c) { return 0; } -void bch_attach_backing_devs(struct cache_set *c) {} -bool bch_is_open_backing_dev(struct block_device *bdev) { return false; } -void bch_blockdev_exit(void) {} -int bch_blockdev_init(void) { return 0; } - -void bch_fs_exit(void) {} -int bch_fs_init(void) { return 0; } - -const struct file_operations bch_chardev_fops; - -void bcache_dev_sectors_dirty_add(struct cache_set *c, unsigned inode, - u64 offset, int nr_sectors) {} -void bch_writeback_recalc_oldest_gens(struct cache_set *c) {} - -void bch_notify_cache_set_read_write(struct cache_set *c) {} -void bch_notify_cache_set_read_only(struct cache_set *c) {} -void bch_notify_cache_set_stopped(struct cache_set *c) {} -void bch_notify_cache_read_write(struct cache *c) {} -void bch_notify_cache_read_only(struct cache *c) {} -void bch_notify_cache_added(struct cache *c) {} -void bch_notify_cache_removing(struct cache *c) {} -void bch_notify_cache_removed(struct cache *c) {} -void bch_notify_cache_remove_failed(struct cache *c) {} -void bch_notify_cache_error(struct cache *c, bool b) {} - -int bch_cache_accounting_add_kobjs(struct cache_accounting *acc, - struct kobject *parent) { return 0; } -void bch_cache_accounting_destroy(struct cache_accounting *acc) {} -void bch_cache_accounting_init(struct cache_accounting *acc, - struct closure *parent) {} - #define bch_fmt(_c, fmt) fmt "\n" enum fsck_err_opts fsck_err_opt; @@ -155,14 +105,14 @@ enum fsck_err_opts fsck_err_opt; #define SHIM_KTYPE(type) \ struct kobj_type type ## _ktype = { .release = type ## _release, } -static void bch_cache_set_internal_release(struct kobject *k) {} +static void bch_fs_internal_release(struct kobject *k) {} 
-static void bch_cache_set_opts_dir_release(struct kobject *k) {} +static void bch_fs_opts_dir_release(struct kobject *k) {} -static void bch_cache_set_time_stats_release(struct kobject *k) {} +static void bch_fs_time_stats_release(struct kobject *k) {} -SHIM_KTYPE(bch_cache); -SHIM_KTYPE(bch_cache_set); -SHIM_KTYPE(bch_cache_set_internal); -SHIM_KTYPE(bch_cache_set_time_stats); -SHIM_KTYPE(bch_cache_set_opts_dir); +SHIM_KTYPE(bch_dev); +SHIM_KTYPE(bch_fs); +SHIM_KTYPE(bch_fs_internal); +SHIM_KTYPE(bch_fs_time_stats); +SHIM_KTYPE(bch_fs_opts_dir); diff --git a/cmd_debug.c b/cmd_debug.c index df23ae1..4f2586d 100644 --- a/cmd_debug.c +++ b/cmd_debug.c @@ -84,8 +84,7 @@ static void dump_one_device(struct cache_set *c, struct cache *ca, int fd) int cmd_dump(int argc, char *argv[]) { - DECLARE_COMPLETION_ONSTACK(shutdown); - struct cache_set_opts opts = cache_set_opts_empty(); + struct bch_opts opts = bch_opts_empty(); struct cache_set *c = NULL; const char *err; char *out = NULL, *buf; @@ -120,7 +119,7 @@ int cmd_dump(int argc, char *argv[]) buf = alloca(strlen(out) + 10); strcpy(buf, out); - err = bch_register_cache_set(argv + optind, argc - optind, opts, &c); + err = bch_fs_open(argv + optind, argc - optind, opts, &c); if (err) die("error opening %s: %s", argv[optind], err); @@ -154,10 +153,7 @@ int cmd_dump(int argc, char *argv[]) up_read(&c->gc_lock); - c->stop_completion = &shutdown; - bch_cache_set_stop(c); - closure_put(&c->cl); - wait_for_completion(&shutdown); + bch_fs_stop_sync(c); return 0; } @@ -227,8 +223,7 @@ static void list_keys_usage(void) int cmd_list(int argc, char *argv[]) { - DECLARE_COMPLETION_ONSTACK(shutdown); - struct cache_set_opts opts = cache_set_opts_empty(); + struct bch_opts opts = bch_opts_empty(); struct cache_set *c = NULL; enum btree_id btree_id = BTREE_ID_EXTENTS; struct bpos start = POS_MIN, end = POS_MAX; @@ -265,7 +260,7 @@ int cmd_list(int argc, char *argv[]) if (optind >= argc) die("Please supply device(s) to check"); - err = bch_register_cache_set(argv + optind, argc - optind, opts, &c); + err = bch_fs_open(argv + optind, argc - optind, opts, &c); if (err) die("error opening %s: %s", argv[optind], err); @@ -280,9 +275,6 @@ int cmd_list(int argc, char *argv[]) die("Invalid mode"); } - c->stop_completion = &shutdown; - bch_cache_set_stop(c); - closure_put(&c->cl); - wait_for_completion(&shutdown); + bch_fs_stop_sync(c); return 0; } diff --git a/cmd_fsck.c b/cmd_fsck.c index 1b7ebbf..a8c8dc5 100644 --- a/cmd_fsck.c +++ b/cmd_fsck.c @@ -21,8 +21,7 @@ static void usage(void) int cmd_fsck(int argc, char *argv[]) { - DECLARE_COMPLETION_ONSTACK(shutdown); - struct cache_set_opts opts = cache_set_opts_empty(); + struct bch_opts opts = bch_opts_empty(); struct cache_set *c = NULL; const char *err; int opt; @@ -53,16 +52,10 @@ int cmd_fsck(int argc, char *argv[]) if (optind >= argc) die("Please supply device(s) to check"); - err = bch_register_cache_set(argv + optind, argc - optind, opts, &c); + err = bch_fs_open(argv + optind, argc - optind, opts, &c); if (err) die("error opening %s: %s", argv[optind], err); - c->stop_completion = &shutdown; - bch_cache_set_stop(c); - closure_put(&c->cl); - - /* Killable? 
*/ - wait_for_completion(&shutdown); - + bch_fs_stop_sync(c); return 0; } diff --git a/debian/control b/debian/control index 9bbc6f8..1886dfc 100644 --- a/debian/control +++ b/debian/control @@ -5,7 +5,7 @@ Section: utils Priority: optional Standards-Version: 3.9.5 Build-Depends: debhelper (>= 9), pkg-config, libblkid-dev, uuid-dev, - libscrypt-dev, libsodium-dev, libkeyutils-dev, liburcu-dev + libscrypt-dev, libsodium-dev, libkeyutils-dev, liburcu-dev, zlib1g-dev Vcs-Browser: http://anonscm.debian.org/gitweb/?p=collab-maint/bcache-tools.git Vcs-Git: git://anonscm.debian.org/collab-maint/bcache-tools.git Homepage: http://bcache.evilpiepirate.org/ diff --git a/include/linux/bcache.h b/include/linux/bcache.h index 4179f8d..dbb0274 100644 --- a/include/linux/bcache.h +++ b/include/linux/bcache.h @@ -1021,80 +1021,6 @@ enum bch_compression_opts { BCH_COMPRESSION_NR = 3, }; -/** - * BCH_OPT(name, choices, min, max, sb_option, sysfs_writeable) - * - * @name - name of mount option, sysfs attribute, and struct cache_set_opts - * member - * - * @choices - array of strings that the user can select from - option is by - * array index - * - * Booleans are special cased; if @choices is bch_bool_opt the mount - * options name and noname will work as expected. - * - * @min, @max - * - * @sb_option - name of corresponding superblock option - * - * @sysfs_writeable - if true, option will be modifiable at runtime via sysfs - */ - -#define BCH_SB_OPTS() \ - BCH_OPT(errors, \ - bch_error_actions, \ - 0, BCH_NR_ERROR_ACTIONS, \ - BCH_SB_ERROR_ACTION, \ - true) \ - BCH_OPT(metadata_replicas, \ - bch_uint_opt, \ - 0, BCH_REPLICAS_MAX, \ - BCH_SB_META_REPLICAS_WANT, \ - false) \ - BCH_OPT(data_replicas, \ - bch_uint_opt, \ - 0, BCH_REPLICAS_MAX, \ - BCH_SB_DATA_REPLICAS_WANT, \ - false) \ - BCH_OPT(metadata_checksum, \ - bch_csum_types, \ - 0, BCH_CSUM_OPT_NR, \ - BCH_SB_META_CSUM_TYPE, \ - true) \ - BCH_OPT(data_checksum, \ - bch_csum_types, \ - 0, BCH_CSUM_OPT_NR, \ - BCH_SB_DATA_CSUM_TYPE, \ - true) \ - BCH_OPT(compression, \ - bch_compression_types, \ - 0, BCH_COMPRESSION_NR, \ - BCH_SB_COMPRESSION_TYPE, \ - true) \ - BCH_OPT(str_hash, \ - bch_str_hash_types, \ - 0, BCH_STR_HASH_NR, \ - BCH_SB_STR_HASH_TYPE, \ - true) \ - BCH_OPT(inodes_32bit, \ - bch_bool_opt, 0, 2, \ - BCH_SB_INODE_32BIT, \ - true) \ - BCH_OPT(gc_reserve_percent, \ - bch_uint_opt, \ - 5, 21, \ - BCH_SB_GC_RESERVE, \ - false) \ - BCH_OPT(root_reserve_percent, \ - bch_uint_opt, \ - 0, 100, \ - BCH_SB_ROOT_RESERVE, \ - false) \ - BCH_OPT(wide_macs, \ - bch_bool_opt, 0, 2, \ - BCH_SB_128_BIT_MACS, \ - true) - /* backing device specific stuff: */ struct backingdev_sb { diff --git a/include/linux/completion.h b/include/linux/completion.h index b8bac21..71c6b61 100644 --- a/include/linux/completion.h +++ b/include/linux/completion.h @@ -10,74 +10,26 @@ #include -/* - * struct completion - structure used to maintain state for a "completion" - * - * This is the opaque structure used to maintain the state for a "completion". - * Completions currently use a FIFO to queue threads that have to wait for - * the "completion" event. - * - * See also: complete(), wait_for_completion() (and friends _timeout, - * _interruptible, _interruptible_timeout, and _killable), init_completion(), - * reinit_completion(), and macros DECLARE_COMPLETION(), - * DECLARE_COMPLETION_ONSTACK(). 
- */ struct completion { unsigned int done; wait_queue_head_t wait; }; -#define COMPLETION_INITIALIZER(work) \ - { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) } +#define DECLARE_COMPLETION(work) \ + struct completion work = { \ + .done = 0, \ + .wait = __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) \ + } -#define COMPLETION_INITIALIZER_ONSTACK(work) \ - ({ init_completion(&work); work; }) - -#define DECLARE_COMPLETION(work) \ - struct completion work = COMPLETION_INITIALIZER(work) #define DECLARE_COMPLETION_ONSTACK(work) DECLARE_COMPLETION(work) -/** - * init_completion - Initialize a dynamically allocated completion - * @x: pointer to completion structure that is to be initialized - * - * This inline function will initialize a dynamically created completion - * structure. - */ static inline void init_completion(struct completion *x) { x->done = 0; init_waitqueue_head(&x->wait); } -/** - * reinit_completion - reinitialize a completion structure - * @x: pointer to completion structure that is to be reinitialized - * - * This inline function should be used to reinitialize a completion structure so it can - * be reused. This is especially important after complete_all() is used. - */ -static inline void reinit_completion(struct completion *x) -{ - x->done = 0; -} - -extern void wait_for_completion(struct completion *); -extern void wait_for_completion_io(struct completion *); -extern int wait_for_completion_interruptible(struct completion *x); -extern int wait_for_completion_killable(struct completion *x); -extern unsigned long wait_for_completion_timeout(struct completion *x, - unsigned long timeout); -extern unsigned long wait_for_completion_io_timeout(struct completion *x, - unsigned long timeout); -extern long wait_for_completion_interruptible_timeout( - struct completion *x, unsigned long timeout); -extern long wait_for_completion_killable_timeout( - struct completion *x, unsigned long timeout); -extern bool try_wait_for_completion(struct completion *x); -extern bool completion_done(struct completion *x); - -extern void complete(struct completion *); -extern void complete_all(struct completion *); +void complete(struct completion *); +void wait_for_completion(struct completion *); #endif diff --git a/include/linux/kernel.h b/include/linux/kernel.h index ac72858..741e0ba 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -194,6 +194,26 @@ static inline int __must_check kstrtol(const char *s, unsigned int base, long *r int __must_check kstrtouint(const char *s, unsigned int base, unsigned int *res); int __must_check kstrtoint(const char *s, unsigned int base, int *res); +static inline int __must_check kstrtou64(const char *s, unsigned int base, u64 *res) +{ + return kstrtoull(s, base, res); +} + +static inline int __must_check kstrtos64(const char *s, unsigned int base, s64 *res) +{ + return kstrtoll(s, base, res); +} + +static inline int __must_check kstrtou32(const char *s, unsigned int base, u32 *res) +{ + return kstrtouint(s, base, res); +} + +static inline int __must_check kstrtos32(const char *s, unsigned int base, s32 *res) +{ + return kstrtoint(s, base, res); +} + /* Permissions on a sysfs file: you didn't miss the 0 prefix did you? 
*/ #define VERIFY_OCTAL_PERMISSIONS(perms) \ (BUILD_BUG_ON_ZERO((perms) < 0) + \ diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index e5b35ed..f3faea1 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -30,44 +30,15 @@ #include #include -/* - * The end of the chain is marked with a special nulls marks which has - * the following format: - * - * +-------+-----------------------------------------------------+-+ - * | Base | Hash |1| - * +-------+-----------------------------------------------------+-+ - * - * Base (4 bits) : Reserved to distinguish between multiple tables. - * Specified via &struct rhashtable_params.nulls_base. - * Hash (27 bits): Full hash (unmasked) of first element added to bucket - * 1 (1 bit) : Nulls marker (always set) - * - * The remaining bits of the next pointer remain unused for now. - */ #define RHT_BASE_BITS 4 #define RHT_HASH_BITS 27 #define RHT_BASE_SHIFT RHT_HASH_BITS - -/* Base bits plus 1 bit for nulls marker */ #define RHT_HASH_RESERVED_SPACE (RHT_BASE_BITS + 1) struct rhash_head { struct rhash_head __rcu *next; }; -/** - * struct bucket_table - Table of hash buckets - * @size: Number of hash buckets - * @rehash: Current bucket being rehashed - * @hash_rnd: Random seed to fold into hash - * @locks_mask: Mask to apply before accessing locks[] - * @locks: Array of spinlocks protecting individual buckets - * @walkers: List of active walkers - * @rcu: RCU structure for freeing the table - * @future_tbl: Table under construction during rehashing - * @buckets: size * hash buckets - */ struct bucket_table { unsigned int size; unsigned int rehash; @@ -82,11 +53,6 @@ struct bucket_table { struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp; }; -/** - * struct rhashtable_compare_arg - Key for the function rhashtable_compare - * @ht: Hash table - * @key: Key to compare against - */ struct rhashtable_compare_arg { struct rhashtable *ht; const void *key; @@ -97,25 +63,6 @@ typedef u32 (*rht_obj_hashfn_t)(const void *data, u32 len, u32 seed); typedef int (*rht_obj_cmpfn_t)(struct rhashtable_compare_arg *arg, const void *obj); -struct rhashtable; - -/** - * struct rhashtable_params - Hash table construction parameters - * @nelem_hint: Hint on number of elements, should be 75% of desired size - * @key_len: Length of key - * @key_offset: Offset of key in struct to be hashed - * @head_offset: Offset of rhash_head in struct to be hashed - * @insecure_max_entries: Maximum number of entries (may be exceeded) - * @max_size: Maximum size while expanding - * @min_size: Minimum size while shrinking - * @nulls_base: Base value to generate nulls marker - * @insecure_elasticity: Set to true to disable chain length checks - * @automatic_shrinking: Enable automatic shrinking of tables - * @locks_mul: Number of bucket locks to allocate per cpu (default: 128) - * @hashfn: Hash function (default: jhash2 if !(key_len % 4), or jhash) - * @obj_hashfn: Function to hash object - * @obj_cmpfn: Function to compare key with object - */ struct rhashtable_params { size_t nelem_hint; size_t key_len; @@ -133,17 +80,6 @@ struct rhashtable_params { rht_obj_cmpfn_t obj_cmpfn; }; -/** - * struct rhashtable - Hash table handle - * @tbl: Bucket table - * @nelems: Number of elements in table - * @key_len: Key length for hashfn - * @elasticity: Maximum chain length before rehash - * @p: Configuration parameters - * @run_work: Deferred worker to expand/shrink asynchronously - * @mutex: Mutex to protect current/future table swapping - * @lock: Spin lock 
to protect walker list - */ struct rhashtable { struct bucket_table __rcu *tbl; atomic_t nelems; @@ -155,32 +91,11 @@ struct rhashtable { spinlock_t lock; }; -/** - * struct rhashtable_walker - Hash table walker - * @list: List entry on list of walkers - * @tbl: The table that we were walking over - */ struct rhashtable_walker { struct list_head list; struct bucket_table *tbl; }; -/** - * struct rhashtable_iter - Hash table iterator, fits into netlink cb - * @ht: Table to iterate through - * @p: Current pointer - * @walker: Associated rhashtable walker - * @slot: Current slot - * @skip: Number of entries to skip in slot - */ -struct rhashtable_iter { - struct rhashtable *ht; - struct rhash_head *p; - struct rhashtable_walker *walker; - unsigned int slot; - unsigned int skip; -}; - static inline unsigned long rht_marker(const struct rhashtable *ht, u32 hash) { return NULLS_MARKER(ht->p.nulls_base + hash); @@ -255,11 +170,6 @@ static inline unsigned int rht_head_hashfn( rht_key_hashfn(ht, tbl, ptr + params.key_offset, params); } -/** - * rht_grow_above_75 - returns true if nelems > 0.75 * table-size - * @ht: hash table - * @tbl: current table - */ static inline bool rht_grow_above_75(const struct rhashtable *ht, const struct bucket_table *tbl) { @@ -268,11 +178,6 @@ static inline bool rht_grow_above_75(const struct rhashtable *ht, (!ht->p.max_size || tbl->size < ht->p.max_size); } -/** - * rht_shrink_below_30 - returns true if nelems < 0.3 * table-size - * @ht: hash table - * @tbl: current table - */ static inline bool rht_shrink_below_30(const struct rhashtable *ht, const struct bucket_table *tbl) { @@ -281,11 +186,6 @@ static inline bool rht_shrink_below_30(const struct rhashtable *ht, tbl->size > ht->p.min_size; } -/** - * rht_grow_above_100 - returns true if nelems > table-size - * @ht: hash table - * @tbl: current table - */ static inline bool rht_grow_above_100(const struct rhashtable *ht, const struct bucket_table *tbl) { @@ -293,11 +193,6 @@ static inline bool rht_grow_above_100(const struct rhashtable *ht, (!ht->p.max_size || tbl->size < ht->p.max_size); } -/** - * rht_grow_above_max - returns true if table is above maximum - * @ht: hash table - * @tbl: current table - */ static inline bool rht_grow_above_max(const struct rhashtable *ht, const struct bucket_table *tbl) { @@ -305,206 +200,52 @@ static inline bool rht_grow_above_max(const struct rhashtable *ht, atomic_read(&ht->nelems) >= ht->p.insecure_max_entries; } -/* The bucket lock is selected based on the hash and protects mutations - * on a group of hash buckets. - * - * A maximum of tbl->size/2 bucket locks is allocated. This ensures that - * a single lock always covers both buckets which may both contains - * entries which link to the same bucket of the old table during resizing. - * This allows to simplify the locking as locking the bucket in both - * tables during resize always guarantee protection. - * - * IMPORTANT: When holding the bucket lock of both the old and new table - * during expansions and shrinking, the old bucket lock must always be - * acquired first. 
- */ static inline spinlock_t *rht_bucket_lock(const struct bucket_table *tbl, unsigned int hash) { return &tbl->locks[hash & tbl->locks_mask]; } -#ifdef CONFIG_PROVE_LOCKING -int lockdep_rht_mutex_is_held(struct rhashtable *ht); -int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash); -#else -static inline int lockdep_rht_mutex_is_held(struct rhashtable *ht) -{ - return 1; -} - -static inline int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, - u32 hash) -{ - return 1; -} -#endif /* CONFIG_PROVE_LOCKING */ +int rhashtable_insert_rehash(struct rhashtable *, struct bucket_table *); +struct bucket_table *rhashtable_insert_slow(struct rhashtable *, + const void *, + struct rhash_head *, + struct bucket_table *); -int rhashtable_init(struct rhashtable *ht, - const struct rhashtable_params *params); +int rhashtable_init(struct rhashtable *, const struct rhashtable_params *); +void rhashtable_destroy(struct rhashtable *); -struct bucket_table *rhashtable_insert_slow(struct rhashtable *ht, - const void *key, - struct rhash_head *obj, - struct bucket_table *old_tbl); -int rhashtable_insert_rehash(struct rhashtable *ht, struct bucket_table *tbl); - -int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter, - gfp_t gfp); -void rhashtable_walk_exit(struct rhashtable_iter *iter); -int rhashtable_walk_start(struct rhashtable_iter *iter) __acquires(RCU); -void *rhashtable_walk_next(struct rhashtable_iter *iter); -void rhashtable_walk_stop(struct rhashtable_iter *iter) __releases(RCU); - -void rhashtable_free_and_destroy(struct rhashtable *ht, - void (*free_fn)(void *ptr, void *arg), - void *arg); -void rhashtable_destroy(struct rhashtable *ht); - -#define rht_dereference(p, ht) \ - rcu_dereference_protected(p, lockdep_rht_mutex_is_held(ht)) - -#define rht_dereference_rcu(p, ht) \ - rcu_dereference_check(p, lockdep_rht_mutex_is_held(ht)) - -#define rht_dereference_bucket(p, tbl, hash) \ - rcu_dereference_protected(p, lockdep_rht_bucket_is_held(tbl, hash)) - -#define rht_dereference_bucket_rcu(p, tbl, hash) \ - rcu_dereference_check(p, lockdep_rht_bucket_is_held(tbl, hash)) +#define rht_dereference(p, ht) rcu_dereference(p) +#define rht_dereference_rcu(p, ht) rcu_dereference(p) +#define rht_dereference_bucket(p, tbl, hash) rcu_dereference(p) +#define rht_dereference_bucket_rcu(p, tbl, hash) rcu_dereference(p) #define rht_entry(tpos, pos, member) \ ({ tpos = container_of(pos, typeof(*tpos), member); 1; }) -/** - * rht_for_each_continue - continue iterating over hash chain - * @pos: the &struct rhash_head to use as a loop cursor. - * @head: the previous &struct rhash_head to continue from - * @tbl: the &struct bucket_table - * @hash: the hash value / bucket index - */ #define rht_for_each_continue(pos, head, tbl, hash) \ for (pos = rht_dereference_bucket(head, tbl, hash); \ !rht_is_a_nulls(pos); \ pos = rht_dereference_bucket((pos)->next, tbl, hash)) -/** - * rht_for_each - iterate over hash chain - * @pos: the &struct rhash_head to use as a loop cursor. - * @tbl: the &struct bucket_table - * @hash: the hash value / bucket index - */ #define rht_for_each(pos, tbl, hash) \ rht_for_each_continue(pos, (tbl)->buckets[hash], tbl, hash) -/** - * rht_for_each_entry_continue - continue iterating over hash chain - * @tpos: the type * to use as a loop cursor. - * @pos: the &struct rhash_head to use as a loop cursor. 
- * @head: the previous &struct rhash_head to continue from - * @tbl: the &struct bucket_table - * @hash: the hash value / bucket index - * @member: name of the &struct rhash_head within the hashable struct. - */ -#define rht_for_each_entry_continue(tpos, pos, head, tbl, hash, member) \ - for (pos = rht_dereference_bucket(head, tbl, hash); \ - (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \ - pos = rht_dereference_bucket((pos)->next, tbl, hash)) - -/** - * rht_for_each_entry - iterate over hash chain of given type - * @tpos: the type * to use as a loop cursor. - * @pos: the &struct rhash_head to use as a loop cursor. - * @tbl: the &struct bucket_table - * @hash: the hash value / bucket index - * @member: name of the &struct rhash_head within the hashable struct. - */ -#define rht_for_each_entry(tpos, pos, tbl, hash, member) \ - rht_for_each_entry_continue(tpos, pos, (tbl)->buckets[hash], \ - tbl, hash, member) - -/** - * rht_for_each_entry_safe - safely iterate over hash chain of given type - * @tpos: the type * to use as a loop cursor. - * @pos: the &struct rhash_head to use as a loop cursor. - * @next: the &struct rhash_head to use as next in loop cursor. - * @tbl: the &struct bucket_table - * @hash: the hash value / bucket index - * @member: name of the &struct rhash_head within the hashable struct. - * - * This hash chain list-traversal primitive allows for the looped code to - * remove the loop cursor from the list. - */ -#define rht_for_each_entry_safe(tpos, pos, next, tbl, hash, member) \ - for (pos = rht_dereference_bucket((tbl)->buckets[hash], tbl, hash), \ - next = !rht_is_a_nulls(pos) ? \ - rht_dereference_bucket(pos->next, tbl, hash) : NULL; \ - (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \ - pos = next, \ - next = !rht_is_a_nulls(pos) ? \ - rht_dereference_bucket(pos->next, tbl, hash) : NULL) - -/** - * rht_for_each_rcu_continue - continue iterating over rcu hash chain - * @pos: the &struct rhash_head to use as a loop cursor. - * @head: the previous &struct rhash_head to continue from - * @tbl: the &struct bucket_table - * @hash: the hash value / bucket index - * - * This hash chain list-traversal primitive may safely run concurrently with - * the _rcu mutation primitives such as rhashtable_insert() as long as the - * traversal is guarded by rcu_read_lock(). - */ #define rht_for_each_rcu_continue(pos, head, tbl, hash) \ for (({barrier(); }), \ pos = rht_dereference_bucket_rcu(head, tbl, hash); \ !rht_is_a_nulls(pos); \ pos = rcu_dereference_raw(pos->next)) -/** - * rht_for_each_rcu - iterate over rcu hash chain - * @pos: the &struct rhash_head to use as a loop cursor. - * @tbl: the &struct bucket_table - * @hash: the hash value / bucket index - * - * This hash chain list-traversal primitive may safely run concurrently with - * the _rcu mutation primitives such as rhashtable_insert() as long as the - * traversal is guarded by rcu_read_lock(). - */ #define rht_for_each_rcu(pos, tbl, hash) \ rht_for_each_rcu_continue(pos, (tbl)->buckets[hash], tbl, hash) -/** - * rht_for_each_entry_rcu_continue - continue iterating over rcu hash chain - * @tpos: the type * to use as a loop cursor. - * @pos: the &struct rhash_head to use as a loop cursor. - * @head: the previous &struct rhash_head to continue from - * @tbl: the &struct bucket_table - * @hash: the hash value / bucket index - * @member: name of the &struct rhash_head within the hashable struct. 
- * - * This hash chain list-traversal primitive may safely run concurrently with - * the _rcu mutation primitives such as rhashtable_insert() as long as the - * traversal is guarded by rcu_read_lock(). - */ #define rht_for_each_entry_rcu_continue(tpos, pos, head, tbl, hash, member) \ for (({barrier(); }), \ pos = rht_dereference_bucket_rcu(head, tbl, hash); \ (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \ pos = rht_dereference_bucket_rcu(pos->next, tbl, hash)) -/** - * rht_for_each_entry_rcu - iterate over rcu hash chain of given type - * @tpos: the type * to use as a loop cursor. - * @pos: the &struct rhash_head to use as a loop cursor. - * @tbl: the &struct bucket_table - * @hash: the hash value / bucket index - * @member: name of the &struct rhash_head within the hashable struct. - * - * This hash chain list-traversal primitive may safely run concurrently with - * the _rcu mutation primitives such as rhashtable_insert() as long as the - * traversal is guarded by rcu_read_lock(). - */ #define rht_for_each_entry_rcu(tpos, pos, tbl, hash, member) \ rht_for_each_entry_rcu_continue(tpos, pos, (tbl)->buckets[hash],\ tbl, hash, member) @@ -518,17 +259,6 @@ static inline int rhashtable_compare(struct rhashtable_compare_arg *arg, return memcmp(ptr + ht->p.key_offset, arg->key, ht->p.key_len); } -/** - * rhashtable_lookup_fast - search hash table, inlined version - * @ht: hash table - * @key: the pointer to the key - * @params: hash table parameters - * - * Computes the hash value for the key and traverses the bucket chain looking - * for a entry with an identical key. The first matching entry is returned. - * - * Returns the first entry on which the compare function returned true. - */ static inline void *rhashtable_lookup_fast( struct rhashtable *ht, const void *key, const struct rhashtable_params params) @@ -566,7 +296,6 @@ restart: return NULL; } -/* Internal function, please use rhashtable_insert_fast() instead */ static inline int __rhashtable_insert_fast( struct rhashtable *ht, const void *key, struct rhash_head *obj, const struct rhashtable_params params) @@ -658,50 +387,6 @@ out: return err; } -/** - * rhashtable_insert_fast - insert object into hash table - * @ht: hash table - * @obj: pointer to hash head inside object - * @params: hash table parameters - * - * Will take a per bucket spinlock to protect against mutual mutations - * on the same bucket. Multiple insertions may occur in parallel unless - * they map to the same bucket lock. - * - * It is safe to call this function from atomic context. - * - * Will trigger an automatic deferred table resizing if the size grows - * beyond the watermark indicated by grow_decision() which can be passed - * to rhashtable_init(). - */ -static inline int rhashtable_insert_fast( - struct rhashtable *ht, struct rhash_head *obj, - const struct rhashtable_params params) -{ - return __rhashtable_insert_fast(ht, NULL, obj, params); -} - -/** - * rhashtable_lookup_insert_fast - lookup and insert object into hash table - * @ht: hash table - * @obj: pointer to hash head inside object - * @params: hash table parameters - * - * Locks down the bucket chain in both the old and new table if a resize - * is in progress to ensure that writers can't remove from the old table - * and can't insert to the new table during the atomic operation of search - * and insertion. Searches for duplicates in both the old and new table if - * a resize is in progress. - * - * This lookup function may only be used for fixed key hash table (key_len - * parameter set). 
It will BUG() if used inappropriately. - * - * It is safe to call this function from atomic context. - * - * Will trigger an automatic deferred table resizing if the size grows - * beyond the watermark indicated by grow_decision() which can be passed - * to rhashtable_init(). - */ static inline int rhashtable_lookup_insert_fast( struct rhashtable *ht, struct rhash_head *obj, const struct rhashtable_params params) @@ -714,38 +399,6 @@ static inline int rhashtable_lookup_insert_fast( params); } -/** - * rhashtable_lookup_insert_key - search and insert object to hash table - * with explicit key - * @ht: hash table - * @key: key - * @obj: pointer to hash head inside object - * @params: hash table parameters - * - * Locks down the bucket chain in both the old and new table if a resize - * is in progress to ensure that writers can't remove from the old table - * and can't insert to the new table during the atomic operation of search - * and insertion. Searches for duplicates in both the old and new table if - * a resize is in progress. - * - * Lookups may occur in parallel with hashtable mutations and resizing. - * - * Will trigger an automatic deferred table resizing if the size grows - * beyond the watermark indicated by grow_decision() which can be passed - * to rhashtable_init(). - * - * Returns zero on success. - */ -static inline int rhashtable_lookup_insert_key( - struct rhashtable *ht, const void *key, struct rhash_head *obj, - const struct rhashtable_params params) -{ - BUG_ON(!ht->p.obj_hashfn || !key); - - return __rhashtable_insert_fast(ht, key, obj, params); -} - -/* Internal function, please use rhashtable_remove_fast() instead */ static inline int __rhashtable_remove_fast( struct rhashtable *ht, struct bucket_table *tbl, struct rhash_head *obj, const struct rhashtable_params params) @@ -778,21 +431,6 @@ static inline int __rhashtable_remove_fast( return err; } -/** - * rhashtable_remove_fast - remove object from hash table - * @ht: hash table - * @obj: pointer to hash head inside object - * @params: hash table parameters - * - * Since the hash chain is single linked, the removal operation needs to - * walk the bucket chain upon removal. The removal operation is thus - * considerable slow if the hash table is not correctly sized. - * - * Will automatically shrink the table via rhashtable_expand() if the - * shrink_decision function specified at rhashtable_init() returns true. - * - * Returns zero on success, -ENOENT if the entry could not be found. - */ static inline int rhashtable_remove_fast( struct rhashtable *ht, struct rhash_head *obj, const struct rhashtable_params params) @@ -827,86 +465,4 @@ out: return err; } -/* Internal function, please use rhashtable_replace_fast() instead */ -static inline int __rhashtable_replace_fast( - struct rhashtable *ht, struct bucket_table *tbl, - struct rhash_head *obj_old, struct rhash_head *obj_new, - const struct rhashtable_params params) -{ - struct rhash_head __rcu **pprev; - struct rhash_head *he; - spinlock_t *lock; - unsigned int hash; - int err = -ENOENT; - - /* Minimally, the old and new objects must have same hash - * (which should mean identifiers are the same). 
- */ - hash = rht_head_hashfn(ht, tbl, obj_old, params); - if (hash != rht_head_hashfn(ht, tbl, obj_new, params)) - return -EINVAL; - - lock = rht_bucket_lock(tbl, hash); - - spin_lock_bh(lock); - - pprev = &tbl->buckets[hash]; - rht_for_each(he, tbl, hash) { - if (he != obj_old) { - pprev = &he->next; - continue; - } - - rcu_assign_pointer(obj_new->next, obj_old->next); - rcu_assign_pointer(*pprev, obj_new); - err = 0; - break; - } - - spin_unlock_bh(lock); - - return err; -} - -/** - * rhashtable_replace_fast - replace an object in hash table - * @ht: hash table - * @obj_old: pointer to hash head inside object being replaced - * @obj_new: pointer to hash head inside object which is new - * @params: hash table parameters - * - * Replacing an object doesn't affect the number of elements in the hash table - * or bucket, so we don't need to worry about shrinking or expanding the - * table here. - * - * Returns zero on success, -ENOENT if the entry could not be found, - * -EINVAL if hash is not the same for the old and new objects. - */ -static inline int rhashtable_replace_fast( - struct rhashtable *ht, struct rhash_head *obj_old, - struct rhash_head *obj_new, - const struct rhashtable_params params) -{ - struct bucket_table *tbl; - int err; - - rcu_read_lock(); - - tbl = rht_dereference_rcu(ht->tbl, ht); - - /* Because we have already taken (and released) the bucket - * lock in old_tbl, if we find that future_tbl is not yet - * visible then that guarantees the entry to still be in - * the old tbl if it exists. - */ - while ((err = __rhashtable_replace_fast(ht, tbl, obj_old, - obj_new, params)) && - (tbl = rht_dereference_rcu(tbl->future_tbl, ht))) - ; - - rcu_read_unlock(); - - return err; -} - #endif /* _LINUX_RHASHTABLE_H */ diff --git a/include/linux/wait.h b/include/linux/wait.h index 77cba05..f6f5757 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -2,20 +2,14 @@ #define _LINUX_WAIT_H #include - #include #include -#include #include -//#include typedef struct __wait_queue wait_queue_t; typedef int (*wait_queue_func_t)(wait_queue_t *wait, unsigned mode, int flags, void *key); -int default_wake_function(wait_queue_t *wait, unsigned mode, int flags, void *key); -/* __wait_queue::flags */ #define WQ_FLAG_EXCLUSIVE 0x01 -#define WQ_FLAG_WOKEN 0x02 struct __wait_queue { unsigned int flags; @@ -24,37 +18,23 @@ struct __wait_queue { struct list_head task_list; }; -struct wait_bit_key { - void *flags; - int bit_nr; -#define WAIT_ATOMIC_T_BIT_NR -1 - unsigned long timeout; -}; - -struct wait_bit_queue { - struct wait_bit_key key; - wait_queue_t wait; -}; - -struct __wait_queue_head { +typedef struct { spinlock_t lock; struct list_head task_list; -}; -typedef struct __wait_queue_head wait_queue_head_t; - -struct task_struct; +} wait_queue_head_t; -/* - * Macros for declaration and initialisaton of the datatypes - */ - -#define __WAITQUEUE_INITIALIZER(name, tsk) { \ - .private = tsk, \ - .func = default_wake_function, \ - .task_list = { NULL, NULL } } +void wake_up(wait_queue_head_t *); +void prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state); +void finish_wait(wait_queue_head_t *q, wait_queue_t *wait); +int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key); +int default_wake_function(wait_queue_t *wait, unsigned mode, int flags, void *key); #define DECLARE_WAITQUEUE(name, tsk) \ - wait_queue_t name = __WAITQUEUE_INITIALIZER(name, tsk) + wait_queue_t name = { \ + .private = tsk, \ + .func = default_wake_function, \ + .task_list = { 
NULL, NULL } \ + } #define __WAIT_QUEUE_HEAD_INITIALIZER(name) { \ .lock = __SPIN_LOCK_UNLOCKED(name.lock), \ @@ -63,178 +43,18 @@ struct task_struct; #define DECLARE_WAIT_QUEUE_HEAD(name) \ wait_queue_head_t name = __WAIT_QUEUE_HEAD_INITIALIZER(name) -#define __WAIT_BIT_KEY_INITIALIZER(word, bit) \ - { .flags = word, .bit_nr = bit, } - -#define __WAIT_ATOMIC_T_KEY_INITIALIZER(p) \ - { .flags = p, .bit_nr = WAIT_ATOMIC_T_BIT_NR, } - -extern void __init_waitqueue_head(wait_queue_head_t *q, const char *name, struct lock_class_key *); - -#define init_waitqueue_head(q) \ - do { \ - static struct lock_class_key __key; \ - \ - __init_waitqueue_head((q), #q, &__key); \ - } while (0) - -#ifdef CONFIG_LOCKDEP -# define __WAIT_QUEUE_HEAD_INIT_ONSTACK(name) \ - ({ init_waitqueue_head(&name); name; }) -# define DECLARE_WAIT_QUEUE_HEAD_ONSTACK(name) \ - wait_queue_head_t name = __WAIT_QUEUE_HEAD_INIT_ONSTACK(name) -#else -# define DECLARE_WAIT_QUEUE_HEAD_ONSTACK(name) DECLARE_WAIT_QUEUE_HEAD(name) -#endif - -static inline void init_waitqueue_entry(wait_queue_t *q, struct task_struct *p) -{ - q->flags = 0; - q->private = p; - q->func = default_wake_function; -} - -static inline void -init_waitqueue_func_entry(wait_queue_t *q, wait_queue_func_t func) -{ - q->flags = 0; - q->private = NULL; - q->func = func; -} - -/** - * waitqueue_active -- locklessly test for waiters on the queue - * @q: the waitqueue to test for waiters - * - * returns true if the wait list is not empty - * - * NOTE: this function is lockless and requires care, incorrect usage _will_ - * lead to sporadic and non-obvious failure. - * - * Use either while holding wait_queue_head_t::lock or when used for wakeups - * with an extra smp_mb() like: - * - * CPU0 - waker CPU1 - waiter - * - * for (;;) { - * @cond = true; prepare_to_wait(&wq, &wait, state); - * smp_mb(); // smp_mb() from set_current_state() - * if (waitqueue_active(wq)) if (@cond) - * wake_up(wq); break; - * schedule(); - * } - * finish_wait(&wq, &wait); - * - * Because without the explicit smp_mb() it's possible for the - * waitqueue_active() load to get hoisted over the @cond store such that we'll - * observe an empty wait list while the waiter might not observe @cond. - * - * Also note that this 'optimization' trades a spin_lock() for an smp_mb(), - * which (when the lock is uncontended) are of roughly equal cost. - */ -static inline int waitqueue_active(wait_queue_head_t *q) -{ - return !list_empty(&q->task_list); -} - -/** - * wq_has_sleeper - check if there are any waiting processes - * @wq: wait queue head - * - * Returns true if wq has waiting processes - * - * Please refer to the comment for waitqueue_active. - */ -static inline bool wq_has_sleeper(wait_queue_head_t *wq) -{ - /* - * We need to be sure we are in sync with the - * add_wait_queue modifications to the wait queue. - * - * This memory barrier should be paired with one on the - * waiting side. 
- */ - smp_mb(); - return waitqueue_active(wq); -} - -extern void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait); -extern void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait); -extern void remove_wait_queue(wait_queue_head_t *q, wait_queue_t *wait); - -static inline void __add_wait_queue(wait_queue_head_t *head, wait_queue_t *new) -{ - list_add(&new->task_list, &head->task_list); -} - -/* - * Used for wake-one threads: - */ -static inline void -__add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait) -{ - wait->flags |= WQ_FLAG_EXCLUSIVE; - __add_wait_queue(q, wait); -} - -static inline void __add_wait_queue_tail(wait_queue_head_t *head, - wait_queue_t *new) -{ - list_add_tail(&new->task_list, &head->task_list); -} - -static inline void -__add_wait_queue_tail_exclusive(wait_queue_head_t *q, wait_queue_t *wait) -{ - wait->flags |= WQ_FLAG_EXCLUSIVE; - __add_wait_queue_tail(q, wait); -} - -static inline void -__remove_wait_queue(wait_queue_head_t *head, wait_queue_t *old) +static inline void init_waitqueue_head(wait_queue_head_t *q) { - list_del(&old->task_list); + spin_lock_init(&q->lock); + INIT_LIST_HEAD(&q->task_list); } -typedef int wait_bit_action_f(struct wait_bit_key *, int mode); -void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key); -void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key); -void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, int nr, void *key); -void __wake_up_locked(wait_queue_head_t *q, unsigned int mode, int nr); -void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr); -void __wake_up_bit(wait_queue_head_t *, void *, int); -int __wait_on_bit(wait_queue_head_t *, struct wait_bit_queue *, wait_bit_action_f *, unsigned); -int __wait_on_bit_lock(wait_queue_head_t *, struct wait_bit_queue *, wait_bit_action_f *, unsigned); -void wake_up_bit(void *, int); -void wake_up_atomic_t(atomic_t *); -int out_of_line_wait_on_bit(void *, int, wait_bit_action_f *, unsigned); -int out_of_line_wait_on_bit_timeout(void *, int, wait_bit_action_f *, unsigned, unsigned long); -int out_of_line_wait_on_bit_lock(void *, int, wait_bit_action_f *, unsigned); -int out_of_line_wait_on_atomic_t(atomic_t *, int (*)(atomic_t *), unsigned); -wait_queue_head_t *bit_waitqueue(void *, int); - -#define wake_up(x) __wake_up(x, TASK_NORMAL, 1, NULL) -#define wake_up_nr(x, nr) __wake_up(x, TASK_NORMAL, nr, NULL) -#define wake_up_all(x) __wake_up(x, TASK_NORMAL, 0, NULL) -#define wake_up_locked(x) __wake_up_locked((x), TASK_NORMAL, 1) -#define wake_up_all_locked(x) __wake_up_locked((x), TASK_NORMAL, 0) - -#define wake_up_interruptible(x) __wake_up(x, TASK_INTERRUPTIBLE, 1, NULL) -#define wake_up_interruptible_nr(x, nr) __wake_up(x, TASK_INTERRUPTIBLE, nr, NULL) -#define wake_up_interruptible_all(x) __wake_up(x, TASK_INTERRUPTIBLE, 0, NULL) -#define wake_up_interruptible_sync(x) __wake_up_sync((x), TASK_INTERRUPTIBLE, 1) - -/* - * Wakeup macros to be used to report events to the targets. 
- */ -#define wake_up_poll(x, m) \ - __wake_up(x, TASK_NORMAL, 1, (void *) (m)) -#define wake_up_locked_poll(x, m) \ - __wake_up_locked_key((x), TASK_NORMAL, (void *) (m)) -#define wake_up_interruptible_poll(x, m) \ - __wake_up(x, TASK_INTERRUPTIBLE, 1, (void *) (m)) -#define wake_up_interruptible_sync_poll(x, m) \ - __wake_up_sync_key((x), TASK_INTERRUPTIBLE, 1, (void *) (m)) +#define DEFINE_WAIT(name) \ + wait_queue_t name = { \ + .private = current, \ + .func = autoremove_wake_function, \ + .task_list = LIST_HEAD_INIT((name).task_list), \ + } #define ___wait_cond_timeout(condition) \ ({ \ @@ -244,992 +64,68 @@ wait_queue_head_t *bit_waitqueue(void *, int); __cond || !__ret; \ }) -#define ___wait_is_interruptible(state) \ - (!__builtin_constant_p(state) || \ - state == TASK_INTERRUPTIBLE || state == TASK_KILLABLE) \ - -/* - * The below macro ___wait_event() has an explicit shadow of the __ret - * variable when used from the wait_event_*() macros. - * - * This is so that both can use the ___wait_cond_timeout() construct - * to wrap the condition. - * - * The type inconsistency of the wait_event_*() __ret variable is also - * on purpose; we use long where we can return timeout values and int - * otherwise. - */ - #define ___wait_event(wq, condition, state, exclusive, ret, cmd) \ ({ \ - __label__ __out; \ - wait_queue_t __wait; \ - long __ret = ret; /* explicit shadow */ \ - \ - INIT_LIST_HEAD(&__wait.task_list); \ - if (exclusive) \ - __wait.flags = WQ_FLAG_EXCLUSIVE; \ - else \ - __wait.flags = 0; \ + DEFINE_WAIT(__wait); \ + long __ret = ret; \ \ for (;;) { \ - long __int = prepare_to_wait_event(&wq, &__wait, state);\ - \ + prepare_to_wait(&wq, &__wait, state); \ if (condition) \ break; \ - \ - if (___wait_is_interruptible(state) && __int) { \ - __ret = __int; \ - if (exclusive) { \ - abort_exclusive_wait(&wq, &__wait, \ - state, NULL); \ - goto __out; \ - } \ - break; \ - } \ - \ cmd; \ } \ finish_wait(&wq, &__wait); \ -__out: __ret; \ + __ret; \ }) #define __wait_event(wq, condition) \ (void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0, \ schedule()) -/** - * wait_event - sleep until a condition gets true - * @wq: the waitqueue to wait on - * @condition: a C expression for the event to wait for - * - * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the - * @condition evaluates to true. The @condition is checked each time - * the waitqueue @wq is woken up. - * - * wake_up() has to be called after changing any variable that could - * change the result of the wait condition. - */ #define wait_event(wq, condition) \ do { \ - might_sleep(); \ if (condition) \ break; \ __wait_event(wq, condition); \ } while (0) -#define __io_wait_event(wq, condition) \ - (void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0, \ - io_schedule()) - -/* - * io_wait_event() -- like wait_event() but with io_schedule() - */ -#define io_wait_event(wq, condition) \ -do { \ - might_sleep(); \ - if (condition) \ - break; \ - __io_wait_event(wq, condition); \ -} while (0) - -#define __wait_event_freezable(wq, condition) \ - ___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, 0, \ - schedule(); try_to_freeze()) - -/** - * wait_event_freezable - sleep (or freeze) until a condition gets true - * @wq: the waitqueue to wait on - * @condition: a C expression for the event to wait for - * - * The process is put to sleep (TASK_INTERRUPTIBLE -- so as not to contribute - * to system load) until the @condition evaluates to true. The - * @condition is checked each time the waitqueue @wq is woken up. 
- * - * wake_up() has to be called after changing any variable that could - * change the result of the wait condition. - */ -#define wait_event_freezable(wq, condition) \ -({ \ - int __ret = 0; \ - might_sleep(); \ - if (!(condition)) \ - __ret = __wait_event_freezable(wq, condition); \ - __ret; \ -}) - #define __wait_event_timeout(wq, condition, timeout) \ ___wait_event(wq, ___wait_cond_timeout(condition), \ TASK_UNINTERRUPTIBLE, 0, timeout, \ __ret = schedule_timeout(__ret)) -/** - * wait_event_timeout - sleep until a condition gets true or a timeout elapses - * @wq: the waitqueue to wait on - * @condition: a C expression for the event to wait for - * @timeout: timeout, in jiffies - * - * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the - * @condition evaluates to true. The @condition is checked each time - * the waitqueue @wq is woken up. - * - * wake_up() has to be called after changing any variable that could - * change the result of the wait condition. - * - * Returns: - * 0 if the @condition evaluated to %false after the @timeout elapsed, - * 1 if the @condition evaluated to %true after the @timeout elapsed, - * or the remaining jiffies (at least 1) if the @condition evaluated - * to %true before the @timeout elapsed. - */ #define wait_event_timeout(wq, condition, timeout) \ ({ \ long __ret = timeout; \ - might_sleep(); \ if (!___wait_cond_timeout(condition)) \ __ret = __wait_event_timeout(wq, condition, timeout); \ __ret; \ }) -#define __wait_event_freezable_timeout(wq, condition, timeout) \ - ___wait_event(wq, ___wait_cond_timeout(condition), \ - TASK_INTERRUPTIBLE, 0, timeout, \ - __ret = schedule_timeout(__ret); try_to_freeze()) - -/* - * like wait_event_timeout() -- except it uses TASK_INTERRUPTIBLE to avoid - * increasing load and is freezable. - */ -#define wait_event_freezable_timeout(wq, condition, timeout) \ -({ \ - long __ret = timeout; \ - might_sleep(); \ - if (!___wait_cond_timeout(condition)) \ - __ret = __wait_event_freezable_timeout(wq, condition, timeout); \ - __ret; \ -}) - -#define __wait_event_exclusive_cmd(wq, condition, cmd1, cmd2) \ - (void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 1, 0, \ - cmd1; schedule(); cmd2) -/* - * Just like wait_event_cmd(), except it sets exclusive flag - */ -#define wait_event_exclusive_cmd(wq, condition, cmd1, cmd2) \ -do { \ - if (condition) \ - break; \ - __wait_event_exclusive_cmd(wq, condition, cmd1, cmd2); \ -} while (0) - -#define __wait_event_cmd(wq, condition, cmd1, cmd2) \ - (void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0, \ - cmd1; schedule(); cmd2) - -/** - * wait_event_cmd - sleep until a condition gets true - * @wq: the waitqueue to wait on - * @condition: a C expression for the event to wait for - * @cmd1: the command will be executed before sleep - * @cmd2: the command will be executed after sleep - * - * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the - * @condition evaluates to true. The @condition is checked each time - * the waitqueue @wq is woken up. - * - * wake_up() has to be called after changing any variable that could - * change the result of the wait condition. 
- */ -#define wait_event_cmd(wq, condition, cmd1, cmd2) \ -do { \ - if (condition) \ - break; \ - __wait_event_cmd(wq, condition, cmd1, cmd2); \ -} while (0) - -#define __wait_event_interruptible(wq, condition) \ - ___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, 0, \ - schedule()) - -/** - * wait_event_interruptible - sleep until a condition gets true - * @wq: the waitqueue to wait on - * @condition: a C expression for the event to wait for - * - * The process is put to sleep (TASK_INTERRUPTIBLE) until the - * @condition evaluates to true or a signal is received. - * The @condition is checked each time the waitqueue @wq is woken up. - * - * wake_up() has to be called after changing any variable that could - * change the result of the wait condition. - * - * The function will return -ERESTARTSYS if it was interrupted by a - * signal and 0 if @condition evaluated to true. - */ -#define wait_event_interruptible(wq, condition) \ -({ \ - int __ret = 0; \ - might_sleep(); \ - if (!(condition)) \ - __ret = __wait_event_interruptible(wq, condition); \ - __ret; \ -}) - -#define __wait_event_interruptible_timeout(wq, condition, timeout) \ - ___wait_event(wq, ___wait_cond_timeout(condition), \ - TASK_INTERRUPTIBLE, 0, timeout, \ - __ret = schedule_timeout(__ret)) - -/** - * wait_event_interruptible_timeout - sleep until a condition gets true or a timeout elapses - * @wq: the waitqueue to wait on - * @condition: a C expression for the event to wait for - * @timeout: timeout, in jiffies - * - * The process is put to sleep (TASK_INTERRUPTIBLE) until the - * @condition evaluates to true or a signal is received. - * The @condition is checked each time the waitqueue @wq is woken up. - * - * wake_up() has to be called after changing any variable that could - * change the result of the wait condition. - * - * Returns: - * 0 if the @condition evaluated to %false after the @timeout elapsed, - * 1 if the @condition evaluated to %true after the @timeout elapsed, - * the remaining jiffies (at least 1) if the @condition evaluated - * to %true before the @timeout elapsed, or -%ERESTARTSYS if it was - * interrupted by a signal. - */ -#define wait_event_interruptible_timeout(wq, condition, timeout) \ -({ \ - long __ret = timeout; \ - might_sleep(); \ - if (!___wait_cond_timeout(condition)) \ - __ret = __wait_event_interruptible_timeout(wq, \ - condition, timeout); \ - __ret; \ -}) - -#define __wait_event_hrtimeout(wq, condition, timeout, state) \ -({ \ - int __ret = 0; \ - struct hrtimer_sleeper __t; \ - \ - hrtimer_init_on_stack(&__t.timer, CLOCK_MONOTONIC, \ - HRTIMER_MODE_REL); \ - hrtimer_init_sleeper(&__t, current); \ - if ((timeout).tv64 != KTIME_MAX) \ - hrtimer_start_range_ns(&__t.timer, timeout, \ - current->timer_slack_ns, \ - HRTIMER_MODE_REL); \ - \ - __ret = ___wait_event(wq, condition, state, 0, 0, \ - if (!__t.task) { \ - __ret = -ETIME; \ - break; \ - } \ - schedule()); \ - \ - hrtimer_cancel(&__t.timer); \ - destroy_hrtimer_on_stack(&__t.timer); \ - __ret; \ -}) - -/** - * wait_event_hrtimeout - sleep until a condition gets true or a timeout elapses - * @wq: the waitqueue to wait on - * @condition: a C expression for the event to wait for - * @timeout: timeout, as a ktime_t - * - * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the - * @condition evaluates to true or a signal is received. - * The @condition is checked each time the waitqueue @wq is woken up. - * - * wake_up() has to be called after changing any variable that could - * change the result of the wait condition. 
- * - * The function returns 0 if @condition became true, or -ETIME if the timeout - * elapsed. - */ -#define wait_event_hrtimeout(wq, condition, timeout) \ -({ \ - int __ret = 0; \ - might_sleep(); \ - if (!(condition)) \ - __ret = __wait_event_hrtimeout(wq, condition, timeout, \ - TASK_UNINTERRUPTIBLE); \ - __ret; \ -}) - -/** - * wait_event_interruptible_hrtimeout - sleep until a condition gets true or a timeout elapses - * @wq: the waitqueue to wait on - * @condition: a C expression for the event to wait for - * @timeout: timeout, as a ktime_t - * - * The process is put to sleep (TASK_INTERRUPTIBLE) until the - * @condition evaluates to true or a signal is received. - * The @condition is checked each time the waitqueue @wq is woken up. - * - * wake_up() has to be called after changing any variable that could - * change the result of the wait condition. - * - * The function returns 0 if @condition became true, -ERESTARTSYS if it was - * interrupted by a signal, or -ETIME if the timeout elapsed. - */ -#define wait_event_interruptible_hrtimeout(wq, condition, timeout) \ -({ \ - long __ret = 0; \ - might_sleep(); \ - if (!(condition)) \ - __ret = __wait_event_hrtimeout(wq, condition, timeout, \ - TASK_INTERRUPTIBLE); \ - __ret; \ -}) - -#define __wait_event_interruptible_exclusive(wq, condition) \ - ___wait_event(wq, condition, TASK_INTERRUPTIBLE, 1, 0, \ - schedule()) - -#define wait_event_interruptible_exclusive(wq, condition) \ -({ \ - int __ret = 0; \ - might_sleep(); \ - if (!(condition)) \ - __ret = __wait_event_interruptible_exclusive(wq, condition);\ - __ret; \ -}) - -#define __wait_event_killable_exclusive(wq, condition) \ - ___wait_event(wq, condition, TASK_KILLABLE, 1, 0, \ - schedule()) - -#define wait_event_killable_exclusive(wq, condition) \ -({ \ - int __ret = 0; \ - might_sleep(); \ - if (!(condition)) \ - __ret = __wait_event_killable_exclusive(wq, condition); \ - __ret; \ -}) - - -#define __wait_event_freezable_exclusive(wq, condition) \ - ___wait_event(wq, condition, TASK_INTERRUPTIBLE, 1, 0, \ - schedule(); try_to_freeze()) - -#define wait_event_freezable_exclusive(wq, condition) \ -({ \ - int __ret = 0; \ - might_sleep(); \ - if (!(condition)) \ - __ret = __wait_event_freezable_exclusive(wq, condition);\ - __ret; \ -}) - - -#define __wait_event_interruptible_locked(wq, condition, exclusive, irq) \ -({ \ - int __ret = 0; \ - DEFINE_WAIT(__wait); \ - if (exclusive) \ - __wait.flags |= WQ_FLAG_EXCLUSIVE; \ - do { \ - if (likely(list_empty(&__wait.task_list))) \ - __add_wait_queue_tail(&(wq), &__wait); \ - set_current_state(TASK_INTERRUPTIBLE); \ - if (signal_pending(current)) { \ - __ret = -ERESTARTSYS; \ - break; \ - } \ - if (irq) \ - spin_unlock_irq(&(wq).lock); \ - else \ - spin_unlock(&(wq).lock); \ - schedule(); \ - if (irq) \ - spin_lock_irq(&(wq).lock); \ - else \ - spin_lock(&(wq).lock); \ - } while (!(condition)); \ - __remove_wait_queue(&(wq), &__wait); \ - __set_current_state(TASK_RUNNING); \ - __ret; \ -}) - - -/** - * wait_event_interruptible_locked - sleep until a condition gets true - * @wq: the waitqueue to wait on - * @condition: a C expression for the event to wait for - * - * The process is put to sleep (TASK_INTERRUPTIBLE) until the - * @condition evaluates to true or a signal is received. - * The @condition is checked each time the waitqueue @wq is woken up. - * - * It must be called with wq.lock being held. This spinlock is - * unlocked while sleeping but @condition testing is done while lock - * is held and when this macro exits the lock is held. 
- * - * The lock is locked/unlocked using spin_lock()/spin_unlock() - * functions which must match the way they are locked/unlocked outside - * of this macro. - * - * wake_up_locked() has to be called after changing any variable that could - * change the result of the wait condition. - * - * The function will return -ERESTARTSYS if it was interrupted by a - * signal and 0 if @condition evaluated to true. - */ -#define wait_event_interruptible_locked(wq, condition) \ - ((condition) \ - ? 0 : __wait_event_interruptible_locked(wq, condition, 0, 0)) - -/** - * wait_event_interruptible_locked_irq - sleep until a condition gets true - * @wq: the waitqueue to wait on - * @condition: a C expression for the event to wait for - * - * The process is put to sleep (TASK_INTERRUPTIBLE) until the - * @condition evaluates to true or a signal is received. - * The @condition is checked each time the waitqueue @wq is woken up. - * - * It must be called with wq.lock being held. This spinlock is - * unlocked while sleeping but @condition testing is done while lock - * is held and when this macro exits the lock is held. - * - * The lock is locked/unlocked using spin_lock_irq()/spin_unlock_irq() - * functions which must match the way they are locked/unlocked outside - * of this macro. - * - * wake_up_locked() has to be called after changing any variable that could - * change the result of the wait condition. - * - * The function will return -ERESTARTSYS if it was interrupted by a - * signal and 0 if @condition evaluated to true. - */ -#define wait_event_interruptible_locked_irq(wq, condition) \ - ((condition) \ - ? 0 : __wait_event_interruptible_locked(wq, condition, 0, 1)) - -/** - * wait_event_interruptible_exclusive_locked - sleep exclusively until a condition gets true - * @wq: the waitqueue to wait on - * @condition: a C expression for the event to wait for - * - * The process is put to sleep (TASK_INTERRUPTIBLE) until the - * @condition evaluates to true or a signal is received. - * The @condition is checked each time the waitqueue @wq is woken up. - * - * It must be called with wq.lock being held. This spinlock is - * unlocked while sleeping but @condition testing is done while lock - * is held and when this macro exits the lock is held. - * - * The lock is locked/unlocked using spin_lock()/spin_unlock() - * functions which must match the way they are locked/unlocked outside - * of this macro. - * - * The process is put on the wait queue with an WQ_FLAG_EXCLUSIVE flag - * set thus when other process waits process on the list if this - * process is awaken further processes are not considered. - * - * wake_up_locked() has to be called after changing any variable that could - * change the result of the wait condition. - * - * The function will return -ERESTARTSYS if it was interrupted by a - * signal and 0 if @condition evaluated to true. - */ -#define wait_event_interruptible_exclusive_locked(wq, condition) \ - ((condition) \ - ? 0 : __wait_event_interruptible_locked(wq, condition, 1, 0)) - -/** - * wait_event_interruptible_exclusive_locked_irq - sleep until a condition gets true - * @wq: the waitqueue to wait on - * @condition: a C expression for the event to wait for - * - * The process is put to sleep (TASK_INTERRUPTIBLE) until the - * @condition evaluates to true or a signal is received. - * The @condition is checked each time the waitqueue @wq is woken up. - * - * It must be called with wq.lock being held. 
This spinlock is - * unlocked while sleeping but @condition testing is done while lock - * is held and when this macro exits the lock is held. - * - * The lock is locked/unlocked using spin_lock_irq()/spin_unlock_irq() - * functions which must match the way they are locked/unlocked outside - * of this macro. - * - * The process is put on the wait queue with an WQ_FLAG_EXCLUSIVE flag - * set thus when other process waits process on the list if this - * process is awaken further processes are not considered. - * - * wake_up_locked() has to be called after changing any variable that could - * change the result of the wait condition. - * - * The function will return -ERESTARTSYS if it was interrupted by a - * signal and 0 if @condition evaluated to true. - */ -#define wait_event_interruptible_exclusive_locked_irq(wq, condition) \ - ((condition) \ - ? 0 : __wait_event_interruptible_locked(wq, condition, 1, 1)) - - -#define __wait_event_killable(wq, condition) \ - ___wait_event(wq, condition, TASK_KILLABLE, 0, 0, schedule()) - -/** - * wait_event_killable - sleep until a condition gets true - * @wq: the waitqueue to wait on - * @condition: a C expression for the event to wait for - * - * The process is put to sleep (TASK_KILLABLE) until the - * @condition evaluates to true or a signal is received. - * The @condition is checked each time the waitqueue @wq is woken up. - * - * wake_up() has to be called after changing any variable that could - * change the result of the wait condition. - * - * The function will return -ERESTARTSYS if it was interrupted by a - * signal and 0 if @condition evaluated to true. - */ -#define wait_event_killable(wq, condition) \ -({ \ - int __ret = 0; \ - might_sleep(); \ - if (!(condition)) \ - __ret = __wait_event_killable(wq, condition); \ - __ret; \ -}) - - -#define __wait_event_lock_irq(wq, condition, lock, cmd) \ - (void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0, \ - spin_unlock_irq(&lock); \ - cmd; \ - schedule(); \ - spin_lock_irq(&lock)) - -/** - * wait_event_lock_irq_cmd - sleep until a condition gets true. The - * condition is checked under the lock. This - * is expected to be called with the lock - * taken. - * @wq: the waitqueue to wait on - * @condition: a C expression for the event to wait for - * @lock: a locked spinlock_t, which will be released before cmd - * and schedule() and reacquired afterwards. - * @cmd: a command which is invoked outside the critical section before - * sleep - * - * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the - * @condition evaluates to true. The @condition is checked each time - * the waitqueue @wq is woken up. - * - * wake_up() has to be called after changing any variable that could - * change the result of the wait condition. - * - * This is supposed to be called while holding the lock. The lock is - * dropped before invoking the cmd and going to sleep and is reacquired - * afterwards. - */ -#define wait_event_lock_irq_cmd(wq, condition, lock, cmd) \ -do { \ - if (condition) \ - break; \ - __wait_event_lock_irq(wq, condition, lock, cmd); \ -} while (0) - -/** - * wait_event_lock_irq - sleep until a condition gets true. The - * condition is checked under the lock. This - * is expected to be called with the lock - * taken. - * @wq: the waitqueue to wait on - * @condition: a C expression for the event to wait for - * @lock: a locked spinlock_t, which will be released before schedule() - * and reacquired afterwards. 
- * - * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the - * @condition evaluates to true. The @condition is checked each time - * the waitqueue @wq is woken up. - * - * wake_up() has to be called after changing any variable that could - * change the result of the wait condition. - * - * This is supposed to be called while holding the lock. The lock is - * dropped before going to sleep and is reacquired afterwards. - */ -#define wait_event_lock_irq(wq, condition, lock) \ -do { \ - if (condition) \ - break; \ - __wait_event_lock_irq(wq, condition, lock, ); \ -} while (0) - - -#define __wait_event_interruptible_lock_irq(wq, condition, lock, cmd) \ - ___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, 0, \ - spin_unlock_irq(&lock); \ - cmd; \ - schedule(); \ - spin_lock_irq(&lock)) - -/** - * wait_event_interruptible_lock_irq_cmd - sleep until a condition gets true. - * The condition is checked under the lock. This is expected to - * be called with the lock taken. - * @wq: the waitqueue to wait on - * @condition: a C expression for the event to wait for - * @lock: a locked spinlock_t, which will be released before cmd and - * schedule() and reacquired afterwards. - * @cmd: a command which is invoked outside the critical section before - * sleep - * - * The process is put to sleep (TASK_INTERRUPTIBLE) until the - * @condition evaluates to true or a signal is received. The @condition is - * checked each time the waitqueue @wq is woken up. - * - * wake_up() has to be called after changing any variable that could - * change the result of the wait condition. - * - * This is supposed to be called while holding the lock. The lock is - * dropped before invoking the cmd and going to sleep and is reacquired - * afterwards. - * - * The macro will return -ERESTARTSYS if it was interrupted by a signal - * and 0 if @condition evaluated to true. - */ -#define wait_event_interruptible_lock_irq_cmd(wq, condition, lock, cmd) \ -({ \ - int __ret = 0; \ - if (!(condition)) \ - __ret = __wait_event_interruptible_lock_irq(wq, \ - condition, lock, cmd); \ - __ret; \ -}) - -/** - * wait_event_interruptible_lock_irq - sleep until a condition gets true. - * The condition is checked under the lock. This is expected - * to be called with the lock taken. - * @wq: the waitqueue to wait on - * @condition: a C expression for the event to wait for - * @lock: a locked spinlock_t, which will be released before schedule() - * and reacquired afterwards. - * - * The process is put to sleep (TASK_INTERRUPTIBLE) until the - * @condition evaluates to true or signal is received. The @condition is - * checked each time the waitqueue @wq is woken up. - * - * wake_up() has to be called after changing any variable that could - * change the result of the wait condition. - * - * This is supposed to be called while holding the lock. The lock is - * dropped before going to sleep and is reacquired afterwards. - * - * The macro will return -ERESTARTSYS if it was interrupted by a signal - * and 0 if @condition evaluated to true. 
- */ -#define wait_event_interruptible_lock_irq(wq, condition, lock) \ -({ \ - int __ret = 0; \ - if (!(condition)) \ - __ret = __wait_event_interruptible_lock_irq(wq, \ - condition, lock,); \ - __ret; \ -}) - -#define __wait_event_interruptible_lock_irq_timeout(wq, condition, \ - lock, timeout) \ - ___wait_event(wq, ___wait_cond_timeout(condition), \ - TASK_INTERRUPTIBLE, 0, timeout, \ - spin_unlock_irq(&lock); \ - __ret = schedule_timeout(__ret); \ - spin_lock_irq(&lock)); - -/** - * wait_event_interruptible_lock_irq_timeout - sleep until a condition gets - * true or a timeout elapses. The condition is checked under - * the lock. This is expected to be called with the lock taken. - * @wq: the waitqueue to wait on - * @condition: a C expression for the event to wait for - * @lock: a locked spinlock_t, which will be released before schedule() - * and reacquired afterwards. - * @timeout: timeout, in jiffies - * - * The process is put to sleep (TASK_INTERRUPTIBLE) until the - * @condition evaluates to true or signal is received. The @condition is - * checked each time the waitqueue @wq is woken up. - * - * wake_up() has to be called after changing any variable that could - * change the result of the wait condition. - * - * This is supposed to be called while holding the lock. The lock is - * dropped before going to sleep and is reacquired afterwards. - * - * The function returns 0 if the @timeout elapsed, -ERESTARTSYS if it - * was interrupted by a signal, and the remaining jiffies otherwise - * if the condition evaluated to true before the timeout elapsed. - */ -#define wait_event_interruptible_lock_irq_timeout(wq, condition, lock, \ - timeout) \ -({ \ - long __ret = timeout; \ - if (!___wait_cond_timeout(condition)) \ - __ret = __wait_event_interruptible_lock_irq_timeout( \ - wq, condition, lock, timeout); \ - __ret; \ -}) - -/* - * Waitqueues which are removed from the waitqueue_head at wakeup time - */ -void prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state); -void prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state); -long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_t *wait, int state); -void finish_wait(wait_queue_head_t *q, wait_queue_t *wait); -void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait, unsigned int mode, void *key); -long wait_woken(wait_queue_t *wait, unsigned mode, long timeout); -int woken_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key); -int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key); -int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key); - -#define DEFINE_WAIT_FUNC(name, function) \ - wait_queue_t name = { \ - .private = current, \ - .func = function, \ - .task_list = LIST_HEAD_INIT((name).task_list), \ - } - -#define DEFINE_WAIT(name) DEFINE_WAIT_FUNC(name, autoremove_wake_function) - -#define DEFINE_WAIT_BIT(name, word, bit) \ - struct wait_bit_queue name = { \ - .key = __WAIT_BIT_KEY_INITIALIZER(word, bit), \ - .wait = { \ - .private = current, \ - .func = wake_bit_function, \ - .task_list = \ - LIST_HEAD_INIT((name).wait.task_list), \ - }, \ - } - -#define init_wait(wait) \ - do { \ - (wait)->private = current; \ - (wait)->func = autoremove_wake_function; \ - INIT_LIST_HEAD(&(wait)->task_list); \ - (wait)->flags = 0; \ - } while (0) - - -extern int bit_wait(struct wait_bit_key *, int); -extern int bit_wait_io(struct wait_bit_key *, int); -extern int bit_wait_timeout(struct wait_bit_key *, int); -extern int 
bit_wait_io_timeout(struct wait_bit_key *, int); +void wake_up_bit(void *, int); +void __wait_on_bit(void *, int, unsigned); +void __wait_on_bit_lock(void *, int, unsigned); -/** - * wait_on_bit - wait for a bit to be cleared - * @word: the word being waited on, a kernel virtual address - * @bit: the bit of the word being waited on - * @mode: the task state to sleep in - * - * There is a standard hashed waitqueue table for generic use. This - * is the part of the hashtable's accessor API that waits on a bit. - * For instance, if one were to have waiters on a bitflag, one would - * call wait_on_bit() in threads waiting for the bit to clear. - * One uses wait_on_bit() where one is waiting for the bit to clear, - * but has no intention of setting it. - * Returned value will be zero if the bit was cleared, or non-zero - * if the process received a signal and the mode permitted wakeup - * on that signal. - */ static inline int wait_on_bit(unsigned long *word, int bit, unsigned mode) { - might_sleep(); if (!test_bit(bit, word)) return 0; - return out_of_line_wait_on_bit(word, bit, - bit_wait, - mode); + __wait_on_bit(word, bit, mode); + return 0; } -/** - * wait_on_bit_io - wait for a bit to be cleared - * @word: the word being waited on, a kernel virtual address - * @bit: the bit of the word being waited on - * @mode: the task state to sleep in - * - * Use the standard hashed waitqueue table to wait for a bit - * to be cleared. This is similar to wait_on_bit(), but calls - * io_schedule() instead of schedule() for the actual waiting. - * - * Returned value will be zero if the bit was cleared, or non-zero - * if the process received a signal and the mode permitted wakeup - * on that signal. - */ -static inline int -wait_on_bit_io(unsigned long *word, int bit, unsigned mode) -{ - might_sleep(); - if (!test_bit(bit, word)) - return 0; - return out_of_line_wait_on_bit(word, bit, - bit_wait_io, - mode); -} - -/** - * wait_on_bit_timeout - wait for a bit to be cleared or a timeout elapses - * @word: the word being waited on, a kernel virtual address - * @bit: the bit of the word being waited on - * @mode: the task state to sleep in - * @timeout: timeout, in jiffies - * - * Use the standard hashed waitqueue table to wait for a bit - * to be cleared. This is similar to wait_on_bit(), except also takes a - * timeout parameter. - * - * Returned value will be zero if the bit was cleared before the - * @timeout elapsed, or non-zero if the @timeout elapsed or process - * received a signal and the mode permitted wakeup on that signal. - */ -static inline int -wait_on_bit_timeout(unsigned long *word, int bit, unsigned mode, - unsigned long timeout) -{ - might_sleep(); - if (!test_bit(bit, word)) - return 0; - return out_of_line_wait_on_bit_timeout(word, bit, - bit_wait_timeout, - mode, timeout); -} - -/** - * wait_on_bit_action - wait for a bit to be cleared - * @word: the word being waited on, a kernel virtual address - * @bit: the bit of the word being waited on - * @action: the function used to sleep, which may take special actions - * @mode: the task state to sleep in - * - * Use the standard hashed waitqueue table to wait for a bit - * to be cleared, and allow the waiting action to be specified. - * This is like wait_on_bit() but allows fine control of how the waiting - * is done. - * - * Returned value will be zero if the bit was cleared, or non-zero - * if the process received a signal and the mode permitted wakeup - * on that signal. 
- */ -static inline int -wait_on_bit_action(unsigned long *word, int bit, wait_bit_action_f *action, - unsigned mode) -{ - might_sleep(); - if (!test_bit(bit, word)) - return 0; - return out_of_line_wait_on_bit(word, bit, action, mode); -} - -/** - * wait_on_bit_lock - wait for a bit to be cleared, when wanting to set it - * @word: the word being waited on, a kernel virtual address - * @bit: the bit of the word being waited on - * @mode: the task state to sleep in - * - * There is a standard hashed waitqueue table for generic use. This - * is the part of the hashtable's accessor API that waits on a bit - * when one intends to set it, for instance, trying to lock bitflags. - * For instance, if one were to have waiters trying to set bitflag - * and waiting for it to clear before setting it, one would call - * wait_on_bit() in threads waiting to be able to set the bit. - * One uses wait_on_bit_lock() where one is waiting for the bit to - * clear with the intention of setting it, and when done, clearing it. - * - * Returns zero if the bit was (eventually) found to be clear and was - * set. Returns non-zero if a signal was delivered to the process and - * the @mode allows that signal to wake the process. - */ static inline int wait_on_bit_lock(unsigned long *word, int bit, unsigned mode) { - might_sleep(); if (!test_and_set_bit(bit, word)) return 0; - return out_of_line_wait_on_bit_lock(word, bit, bit_wait, mode); + __wait_on_bit_lock(word, bit, mode); + return 0; } -/** - * wait_on_bit_lock_io - wait for a bit to be cleared, when wanting to set it - * @word: the word being waited on, a kernel virtual address - * @bit: the bit of the word being waited on - * @mode: the task state to sleep in - * - * Use the standard hashed waitqueue table to wait for a bit - * to be cleared and then to atomically set it. This is similar - * to wait_on_bit(), but calls io_schedule() instead of schedule() - * for the actual waiting. - * - * Returns zero if the bit was (eventually) found to be clear and was - * set. Returns non-zero if a signal was delivered to the process and - * the @mode allows that signal to wake the process. - */ -static inline int -wait_on_bit_lock_io(unsigned long *word, int bit, unsigned mode) -{ - might_sleep(); - if (!test_and_set_bit(bit, word)) - return 0; - return out_of_line_wait_on_bit_lock(word, bit, bit_wait_io, mode); -} - -/** - * wait_on_bit_lock_action - wait for a bit to be cleared, when wanting to set it - * @word: the word being waited on, a kernel virtual address - * @bit: the bit of the word being waited on - * @action: the function used to sleep, which may take special actions - * @mode: the task state to sleep in - * - * Use the standard hashed waitqueue table to wait for a bit - * to be cleared and then to set it, and allow the waiting action - * to be specified. - * This is like wait_on_bit() but allows fine control of how the waiting - * is done. - * - * Returns zero if the bit was (eventually) found to be clear and was - * set. Returns non-zero if a signal was delivered to the process and - * the @mode allows that signal to wake the process. 
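The userspace wait_on_bit()/wait_on_bit_lock() shims above now block via __wait_on_bit()/__wait_on_bit_lock() and always return 0 once the bit clears; there is no signal case left to report. A minimal caller-side sketch of the waiter/waker pairing they keep working; the flag word, bit number, and function names are illustrative only, and test_bit()/clear_bit() are assumed to come from this tree's bitops shim:

	#include <linux/wait.h>

	#define EXAMPLE_BIT	0

	static unsigned long example_flags;

	static void example_wait(void)
	{
		/* Sleeps until bit 0 of example_flags is clear: */
		wait_on_bit(&example_flags, EXAMPLE_BIT, TASK_UNINTERRUPTIBLE);
	}

	static void example_wake(void)
	{
		clear_bit(EXAMPLE_BIT, &example_flags);
		/*
		 * In-kernel callers would put smp_mb__after_atomic() here;
		 * whether the userspace shim needs it is not shown in this
		 * patch.
		 */
		wake_up_bit(&example_flags, EXAMPLE_BIT);
	}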
- */ -static inline int -wait_on_bit_lock_action(unsigned long *word, int bit, wait_bit_action_f *action, - unsigned mode) -{ - might_sleep(); - if (!test_and_set_bit(bit, word)) - return 0; - return out_of_line_wait_on_bit_lock(word, bit, action, mode); -} - -/** - * wait_on_atomic_t - Wait for an atomic_t to become 0 - * @val: The atomic value being waited on, a kernel virtual address - * @action: the function used to sleep, which may take special actions - * @mode: the task state to sleep in - * - * Wait for an atomic_t to become 0. We abuse the bit-wait waitqueue table for - * the purpose of getting a waitqueue, but we set the key to a bit number - * outside of the target 'word'. - */ -static inline -int wait_on_atomic_t(atomic_t *val, int (*action)(atomic_t *), unsigned mode) -{ - might_sleep(); - if (atomic_read(val) == 0) - return 0; - return out_of_line_wait_on_atomic_t(val, action, mode); -} +#define wait_on_bit_io(w, b, m) wait_on_bit(w, b, m) +#define wait_on_bit_lock_io(w, b, m) wait_on_bit_lock(w, b, m) #endif /* _LINUX_WAIT_H */ diff --git a/include/linux/zconf.h b/include/linux/zconf.h deleted file mode 100644 index 0beb75e..0000000 --- a/include/linux/zconf.h +++ /dev/null @@ -1,57 +0,0 @@ -/* zconf.h -- configuration of the zlib compression library - * Copyright (C) 1995-1998 Jean-loup Gailly. - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* @(#) $Id$ */ - -#ifndef _ZCONF_H -#define _ZCONF_H - -/* The memory requirements for deflate are (in bytes): - (1 << (windowBits+2)) + (1 << (memLevel+9)) - that is: 128K for windowBits=15 + 128K for memLevel = 8 (default values) - plus a few kilobytes for small objects. For example, if you want to reduce - the default memory requirements from 256K to 128K, compile with - make CFLAGS="-O -DMAX_WBITS=14 -DMAX_MEM_LEVEL=7" - Of course this will generally degrade compression (there's no free lunch). - - The memory requirements for inflate are (in bytes) 1 << windowBits - that is, 32K for windowBits=15 (default value) plus a few kilobytes - for small objects. -*/ - -/* Maximum value for memLevel in deflateInit2 */ -#ifndef MAX_MEM_LEVEL -# define MAX_MEM_LEVEL 8 -#endif - -/* Maximum value for windowBits in deflateInit2 and inflateInit2. - * WARNING: reducing MAX_WBITS makes minigzip unable to extract .gz files - * created by gzip. (Files created by minigzip can still be extracted by - * gzip.) - */ -#ifndef MAX_WBITS -# define MAX_WBITS 15 /* 32K LZ77 window */ -#endif - -/* default windowBits for decompression. MAX_WBITS is for compression only */ -#ifndef DEF_WBITS -# define DEF_WBITS MAX_WBITS -#endif - -/* default memLevel */ -#if MAX_MEM_LEVEL >= 8 -# define DEF_MEM_LEVEL 8 -#else -# define DEF_MEM_LEVEL MAX_MEM_LEVEL -#endif - - /* Type declarations */ - -typedef unsigned char Byte; /* 8 bits */ -typedef unsigned int uInt; /* 16 bits or more */ -typedef unsigned long uLong; /* 32 bits or more */ -typedef void *voidp; - -#endif /* _ZCONF_H */ diff --git a/include/linux/zlib.h b/include/linux/zlib.h index 92dbbd3..45cfbd8 100644 --- a/include/linux/zlib.h +++ b/include/linux/zlib.h @@ -1,593 +1,18 @@ -/* zlib.h -- interface of the 'zlib' general purpose compression library - - Copyright (C) 1995-2005 Jean-loup Gailly and Mark Adler - - This software is provided 'as-is', without any express or implied - warranty. In no event will the authors be held liable for any damages - arising from the use of this software. 
- - Permission is granted to anyone to use this software for any purpose, - including commercial applications, and to alter it and redistribute it - freely, subject to the following restrictions: - - 1. The origin of this software must not be misrepresented; you must not - claim that you wrote the original software. If you use this software - in a product, an acknowledgment in the product documentation would be - appreciated but is not required. - 2. Altered source versions must be plainly marked as such, and must not be - misrepresented as being the original software. - 3. This notice may not be removed or altered from any source distribution. - - Jean-loup Gailly Mark Adler - jloup@gzip.org madler@alumni.caltech.edu - - - The data format used by the zlib library is described by RFCs (Request for - Comments) 1950 to 1952 in the files http://www.ietf.org/rfc/rfc1950.txt - (zlib format), rfc1951.txt (deflate format) and rfc1952.txt (gzip format). -*/ - #ifndef _ZLIB_H #define _ZLIB_H -#include - -/* zlib deflate based on ZLIB_VERSION "1.1.3" */ -/* zlib inflate based on ZLIB_VERSION "1.2.3" */ - -/* - This is a modified version of zlib for use inside the Linux kernel. - The main changes are to perform all memory allocation in advance. - - Inflation Changes: - * Z_PACKET_FLUSH is added and used by ppp_deflate. Before returning - this checks there is no more input data available and the next data - is a STORED block. It also resets the mode to be read for the next - data, all as per PPP requirements. - * Addition of zlib_inflateIncomp which copies incompressible data into - the history window and adjusts the accoutning without calling - zlib_inflate itself to inflate the data. -*/ - -/* - The 'zlib' compression library provides in-memory compression and - decompression functions, including integrity checks of the uncompressed - data. This version of the library supports only one compression method - (deflation) but other algorithms will be added later and will have the same - stream interface. - - Compression can be done in a single step if the buffers are large - enough (for example if an input file is mmap'ed), or can be done by - repeated calls of the compression function. In the latter case, the - application must provide more input and/or consume the output - (providing more output space) before each call. - - The compressed data format used by default by the in-memory functions is - the zlib format, which is a zlib wrapper documented in RFC 1950, wrapped - around a deflate stream, which is itself documented in RFC 1951. - - The library also supports reading and writing files in gzip (.gz) format - with an interface similar to that of stdio. - - The zlib format was designed to be compact and fast for use in memory - and on communications channels. The gzip format was designed for single- - file compression on file systems, has a larger header than zlib to maintain - directory information, and uses a different, slower check method than zlib. - - The library does not install any signal handler. The decoder checks - the consistency of the compressed data, so the library should never - crash even in case of corrupted input. 
-*/ - -struct internal_state; - -typedef struct z_stream_s { - const Byte *next_in; /* next input byte */ - uLong avail_in; /* number of bytes available at next_in */ - uLong total_in; /* total nb of input bytes read so far */ - - Byte *next_out; /* next output byte should be put there */ - uLong avail_out; /* remaining free space at next_out */ - uLong total_out; /* total nb of bytes output so far */ - - char *msg; /* last error message, NULL if no error */ - struct internal_state *state; /* not visible by applications */ - - void *workspace; /* memory allocated for this stream */ - - int data_type; /* best guess about the data type: ascii or binary */ - uLong adler; /* adler32 value of the uncompressed data */ - uLong reserved; /* reserved for future use */ -} z_stream; - -typedef z_stream *z_streamp; - -/* - The application must update next_in and avail_in when avail_in has - dropped to zero. It must update next_out and avail_out when avail_out - has dropped to zero. The application must initialize zalloc, zfree and - opaque before calling the init function. All other fields are set by the - compression library and must not be updated by the application. - - The opaque value provided by the application will be passed as the first - parameter for calls of zalloc and zfree. This can be useful for custom - memory management. The compression library attaches no meaning to the - opaque value. - - zalloc must return NULL if there is not enough memory for the object. - If zlib is used in a multi-threaded application, zalloc and zfree must be - thread safe. - - On 16-bit systems, the functions zalloc and zfree must be able to allocate - exactly 65536 bytes, but will not be required to allocate more than this - if the symbol MAXSEG_64K is defined (see zconf.h). WARNING: On MSDOS, - pointers returned by zalloc for objects of exactly 65536 bytes *must* - have their offset normalized to zero. The default allocation function - provided by this library ensures this (see zutil.c). To reduce memory - requirements and avoid any allocation of 64K objects, at the expense of - compression ratio, compile the library with -DMAX_WBITS=14 (see zconf.h). - - The fields total_in and total_out can be used for statistics or - progress reports. After compression, total_in holds the total size of - the uncompressed data and may be saved for use in the decompressor - (particularly if the decompressor wants to decompress everything in - a single step). -*/ - - /* constants */ - -#define Z_NO_FLUSH 0 -#define Z_PARTIAL_FLUSH 1 /* will be removed, use Z_SYNC_FLUSH instead */ -#define Z_PACKET_FLUSH 2 -#define Z_SYNC_FLUSH 3 -#define Z_FULL_FLUSH 4 -#define Z_FINISH 5 -#define Z_BLOCK 6 /* Only for inflate at present */ -/* Allowed flush values; see deflate() and inflate() below for details */ - -#define Z_OK 0 -#define Z_STREAM_END 1 -#define Z_NEED_DICT 2 -#define Z_ERRNO (-1) -#define Z_STREAM_ERROR (-2) -#define Z_DATA_ERROR (-3) -#define Z_MEM_ERROR (-4) -#define Z_BUF_ERROR (-5) -#define Z_VERSION_ERROR (-6) -/* Return codes for the compression/decompression functions. Negative - * values are errors, positive values are used for special but normal events. 
- */ - -#define Z_NO_COMPRESSION 0 -#define Z_BEST_SPEED 1 -#define Z_BEST_COMPRESSION 9 -#define Z_DEFAULT_COMPRESSION (-1) -/* compression levels */ - -#define Z_FILTERED 1 -#define Z_HUFFMAN_ONLY 2 -#define Z_DEFAULT_STRATEGY 0 -/* compression strategy; see deflateInit2() below for details */ - -#define Z_BINARY 0 -#define Z_ASCII 1 -#define Z_UNKNOWN 2 -/* Possible values of the data_type field */ - -#define Z_DEFLATED 8 -/* The deflate compression method (the only one supported in this version) */ - - /* basic functions */ - -extern int zlib_deflate_workspacesize (int windowBits, int memLevel); -/* - Returns the number of bytes that needs to be allocated for a per- - stream workspace with the specified parameters. A pointer to this - number of bytes should be returned in stream->workspace before - you call zlib_deflateInit() or zlib_deflateInit2(). If you call - zlib_deflateInit(), specify windowBits = MAX_WBITS and memLevel = - MAX_MEM_LEVEL here. If you call zlib_deflateInit2(), the windowBits - and memLevel parameters passed to zlib_deflateInit2() must not - exceed those passed here. -*/ - -/* -extern int deflateInit (z_streamp strm, int level); - - Initializes the internal stream state for compression. The fields - zalloc, zfree and opaque must be initialized before by the caller. - If zalloc and zfree are set to NULL, deflateInit updates them to - use default allocation functions. - - The compression level must be Z_DEFAULT_COMPRESSION, or between 0 and 9: - 1 gives best speed, 9 gives best compression, 0 gives no compression at - all (the input data is simply copied a block at a time). - Z_DEFAULT_COMPRESSION requests a default compromise between speed and - compression (currently equivalent to level 6). - - deflateInit returns Z_OK if success, Z_MEM_ERROR if there was not - enough memory, Z_STREAM_ERROR if level is not a valid compression level, - Z_VERSION_ERROR if the zlib library version (zlib_version) is incompatible - with the version assumed by the caller (ZLIB_VERSION). - msg is set to null if there is no error message. deflateInit does not - perform any compression: this will be done by deflate(). -*/ - - -extern int zlib_deflate (z_streamp strm, int flush); -/* - deflate compresses as much data as possible, and stops when the input - buffer becomes empty or the output buffer becomes full. It may introduce some - output latency (reading input without producing any output) except when - forced to flush. - - The detailed semantics are as follows. deflate performs one or both of the - following actions: - - - Compress more input starting at next_in and update next_in and avail_in - accordingly. If not all input can be processed (because there is not - enough room in the output buffer), next_in and avail_in are updated and - processing will resume at this point for the next call of deflate(). - - - Provide more output starting at next_out and update next_out and avail_out - accordingly. This action is forced if the parameter flush is non zero. - Forcing flush frequently degrades the compression ratio, so this parameter - should be set only when necessary (in interactive applications). - Some output may be provided even if flush is not set. - - Before the call of deflate(), the application should ensure that at least - one of the actions is possible, by providing more input and/or consuming - more output, and updating avail_in or avail_out accordingly; avail_out - should never be zero before the call. 
The application can consume the - compressed output when it wants, for example when the output buffer is full - (avail_out == 0), or after each call of deflate(). If deflate returns Z_OK - and with zero avail_out, it must be called again after making room in the - output buffer because there might be more output pending. - - If the parameter flush is set to Z_SYNC_FLUSH, all pending output is - flushed to the output buffer and the output is aligned on a byte boundary, so - that the decompressor can get all input data available so far. (In particular - avail_in is zero after the call if enough output space has been provided - before the call.) Flushing may degrade compression for some compression - algorithms and so it should be used only when necessary. - - If flush is set to Z_FULL_FLUSH, all output is flushed as with - Z_SYNC_FLUSH, and the compression state is reset so that decompression can - restart from this point if previous compressed data has been damaged or if - random access is desired. Using Z_FULL_FLUSH too often can seriously degrade - the compression. - - If deflate returns with avail_out == 0, this function must be called again - with the same value of the flush parameter and more output space (updated - avail_out), until the flush is complete (deflate returns with non-zero - avail_out). - - If the parameter flush is set to Z_FINISH, pending input is processed, - pending output is flushed and deflate returns with Z_STREAM_END if there - was enough output space; if deflate returns with Z_OK, this function must be - called again with Z_FINISH and more output space (updated avail_out) but no - more input data, until it returns with Z_STREAM_END or an error. After - deflate has returned Z_STREAM_END, the only possible operations on the - stream are deflateReset or deflateEnd. - - Z_FINISH can be used immediately after deflateInit if all the compression - is to be done in a single step. In this case, avail_out must be at least - 0.1% larger than avail_in plus 12 bytes. If deflate does not return - Z_STREAM_END, then it must be called again as described above. - - deflate() sets strm->adler to the adler32 checksum of all input read - so far (that is, total_in bytes). - - deflate() may update data_type if it can make a good guess about - the input data type (Z_ASCII or Z_BINARY). In doubt, the data is considered - binary. This field is only for information purposes and does not affect - the compression algorithm in any manner. - - deflate() returns Z_OK if some progress has been made (more input - processed or more output produced), Z_STREAM_END if all input has been - consumed and all output has been produced (only when flush is set to - Z_FINISH), Z_STREAM_ERROR if the stream state was inconsistent (for example - if next_in or next_out was NULL), Z_BUF_ERROR if no progress is possible - (for example avail_in or avail_out was zero). -*/ - - -extern int zlib_deflateEnd (z_streamp strm); -/* - All dynamically allocated data structures for this stream are freed. - This function discards any unprocessed input and does not flush any - pending output. - - deflateEnd returns Z_OK if success, Z_STREAM_ERROR if the - stream state was inconsistent, Z_DATA_ERROR if the stream was freed - prematurely (some input or output was discarded). In the error case, - msg may be set but then points to a static string (which must not be - deallocated). -*/ - - -extern int zlib_inflate_workspacesize (void); -/* - Returns the number of bytes that needs to be allocated for a per- - stream workspace. 
A pointer to this number of bytes should be - returned in stream->workspace before calling zlib_inflateInit(). -*/ - -/* -extern int zlib_inflateInit (z_streamp strm); - - Initializes the internal stream state for decompression. The fields - next_in, avail_in, and workspace must be initialized before by - the caller. If next_in is not NULL and avail_in is large enough (the exact - value depends on the compression method), inflateInit determines the - compression method from the zlib header and allocates all data structures - accordingly; otherwise the allocation will be deferred to the first call of - inflate. If zalloc and zfree are set to NULL, inflateInit updates them to - use default allocation functions. - - inflateInit returns Z_OK if success, Z_MEM_ERROR if there was not enough - memory, Z_VERSION_ERROR if the zlib library version is incompatible with the - version assumed by the caller. msg is set to null if there is no error - message. inflateInit does not perform any decompression apart from reading - the zlib header if present: this will be done by inflate(). (So next_in and - avail_in may be modified, but next_out and avail_out are unchanged.) -*/ - - -extern int zlib_inflate (z_streamp strm, int flush); -/* - inflate decompresses as much data as possible, and stops when the input - buffer becomes empty or the output buffer becomes full. It may introduce - some output latency (reading input without producing any output) except when - forced to flush. - - The detailed semantics are as follows. inflate performs one or both of the - following actions: - - - Decompress more input starting at next_in and update next_in and avail_in - accordingly. If not all input can be processed (because there is not - enough room in the output buffer), next_in is updated and processing - will resume at this point for the next call of inflate(). - - - Provide more output starting at next_out and update next_out and avail_out - accordingly. inflate() provides as much output as possible, until there - is no more input data or no more space in the output buffer (see below - about the flush parameter). - - Before the call of inflate(), the application should ensure that at least - one of the actions is possible, by providing more input and/or consuming - more output, and updating the next_* and avail_* values accordingly. - The application can consume the uncompressed output when it wants, for - example when the output buffer is full (avail_out == 0), or after each - call of inflate(). If inflate returns Z_OK and with zero avail_out, it - must be called again after making room in the output buffer because there - might be more output pending. - - The flush parameter of inflate() can be Z_NO_FLUSH, Z_SYNC_FLUSH, - Z_FINISH, or Z_BLOCK. Z_SYNC_FLUSH requests that inflate() flush as much - output as possible to the output buffer. Z_BLOCK requests that inflate() stop - if and when it gets to the next deflate block boundary. When decoding the - zlib or gzip format, this will cause inflate() to return immediately after - the header and before the first block. When doing a raw inflate, inflate() - will go ahead and process the first block, and will return when it gets to - the end of that block, or when it runs out of data. - - The Z_BLOCK option assists in appending to or combining deflate streams. 
- Also to assist in this, on return inflate() will set strm->data_type to the - number of unused bits in the last byte taken from strm->next_in, plus 64 - if inflate() is currently decoding the last block in the deflate stream, - plus 128 if inflate() returned immediately after decoding an end-of-block - code or decoding the complete header up to just before the first byte of the - deflate stream. The end-of-block will not be indicated until all of the - uncompressed data from that block has been written to strm->next_out. The - number of unused bits may in general be greater than seven, except when - bit 7 of data_type is set, in which case the number of unused bits will be - less than eight. - - inflate() should normally be called until it returns Z_STREAM_END or an - error. However if all decompression is to be performed in a single step - (a single call of inflate), the parameter flush should be set to - Z_FINISH. In this case all pending input is processed and all pending - output is flushed; avail_out must be large enough to hold all the - uncompressed data. (The size of the uncompressed data may have been saved - by the compressor for this purpose.) The next operation on this stream must - be inflateEnd to deallocate the decompression state. The use of Z_FINISH - is never required, but can be used to inform inflate that a faster approach - may be used for the single inflate() call. - - In this implementation, inflate() always flushes as much output as - possible to the output buffer, and always uses the faster approach on the - first call. So the only effect of the flush parameter in this implementation - is on the return value of inflate(), as noted below, or when it returns early - because Z_BLOCK is used. - - If a preset dictionary is needed after this call (see inflateSetDictionary - below), inflate sets strm->adler to the adler32 checksum of the dictionary - chosen by the compressor and returns Z_NEED_DICT; otherwise it sets - strm->adler to the adler32 checksum of all output produced so far (that is, - total_out bytes) and returns Z_OK, Z_STREAM_END or an error code as described - below. At the end of the stream, inflate() checks that its computed adler32 - checksum is equal to that saved by the compressor and returns Z_STREAM_END - only if the checksum is correct. - - inflate() will decompress and check either zlib-wrapped or gzip-wrapped - deflate data. The header type is detected automatically. Any information - contained in the gzip header is not retained, so applications that need that - information should instead use raw inflate, see inflateInit2() below, or - inflateBack() and perform their own processing of the gzip header and - trailer. - - inflate() returns Z_OK if some progress has been made (more input processed - or more output produced), Z_STREAM_END if the end of the compressed data has - been reached and all uncompressed output has been produced, Z_NEED_DICT if a - preset dictionary is needed at this point, Z_DATA_ERROR if the input data was - corrupted (input stream not conforming to the zlib format or incorrect check - value), Z_STREAM_ERROR if the stream structure was inconsistent (for example - if next_in or next_out was NULL), Z_MEM_ERROR if there was not enough memory, - Z_BUF_ERROR if no progress is possible or if there was not enough room in the - output buffer when Z_FINISH is used. Note that Z_BUF_ERROR is not fatal, and - inflate() can be called again with more input and more output space to - continue decompressing. 
If Z_DATA_ERROR is returned, the application may then - call inflateSync() to look for a good compression block if a partial recovery - of the data is desired. -*/ - - -extern int zlib_inflateEnd (z_streamp strm); -/* - All dynamically allocated data structures for this stream are freed. - This function discards any unprocessed input and does not flush any - pending output. - - inflateEnd returns Z_OK if success, Z_STREAM_ERROR if the stream state - was inconsistent. In the error case, msg may be set but then points to a - static string (which must not be deallocated). -*/ - - /* Advanced functions */ - -/* - The following functions are needed only in some special applications. -*/ - -/* -extern int deflateInit2 (z_streamp strm, - int level, - int method, - int windowBits, - int memLevel, - int strategy); - - This is another version of deflateInit with more compression options. The - fields next_in, zalloc, zfree and opaque must be initialized before by - the caller. - - The method parameter is the compression method. It must be Z_DEFLATED in - this version of the library. - - The windowBits parameter is the base two logarithm of the window size - (the size of the history buffer). It should be in the range 8..15 for this - version of the library. Larger values of this parameter result in better - compression at the expense of memory usage. The default value is 15 if - deflateInit is used instead. - - The memLevel parameter specifies how much memory should be allocated - for the internal compression state. memLevel=1 uses minimum memory but - is slow and reduces compression ratio; memLevel=9 uses maximum memory - for optimal speed. The default value is 8. See zconf.h for total memory - usage as a function of windowBits and memLevel. - - The strategy parameter is used to tune the compression algorithm. Use the - value Z_DEFAULT_STRATEGY for normal data, Z_FILTERED for data produced by a - filter (or predictor), or Z_HUFFMAN_ONLY to force Huffman encoding only (no - string match). Filtered data consists mostly of small values with a - somewhat random distribution. In this case, the compression algorithm is - tuned to compress them better. The effect of Z_FILTERED is to force more - Huffman coding and less string matching; it is somewhat intermediate - between Z_DEFAULT and Z_HUFFMAN_ONLY. The strategy parameter only affects - the compression ratio but not the correctness of the compressed output even - if it is not set appropriately. - - deflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough - memory, Z_STREAM_ERROR if a parameter is invalid (such as an invalid - method). msg is set to null if there is no error message. deflateInit2 does - not perform any compression: this will be done by deflate(). -*/ - -extern int zlib_deflateReset (z_streamp strm); -/* - This function is equivalent to deflateEnd followed by deflateInit, - but does not free and reallocate all the internal compression state. - The stream will keep the same compression level and any other attributes - that may have been set by deflateInit2. - - deflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source - stream state was inconsistent (such as zalloc or state being NULL). -*/ - -static inline unsigned long deflateBound(unsigned long s) -{ - return s + ((s + 7) >> 3) + ((s + 63) >> 6) + 11; -} - -/* -extern int inflateInit2 (z_streamp strm, int windowBits); - - This is another version of inflateInit with an extra parameter. 
The - fields next_in, avail_in, zalloc, zfree and opaque must be initialized - before by the caller. - - The windowBits parameter is the base two logarithm of the maximum window - size (the size of the history buffer). It should be in the range 8..15 for - this version of the library. The default value is 15 if inflateInit is used - instead. windowBits must be greater than or equal to the windowBits value - provided to deflateInit2() while compressing, or it must be equal to 15 if - deflateInit2() was not used. If a compressed stream with a larger window - size is given as input, inflate() will return with the error code - Z_DATA_ERROR instead of trying to allocate a larger window. - - windowBits can also be -8..-15 for raw inflate. In this case, -windowBits - determines the window size. inflate() will then process raw deflate data, - not looking for a zlib or gzip header, not generating a check value, and not - looking for any check values for comparison at the end of the stream. This - is for use with other formats that use the deflate compressed data format - such as zip. Those formats provide their own check values. If a custom - format is developed using the raw deflate format for compressed data, it is - recommended that a check value such as an adler32 or a crc32 be applied to - the uncompressed data as is done in the zlib, gzip, and zip formats. For - most applications, the zlib format should be used as is. Note that comments - above on the use in deflateInit2() applies to the magnitude of windowBits. - - windowBits can also be greater than 15 for optional gzip decoding. Add - 32 to windowBits to enable zlib and gzip decoding with automatic header - detection, or add 16 to decode only the gzip format (the zlib format will - return a Z_DATA_ERROR). If a gzip stream is being decoded, strm->adler is - a crc32 instead of an adler32. - - inflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough - memory, Z_STREAM_ERROR if a parameter is invalid (such as a null strm). msg - is set to null if there is no error message. inflateInit2 does not perform - any decompression apart from reading the zlib header if present: this will - be done by inflate(). (So next_in and avail_in may be modified, but next_out - and avail_out are unchanged.) -*/ - -extern int zlib_inflateReset (z_streamp strm); -/* - This function is equivalent to inflateEnd followed by inflateInit, - but does not free and reallocate all the internal decompression state. - The stream will keep attributes that may have been set by inflateInit2. - - inflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source - stream state was inconsistent (such as zalloc or state being NULL). -*/ - -extern int zlib_inflateIncomp (z_stream *strm); -/* - This function adds the data at next_in (avail_in bytes) to the output - history without performing any output. There must be no pending output, - and the decompressor must be expecting to see the start of a block. - Calling this function is equivalent to decompressing a stored block - containing the data at next_in (except that the data is not output). 
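The replacement header that follows maps the kernel-style zlib_*() wrappers straight onto the userspace libz entry points and stubs the workspacesize helpers to 0, so the workspace bookkeeping described above disappears. A hedged sketch of what a one-shot compression through the shim effectively becomes, written directly against the userspace API; the helper name and buffer handling are illustrative, and it assumes the system <zlib.h> is what ends up included:

	#include <string.h>
	#include <zlib.h>

	/* Compress src into dst in one call; returns compressed size or -1. */
	static long example_deflate(void *dst, unsigned dst_len,
				    const void *src, unsigned src_len)
	{
		z_stream strm;
		long ret;

		memset(&strm, 0, sizeof(strm));
		/*
		 * zlib_deflateInit2() below expands to exactly this call;
		 * 15 and 8 match MAX_WBITS and the shim's DEF_MEM_LEVEL.
		 */
		if (deflateInit2(&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
				 15, 8, Z_DEFAULT_STRATEGY) != Z_OK)
			return -1;

		strm.next_in	= (void *) src;
		strm.avail_in	= src_len;
		strm.next_out	= dst;
		strm.avail_out	= dst_len;

		/* All input is supplied up front, so flush with Z_FINISH: */
		if (deflate(&strm, Z_FINISH) != Z_STREAM_END) {
			deflateEnd(&strm);
			return -1;
		}

		ret = strm.total_out;
		deflateEnd(&strm);
		return ret;
	}

The decompression side pairs up the same way: once the defines below are in effect, zlib_inflateInit2() and zlib_inflate() are plain inflateInit2() and inflate().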
-*/ +#include -#define zlib_deflateInit(strm, level) \ - zlib_deflateInit2((strm), (level), Z_DEFLATED, MAX_WBITS, \ - DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY) -#define zlib_inflateInit(strm) \ - zlib_inflateInit2((strm), DEF_WBITS) +#define zlib_inflate_workspacesize() 0 +#define zlib_deflate_workspacesize(windowBits, memLevel) 0 -extern int zlib_deflateInit2(z_streamp strm, int level, int method, - int windowBits, int memLevel, - int strategy); -extern int zlib_inflateInit2(z_streamp strm, int windowBits); +#define zlib_inflateInit2 inflateInit2 +#define zlib_inflate inflate -#if !defined(_Z_UTIL_H) && !defined(NO_DUMMY_DECL) - struct internal_state {int dummy;}; /* hack for buggy compilers */ -#endif +#define zlib_deflateInit2 deflateInit2 +#define zlib_deflate deflate +#define zlib_deflateEnd deflateEnd -/* Utility function: initialize zlib, unpack binary blob, clean up zlib, - * return len or negative error code. */ -extern int zlib_inflate_blob(void *dst, unsigned dst_sz, const void *src, unsigned src_sz); +#define DEF_MEM_LEVEL 8 #endif /* _ZLIB_H */ diff --git a/include/trace/events/bcache.h b/include/trace/events/bcache.h index 01e4b79..06ce021 100644 --- a/include/trace/events/bcache.h +++ b/include/trace/events/bcache.h @@ -348,12 +348,12 @@ DEFINE_EVENT(bcache_bio, bcache_journal_write, /* Device state changes */ -DEFINE_EVENT(cache_set, bcache_cache_set_read_only, +DEFINE_EVENT(cache_set, fs_read_only, TP_PROTO(struct cache_set *c), TP_ARGS(c) ); -DEFINE_EVENT(cache_set, bcache_cache_set_read_only_done, +DEFINE_EVENT(cache_set, fs_read_only_done, TP_PROTO(struct cache_set *c), TP_ARGS(c) ); @@ -896,7 +896,7 @@ DEFINE_EVENT(cache_bucket_alloc, bcache_bucket_alloc_fail, TP_ARGS(ca, reserve) ); -DECLARE_EVENT_CLASS(cache_set_bucket_alloc, +TRACE_EVENT(bcache_freelist_empty_fail, TP_PROTO(struct cache_set *c, enum alloc_reserve reserve, struct closure *cl), TP_ARGS(c, reserve, cl), @@ -917,12 +917,6 @@ DECLARE_EVENT_CLASS(cache_set_bucket_alloc, __entry->cl) ); -DEFINE_EVENT(cache_set_bucket_alloc, bcache_freelist_empty_fail, - TP_PROTO(struct cache_set *c, enum alloc_reserve reserve, - struct closure *cl), - TP_ARGS(c, reserve, cl) -); - DECLARE_EVENT_CLASS(open_bucket_alloc, TP_PROTO(struct cache_set *c, struct closure *cl), TP_ARGS(c, cl), diff --git a/libbcache.c b/libbcache.c index cc294bd..6908ead 100644 --- a/libbcache.c +++ b/libbcache.c @@ -369,7 +369,7 @@ void bcache_super_print(struct bch_sb *sb, int units) last_mount ? ctime(&last_mount) : "(never)", BCH_MEMBER_STATE(m) < BCH_MEMBER_STATE_NR - ? bch_cache_state[BCH_MEMBER_STATE(m)] + ? 
bch_dev_state[BCH_MEMBER_STATE(m)] : "unknown", BCH_MEMBER_TIER(m), diff --git a/libbcache/alloc.c b/libbcache/alloc.c index cd22c38..8cb3194 100644 --- a/libbcache/alloc.c +++ b/libbcache/alloc.c @@ -78,7 +78,7 @@ static void __bch_bucket_free(struct cache *, struct bucket *); /* Allocation groups: */ -void bch_cache_group_remove_cache(struct cache_group *grp, struct cache *ca) +void bch_dev_group_remove(struct cache_group *grp, struct cache *ca) { unsigned i; @@ -96,7 +96,7 @@ void bch_cache_group_remove_cache(struct cache_group *grp, struct cache *ca) spin_unlock(&grp->lock); } -void bch_cache_group_add_cache(struct cache_group *grp, struct cache *ca) +void bch_dev_group_add(struct cache_group *grp, struct cache *ca) { unsigned i; @@ -318,7 +318,7 @@ static int bch_prio_write(struct cache *ca) bucket_bytes(ca) - sizeof(p->csum)); ret = prio_io(ca, r, REQ_OP_WRITE); - if (cache_fatal_io_err_on(ret, ca, + if (bch_dev_fatal_io_err_on(ret, ca, "prio write to bucket %zu", r) || bch_meta_write_fault("prio")) return ret; @@ -400,7 +400,7 @@ int bch_prio_read(struct cache *ca) bucket_nr++; ret = prio_io(ca, bucket, REQ_OP_READ); - if (cache_fatal_io_err_on(ret, ca, + if (bch_dev_fatal_io_err_on(ret, ca, "prior read from bucket %llu", bucket) || bch_meta_read_fault("prio")) @@ -1724,7 +1724,7 @@ static bool bch_dev_has_open_write_point(struct cache *ca) } /* device goes ro: */ -void bch_cache_allocator_stop(struct cache *ca) +void bch_dev_allocator_stop(struct cache *ca) { struct cache_set *c = ca->set; struct cache_group *tier = &c->cache_tiers[ca->mi.tier]; @@ -1736,8 +1736,8 @@ void bch_cache_allocator_stop(struct cache *ca) /* First, remove device from allocation groups: */ - bch_cache_group_remove_cache(tier, ca); - bch_cache_group_remove_cache(&c->cache_all, ca); + bch_dev_group_remove(tier, ca); + bch_dev_group_remove(&c->cache_all, ca); bch_recalc_capacity(c); @@ -1805,7 +1805,7 @@ void bch_cache_allocator_stop(struct cache *ca) /* * Startup the allocator thread for transition to RW mode: */ -int bch_cache_allocator_start(struct cache *ca) +int bch_dev_allocator_start(struct cache *ca) { struct cache_set *c = ca->set; struct cache_group *tier = &c->cache_tiers[ca->mi.tier]; @@ -1824,8 +1824,8 @@ int bch_cache_allocator_start(struct cache *ca) get_task_struct(k); ca->alloc_thread = k; - bch_cache_group_add_cache(tier, ca); - bch_cache_group_add_cache(&c->cache_all, ca); + bch_dev_group_add(tier, ca); + bch_dev_group_add(&c->cache_all, ca); bch_recalc_capacity(c); diff --git a/libbcache/alloc.h b/libbcache/alloc.h index ac83e4f..09139a5 100644 --- a/libbcache/alloc.h +++ b/libbcache/alloc.h @@ -20,8 +20,8 @@ static inline size_t prio_buckets(const struct cache *ca) return DIV_ROUND_UP((size_t) (ca)->mi.nbuckets, prios_per_bucket(ca)); } -void bch_cache_group_remove_cache(struct cache_group *, struct cache *); -void bch_cache_group_add_cache(struct cache_group *, struct cache *); +void bch_dev_group_remove(struct cache_group *, struct cache *); +void bch_dev_group_add(struct cache_group *, struct cache *); int bch_prio_read(struct cache *); @@ -103,8 +103,8 @@ static inline struct cache *cache_group_next(struct cache_group *devs, ((_ca) = __open_bucket_next_online_device(_c, _ob, _ptr, _ca));\ (_ptr)++) -void bch_cache_allocator_stop(struct cache *); -int bch_cache_allocator_start(struct cache *); +void bch_dev_allocator_stop(struct cache *); +int bch_dev_allocator_start(struct cache *); void bch_open_buckets_init(struct cache_set *); #endif /* _BCACHE_ALLOC_H */ diff --git 
a/libbcache/bcache.h b/libbcache/bcache.h index 8a0262f..babc08d 100644 --- a/libbcache/bcache.h +++ b/libbcache/bcache.h @@ -203,8 +203,8 @@ #include -#define cache_set_init_fault(name) \ - dynamic_fault("bcache:cache_set_init:" name) +#define bch_fs_init_fault(name) \ + dynamic_fault("bcache:bch_fs_init:" name) #define bch_meta_read_fault(name) \ dynamic_fault("bcache:meta:read:" name) #define bch_meta_write_fault(name) \ @@ -349,8 +349,8 @@ struct cache_member_rcu { /* cache->flags: */ enum { - CACHE_DEV_REMOVING, - CACHE_DEV_FORCE_REMOVE, + BCH_DEV_REMOVING, + BCH_DEV_FORCE_REMOVE, }; struct cache { @@ -367,7 +367,7 @@ struct cache { u8 dev_idx; /* * Cached version of this device's member info from superblock - * Committed by bch_write_super() -> bch_cache_set_mi_update() + * Committed by bch_write_super() -> bch_fs_mi_update() */ struct cache_member_cpu mi; uuid_le uuid; @@ -461,34 +461,34 @@ struct cache { * Flag bits for what phase of startup/shutdown the cache set is at, how we're * shutting down, etc.: * - * CACHE_SET_UNREGISTERING means we're not just shutting down, we're detaching + * BCH_FS_UNREGISTERING means we're not just shutting down, we're detaching * all the backing devices first (their cached data gets invalidated, and they * won't automatically reattach). * - * CACHE_SET_STOPPING always gets set first when we're closing down a cache set; - * we'll continue to run normally for awhile with CACHE_SET_STOPPING set (i.e. + * BCH_FS_STOPPING always gets set first when we're closing down a cache set; + * we'll continue to run normally for awhile with BCH_FS_STOPPING set (i.e. * flushing dirty data). * - * CACHE_SET_RUNNING means all cache devices have been registered and journal + * BCH_FS_RUNNING means all cache devices have been registered and journal * replay is complete. 
*/ enum { /* Startup: */ - CACHE_SET_INITIAL_GC_DONE, - CACHE_SET_RUNNING, + BCH_FS_INITIAL_GC_DONE, + BCH_FS_RUNNING, /* Shutdown: */ - CACHE_SET_UNREGISTERING, - CACHE_SET_STOPPING, - CACHE_SET_RO, - CACHE_SET_RO_COMPLETE, - CACHE_SET_EMERGENCY_RO, - CACHE_SET_WRITE_DISABLE_COMPLETE, - CACHE_SET_GC_STOPPING, - CACHE_SET_GC_FAILURE, - CACHE_SET_BDEV_MOUNTED, - CACHE_SET_ERROR, - CACHE_SET_FSCK_FIXED_ERRORS, + BCH_FS_DETACHING, + BCH_FS_STOPPING, + BCH_FS_RO, + BCH_FS_RO_COMPLETE, + BCH_FS_EMERGENCY_RO, + BCH_FS_WRITE_DISABLE_COMPLETE, + BCH_FS_GC_STOPPING, + BCH_FS_GC_FAILURE, + BCH_FS_BDEV_MOUNTED, + BCH_FS_ERROR, + BCH_FS_FSCK_FIXED_ERRORS, }; struct btree_debug { @@ -520,11 +520,11 @@ struct cache_set { struct cache __rcu *cache[BCH_SB_MEMBERS_MAX]; - struct cache_set_opts opts; + struct bch_opts opts; /* * Cached copy in native endianness: - * Set by bch_cache_set_mi_update(): + * Set by bch_fs_mi_update(): */ struct cache_member_rcu __rcu *members; diff --git a/libbcache/bkey_methods.c b/libbcache/bkey_methods.c index 90f7e5f..5ae97e3 100644 --- a/libbcache/bkey_methods.c +++ b/libbcache/bkey_methods.c @@ -80,7 +80,7 @@ void bkey_debugcheck(struct cache_set *c, struct btree *b, struct bkey_s_c k) char buf[160]; bch_bkey_val_to_text(c, type, buf, sizeof(buf), k); - cache_set_bug(c, "invalid bkey %s: %s", buf, invalid); + bch_fs_bug(c, "invalid bkey %s: %s", buf, invalid); return; } diff --git a/libbcache/blockdev.c b/libbcache/blockdev.c index d3a373c..82b07f5 100644 --- a/libbcache/blockdev.c +++ b/libbcache/blockdev.c @@ -17,7 +17,7 @@ static int bch_blockdev_major; static DEFINE_IDA(bch_blockdev_minor); static LIST_HEAD(uncached_devices); -struct kmem_cache *bch_search_cache; +static struct kmem_cache *bch_search_cache; static void write_bdev_super_endio(struct bio *bio) { @@ -67,7 +67,7 @@ bool bch_is_open_backing_dev(struct block_device *bdev) struct cache_set *c, *tc; struct cached_dev *dc, *t; - list_for_each_entry_safe(c, tc, &bch_cache_sets, list) + list_for_each_entry_safe(c, tc, &bch_fs_list, list) list_for_each_entry_safe(dc, t, &c->cached_devs, list) if (dc->disk_sb.bdev == bdev) return true; @@ -387,10 +387,10 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c) return -EINVAL; } - if (!test_bit(CACHE_SET_RUNNING, &c->flags)) + if (!test_bit(BCH_FS_RUNNING, &c->flags)) return 0; - if (test_bit(CACHE_SET_STOPPING, &c->flags)) { + if (test_bit(BCH_FS_STOPPING, &c->flags)) { pr_err("Can't attach %s: shutting down", buf); return -EINVAL; } @@ -652,7 +652,7 @@ const char *bch_backing_dev_register(struct bcache_superblock *sb) bdevname(dc->disk_sb.bdev, name)); list_add(&dc->list, &uncached_devices); - list_for_each_entry(c, &bch_cache_sets, list) + list_for_each_entry(c, &bch_fs_list, list) bch_cached_dev_attach(dc, c); if (BDEV_STATE(dc->disk_sb.sb) == BDEV_STATE_NONE || @@ -742,7 +742,7 @@ int bch_blockdev_volumes_start(struct cache_set *c) struct bkey_s_c_inode_blockdev inode; int ret = 0; - if (test_bit(CACHE_SET_STOPPING, &c->flags)) + if (test_bit(BCH_FS_STOPPING, &c->flags)) return -EINVAL; for_each_btree_key(&iter, c, BTREE_ID_INODES, POS_MIN, k) { @@ -799,7 +799,7 @@ void bch_blockdevs_stop(struct cache_set *c) d = radix_tree_deref_slot(slot); if (CACHED_DEV(&d->inode.v) && - test_bit(CACHE_SET_UNREGISTERING, &c->flags)) { + test_bit(BCH_FS_DETACHING, &c->flags)) { dc = container_of(d, struct cached_dev, disk); bch_cached_dev_detach(dc); } else { @@ -811,6 +811,16 @@ void bch_blockdevs_stop(struct cache_set *c) mutex_unlock(&bch_register_lock); } 
+void bch_fs_blockdev_exit(struct cache_set *c) +{ + mempool_exit(&c->search); +} + +int bch_fs_blockdev_init(struct cache_set *c) +{ + return mempool_init_slab_pool(&c->search, 1, bch_search_cache); +} + void bch_blockdev_exit(void) { kmem_cache_destroy(bch_search_cache); diff --git a/libbcache/blockdev.h b/libbcache/blockdev.h index 0fc0ed1..aa6c12b 100644 --- a/libbcache/blockdev.h +++ b/libbcache/blockdev.h @@ -4,6 +4,49 @@ #include "blockdev_types.h" #include "io_types.h" +struct search { + /* Stack frame for bio_complete */ + struct closure cl; + + union { + struct bch_read_bio rbio; + struct bch_write_bio wbio; + }; + /* Not modified */ + struct bio *orig_bio; + struct bcache_device *d; + + unsigned inode; + unsigned write:1; + + /* Flags only used for reads */ + unsigned recoverable:1; + unsigned read_dirty_data:1; + unsigned cache_miss:1; + + /* + * For reads: bypass read from cache and insertion into cache + * For writes: discard key range from cache, sending the write to + * the backing device (if there is a backing device) + */ + unsigned bypass:1; + + unsigned long start_time; + + /* + * Mostly only used for writes. For reads, we still make use of + * some trivial fields: + * - c + * - error + */ + struct bch_write_op iop; +}; + +#ifndef NO_BCACHE_BLOCKDEV + +extern struct kobj_type bch_cached_dev_ktype; +extern struct kobj_type bch_blockdev_volume_ktype; + void bch_write_bdev_super(struct cached_dev *, struct closure *); void bch_cached_dev_release(struct kobject *); @@ -24,9 +67,49 @@ int bch_blockdev_volumes_start(struct cache_set *); void bch_blockdevs_stop(struct cache_set *); +void bch_fs_blockdev_exit(struct cache_set *); +int bch_fs_blockdev_init(struct cache_set *); void bch_blockdev_exit(void); int bch_blockdev_init(void); +#else + +static inline void bch_write_bdev_super(struct cached_dev *dc, + struct closure *cl) {} + +static inline void bch_cached_dev_release(struct kobject *kobj) {} +static inline void bch_blockdev_volume_release(struct kobject *kobj) {} + +static inline int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c) +{ + return 0; +} +static inline void bch_attach_backing_devs(struct cache_set *c) {} + +static inline void bch_cached_dev_detach(struct cached_dev *dc) {} +static inline void bch_cached_dev_run(struct cached_dev *dc) {} +static inline void bch_blockdev_stop(struct bcache_device *d) {} + +static inline bool bch_is_open_backing_dev(struct block_device *bdev) +{ + return false; +} +static inline const char *bch_backing_dev_register(struct bcache_superblock *sb) +{ + return "not implemented"; +} + +static inline int bch_blockdev_volume_create(struct cache_set *c, u64 s) { return 0; } +static inline int bch_blockdev_volumes_start(struct cache_set *c) { return 0; } + +static inline void bch_blockdevs_stop(struct cache_set *c) {} +static inline void bch_fs_blockdev_exit(struct cache_set *c) {} +static inline int bch_fs_blockdev_init(struct cache_set *c) { return 0; } +static inline void bch_blockdev_exit(void) {} +static inline int bch_blockdev_init(void) { return 0; } + +#endif + static inline void cached_dev_put(struct cached_dev *dc) { if (atomic_dec_and_test(&dc->count)) @@ -53,47 +136,4 @@ static inline struct bcache_device *bch_dev_find(struct cache_set *c, u64 inode) return radix_tree_lookup(&c->devices, inode); } -struct search { - /* Stack frame for bio_complete */ - struct closure cl; - - union { - struct bch_read_bio rbio; - struct bch_write_bio wbio; - }; - /* Not modified */ - struct bio *orig_bio; - struct 
bcache_device *d; - - unsigned inode; - unsigned write:1; - - /* Flags only used for reads */ - unsigned recoverable:1; - unsigned read_dirty_data:1; - unsigned cache_miss:1; - - /* - * For reads: bypass read from cache and insertion into cache - * For writes: discard key range from cache, sending the write to - * the backing device (if there is a backing device) - */ - unsigned bypass:1; - - unsigned long start_time; - - /* - * Mostly only used for writes. For reads, we still make use of - * some trivial fields: - * - c - * - error - */ - struct bch_write_op iop; -}; - -extern struct kmem_cache *bch_search_cache; - -extern struct kobj_type bch_cached_dev_ktype; -extern struct kobj_type bch_blockdev_volume_ktype; - #endif /* _BCACHE_BLOCKDEV_H */ diff --git a/libbcache/btree_gc.c b/libbcache/btree_gc.c index 5c77b26..0eb7290 100644 --- a/libbcache/btree_gc.c +++ b/libbcache/btree_gc.c @@ -54,8 +54,7 @@ static void btree_node_range_checks(struct cache_set *c, struct btree *b, ? btree_type_successor(b->btree_id, l->max) : l->max; - cache_set_inconsistent_on(bkey_cmp(b->data->min_key, - expected_min), c, + bch_fs_inconsistent_on(bkey_cmp(b->data->min_key, expected_min), c, "btree node has incorrect min key: %llu:%llu != %llu:%llu", b->data->min_key.inode, b->data->min_key.offset, @@ -67,16 +66,14 @@ static void btree_node_range_checks(struct cache_set *c, struct btree *b, if (b->level > r->depth) { l = &r->l[b->level - 1]; - cache_set_inconsistent_on(bkey_cmp(b->data->min_key, - l->min), c, + bch_fs_inconsistent_on(bkey_cmp(b->data->min_key, l->min), c, "btree node min doesn't match min of child nodes: %llu:%llu != %llu:%llu", b->data->min_key.inode, b->data->min_key.offset, l->min.inode, l->min.offset); - cache_set_inconsistent_on(bkey_cmp(b->data->max_key, - l->max), c, + bch_fs_inconsistent_on(bkey_cmp(b->data->max_key, l->max), c, "btree node max doesn't match max of child nodes: %llu:%llu != %llu:%llu", b->data->max_key.inode, b->data->max_key.offset, @@ -308,7 +305,7 @@ static void bch_mark_pending_btree_node_frees(struct cache_set *c) &stats); /* * Don't apply stats - pending deletes aren't tracked in - * cache_set_stats: + * bch_alloc_stats: */ mutex_unlock(&c->btree_interior_update_lock); @@ -345,7 +342,7 @@ void bch_gc(struct cache_set *c) * uses, GC could skip past them */ - if (test_bit(CACHE_SET_GC_FAILURE, &c->flags)) + if (test_bit(BCH_FS_GC_FAILURE, &c->flags)) return; trace_bcache_gc_start(c); @@ -410,7 +407,7 @@ void bch_gc(struct cache_set *c) if (ret) { bch_err(c, "btree gc failed: %d", ret); - set_bit(CACHE_SET_GC_FAILURE, &c->flags); + set_bit(BCH_FS_GC_FAILURE, &c->flags); up_write(&c->gc_lock); return; } @@ -725,7 +722,7 @@ static int bch_coalesce_btree(struct cache_set *c, enum btree_id btree_id) lock_seq[0] = merge[0]->lock.state.seq; - if (test_bit(CACHE_SET_GC_STOPPING, &c->flags)) { + if (test_bit(BCH_FS_GC_STOPPING, &c->flags)) { bch_btree_iter_unlock(&iter); return -ESHUTDOWN; } @@ -756,7 +753,7 @@ void bch_coalesce(struct cache_set *c) if (btree_gc_coalesce_disabled(c)) return; - if (test_bit(CACHE_SET_GC_FAILURE, &c->flags)) + if (test_bit(BCH_FS_GC_FAILURE, &c->flags)) return; down_read(&c->gc_lock); @@ -771,7 +768,7 @@ void bch_coalesce(struct cache_set *c) if (ret) { if (ret != -ESHUTDOWN) bch_err(c, "btree coalescing failed: %d", ret); - set_bit(CACHE_SET_GC_FAILURE, &c->flags); + set_bit(BCH_FS_GC_FAILURE, &c->flags); return; } } @@ -824,7 +821,7 @@ static int bch_gc_thread(void *arg) void bch_gc_thread_stop(struct cache_set *c) { - 
set_bit(CACHE_SET_GC_STOPPING, &c->flags); + set_bit(BCH_FS_GC_STOPPING, &c->flags); if (!IS_ERR_OR_NULL(c->gc_thread)) kthread_stop(c->gc_thread); @@ -832,7 +829,7 @@ void bch_gc_thread_stop(struct cache_set *c) int bch_gc_thread_start(struct cache_set *c) { - clear_bit(CACHE_SET_GC_STOPPING, &c->flags); + clear_bit(BCH_FS_GC_STOPPING, &c->flags); c->gc_thread = kthread_create(bch_gc_thread, c, "bcache_gc"); if (IS_ERR(c->gc_thread)) @@ -903,7 +900,7 @@ int bch_initial_gc(struct cache_set *c, struct list_head *journal) bch_mark_metadata(c); gc_pos_set(c, gc_phase(GC_PHASE_DONE)); - set_bit(CACHE_SET_INITIAL_GC_DONE, &c->flags); + set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags); return 0; } diff --git a/libbcache/btree_io.c b/libbcache/btree_io.c index e772c6a..ab67591 100644 --- a/libbcache/btree_io.c +++ b/libbcache/btree_io.c @@ -873,7 +873,7 @@ static void bset_encrypt(struct cache_set *c, struct bset *i, struct nonce nonce } #define btree_node_error(b, c, ptr, fmt, ...) \ - cache_set_inconsistent(c, \ + bch_fs_inconsistent(c, \ "btree node error at btree %u level %u/%u bucket %zu block %u u64s %u: " fmt,\ (b)->btree_id, (b)->level, btree_node_root(c, b) \ ? btree_node_root(c, b)->level : -1, \ @@ -1194,8 +1194,8 @@ void bch_btree_node_read(struct cache_set *c, struct btree *b) closure_init_stack(&cl); pick = bch_btree_pick_ptr(c, b); - if (cache_set_fatal_err_on(!pick.ca, c, - "no cache device for btree node")) { + if (bch_fs_fatal_err_on(!pick.ca, c, + "no cache device for btree node")) { set_btree_node_read_error(b); return; } @@ -1214,7 +1214,7 @@ void bch_btree_node_read(struct cache_set *c, struct btree *b) bch_generic_make_request(bio, c); closure_sync(&cl); - if (cache_fatal_io_err_on(bio->bi_error, + if (bch_dev_fatal_io_err_on(bio->bi_error, pick.ca, "IO error reading bucket %zu", PTR_BUCKET_NR(pick.ca, &pick.ptr)) || bch_meta_read_fault("btree")) { @@ -1297,7 +1297,7 @@ static void btree_node_write_endio(struct bio *bio) struct closure *cl = !wbio->split ? 
wbio->cl : NULL; struct cache *ca = wbio->ca; - if (cache_fatal_io_err_on(bio->bi_error, ca, "btree write") || + if (bch_dev_fatal_io_err_on(bio->bi_error, ca, "btree write") || bch_meta_write_fault("btree")) set_btree_node_write_error(b); diff --git a/libbcache/btree_update.c b/libbcache/btree_update.c index c3bb209..95d127f 100644 --- a/libbcache/btree_update.c +++ b/libbcache/btree_update.c @@ -112,7 +112,7 @@ found: d->index_update_done = true; /* - * Btree nodes are accounted as freed in cache_set_stats when they're + * Btree nodes are accounted as freed in bch_alloc_stats when they're * freed from the index: */ stats->s[S_COMPRESSED][S_META] -= c->sb.btree_node_size; @@ -149,7 +149,7 @@ found: &tmp, 0); /* * Don't apply tmp - pending deletes aren't tracked in - * cache_set_stats: + * bch_alloc_stats: */ } @@ -218,7 +218,7 @@ static void bch_btree_node_free_ondisk(struct cache_set *c, &stats, 0); /* * Don't apply stats - pending deletes aren't tracked in - * cache_set_stats: + * bch_alloc_stats: */ } @@ -384,8 +384,8 @@ static void bch_btree_set_root_inmem(struct cache_set *c, struct btree *b, bch_btree_node_free_index(c, NULL, old->btree_id, bkey_i_to_s_c(&old->key), &stats); - bch_cache_set_stats_apply(c, &stats, &btree_reserve->disk_res, - gc_pos_btree_root(b->btree_id)); + bch_fs_stats_apply(c, &stats, &btree_reserve->disk_res, + gc_pos_btree_root(b->btree_id)); } bch_recalc_btree_reserve(c); @@ -654,7 +654,7 @@ static void bch_insert_fixup_btree_ptr(struct btree_iter *iter, bkey_disassemble(b, k, &tmp), &stats); - bch_cache_set_stats_apply(c, &stats, disk_res, gc_pos_btree_node(b)); + bch_fs_stats_apply(c, &stats, disk_res, gc_pos_btree_node(b)); bch_btree_bset_insert_key(iter, b, node_iter, insert); set_btree_node_dirty(b); diff --git a/libbcache/buckets.c b/libbcache/buckets.c index 757bc03..315cfbe 100644 --- a/libbcache/buckets.c +++ b/libbcache/buckets.c @@ -75,7 +75,7 @@ #define lg_local_lock lg_global_lock #define lg_local_unlock lg_global_unlock -static void bch_cache_set_stats_verify(struct cache_set *c) +static void bch_fs_stats_verify(struct cache_set *c) { struct bucket_stats_cache_set stats = __bch_bucket_stats_read_cache_set(c); @@ -98,7 +98,7 @@ static void bch_cache_set_stats_verify(struct cache_set *c) #else -static void bch_cache_set_stats_verify(struct cache_set *c) {} +static void bch_fs_stats_verify(struct cache_set *c) {} #endif @@ -199,10 +199,10 @@ static inline int is_cached_bucket(struct bucket_mark m) return !m.owned_by_allocator && !m.dirty_sectors && !!m.cached_sectors; } -void bch_cache_set_stats_apply(struct cache_set *c, - struct bucket_stats_cache_set *stats, - struct disk_reservation *disk_res, - struct gc_pos gc_pos) +void bch_fs_stats_apply(struct cache_set *c, + struct bucket_stats_cache_set *stats, + struct disk_reservation *disk_res, + struct gc_pos gc_pos) { s64 added = stats->s[S_COMPRESSED][S_META] + @@ -230,7 +230,7 @@ void bch_cache_set_stats_apply(struct cache_set *c, if (!gc_will_visit(c, gc_pos)) bucket_stats_add(this_cpu_ptr(c->bucket_stats_percpu), stats); - bch_cache_set_stats_verify(c); + bch_fs_stats_verify(c); lg_local_unlock(&c->bucket_stats_lock); memset(stats, 0, sizeof(*stats)); @@ -239,7 +239,7 @@ void bch_cache_set_stats_apply(struct cache_set *c, static void bucket_stats_update(struct cache *ca, struct bucket_mark old, struct bucket_mark new, bool may_make_unavailable, - struct bucket_stats_cache_set *cache_set_stats) + struct bucket_stats_cache_set *bch_alloc_stats) { struct cache_set *c = ca->set; struct 
bucket_stats_cache *cache_stats; @@ -249,15 +249,15 @@ static void bucket_stats_update(struct cache *ca, !is_available_bucket(new) && c->gc_pos.phase == GC_PHASE_DONE); - if (cache_set_stats) { - cache_set_stats->s[S_COMPRESSED][S_CACHED] += + if (bch_alloc_stats) { + bch_alloc_stats->s[S_COMPRESSED][S_CACHED] += (int) new.cached_sectors - (int) old.cached_sectors; - cache_set_stats->s[S_COMPRESSED] + bch_alloc_stats->s[S_COMPRESSED] [old.is_metadata ? S_META : S_DIRTY] -= old.dirty_sectors; - cache_set_stats->s[S_COMPRESSED] + bch_alloc_stats->s[S_COMPRESSED] [new.is_metadata ? S_META : S_DIRTY] += new.dirty_sectors; } @@ -312,7 +312,7 @@ void bch_invalidate_bucket(struct cache *ca, struct bucket *g) * Ick: * * Only stats.sectors_cached should be nonzero: this is important - * because in this path we modify cache_set_stats based on how the + * because in this path we modify bch_alloc_stats based on how the * bucket_mark was modified, and the sector counts in bucket_mark are * subject to (saturating) overflow - and if they did overflow, the * cache set stats will now be off. We can tolerate this for @@ -620,13 +620,13 @@ void bch_mark_key(struct cache_set *c, struct bkey_s_c k, __bch_mark_key(c, k, sectors, metadata, false, stats, gc_will_visit(c, gc_pos), journal_seq); - bch_cache_set_stats_verify(c); + bch_fs_stats_verify(c); lg_local_unlock(&c->bucket_stats_lock); } static u64 __recalc_sectors_available(struct cache_set *c) { - return c->capacity - cache_set_sectors_used(c); + return c->capacity - bch_fs_sectors_used(c); } /* Used by gc when it's starting: */ @@ -653,7 +653,7 @@ void bch_disk_reservation_put(struct cache_set *c, this_cpu_sub(c->bucket_stats_percpu->online_reserved, res->sectors); - bch_cache_set_stats_verify(c); + bch_fs_stats_verify(c); lg_local_unlock(&c->bucket_stats_lock); res->sectors = 0; @@ -697,7 +697,7 @@ out: stats->online_reserved += sectors; res->sectors += sectors; - bch_cache_set_stats_verify(c); + bch_fs_stats_verify(c); lg_local_unlock(&c->bucket_stats_lock); return 0; @@ -734,7 +734,7 @@ recalculate: ret = -ENOSPC; } - bch_cache_set_stats_verify(c); + bch_fs_stats_verify(c); lg_global_unlock(&c->bucket_stats_lock); if (!(flags & BCH_DISK_RESERVATION_GC_LOCK_HELD)) up_read(&c->gc_lock); diff --git a/libbcache/buckets.h b/libbcache/buckets.h index 8194dd9..9c6e438 100644 --- a/libbcache/buckets.h +++ b/libbcache/buckets.h @@ -195,12 +195,12 @@ static inline u64 buckets_free_cache(struct cache *ca) struct bucket_stats_cache_set __bch_bucket_stats_read_cache_set(struct cache_set *); struct bucket_stats_cache_set bch_bucket_stats_read_cache_set(struct cache_set *); -void bch_cache_set_stats_apply(struct cache_set *, - struct bucket_stats_cache_set *, - struct disk_reservation *, +void bch_fs_stats_apply(struct cache_set *, + struct bucket_stats_cache_set *, + struct disk_reservation *, struct gc_pos); -static inline u64 __cache_set_sectors_used(struct cache_set *c) +static inline u64 __bch_fs_sectors_used(struct cache_set *c) { struct bucket_stats_cache_set stats = __bch_bucket_stats_read_cache_set(c); u64 reserved = stats.persistent_reserved + @@ -212,9 +212,9 @@ static inline u64 __cache_set_sectors_used(struct cache_set *c) (reserved >> 7); } -static inline u64 cache_set_sectors_used(struct cache_set *c) +static inline u64 bch_fs_sectors_used(struct cache_set *c) { - return min(c->capacity, __cache_set_sectors_used(c)); + return min(c->capacity, __bch_fs_sectors_used(c)); } /* XXX: kill? 
*/ diff --git a/libbcache/chardev.c b/libbcache/chardev.c index b361b09..b142d7b 100644 --- a/libbcache/chardev.c +++ b/libbcache/chardev.c @@ -53,9 +53,7 @@ static long bch_ioctl_assemble(struct bch_ioctl_assemble __user *user_arg) } } - err = bch_register_cache_set(devs, arg.nr_devs, - cache_set_opts_empty(), - NULL); + err = bch_fs_open(devs, arg.nr_devs, bch_opts_empty(), NULL); if (err) { pr_err("Could not register cache set: %s", err); ret = -EINVAL; @@ -84,7 +82,7 @@ static long bch_ioctl_incremental(struct bch_ioctl_incremental __user *user_arg) if (!path) return -ENOMEM; - err = bch_register_one(path); + err = bch_fs_open_incremental(path); kfree(path); if (err) { @@ -109,7 +107,7 @@ static long bch_global_ioctl(unsigned cmd, void __user *arg) static long bch_ioctl_stop(struct cache_set *c) { - bch_cache_set_stop(c); + bch_fs_stop(c); return 0; } @@ -127,7 +125,7 @@ static long bch_ioctl_disk_add(struct cache_set *c, if (!path) return -ENOMEM; - ret = bch_cache_set_add_cache(c, path); + ret = bch_dev_add(c, path); kfree(path); return ret; @@ -175,7 +173,7 @@ static long bch_ioctl_disk_remove(struct cache_set *c, if (IS_ERR(ca)) return PTR_ERR(ca); - ret = bch_cache_remove(ca, arg.flags & BCH_FORCE_IF_DATA_MISSING) + ret = bch_dev_remove(ca, arg.flags & BCH_FORCE_IF_DATA_MISSING) ? 0 : -EBUSY; percpu_ref_put(&ca->ref); @@ -197,7 +195,7 @@ static long bch_ioctl_disk_fail(struct cache_set *c, return PTR_ERR(ca); /* XXX: failed not actually implemented yet */ - ret = bch_cache_remove(ca, true); + ret = bch_dev_remove(ca, true); percpu_ref_put(&ca->ref); return ret; @@ -268,7 +266,7 @@ static long bch_ioctl_query_uuid(struct cache_set *c, sizeof(c->sb.user_uuid)); } -long bch_cache_set_ioctl(struct cache_set *c, unsigned cmd, void __user *arg) +long bch_fs_ioctl(struct cache_set *c, unsigned cmd, void __user *arg) { /* ioctls that don't require admin cap: */ switch (cmd) { @@ -309,12 +307,71 @@ static long bch_chardev_ioctl(struct file *filp, unsigned cmd, unsigned long v) void __user *arg = (void __user *) v; return c - ? bch_cache_set_ioctl(c, cmd, arg) + ? 
bch_fs_ioctl(c, cmd, arg) : bch_global_ioctl(cmd, arg); } -const struct file_operations bch_chardev_fops = { +static const struct file_operations bch_chardev_fops = { .owner = THIS_MODULE, .unlocked_ioctl = bch_chardev_ioctl, .open = nonseekable_open, }; + +static int bch_chardev_major; +static struct class *bch_chardev_class; +static struct device *bch_chardev; +static DEFINE_IDR(bch_chardev_minor); + +void bch_fs_chardev_exit(struct cache_set *c) +{ + if (!IS_ERR_OR_NULL(c->chardev)) + device_unregister(c->chardev); + if (c->minor >= 0) + idr_remove(&bch_chardev_minor, c->minor); +} + +int bch_fs_chardev_init(struct cache_set *c) +{ + c->minor = idr_alloc(&bch_chardev_minor, c, 0, 0, GFP_KERNEL); + if (c->minor < 0) + return c->minor; + + c->chardev = device_create(bch_chardev_class, NULL, + MKDEV(bch_chardev_major, c->minor), NULL, + "bcache%u-ctl", c->minor); + if (IS_ERR(c->chardev)) + return PTR_ERR(c->chardev); + + return 0; +} + +void bch_chardev_exit(void) +{ + if (!IS_ERR_OR_NULL(bch_chardev_class)) + device_destroy(bch_chardev_class, + MKDEV(bch_chardev_major, 0)); + if (!IS_ERR_OR_NULL(bch_chardev_class)) + class_destroy(bch_chardev_class); + if (bch_chardev_major > 0) + unregister_chrdev(bch_chardev_major, "bcache"); + +} + +int __init bch_chardev_init(void) +{ + bch_chardev_major = register_chrdev(0, "bcache-ctl", &bch_chardev_fops); + if (bch_chardev_major < 0) + return bch_chardev_major; + + bch_chardev_class = class_create(THIS_MODULE, "bcache"); + if (IS_ERR(bch_chardev_class)) + return PTR_ERR(bch_chardev_class); + + bch_chardev = device_create(bch_chardev_class, NULL, + MKDEV(bch_chardev_major, 255), + NULL, "bcache-ctl"); + if (IS_ERR(bch_chardev)) + return PTR_ERR(bch_chardev); + + return 0; +} diff --git a/libbcache/chardev.h b/libbcache/chardev.h index 657bf2b..15310c1 100644 --- a/libbcache/chardev.h +++ b/libbcache/chardev.h @@ -1,7 +1,30 @@ #ifndef _BCACHE_CHARDEV_H #define _BCACHE_CHARDEV_H -long bch_cache_set_ioctl(struct cache_set *, unsigned, void __user *); -extern const struct file_operations bch_chardev_fops; +#ifndef NO_BCACHE_CHARDEV + +long bch_fs_ioctl(struct cache_set *, unsigned, void __user *); + +void bch_fs_chardev_exit(struct cache_set *); +int bch_fs_chardev_init(struct cache_set *); + +void bch_chardev_exit(void); +int __init bch_chardev_init(void); + +#else + +static inline long bch_fs_ioctl(struct cache_set *c, + unsigned cmd, void __user * arg) +{ + return -ENOSYS; +} + +static inline void bch_fs_chardev_exit(struct cache_set *c) {} +static inline int bch_fs_chardev_init(struct cache_set *c) { return 0; } + +static inline void bch_chardev_exit(void) {} +static inline int __init bch_chardev_init(void) { return 0; } + +#endif #endif /* _BCACHE_CHARDEV_H */ diff --git a/libbcache/checksum.c b/libbcache/checksum.c index eb41f2e..dae52d4 100644 --- a/libbcache/checksum.c +++ b/libbcache/checksum.c @@ -560,7 +560,7 @@ err: return ret; } -void bch_cache_set_encryption_free(struct cache_set *c) +void bch_fs_encryption_free(struct cache_set *c) { if (!IS_ERR_OR_NULL(c->poly1305)) crypto_free_shash(c->poly1305); @@ -568,7 +568,7 @@ void bch_cache_set_encryption_free(struct cache_set *c) crypto_free_blkcipher(c->chacha20); } -int bch_cache_set_encryption_init(struct cache_set *c) +int bch_fs_encryption_init(struct cache_set *c) { struct bch_sb_field_crypt *crypt; struct bch_key key; diff --git a/libbcache/checksum.h b/libbcache/checksum.h index a9a1758..137c915 100644 --- a/libbcache/checksum.h +++ b/libbcache/checksum.h @@ -43,8 +43,8 @@ void 
bch_encrypt_bio(struct cache_set *, unsigned, int bch_disable_encryption(struct cache_set *); int bch_enable_encryption(struct cache_set *, bool); -void bch_cache_set_encryption_free(struct cache_set *); -int bch_cache_set_encryption_init(struct cache_set *); +void bch_fs_encryption_free(struct cache_set *); +int bch_fs_encryption_init(struct cache_set *); static inline unsigned bch_data_checksum_type(struct cache_set *c) { diff --git a/libbcache/compress.c b/libbcache/compress.c index e76850b..f81a814 100644 --- a/libbcache/compress.c +++ b/libbcache/compress.c @@ -119,6 +119,13 @@ static void bio_unmap_or_unbounce(struct cache_set *c, void *data, } } +static inline void zlib_set_workspace(z_stream *strm, void *workspace) +{ +#ifdef __KERNEL__ + strm->workspace = workspace; +#endif +} + static int __bio_uncompress(struct cache_set *c, struct bio *src, void *dst_data, struct bch_extent_crc128 crc) { @@ -150,11 +157,11 @@ static int __bio_uncompress(struct cache_set *c, struct bio *src, workspace = c->zlib_workspace; } - strm.workspace = workspace; strm.next_in = src_data; strm.avail_in = src_len; strm.next_out = dst_data; strm.avail_out = dst_len; + zlib_set_workspace(&strm, workspace); zlib_inflateInit2(&strm, -MAX_WBITS); ret = zlib_inflate(&strm, Z_FINISH); @@ -310,12 +317,12 @@ static int __bio_compress(struct cache_set *c, workspace = c->zlib_workspace; } - strm.workspace = workspace; strm.next_in = src_data; strm.avail_in = min(src->bi_iter.bi_size, dst->bi_iter.bi_size); strm.next_out = dst_data; strm.avail_out = dst->bi_iter.bi_size; + zlib_set_workspace(&strm, workspace); zlib_deflateInit2(&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED, -MAX_WBITS, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY); diff --git a/libbcache/error.c b/libbcache/error.c index 9ba33ef..9f39be1 100644 --- a/libbcache/error.c +++ b/libbcache/error.c @@ -6,19 +6,19 @@ void bch_inconsistent_error(struct cache_set *c) { - set_bit(CACHE_SET_ERROR, &c->flags); + set_bit(BCH_FS_ERROR, &c->flags); switch (c->opts.errors) { case BCH_ON_ERROR_CONTINUE: break; case BCH_ON_ERROR_RO: - if (!test_bit(CACHE_SET_INITIAL_GC_DONE, &c->flags)) { + if (!test_bit(BCH_FS_INITIAL_GC_DONE, &c->flags)) { /* XXX do something better here? */ - bch_cache_set_stop(c); + bch_fs_stop(c); return; } - if (bch_cache_set_emergency_read_only(c)) + if (bch_fs_emergency_read_only(c)) bch_err(c, "emergency read only"); break; case BCH_ON_ERROR_PANIC: @@ -29,7 +29,7 @@ void bch_inconsistent_error(struct cache_set *c) void bch_fatal_error(struct cache_set *c) { - if (bch_cache_set_emergency_read_only(c)) + if (bch_fs_emergency_read_only(c)) bch_err(c, "emergency read only"); } @@ -116,15 +116,15 @@ void bch_nonfatal_io_error_work(struct work_struct *work) bool dev; if (errors < c->error_limit) { - bch_notify_cache_error(ca, false); + bch_notify_dev_error(ca, false); } else { - bch_notify_cache_error(ca, true); + bch_notify_dev_error(ca, true); mutex_lock(&bch_register_lock); - dev = bch_cache_may_remove(ca); + dev = bch_dev_may_remove(ca); if (dev - ? bch_cache_read_only(ca) - : bch_cache_set_emergency_read_only(c)) + ? bch_dev_read_only(ca) + : bch_fs_emergency_read_only(c)) bch_err(c, "too many IO errors on %s, setting %s RO", bdevname(ca->disk_sb.bdev, buf), diff --git a/libbcache/error.h b/libbcache/error.h index 33a28c4..3f12bbe 100644 --- a/libbcache/error.h +++ b/libbcache/error.h @@ -13,7 +13,7 @@ struct cache_set; /* Error messages: */ -#define __bch_cache_error(ca, fmt, ...) \ +#define __bch_dev_error(ca, fmt, ...) 
\ do { \ char _buf[BDEVNAME_SIZE]; \ bch_err((ca)->set, "%s: " fmt, \ @@ -28,16 +28,16 @@ do { \ * XXX: audit and convert to inconsistent() checks */ -#define cache_set_bug(c, ...) \ +#define bch_fs_bug(c, ...) \ do { \ bch_err(c, __VA_ARGS__); \ BUG(); \ } while (0) -#define cache_set_bug_on(cond, c, ...) \ +#define bch_fs_bug_on(cond, c, ...) \ do { \ if (cond) \ - cache_set_bug(c, __VA_ARGS__); \ + bch_fs_bug(c, __VA_ARGS__); \ } while (0) /* @@ -53,18 +53,18 @@ do { \ void bch_inconsistent_error(struct cache_set *); -#define cache_set_inconsistent(c, ...) \ +#define bch_fs_inconsistent(c, ...) \ do { \ bch_err(c, __VA_ARGS__); \ bch_inconsistent_error(c); \ } while (0) -#define cache_set_inconsistent_on(cond, c, ...) \ +#define bch_fs_inconsistent_on(cond, c, ...) \ ({ \ int _ret = !!(cond); \ \ if (_ret) \ - cache_set_inconsistent(c, __VA_ARGS__); \ + bch_fs_inconsistent(c, __VA_ARGS__); \ _ret; \ }) @@ -73,18 +73,18 @@ do { \ * entire cache set: */ -#define cache_inconsistent(ca, ...) \ +#define bch_dev_inconsistent(ca, ...) \ do { \ - __bch_cache_error(ca, __VA_ARGS__); \ + __bch_dev_error(ca, __VA_ARGS__); \ bch_inconsistent_error((ca)->set); \ } while (0) -#define cache_inconsistent_on(cond, ca, ...) \ +#define bch_dev_inconsistent_on(cond, ca, ...) \ ({ \ int _ret = !!(cond); \ \ if (_ret) \ - cache_inconsistent(ca, __VA_ARGS__); \ + bch_dev_inconsistent(ca, __VA_ARGS__); \ _ret; \ }) @@ -112,7 +112,7 @@ enum { \ if (_can_fix && (c)->opts.fix_errors) { \ bch_err(c, msg ", fixing", ##__VA_ARGS__); \ - set_bit(CACHE_SET_FSCK_FIXED_ERRORS, &(c)->flags); \ + set_bit(BCH_FS_FSCK_FIXED_ERRORS, &(c)->flags); \ _fix = true; \ } else if (_can_ignore && \ (c)->opts.errors == BCH_ON_ERROR_CONTINUE) { \ @@ -154,28 +154,28 @@ enum { void bch_fatal_error(struct cache_set *); -#define cache_set_fatal_error(c, ...) \ +#define bch_fs_fatal_error(c, ...) \ do { \ bch_err(c, __VA_ARGS__); \ bch_fatal_error(c); \ } while (0) -#define cache_set_fatal_err_on(cond, c, ...) \ +#define bch_fs_fatal_err_on(cond, c, ...) \ ({ \ int _ret = !!(cond); \ \ if (_ret) \ - cache_set_fatal_error(c, __VA_ARGS__); \ + bch_fs_fatal_error(c, __VA_ARGS__); \ _ret; \ }) -#define cache_fatal_error(ca, ...) \ +#define bch_dev_fatal_error(ca, ...) \ do { \ - __bch_cache_error(ca, __VA_ARGS__); \ + __bch_dev_error(ca, __VA_ARGS__); \ bch_fatal_error(c); \ } while (0) -#define cache_fatal_io_error(ca, fmt, ...) \ +#define bch_dev_fatal_io_error(ca, fmt, ...) \ do { \ char _buf[BDEVNAME_SIZE]; \ \ @@ -185,12 +185,12 @@ do { \ bch_fatal_error((ca)->set); \ } while (0) -#define cache_fatal_io_err_on(cond, ca, ...) \ +#define bch_dev_fatal_io_err_on(cond, ca, ...) \ ({ \ int _ret = !!(cond); \ \ if (_ret) \ - cache_fatal_io_error(ca, __VA_ARGS__); \ + bch_dev_fatal_io_error(ca, __VA_ARGS__); \ _ret; \ }) @@ -209,7 +209,7 @@ void bch_nonfatal_io_error_work(struct work_struct *); void bch_nonfatal_io_error(struct cache *); #if 0 -#define cache_set_nonfatal_io_error(c, ...) \ +#define bch_fs_nonfatal_io_error(c, ...) \ do { \ bch_err(c, __VA_ARGS__); \ bch_nonfatal_io_error(c); \ @@ -217,7 +217,7 @@ do { \ #endif /* Logs message and handles the error: */ -#define cache_nonfatal_io_error(ca, fmt, ...) \ +#define bch_dev_nonfatal_io_error(ca, fmt, ...) \ do { \ char _buf[BDEVNAME_SIZE]; \ \ @@ -227,12 +227,12 @@ do { \ bch_nonfatal_io_error(ca); \ } while (0) -#define cache_nonfatal_io_err_on(cond, ca, ...) \ +#define bch_dev_nonfatal_io_err_on(cond, ca, ...) 
\ ({ \ bool _ret = (cond); \ \ if (_ret) \ - cache_nonfatal_io_error(ca, __VA_ARGS__); \ + bch_dev_nonfatal_io_error(ca, __VA_ARGS__); \ _ret; \ }) diff --git a/libbcache/extents.c b/libbcache/extents.c index 4b8a266..523f3f4 100644 --- a/libbcache/extents.c +++ b/libbcache/extents.c @@ -561,7 +561,7 @@ static void btree_ptr_debugcheck(struct cache_set *c, struct btree *b, if (replicas < c->sb.meta_replicas_have) { bch_bkey_val_to_text(c, btree_node_type(b), buf, sizeof(buf), k); - cache_set_bug(c, + bch_fs_bug(c, "btree key bad (too few replicas, %u < %u): %s", replicas, c->sb.meta_replicas_have, buf); return; @@ -570,7 +570,7 @@ static void btree_ptr_debugcheck(struct cache_set *c, struct btree *b, return; err: bch_bkey_val_to_text(c, btree_node_type(b), buf, sizeof(buf), k); - cache_set_bug(c, "%s btree pointer %s: bucket %zi prio %i " + bch_fs_bug(c, "%s btree pointer %s: bucket %zi prio %i " "gen %i last_gc %i mark %08x", err, buf, PTR_BUCKET_NR(ca, ptr), g->read_prio, PTR_BUCKET(ca, ptr)->mark.gen, @@ -609,13 +609,13 @@ bch_btree_pick_ptr(struct cache_set *c, const struct btree *b) extent_for_each_online_device_crc(c, e, crc, ptr, ca) { struct btree *root = btree_node_root(c, b); - if (cache_set_inconsistent_on(crc, c, + if (bch_fs_inconsistent_on(crc, c, "btree node pointer with crc at btree %u level %u/%u bucket %zu", b->btree_id, b->level, root ? root->level : -1, PTR_BUCKET_NR(ca, ptr))) break; - if (cache_inconsistent_on(ptr_stale(ca, ptr), ca, + if (bch_dev_inconsistent_on(ptr_stale(ca, ptr), ca, "stale btree node pointer at btree %u level %u/%u bucket %zu", b->btree_id, b->level, root ? root->level : -1, PTR_BUCKET_NR(ca, ptr))) @@ -1556,8 +1556,8 @@ next: stop: extent_insert_committed(s); - bch_cache_set_stats_apply(c, &s->stats, s->trans->disk_res, - gc_pos_btree_node(b)); + bch_fs_stats_apply(c, &s->stats, s->trans->disk_res, + gc_pos_btree_node(b)); EBUG_ON(bkey_cmp(iter->pos, s->committed)); EBUG_ON((bkey_cmp(iter->pos, b->key.k.p) == 0) != iter->at_end_of_leaf); @@ -1718,8 +1718,8 @@ stop: bkey_start_offset(&insert->k->k), insert->k->k.size); - bch_cache_set_stats_apply(c, &s.stats, trans->disk_res, - gc_pos_btree_node(b)); + bch_fs_stats_apply(c, &s.stats, trans->disk_res, + gc_pos_btree_node(b)); EBUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&insert->k->k))); EBUG_ON(bkey_cmp(iter->pos, s.committed)); @@ -1870,10 +1870,10 @@ static void bch_extent_debugcheck_extent(struct cache_set *c, struct btree *b, stale = ptr_stale(ca, ptr); - cache_set_bug_on(stale && !ptr->cached, c, + bch_fs_bug_on(stale && !ptr->cached, c, "stale dirty pointer"); - cache_set_bug_on(stale > 96, c, + bch_fs_bug_on(stale > 96, c, "key too stale: %i", stale); @@ -1897,7 +1897,7 @@ static void bch_extent_debugcheck_extent(struct cache_set *c, struct btree *b, if (replicas > BCH_REPLICAS_MAX) { bch_bkey_val_to_text(c, btree_node_type(b), buf, sizeof(buf), e.s_c); - cache_set_bug(c, + bch_fs_bug(c, "extent key bad (too many replicas: %u): %s", replicas, buf); return; @@ -1907,7 +1907,7 @@ static void bch_extent_debugcheck_extent(struct cache_set *c, struct btree *b, replicas < c->sb.data_replicas_have) { bch_bkey_val_to_text(c, btree_node_type(b), buf, sizeof(buf), e.s_c); - cache_set_bug(c, + bch_fs_bug(c, "extent key bad (too few replicas, %u < %u): %s", replicas, c->sb.data_replicas_have, buf); return; @@ -1918,20 +1918,20 @@ static void bch_extent_debugcheck_extent(struct cache_set *c, struct btree *b, bad_device: bch_bkey_val_to_text(c, btree_node_type(b), buf, sizeof(buf), e.s_c); - 
cache_set_bug(c, "extent pointer to dev %u missing device: %s", - ptr->dev, buf); + bch_fs_bug(c, "extent pointer to dev %u missing device: %s", + ptr->dev, buf); cache_member_info_put(); return; bad_ptr: bch_bkey_val_to_text(c, btree_node_type(b), buf, sizeof(buf), e.s_c); - cache_set_bug(c, "extent pointer bad gc mark: %s:\nbucket %zu prio %i " - "gen %i last_gc %i mark 0x%08x", - buf, PTR_BUCKET_NR(ca, ptr), - g->read_prio, PTR_BUCKET(ca, ptr)->mark.gen, - ca->oldest_gens[PTR_BUCKET_NR(ca, ptr)], - (unsigned) g->mark.counter); + bch_fs_bug(c, "extent pointer bad gc mark: %s:\nbucket %zu prio %i " + "gen %i last_gc %i mark 0x%08x", + buf, PTR_BUCKET_NR(ca, ptr), + g->read_prio, PTR_BUCKET(ca, ptr)->mark.gen, + ca->oldest_gens[PTR_BUCKET_NR(ca, ptr)], + (unsigned) g->mark.counter); cache_member_info_put(); return; } diff --git a/libbcache/fs-gc.c b/libbcache/fs-gc.c index a758e89..e9585fd 100644 --- a/libbcache/fs-gc.c +++ b/libbcache/fs-gc.c @@ -634,7 +634,7 @@ static int bch_gc_do_inode(struct cache_set *c, bool do_update = false; ret = bch_inode_unpack(inode, &u); - if (cache_set_inconsistent_on(ret, c, + if (bch_fs_inconsistent_on(ret, c, "error unpacking inode %llu in fs-gc", inode.k->p.inode)) return ret; diff --git a/libbcache/fs.c b/libbcache/fs.c index 76948e7..ab0d972 100644 --- a/libbcache/fs.c +++ b/libbcache/fs.c @@ -831,7 +831,8 @@ static int bch_inode_user_flags_set(struct bch_inode_info *ei, #define FS_IOC_GOINGDOWN _IOR ('X', 125, __u32) -static long bch_fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +static long bch_fs_file_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) { struct inode *inode = file_inode(filp); struct super_block *sb = inode->i_sb; @@ -893,12 +894,12 @@ setflags_out: down_write(&sb->s_umount); sb->s_flags |= MS_RDONLY; - bch_cache_set_emergency_read_only(c); + bch_fs_emergency_read_only(c); up_write(&sb->s_umount); return 0; default: - return bch_cache_set_ioctl(c, cmd, (void __user *) arg); + return bch_fs_ioctl(c, cmd, (void __user *) arg); } } @@ -916,7 +917,7 @@ static long bch_compat_fs_ioctl(struct file *file, unsigned int cmd, unsigned lo default: return -ENOIOCTLCMD; } - return bch_fs_ioctl(file, cmd, (unsigned long) compat_ptr(arg)); + return bch_fs_file_ioctl(file, cmd, (unsigned long) compat_ptr(arg)); } #endif @@ -946,7 +947,7 @@ static const struct file_operations bch_file_operations = { .splice_read = generic_file_splice_read, .splice_write = iter_file_splice_write, .fallocate = bch_fallocate_dispatch, - .unlocked_ioctl = bch_fs_ioctl, + .unlocked_ioctl = bch_fs_file_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = bch_compat_fs_ioctl, #endif @@ -982,7 +983,7 @@ static const struct file_operations bch_dir_file_operations = { .read = generic_read_dir, .iterate = bch_vfs_readdir, .fsync = bch_fsync, - .unlocked_ioctl = bch_fs_ioctl, + .unlocked_ioctl = bch_fs_file_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = bch_compat_fs_ioctl, #endif @@ -1156,7 +1157,7 @@ static int bch_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_type = BCACHE_STATFS_MAGIC; buf->f_bsize = sb->s_blocksize; buf->f_blocks = c->capacity >> PAGE_SECTOR_SHIFT; - buf->f_bfree = (c->capacity - cache_set_sectors_used(c)) >> PAGE_SECTOR_SHIFT; + buf->f_bfree = (c->capacity - bch_fs_sectors_used(c)) >> PAGE_SECTOR_SHIFT; buf->f_bavail = buf->f_bfree; buf->f_files = atomic_long_read(&c->nr_inodes); buf->f_ffree = U64_MAX; @@ -1190,7 +1191,7 @@ static struct cache_set *bdev_to_cache_set(struct block_device *bdev) rcu_read_lock(); - 
list_for_each_entry(c, &bch_cache_sets, list) + list_for_each_entry(c, &bch_fs_list, list) for_each_cache_rcu(ca, c, i) if (ca->disk_sb.bdev == bdev) { rcu_read_unlock(); @@ -1203,7 +1204,7 @@ static struct cache_set *bdev_to_cache_set(struct block_device *bdev) } static struct cache_set *bch_open_as_blockdevs(const char *_dev_name, - struct cache_set_opts opts) + struct bch_opts opts) { size_t nr_devs = 0, i = 0; char *dev_name, *s, **devs; @@ -1226,7 +1227,7 @@ static struct cache_set *bch_open_as_blockdevs(const char *_dev_name, (s = strchr(s, ':')) && (*s++ = '\0')) devs[i++] = s; - err = bch_register_cache_set(devs, nr_devs, opts, &c); + err = bch_fs_open(devs, nr_devs, opts, &c); if (err) { /* * Already open? @@ -1256,7 +1257,7 @@ static struct cache_set *bch_open_as_blockdevs(const char *_dev_name, if (!c) goto err_unlock; - if (!test_bit(CACHE_SET_RUNNING, &c->flags)) { + if (!test_bit(BCH_FS_RUNNING, &c->flags)) { err = "incomplete cache set"; c = NULL; goto err_unlock; @@ -1266,7 +1267,7 @@ static struct cache_set *bch_open_as_blockdevs(const char *_dev_name, mutex_unlock(&bch_register_lock); } - set_bit(CACHE_SET_BDEV_MOUNTED, &c->flags); + set_bit(BCH_FS_BDEV_MOUNTED, &c->flags); err: kfree(devs); kfree(dev_name); @@ -1281,10 +1282,12 @@ err_unlock: static int bch_remount(struct super_block *sb, int *flags, char *data) { struct cache_set *c = sb->s_fs_info; - struct cache_set_opts opts; + struct bch_opts opts = bch_opts_empty(); int ret; - ret = bch_parse_options(&opts, *flags, data); + opts.read_only = (*flags & MS_RDONLY) != 0; + + ret = bch_parse_mount_opts(&opts, data); if (ret) return ret; @@ -1295,11 +1298,11 @@ static int bch_remount(struct super_block *sb, int *flags, char *data) const char *err = NULL; if (opts.read_only) { - bch_cache_set_read_only_sync(c); + bch_fs_read_only_sync(c); sb->s_flags |= MS_RDONLY; } else { - err = bch_cache_set_read_write(c); + err = bch_fs_read_write(c); if (err) { bch_err(c, "error going rw: %s", err); ret = -EINVAL; @@ -1355,11 +1358,13 @@ static struct dentry *bch_mount(struct file_system_type *fs_type, struct cache *ca; struct super_block *sb; struct inode *inode; - struct cache_set_opts opts; + struct bch_opts opts = bch_opts_empty(); unsigned i; int ret; - ret = bch_parse_options(&opts, flags, data); + opts.read_only = (flags & MS_RDONLY) != 0; + + ret = bch_parse_mount_opts(&opts, data); if (ret) return ERR_PTR(ret); @@ -1443,16 +1448,9 @@ static void bch_kill_sb(struct super_block *sb) generic_shutdown_super(sb); - if (test_bit(CACHE_SET_BDEV_MOUNTED, &c->flags)) { - DECLARE_COMPLETION_ONSTACK(complete); - - c->stop_completion = &complete; - bch_cache_set_stop(c); - closure_put(&c->cl); - - /* Killable? 
*/ - wait_for_completion(&complete); - } else + if (test_bit(BCH_FS_BDEV_MOUNTED, &c->flags)) + bch_fs_stop_sync(c); + else closure_put(&c->cl); } diff --git a/libbcache/fs.h b/libbcache/fs.h index aec6159..933fb6d 100644 --- a/libbcache/fs.h +++ b/libbcache/fs.h @@ -41,6 +41,8 @@ static inline unsigned nlink_bias(umode_t mode) struct bch_inode_unpacked; +#ifndef NO_BCACHE_FS + /* returns 0 if we want to do the update, or error is passed up */ typedef int (*inode_set_fn)(struct bch_inode_info *, struct bch_inode_unpacked *, void *); @@ -53,4 +55,11 @@ int __must_check bch_write_inode(struct cache_set *, void bch_fs_exit(void); int bch_fs_init(void); +#else + +static inline void bch_fs_exit(void) {} +static inline int bch_fs_init(void) { return 0; } + +#endif + #endif /* _BCACHE_FS_H */ diff --git a/libbcache/inode.h b/libbcache/inode.h index 81dccf6..46abc2b 100644 --- a/libbcache/inode.h +++ b/libbcache/inode.h @@ -1,6 +1,8 @@ #ifndef _BCACHE_INODE_H #define _BCACHE_INODE_H +#include + extern const struct bkey_ops bch_bkey_inode_ops; struct bch_inode_unpacked { diff --git a/libbcache/io.c b/libbcache/io.c index 2f0e48a..be99a97 100644 --- a/libbcache/io.c +++ b/libbcache/io.c @@ -358,8 +358,8 @@ static void bch_write_endio(struct bio *bio) struct bio *orig = wbio->orig; struct cache *ca = wbio->ca; - if (cache_nonfatal_io_err_on(bio->bi_error, ca, - "data write")) + if (bch_dev_nonfatal_io_err_on(bio->bi_error, ca, + "data write")) set_closure_fn(cl, bch_write_io_error, index_update_wq(op)); bch_account_io_completion_time(ca, wbio->submit_time_us, @@ -722,8 +722,8 @@ void bch_wake_delayed_writes(unsigned long data) spin_lock_irqsave(&c->foreground_write_pd_lock, flags); while ((op = c->write_wait_head)) { - if (!test_bit(CACHE_SET_RO, &c->flags) && - !test_bit(CACHE_SET_STOPPING, &c->flags) && + if (!test_bit(BCH_FS_RO, &c->flags) && + !test_bit(BCH_FS_STOPPING, &c->flags) && time_after(op->expires, jiffies)) { mod_timer(&c->foreground_write_wakeup, op->expires); break; @@ -938,7 +938,7 @@ static int bio_checksum_uncompress(struct cache_set *c, } csum = bch_checksum_bio(c, rbio->crc.csum_type, nonce, src); - if (cache_nonfatal_io_err_on(bch_crc_cmp(rbio->crc.csum, csum), rbio->ca, + if (bch_dev_nonfatal_io_err_on(bch_crc_cmp(rbio->crc.csum, csum), rbio->ca, "data checksum error, inode %llu offset %llu: expected %0llx%0llx got %0llx%0llx (type %u)", rbio->inode, (u64) rbio->parent_iter.bi_sector << 9, rbio->crc.csum.hi, rbio->crc.csum.lo, csum.hi, csum.lo, @@ -1069,8 +1069,8 @@ static void __bch_read_endio(struct cache_set *c, struct bch_read_bio *rbio) } if (rbio->promote && - !test_bit(CACHE_SET_RO, &c->flags) && - !test_bit(CACHE_SET_STOPPING, &c->flags)) { + !test_bit(BCH_FS_RO, &c->flags) && + !test_bit(BCH_FS_STOPPING, &c->flags)) { struct cache_promote_op *promote = rbio->promote; struct closure *cl = &promote->cl; @@ -1119,14 +1119,15 @@ static void bch_read_endio(struct bio *bio) bch_account_io_completion_time(rbio->ca, rbio->submit_time_us, REQ_OP_READ); - cache_nonfatal_io_err_on(bio->bi_error, rbio->ca, "data read"); + bch_dev_nonfatal_io_err_on(bio->bi_error, rbio->ca, "data read"); if (error) { bch_read_error_maybe_retry(c, rbio, error); return; } - if (rbio->crc.compression_type != BCH_COMPRESSION_NONE) { + if (rbio->crc.compression_type != BCH_COMPRESSION_NONE || + bch_csum_type_is_encryption(rbio->crc.csum_type)) { struct bio_decompress_worker *d; preempt_disable(); diff --git a/libbcache/journal.c b/libbcache/journal.c index 3bb9e3c..99dd9f2 100644 --- 
a/libbcache/journal.c +++ b/libbcache/journal.c @@ -285,7 +285,7 @@ int bch_journal_seq_should_ignore(struct cache_set *c, u64 seq, struct btree *b) /* Interier updates aren't journalled: */ BUG_ON(b->level); - BUG_ON(seq > journal_seq && test_bit(CACHE_SET_INITIAL_GC_DONE, &c->flags)); + BUG_ON(seq > journal_seq && test_bit(BCH_FS_INITIAL_GC_DONE, &c->flags)); if (seq <= journal_seq) { if (list_empty_careful(&j->seq_blacklist)) @@ -301,7 +301,7 @@ int bch_journal_seq_should_ignore(struct cache_set *c, u64 seq, struct btree *b) * Decrease this back to j->seq + 2 when we next rev the on disk format: * increasing it temporarily to work around bug in old kernels */ - cache_set_inconsistent_on(seq > journal_seq + 4, c, + bch_fs_inconsistent_on(seq > journal_seq + 4, c, "bset journal seq too far in the future: %llu > %llu", seq, journal_seq); @@ -691,7 +691,7 @@ reread: sectors_read = min_t(unsigned, ret = submit_bio_wait(bio); - if (cache_fatal_io_err_on(ret, ca, + if (bch_dev_fatal_io_err_on(ret, ca, "journal read from sector %llu", offset) || bch_meta_read_fault("journal")) @@ -1412,7 +1412,7 @@ void bch_journal_start(struct cache_set *c) for_each_cache(ca, c, i) if (is_journal_device(ca)) - bch_cache_group_add_cache(&c->journal.devs, ca); + bch_dev_group_add(&c->journal.devs, ca); list_for_each_entry(bl, &j->seq_blacklist, list) new_seq = max(new_seq, bl->seq); @@ -1566,7 +1566,7 @@ static int bch_set_nr_journal_buckets(struct cache *ca, unsigned nr) return 0; } -int bch_cache_journal_alloc(struct cache *ca) +int bch_dev_journal_alloc(struct cache *ca) { struct journal_device *ja = &ca->journal; struct bch_sb_field_journal *journal_buckets; @@ -1882,7 +1882,7 @@ static void journal_reclaim_work(struct work_struct *work) j->last_flushed = jiffies; } - if (!test_bit(CACHE_SET_RO, &c->flags)) + if (!test_bit(BCH_FS_RO, &c->flags)) queue_delayed_work(system_freezable_wq, &j->reclaim_work, msecs_to_jiffies(j->reclaim_delay_ms)); } @@ -2014,7 +2014,7 @@ static void journal_write_endio(struct bio *bio) struct cache *ca = bio->bi_private; struct journal *j = &ca->set->journal; - if (cache_fatal_io_err_on(bio->bi_error, ca, "journal write") || + if (bch_dev_fatal_io_err_on(bio->bi_error, ca, "journal write") || bch_meta_write_fault("journal")) bch_journal_halt(j); diff --git a/libbcache/journal.h b/libbcache/journal.h index 9274831..02a6e67 100644 --- a/libbcache/journal.h +++ b/libbcache/journal.h @@ -361,7 +361,7 @@ int bch_journal_alloc(struct journal *, unsigned); ssize_t bch_journal_print_debug(struct journal *, char *); -int bch_cache_journal_alloc(struct cache *); +int bch_dev_journal_alloc(struct cache *); static inline unsigned bch_nr_journal_buckets(struct bch_sb_field_journal *j) { diff --git a/libbcache/movinggc.c b/libbcache/movinggc.c index 83407eb..e40dfbc 100644 --- a/libbcache/movinggc.c +++ b/libbcache/movinggc.c @@ -274,7 +274,7 @@ int bch_moving_gc_thread_start(struct cache *ca) if (ca->set->opts.nochanges) return 0; - if (cache_set_init_fault("moving_gc_start")) + if (bch_fs_init_fault("moving_gc_start")) return -ENOMEM; t = kthread_create(bch_moving_gc_thread, ca, "bch_copygc_read"); diff --git a/libbcache/notify.c b/libbcache/notify.c index 3a50f8f..00b7999 100644 --- a/libbcache/notify.c +++ b/libbcache/notify.c @@ -47,28 +47,28 @@ static void notify_put(struct cache_set *c) mutex_unlock(&c->uevent_lock); } -void bch_notify_cache_set_read_write(struct cache_set *c) +void bch_notify_fs_read_write(struct cache_set *c) { notify_get(c); notify_var(c, "STATE=active"); 
notify_put(c); } -void bch_notify_cache_set_read_only(struct cache_set *c) +void bch_notify_fs_read_only(struct cache_set *c) { notify_get(c); notify_var(c, "STATE=readonly"); notify_put(c); } -void bch_notify_cache_set_stopped(struct cache_set *c) +void bch_notify_fs_stopped(struct cache_set *c) { notify_get(c); notify_var(c, "STATE=stopped"); notify_put(c); } -void bch_notify_cache_read_write(struct cache *ca) +void bch_notify_dev_read_write(struct cache *ca) { struct cache_set *c = ca->set; @@ -77,7 +77,7 @@ void bch_notify_cache_read_write(struct cache *ca) notify_put(c); } -void bch_notify_cache_read_only(struct cache *ca) +void bch_notify_dev_read_only(struct cache *ca) { struct cache_set *c = ca->set; @@ -86,7 +86,7 @@ void bch_notify_cache_read_only(struct cache *ca) notify_put(c); } -void bch_notify_cache_added(struct cache *ca) +void bch_notify_dev_added(struct cache *ca) { struct cache_set *c = ca->set; @@ -95,7 +95,7 @@ void bch_notify_cache_added(struct cache *ca) notify_put(c); } -void bch_notify_cache_removing(struct cache *ca) +void bch_notify_dev_removing(struct cache *ca) { struct cache_set *c = ca->set; @@ -104,7 +104,7 @@ void bch_notify_cache_removing(struct cache *ca) notify_put(c); } -void bch_notify_cache_remove_failed(struct cache *ca) +void bch_notify_dev_remove_failed(struct cache *ca) { struct cache_set *c = ca->set; @@ -113,7 +113,7 @@ void bch_notify_cache_remove_failed(struct cache *ca) notify_put(c); } -void bch_notify_cache_removed(struct cache *ca) +void bch_notify_dev_removed(struct cache *ca) { struct cache_set *c = ca->set; @@ -122,7 +122,7 @@ void bch_notify_cache_removed(struct cache *ca) notify_put(c); } -void bch_notify_cache_error(struct cache *ca, bool fatal) +void bch_notify_dev_error(struct cache *ca, bool fatal) { struct cache_set *c = ca->set; diff --git a/libbcache/notify.h b/libbcache/notify.h index 80d6587..e1971db 100644 --- a/libbcache/notify.h +++ b/libbcache/notify.h @@ -7,16 +7,34 @@ #ifndef _NOTIFY_H #define _NOTIFY_H -void bch_notify_cache_set_read_write(struct cache_set *); -void bch_notify_cache_set_read_only(struct cache_set *); -void bch_notify_cache_set_stopped(struct cache_set *); - -void bch_notify_cache_read_write(struct cache *); -void bch_notify_cache_read_only(struct cache *); -void bch_notify_cache_added(struct cache *); -void bch_notify_cache_removing(struct cache *); -void bch_notify_cache_removed(struct cache *); -void bch_notify_cache_remove_failed(struct cache *); -void bch_notify_cache_error(struct cache *, bool); +#ifndef NO_BCACHE_NOTIFY + +void bch_notify_fs_read_write(struct cache_set *); +void bch_notify_fs_read_only(struct cache_set *); +void bch_notify_fs_stopped(struct cache_set *); + +void bch_notify_dev_read_write(struct cache *); +void bch_notify_dev_read_only(struct cache *); +void bch_notify_dev_added(struct cache *); +void bch_notify_dev_removing(struct cache *); +void bch_notify_dev_removed(struct cache *); +void bch_notify_dev_remove_failed(struct cache *); +void bch_notify_dev_error(struct cache *, bool); + +#else + +static inline void bch_notify_fs_read_write(struct cache_set *c) {} +static inline void bch_notify_fs_read_only(struct cache_set *c) {} +static inline void bch_notify_fs_stopped(struct cache_set *c) {} + +static inline void bch_notify_dev_read_write(struct cache *ca) {} +static inline void bch_notify_dev_read_only(struct cache *ca) {} +static inline void bch_notify_dev_added(struct cache *ca) {} +static inline void bch_notify_dev_removing(struct cache *ca) {} +static inline void 
bch_notify_dev_removed(struct cache *ca) {} +static inline void bch_notify_dev_remove_failed(struct cache *ca) {} +static inline void bch_notify_dev_error(struct cache *ca, bool b) {} + +#endif #endif /* _NOTIFY_H */ diff --git a/libbcache/opts.c b/libbcache/opts.c index 333654e..ea71dfb 100644 --- a/libbcache/opts.c +++ b/libbcache/opts.c @@ -49,7 +49,7 @@ const char * const bch_cache_modes[] = { NULL }; -const char * const bch_cache_state[] = { +const char * const bch_dev_state[] = { "active", "readonly", "failed", @@ -57,148 +57,186 @@ const char * const bch_cache_state[] = { NULL }; - -const char * const bch_bool_opt[] = { - "0", - "1", - NULL -}; - -const char * const bch_uint_opt[] = { - NULL -}; - -enum bch_opts { -#define BCH_OPT(_name, _choices, _min, _max, _sb_opt, _perm) \ - Opt_##_name, - +const struct bch_option bch_opt_table[] = { +#define OPT_BOOL() .type = BCH_OPT_BOOL +#define OPT_UINT(_min, _max) .type = BCH_OPT_UINT, .min = _min, .max = _max +#define OPT_STR(_choices) .type = BCH_OPT_STR, .choices = _choices + +#define BCH_OPT(_name, _mode, _sb_opt, _bits, _type) \ + [Opt_##_name] = { \ + .name = #_name, \ + .set_sb = SET_##_sb_opt, \ + _type \ + }, BCH_VISIBLE_OPTS() #undef BCH_OPT - - Opt_bad_opt, }; -struct bch_option { - const char *name; - const char * const *opts; - unsigned long min, max; -}; - -struct bch_opt_result { - enum bch_opts opt; - unsigned val; -}; - -static int parse_bool_opt(const struct bch_option *opt, const char *s) +static enum bch_opt_id bch_opt_lookup(const char *name) { - if (!strcmp(opt->name, s)) - return true; + const struct bch_option *i; - if (!strncmp("no", s, 2) && !strcmp(opt->name, s + 2)) - return false; + for (i = bch_opt_table; + i < bch_opt_table + ARRAY_SIZE(bch_opt_table); + i++) + if (!strcmp(name, i->name)) + return i - bch_opt_table; return -1; } -static int parse_uint_opt(const struct bch_option *opt, const char *s) +static u64 bch_opt_get(struct bch_opts *opts, enum bch_opt_id id) { - unsigned long v; - int ret; - - if (strncmp(opt->name, s, strlen(opt->name))) - return -1; + switch (id) { +#define BCH_OPT(_name, ...) \ + case Opt_##_name: \ + return opts->_name; \ - s += strlen(opt->name); - - if (*s != '=') - return -1; + BCH_VISIBLE_OPTS() +#undef BCH_OPT - s++; + default: + BUG(); + } +} - ret = kstrtoul(s, 10, &v); - if (ret) - return ret; +void bch_opt_set(struct bch_opts *opts, enum bch_opt_id id, u64 v) +{ + switch (id) { +#define BCH_OPT(_name, ...) \ + case Opt_##_name: \ + opts->_name = v; \ + break; - if (v < opt->min || v >= opt->max) - return -ERANGE; + BCH_VISIBLE_OPTS() +#undef BCH_OPT - return 0; + default: + BUG(); + } } -static int parse_string_opt(const struct bch_option *opt, const char *s) +/* + * Initial options from superblock - here we don't want any options undefined, + * any options the superblock doesn't specify are set to 0: + */ +struct bch_opts bch_sb_opts(struct bch_sb *sb) { - if (strncmp(opt->name, s, strlen(opt->name))) - return -1; + struct bch_opts opts = bch_opts_empty(); - s += strlen(opt->name); +#define BCH_OPT(_name, _mode, _sb_opt, ...) 
\ + if (_sb_opt != NO_SB_OPT) \ + opts._name = _sb_opt(sb); - if (*s != '=') - return -1; - - s++; + BCH_OPTS() +#undef BCH_OPT - return bch_read_string_list(s, opt->opts); + return opts; } -static struct bch_opt_result parse_one_opt(const char *opt) +int parse_one_opt(enum bch_opt_id id, const char *val, u64 *res) { - static const struct bch_option opt_table[] = { -#define BCH_OPT(_name, _choices, _min, _max, _sb_opt, _perm) \ - [Opt_##_name] = { \ - .name = #_name, \ - .opts = _choices, \ - .min = _min, \ - .max = _max, \ - }, - BCH_VISIBLE_OPTS() -#undef BCH_OPT - }, *i; - - for (i = opt_table; - i < opt_table + ARRAY_SIZE(opt_table); - i++) { - int res = i->opts == bch_bool_opt ? parse_bool_opt(i, opt) - : i->opts == bch_uint_opt ? parse_uint_opt(i, opt) - : parse_string_opt(i, opt); - - if (res >= 0) - return (struct bch_opt_result) { - i - opt_table, res - }; + const struct bch_option *opt = &bch_opt_table[id]; + ssize_t ret; + + switch (opt->type) { + case BCH_OPT_BOOL: + ret = kstrtou64(val, 10, res); + if (ret < 0) + return ret; + + if (*res > 1) + return -ERANGE; + break; + case BCH_OPT_UINT: + ret = kstrtou64(val, 10, res); + if (ret < 0) + return ret; + + if (*res < opt->min || *res >= opt->max) + return -ERANGE; + break; + case BCH_OPT_STR: + ret = bch_read_string_list(val, opt->choices); + if (ret < 0) + return ret; + + *res = ret; + break; } - return (struct bch_opt_result) { Opt_bad_opt }; + return 0; } -int bch_parse_options(struct cache_set_opts *opts, int flags, char *options) +int bch_parse_mount_opts(struct bch_opts *opts, char *options) { - char *p; + char *opt, *name, *val; + enum bch_opt_id id; + int ret; + u64 v; + + while ((opt = strsep(&options, ",")) != NULL) { + name = strsep(&opt, "="); + val = opt; + + if (val) { + id = bch_opt_lookup(name); + if (id < 0) + return -EINVAL; + + ret = parse_one_opt(id, val, &v); + if (ret < 0) + return ret; + } else { + id = bch_opt_lookup(name); + v = 1; + + if (id < 0 && + !strncmp("no", name, 2)) { + id = bch_opt_lookup(name + 2); + v = 0; + } + + if (bch_opt_table[id].type != BCH_OPT_BOOL) + return -EINVAL; + } - *opts = cache_set_opts_empty(); + bch_opt_set(opts, id, v); + } - opts->read_only = (flags & MS_RDONLY) != 0; + return 0; +} - if (!options) - return 0; +enum bch_opt_id bch_parse_sysfs_opt(const char *name, const char *val, + u64 *res) +{ + enum bch_opt_id id = bch_opt_lookup(name); + int ret; - while ((p = strsep(&options, ",")) != NULL) { - struct bch_opt_result res = parse_one_opt(p); + if (id < 0) + return -EINVAL; - switch (res.opt) { -#define BCH_OPT(_name, _choices, _min, _max, _sb_opt, _perm) \ - case Opt_##_name: \ - opts->_name = res.val; \ - break; + ret = parse_one_opt(id, val, res); + if (ret < 0) + return ret; - BCH_VISIBLE_OPTS() -#undef BCH_OPT + return id; +} - case Opt_bad_opt: - return -EINVAL; - default: - BUG(); - } - } +ssize_t bch_opt_show(struct bch_opts *opts, const char *name, + char *buf, size_t size) +{ + enum bch_opt_id id = bch_opt_lookup(name); + const struct bch_option *opt; + u64 v; - return 0; + if (id < 0) + return -EINVAL; + + v = bch_opt_get(opts, id); + opt = &bch_opt_table[id]; + + return opt->type == BCH_OPT_STR + ? 
bch_snprint_string_list(buf, size, opt->choices, v) + : snprintf(buf, size, "%lli\n", v); } diff --git a/libbcache/opts.h b/libbcache/opts.h index 1d30848..95184db 100644 --- a/libbcache/opts.h +++ b/libbcache/opts.h @@ -12,7 +12,7 @@ extern const char * const bch_compression_types[]; extern const char * const bch_str_hash_types[]; extern const char * const bch_cache_replacement_policies[]; extern const char * const bch_cache_modes[]; -extern const char * const bch_cache_state[]; +extern const char * const bch_dev_state[]; /* * Mount options; we also store defaults in the superblock. @@ -22,92 +22,135 @@ extern const char * const bch_cache_state[]; * updates the superblock. * * We store options as signed integers, where -1 means undefined. This means we - * can pass the mount options to cache_set_alloc() as a whole struct, and then - * only apply the options from that struct that are defined. + * can pass the mount options to bch_fs_alloc() as a whole struct, and then only + * apply the options from that struct that are defined. */ -extern const char * const bch_bool_opt[]; -extern const char * const bch_uint_opt[]; - /* dummy option, for options that aren't stored in the superblock */ LE64_BITMASK(NO_SB_OPT, struct bch_sb, flags[0], 0, 0); -#define BCH_VISIBLE_OPTS() \ - BCH_OPT(verbose_recovery, \ - bch_bool_opt, 0, 2, \ - NO_SB_OPT, false) \ - BCH_OPT(posix_acl, \ - bch_bool_opt, 0, 2, \ - NO_SB_OPT, false) \ - BCH_OPT(journal_flush_disabled, \ - bch_bool_opt, 0, 2, \ - NO_SB_OPT, true) \ - BCH_OPT(nofsck, \ - bch_bool_opt, 0, 2, \ - NO_SB_OPT, true) \ - BCH_OPT(fix_errors, \ - bch_bool_opt, 0, 2, \ - NO_SB_OPT, true) \ - BCH_OPT(nochanges, \ - bch_bool_opt, 0, 2, \ - NO_SB_OPT, 0) \ - BCH_OPT(noreplay, \ - bch_bool_opt, 0, 2, \ - NO_SB_OPT, 0) \ - BCH_OPT(norecovery, \ - bch_bool_opt, 0, 2, \ - NO_SB_OPT, 0) \ - BCH_SB_OPTS() - -#define BCH_OPTS() \ - BCH_OPT(read_only, \ - bch_bool_opt, 0, 2, \ - NO_SB_OPT, 0) \ +/** + * BCH_OPT(name, mode, sb_opt, type, ...) 
+ * + * @name - name of mount option, sysfs attribute, and struct bch_opts + * member + * + * @mode - sysfs attr permissions + * + * @sb_option - name of corresponding superblock option + * + * @type - one of OPT_BOOL, OPT_UINT, OPT_STR + */ + +enum opt_type { + BCH_OPT_BOOL, + BCH_OPT_UINT, + BCH_OPT_STR, +}; + +#define BCH_VISIBLE_OPTS() \ + BCH_OPT(errors, 0644, BCH_SB_ERROR_ACTION, \ + s8, OPT_STR(bch_error_actions)) \ + BCH_OPT(metadata_replicas, 0444, BCH_SB_META_REPLICAS_WANT,\ + s8, OPT_UINT(0, BCH_REPLICAS_MAX)) \ + BCH_OPT(data_replicas, 0444, BCH_SB_DATA_REPLICAS_WANT,\ + s8, OPT_UINT(0, BCH_REPLICAS_MAX)) \ + BCH_OPT(metadata_checksum, 0644, BCH_SB_META_CSUM_TYPE, \ + s8, OPT_STR(bch_csum_types)) \ + BCH_OPT(data_checksum, 0644, BCH_SB_DATA_CSUM_TYPE, \ + s8, OPT_STR(bch_csum_types)) \ + BCH_OPT(compression, 0644, BCH_SB_COMPRESSION_TYPE,\ + s8, OPT_STR(bch_compression_types)) \ + BCH_OPT(str_hash, 0644, BCH_SB_STR_HASH_TYPE, \ + s8, OPT_STR(bch_str_hash_types)) \ + BCH_OPT(inodes_32bit, 0644, BCH_SB_INODE_32BIT, \ + s8, OPT_BOOL()) \ + BCH_OPT(gc_reserve_percent, 0444, BCH_SB_GC_RESERVE, \ + s8, OPT_UINT(5, 21)) \ + BCH_OPT(root_reserve_percent, 0444, BCH_SB_ROOT_RESERVE, \ + s8, OPT_UINT(0, 100)) \ + BCH_OPT(wide_macs, 0644, BCH_SB_128_BIT_MACS, \ + s8, OPT_BOOL()) \ + BCH_OPT(verbose_recovery, 0444, NO_SB_OPT, \ + s8, OPT_BOOL()) \ + BCH_OPT(posix_acl, 0444, NO_SB_OPT, \ + s8, OPT_BOOL()) \ + BCH_OPT(journal_flush_disabled, 0644, NO_SB_OPT, \ + s8, OPT_BOOL()) \ + BCH_OPT(nofsck, 0444, NO_SB_OPT, \ + s8, OPT_BOOL()) \ + BCH_OPT(fix_errors, 0444, NO_SB_OPT, \ + s8, OPT_BOOL()) \ + BCH_OPT(nochanges, 0444, NO_SB_OPT, \ + s8, OPT_BOOL()) \ + BCH_OPT(noreplay, 0444, NO_SB_OPT, \ + s8, OPT_BOOL()) \ + BCH_OPT(norecovery, 0444, NO_SB_OPT, \ + s8, OPT_BOOL()) + +#define BCH_OPTS() \ + BCH_OPT(read_only, 0444, NO_SB_OPT, \ + s8, OPT_BOOL()) \ BCH_VISIBLE_OPTS() -struct cache_set_opts { -#define BCH_OPT(_name, _choices, _min, _max, _sb_opt, _perm)\ - s8 _name; +struct bch_opts { +#define BCH_OPT(_name, _mode, _sb_opt, _bits, ...) \ + _bits _name; BCH_OPTS() #undef BCH_OPT }; -static inline struct cache_set_opts cache_set_opts_empty(void) +enum bch_opt_id { +#define BCH_OPT(_name, ...) \ + Opt_##_name, + + BCH_VISIBLE_OPTS() +#undef BCH_OPT +}; + +struct bch_option { + const char *name; + void (*set_sb)(struct bch_sb *, u64); + enum opt_type type; + + union { + struct { + u64 min, max; + }; + struct { + const char * const *choices; + }; + }; + +}; + +extern const struct bch_option bch_opt_table[]; + +static inline struct bch_opts bch_opts_empty(void) { - struct cache_set_opts ret; + struct bch_opts ret; memset(&ret, 255, sizeof(ret)); return ret; } -/* - * Initial options from superblock - here we don't want any options undefined, - * any options the superblock doesn't specify are set to 0: - */ -static inline struct cache_set_opts cache_superblock_opts(struct bch_sb *sb) +static inline void bch_opts_apply(struct bch_opts *dst, struct bch_opts src) { - return (struct cache_set_opts) { -#define BCH_OPT(_name, _choices, _min, _max, _sb_opt, _perm)\ - ._name = _sb_opt##_BITS ? _sb_opt(sb) : 0, +#define BCH_OPT(_name, ...) 
\ + if (src._name >= 0) \ + dst->_name = src._name; - BCH_SB_OPTS() + BCH_OPTS() #undef BCH_OPT - }; } -static inline void cache_set_opts_apply(struct cache_set_opts *dst, - struct cache_set_opts src) -{ -#define BCH_OPT(_name, _choices, _min, _max, _sb_opt, _perm)\ - BUILD_BUG_ON(_max > S8_MAX); \ - if (src._name >= 0) \ - dst->_name = src._name; +void bch_opt_set(struct bch_opts *, enum bch_opt_id, u64); +struct bch_opts bch_sb_opts(struct bch_sb *); - BCH_SB_OPTS() -#undef BCH_OPT -} +int bch_parse_mount_opts(struct bch_opts *, char *); +enum bch_opt_id bch_parse_sysfs_opt(const char *, const char *, u64 *); -int bch_parse_options(struct cache_set_opts *, int, char *); +ssize_t bch_opt_show(struct bch_opts *, const char *, char *, size_t); #endif /* _BCACHE_OPTS_H */ diff --git a/libbcache/stats.h b/libbcache/stats.h index 39877f9..c177ce3 100644 --- a/libbcache/stats.h +++ b/libbcache/stats.h @@ -7,11 +7,27 @@ struct cache_set; struct cached_dev; struct bcache_device; +#ifndef NO_BCACHE_ACCOUNTING + void bch_cache_accounting_init(struct cache_accounting *, struct closure *); int bch_cache_accounting_add_kobjs(struct cache_accounting *, struct kobject *); void bch_cache_accounting_clear(struct cache_accounting *); void bch_cache_accounting_destroy(struct cache_accounting *); +#else + +static inline void bch_cache_accounting_init(struct cache_accounting *acc, + struct closure *cl) {} +static inline int bch_cache_accounting_add_kobjs(struct cache_accounting *acc, + struct kobject *cl) +{ + return 0; +} +static inline void bch_cache_accounting_clear(struct cache_accounting *acc) {} +static inline void bch_cache_accounting_destroy(struct cache_accounting *acc) {} + +#endif + static inline void mark_cache_stats(struct cache_stat_collector *stats, bool hit, bool bypass) { diff --git a/libbcache/super-io.c b/libbcache/super-io.c index 66338a1..be27d3e 100644 --- a/libbcache/super-io.c +++ b/libbcache/super-io.c @@ -370,7 +370,7 @@ static bool bch_is_open_cache(struct block_device *bdev) unsigned i; rcu_read_lock(); - list_for_each_entry(c, &bch_cache_sets, list) + list_for_each_entry(c, &bch_fs_list, list) for_each_cache_rcu(ca, c, i) if (ca->disk_sb.bdev == bdev) { rcu_read_unlock(); @@ -388,7 +388,7 @@ static bool bch_is_open(struct block_device *bdev) } static const char *bch_blkdev_open(const char *path, void *holder, - struct cache_set_opts opts, + struct bch_opts opts, struct block_device **ret) { struct block_device *bdev; @@ -423,9 +423,8 @@ static const char *bch_blkdev_open(const char *path, void *holder, } /* Update cached mi: */ -int bch_cache_set_mi_update(struct cache_set *c, - struct bch_member *mi, - unsigned nr_devices) +int bch_fs_mi_update(struct cache_set *c, struct bch_member *mi, + unsigned nr_devices) { struct cache_member_rcu *new, *old; struct cache *ca; @@ -529,7 +528,7 @@ int bch_sb_to_cache_set(struct cache_set *c, struct bch_sb *src) if (bch_fs_sb_realloc(c, le32_to_cpu(src->u64s) - journal_u64s)) return -ENOMEM; - if (bch_cache_set_mi_update(c, members->members, src->nr_devices)) + if (bch_fs_mi_update(c, members->members, src->nr_devices)) return -ENOMEM; __copy_super(c->disk_sb, src); @@ -608,7 +607,7 @@ reread: } const char *bch_read_super(struct bcache_superblock *sb, - struct cache_set_opts opts, + struct bch_opts opts, const char *path) { struct bch_sb_layout layout; @@ -628,7 +627,7 @@ const char *bch_read_super(struct bcache_superblock *sb, goto err; err = "dynamic fault"; - if (cache_set_init_fault("read_super")) + if (bch_fs_init_fault("read_super")) 
goto err; err = read_one_super(sb, BCH_SB_SECTOR); @@ -698,7 +697,7 @@ static void write_super_endio(struct bio *bio) /* XXX: return errors directly */ - cache_fatal_io_err_on(bio->bi_error, ca, "superblock write"); + bch_dev_fatal_io_err_on(bio->bi_error, ca, "superblock write"); bch_account_io_completion(ca); @@ -766,7 +765,7 @@ void bch_write_super(struct cache_set *c) } while (wrote); /* Make new options visible after they're persistent: */ - bch_cache_set_mi_update(c, members->members, c->sb.nr_devices); + bch_fs_mi_update(c, members->members, c->sb.nr_devices); bch_sb_update(c); } diff --git a/libbcache/super-io.h b/libbcache/super-io.h index 1eda57b..665de81 100644 --- a/libbcache/super-io.h +++ b/libbcache/super-io.h @@ -80,7 +80,7 @@ static inline struct cache_member_cpu cache_mi_to_cpu_mi(struct bch_member *mi) }; } -int bch_cache_set_mi_update(struct cache_set *, struct bch_member *, unsigned); +int bch_fs_mi_update(struct cache_set *, struct bch_member *, unsigned); int bch_sb_to_cache_set(struct cache_set *, struct bch_sb *); int bch_sb_from_cache_set(struct cache_set *, struct cache *); @@ -96,7 +96,7 @@ int bch_super_realloc(struct bcache_superblock *, unsigned); const char *bch_validate_cache_super(struct bcache_superblock *); const char *bch_read_super(struct bcache_superblock *, - struct cache_set_opts, const char *); + struct bch_opts, const char *); void bch_write_super(struct cache_set *); void bch_check_mark_super_slowpath(struct cache_set *, diff --git a/libbcache/super.c b/libbcache/super.c index c026c0d..fab3480 100644 --- a/libbcache/super.c +++ b/libbcache/super.c @@ -63,18 +63,14 @@ static const uuid_le invalid_uuid = { static struct kset *bcache_kset; struct mutex bch_register_lock; -LIST_HEAD(bch_cache_sets); +LIST_HEAD(bch_fs_list); -static int bch_chardev_major; -static struct class *bch_chardev_class; -static struct device *bch_chardev; -static DEFINE_IDR(bch_chardev_minor); static DECLARE_WAIT_QUEUE_HEAD(bch_read_only_wait); struct workqueue_struct *bcache_io_wq; struct crypto_shash *bch_sha256; -static void bch_cache_stop(struct cache *); -static int bch_cache_online(struct cache *); +static void bch_dev_stop(struct cache *); +static int bch_dev_online(struct cache *); static int bch_congested_fn(void *data, int bdi_bits) { @@ -128,7 +124,7 @@ static int bch_congested_fn(void *data, int bdi_bits) * - allocator depends on the journal (when it rewrites prios and gens) */ -static void __bch_cache_set_read_only(struct cache_set *c) +static void __bch_fs_read_only(struct cache_set *c) { struct cache *ca; unsigned i; @@ -145,7 +141,7 @@ static void __bch_cache_set_read_only(struct cache_set *c) bch_btree_flush(c); for_each_cache(ca, c, i) - bch_cache_allocator_stop(ca); + bch_dev_allocator_stop(ca); /* * Write a journal entry after flushing the btree, so we don't end up @@ -167,11 +163,11 @@ static void bch_writes_disabled(struct percpu_ref *writes) { struct cache_set *c = container_of(writes, struct cache_set, writes); - set_bit(CACHE_SET_WRITE_DISABLE_COMPLETE, &c->flags); + set_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags); wake_up(&bch_read_only_wait); } -static void bch_cache_set_read_only_work(struct work_struct *work) +static void bch_fs_read_only_work(struct work_struct *work) { struct cache_set *c = container_of(work, struct cache_set, read_only_work); @@ -184,19 +180,19 @@ static void bch_cache_set_read_only_work(struct work_struct *work) c->foreground_write_pd.rate.rate = UINT_MAX; bch_wake_delayed_writes((unsigned long) c); - if 
(!test_bit(CACHE_SET_EMERGENCY_RO, &c->flags)) { + if (!test_bit(BCH_FS_EMERGENCY_RO, &c->flags)) { /* * If we're not doing an emergency shutdown, we want to wait on * outstanding writes to complete so they don't see spurious * errors due to shutting down the allocator: */ wait_event(bch_read_only_wait, - test_bit(CACHE_SET_WRITE_DISABLE_COMPLETE, &c->flags)); + test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags)); - __bch_cache_set_read_only(c); + __bch_fs_read_only(c); if (!bch_journal_error(&c->journal) && - !test_bit(CACHE_SET_ERROR, &c->flags)) { + !test_bit(BCH_FS_ERROR, &c->flags)) { mutex_lock(&c->sb_lock); SET_BCH_SB_CLEAN(c->disk_sb, true); bch_write_super(c); @@ -210,25 +206,25 @@ static void bch_cache_set_read_only_work(struct work_struct *work) * we do need to wait on them before returning and signalling * that going RO is complete: */ - __bch_cache_set_read_only(c); + __bch_fs_read_only(c); wait_event(bch_read_only_wait, - test_bit(CACHE_SET_WRITE_DISABLE_COMPLETE, &c->flags)); + test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags)); } - bch_notify_cache_set_read_only(c); - trace_bcache_cache_set_read_only_done(c); + bch_notify_fs_read_only(c); + trace_fs_read_only_done(c); - set_bit(CACHE_SET_RO_COMPLETE, &c->flags); + set_bit(BCH_FS_RO_COMPLETE, &c->flags); wake_up(&bch_read_only_wait); } -bool bch_cache_set_read_only(struct cache_set *c) +bool bch_fs_read_only(struct cache_set *c) { - if (test_and_set_bit(CACHE_SET_RO, &c->flags)) + if (test_and_set_bit(BCH_FS_RO, &c->flags)) return false; - trace_bcache_cache_set_read_only(c); + trace_fs_read_only(c); percpu_ref_get(&c->writes); @@ -238,7 +234,7 @@ bool bch_cache_set_read_only(struct cache_set *c) * * (This is really blocking new _allocations_, writes to previously * allocated space can still happen until stopping the allocator in - * bch_cache_allocator_stop()). + * bch_dev_allocator_stop()). 
*/ percpu_ref_kill(&c->writes); @@ -246,30 +242,30 @@ bool bch_cache_set_read_only(struct cache_set *c) return true; } -bool bch_cache_set_emergency_read_only(struct cache_set *c) +bool bch_fs_emergency_read_only(struct cache_set *c) { - bool ret = !test_and_set_bit(CACHE_SET_EMERGENCY_RO, &c->flags); + bool ret = !test_and_set_bit(BCH_FS_EMERGENCY_RO, &c->flags); - bch_cache_set_read_only(c); + bch_fs_read_only(c); bch_journal_halt(&c->journal); wake_up(&bch_read_only_wait); return ret; } -void bch_cache_set_read_only_sync(struct cache_set *c) +void bch_fs_read_only_sync(struct cache_set *c) { - /* so we don't race with bch_cache_set_read_write() */ + /* so we don't race with bch_fs_read_write() */ lockdep_assert_held(&bch_register_lock); - bch_cache_set_read_only(c); + bch_fs_read_only(c); wait_event(bch_read_only_wait, - test_bit(CACHE_SET_RO_COMPLETE, &c->flags) && - test_bit(CACHE_SET_WRITE_DISABLE_COMPLETE, &c->flags)); + test_bit(BCH_FS_RO_COMPLETE, &c->flags) && + test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags)); } -static const char *__bch_cache_set_read_write(struct cache_set *c) +static const char *__bch_fs_read_write(struct cache_set *c) { struct cache *ca; const char *err; @@ -280,7 +276,7 @@ static const char *__bch_cache_set_read_write(struct cache_set *c) err = "error starting allocator thread"; for_each_cache(ca, c, i) if (ca->mi.state == BCH_MEMBER_STATE_ACTIVE && - bch_cache_allocator_start(ca)) { + bch_dev_allocator_start(ca)) { percpu_ref_put(&ca->ref); goto err; } @@ -308,35 +304,35 @@ static const char *__bch_cache_set_read_write(struct cache_set *c) return NULL; err: - __bch_cache_set_read_only(c); + __bch_fs_read_only(c); return err; } -const char *bch_cache_set_read_write(struct cache_set *c) +const char *bch_fs_read_write(struct cache_set *c) { const char *err; lockdep_assert_held(&bch_register_lock); - if (!test_bit(CACHE_SET_RO_COMPLETE, &c->flags)) + if (!test_bit(BCH_FS_RO_COMPLETE, &c->flags)) return NULL; - err = __bch_cache_set_read_write(c); + err = __bch_fs_read_write(c); if (err) return err; percpu_ref_reinit(&c->writes); - clear_bit(CACHE_SET_WRITE_DISABLE_COMPLETE, &c->flags); - clear_bit(CACHE_SET_EMERGENCY_RO, &c->flags); - clear_bit(CACHE_SET_RO_COMPLETE, &c->flags); - clear_bit(CACHE_SET_RO, &c->flags); + clear_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags); + clear_bit(BCH_FS_EMERGENCY_RO, &c->flags); + clear_bit(BCH_FS_RO_COMPLETE, &c->flags); + clear_bit(BCH_FS_RO, &c->flags); return NULL; } /* Cache set startup/shutdown: */ -static void cache_set_free(struct cache_set *c) +static void bch_fs_free(struct cache_set *c) { del_timer_sync(&c->foreground_write_wakeup); cancel_delayed_work_sync(&c->pd_controllers_update); @@ -344,12 +340,13 @@ static void cache_set_free(struct cache_set *c) cancel_work_sync(&c->bio_submit_work); cancel_work_sync(&c->read_retry_work); - bch_cache_set_encryption_free(c); + bch_fs_encryption_free(c); bch_btree_cache_free(c); bch_journal_free(&c->journal); bch_io_clock_exit(&c->io_clock[WRITE]); bch_io_clock_exit(&c->io_clock[READ]); bch_compress_free(c); + bch_fs_blockdev_exit(c); bdi_destroy(&c->bdi); lg_lock_free(&c->bucket_stats_lock); free_percpu(c->bucket_stats_percpu); @@ -362,7 +359,6 @@ static void cache_set_free(struct cache_set *c) mempool_exit(&c->btree_interior_update_pool); mempool_exit(&c->btree_reserve_pool); mempool_exit(&c->fill_iter); - mempool_exit(&c->search); percpu_ref_exit(&c->writes); if (c->copygc_wq) @@ -377,18 +373,18 @@ static void cache_set_free(struct cache_set *c) } /* - * should be 
__cache_set_stop4 - block devices are closed, now we can finally + * should be __bch_fs_stop4 - block devices are closed, now we can finally * free it */ -void bch_cache_set_release(struct kobject *kobj) +void bch_fs_release(struct kobject *kobj) { struct cache_set *c = container_of(kobj, struct cache_set, kobj); struct completion *stop_completion = c->stop_completion; - bch_notify_cache_set_stopped(c); + bch_notify_fs_stopped(c); bch_info(c, "stopped"); - cache_set_free(c); + bch_fs_free(c); if (stop_completion) complete(stop_completion); @@ -397,7 +393,7 @@ void bch_cache_set_release(struct kobject *kobj) /* * All activity on the cache_set should have stopped now - close devices: */ -static void __cache_set_stop3(struct closure *cl) +static void __bch_fs_stop3(struct closure *cl) { struct cache_set *c = container_of(cl, struct cache_set, cl); struct cache *ca; @@ -405,13 +401,9 @@ static void __cache_set_stop3(struct closure *cl) mutex_lock(&bch_register_lock); for_each_cache(ca, c, i) - bch_cache_stop(ca); - mutex_unlock(&bch_register_lock); + bch_dev_stop(ca); - mutex_lock(&bch_register_lock); list_del(&c->list); - if (c->minor >= 0) - idr_remove(&bch_chardev_minor, c->minor); mutex_unlock(&bch_register_lock); closure_debug_destroy(&c->cl); @@ -422,14 +414,12 @@ static void __cache_set_stop3(struct closure *cl) * Openers (i.e. block devices) should have exited, shutdown all userspace * interfaces and wait for &c->cl to hit 0 */ -static void __cache_set_stop2(struct closure *cl) +static void __bch_fs_stop2(struct closure *cl) { struct cache_set *c = container_of(cl, struct cache_set, caching); bch_debug_exit_cache_set(c); - - if (!IS_ERR_OR_NULL(c->chardev)) - device_unregister(c->chardev); + bch_fs_chardev_exit(c); if (c->kobj.state_in_sysfs) kobject_del(&c->kobj); @@ -441,39 +431,52 @@ static void __cache_set_stop2(struct closure *cl) kobject_put(&c->internal); mutex_lock(&bch_register_lock); - bch_cache_set_read_only_sync(c); + bch_fs_read_only_sync(c); mutex_unlock(&bch_register_lock); closure_return(cl); } /* - * First phase of the shutdown process that's kicked off by cache_set_stop(); we + * First phase of the shutdown process that's kicked off by bch_fs_stop(); we * haven't waited for anything to stop yet, we're just punting to process * context to shut down block devices: */ -static void __cache_set_stop1(struct closure *cl) +static void __bch_fs_stop1(struct closure *cl) { struct cache_set *c = container_of(cl, struct cache_set, caching); bch_blockdevs_stop(c); - continue_at(cl, __cache_set_stop2, system_wq); + continue_at(cl, __bch_fs_stop2, system_wq); } -void bch_cache_set_stop(struct cache_set *c) +void bch_fs_stop(struct cache_set *c) { - if (!test_and_set_bit(CACHE_SET_STOPPING, &c->flags)) + if (!test_and_set_bit(BCH_FS_STOPPING, &c->flags)) closure_queue(&c->caching); } -void bch_cache_set_unregister(struct cache_set *c) +void bch_fs_stop_sync(struct cache_set *c) { - if (!test_and_set_bit(CACHE_SET_UNREGISTERING, &c->flags)) - bch_cache_set_stop(c); + DECLARE_COMPLETION_ONSTACK(complete); + + c->stop_completion = &complete; + bch_fs_stop(c); + closure_put(&c->cl); + + /* Killable? 
*/ + wait_for_completion(&complete); } -static unsigned cache_set_nr_devices(struct cache_set *c) +/* Stop, detaching from backing devices: */ +void bch_fs_detach(struct cache_set *c) +{ + if (!test_and_set_bit(BCH_FS_DETACHING, &c->flags)) + bch_fs_stop(c); +} + +static unsigned bch_fs_nr_devices(struct cache_set *c) { struct bch_sb_field_members *mi; unsigned i, nr = 0; @@ -490,7 +493,7 @@ static unsigned cache_set_nr_devices(struct cache_set *c) return nr; } -static unsigned cache_set_nr_online_devices(struct cache_set *c) +static unsigned bch_fs_nr_online_devices(struct cache_set *c) { unsigned i, nr = 0; @@ -504,8 +507,7 @@ static unsigned cache_set_nr_online_devices(struct cache_set *c) #define alloc_bucket_pages(gfp, ca) \ ((void *) __get_free_pages(__GFP_ZERO|gfp, ilog2(bucket_pages(ca)))) -static struct cache_set *bch_cache_set_alloc(struct bch_sb *sb, - struct cache_set_opts opts) +static struct cache_set *bch_fs_alloc(struct bch_sb *sb, struct bch_opts opts) { struct cache_set *c; unsigned iter_size, journal_entry_bytes; @@ -523,7 +525,7 @@ static struct cache_set *bch_cache_set_alloc(struct bch_sb *sb, mutex_init(&c->btree_cache_lock); mutex_init(&c->bucket_lock); mutex_init(&c->btree_root_lock); - INIT_WORK(&c->read_only_work, bch_cache_set_read_only_work); + INIT_WORK(&c->read_only_work, bch_fs_read_only_work); init_rwsem(&c->gc_lock); @@ -591,15 +593,15 @@ static struct cache_set *bch_cache_set_alloc(struct bch_sb *sb, scnprintf(c->name, sizeof(c->name), "%pU", &c->sb.user_uuid); - c->opts = cache_superblock_opts(sb); - cache_set_opts_apply(&c->opts, opts); + bch_opts_apply(&c->opts, bch_sb_opts(sb)); + bch_opts_apply(&c->opts, opts); c->opts.nochanges |= c->opts.noreplay; c->opts.read_only |= c->opts.nochanges; c->block_bits = ilog2(c->sb.block_size); - if (cache_set_init_fault("cache_set_alloc")) + if (bch_fs_init_fault("fs_alloc")) goto err; iter_size = (btree_blocks(c) + 1) * 2 * @@ -612,7 +614,6 @@ static struct cache_set *bch_cache_set_alloc(struct bch_sb *sb, !(c->copygc_wq = alloc_workqueue("bcache_copygc", WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_HIGHPRI, 1)) || percpu_ref_init(&c->writes, bch_writes_disabled, 0, GFP_KERNEL) || - mempool_init_slab_pool(&c->search, 1, bch_search_cache) || mempool_init_kmalloc_pool(&c->btree_reserve_pool, 1, sizeof(struct btree_reserve)) || mempool_init_kmalloc_pool(&c->btree_interior_update_pool, 1, @@ -632,11 +633,12 @@ static struct cache_set *bch_cache_set_alloc(struct bch_sb *sb, mempool_init_page_pool(&c->btree_bounce_pool, 1, ilog2(btree_pages(c))) || bdi_setup_and_register(&c->bdi, "bcache") || + bch_fs_blockdev_init(c) || bch_io_clock_init(&c->io_clock[READ]) || bch_io_clock_init(&c->io_clock[WRITE]) || bch_journal_alloc(&c->journal, journal_entry_bytes) || bch_btree_cache_alloc(c) || - bch_cache_set_encryption_init(c) || + bch_fs_encryption_init(c) || bch_compress_init(c) || bch_check_set_has_compressed_data(c, c->opts.compression)) goto err; @@ -652,42 +654,39 @@ static struct cache_set *bch_cache_set_alloc(struct bch_sb *sb, closure_init(&c->cl, NULL); c->kobj.kset = bcache_kset; - kobject_init(&c->kobj, &bch_cache_set_ktype); - kobject_init(&c->internal, &bch_cache_set_internal_ktype); - kobject_init(&c->opts_dir, &bch_cache_set_opts_dir_ktype); - kobject_init(&c->time_stats, &bch_cache_set_time_stats_ktype); + kobject_init(&c->kobj, &bch_fs_ktype); + kobject_init(&c->internal, &bch_fs_internal_ktype); + kobject_init(&c->opts_dir, &bch_fs_opts_dir_ktype); + kobject_init(&c->time_stats, &bch_fs_time_stats_ktype); 
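The read-only machinery in the hunks above gates writes on a percpu_ref: bch_fs_read_only() kills c->writes, the ref's release callback sets BCH_FS_WRITE_DISABLE_COMPLETE and wakes the waiter, and bch_fs_read_write() reinits the ref once the allocators are running again. The sketch below is a stripped-down version of that gating only, with hypothetical demo_* names rather than the patch's types.

#include <linux/kernel.h>
#include <linux/percpu-refcount.h>
#include <linux/wait.h>
#include <linux/bitops.h>
#include <linux/gfp.h>

struct demo_fs {
	struct percpu_ref	writes;		/* live while writes are allowed */
	unsigned long		flags;
	wait_queue_head_t	ro_wait;
};
#define DEMO_WRITES_DISABLED	0

static void demo_writes_released(struct percpu_ref *ref)
{
	struct demo_fs *fs = container_of(ref, struct demo_fs, writes);

	/* the last write ref is gone: the RO transition can finish */
	set_bit(DEMO_WRITES_DISABLED, &fs->flags);
	wake_up(&fs->ro_wait);
}

static int demo_fs_init(struct demo_fs *fs)
{
	init_waitqueue_head(&fs->ro_wait);
	return percpu_ref_init(&fs->writes, demo_writes_released,
			       0, GFP_KERNEL);
}

/* writers bracket their work with these two helpers */
static bool demo_write_begin(struct demo_fs *fs)
{
	return percpu_ref_tryget_live(&fs->writes);	/* fails once killed */
}

static void demo_write_end(struct demo_fs *fs)
{
	percpu_ref_put(&fs->writes);
}

static void demo_go_ro(struct demo_fs *fs)
{
	percpu_ref_kill(&fs->writes);	/* new writers start failing */
	wait_event(fs->ro_wait,
		   test_bit(DEMO_WRITES_DISABLED, &fs->flags));
	/* ...now flush metadata, stop allocators, write a clean superblock... */
}

static void demo_go_rw(struct demo_fs *fs)
{
	/* ...restart allocator threads first... */
	clear_bit(DEMO_WRITES_DISABLED, &fs->flags);
	percpu_ref_reinit(&fs->writes);
}

The useful property is that percpu_ref_tryget_live() keeps the writer fast path per-cpu and cheap; the expensive kill-and-wait only happens on the read-only transition itself.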
bch_cache_accounting_init(&c->accounting, &c->cl); closure_init(&c->caching, &c->cl); - set_closure_fn(&c->caching, __cache_set_stop1, system_wq); + set_closure_fn(&c->caching, __bch_fs_stop1, system_wq); - continue_at_noreturn(&c->cl, __cache_set_stop3, system_wq); + continue_at_noreturn(&c->cl, __bch_fs_stop3, system_wq); return c; err: - cache_set_free(c); + bch_fs_free(c); return NULL; } -static int bch_cache_set_online(struct cache_set *c) +static int bch_fs_online(struct cache_set *c) { struct cache *ca; unsigned i; + int ret; lockdep_assert_held(&bch_register_lock); - if (c->kobj.state_in_sysfs) + if (!list_empty(&c->list)) return 0; - c->minor = idr_alloc(&bch_chardev_minor, c, 0, 0, GFP_KERNEL); - if (c->minor < 0) - return c->minor; + list_add(&c->list, &bch_fs_list); - c->chardev = device_create(bch_chardev_class, NULL, - MKDEV(bch_chardev_major, c->minor), NULL, - "bcache%u-ctl", c->minor); - if (IS_ERR(c->chardev)) - return PTR_ERR(c->chardev); + ret = bch_fs_chardev_init(c); + if (ret) + return ret; if (kobject_add(&c->kobj, NULL, "%pU", c->sb.user_uuid.b) || kobject_add(&c->internal, &c->kobj, "internal") || @@ -697,16 +696,15 @@ static int bch_cache_set_online(struct cache_set *c) return -1; for_each_cache(ca, c, i) - if (bch_cache_online(ca)) { + if (bch_dev_online(ca)) { percpu_ref_put(&ca->ref); return -1; } - list_add(&c->list, &bch_cache_sets); return 0; } -static const char *run_cache_set(struct cache_set *c) +static const char *bch_fs_start(struct cache_set *c) { const char *err = "cannot allocate memory"; struct bch_sb_field_members *mi; @@ -718,7 +716,7 @@ static const char *run_cache_set(struct cache_set *c) int ret = -EINVAL; lockdep_assert_held(&bch_register_lock); - BUG_ON(test_bit(CACHE_SET_RUNNING, &c->flags)); + BUG_ON(test_bit(BCH_FS_RUNNING, &c->flags)); /* We don't want bch_fatal_error() to free underneath us */ closure_get(&c->caching); @@ -756,12 +754,6 @@ static const char *run_cache_set(struct cache_set *c) bch_recalc_min_prio(ca, WRITE); } - /* - * If bch_prio_read() fails it'll call cache_set_error and we'll - * tear everything down right away, but if we perhaps checked - * sooner we could avoid journal replay. 
- */ - for (id = 0; id < BTREE_ID_NR; id++) { unsigned level; struct bkey_i *k; @@ -801,7 +793,7 @@ static const char *run_cache_set(struct cache_set *c) err = "error starting allocator thread"; for_each_cache(ca, c, i) if (ca->mi.state == BCH_MEMBER_STATE_ACTIVE && - bch_cache_allocator_start(ca)) { + bch_dev_allocator_start(ca)) { percpu_ref_put(&ca->ref); goto err; } @@ -836,7 +828,7 @@ static const char *run_cache_set(struct cache_set *c) err = "unable to allocate journal buckets"; for_each_cache(ca, c, i) - if (bch_cache_journal_alloc(ca)) { + if (bch_dev_journal_alloc(ca)) { percpu_ref_put(&ca->ref); goto err; } @@ -853,7 +845,7 @@ static const char *run_cache_set(struct cache_set *c) err = "error starting allocator thread"; for_each_cache(ca, c, i) if (ca->mi.state == BCH_MEMBER_STATE_ACTIVE && - bch_cache_allocator_start(ca)) { + bch_dev_allocator_start(ca)) { percpu_ref_put(&ca->ref); goto err; } @@ -886,9 +878,9 @@ static const char *run_cache_set(struct cache_set *c) } recovery_done: if (c->opts.read_only) { - bch_cache_set_read_only_sync(c); + bch_fs_read_only_sync(c); } else { - err = __bch_cache_set_read_write(c); + err = __bch_fs_read_write(c); if (err) goto err; } @@ -910,11 +902,11 @@ recovery_done: mutex_unlock(&c->sb_lock); err = "dynamic fault"; - if (cache_set_init_fault("run_cache_set")) + if (bch_fs_init_fault("fs_start")) goto err; err = "error creating kobject"; - if (bch_cache_set_online(c)) + if (bch_fs_online(c)) goto err; err = "can't bring up blockdev volumes"; @@ -922,10 +914,10 @@ recovery_done: goto err; bch_debug_init_cache_set(c); - set_bit(CACHE_SET_RUNNING, &c->flags); + set_bit(BCH_FS_RUNNING, &c->flags); bch_attach_backing_devs(c); - bch_notify_cache_set_read_write(c); + bch_notify_fs_read_write(c); err = NULL; out: bch_journal_entries_free(&journal); @@ -959,12 +951,11 @@ err: } BUG_ON(!err); - set_bit(CACHE_SET_ERROR, &c->flags); + set_bit(BCH_FS_ERROR, &c->flags); goto out; } -static const char *can_add_cache(struct bch_sb *sb, - struct cache_set *c) +static const char *bch_dev_may_add(struct bch_sb *sb, struct cache_set *c) { struct bch_sb_field_members *sb_mi; @@ -982,14 +973,14 @@ static const char *can_add_cache(struct bch_sb *sb, return NULL; } -static const char *can_attach_cache(struct bch_sb *sb, struct cache_set *c) +static const char *bch_dev_in_fs(struct bch_sb *sb, struct cache_set *c) { struct bch_sb_field_members *mi = bch_sb_get_members(c->disk_sb); struct bch_sb_field_members *dev_mi = bch_sb_get_members(sb); uuid_le dev_uuid = dev_mi->members[sb->dev_idx].uuid; const char *err; - err = can_add_cache(sb, c); + err = bch_dev_may_add(sb, c); if (err) return err; @@ -1010,7 +1001,7 @@ static const char *can_attach_cache(struct bch_sb *sb, struct cache_set *c) /* Cache device */ -bool bch_cache_read_only(struct cache *ca) +bool bch_dev_read_only(struct cache *ca) { struct cache_set *c = ca->set; struct bch_sb_field_members *mi; @@ -1023,9 +1014,9 @@ bool bch_cache_read_only(struct cache *ca) if (ca->mi.state != BCH_MEMBER_STATE_ACTIVE) return false; - if (!bch_cache_may_remove(ca)) { + if (!bch_dev_may_remove(ca)) { bch_err(c, "required member %s going RO, forcing fs RO", buf); - bch_cache_set_read_only_sync(c); + bch_fs_read_only_sync(c); } trace_bcache_cache_read_only(ca); @@ -1037,9 +1028,9 @@ bool bch_cache_read_only(struct cache *ca) * buckets) and then waits for all existing writes to * complete. 
*/ - bch_cache_allocator_stop(ca); + bch_dev_allocator_stop(ca); - bch_cache_group_remove_cache(&c->journal.devs, ca); + bch_dev_group_remove(&c->journal.devs, ca); /* * Device data write barrier -- no non-meta-data writes should @@ -1049,7 +1040,7 @@ bool bch_cache_read_only(struct cache *ca) trace_bcache_cache_read_only_done(ca); bch_notice(c, "%s read only", bdevname(ca->disk_sb.bdev, buf)); - bch_notify_cache_read_only(ca); + bch_notify_dev_read_only(ca); mutex_lock(&c->sb_lock); mi = bch_sb_get_members(c->disk_sb); @@ -1060,41 +1051,41 @@ bool bch_cache_read_only(struct cache *ca) return true; } -static const char *__bch_cache_read_write(struct cache_set *c, struct cache *ca) +static const char *__bch_dev_read_write(struct cache_set *c, struct cache *ca) { lockdep_assert_held(&bch_register_lock); if (ca->mi.state == BCH_MEMBER_STATE_ACTIVE) return NULL; - if (test_bit(CACHE_DEV_REMOVING, &ca->flags)) + if (test_bit(BCH_DEV_REMOVING, &ca->flags)) return "removing"; trace_bcache_cache_read_write(ca); - if (bch_cache_allocator_start(ca)) + if (bch_dev_allocator_start(ca)) return "error starting allocator thread"; if (bch_moving_gc_thread_start(ca)) return "error starting moving GC thread"; - bch_cache_group_add_cache(&c->journal.devs, ca); + bch_dev_group_add(&c->journal.devs, ca); wake_up_process(c->tiering_read); - bch_notify_cache_read_write(ca); + bch_notify_dev_read_write(ca); trace_bcache_cache_read_write_done(ca); return NULL; } -const char *bch_cache_read_write(struct cache *ca) +const char *bch_dev_read_write(struct cache *ca) { struct cache_set *c = ca->set; struct bch_sb_field_members *mi; const char *err; - err = __bch_cache_read_write(c, ca); + err = __bch_dev_read_write(c, ca); if (err) return err; @@ -1109,11 +1100,11 @@ const char *bch_cache_read_write(struct cache *ca) } /* - * bch_cache_stop has already returned, so we no longer hold the register + * bch_dev_stop has already returned, so we no longer hold the register * lock at the point this is called. */ -void bch_cache_release(struct kobject *kobj) +void bch_dev_release(struct kobject *kobj) { struct cache *ca = container_of(kobj, struct cache, kobj); @@ -1121,7 +1112,7 @@ void bch_cache_release(struct kobject *kobj) kfree(ca); } -static void bch_cache_free_work(struct work_struct *work) +static void bch_dev_free_work(struct work_struct *work) { struct cache *ca = container_of(work, struct cache, free_work); struct cache_set *c = ca->set; @@ -1142,7 +1133,7 @@ static void bch_cache_free_work(struct work_struct *work) bch_free_super(&ca->disk_sb); /* - * bch_cache_stop can be called in the middle of initialization + * bch_dev_stop can be called in the middle of initialization * of the struct cache object. * As such, not all the sub-structures may be initialized. * However, they were zeroed when the object was allocated. 
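The *_release() hooks and KTYPE() boilerplate throughout this patch all follow the standard kobject lifetime rule: the structure embedding the kobject may only be freed from the ktype's release hook, after the final kobject_put(). That is why bch_dev_release() does the kfree() and bch_fs_release() calls bch_fs_free(). A minimal version of the idiom, with hypothetical demo_obj names:

#include <linux/kernel.h>
#include <linux/kobject.h>
#include <linux/slab.h>

struct demo_obj {
	struct kobject	kobj;
	/* ...per-object state... */
};

/* called by kobject_put() once the last reference is dropped */
static void demo_obj_release(struct kobject *kobj)
{
	struct demo_obj *d = container_of(kobj, struct demo_obj, kobj);

	kfree(d);
}

static struct kobj_type demo_obj_ktype = {
	.release	= demo_obj_release,
	.sysfs_ops	= &kobj_sysfs_ops,
};

static struct demo_obj *demo_obj_create(struct kobject *parent)
{
	struct demo_obj *d = kzalloc(sizeof(*d), GFP_KERNEL);

	if (!d)
		return NULL;

	kobject_init(&d->kobj, &demo_obj_ktype);
	if (kobject_add(&d->kobj, parent, "demo")) {
		/* already initialized: let release() do the kfree */
		kobject_put(&d->kobj);
		return NULL;
	}
	return d;
}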
@@ -1170,23 +1161,23 @@ static void bch_cache_free_work(struct work_struct *work) kobject_put(&c->kobj); } -static void bch_cache_percpu_ref_release(struct percpu_ref *ref) +static void bch_dev_percpu_ref_release(struct percpu_ref *ref) { struct cache *ca = container_of(ref, struct cache, ref); schedule_work(&ca->free_work); } -static void bch_cache_free_rcu(struct rcu_head *rcu) +static void bch_dev_free_rcu(struct rcu_head *rcu) { struct cache *ca = container_of(rcu, struct cache, free_rcu); /* * This decrements the ref count to ca, and once the ref count * is 0 (outstanding bios to the ca also incremented it and - * decrement it on completion/error), bch_cache_percpu_ref_release - * is called, and that eventually results in bch_cache_free_work - * being called, which in turn results in bch_cache_release being + * decrement it on completion/error), bch_dev_percpu_ref_release + * is called, and that eventually results in bch_dev_free_work + * being called, which in turn results in bch_dev_release being * called. * * In particular, these functions won't be called until there are no @@ -1198,7 +1189,7 @@ static void bch_cache_free_rcu(struct rcu_head *rcu) percpu_ref_kill(&ca->ref); } -static void bch_cache_stop(struct cache *ca) +static void bch_dev_stop(struct cache *ca) { struct cache_set *c = ca->set; @@ -1209,16 +1200,16 @@ static void bch_cache_stop(struct cache *ca) rcu_assign_pointer(c->cache[ca->dev_idx], NULL); } - call_rcu(&ca->free_rcu, bch_cache_free_rcu); + call_rcu(&ca->free_rcu, bch_dev_free_rcu); } -static void bch_cache_remove_work(struct work_struct *work) +static void bch_dev_remove_work(struct work_struct *work) { struct cache *ca = container_of(work, struct cache, remove_work); struct bch_sb_field_members *mi; struct cache_set *c = ca->set; char name[BDEVNAME_SIZE]; - bool force = test_bit(CACHE_DEV_FORCE_REMOVE, &ca->flags); + bool force = test_bit(BCH_DEV_FORCE_REMOVE, &ca->flags); unsigned dev_idx = ca->dev_idx; bdevname(ca->disk_sb.bdev, name); @@ -1226,8 +1217,8 @@ static void bch_cache_remove_work(struct work_struct *work) /* * Device should already be RO, now migrate data off: * - * XXX: locking is sketchy, bch_cache_read_write() has to check - * CACHE_DEV_REMOVING bit + * XXX: locking is sketchy, bch_dev_read_write() has to check + * BCH_DEV_REMOVING bit */ if (!ca->mi.has_data) { /* Nothing to do: */ @@ -1250,7 +1241,7 @@ static void bch_cache_remove_work(struct work_struct *work) } else { bch_err(c, "Remove of %s failed, unable to migrate data off", name); - clear_bit(CACHE_DEV_REMOVING, &ca->flags); + clear_bit(BCH_DEV_REMOVING, &ca->flags); return; } @@ -1268,7 +1259,7 @@ static void bch_cache_remove_work(struct work_struct *work) } else { bch_err(c, "Remove of %s failed, unable to migrate metadata off", name); - clear_bit(CACHE_DEV_REMOVING, &ca->flags); + clear_bit(BCH_DEV_REMOVING, &ca->flags); return; } @@ -1276,7 +1267,7 @@ static void bch_cache_remove_work(struct work_struct *work) * Ok, really doing the remove: * Drop device's prio pointer before removing it from superblock: */ - bch_notify_cache_removed(ca); + bch_notify_dev_removed(ca); spin_lock(&c->journal.lock); c->journal.prio_buckets[dev_idx] = 0; @@ -1291,7 +1282,7 @@ static void bch_cache_remove_work(struct work_struct *work) closure_get(&c->cl); mutex_lock(&bch_register_lock); - bch_cache_stop(ca); + bch_dev_stop(ca); /* * RCU barrier between dropping between c->cache and dropping from @@ -1317,27 +1308,27 @@ static void bch_cache_remove_work(struct work_struct *work) closure_put(&c->cl); } 
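bch_dev_stop() and the helpers renamed above tear a device down in stages, exactly as the comment in bch_dev_free_rcu() describes: unpublish the pointer, call_rcu(), kill the percpu ref once the grace period ends, have the ref's release callback punt to a worker, and do the actual freeing from process context. Condensed into a self-contained sketch — demo_dev is hypothetical, and the real chain ends in kobject_put() rather than a direct kfree():

#include <linux/kernel.h>
#include <linux/percpu-refcount.h>
#include <linux/rcupdate.h>
#include <linux/workqueue.h>
#include <linux/slab.h>

struct demo_dev {
	struct percpu_ref	ref;		/* held across in-flight IO */
	struct rcu_head		free_rcu;
	struct work_struct	free_work;	/* INIT_WORK()ed at alloc time */
};

static void demo_dev_free_work(struct work_struct *work)
{
	struct demo_dev *d = container_of(work, struct demo_dev, free_work);

	/* process context: safe to free sub-structures, put kobjects, ... */
	percpu_ref_exit(&d->ref);
	kfree(d);
}

static void demo_dev_ref_released(struct percpu_ref *ref)
{
	struct demo_dev *d = container_of(ref, struct demo_dev, ref);

	/* may run from atomic context: defer the real teardown to a worker */
	schedule_work(&d->free_work);
}

static void demo_dev_free_rcu(struct rcu_head *rcu)
{
	struct demo_dev *d = container_of(rcu, struct demo_dev, free_rcu);

	/* grace period over: lockless readers are gone, drop the base ref */
	percpu_ref_kill(&d->ref);
}

static void demo_dev_stop(struct demo_dev *d)
{
	/* step 1 (not shown): unpublish d from any RCU-protected pointers */
	call_rcu(&d->free_rcu, demo_dev_free_rcu);
}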
-bool bch_cache_remove(struct cache *ca, bool force) +bool bch_dev_remove(struct cache *ca, bool force) { mutex_lock(&bch_register_lock); - if (test_bit(CACHE_DEV_REMOVING, &ca->flags)) + if (test_bit(BCH_DEV_REMOVING, &ca->flags)) return false; - if (!bch_cache_may_remove(ca)) { + if (!bch_dev_may_remove(ca)) { bch_err(ca->set, "Can't remove last device in tier %u", ca->mi.tier); - bch_notify_cache_remove_failed(ca); + bch_notify_dev_remove_failed(ca); return false; } /* First, go RO before we try to migrate data off: */ - bch_cache_read_only(ca); + bch_dev_read_only(ca); if (force) - set_bit(CACHE_DEV_FORCE_REMOVE, &ca->flags); - set_bit(CACHE_DEV_REMOVING, &ca->flags); - bch_notify_cache_removing(ca); + set_bit(BCH_DEV_FORCE_REMOVE, &ca->flags); + set_bit(BCH_DEV_REMOVING, &ca->flags); + bch_notify_dev_removing(ca); mutex_unlock(&bch_register_lock); @@ -1347,7 +1338,7 @@ bool bch_cache_remove(struct cache *ca, bool force) return true; } -static int bch_cache_online(struct cache *ca) +static int bch_dev_online(struct cache *ca) { char buf[12]; @@ -1365,9 +1356,9 @@ static int bch_cache_online(struct cache *ca) return 0; } -static const char *cache_alloc(struct bcache_superblock *sb, - struct cache_set *c, - struct cache **ret) +static const char *bch_dev_alloc(struct bcache_superblock *sb, + struct cache_set *c, + struct cache **ret) { struct bch_member *member; size_t reserve_none, movinggc_reserve, free_inc_reserve, total_reserve; @@ -1379,28 +1370,28 @@ static const char *cache_alloc(struct bcache_superblock *sb, if (c->sb.nr_devices == 1) bdevname(sb->bdev, c->name); - if (cache_set_init_fault("cache_alloc")) + if (bch_fs_init_fault("dev_alloc")) return err; ca = kzalloc(sizeof(*ca), GFP_KERNEL); if (!ca) return err; - if (percpu_ref_init(&ca->ref, bch_cache_percpu_ref_release, + if (percpu_ref_init(&ca->ref, bch_dev_percpu_ref_release, 0, GFP_KERNEL)) { kfree(ca); return err; } - kobject_init(&ca->kobj, &bch_cache_ktype); + kobject_init(&ca->kobj, &bch_dev_ktype); spin_lock_init(&ca->self.lock); ca->self.nr_devices = 1; rcu_assign_pointer(ca->self.d[0].dev, ca); ca->dev_idx = sb->sb->dev_idx; - INIT_WORK(&ca->free_work, bch_cache_free_work); - INIT_WORK(&ca->remove_work, bch_cache_remove_work); + INIT_WORK(&ca->free_work, bch_dev_free_work); + INIT_WORK(&ca->remove_work, bch_dev_remove_work); spin_lock_init(&ca->freelist_lock); spin_lock_init(&ca->prio_buckets_lock); mutex_init(&ca->heap_lock); @@ -1413,7 +1404,7 @@ static const char *cache_alloc(struct bcache_superblock *sb, INIT_WORK(&ca->io_error_work, bch_nonfatal_io_error_work); err = "dynamic fault"; - if (cache_set_init_fault("cache_alloc")) + if (bch_fs_init_fault("dev_alloc")) goto err; member = bch_sb_get_members(ca->disk_sb.sb)->members + @@ -1490,7 +1481,7 @@ static const char *cache_alloc(struct bcache_superblock *sb, err = "error creating kobject"; if (c->kobj.state_in_sysfs && - bch_cache_online(ca)) + bch_dev_online(ca)) goto err; if (ret) @@ -1499,73 +1490,24 @@ static const char *cache_alloc(struct bcache_superblock *sb, kobject_put(&ca->kobj); return NULL; err: - bch_cache_stop(ca); + bch_dev_stop(ca); return err; } -static struct cache_set *cache_set_lookup(uuid_le uuid) +static struct cache_set *bch_fs_lookup(uuid_le uuid) { struct cache_set *c; lockdep_assert_held(&bch_register_lock); - list_for_each_entry(c, &bch_cache_sets, list) + list_for_each_entry(c, &bch_fs_list, list) if (!memcmp(&c->disk_sb->uuid, &uuid, sizeof(uuid_le))) return c; return NULL; } -static const char *register_cache(struct 
bcache_superblock *sb, - struct cache_set_opts opts) -{ - char name[BDEVNAME_SIZE]; - const char *err; - struct cache_set *c; - bool allocated_cache_set = false; - - err = bch_validate_cache_super(sb); - if (err) - return err; - - bdevname(sb->bdev, name); - - c = cache_set_lookup(sb->sb->uuid); - if (c) { - err = can_attach_cache(sb->sb, c); - if (err) - return err; - } else { - c = bch_cache_set_alloc(sb->sb, opts); - if (!c) - return "cannot allocate memory"; - - allocated_cache_set = true; - } - - err = cache_alloc(sb, c, NULL); - if (err) - goto err; - - if (cache_set_nr_online_devices(c) == cache_set_nr_devices(c)) { - err = run_cache_set(c); - if (err) - goto err; - } else { - err = "error creating kobject"; - if (bch_cache_set_online(c)) - goto err; - } - - bch_info(c, "started"); - return NULL; -err: - if (allocated_cache_set) - bch_cache_set_stop(c); - return err; -} - -int bch_cache_set_add_cache(struct cache_set *c, const char *path) +int bch_dev_add(struct cache_set *c, const char *path) { struct bcache_superblock sb; const char *err; @@ -1588,7 +1530,7 @@ int bch_cache_set_add_cache(struct cache_set *c, const char *path) mutex_lock(&c->sb_lock); - err = can_add_cache(sb.sb, c); + err = bch_dev_may_add(sb.sb, c); if (err) goto err_unlock; @@ -1605,7 +1547,7 @@ int bch_cache_set_add_cache(struct cache_set *c, const char *path) if (dynamic_fault("bcache:add:no_slot")) goto no_slot; - if (test_bit(CACHE_SET_GC_FAILURE, &c->flags)) + if (test_bit(BCH_FS_GC_FAILURE, &c->flags)) goto no_slot; mi = bch_sb_get_members(c->disk_sb); @@ -1646,7 +1588,7 @@ have_slot: sb.sb->dev_idx = dev_idx; sb.sb->nr_devices = nr_devices; - if (bch_cache_set_mi_update(c, dev_mi->members, nr_devices)) { + if (bch_fs_mi_update(c, dev_mi->members, nr_devices)) { err = "cannot allocate memory"; ret = -ENOMEM; goto err_unlock; @@ -1657,20 +1599,20 @@ have_slot: c->disk_sb->nr_devices = nr_devices; c->sb.nr_devices = nr_devices; - err = cache_alloc(&sb, c, &ca); + err = bch_dev_alloc(&sb, c, &ca); if (err) goto err_unlock; bch_write_super(c); err = "journal alloc failed"; - if (bch_cache_journal_alloc(ca)) + if (bch_dev_journal_alloc(ca)) goto err_put; - bch_notify_cache_added(ca); + bch_notify_dev_added(ca); if (ca->mi.state == BCH_MEMBER_STATE_ACTIVE) { - err = __bch_cache_read_write(c, ca); + err = __bch_dev_read_write(c, ca); if (err) goto err_put; } @@ -1680,7 +1622,7 @@ have_slot: mutex_unlock(&bch_register_lock); return 0; err_put: - bch_cache_stop(ca); + bch_dev_stop(ca); err_unlock: mutex_unlock(&c->sb_lock); err_unlock_register: @@ -1691,9 +1633,8 @@ err_unlock_register: return ret ?: -EINVAL; } -const char *bch_register_cache_set(char * const *devices, unsigned nr_devices, - struct cache_set_opts opts, - struct cache_set **ret) +const char *bch_fs_open(char * const *devices, unsigned nr_devices, + struct bch_opts opts, struct cache_set **ret) { const char *err; struct cache_set *c = NULL; @@ -1736,30 +1677,30 @@ const char *bch_register_cache_set(char * const *devices, unsigned nr_devices, } err = "cache set already registered"; - if (cache_set_lookup(sb->sb->uuid)) + if (bch_fs_lookup(sb->sb->uuid)) goto err_unlock; err = "cannot allocate memory"; - c = bch_cache_set_alloc(sb[0].sb, opts); + c = bch_fs_alloc(sb[0].sb, opts); if (!c) goto err_unlock; for (i = 0; i < nr_devices; i++) { - err = cache_alloc(&sb[i], c, NULL); + err = bch_dev_alloc(&sb[i], c, NULL); if (err) goto err_unlock; } err = "insufficient devices"; - if (cache_set_nr_online_devices(c) != cache_set_nr_devices(c)) + if 
(bch_fs_nr_online_devices(c) != bch_fs_nr_devices(c)) goto err_unlock; - err = run_cache_set(c); + err = bch_fs_start(c); if (err) goto err_unlock; err = "error creating kobject"; - if (bch_cache_set_online(c)) + if (bch_fs_online(c)) goto err_unlock; if (ret) { @@ -1778,7 +1719,7 @@ out: return err; err_unlock: if (c) - bch_cache_set_stop(c); + bch_fs_stop(c); mutex_unlock(&bch_register_lock); err: for (i = 0; i < nr_devices; i++) @@ -1786,10 +1727,59 @@ err: goto out; } -const char *bch_register_one(const char *path) +static const char *__bch_fs_open_incremental(struct bcache_superblock *sb, + struct bch_opts opts) +{ + char name[BDEVNAME_SIZE]; + const char *err; + struct cache_set *c; + bool allocated_cache_set = false; + + err = bch_validate_cache_super(sb); + if (err) + return err; + + bdevname(sb->bdev, name); + + c = bch_fs_lookup(sb->sb->uuid); + if (c) { + err = bch_dev_in_fs(sb->sb, c); + if (err) + return err; + } else { + c = bch_fs_alloc(sb->sb, opts); + if (!c) + return "cannot allocate memory"; + + allocated_cache_set = true; + } + + err = bch_dev_alloc(sb, c, NULL); + if (err) + goto err; + + if (bch_fs_nr_online_devices(c) == bch_fs_nr_devices(c)) { + err = bch_fs_start(c); + if (err) + goto err; + } else { + err = "error creating kobject"; + if (bch_fs_online(c)) + goto err; + } + + bch_info(c, "started"); + return NULL; +err: + if (allocated_cache_set) + bch_fs_stop(c); + return err; +} + +const char *bch_fs_open_incremental(const char *path) { struct bcache_superblock sb; - struct cache_set_opts opts = cache_set_opts_empty(); + struct bch_opts opts = bch_opts_empty(); const char *err; mutex_lock(&bch_register_lock); @@ -1801,7 +1791,7 @@ const char *bch_register_one(const char *path) if (__SB_IS_BDEV(le64_to_cpu(sb.sb->version))) err = bch_backing_dev_register(&sb); else - err = register_cache(&sb, opts); + err = __bch_fs_open_incremental(&sb, opts); bch_free_super(&sb); err: @@ -1837,7 +1827,7 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, if (!(path = kstrndup(skip_spaces(buffer), size, GFP_KERNEL))) goto err; - err = bch_register_one(strim(path)); + err = bch_fs_open_incremental(strim(path)); if (err) goto err; @@ -1860,14 +1850,14 @@ static int bcache_reboot(struct notifier_block *n, unsigned long code, void *x) mutex_lock(&bch_register_lock); - if (!list_empty(&bch_cache_sets)) + if (!list_empty(&bch_fs_list)) pr_info("Setting all devices read only:"); - list_for_each_entry(c, &bch_cache_sets, list) - bch_cache_set_read_only(c); + list_for_each_entry(c, &bch_fs_list, list) + bch_fs_read_only(c); - list_for_each_entry(c, &bch_cache_sets, list) - bch_cache_set_read_only_sync(c); + list_for_each_entry(c, &bch_fs_list, list) + bch_fs_read_only_sync(c); mutex_unlock(&bch_register_lock); } @@ -1894,17 +1884,11 @@ static void bcache_exit(void) bch_debug_exit(); bch_fs_exit(); bch_blockdev_exit(); + bch_chardev_exit(); if (bcache_kset) kset_unregister(bcache_kset); if (bcache_io_wq) destroy_workqueue(bcache_io_wq); - if (!IS_ERR_OR_NULL(bch_chardev_class)) - device_destroy(bch_chardev_class, - MKDEV(bch_chardev_major, 0)); - if (!IS_ERR_OR_NULL(bch_chardev_class)) - class_destroy(bch_chardev_class); - if (bch_chardev_major > 0) - unregister_chrdev(bch_chardev_major, "bcache"); if (!IS_ERR_OR_NULL(bch_sha256)) crypto_free_shash(bch_sha256); unregister_reboot_notifier(&reboot); @@ -1928,23 +1912,10 @@ static int __init bcache_init(void) if (IS_ERR(bch_sha256)) goto err; - bch_chardev_major = register_chrdev(0, "bcache-ctl", 
&bch_chardev_fops); - if (bch_chardev_major < 0) - goto err; - - bch_chardev_class = class_create(THIS_MODULE, "bcache"); - if (IS_ERR(bch_chardev_class)) - goto err; - - bch_chardev = device_create(bch_chardev_class, NULL, - MKDEV(bch_chardev_major, 255), - NULL, "bcache-ctl"); - if (IS_ERR(bch_chardev)) - goto err; - if (!(bcache_io_wq = create_freezable_workqueue("bcache_io")) || !(bcache_kset = kset_create_and_add("bcache", NULL, fs_kobj)) || sysfs_create_files(&bcache_kset->kobj, files) || + bch_chardev_init() || bch_blockdev_init() || bch_fs_init() || bch_debug_init()) diff --git a/libbcache/super.h b/libbcache/super.h index 014d7ae..bcf7d98 100644 --- a/libbcache/super.h +++ b/libbcache/super.h @@ -54,7 +54,7 @@ static inline struct cache *bch_get_next_cache(struct cache_set *c, (ca = bch_get_next_cache(c, &(iter))); \ percpu_ref_put(&ca->ref), (iter)++) -static inline bool bch_cache_may_remove(struct cache *ca) +static inline bool bch_dev_may_remove(struct cache *ca) { struct cache_set *c = ca->set; struct cache_group *tier = &c->cache_tiers[ca->mi.tier]; @@ -80,37 +80,37 @@ static inline bool bch_cache_may_remove(struct cache *ca) rcu_access_pointer(tier->d[0].dev) != ca; } -void bch_cache_set_release(struct kobject *); -void bch_cache_release(struct kobject *); +void bch_dev_release(struct kobject *); -void bch_cache_set_unregister(struct cache_set *); -void bch_cache_set_stop(struct cache_set *); +bool bch_dev_read_only(struct cache *); +const char *bch_dev_read_write(struct cache *); +bool bch_dev_remove(struct cache *, bool force); +int bch_dev_add(struct cache_set *, const char *); -const char *bch_register_one(const char *path); -const char *bch_register_cache_set(char * const *, unsigned, - struct cache_set_opts, - struct cache_set **); +void bch_fs_detach(struct cache_set *); -bool bch_cache_set_read_only(struct cache_set *); -bool bch_cache_set_emergency_read_only(struct cache_set *); -void bch_cache_set_read_only_sync(struct cache_set *); -const char *bch_cache_set_read_write(struct cache_set *); +bool bch_fs_read_only(struct cache_set *); +bool bch_fs_emergency_read_only(struct cache_set *); +void bch_fs_read_only_sync(struct cache_set *); +const char *bch_fs_read_write(struct cache_set *); -bool bch_cache_read_only(struct cache *); -const char *bch_cache_read_write(struct cache *); -bool bch_cache_remove(struct cache *, bool force); -int bch_cache_set_add_cache(struct cache_set *, const char *); +void bch_fs_release(struct kobject *); +void bch_fs_stop(struct cache_set *); +void bch_fs_stop_sync(struct cache_set *); + +const char *bch_fs_open(char * const *, unsigned, struct bch_opts, + struct cache_set **); +const char *bch_fs_open_incremental(const char *path); extern struct mutex bch_register_lock; -extern struct list_head bch_cache_sets; -extern struct idr bch_cache_set_minor; +extern struct list_head bch_fs_list; extern struct workqueue_struct *bcache_io_wq; extern struct crypto_shash *bch_sha256; -extern struct kobj_type bch_cache_set_ktype; -extern struct kobj_type bch_cache_set_internal_ktype; -extern struct kobj_type bch_cache_set_time_stats_ktype; -extern struct kobj_type bch_cache_set_opts_dir_ktype; -extern struct kobj_type bch_cache_ktype; +extern struct kobj_type bch_fs_ktype; +extern struct kobj_type bch_fs_internal_ktype; +extern struct kobj_type bch_fs_time_stats_ktype; +extern struct kobj_type bch_fs_opts_dir_ktype; +extern struct kobj_type bch_dev_ktype; #endif /* _BCACHE_SUPER_H */ diff --git a/libbcache/sysfs.c b/libbcache/sysfs.c index 
57b7dd9..9f45a6b 100644 --- a/libbcache/sysfs.c +++ b/libbcache/sysfs.c @@ -142,10 +142,9 @@ read_attribute(tier); BCH_DEBUG_PARAMS() #undef BCH_DEBUG_PARAM -#define BCH_OPT(_name, _choices, _min, _max, _sb_opt, _perm) \ +#define BCH_OPT(_name, _mode, ...) \ static struct attribute sysfs_opt_##_name = { \ - .name = #_name, \ - .mode = S_IRUGO|(_perm ? S_IWUSR : 0) \ + .name = #_name, .mode = _mode, \ }; BCH_VISIBLE_OPTS() @@ -298,7 +297,7 @@ STORE(__cached_dev) if (uuid_parse(buf, &dc->disk_sb.sb->user_uuid)) return -EINVAL; - list_for_each_entry(c, &bch_cache_sets, list) { + list_for_each_entry(c, &bch_fs_list, list) { v = bch_cached_dev_attach(dc, c); if (!v) return size; @@ -515,7 +514,7 @@ lock_root: return (bytes * 100) / btree_bytes(c); } -static size_t bch_cache_size(struct cache_set *c) +static size_t bch_btree_cache_size(struct cache_set *c) { size_t ret = 0; struct btree *b; @@ -528,7 +527,7 @@ static size_t bch_cache_size(struct cache_set *c) return ret; } -static unsigned bch_cache_available_percent(struct cache_set *c) +static unsigned bch_fs_available_percent(struct cache_set *c) { return div64_u64((u64) sectors_available(c) * 100, c->capacity ?: 1); @@ -549,7 +548,7 @@ static unsigned bch_average_key_size(struct cache_set *c) } #endif -static ssize_t show_cache_set_alloc_debug(struct cache_set *c, char *buf) +static ssize_t show_fs_alloc_debug(struct cache_set *c, char *buf) { struct bucket_stats_cache_set stats = bch_bucket_stats_read_cache_set(c); @@ -624,7 +623,7 @@ static ssize_t bch_compression_stats(struct cache_set *c, char *buf) compressed_sectors_uncompressed << 9); } -SHOW(bch_cache_set) +SHOW(bch_fs) { struct cache_set *c = container_of(kobj, struct cache_set, kobj); @@ -639,8 +638,8 @@ SHOW(bch_cache_set) sysfs_hprint(btree_node_size, c->sb.btree_node_size << 9); sysfs_print(btree_node_size_bytes, c->sb.btree_node_size << 9); - sysfs_hprint(btree_cache_size, bch_cache_size(c)); - sysfs_print(cache_available_percent, bch_cache_available_percent(c)); + sysfs_hprint(btree_cache_size, bch_btree_cache_size(c)); + sysfs_print(cache_available_percent, bch_fs_available_percent(c)); sysfs_print(btree_gc_running, c->gc_pos.phase != GC_PHASE_DONE); @@ -695,13 +694,13 @@ SHOW(bch_cache_set) BCH_DEBUG_PARAMS() #undef BCH_DEBUG_PARAM - if (!test_bit(CACHE_SET_RUNNING, &c->flags)) + if (!test_bit(BCH_FS_RUNNING, &c->flags)) return -EPERM; if (attr == &sysfs_bset_tree_stats) return bch_bset_print_stats(c, buf); if (attr == &sysfs_alloc_debug) - return show_cache_set_alloc_debug(c, buf); + return show_fs_alloc_debug(c, buf); sysfs_print(tree_depth, c->btree_roots[BTREE_ID_EXTENTS].b->level); sysfs_print(root_usage_percent, bch_root_usage(c)); @@ -714,17 +713,17 @@ SHOW(bch_cache_set) return 0; } -STORE(__bch_cache_set) +STORE(__bch_fs) { struct cache_set *c = container_of(kobj, struct cache_set, kobj); if (attr == &sysfs_unregister) { - bch_cache_set_unregister(c); + bch_fs_detach(c); return size; } if (attr == &sysfs_stop) { - bch_cache_set_stop(c); + bch_fs_stop(c); return size; } @@ -800,10 +799,10 @@ STORE(__bch_cache_set) BCH_DEBUG_PARAMS() #undef BCH_DEBUG_PARAM - if (!test_bit(CACHE_SET_RUNNING, &c->flags)) + if (!test_bit(BCH_FS_RUNNING, &c->flags)) return -EPERM; - if (test_bit(CACHE_SET_STOPPING, &c->flags)) + if (test_bit(BCH_FS_STOPPING, &c->flags)) return -EINTR; if (attr == &sysfs_blockdev_volume_create) { @@ -833,17 +832,17 @@ STORE(__bch_cache_set) return size; } -STORE(bch_cache_set) +STORE(bch_fs) { struct cache_set *c = container_of(kobj, struct cache_set, 
kobj); mutex_lock(&bch_register_lock); - size = __bch_cache_set_store(kobj, attr, buf, size); + size = __bch_fs_store(kobj, attr, buf, size); mutex_unlock(&bch_register_lock); if (attr == &sysfs_add_device) { char *path = kstrdup(buf, GFP_KERNEL); - int r = bch_cache_set_add_cache(c, strim(path)); + int r = bch_dev_add(c, strim(path)); kfree(path); if (r) @@ -853,7 +852,7 @@ STORE(bch_cache_set) return size; } -static struct attribute *bch_cache_set_files[] = { +static struct attribute *bch_fs_files[] = { &sysfs_unregister, &sysfs_stop, &sysfs_journal_write_delay_ms, @@ -890,27 +889,27 @@ static struct attribute *bch_cache_set_files[] = { &sysfs_journal_flush, NULL }; -KTYPE(bch_cache_set); +KTYPE(bch_fs); /* internal dir - just a wrapper */ -SHOW(bch_cache_set_internal) +SHOW(bch_fs_internal) { struct cache_set *c = container_of(kobj, struct cache_set, internal); - return bch_cache_set_show(&c->kobj, attr, buf); + return bch_fs_show(&c->kobj, attr, buf); } -STORE(bch_cache_set_internal) +STORE(bch_fs_internal) { struct cache_set *c = container_of(kobj, struct cache_set, internal); - return bch_cache_set_store(&c->kobj, attr, buf, size); + return bch_fs_store(&c->kobj, attr, buf, size); } -static void bch_cache_set_internal_release(struct kobject *k) +static void bch_fs_internal_release(struct kobject *k) { } -static struct attribute *bch_cache_set_internal_files[] = { +static struct attribute *bch_fs_internal_files[] = { &sysfs_journal_debug, &sysfs_alloc_debug, @@ -941,73 +940,58 @@ static struct attribute *bch_cache_set_internal_files[] = { NULL }; -KTYPE(bch_cache_set_internal); +KTYPE(bch_fs_internal); /* options */ -SHOW(bch_cache_set_opts_dir) +SHOW(bch_fs_opts_dir) { struct cache_set *c = container_of(kobj, struct cache_set, opts_dir); -#define BCH_OPT(_name, _choices, _min, _max, _sb_opt, _perm) \ - if (attr == &sysfs_opt_##_name) \ - return _choices == bch_bool_opt || _choices == bch_uint_opt\ - ? snprintf(buf, PAGE_SIZE, "%i\n", c->opts._name)\ - : bch_snprint_string_list(buf, PAGE_SIZE, \ - _choices, c->opts._name);\ - - BCH_VISIBLE_OPTS() -#undef BCH_OPT - - return 0; + return bch_opt_show(&c->opts, attr->name, buf, PAGE_SIZE); } -STORE(bch_cache_set_opts_dir) +STORE(bch_fs_opts_dir) { struct cache_set *c = container_of(kobj, struct cache_set, opts_dir); + const struct bch_option *opt; + enum bch_opt_id id; + u64 v; + + id = bch_parse_sysfs_opt(attr->name, buf, &v); + if (id < 0) + return id; + + opt = &bch_opt_table[id]; + + mutex_lock(&c->sb_lock); -#define BCH_OPT(_name, _choices, _min, _max, _sb_opt, _perm) \ - if (attr == &sysfs_opt_##_name) { \ - ssize_t v = (_choices == bch_bool_opt || \ - _choices == bch_uint_opt) \ - ? 
strtoul_restrict_or_return(buf, _min, _max - 1)\ - : bch_read_string_list(buf, _choices); \ - \ - if (v < 0) \ - return v; \ - \ - mutex_lock(&c->sb_lock); \ - if (attr == &sysfs_opt_compression) { \ - int ret = bch_check_set_has_compressed_data(c, v);\ - if (ret) { \ - mutex_unlock(&c->sb_lock); \ - return ret; \ - } \ - } \ - \ - if (_sb_opt##_BITS && v != _sb_opt(c->disk_sb)) { \ - SET_##_sb_opt(c->disk_sb, v); \ - bch_write_super(c); \ - } \ - \ - c->opts._name = v; \ - mutex_unlock(&c->sb_lock); \ - \ - return size; \ + if (id == Opt_compression) { + int ret = bch_check_set_has_compressed_data(c, v); + if (ret) { + mutex_unlock(&c->sb_lock); + return ret; + } } - BCH_VISIBLE_OPTS() -#undef BCH_OPT + if (opt->set_sb != SET_NO_SB_OPT) { + opt->set_sb(c->disk_sb, v); + bch_write_super(c); + } + + bch_opt_set(&c->opts, id, v); + + mutex_unlock(&c->sb_lock); return size; } -static void bch_cache_set_opts_dir_release(struct kobject *k) +static void bch_fs_opts_dir_release(struct kobject *k) { } -static struct attribute *bch_cache_set_opts_dir_files[] = { -#define BCH_OPT(_name, _choices, _min, _max, _sb_opt, _perm) \ +static struct attribute *bch_fs_opts_dir_files[] = { +#define BCH_OPT(_name, ...) \ &sysfs_opt_##_name, BCH_VISIBLE_OPTS() @@ -1015,11 +999,11 @@ static struct attribute *bch_cache_set_opts_dir_files[] = { NULL }; -KTYPE(bch_cache_set_opts_dir); +KTYPE(bch_fs_opts_dir); /* time stats */ -SHOW(bch_cache_set_time_stats) +SHOW(bch_fs_time_stats) { struct cache_set *c = container_of(kobj, struct cache_set, time_stats); @@ -1032,7 +1016,7 @@ SHOW(bch_cache_set_time_stats) return 0; } -STORE(bch_cache_set_time_stats) +STORE(bch_fs_time_stats) { struct cache_set *c = container_of(kobj, struct cache_set, time_stats); @@ -1044,11 +1028,11 @@ STORE(bch_cache_set_time_stats) return size; } -static void bch_cache_set_time_stats_release(struct kobject *k) +static void bch_fs_time_stats_release(struct kobject *k) { } -static struct attribute *bch_cache_set_time_stats_files[] = { +static struct attribute *bch_fs_time_stats_files[] = { #define BCH_TIME_STAT(name, frequency_units, duration_units) \ sysfs_time_stats_attribute_list(name, frequency_units, duration_units) BCH_TIME_STATS() @@ -1056,7 +1040,7 @@ static struct attribute *bch_cache_set_time_stats_files[] = { NULL }; -KTYPE(bch_cache_set_time_stats); +KTYPE(bch_fs_time_stats); typedef unsigned (bucket_map_fn)(struct cache *, struct bucket *, void *); @@ -1141,7 +1125,7 @@ static ssize_t show_reserve_stats(struct cache *ca, char *buf) return ret; } -static ssize_t show_cache_alloc_debug(struct cache *ca, char *buf) +static ssize_t show_dev_alloc_debug(struct cache *ca, char *buf) { struct cache_set *c = ca->set; struct bucket_stats_cache stats = bch_bucket_stats_read_cache(ca); @@ -1184,7 +1168,7 @@ static u64 sectors_written(struct cache *ca) return ret; } -SHOW(bch_cache) +SHOW(bch_dev) { struct cache *ca = container_of(kobj, struct cache, kobj); struct cache_set *c = ca->set; @@ -1233,7 +1217,7 @@ SHOW(bch_cache) if (attr == &sysfs_state_rw) return bch_snprint_string_list(buf, PAGE_SIZE, - bch_cache_state, + bch_dev_state, ca->mi.state); if (attr == &sysfs_read_priority_stats) @@ -1247,12 +1231,12 @@ SHOW(bch_cache) if (attr == &sysfs_reserve_stats) return show_reserve_stats(ca, buf); if (attr == &sysfs_alloc_debug) - return show_cache_alloc_debug(ca, buf); + return show_dev_alloc_debug(ca, buf); return 0; } -STORE(__bch_cache) +STORE(__bch_dev) { struct cache *ca = container_of(kobj, struct cache, kobj); struct cache_set *c = 
ca->set; @@ -1292,7 +1276,7 @@ STORE(__bch_cache) if (attr == &sysfs_state_rw) { char name[BDEVNAME_SIZE]; const char *err = NULL; - ssize_t v = bch_read_string_list(buf, bch_cache_state); + ssize_t v = bch_read_string_list(buf, bch_dev_state); if (v < 0) return v; @@ -1302,10 +1286,10 @@ STORE(__bch_cache) switch (v) { case BCH_MEMBER_STATE_ACTIVE: - err = bch_cache_read_write(ca); + err = bch_dev_read_write(ca); break; case BCH_MEMBER_STATE_RO: - bch_cache_read_only(ca); + bch_dev_read_only(ca); break; case BCH_MEMBER_STATE_FAILED: case BCH_MEMBER_STATE_SPARE: @@ -1314,14 +1298,14 @@ STORE(__bch_cache) */ pr_err("can't set %s %s: not supported", bdevname(ca->disk_sb.bdev, name), - bch_cache_state[v]); + bch_dev_state[v]); return -EINVAL; } if (err) { pr_err("can't set %s %s: %s", bdevname(ca->disk_sb.bdev, name), - bch_cache_state[v], err); + bch_dev_state[v], err); return -EINVAL; } } @@ -1332,7 +1316,7 @@ STORE(__bch_cache) if (!strncmp(buf, "force", 5) && (buf[5] == '\0' || buf[5] == '\n')) force = true; - bch_cache_remove(ca, force); + bch_dev_remove(ca, force); } if (attr == &sysfs_clear_stats) { @@ -1349,9 +1333,9 @@ STORE(__bch_cache) return size; } -STORE_LOCKED(bch_cache) +STORE_LOCKED(bch_dev) -static struct attribute *bch_cache_files[] = { +static struct attribute *bch_dev_files[] = { &sysfs_uuid, &sysfs_unregister, &sysfs_bucket_size, @@ -1391,4 +1375,4 @@ static struct attribute *bch_cache_files[] = { sysfs_pd_controller_files(copy_gc), NULL }; -KTYPE(bch_cache); +KTYPE(bch_dev); diff --git a/libbcache/writeback.c b/libbcache/writeback.c index 600bfbf..b19a83c 100644 --- a/libbcache/writeback.c +++ b/libbcache/writeback.c @@ -169,7 +169,7 @@ static void read_dirty_endio(struct bio *bio) { struct dirty_io *io = container_of(bio, struct dirty_io, bio); - cache_nonfatal_io_err_on(bio->bi_error, io->ca, "writeback read"); + bch_dev_nonfatal_io_err_on(bio->bi_error, io->ca, "writeback read"); bch_account_io_completion(io->ca); diff --git a/libbcache/writeback.h b/libbcache/writeback.h index 77e5965..250b709 100644 --- a/libbcache/writeback.h +++ b/libbcache/writeback.h @@ -87,6 +87,8 @@ static inline void bch_writeback_add(struct cached_dev *dc) } } +#ifndef NO_BCACHE_WRITEBACK + void bcache_dev_sectors_dirty_add(struct cache_set *, unsigned, u64, int); void bch_writeback_recalc_oldest_gens(struct cache_set *); @@ -97,4 +99,24 @@ void bch_cached_dev_writeback_free(struct cached_dev *); int bch_cached_dev_writeback_init(struct cached_dev *); int bch_cached_dev_writeback_start(struct cached_dev *); +#else + +static inline void bcache_dev_sectors_dirty_add(struct cache_set *c, + unsigned i, u64 o, int n) {} +static inline void bch_writeback_recalc_oldest_gens(struct cache_set *c) {} +static inline void bch_sectors_dirty_init(struct cached_dev *dc, + struct cache_set *c) {} +static inline void bch_cached_dev_writeback_stop(struct cached_dev *dc) {} +static inline void bch_cached_dev_writeback_free(struct cached_dev *dc) {} +static inline int bch_cached_dev_writeback_init(struct cached_dev *dc) +{ + return 0; +} +static inline int bch_cached_dev_writeback_start(struct cached_dev *dc) +{ + return 0; +} + +#endif + #endif diff --git a/libbcache/xattr.c b/libbcache/xattr.c index 56a8e8f..7855236 100644 --- a/libbcache/xattr.c +++ b/libbcache/xattr.c @@ -165,18 +165,18 @@ int bch_xattr_get(struct cache_set *c, struct inode *inode, return ret; } -int bch_xattr_set(struct cache_set *c, struct inode *inode, +int __bch_xattr_set(struct cache_set *c, u64 inum, + const struct bch_hash_info 
*hash_info, const char *name, const void *value, size_t size, - int flags, int type) + int flags, int type, u64 *journal_seq) { - struct bch_inode_info *ei = to_bch_ei(inode); struct xattr_search_key search = X_SEARCH(type, name, strlen(name)); int ret; if (!value) { - ret = bch_hash_delete(xattr_hash_desc, &ei->str_hash, - c, ei->vfs_inode.i_ino, - &ei->journal_seq, &search); + ret = bch_hash_delete(xattr_hash_desc, hash_info, + c, inum, + journal_seq, &search); } else { struct bkey_i_xattr *xattr; unsigned u64s = BKEY_U64s + @@ -199,8 +199,8 @@ int bch_xattr_set(struct cache_set *c, struct inode *inode, memcpy(xattr->v.x_name, search.name.name, search.name.len); memcpy(xattr_val(&xattr->v), value, size); - ret = bch_hash_set(xattr_hash_desc, &ei->str_hash, c, - ei->vfs_inode.i_ino, &ei->journal_seq, + ret = bch_hash_set(xattr_hash_desc, hash_info, c, + inum, journal_seq, &xattr->k_i, (flags & XATTR_CREATE ? BCH_HASH_SET_MUST_CREATE : 0)| (flags & XATTR_REPLACE ? BCH_HASH_SET_MUST_REPLACE : 0)); @@ -213,6 +213,17 @@ int bch_xattr_set(struct cache_set *c, struct inode *inode, return ret; } +int bch_xattr_set(struct cache_set *c, struct inode *inode, + const char *name, const void *value, size_t size, + int flags, int type) +{ + struct bch_inode_info *ei = to_bch_ei(inode); + + return __bch_xattr_set(c, inode->i_ino, &ei->str_hash, + name, value, size, flags, type, + &ei->journal_seq); +} + static const struct xattr_handler *bch_xattr_type_to_handler(unsigned); static size_t bch_xattr_emit(struct dentry *dentry, diff --git a/libbcache/xattr.h b/libbcache/xattr.h index 54eb920..429031a 100644 --- a/libbcache/xattr.h +++ b/libbcache/xattr.h @@ -5,9 +5,12 @@ extern const struct bkey_ops bch_bkey_xattr_ops; struct dentry; struct xattr_handler; +struct bch_hash_info; int bch_xattr_get(struct cache_set *, struct inode *, const char *, void *, size_t, int); +int __bch_xattr_set(struct cache_set *, u64, const struct bch_hash_info *, + const char *, const void *, size_t, int, int, u64 *); int bch_xattr_set(struct cache_set *, struct inode *, const char *, const void *, size_t, int, int); ssize_t bch_xattr_list(struct dentry *, char *, size_t); diff --git a/linux/completion.c b/linux/completion.c deleted file mode 100644 index fcc77a0..0000000 --- a/linux/completion.c +++ /dev/null @@ -1,311 +0,0 @@ -/* - * Generic wait-for-completion handler; - * - * It differs from semaphores in that their default case is the opposite, - * wait_for_completion default blocks whereas semaphore default non-block. The - * interface also makes it easy to 'complete' multiple waiting threads, - * something which isn't entirely natural for semaphores. - * - * But more importantly, the primitive documents the usage. Semaphores would - * typically be used for exclusion which gives rise to priority inversion. - * Waiting for completion is a typically sync point, but not an exclusion point. - */ - -#include -#include - -/** - * complete: - signals a single thread waiting on this completion - * @x: holds the state of this particular completion - * - * This will wake up a single thread waiting on this completion. Threads will be - * awakened in the same order in which they were queued. - * - * See also complete_all(), wait_for_completion() and related routines. - * - * It may be assumed that this function implies a write memory barrier before - * changing the task state if and only if any tasks are woken up. 
- */ -void complete(struct completion *x) -{ - unsigned long flags; - - spin_lock_irqsave(&x->wait.lock, flags); - x->done++; - __wake_up_locked(&x->wait, TASK_NORMAL, 1); - spin_unlock_irqrestore(&x->wait.lock, flags); -} -EXPORT_SYMBOL(complete); - -/** - * complete_all: - signals all threads waiting on this completion - * @x: holds the state of this particular completion - * - * This will wake up all threads waiting on this particular completion event. - * - * It may be assumed that this function implies a write memory barrier before - * changing the task state if and only if any tasks are woken up. - */ -void complete_all(struct completion *x) -{ - unsigned long flags; - - spin_lock_irqsave(&x->wait.lock, flags); - x->done += UINT_MAX/2; - __wake_up_locked(&x->wait, TASK_NORMAL, 0); - spin_unlock_irqrestore(&x->wait.lock, flags); -} -EXPORT_SYMBOL(complete_all); - -static inline long __sched -do_wait_for_common(struct completion *x, - long (*action)(long), long timeout, int state) -{ - if (!x->done) { - DECLARE_WAITQUEUE(wait, current); - - __add_wait_queue_tail_exclusive(&x->wait, &wait); - do { - __set_current_state(state); - spin_unlock_irq(&x->wait.lock); - timeout = action(timeout); - spin_lock_irq(&x->wait.lock); - } while (!x->done && timeout); - __remove_wait_queue(&x->wait, &wait); - if (!x->done) - return timeout; - } - x->done--; - return timeout ?: 1; -} - -static inline long __sched -__wait_for_common(struct completion *x, - long (*action)(long), long timeout, int state) -{ - might_sleep(); - - spin_lock_irq(&x->wait.lock); - timeout = do_wait_for_common(x, action, timeout, state); - spin_unlock_irq(&x->wait.lock); - return timeout; -} - -static long __sched -wait_for_common(struct completion *x, long timeout, int state) -{ - return __wait_for_common(x, schedule_timeout, timeout, state); -} - -static long __sched -wait_for_common_io(struct completion *x, long timeout, int state) -{ - return __wait_for_common(x, io_schedule_timeout, timeout, state); -} - -/** - * wait_for_completion: - waits for completion of a task - * @x: holds the state of this particular completion - * - * This waits to be signaled for completion of a specific task. It is NOT - * interruptible and there is no timeout. - * - * See also similar routines (i.e. wait_for_completion_timeout()) with timeout - * and interrupt capability. Also see complete(). - */ -void __sched wait_for_completion(struct completion *x) -{ - wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE); -} -EXPORT_SYMBOL(wait_for_completion); - -/** - * wait_for_completion_timeout: - waits for completion of a task (w/timeout) - * @x: holds the state of this particular completion - * @timeout: timeout value in jiffies - * - * This waits for either a completion of a specific task to be signaled or for a - * specified timeout to expire. The timeout is in jiffies. It is not - * interruptible. - * - * Return: 0 if timed out, and positive (at least 1, or number of jiffies left - * till timeout) if completed. - */ -unsigned long __sched -wait_for_completion_timeout(struct completion *x, unsigned long timeout) -{ - return wait_for_common(x, timeout, TASK_UNINTERRUPTIBLE); -} -EXPORT_SYMBOL(wait_for_completion_timeout); - -/** - * wait_for_completion_io: - waits for completion of a task - * @x: holds the state of this particular completion - * - * This waits to be signaled for completion of a specific task. It is NOT - * interruptible and there is no timeout. 
The caller is accounted as waiting - * for IO (which traditionally means blkio only). - */ -void __sched wait_for_completion_io(struct completion *x) -{ - wait_for_common_io(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE); -} -EXPORT_SYMBOL(wait_for_completion_io); - -/** - * wait_for_completion_io_timeout: - waits for completion of a task (w/timeout) - * @x: holds the state of this particular completion - * @timeout: timeout value in jiffies - * - * This waits for either a completion of a specific task to be signaled or for a - * specified timeout to expire. The timeout is in jiffies. It is not - * interruptible. The caller is accounted as waiting for IO (which traditionally - * means blkio only). - * - * Return: 0 if timed out, and positive (at least 1, or number of jiffies left - * till timeout) if completed. - */ -unsigned long __sched -wait_for_completion_io_timeout(struct completion *x, unsigned long timeout) -{ - return wait_for_common_io(x, timeout, TASK_UNINTERRUPTIBLE); -} -EXPORT_SYMBOL(wait_for_completion_io_timeout); - -/** - * wait_for_completion_interruptible: - waits for completion of a task (w/intr) - * @x: holds the state of this particular completion - * - * This waits for completion of a specific task to be signaled. It is - * interruptible. - * - * Return: -ERESTARTSYS if interrupted, 0 if completed. - */ -int __sched wait_for_completion_interruptible(struct completion *x) -{ - wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_INTERRUPTIBLE); - return 0; -} -EXPORT_SYMBOL(wait_for_completion_interruptible); - -/** - * wait_for_completion_interruptible_timeout: - waits for completion (w/(to,intr)) - * @x: holds the state of this particular completion - * @timeout: timeout value in jiffies - * - * This waits for either a completion of a specific task to be signaled or for a - * specified timeout to expire. It is interruptible. The timeout is in jiffies. - * - * Return: -ERESTARTSYS if interrupted, 0 if timed out, positive (at least 1, - * or number of jiffies left till timeout) if completed. - */ -long __sched -wait_for_completion_interruptible_timeout(struct completion *x, - unsigned long timeout) -{ - return wait_for_common(x, timeout, TASK_INTERRUPTIBLE); -} -EXPORT_SYMBOL(wait_for_completion_interruptible_timeout); - -/** - * wait_for_completion_killable: - waits for completion of a task (killable) - * @x: holds the state of this particular completion - * - * This waits to be signaled for completion of a specific task. It can be - * interrupted by a kill signal. - * - * Return: -ERESTARTSYS if interrupted, 0 if completed. - */ -int __sched wait_for_completion_killable(struct completion *x) -{ - wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_KILLABLE); - return 0; -} -EXPORT_SYMBOL(wait_for_completion_killable); - -/** - * wait_for_completion_killable_timeout: - waits for completion of a task (w/(to,killable)) - * @x: holds the state of this particular completion - * @timeout: timeout value in jiffies - * - * This waits for either a completion of a specific task to be - * signaled or for a specified timeout to expire. It can be - * interrupted by a kill signal. The timeout is in jiffies. - * - * Return: -ERESTARTSYS if interrupted, 0 if timed out, positive (at least 1, - * or number of jiffies left till timeout) if completed. 
- */ -long __sched -wait_for_completion_killable_timeout(struct completion *x, - unsigned long timeout) -{ - return wait_for_common(x, timeout, TASK_KILLABLE); -} -EXPORT_SYMBOL(wait_for_completion_killable_timeout); - -/** - * try_wait_for_completion - try to decrement a completion without blocking - * @x: completion structure - * - * Return: 0 if a decrement cannot be done without blocking - * 1 if a decrement succeeded. - * - * If a completion is being used as a counting completion, - * attempt to decrement the counter without blocking. This - * enables us to avoid waiting if the resource the completion - * is protecting is not available. - */ -bool try_wait_for_completion(struct completion *x) -{ - unsigned long flags; - int ret = 1; - - /* - * Since x->done will need to be locked only - * in the non-blocking case, we check x->done - * first without taking the lock so we can - * return early in the blocking case. - */ - if (!READ_ONCE(x->done)) - return 0; - - spin_lock_irqsave(&x->wait.lock, flags); - if (!x->done) - ret = 0; - else - x->done--; - spin_unlock_irqrestore(&x->wait.lock, flags); - return ret; -} -EXPORT_SYMBOL(try_wait_for_completion); - -/** - * completion_done - Test to see if a completion has any waiters - * @x: completion structure - * - * Return: 0 if there are waiters (wait_for_completion() in progress) - * 1 if there are no waiters. - * - */ -bool completion_done(struct completion *x) -{ - if (!READ_ONCE(x->done)) - return false; - - /* - * If ->done, we need to wait for complete() to release ->wait.lock - * otherwise we can end up freeing the completion before complete() - * is done referencing it. - * - * The RMB pairs with complete()'s RELEASE of ->wait.lock and orders - * the loads of ->done and ->wait.lock such that we cannot observe - * the lock before complete() acquires it while observing the ->done - * after it's acquired the lock. - */ - smp_rmb(); - //spin_unlock_wait(&x->wait.lock); - spin_lock(&x->wait.lock); - spin_unlock(&x->wait.lock); - return true; -} -EXPORT_SYMBOL(completion_done); diff --git a/linux/lz4_compress.c b/linux/lz4_compress.c index 65243c7..808fe93 100644 --- a/linux/lz4_compress.c +++ b/linux/lz4_compress.c @@ -35,7 +35,6 @@ */ #include -#include #include #include #include @@ -81,40 +80,36 @@ static inline const u8 *hash_table_add16(const struct lz4_hash_table hash, return hash.base + offset; } -static inline const u8 *try_match(const struct lz4_hash_table hash, - const u8 *ip) -{ - const u8 *ref = hash.add(hash, ip); - - return ref >= ip - MAX_DISTANCE && - A32(ref) == A32(ip) ? 
ref : NULL; -} - static inline const u8 *find_match(const struct lz4_hash_table hash, const u8 **ip, const u8 *anchor, - const u8 *start, const u8 *end) + const u8 *start, const u8 *mflimit) { - int findmatchattempts = (1U << SKIPSTRENGTH) + 3; - const u8 *next_ip = *ip, *ref; - - do { - *ip = next_ip; - next_ip += findmatchattempts++ >> SKIPSTRENGTH; - - if (unlikely(next_ip > end)) - return NULL; - } while (!(ref = try_match(hash, *ip))); - - /* Catch up */ - while (*ip > anchor && - ref > start && - unlikely((*ip)[-1] == ref[-1])) { - (*ip)--; - ref--; + + while (*ip <= mflimit) { + const u8 *ref = hash.add(hash, *ip); + + if (ref >= *ip - MAX_DISTANCE && A32(ref) == A32(*ip)) { + /* found match: */ + while (*ip > anchor && + ref > start && + unlikely((*ip)[-1] == ref[-1])) { + (*ip)--; + ref--; + } + + return ref; + } + + *ip += findmatchattempts++ >> SKIPSTRENGTH; } - return ref; + return NULL; +} + +static inline int length_len(unsigned length) +{ + return length / 255 + 1; } /* @@ -130,102 +125,77 @@ static inline int lz4_compressctx(const struct lz4_hash_table hash, const u8 *src, size_t src_len, u8 *dst, size_t *dst_len) { - const u8 *ip = src; - const u8 *anchor = ip, *ref; + const u8 *ip = src, *anchor = ip, *ref; const u8 *const iend = ip + src_len; const u8 *const mflimit = iend - MFLIMIT; const u8 *const matchlimit = iend - LASTLITERALS; - size_t maxoutputsize = *dst_len; - u8 *op = dst; - u8 *const oend = op + maxoutputsize; - int length; - u8 *token; + u8 *op = dst, *token; + u8 *const oend = op + *dst_len; + size_t literal_len, match_len, match_offset; /* Init */ - if (src_len < MINLENGTH) - goto _last_literals; - memset(hash.ctx, 0, LZ4_MEM_COMPRESS); hash.add(hash, ip); - /* Main Loop */ - while (1) { - /* Starting a literal: */ - anchor = ip++; - ref = find_match(hash, &ip, anchor, src, mflimit); - if (!ref) - goto _last_literals; + /* Always start with a literal: */ + ip++; + while ((ref = find_match(hash, &ip, anchor, src, mflimit))) { /* * We found a match; @ip now points to the match and @ref points * to the prior part of the input we matched with. 
Everything up * to @anchor has been encoded; the range from @anchor to @ip * didn't match and now has to be encoded as a literal: */ - length = ip - anchor; - token = op++; - - /* check output limit */ - if (unlikely(op + length + (2 + 1 + LASTLITERALS) + - (length >> 8) > oend)) - return -(ip - src); - - *token = encode_length(&op, length) << ML_BITS; - - /* Copy Literals */ - MEMCPY_ADVANCE_CHUNKED(op, anchor, length); - - /* Encode matches: */ - while (1) { - /* Match offset: */ - PUT_LE16_ADVANCE(op, ip - ref); - - /* MINMATCH bytes already matched from find_match(): */ - ip += MINMATCH; - ref += MINMATCH; - - length = common_length(ip, ref, matchlimit); + literal_len = ip - anchor; + match_offset = ip - ref; - /* Check output limit */ - if (unlikely(op + (1 + LASTLITERALS) + - (length >> 8) > oend)) - return -(ip - src); + /* MINMATCH bytes already matched from find_match(): */ + ip += MINMATCH; + ref += MINMATCH; + match_len = common_length(ip, ref, matchlimit); + ip += match_len; - ip += length; + /* check output limit */ + if (unlikely(op + + 1 + /* token */ + 2 + /* match ofset */ + literal_len + + length_len(literal_len) + + length_len(match_len) + + LASTLITERALS > oend)) + break; - *token += encode_length(&op, length); + token = op++; + *token = encode_length(&op, literal_len) << ML_BITS; + MEMCPY_ADVANCE_CHUNKED(op, anchor, literal_len); + PUT_LE16_ADVANCE(op, match_offset); + *token += encode_length(&op, match_len); - /* Test end of chunk */ - if (ip > mflimit) { - anchor = ip; - break; - } + anchor = ip; + } - /* Fill table */ - hash.add(hash, ip - 2); + /* Encode remaining input as literal: */ + literal_len = iend - anchor; + if (unlikely(op + + 1 + + literal_len + + length_len(literal_len) > oend)) { + /* Return how much would be able to fit: */ + ssize_t remaining = oend - op; + ssize_t encoded = anchor - src; - /* Test next position */ - ref = try_match(hash, ip); - if (!ref) - break; + remaining -= length_len(remaining) + 1; - token = op++; - *token = 0; - } + return -max(encoded + remaining, 1L); } -_last_literals: - /* Encode Last Literals */ - length = iend - anchor; - if ((op - dst) + length + 1 + - ((length + 255 - RUN_MASK) / 255) > (u32)maxoutputsize) - return -(ip - src); - token = op++; - *token = encode_length(&op, length) << ML_BITS; - MEMCPY_ADVANCE(op, anchor, iend - anchor); + *token = encode_length(&op, literal_len) << ML_BITS; + MEMCPY_ADVANCE(op, anchor, literal_len); /* End */ + BUG_ON(op > oend); *dst_len = op - dst; return 0; } @@ -252,7 +222,3 @@ int lz4_compress(const unsigned char *src, size_t src_len, return lz4_compressctx(hash, src, src_len, dst, dst_len); } } -EXPORT_SYMBOL(lz4_compress); - -MODULE_LICENSE("Dual BSD/GPL"); -MODULE_DESCRIPTION("LZ4 compressor"); diff --git a/linux/rhashtable.c b/linux/rhashtable.c index 035d82a..03369ea 100644 --- a/linux/rhashtable.c +++ b/linux/rhashtable.c @@ -25,7 +25,6 @@ #include #include #include -#include #define HASH_DEFAULT_SIZE 64UL #define HASH_MIN_SIZE 4U @@ -38,36 +37,11 @@ static u32 head_hashfn(struct rhashtable *ht, return rht_head_hashfn(ht, tbl, he, ht->p); } -#ifdef CONFIG_PROVE_LOCKING -#define ASSERT_RHT_MUTEX(HT) BUG_ON(!lockdep_rht_mutex_is_held(HT)) - -int lockdep_rht_mutex_is_held(struct rhashtable *ht) -{ - return (debug_locks) ? lockdep_is_held(&ht->mutex) : 1; -} -EXPORT_SYMBOL_GPL(lockdep_rht_mutex_is_held); - -int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash) -{ - spinlock_t *lock = rht_bucket_lock(tbl, hash); - - return (debug_locks) ? 
lockdep_is_held(lock) : 1; -} -EXPORT_SYMBOL_GPL(lockdep_rht_bucket_is_held); -#else -#define ASSERT_RHT_MUTEX(HT) -#endif - - static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl, gfp_t gfp) { unsigned int i, size; -#if defined(CONFIG_PROVE_LOCKING) - unsigned int nr_pcpus = 2; -#else unsigned int nr_pcpus = num_possible_cpus(); -#endif nr_pcpus = min_t(unsigned int, nr_pcpus, 64UL); size = roundup_pow_of_two(nr_pcpus * ht->p.locks_mul); @@ -77,11 +51,6 @@ static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl, if (sizeof(spinlock_t) != 0) { tbl->locks = NULL; -#ifdef CONFIG_NUMA - if (size * sizeof(spinlock_t) > PAGE_SIZE && - gfp == GFP_KERNEL) - tbl->locks = vmalloc(size * sizeof(spinlock_t)); -#endif if (gfp != GFP_KERNEL) gfp |= __GFP_NOWARN | __GFP_NORETRY; @@ -270,28 +239,11 @@ static int rhashtable_rehash_table(struct rhashtable *ht) return rht_dereference(new_tbl->future_tbl, ht) ? -EAGAIN : 0; } -/** - * rhashtable_expand - Expand hash table while allowing concurrent lookups - * @ht: the hash table to expand - * - * A secondary bucket array is allocated and the hash entries are migrated. - * - * This function may only be called in a context where it is safe to call - * synchronize_rcu(), e.g. not within a rcu_read_lock() section. - * - * The caller must ensure that no concurrent resizing occurs by holding - * ht->mutex. - * - * It is valid to have concurrent insertions and deletions protected by per - * bucket locks or concurrent RCU protected lookups and traversals. - */ static int rhashtable_expand(struct rhashtable *ht) { struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht); int err; - ASSERT_RHT_MUTEX(ht); - old_tbl = rhashtable_last_table(ht, old_tbl); new_tbl = bucket_table_alloc(ht, old_tbl->size * 2, GFP_KERNEL); @@ -305,22 +257,6 @@ static int rhashtable_expand(struct rhashtable *ht) return err; } -/** - * rhashtable_shrink - Shrink hash table while allowing concurrent lookups - * @ht: the hash table to shrink - * - * This function shrinks the hash table to fit, i.e., the smallest - * size would not cause it to expand right away automatically. - * - * The caller must ensure that no concurrent resizing occurs by holding - * ht->mutex. - * - * The caller must ensure that no concurrent table mutations take place. - * It is however valid to have concurrent lookups if they are RCU protected. - * - * It is valid to have concurrent insertions and deletions protected by per - * bucket locks or concurrent RCU protected lookups and traversals. - */ static int rhashtable_shrink(struct rhashtable *ht) { struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht); @@ -328,8 +264,6 @@ static int rhashtable_shrink(struct rhashtable *ht) unsigned int size = 0; int err; - ASSERT_RHT_MUTEX(ht); - if (nelems) size = roundup_pow_of_two(nelems * 3 / 2); if (size < ht->p.min_size) @@ -438,7 +372,6 @@ fail: return err; } -EXPORT_SYMBOL_GPL(rhashtable_insert_rehash); struct bucket_table *rhashtable_insert_slow(struct rhashtable *ht, const void *key, @@ -486,191 +419,6 @@ exit: else return ERR_PTR(err); } -EXPORT_SYMBOL_GPL(rhashtable_insert_slow); - -/** - * rhashtable_walk_init - Initialise an iterator - * @ht: Table to walk over - * @iter: Hash table Iterator - * @gfp: GFP flags for allocations - * - * This function prepares a hash table walk. - * - * Note that if you restart a walk after rhashtable_walk_stop you - * may see the same object twice. 
Also, you may miss objects if - * there are removals in between rhashtable_walk_stop and the next - * call to rhashtable_walk_start. - * - * For a completely stable walk you should construct your own data - * structure outside the hash table. - * - * This function may sleep so you must not call it from interrupt - * context or with spin locks held. - * - * You must call rhashtable_walk_exit if this function returns - * successfully. - */ -int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter, - gfp_t gfp) -{ - iter->ht = ht; - iter->p = NULL; - iter->slot = 0; - iter->skip = 0; - - iter->walker = kmalloc(sizeof(*iter->walker), gfp); - if (!iter->walker) - return -ENOMEM; - - spin_lock(&ht->lock); - iter->walker->tbl = - rcu_dereference_protected(ht->tbl, lockdep_is_held(&ht->lock)); - list_add(&iter->walker->list, &iter->walker->tbl->walkers); - spin_unlock(&ht->lock); - - return 0; -} -EXPORT_SYMBOL_GPL(rhashtable_walk_init); - -/** - * rhashtable_walk_exit - Free an iterator - * @iter: Hash table Iterator - * - * This function frees resources allocated by rhashtable_walk_init. - */ -void rhashtable_walk_exit(struct rhashtable_iter *iter) -{ - spin_lock(&iter->ht->lock); - if (iter->walker->tbl) - list_del(&iter->walker->list); - spin_unlock(&iter->ht->lock); - kfree(iter->walker); -} -EXPORT_SYMBOL_GPL(rhashtable_walk_exit); - -/** - * rhashtable_walk_start - Start a hash table walk - * @iter: Hash table iterator - * - * Start a hash table walk. Note that we take the RCU lock in all - * cases including when we return an error. So you must always call - * rhashtable_walk_stop to clean up. - * - * Returns zero if successful. - * - * Returns -EAGAIN if resize event occured. Note that the iterator - * will rewind back to the beginning and you may use it immediately - * by calling rhashtable_walk_next. - */ -int rhashtable_walk_start(struct rhashtable_iter *iter) - __acquires(RCU) -{ - struct rhashtable *ht = iter->ht; - - rcu_read_lock(); - - spin_lock(&ht->lock); - if (iter->walker->tbl) - list_del(&iter->walker->list); - spin_unlock(&ht->lock); - - if (!iter->walker->tbl) { - iter->walker->tbl = rht_dereference_rcu(ht->tbl, ht); - return -EAGAIN; - } - - return 0; -} -EXPORT_SYMBOL_GPL(rhashtable_walk_start); - -/** - * rhashtable_walk_next - Return the next object and advance the iterator - * @iter: Hash table iterator - * - * Note that you must call rhashtable_walk_stop when you are finished - * with the walk. - * - * Returns the next object or NULL when the end of the table is reached. - * - * Returns -EAGAIN if resize event occured. Note that the iterator - * will rewind back to the beginning and you may continue to use it. - */ -void *rhashtable_walk_next(struct rhashtable_iter *iter) -{ - struct bucket_table *tbl = iter->walker->tbl; - struct rhashtable *ht = iter->ht; - struct rhash_head *p = iter->p; - - if (p) { - p = rht_dereference_bucket_rcu(p->next, tbl, iter->slot); - goto next; - } - - for (; iter->slot < tbl->size; iter->slot++) { - int skip = iter->skip; - - rht_for_each_rcu(p, tbl, iter->slot) { - if (!skip) - break; - skip--; - } - -next: - if (!rht_is_a_nulls(p)) { - iter->skip++; - iter->p = p; - return rht_obj(ht, p); - } - - iter->skip = 0; - } - - iter->p = NULL; - - /* Ensure we see any new tables. 
*/ - smp_rmb(); - - iter->walker->tbl = rht_dereference_rcu(tbl->future_tbl, ht); - if (iter->walker->tbl) { - iter->slot = 0; - iter->skip = 0; - return ERR_PTR(-EAGAIN); - } - - return NULL; -} -EXPORT_SYMBOL_GPL(rhashtable_walk_next); - -/** - * rhashtable_walk_stop - Finish a hash table walk - * @iter: Hash table iterator - * - * Finish a hash table walk. - */ -void rhashtable_walk_stop(struct rhashtable_iter *iter) - __releases(RCU) -{ - struct rhashtable *ht; - struct bucket_table *tbl = iter->walker->tbl; - - if (!tbl) - goto out; - - ht = iter->ht; - - spin_lock(&ht->lock); - if (tbl->rehash < tbl->size) - list_add(&iter->walker->list, &tbl->walkers); - else - iter->walker->tbl = NULL; - spin_unlock(&ht->lock); - - iter->p = NULL; - -out: - rcu_read_unlock(); -} -EXPORT_SYMBOL_GPL(rhashtable_walk_stop); static size_t rounded_hashtable_size(const struct rhashtable_params *params) { @@ -683,49 +431,6 @@ static u32 rhashtable_jhash2(const void *key, u32 length, u32 seed) return jhash2(key, length, seed); } -/** - * rhashtable_init - initialize a new hash table - * @ht: hash table to be initialized - * @params: configuration parameters - * - * Initializes a new hash table based on the provided configuration - * parameters. A table can be configured either with a variable or - * fixed length key: - * - * Configuration Example 1: Fixed length keys - * struct test_obj { - * int key; - * void * my_member; - * struct rhash_head node; - * }; - * - * struct rhashtable_params params = { - * .head_offset = offsetof(struct test_obj, node), - * .key_offset = offsetof(struct test_obj, key), - * .key_len = sizeof(int), - * .hashfn = jhash, - * .nulls_base = (1U << RHT_BASE_SHIFT), - * }; - * - * Configuration Example 2: Variable length keys - * struct test_obj { - * [...] - * struct rhash_head node; - * }; - * - * u32 my_hash_fn(const void *data, u32 len, u32 seed) - * { - * struct test_obj *obj = data; - * - * return [... hash ...]; - * } - * - * struct rhashtable_params params = { - * .head_offset = offsetof(struct test_obj, node), - * .hashfn = jhash, - * .obj_hashfn = my_hash_fn, - * }; - */ int rhashtable_init(struct rhashtable *ht, const struct rhashtable_params *params) { @@ -805,56 +510,15 @@ int rhashtable_init(struct rhashtable *ht, return 0; } -EXPORT_SYMBOL_GPL(rhashtable_init); -/** - * rhashtable_free_and_destroy - free elements and destroy hash table - * @ht: the hash table to destroy - * @free_fn: callback to release resources of element - * @arg: pointer passed to free_fn - * - * Stops an eventual async resize. If defined, invokes free_fn for each - * element to releasal resources. Please note that RCU protected - * readers may still be accessing the elements. Releasing of resources - * must occur in a compatible manner. Then frees the bucket array. - * - * This function will eventually sleep to wait for an async resize - * to complete. The caller is responsible that no further write operations - * occurs in parallel. - */ -void rhashtable_free_and_destroy(struct rhashtable *ht, - void (*free_fn)(void *ptr, void *arg), - void *arg) +void rhashtable_destroy(struct rhashtable *ht) { struct bucket_table *tbl; - unsigned int i; cancel_work_sync(&ht->run_work); mutex_lock(&ht->mutex); tbl = rht_dereference(ht->tbl, ht); - if (free_fn) { - for (i = 0; i < tbl->size; i++) { - struct rhash_head *pos, *next; - - for (pos = rht_dereference(tbl->buckets[i], ht), - next = !rht_is_a_nulls(pos) ? 
- rht_dereference(pos->next, ht) : NULL; - !rht_is_a_nulls(pos); - pos = next, - next = !rht_is_a_nulls(pos) ? - rht_dereference(pos->next, ht) : NULL) - free_fn(rht_obj(ht, pos), arg); - } - } - bucket_table_free(tbl); mutex_unlock(&ht->mutex); } -EXPORT_SYMBOL_GPL(rhashtable_free_and_destroy); - -void rhashtable_destroy(struct rhashtable *ht) -{ - return rhashtable_free_and_destroy(ht, NULL, NULL); -} -EXPORT_SYMBOL_GPL(rhashtable_destroy); diff --git a/linux/wait.c b/linux/wait.c index 83f4e85..991875c 100644 --- a/linux/wait.c +++ b/linux/wait.c @@ -3,66 +3,42 @@ * * (C) 2004 Nadia Yvette Chambers, Oracle */ -#include + +#include #include -#include #include -#include -#include -void __init_waitqueue_head(wait_queue_head_t *q, const char *name, struct lock_class_key *key) +static inline int waitqueue_active(wait_queue_head_t *q) { - spin_lock_init(&q->lock); - lockdep_set_class_and_name(&q->lock, key, name); - INIT_LIST_HEAD(&q->task_list); + return !list_empty(&q->task_list); } -EXPORT_SYMBOL(__init_waitqueue_head); - -void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait) +static inline void __add_wait_queue(wait_queue_head_t *head, wait_queue_t *new) { - unsigned long flags; - - wait->flags &= ~WQ_FLAG_EXCLUSIVE; - spin_lock_irqsave(&q->lock, flags); - __add_wait_queue(q, wait); - spin_unlock_irqrestore(&q->lock, flags); + list_add(&new->task_list, &head->task_list); } -EXPORT_SYMBOL(add_wait_queue); -void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait) +static inline void __add_wait_queue_tail(wait_queue_head_t *head, + wait_queue_t *new) { - unsigned long flags; + list_add_tail(&new->task_list, &head->task_list); +} +static inline void +__add_wait_queue_tail_exclusive(wait_queue_head_t *q, wait_queue_t *wait) +{ wait->flags |= WQ_FLAG_EXCLUSIVE; - spin_lock_irqsave(&q->lock, flags); __add_wait_queue_tail(q, wait); - spin_unlock_irqrestore(&q->lock, flags); } -EXPORT_SYMBOL(add_wait_queue_exclusive); -void remove_wait_queue(wait_queue_head_t *q, wait_queue_t *wait) +static inline void +__remove_wait_queue(wait_queue_head_t *head, wait_queue_t *old) { - unsigned long flags; - - spin_lock_irqsave(&q->lock, flags); - __remove_wait_queue(q, wait); - spin_unlock_irqrestore(&q->lock, flags); + list_del(&old->task_list); } -EXPORT_SYMBOL(remove_wait_queue); - -/* - * The core wakeup function. Non-exclusive wakeups (nr_exclusive == 0) just - * wake everything up. If it's an exclusive wakeup (nr_exclusive == small +ve - * number) then we wake all the non-exclusive tasks and one exclusive task. - * - * There are circumstances in which we can try to wake a task which has already - * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns - * zero in this (rare) case, and we handle it by continuing to scan the queue. - */ static void __wake_up_common(wait_queue_head_t *q, unsigned int mode, - int nr_exclusive, int wake_flags, void *key) + int nr_exclusive, int wake_flags, void *key) { wait_queue_t *curr, *next; @@ -75,18 +51,8 @@ static void __wake_up_common(wait_queue_head_t *q, unsigned int mode, } } -/** - * __wake_up - wake up threads blocked on a waitqueue. - * @q: the waitqueue - * @mode: which threads - * @nr_exclusive: how many wake-one or wake-many threads to wake up - * @key: is directly passed to the wakeup function - * - * It may be assumed that this function implies a write memory barrier before - * changing the task state if and only if any tasks are woken up. 
- */ -void __wake_up(wait_queue_head_t *q, unsigned int mode, - int nr_exclusive, void *key) +static void __wake_up(wait_queue_head_t *q, unsigned int mode, + int nr_exclusive, void *key) { unsigned long flags; @@ -94,79 +60,17 @@ void __wake_up(wait_queue_head_t *q, unsigned int mode, __wake_up_common(q, mode, nr_exclusive, 0, key); spin_unlock_irqrestore(&q->lock, flags); } -EXPORT_SYMBOL(__wake_up); -/* - * Same as __wake_up but called with the spinlock in wait_queue_head_t held. - */ -void __wake_up_locked(wait_queue_head_t *q, unsigned int mode, int nr) +void wake_up(wait_queue_head_t *q) { - __wake_up_common(q, mode, nr, 0, NULL); + __wake_up(q, TASK_NORMAL, 1, NULL); } -EXPORT_SYMBOL_GPL(__wake_up_locked); -void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key) +static void __wake_up_locked(wait_queue_head_t *q, unsigned int mode, int nr) { - __wake_up_common(q, mode, 1, 0, key); -} -EXPORT_SYMBOL_GPL(__wake_up_locked_key); - -/** - * __wake_up_sync_key - wake up threads blocked on a waitqueue. - * @q: the waitqueue - * @mode: which threads - * @nr_exclusive: how many wake-one or wake-many threads to wake up - * @key: opaque value to be passed to wakeup targets - * - * The sync wakeup differs that the waker knows that it will schedule - * away soon, so while the target thread will be woken up, it will not - * be migrated to another CPU - ie. the two threads are 'synchronized' - * with each other. This can prevent needless bouncing between CPUs. - * - * On UP it can prevent extra preemption. - * - * It may be assumed that this function implies a write memory barrier before - * changing the task state if and only if any tasks are woken up. - */ -void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, - int nr_exclusive, void *key) -{ - unsigned long flags; - int wake_flags = 1; /* XXX WF_SYNC */ - - if (unlikely(!q)) - return; - - if (unlikely(nr_exclusive != 1)) - wake_flags = 0; - - spin_lock_irqsave(&q->lock, flags); - __wake_up_common(q, mode, nr_exclusive, wake_flags, key); - spin_unlock_irqrestore(&q->lock, flags); -} -EXPORT_SYMBOL_GPL(__wake_up_sync_key); - -/* - * __wake_up_sync - see __wake_up_sync_key() - */ -void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive) -{ - __wake_up_sync_key(q, mode, nr_exclusive, NULL); + __wake_up_common(q, mode, nr, 0, NULL); } -EXPORT_SYMBOL_GPL(__wake_up_sync); /* For internal use only */ -/* - * Note: we use "set_current_state()" _after_ the wait-queue add, - * because we need a memory barrier there on SMP, so that any - * wake-function that tests for the wait-queue being active - * will be guaranteed to see waitqueue addition _or_ subsequent - * tests in this thread will see the wakeup having taken place. - * - * The spin_unlock() itself is semi-permeable and only protects - * one way (it only protects stuff inside the critical region and - * stops them from bleeding out - it would still allow subsequent - * loads to move into the critical region). 
- */ void prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state) { @@ -179,9 +83,8 @@ prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state) set_current_state(state); spin_unlock_irqrestore(&q->lock, flags); } -EXPORT_SYMBOL(prepare_to_wait); -void +static void prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state) { unsigned long flags; @@ -193,38 +96,7 @@ prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state) set_current_state(state); spin_unlock_irqrestore(&q->lock, flags); } -EXPORT_SYMBOL(prepare_to_wait_exclusive); - -long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_t *wait, int state) -{ - unsigned long flags; - - wait->private = current; - wait->func = autoremove_wake_function; - - spin_lock_irqsave(&q->lock, flags); - if (list_empty(&wait->task_list)) { - if (wait->flags & WQ_FLAG_EXCLUSIVE) - __add_wait_queue_tail(q, wait); - else - __add_wait_queue(q, wait); - } - set_current_state(state); - spin_unlock_irqrestore(&q->lock, flags); - - return 0; -} -EXPORT_SYMBOL(prepare_to_wait_event); -/** - * finish_wait - clean up after waiting in a queue - * @q: waitqueue waited on - * @wait: wait descriptor - * - * Sets current thread back to running state and removes - * the wait descriptor from the given waitqueue if still - * queued. - */ void finish_wait(wait_queue_head_t *q, wait_queue_t *wait) { unsigned long flags; @@ -249,40 +121,6 @@ void finish_wait(wait_queue_head_t *q, wait_queue_t *wait) spin_unlock_irqrestore(&q->lock, flags); } } -EXPORT_SYMBOL(finish_wait); - -/** - * abort_exclusive_wait - abort exclusive waiting in a queue - * @q: waitqueue waited on - * @wait: wait descriptor - * @mode: runstate of the waiter to be woken - * @key: key to identify a wait bit queue or %NULL - * - * Sets current thread back to running state and removes - * the wait descriptor from the given waitqueue if still - * queued. - * - * Wakes up the next waiter if the caller is concurrently - * woken up through the queue. - * - * This prevents waiter starvation where an exclusive waiter - * aborts and is woken up concurrently and no one wakes up - * the next waiter. 
- */ -void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait, - unsigned int mode, void *key) -{ - unsigned long flags; - - __set_current_state(TASK_RUNNING); - spin_lock_irqsave(&q->lock, flags); - if (!list_empty(&wait->task_list)) - list_del_init(&wait->task_list); - else if (waitqueue_active(q)) - __wake_up_locked_key(q, mode, key); - spin_unlock_irqrestore(&q->lock, flags); -} -EXPORT_SYMBOL(abort_exclusive_wait); int default_wake_function(wait_queue_t *curr, unsigned mode, int wake_flags, void *key) @@ -298,319 +136,110 @@ int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void * list_del_init(&wait->task_list); return ret; } -EXPORT_SYMBOL(autoremove_wake_function); -static inline bool is_kthread_should_stop(void) -{ - return (current->flags & PF_KTHREAD) && kthread_should_stop(); -} +struct wait_bit_key { + void *flags; + int bit_nr; + unsigned long timeout; +}; -/* - * DEFINE_WAIT_FUNC(wait, woken_wake_func); - * - * add_wait_queue(&wq, &wait); - * for (;;) { - * if (condition) - * break; - * - * p->state = mode; condition = true; - * smp_mb(); // A smp_wmb(); // C - * if (!wait->flags & WQ_FLAG_WOKEN) wait->flags |= WQ_FLAG_WOKEN; - * schedule() try_to_wake_up(); - * p->state = TASK_RUNNING; ~~~~~~~~~~~~~~~~~~ - * wait->flags &= ~WQ_FLAG_WOKEN; condition = true; - * smp_mb() // B smp_wmb(); // C - * wait->flags |= WQ_FLAG_WOKEN; - * } - * remove_wait_queue(&wq, &wait); - * - */ -long wait_woken(wait_queue_t *wait, unsigned mode, long timeout) -{ - set_current_state(mode); /* A */ - /* - * The above implies an smp_mb(), which matches with the smp_wmb() from - * woken_wake_function() such that if we observe WQ_FLAG_WOKEN we must - * also observe all state before the wakeup. - */ - if (!(wait->flags & WQ_FLAG_WOKEN) && !is_kthread_should_stop()) - timeout = schedule_timeout(timeout); - __set_current_state(TASK_RUNNING); - - /* - * The below implies an smp_mb(), it too pairs with the smp_wmb() from - * woken_wake_function() such that we must either observe the wait - * condition being true _OR_ WQ_FLAG_WOKEN such that we will not miss - * an event. - */ - smp_store_mb(wait->flags, wait->flags & ~WQ_FLAG_WOKEN); /* B */ +struct wait_bit_queue { + struct wait_bit_key key; + wait_queue_t wait; +}; - return timeout; -} -EXPORT_SYMBOL(wait_woken); - -int woken_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key) +static int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *arg) { - /* - * Although this function is called under waitqueue lock, LOCK - * doesn't imply write barrier and the users expects write - * barrier semantics on wakeup functions. The following - * smp_wmb() is equivalent to smp_wmb() in try_to_wake_up() - * and is paired with smp_store_mb() in wait_woken(). - */ - smp_wmb(); /* C */ - wait->flags |= WQ_FLAG_WOKEN; + struct wait_bit_key *key = arg; + struct wait_bit_queue *wait_bit = + container_of(wait, struct wait_bit_queue, wait); - return default_wake_function(wait, mode, sync, key); + return (wait_bit->key.flags == key->flags && + wait_bit->key.bit_nr == key->bit_nr && + !test_bit(key->bit_nr, key->flags)) + ? 
autoremove_wake_function(wait, mode, sync, key) : 0; } -EXPORT_SYMBOL(woken_wake_function); -int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *arg) -{ - struct wait_bit_key *key = arg; - struct wait_bit_queue *wait_bit - = container_of(wait, struct wait_bit_queue, wait); - - if (wait_bit->key.flags != key->flags || - wait_bit->key.bit_nr != key->bit_nr || - test_bit(key->bit_nr, key->flags)) - return 0; - else - return autoremove_wake_function(wait, mode, sync, key); -} -EXPORT_SYMBOL(wake_bit_function); +static DECLARE_WAIT_QUEUE_HEAD(bit_wq); -/* - * To allow interruptible waiting and asynchronous (i.e. nonblocking) - * waiting, the actions of __wait_on_bit() and __wait_on_bit_lock() are - * permitted return codes. Nonzero return codes halt waiting and return. - */ -int __sched -__wait_on_bit(wait_queue_head_t *wq, struct wait_bit_queue *q, - wait_bit_action_f *action, unsigned mode) -{ - int ret = 0; +#define __WAIT_BIT_KEY_INITIALIZER(word, bit) \ + { .flags = word, .bit_nr = bit, } - do { - prepare_to_wait(wq, &q->wait, mode); - if (test_bit(q->key.bit_nr, q->key.flags)) - ret = (*action)(&q->key, mode); - } while (test_bit(q->key.bit_nr, q->key.flags) && !ret); - finish_wait(wq, &q->wait); - return ret; -} -EXPORT_SYMBOL(__wait_on_bit); +#define DEFINE_WAIT_BIT(name, word, bit) \ + struct wait_bit_queue name = { \ + .key = __WAIT_BIT_KEY_INITIALIZER(word, bit), \ + .wait = { \ + .private = current, \ + .func = wake_bit_function, \ + .task_list = \ + LIST_HEAD_INIT((name).wait.task_list), \ + }, \ + } -int __sched out_of_line_wait_on_bit(void *word, int bit, - wait_bit_action_f *action, unsigned mode) +void wake_up_bit(void *word, int bit) { - wait_queue_head_t *wq = bit_waitqueue(word, bit); - DEFINE_WAIT_BIT(wait, word, bit); + struct wait_bit_key key = __WAIT_BIT_KEY_INITIALIZER(word, bit); - return __wait_on_bit(wq, &wait, action, mode); + if (waitqueue_active(&bit_wq)) + __wake_up(&bit_wq, TASK_NORMAL, 1, &key); } -EXPORT_SYMBOL(out_of_line_wait_on_bit); -int __sched out_of_line_wait_on_bit_timeout( - void *word, int bit, wait_bit_action_f *action, - unsigned mode, unsigned long timeout) +void __wait_on_bit(void *word, int bit, unsigned mode) { - wait_queue_head_t *wq = bit_waitqueue(word, bit); DEFINE_WAIT_BIT(wait, word, bit); - wait.key.timeout = jiffies + timeout; - return __wait_on_bit(wq, &wait, action, mode); -} -EXPORT_SYMBOL_GPL(out_of_line_wait_on_bit_timeout); - -int __sched -__wait_on_bit_lock(wait_queue_head_t *wq, struct wait_bit_queue *q, - wait_bit_action_f *action, unsigned mode) -{ do { - int ret; + prepare_to_wait(&bit_wq, &wait.wait, mode); + if (test_bit(wait.key.bit_nr, wait.key.flags)) + schedule(); + } while (test_bit(wait.key.bit_nr, wait.key.flags)); - prepare_to_wait_exclusive(wq, &q->wait, mode); - if (!test_bit(q->key.bit_nr, q->key.flags)) - continue; - ret = action(&q->key, mode); - if (!ret) - continue; - abort_exclusive_wait(wq, &q->wait, mode, &q->key); - return ret; - } while (test_and_set_bit(q->key.bit_nr, q->key.flags)); - finish_wait(wq, &q->wait); - return 0; + finish_wait(&bit_wq, &wait.wait); } -EXPORT_SYMBOL(__wait_on_bit_lock); -int __sched out_of_line_wait_on_bit_lock(void *word, int bit, - wait_bit_action_f *action, unsigned mode) +void __wait_on_bit_lock(void *word, int bit, unsigned mode) { - wait_queue_head_t *wq = bit_waitqueue(word, bit); DEFINE_WAIT_BIT(wait, word, bit); - return __wait_on_bit_lock(wq, &wait, action, mode); -} -EXPORT_SYMBOL(out_of_line_wait_on_bit_lock); - -void 
__wake_up_bit(wait_queue_head_t *wq, void *word, int bit) -{ - struct wait_bit_key key = __WAIT_BIT_KEY_INITIALIZER(word, bit); - if (waitqueue_active(wq)) - __wake_up(wq, TASK_NORMAL, 1, &key); -} -EXPORT_SYMBOL(__wake_up_bit); - -/** - * wake_up_bit - wake up a waiter on a bit - * @word: the word being waited on, a kernel virtual address - * @bit: the bit of the word being waited on - * - * There is a standard hashed waitqueue table for generic use. This - * is the part of the hashtable's accessor API that wakes up waiters - * on a bit. For instance, if one were to have waiters on a bitflag, - * one would call wake_up_bit() after clearing the bit. - * - * In order for this to function properly, as it uses waitqueue_active() - * internally, some kind of memory barrier must be done prior to calling - * this. Typically, this will be smp_mb__after_atomic(), but in some - * cases where bitflags are manipulated non-atomically under a lock, one - * may need to use a less regular barrier, such fs/inode.c's smp_mb(), - * because spin_unlock() does not guarantee a memory barrier. - */ -void wake_up_bit(void *word, int bit) -{ - __wake_up_bit(bit_waitqueue(word, bit), word, bit); -} -EXPORT_SYMBOL(wake_up_bit); - -static DECLARE_WAIT_QUEUE_HEAD(__bit_waitqueue); - -wait_queue_head_t *bit_waitqueue(void *word, int bit) -{ - return &__bit_waitqueue; -} -EXPORT_SYMBOL(bit_waitqueue); - -/* - * Manipulate the atomic_t address to produce a better bit waitqueue table hash - * index (we're keying off bit -1, but that would produce a horrible hash - * value). - */ -static inline wait_queue_head_t *atomic_t_waitqueue(atomic_t *p) -{ - if (BITS_PER_LONG == 64) { - unsigned long q = (unsigned long)p; - return bit_waitqueue((void *)(q & ~1), q & 1); - } - return bit_waitqueue(p, 0); -} - -static int wake_atomic_t_function(wait_queue_t *wait, unsigned mode, int sync, - void *arg) -{ - struct wait_bit_key *key = arg; - struct wait_bit_queue *wait_bit - = container_of(wait, struct wait_bit_queue, wait); - atomic_t *val = key->flags; - - if (wait_bit->key.flags != key->flags || - wait_bit->key.bit_nr != key->bit_nr || - atomic_read(val) != 0) - return 0; - return autoremove_wake_function(wait, mode, sync, key); -} - -/* - * To allow interruptible waiting and asynchronous (i.e. nonblocking) waiting, - * the actions of __wait_on_atomic_t() are permitted return codes. Nonzero - * return codes halt waiting and return. 
- */ -static __sched -int __wait_on_atomic_t(wait_queue_head_t *wq, struct wait_bit_queue *q, - int (*action)(atomic_t *), unsigned mode) -{ - atomic_t *val; - int ret = 0; - do { - prepare_to_wait(wq, &q->wait, mode); - val = q->key.flags; - if (atomic_read(val) == 0) - break; - ret = (*action)(val); - } while (!ret && atomic_read(val) != 0); - finish_wait(wq, &q->wait); - return ret; + prepare_to_wait_exclusive(&bit_wq, &wait.wait, mode); + if (!test_bit(wait.key.bit_nr, wait.key.flags)) + continue; + schedule(); + } while (test_and_set_bit(wait.key.bit_nr, wait.key.flags)); + finish_wait(&bit_wq, &wait.wait); } -#define DEFINE_WAIT_ATOMIC_T(name, p) \ - struct wait_bit_queue name = { \ - .key = __WAIT_ATOMIC_T_KEY_INITIALIZER(p), \ - .wait = { \ - .private = current, \ - .func = wake_atomic_t_function, \ - .task_list = \ - LIST_HEAD_INIT((name).wait.task_list), \ - }, \ - } - -__sched int out_of_line_wait_on_atomic_t(atomic_t *p, int (*action)(atomic_t *), - unsigned mode) +void complete(struct completion *x) { - wait_queue_head_t *wq = atomic_t_waitqueue(p); - DEFINE_WAIT_ATOMIC_T(wait, p); + unsigned long flags; - return __wait_on_atomic_t(wq, &wait, action, mode); + spin_lock_irqsave(&x->wait.lock, flags); + x->done++; + __wake_up_locked(&x->wait, TASK_NORMAL, 1); + spin_unlock_irqrestore(&x->wait.lock, flags); } -EXPORT_SYMBOL(out_of_line_wait_on_atomic_t); -/** - * wake_up_atomic_t - Wake up a waiter on a atomic_t - * @p: The atomic_t being waited on, a kernel virtual address - * - * Wake up anyone waiting for the atomic_t to go to zero. - * - * Abuse the bit-waker function and its waitqueue hash table set (the atomic_t - * check is done by the waiter's wake function, not the by the waker itself). - */ -void wake_up_atomic_t(atomic_t *p) +void wait_for_completion(struct completion *x) { - __wake_up_bit(atomic_t_waitqueue(p), p, WAIT_ATOMIC_T_BIT_NR); -} -EXPORT_SYMBOL(wake_up_atomic_t); + spin_lock_irq(&x->wait.lock); -__sched int bit_wait(struct wait_bit_key *word, int mode) -{ - schedule(); - return 0; -} -EXPORT_SYMBOL(bit_wait); - -__sched int bit_wait_io(struct wait_bit_key *word, int mode) -{ - io_schedule(); - return 0; -} -EXPORT_SYMBOL(bit_wait_io); + if (!x->done) { + DECLARE_WAITQUEUE(wait, current); -__sched int bit_wait_timeout(struct wait_bit_key *word, int mode) -{ - unsigned long now = jiffies; - if (time_after_eq(now, word->timeout)) - return -EAGAIN; - schedule_timeout(word->timeout - now); - return 0; -} -EXPORT_SYMBOL_GPL(bit_wait_timeout); + __add_wait_queue_tail_exclusive(&x->wait, &wait); + do { + __set_current_state(TASK_UNINTERRUPTIBLE); + spin_unlock_irq(&x->wait.lock); -__sched int bit_wait_io_timeout(struct wait_bit_key *word, int mode) -{ - unsigned long now = jiffies; - if (time_after_eq(now, word->timeout)) - return -EAGAIN; - io_schedule_timeout(word->timeout - now); - return 0; + schedule(); + spin_lock_irq(&x->wait.lock); + } while (!x->done); + __remove_wait_queue(&x->wait, &wait); + if (!x->done) + goto out; + } + x->done--; +out: + spin_unlock_irq(&x->wait.lock); } -EXPORT_SYMBOL_GPL(bit_wait_io_timeout); diff --git a/linux/zlib_deflate/deflate.c b/linux/zlib_deflate/deflate.c deleted file mode 100644 index d20ef45..0000000 --- a/linux/zlib_deflate/deflate.c +++ /dev/null @@ -1,1137 +0,0 @@ -/* +++ deflate.c */ -/* deflate.c -- compress data using the deflation algorithm - * Copyright (C) 1995-1996 Jean-loup Gailly. 
- * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* - * ALGORITHM - * - * The "deflation" process depends on being able to identify portions - * of the input text which are identical to earlier input (within a - * sliding window trailing behind the input currently being processed). - * - * The most straightforward technique turns out to be the fastest for - * most input files: try all possible matches and select the longest. - * The key feature of this algorithm is that insertions into the string - * dictionary are very simple and thus fast, and deletions are avoided - * completely. Insertions are performed at each input character, whereas - * string matches are performed only when the previous match ends. So it - * is preferable to spend more time in matches to allow very fast string - * insertions and avoid deletions. The matching algorithm for small - * strings is inspired from that of Rabin & Karp. A brute force approach - * is used to find longer strings when a small match has been found. - * A similar algorithm is used in comic (by Jan-Mark Wams) and freeze - * (by Leonid Broukhis). - * A previous version of this file used a more sophisticated algorithm - * (by Fiala and Greene) which is guaranteed to run in linear amortized - * time, but has a larger average cost, uses more memory and is patented. - * However the F&G algorithm may be faster for some highly redundant - * files if the parameter max_chain_length (described below) is too large. - * - * ACKNOWLEDGEMENTS - * - * The idea of lazy evaluation of matches is due to Jan-Mark Wams, and - * I found it in 'freeze' written by Leonid Broukhis. - * Thanks to many people for bug reports and testing. - * - * REFERENCES - * - * Deutsch, L.P.,"DEFLATE Compressed Data Format Specification". - * Available in ftp://ds.internic.net/rfc/rfc1951.txt - * - * A description of the Rabin and Karp algorithm is given in the book - * "Algorithms" by R. Sedgewick, Addison-Wesley, p252. - * - * Fiala,E.R., and Greene,D.H. - * Data Compression with Finite Windows, Comm.ACM, 32,4 (1989) 490-595 - * - */ - -#include -#include -#include "defutil.h" - - -/* =========================================================================== - * Function prototypes. - */ -typedef enum { - need_more, /* block not completed, need more input or more output */ - block_done, /* block flush performed */ - finish_started, /* finish started, need only more output at next deflate */ - finish_done /* finish done, accept no more input or output */ -} block_state; - -typedef block_state (*compress_func) (deflate_state *s, int flush); -/* Compression function. Returns the block state after the call. 
*/ - -static void fill_window (deflate_state *s); -static block_state deflate_stored (deflate_state *s, int flush); -static block_state deflate_fast (deflate_state *s, int flush); -static block_state deflate_slow (deflate_state *s, int flush); -static void lm_init (deflate_state *s); -static void putShortMSB (deflate_state *s, uInt b); -static void flush_pending (z_streamp strm); -static int read_buf (z_streamp strm, Byte *buf, unsigned size); -static uInt longest_match (deflate_state *s, IPos cur_match); - -#ifdef DEBUG_ZLIB -static void check_match (deflate_state *s, IPos start, IPos match, - int length); -#endif - -/* =========================================================================== - * Local data - */ - -#define NIL 0 -/* Tail of hash chains */ - -#ifndef TOO_FAR -# define TOO_FAR 4096 -#endif -/* Matches of length 3 are discarded if their distance exceeds TOO_FAR */ - -#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1) -/* Minimum amount of lookahead, except at the end of the input file. - * See deflate.c for comments about the MIN_MATCH+1. - */ - -/* Values for max_lazy_match, good_match and max_chain_length, depending on - * the desired pack level (0..9). The values given below have been tuned to - * exclude worst case performance for pathological files. Better values may be - * found for specific files. - */ -typedef struct config_s { - ush good_length; /* reduce lazy search above this match length */ - ush max_lazy; /* do not perform lazy search above this match length */ - ush nice_length; /* quit search above this match length */ - ush max_chain; - compress_func func; -} config; - -static const config configuration_table[10] = { -/* good lazy nice chain */ -/* 0 */ {0, 0, 0, 0, deflate_stored}, /* store only */ -/* 1 */ {4, 4, 8, 4, deflate_fast}, /* maximum speed, no lazy matches */ -/* 2 */ {4, 5, 16, 8, deflate_fast}, -/* 3 */ {4, 6, 32, 32, deflate_fast}, - -/* 4 */ {4, 4, 16, 16, deflate_slow}, /* lazy matches */ -/* 5 */ {8, 16, 32, 32, deflate_slow}, -/* 6 */ {8, 16, 128, 128, deflate_slow}, -/* 7 */ {8, 32, 128, 256, deflate_slow}, -/* 8 */ {32, 128, 258, 1024, deflate_slow}, -/* 9 */ {32, 258, 258, 4096, deflate_slow}}; /* maximum compression */ - -/* Note: the deflate() code requires max_lazy >= MIN_MATCH and max_chain >= 4 - * For deflate_fast() (levels <= 3) good is ignored and lazy has a different - * meaning. - */ - -#define EQUAL 0 -/* result of memcmp for equal strings */ - -/* =========================================================================== - * Update a hash value with the given input byte - * IN assertion: all calls to UPDATE_HASH are made with consecutive - * input characters, so that a running hash key can be computed from the - * previous key instead of complete recalculation each time. - */ -#define UPDATE_HASH(s,h,c) (h = (((h)<hash_shift) ^ (c)) & s->hash_mask) - - -/* =========================================================================== - * Insert string str in the dictionary and set match_head to the previous head - * of the hash chain (the most recent string with same hash key). Return - * the previous length of the hash chain. - * IN assertion: all calls to INSERT_STRING are made with consecutive - * input characters and the first MIN_MATCH bytes of str are valid - * (except for the last MIN_MATCH-1 bytes of the input file). 
- */ -#define INSERT_STRING(s, str, match_head) \ - (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \ - s->prev[(str) & s->w_mask] = match_head = s->head[s->ins_h], \ - s->head[s->ins_h] = (Pos)(str)) - -/* =========================================================================== - * Initialize the hash table (avoiding 64K overflow for 16 bit systems). - * prev[] will be initialized on the fly. - */ -#define CLEAR_HASH(s) \ - s->head[s->hash_size-1] = NIL; \ - memset((char *)s->head, 0, (unsigned)(s->hash_size-1)*sizeof(*s->head)); - -/* ========================================================================= */ -int zlib_deflateInit2( - z_streamp strm, - int level, - int method, - int windowBits, - int memLevel, - int strategy -) -{ - deflate_state *s; - int noheader = 0; - deflate_workspace *mem; - char *next; - - ush *overlay; - /* We overlay pending_buf and d_buf+l_buf. This works since the average - * output size for (length,distance) codes is <= 24 bits. - */ - - if (strm == NULL) return Z_STREAM_ERROR; - - strm->msg = NULL; - - if (level == Z_DEFAULT_COMPRESSION) level = 6; - - mem = (deflate_workspace *) strm->workspace; - - if (windowBits < 0) { /* undocumented feature: suppress zlib header */ - noheader = 1; - windowBits = -windowBits; - } - if (memLevel < 1 || memLevel > MAX_MEM_LEVEL || method != Z_DEFLATED || - windowBits < 9 || windowBits > 15 || level < 0 || level > 9 || - strategy < 0 || strategy > Z_HUFFMAN_ONLY) { - return Z_STREAM_ERROR; - } - - /* - * Direct the workspace's pointers to the chunks that were allocated - * along with the deflate_workspace struct. - */ - next = (char *) mem; - next += sizeof(*mem); - mem->window_memory = (Byte *) next; - next += zlib_deflate_window_memsize(windowBits); - mem->prev_memory = (Pos *) next; - next += zlib_deflate_prev_memsize(windowBits); - mem->head_memory = (Pos *) next; - next += zlib_deflate_head_memsize(memLevel); - mem->overlay_memory = next; - - s = (deflate_state *) &(mem->deflate_memory); - strm->state = (struct internal_state *)s; - s->strm = strm; - - s->noheader = noheader; - s->w_bits = windowBits; - s->w_size = 1 << s->w_bits; - s->w_mask = s->w_size - 1; - - s->hash_bits = memLevel + 7; - s->hash_size = 1 << s->hash_bits; - s->hash_mask = s->hash_size - 1; - s->hash_shift = ((s->hash_bits+MIN_MATCH-1)/MIN_MATCH); - - s->window = (Byte *) mem->window_memory; - s->prev = (Pos *) mem->prev_memory; - s->head = (Pos *) mem->head_memory; - - s->lit_bufsize = 1 << (memLevel + 6); /* 16K elements by default */ - - overlay = (ush *) mem->overlay_memory; - s->pending_buf = (uch *) overlay; - s->pending_buf_size = (ulg)s->lit_bufsize * (sizeof(ush)+2L); - - s->d_buf = overlay + s->lit_bufsize/sizeof(ush); - s->l_buf = s->pending_buf + (1+sizeof(ush))*s->lit_bufsize; - - s->level = level; - s->strategy = strategy; - s->method = (Byte)method; - - return zlib_deflateReset(strm); -} - -/* ========================================================================= */ -int zlib_deflateReset( - z_streamp strm -) -{ - deflate_state *s; - - if (strm == NULL || strm->state == NULL) - return Z_STREAM_ERROR; - - strm->total_in = strm->total_out = 0; - strm->msg = NULL; - strm->data_type = Z_UNKNOWN; - - s = (deflate_state *)strm->state; - s->pending = 0; - s->pending_out = s->pending_buf; - - if (s->noheader < 0) { - s->noheader = 0; /* was set to -1 by deflate(..., Z_FINISH); */ - } - s->status = s->noheader ? 
BUSY_STATE : INIT_STATE; - strm->adler = 1; - s->last_flush = Z_NO_FLUSH; - - zlib_tr_init(s); - lm_init(s); - - return Z_OK; -} - -/* ========================================================================= - * Put a short in the pending buffer. The 16-bit value is put in MSB order. - * IN assertion: the stream state is correct and there is enough room in - * pending_buf. - */ -static void putShortMSB( - deflate_state *s, - uInt b -) -{ - put_byte(s, (Byte)(b >> 8)); - put_byte(s, (Byte)(b & 0xff)); -} - -/* ========================================================================= - * Flush as much pending output as possible. All deflate() output goes - * through this function so some applications may wish to modify it - * to avoid allocating a large strm->next_out buffer and copying into it. - * (See also read_buf()). - */ -static void flush_pending( - z_streamp strm -) -{ - deflate_state *s = (deflate_state *) strm->state; - unsigned len = s->pending; - - if (len > strm->avail_out) len = strm->avail_out; - if (len == 0) return; - - if (strm->next_out != NULL) { - memcpy(strm->next_out, s->pending_out, len); - strm->next_out += len; - } - s->pending_out += len; - strm->total_out += len; - strm->avail_out -= len; - s->pending -= len; - if (s->pending == 0) { - s->pending_out = s->pending_buf; - } -} - -/* ========================================================================= */ -int zlib_deflate( - z_streamp strm, - int flush -) -{ - int old_flush; /* value of flush param for previous deflate call */ - deflate_state *s; - - if (strm == NULL || strm->state == NULL || - flush > Z_FINISH || flush < 0) { - return Z_STREAM_ERROR; - } - s = (deflate_state *) strm->state; - - if ((strm->next_in == NULL && strm->avail_in != 0) || - (s->status == FINISH_STATE && flush != Z_FINISH)) { - return Z_STREAM_ERROR; - } - if (strm->avail_out == 0) return Z_BUF_ERROR; - - s->strm = strm; /* just in case */ - old_flush = s->last_flush; - s->last_flush = flush; - - /* Write the zlib header */ - if (s->status == INIT_STATE) { - - uInt header = (Z_DEFLATED + ((s->w_bits-8)<<4)) << 8; - uInt level_flags = (s->level-1) >> 1; - - if (level_flags > 3) level_flags = 3; - header |= (level_flags << 6); - if (s->strstart != 0) header |= PRESET_DICT; - header += 31 - (header % 31); - - s->status = BUSY_STATE; - putShortMSB(s, header); - - /* Save the adler32 of the preset dictionary: */ - if (s->strstart != 0) { - putShortMSB(s, (uInt)(strm->adler >> 16)); - putShortMSB(s, (uInt)(strm->adler & 0xffff)); - } - strm->adler = 1L; - } - - /* Flush as much pending output as possible */ - if (s->pending != 0) { - flush_pending(strm); - if (strm->avail_out == 0) { - /* Since avail_out is 0, deflate will be called again with - * more output space, but possibly with both pending and - * avail_in equal to zero. There won't be anything to do, - * but this is not an error situation so make sure we - * return OK instead of BUF_ERROR at next call of deflate: - */ - s->last_flush = -1; - return Z_OK; - } - - /* Make sure there is something to do and avoid duplicate consecutive - * flushes. For repeated and useless calls with Z_FINISH, we keep - * returning Z_STREAM_END instead of Z_BUFF_ERROR. - */ - } else if (strm->avail_in == 0 && flush <= old_flush && - flush != Z_FINISH) { - return Z_BUF_ERROR; - } - - /* User must not provide more input after the first FINISH: */ - if (s->status == FINISH_STATE && strm->avail_in != 0) { - return Z_BUF_ERROR; - } - - /* Start a new block or continue the current one. 
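- *
- * (Usage sketch for the state machine above; the in/out buffers and the
- * write_out() helper are made up for illustration, error handling omitted:
- *
- *     strm->next_in  = in;
- *     strm->avail_in = in_len;
- *     do {
- *             strm->next_out  = out;
- *             strm->avail_out = sizeof(out);
- *             ret = zlib_deflate(strm, Z_FINISH);
- *             write_out(out, sizeof(out) - strm->avail_out);
- *     } while (ret == Z_OK);
- *
- * ret ends up as Z_STREAM_END once the trailer has been flushed, so the
- * caller keeps providing output space until then.)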
- */ - if (strm->avail_in != 0 || s->lookahead != 0 || - (flush != Z_NO_FLUSH && s->status != FINISH_STATE)) { - block_state bstate; - - bstate = (*(configuration_table[s->level].func))(s, flush); - - if (bstate == finish_started || bstate == finish_done) { - s->status = FINISH_STATE; - } - if (bstate == need_more || bstate == finish_started) { - if (strm->avail_out == 0) { - s->last_flush = -1; /* avoid BUF_ERROR next call, see above */ - } - return Z_OK; - /* If flush != Z_NO_FLUSH && avail_out == 0, the next call - * of deflate should use the same flush parameter to make sure - * that the flush is complete. So we don't have to output an - * empty block here, this will be done at next call. This also - * ensures that for a very small output buffer, we emit at most - * one empty block. - */ - } - if (bstate == block_done) { - if (flush == Z_PARTIAL_FLUSH) { - zlib_tr_align(s); - } else if (flush == Z_PACKET_FLUSH) { - /* Output just the 3-bit `stored' block type value, - but not a zero length. */ - zlib_tr_stored_type_only(s); - } else { /* FULL_FLUSH or SYNC_FLUSH */ - zlib_tr_stored_block(s, (char*)0, 0L, 0); - /* For a full flush, this empty block will be recognized - * as a special marker by inflate_sync(). - */ - if (flush == Z_FULL_FLUSH) { - CLEAR_HASH(s); /* forget history */ - } - } - flush_pending(strm); - if (strm->avail_out == 0) { - s->last_flush = -1; /* avoid BUF_ERROR at next call, see above */ - return Z_OK; - } - } - } - Assert(strm->avail_out > 0, "bug2"); - - if (flush != Z_FINISH) return Z_OK; - if (s->noheader) return Z_STREAM_END; - - /* Write the zlib trailer (adler32) */ - putShortMSB(s, (uInt)(strm->adler >> 16)); - putShortMSB(s, (uInt)(strm->adler & 0xffff)); - flush_pending(strm); - /* If avail_out is zero, the application will call deflate again - * to flush the rest. - */ - s->noheader = -1; /* write the trailer only once! */ - return s->pending != 0 ? Z_OK : Z_STREAM_END; -} - -/* ========================================================================= */ -int zlib_deflateEnd( - z_streamp strm -) -{ - int status; - deflate_state *s; - - if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR; - s = (deflate_state *) strm->state; - - status = s->status; - if (status != INIT_STATE && status != BUSY_STATE && - status != FINISH_STATE) { - return Z_STREAM_ERROR; - } - - strm->state = NULL; - - return status == BUSY_STATE ? Z_DATA_ERROR : Z_OK; -} - -/* =========================================================================== - * Read a new buffer from the current input stream, update the adler32 - * and total number of bytes read. All deflate() input goes through - * this function so some applications may wish to modify it to avoid - * allocating a large strm->next_in buffer and copying from it. - * (See also flush_pending()). 
- */ -static int read_buf( - z_streamp strm, - Byte *buf, - unsigned size -) -{ - unsigned len = strm->avail_in; - - if (len > size) len = size; - if (len == 0) return 0; - - strm->avail_in -= len; - - if (!((deflate_state *)(strm->state))->noheader) { - strm->adler = zlib_adler32(strm->adler, strm->next_in, len); - } - memcpy(buf, strm->next_in, len); - strm->next_in += len; - strm->total_in += len; - - return (int)len; -} - -/* =========================================================================== - * Initialize the "longest match" routines for a new zlib stream - */ -static void lm_init( - deflate_state *s -) -{ - s->window_size = (ulg)2L*s->w_size; - - CLEAR_HASH(s); - - /* Set the default configuration parameters: - */ - s->max_lazy_match = configuration_table[s->level].max_lazy; - s->good_match = configuration_table[s->level].good_length; - s->nice_match = configuration_table[s->level].nice_length; - s->max_chain_length = configuration_table[s->level].max_chain; - - s->strstart = 0; - s->block_start = 0L; - s->lookahead = 0; - s->match_length = s->prev_length = MIN_MATCH-1; - s->match_available = 0; - s->ins_h = 0; -} - -/* =========================================================================== - * Set match_start to the longest match starting at the given string and - * return its length. Matches shorter or equal to prev_length are discarded, - * in which case the result is equal to prev_length and match_start is - * garbage. - * IN assertions: cur_match is the head of the hash chain for the current - * string (strstart) and its distance is <= MAX_DIST, and prev_length >= 1 - * OUT assertion: the match length is not greater than s->lookahead. - */ -/* For 80x86 and 680x0, an optimized version will be provided in match.asm or - * match.S. The code will be functionally equivalent. - */ -static uInt longest_match( - deflate_state *s, - IPos cur_match /* current match */ -) -{ - unsigned chain_length = s->max_chain_length;/* max hash chain length */ - register Byte *scan = s->window + s->strstart; /* current string */ - register Byte *match; /* matched string */ - register int len; /* length of current match */ - int best_len = s->prev_length; /* best match length so far */ - int nice_match = s->nice_match; /* stop if match long enough */ - IPos limit = s->strstart > (IPos)MAX_DIST(s) ? - s->strstart - (IPos)MAX_DIST(s) : NIL; - /* Stop when cur_match becomes <= limit. To simplify the code, - * we prevent matches with the string of window index 0. - */ - Pos *prev = s->prev; - uInt wmask = s->w_mask; - -#ifdef UNALIGNED_OK - /* Compare two bytes at a time. Note: this is not always beneficial. - * Try with and without -DUNALIGNED_OK to check. - */ - register Byte *strend = s->window + s->strstart + MAX_MATCH - 1; - register ush scan_start = *(ush*)scan; - register ush scan_end = *(ush*)(scan+best_len-1); -#else - register Byte *strend = s->window + s->strstart + MAX_MATCH; - register Byte scan_end1 = scan[best_len-1]; - register Byte scan_end = scan[best_len]; -#endif - - /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16. - * It is easy to get rid of this optimization if necessary. - */ - Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever"); - - /* Do not waste too much time if we already have a good match: */ - if (s->prev_length >= s->good_match) { - chain_length >>= 2; - } - /* Do not look for matches beyond the end of the input. This is necessary - * to make deflate deterministic. 
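- * (For scale, with the usual 32K window MAX_DIST(s) is w_size minus
- * MIN_LOOKAHEAD, i.e. 32768 - 262 = 32506, so limit above cuts the chain
- * off once candidates fall out of the window; at level 6 at most
- * max_chain = 128 candidates are tried, and only a quarter of that once a
- * match of good_match = 8 bytes or more is already in hand.)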
- */ - if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead; - - Assert((ulg)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead"); - - do { - Assert(cur_match < s->strstart, "no future"); - match = s->window + cur_match; - - /* Skip to next match if the match length cannot increase - * or if the match length is less than 2: - */ -#if (defined(UNALIGNED_OK) && MAX_MATCH == 258) - /* This code assumes sizeof(unsigned short) == 2. Do not use - * UNALIGNED_OK if your compiler uses a different size. - */ - if (*(ush*)(match+best_len-1) != scan_end || - *(ush*)match != scan_start) continue; - - /* It is not necessary to compare scan[2] and match[2] since they are - * always equal when the other bytes match, given that the hash keys - * are equal and that HASH_BITS >= 8. Compare 2 bytes at a time at - * strstart+3, +5, ... up to strstart+257. We check for insufficient - * lookahead only every 4th comparison; the 128th check will be made - * at strstart+257. If MAX_MATCH-2 is not a multiple of 8, it is - * necessary to put more guard bytes at the end of the window, or - * to check more often for insufficient lookahead. - */ - Assert(scan[2] == match[2], "scan[2]?"); - scan++, match++; - do { - } while (*(ush*)(scan+=2) == *(ush*)(match+=2) && - *(ush*)(scan+=2) == *(ush*)(match+=2) && - *(ush*)(scan+=2) == *(ush*)(match+=2) && - *(ush*)(scan+=2) == *(ush*)(match+=2) && - scan < strend); - /* The funny "do {}" generates better code on most compilers */ - - /* Here, scan <= window+strstart+257 */ - Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan"); - if (*scan == *match) scan++; - - len = (MAX_MATCH - 1) - (int)(strend-scan); - scan = strend - (MAX_MATCH-1); - -#else /* UNALIGNED_OK */ - - if (match[best_len] != scan_end || - match[best_len-1] != scan_end1 || - *match != *scan || - *++match != scan[1]) continue; - - /* The check at best_len-1 can be removed because it will be made - * again later. (This heuristic is not always a win.) - * It is not necessary to compare scan[2] and match[2] since they - * are always equal when the other bytes match, given that - * the hash keys are equal and that HASH_BITS >= 8. - */ - scan += 2, match++; - Assert(*scan == *match, "match[2]?"); - - /* We check for insufficient lookahead only every 8th comparison; - * the 256th check will be made at strstart+258. - */ - do { - } while (*++scan == *++match && *++scan == *++match && - *++scan == *++match && *++scan == *++match && - *++scan == *++match && *++scan == *++match && - *++scan == *++match && *++scan == *++match && - scan < strend); - - Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan"); - - len = MAX_MATCH - (int)(strend - scan); - scan = strend - MAX_MATCH; - -#endif /* UNALIGNED_OK */ - - if (len > best_len) { - s->match_start = cur_match; - best_len = len; - if (len >= nice_match) break; -#ifdef UNALIGNED_OK - scan_end = *(ush*)(scan+best_len-1); -#else - scan_end1 = scan[best_len-1]; - scan_end = scan[best_len]; -#endif - } - } while ((cur_match = prev[cur_match & wmask]) > limit - && --chain_length != 0); - - if ((uInt)best_len <= s->lookahead) return best_len; - return s->lookahead; -} - -#ifdef DEBUG_ZLIB -/* =========================================================================== - * Check that the match at match_start is indeed a match. 
- */ -static void check_match( - deflate_state *s, - IPos start, - IPos match, - int length -) -{ - /* check that the match is indeed a match */ - if (memcmp((char *)s->window + match, - (char *)s->window + start, length) != EQUAL) { - fprintf(stderr, " start %u, match %u, length %d\n", - start, match, length); - do { - fprintf(stderr, "%c%c", s->window[match++], s->window[start++]); - } while (--length != 0); - z_error("invalid match"); - } - if (z_verbose > 1) { - fprintf(stderr,"\\[%d,%d]", start-match, length); - do { putc(s->window[start++], stderr); } while (--length != 0); - } -} -#else -# define check_match(s, start, match, length) -#endif - -/* =========================================================================== - * Fill the window when the lookahead becomes insufficient. - * Updates strstart and lookahead. - * - * IN assertion: lookahead < MIN_LOOKAHEAD - * OUT assertions: strstart <= window_size-MIN_LOOKAHEAD - * At least one byte has been read, or avail_in == 0; reads are - * performed for at least two bytes (required for the zip translate_eol - * option -- not supported here). - */ -static void fill_window( - deflate_state *s -) -{ - register unsigned n, m; - register Pos *p; - unsigned more; /* Amount of free space at the end of the window. */ - uInt wsize = s->w_size; - - do { - more = (unsigned)(s->window_size -(ulg)s->lookahead -(ulg)s->strstart); - - /* Deal with !@#$% 64K limit: */ - if (more == 0 && s->strstart == 0 && s->lookahead == 0) { - more = wsize; - - } else if (more == (unsigned)(-1)) { - /* Very unlikely, but possible on 16 bit machine if strstart == 0 - * and lookahead == 1 (input done one byte at time) - */ - more--; - - /* If the window is almost full and there is insufficient lookahead, - * move the upper half to the lower one to make room in the upper half. - */ - } else if (s->strstart >= wsize+MAX_DIST(s)) { - - memcpy((char *)s->window, (char *)s->window+wsize, - (unsigned)wsize); - s->match_start -= wsize; - s->strstart -= wsize; /* we now have strstart >= MAX_DIST */ - s->block_start -= (long) wsize; - - /* Slide the hash table (could be avoided with 32 bit values - at the expense of memory usage). We slide even when level == 0 - to keep the hash table consistent if we switch back to level > 0 - later. (Using level 0 permanently is not an optimal usage of - zlib, so we don't care about this pathological case.) - */ - n = s->hash_size; - p = &s->head[n]; - do { - m = *--p; - *p = (Pos)(m >= wsize ? m-wsize : NIL); - } while (--n); - - n = wsize; - p = &s->prev[n]; - do { - m = *--p; - *p = (Pos)(m >= wsize ? m-wsize : NIL); - /* If n is not on any hash chain, prev[n] is garbage but - * its value will never be used. - */ - } while (--n); - more += wsize; - } - if (s->strm->avail_in == 0) return; - - /* If there was no sliding: - * strstart <= WSIZE+MAX_DIST-1 && lookahead <= MIN_LOOKAHEAD - 1 && - * more == window_size - lookahead - strstart - * => more >= window_size - (MIN_LOOKAHEAD-1 + WSIZE + MAX_DIST-1) - * => more >= window_size - 2*WSIZE + 2 - * In the BIG_MEM or MMAP case (not yet supported), - * window_size == input_size + MIN_LOOKAHEAD && - * strstart + s->lookahead <= input_size => more >= MIN_LOOKAHEAD. - * Otherwise, window_size == 2*WSIZE so more >= 2. - * If there was sliding, more >= WSIZE. So in all cases, more >= 2. 
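- * (Concretely, with wsize = 32768: after the slide a hash entry that pointed
- * at offset 40000 is rebased to 40000 - 32768 = 7232, while entries that
- * pointed below 32768 now refer to data that was discarded and are reset
- * to NIL.)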
- */ - Assert(more >= 2, "more < 2"); - - n = read_buf(s->strm, s->window + s->strstart + s->lookahead, more); - s->lookahead += n; - - /* Initialize the hash value now that we have some input: */ - if (s->lookahead >= MIN_MATCH) { - s->ins_h = s->window[s->strstart]; - UPDATE_HASH(s, s->ins_h, s->window[s->strstart+1]); -#if MIN_MATCH != 3 - Call UPDATE_HASH() MIN_MATCH-3 more times -#endif - } - /* If the whole input has less than MIN_MATCH bytes, ins_h is garbage, - * but this is not important since only literal bytes will be emitted. - */ - - } while (s->lookahead < MIN_LOOKAHEAD && s->strm->avail_in != 0); -} - -/* =========================================================================== - * Flush the current block, with given end-of-file flag. - * IN assertion: strstart is set to the end of the current match. - */ -#define FLUSH_BLOCK_ONLY(s, eof) { \ - zlib_tr_flush_block(s, (s->block_start >= 0L ? \ - (char *)&s->window[(unsigned)s->block_start] : \ - NULL), \ - (ulg)((long)s->strstart - s->block_start), \ - (eof)); \ - s->block_start = s->strstart; \ - flush_pending(s->strm); \ - Tracev((stderr,"[FLUSH]")); \ -} - -/* Same but force premature exit if necessary. */ -#define FLUSH_BLOCK(s, eof) { \ - FLUSH_BLOCK_ONLY(s, eof); \ - if (s->strm->avail_out == 0) return (eof) ? finish_started : need_more; \ -} - -/* =========================================================================== - * Copy without compression as much as possible from the input stream, return - * the current block state. - * This function does not insert new strings in the dictionary since - * uncompressible data is probably not useful. This function is used - * only for the level=0 compression option. - * NOTE: this function should be optimized to avoid extra copying from - * window to pending_buf. - */ -static block_state deflate_stored( - deflate_state *s, - int flush -) -{ - /* Stored blocks are limited to 0xffff bytes, pending_buf is limited - * to pending_buf_size, and each stored block has a 5 byte header: - */ - ulg max_block_size = 0xffff; - ulg max_start; - - if (max_block_size > s->pending_buf_size - 5) { - max_block_size = s->pending_buf_size - 5; - } - - /* Copy as much as possible from input to output: */ - for (;;) { - /* Fill the window as much as possible: */ - if (s->lookahead <= 1) { - - Assert(s->strstart < s->w_size+MAX_DIST(s) || - s->block_start >= (long)s->w_size, "slide too late"); - - fill_window(s); - if (s->lookahead == 0 && flush == Z_NO_FLUSH) return need_more; - - if (s->lookahead == 0) break; /* flush the current block */ - } - Assert(s->block_start >= 0L, "block gone"); - - s->strstart += s->lookahead; - s->lookahead = 0; - - /* Emit a stored block if pending_buf will be full: */ - max_start = s->block_start + max_block_size; - if (s->strstart == 0 || (ulg)s->strstart >= max_start) { - /* strstart == 0 is possible when wraparound on 16-bit machine */ - s->lookahead = (uInt)(s->strstart - max_start); - s->strstart = (uInt)max_start; - FLUSH_BLOCK(s, 0); - } - /* Flush if we may have to slide, otherwise block_start may become - * negative and the data will be gone: - */ - if (s->strstart - (uInt)s->block_start >= MAX_DIST(s)) { - FLUSH_BLOCK(s, 0); - } - } - FLUSH_BLOCK(s, flush == Z_FINISH); - return flush == Z_FINISH ? finish_done : block_done; -} - -/* =========================================================================== - * Compress as much as possible from the input stream, return the current - * block state. 
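- * (For reference, deflate_stored() above caps each raw block at 0xffff bytes
- * with a 5 byte header, so even incompressible input grows by only about
- * 0.008%: a 1 MiB buffer becomes 17 stored blocks and roughly 85 bytes of
- * framing.)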
- * This function does not perform lazy evaluation of matches and inserts - * new strings in the dictionary only for unmatched strings or for short - * matches. It is used only for the fast compression options. - */ -static block_state deflate_fast( - deflate_state *s, - int flush -) -{ - IPos hash_head = NIL; /* head of the hash chain */ - int bflush; /* set if current block must be flushed */ - - for (;;) { - /* Make sure that we always have enough lookahead, except - * at the end of the input file. We need MAX_MATCH bytes - * for the next match, plus MIN_MATCH bytes to insert the - * string following the next match. - */ - if (s->lookahead < MIN_LOOKAHEAD) { - fill_window(s); - if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) { - return need_more; - } - if (s->lookahead == 0) break; /* flush the current block */ - } - - /* Insert the string window[strstart .. strstart+2] in the - * dictionary, and set hash_head to the head of the hash chain: - */ - if (s->lookahead >= MIN_MATCH) { - INSERT_STRING(s, s->strstart, hash_head); - } - - /* Find the longest match, discarding those <= prev_length. - * At this point we have always match_length < MIN_MATCH - */ - if (hash_head != NIL && s->strstart - hash_head <= MAX_DIST(s)) { - /* To simplify the code, we prevent matches with the string - * of window index 0 (in particular we have to avoid a match - * of the string with itself at the start of the input file). - */ - if (s->strategy != Z_HUFFMAN_ONLY) { - s->match_length = longest_match (s, hash_head); - } - /* longest_match() sets match_start */ - } - if (s->match_length >= MIN_MATCH) { - check_match(s, s->strstart, s->match_start, s->match_length); - - bflush = zlib_tr_tally(s, s->strstart - s->match_start, - s->match_length - MIN_MATCH); - - s->lookahead -= s->match_length; - - /* Insert new strings in the hash table only if the match length - * is not too large. This saves time but degrades compression. - */ - if (s->match_length <= s->max_insert_length && - s->lookahead >= MIN_MATCH) { - s->match_length--; /* string at strstart already in hash table */ - do { - s->strstart++; - INSERT_STRING(s, s->strstart, hash_head); - /* strstart never exceeds WSIZE-MAX_MATCH, so there are - * always MIN_MATCH bytes ahead. - */ - } while (--s->match_length != 0); - s->strstart++; - } else { - s->strstart += s->match_length; - s->match_length = 0; - s->ins_h = s->window[s->strstart]; - UPDATE_HASH(s, s->ins_h, s->window[s->strstart+1]); -#if MIN_MATCH != 3 - Call UPDATE_HASH() MIN_MATCH-3 more times -#endif - /* If lookahead < MIN_MATCH, ins_h is garbage, but it does not - * matter since it will be recomputed at next deflate call. - */ - } - } else { - /* No match, output a literal byte */ - Tracevv((stderr,"%c", s->window[s->strstart])); - bflush = zlib_tr_tally (s, 0, s->window[s->strstart]); - s->lookahead--; - s->strstart++; - } - if (bflush) FLUSH_BLOCK(s, 0); - } - FLUSH_BLOCK(s, flush == Z_FINISH); - return flush == Z_FINISH ? finish_done : block_done; -} - -/* =========================================================================== - * Same as above, but achieves better compression. We use a lazy - * evaluation for matches: a match is finally adopted only if there is - * no better match at the next window position. - */ -static block_state deflate_slow( - deflate_state *s, - int flush -) -{ - IPos hash_head = NIL; /* head of hash chain */ - int bflush; /* set if current block must be flushed */ - - /* Process the input block. 
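- * Lazy evaluation example (illustrative): if the window already contains
- * "abc" and "bcdef" and the next input is "abcdef", the greedy strategy of
- * deflate_fast() takes the length-3 match at 'a' and then emits 'd','e','f'
- * as literals, while the lazy strategy below sees the longer length-5 match
- * one byte later, emits a single literal 'a' followed by that match, and so
- * codes the same six bytes with two symbols instead of four.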
*/ - for (;;) { - /* Make sure that we always have enough lookahead, except - * at the end of the input file. We need MAX_MATCH bytes - * for the next match, plus MIN_MATCH bytes to insert the - * string following the next match. - */ - if (s->lookahead < MIN_LOOKAHEAD) { - fill_window(s); - if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) { - return need_more; - } - if (s->lookahead == 0) break; /* flush the current block */ - } - - /* Insert the string window[strstart .. strstart+2] in the - * dictionary, and set hash_head to the head of the hash chain: - */ - if (s->lookahead >= MIN_MATCH) { - INSERT_STRING(s, s->strstart, hash_head); - } - - /* Find the longest match, discarding those <= prev_length. - */ - s->prev_length = s->match_length, s->prev_match = s->match_start; - s->match_length = MIN_MATCH-1; - - if (hash_head != NIL && s->prev_length < s->max_lazy_match && - s->strstart - hash_head <= MAX_DIST(s)) { - /* To simplify the code, we prevent matches with the string - * of window index 0 (in particular we have to avoid a match - * of the string with itself at the start of the input file). - */ - if (s->strategy != Z_HUFFMAN_ONLY) { - s->match_length = longest_match (s, hash_head); - } - /* longest_match() sets match_start */ - - if (s->match_length <= 5 && (s->strategy == Z_FILTERED || - (s->match_length == MIN_MATCH && - s->strstart - s->match_start > TOO_FAR))) { - - /* If prev_match is also MIN_MATCH, match_start is garbage - * but we will ignore the current match anyway. - */ - s->match_length = MIN_MATCH-1; - } - } - /* If there was a match at the previous step and the current - * match is not better, output the previous match: - */ - if (s->prev_length >= MIN_MATCH && s->match_length <= s->prev_length) { - uInt max_insert = s->strstart + s->lookahead - MIN_MATCH; - /* Do not insert strings in hash table beyond this. */ - - check_match(s, s->strstart-1, s->prev_match, s->prev_length); - - bflush = zlib_tr_tally(s, s->strstart -1 - s->prev_match, - s->prev_length - MIN_MATCH); - - /* Insert in hash table all strings up to the end of the match. - * strstart-1 and strstart are already inserted. If there is not - * enough lookahead, the last two strings are not inserted in - * the hash table. - */ - s->lookahead -= s->prev_length-1; - s->prev_length -= 2; - do { - if (++s->strstart <= max_insert) { - INSERT_STRING(s, s->strstart, hash_head); - } - } while (--s->prev_length != 0); - s->match_available = 0; - s->match_length = MIN_MATCH-1; - s->strstart++; - - if (bflush) FLUSH_BLOCK(s, 0); - - } else if (s->match_available) { - /* If there was no match at the previous position, output a - * single literal. If there was a match but the current match - * is longer, truncate the previous match to a single literal. - */ - Tracevv((stderr,"%c", s->window[s->strstart-1])); - if (zlib_tr_tally (s, 0, s->window[s->strstart-1])) { - FLUSH_BLOCK_ONLY(s, 0); - } - s->strstart++; - s->lookahead--; - if (s->strm->avail_out == 0) return need_more; - } else { - /* There is no previous match to compare with, wait for - * the next step to decide. - */ - s->match_available = 1; - s->strstart++; - s->lookahead--; - } - } - Assert (flush != Z_NO_FLUSH, "no flush?"); - if (s->match_available) { - Tracevv((stderr,"%c", s->window[s->strstart-1])); - zlib_tr_tally (s, 0, s->window[s->strstart-1]); - s->match_available = 0; - } - FLUSH_BLOCK(s, flush == Z_FINISH); - return flush == Z_FINISH ? 
finish_done : block_done; -} - -int zlib_deflate_workspacesize(int windowBits, int memLevel) -{ - if (windowBits < 0) /* undocumented feature: suppress zlib header */ - windowBits = -windowBits; - - /* Since the return value is typically passed to vmalloc() unchecked... */ - BUG_ON(memLevel < 1 || memLevel > MAX_MEM_LEVEL || windowBits < 9 || - windowBits > 15); - - return sizeof(deflate_workspace) - + zlib_deflate_window_memsize(windowBits) - + zlib_deflate_prev_memsize(windowBits) - + zlib_deflate_head_memsize(memLevel) - + zlib_deflate_overlay_memsize(memLevel); -} diff --git a/linux/zlib_deflate/deftree.c b/linux/zlib_deflate/deftree.c deleted file mode 100644 index 9b1756b..0000000 --- a/linux/zlib_deflate/deftree.c +++ /dev/null @@ -1,1113 +0,0 @@ -/* +++ trees.c */ -/* trees.c -- output deflated data using Huffman coding - * Copyright (C) 1995-1996 Jean-loup Gailly - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* - * ALGORITHM - * - * The "deflation" process uses several Huffman trees. The more - * common source values are represented by shorter bit sequences. - * - * Each code tree is stored in a compressed form which is itself - * a Huffman encoding of the lengths of all the code strings (in - * ascending order by source values). The actual code strings are - * reconstructed from the lengths in the inflate process, as described - * in the deflate specification. - * - * REFERENCES - * - * Deutsch, L.P.,"'Deflate' Compressed Data Format Specification". - * Available in ftp.uu.net:/pub/archiving/zip/doc/deflate-1.1.doc - * - * Storer, James A. - * Data Compression: Methods and Theory, pp. 49-50. - * Computer Science Press, 1988. ISBN 0-7167-8156-5. - * - * Sedgewick, R. - * Algorithms, p290. - * Addison-Wesley, 1983. ISBN 0-201-06672-6. - */ - -/* From: trees.c,v 1.11 1996/07/24 13:41:06 me Exp $ */ - -/* #include "deflate.h" */ - -#include -#include -#include "defutil.h" - -#ifdef DEBUG_ZLIB -# include -#endif - -/* =========================================================================== - * Constants - */ - -#define MAX_BL_BITS 7 -/* Bit length codes must not exceed MAX_BL_BITS bits */ - -#define END_BLOCK 256 -/* end of block literal code */ - -#define REP_3_6 16 -/* repeat previous bit length 3-6 times (2 bits of repeat count) */ - -#define REPZ_3_10 17 -/* repeat a zero length 3-10 times (3 bits of repeat count) */ - -#define REPZ_11_138 18 -/* repeat a zero length 11-138 times (7 bits of repeat count) */ - -static const int extra_lbits[LENGTH_CODES] /* extra bits for each length code */ - = {0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0}; - -static const int extra_dbits[D_CODES] /* extra bits for each distance code */ - = {0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13}; - -static const int extra_blbits[BL_CODES]/* extra bits for each bit length code */ - = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,7}; - -static const uch bl_order[BL_CODES] - = {16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15}; -/* The lengths of the bit length codes are sent in order of decreasing - * probability, to avoid transmitting the lengths for unused bit length codes. - */ - -#define Buf_size (8 * 2*sizeof(char)) -/* Number of bits used within bi_buf. (bi_buf might be implemented on - * more than 16 bits on some systems.) - */ - -/* =========================================================================== - * Local data. These are initialized only once. - */ - -static ct_data static_ltree[L_CODES+2]; -/* The static literal tree. 
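- * Per RFC 1951 its code lengths are fixed: 8 bits for literals 0-143,
- * 9 bits for 144-255, 7 bits for the end-of-block and length codes 256-279,
- * and 8 bits for 280-287; they are filled in by tr_static_init() below.
- *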
Since the bit lengths are imposed, there is no - * need for the L_CODES extra codes used during heap construction. However - * The codes 286 and 287 are needed to build a canonical tree (see zlib_tr_init - * below). - */ - -static ct_data static_dtree[D_CODES]; -/* The static distance tree. (Actually a trivial tree since all codes use - * 5 bits.) - */ - -static uch dist_code[512]; -/* distance codes. The first 256 values correspond to the distances - * 3 .. 258, the last 256 values correspond to the top 8 bits of - * the 15 bit distances. - */ - -static uch length_code[MAX_MATCH-MIN_MATCH+1]; -/* length code for each normalized match length (0 == MIN_MATCH) */ - -static int base_length[LENGTH_CODES]; -/* First normalized length for each code (0 = MIN_MATCH) */ - -static int base_dist[D_CODES]; -/* First normalized distance for each code (0 = distance of 1) */ - -struct static_tree_desc_s { - const ct_data *static_tree; /* static tree or NULL */ - const int *extra_bits; /* extra bits for each code or NULL */ - int extra_base; /* base index for extra_bits */ - int elems; /* max number of elements in the tree */ - int max_length; /* max bit length for the codes */ -}; - -static static_tree_desc static_l_desc = -{static_ltree, extra_lbits, LITERALS+1, L_CODES, MAX_BITS}; - -static static_tree_desc static_d_desc = -{static_dtree, extra_dbits, 0, D_CODES, MAX_BITS}; - -static static_tree_desc static_bl_desc = -{(const ct_data *)0, extra_blbits, 0, BL_CODES, MAX_BL_BITS}; - -/* =========================================================================== - * Local (static) routines in this file. - */ - -static void tr_static_init (void); -static void init_block (deflate_state *s); -static void pqdownheap (deflate_state *s, ct_data *tree, int k); -static void gen_bitlen (deflate_state *s, tree_desc *desc); -static void gen_codes (ct_data *tree, int max_code, ush *bl_count); -static void build_tree (deflate_state *s, tree_desc *desc); -static void scan_tree (deflate_state *s, ct_data *tree, int max_code); -static void send_tree (deflate_state *s, ct_data *tree, int max_code); -static int build_bl_tree (deflate_state *s); -static void send_all_trees (deflate_state *s, int lcodes, int dcodes, - int blcodes); -static void compress_block (deflate_state *s, ct_data *ltree, - ct_data *dtree); -static void set_data_type (deflate_state *s); -static void bi_windup (deflate_state *s); -static void bi_flush (deflate_state *s); -static void copy_block (deflate_state *s, char *buf, unsigned len, - int header); - -#ifndef DEBUG_ZLIB -# define send_code(s, c, tree) send_bits(s, tree[c].Code, tree[c].Len) - /* Send a code of the given tree. c and tree must not have side effects */ - -#else /* DEBUG_ZLIB */ -# define send_code(s, c, tree) \ - { if (z_verbose>2) fprintf(stderr,"\ncd %3d ",(c)); \ - send_bits(s, tree[c].Code, tree[c].Len); } -#endif - -#define d_code(dist) \ - ((dist) < 256 ? dist_code[dist] : dist_code[256+((dist)>>7)]) -/* Mapping from a distance to a distance code. dist is the distance - 1 and - * must not have side effects. dist_code[256] and dist_code[257] are never - * used. - */ - -/* =========================================================================== - * Send a value on a given number of bits. - * IN assertion: length <= 16 and value fits in length bits. 
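- *
- * Worked example (illustrative): starting with an empty buffer, sending the
- * 3-bit value 5 leaves bi_buf = 00101b and bi_valid = 3; sending the 2-bit
- * value 2 next leaves bi_buf = 10101b and bi_valid = 5. Bits accumulate
- * LSB first, and put_short() flushes 16 of them at a time once bi_valid
- * would overflow Buf_size.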
- */
-#ifdef DEBUG_ZLIB
-static void send_bits (deflate_state *s, int value, int length);
-
-static void send_bits(
- deflate_state *s,
- int value, /* value to send */
- int length /* number of bits */
-)
-{
- Tracevv((stderr," l %2d v %4x ", length, value));
- Assert(length > 0 && length <= 15, "invalid length");
- s->bits_sent += (ulg)length;
-
- /* If not enough room in bi_buf, use (valid) bits from bi_buf and
- * (16 - bi_valid) bits from value, leaving (width - (16-bi_valid))
- * unused bits in value.
- */
- if (s->bi_valid > (int)Buf_size - length) {
- s->bi_buf |= (value << s->bi_valid);
- put_short(s, s->bi_buf);
- s->bi_buf = (ush)value >> (Buf_size - s->bi_valid);
- s->bi_valid += length - Buf_size;
- } else {
- s->bi_buf |= value << s->bi_valid;
- s->bi_valid += length;
- }
-}
-#else /* !DEBUG_ZLIB */
-
-#define send_bits(s, value, length) \
-{ int len = length;\
- if (s->bi_valid > (int)Buf_size - len) {\
- int val = value;\
- s->bi_buf |= (val << s->bi_valid);\
- put_short(s, s->bi_buf);\
- s->bi_buf = (ush)val >> (Buf_size - s->bi_valid);\
- s->bi_valid += len - Buf_size;\
- } else {\
- s->bi_buf |= (value) << s->bi_valid;\
- s->bi_valid += len;\
- }\
-}
-#endif /* DEBUG_ZLIB */
-
-/* ===========================================================================
- * Initialize the various 'constant' tables. In a multi-threaded environment,
- * this function may be called by two threads concurrently, but this is
- * harmless since both invocations do exactly the same thing.
- */
-static void tr_static_init(void)
-{
- static int static_init_done;
- int n; /* iterates over tree elements */
- int bits; /* bit counter */
- int length; /* length value */
- int code; /* code value */
- int dist; /* distance index */
- ush bl_count[MAX_BITS+1];
- /* number of codes at each bit length for an optimal tree */
-
- if (static_init_done) return;
-
- /* Initialize the mapping length (0..255) -> length code (0..28) */
- length = 0;
- for (code = 0; code < LENGTH_CODES-1; code++) {
- base_length[code] = length;
- for (n = 0; n < (1<<extra_lbits[code]); n++) {
- length_code[length++] = (uch)code;
- }
- }
- Assert (length == 256, "tr_static_init: length != 256");
- /* Note that the length 255 (match length 258) can be represented
- * in two different ways: code 284 + 5 bits or code 285, so we
- * overwrite length_code[255] to use the best encoding:
- */
- length_code[length-1] = (uch)code;
-
- /* Initialize the mapping dist (0..32K) -> dist code (0..29) */
- dist = 0;
- for (code = 0 ; code < 16; code++) {
- base_dist[code] = dist;
- for (n = 0; n < (1<<extra_dbits[code]); n++) {
- dist_code[dist++] = (uch)code;
- }
- }
- Assert (dist == 256, "tr_static_init: dist != 256");
- dist >>= 7; /* from now on, all distances are divided by 128 */
- for ( ; code < D_CODES; code++) {
- base_dist[code] = dist << 7;
- for (n = 0; n < (1<<(extra_dbits[code]-7)); n++) {
- dist_code[256 + dist++] = (uch)code;
- }
- }
- Assert (dist == 256, "tr_static_init: 256+dist != 512");
-
- /* Construct the codes of the static literal tree */
- for (bits = 0; bits <= MAX_BITS; bits++) bl_count[bits] = 0;
- n = 0;
- while (n <= 143) static_ltree[n++].Len = 8, bl_count[8]++;
- while (n <= 255) static_ltree[n++].Len = 9, bl_count[9]++;
- while (n <= 279) static_ltree[n++].Len = 7, bl_count[7]++;
- while (n <= 287) static_ltree[n++].Len = 8, bl_count[8]++;
- /* Codes 286 and 287 do not exist, but we must include them in the
- * tree construction to get a canonical Huffman tree (longest code
- * all ones)
- */
- gen_codes((ct_data *)static_ltree, L_CODES+1, bl_count);
-
- /* The static distance tree is trivial: */
- for (n = 0; n < D_CODES; n++) {
- static_dtree[n].Len = 5;
- static_dtree[n].Code = bitrev32((u32)n) >> (32 - 5);
- }
- static_init_done = 1;
-}
-
-/* ===========================================================================
- * Initialize the tree data structures for a new zlib stream.
- */ -void zlib_tr_init( - deflate_state *s -) -{ - tr_static_init(); - - s->compressed_len = 0L; - - s->l_desc.dyn_tree = s->dyn_ltree; - s->l_desc.stat_desc = &static_l_desc; - - s->d_desc.dyn_tree = s->dyn_dtree; - s->d_desc.stat_desc = &static_d_desc; - - s->bl_desc.dyn_tree = s->bl_tree; - s->bl_desc.stat_desc = &static_bl_desc; - - s->bi_buf = 0; - s->bi_valid = 0; - s->last_eob_len = 8; /* enough lookahead for inflate */ -#ifdef DEBUG_ZLIB - s->bits_sent = 0L; -#endif - - /* Initialize the first block of the first file: */ - init_block(s); -} - -/* =========================================================================== - * Initialize a new block. - */ -static void init_block( - deflate_state *s -) -{ - int n; /* iterates over tree elements */ - - /* Initialize the trees. */ - for (n = 0; n < L_CODES; n++) s->dyn_ltree[n].Freq = 0; - for (n = 0; n < D_CODES; n++) s->dyn_dtree[n].Freq = 0; - for (n = 0; n < BL_CODES; n++) s->bl_tree[n].Freq = 0; - - s->dyn_ltree[END_BLOCK].Freq = 1; - s->opt_len = s->static_len = 0L; - s->last_lit = s->matches = 0; -} - -#define SMALLEST 1 -/* Index within the heap array of least frequent node in the Huffman tree */ - - -/* =========================================================================== - * Remove the smallest element from the heap and recreate the heap with - * one less element. Updates heap and heap_len. - */ -#define pqremove(s, tree, top) \ -{\ - top = s->heap[SMALLEST]; \ - s->heap[SMALLEST] = s->heap[s->heap_len--]; \ - pqdownheap(s, tree, SMALLEST); \ -} - -/* =========================================================================== - * Compares to subtrees, using the tree depth as tie breaker when - * the subtrees have equal frequency. This minimizes the worst case length. - */ -#define smaller(tree, n, m, depth) \ - (tree[n].Freq < tree[m].Freq || \ - (tree[n].Freq == tree[m].Freq && depth[n] <= depth[m])) - -/* =========================================================================== - * Restore the heap property by moving down the tree starting at node k, - * exchanging a node with the smallest of its two sons if necessary, stopping - * when the heap property is re-established (each father smaller than its - * two sons). - */ -static void pqdownheap( - deflate_state *s, - ct_data *tree, /* the tree to restore */ - int k /* node to move down */ -) -{ - int v = s->heap[k]; - int j = k << 1; /* left son of k */ - while (j <= s->heap_len) { - /* Set j to the smallest of the two sons: */ - if (j < s->heap_len && - smaller(tree, s->heap[j+1], s->heap[j], s->depth)) { - j++; - } - /* Exit if v is smaller than both sons */ - if (smaller(tree, v, s->heap[j], s->depth)) break; - - /* Exchange v with the smallest son */ - s->heap[k] = s->heap[j]; k = j; - - /* And continue down the tree, setting j to the left son of k */ - j <<= 1; - } - s->heap[k] = v; -} - -/* =========================================================================== - * Compute the optimal bit lengths for a tree and update the total bit length - * for the current block. - * IN assertion: the fields freq and dad are set, heap[heap_max] and - * above are the tree nodes sorted by increasing frequency. - * OUT assertions: the field len is set to the optimal bit length, the - * array bl_count contains the frequencies for each bit length. - * The length opt_len is updated; static_len is also updated if stree is - * not null. 
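- *
- * Tiny worked example (illustrative): for frequencies a:5 b:2 c:1 d:1 the
- * tree built by build_tree() below combines c+d (weight 2), then b with that
- * node (4), then a with the result (9), giving lengths a:1 b:2 c:3 d:3 and,
- * ignoring extra bits, opt_len = 5*1 + 2*2 + 1*3 + 1*3 = 15 bits for the
- * block data.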
- */ -static void gen_bitlen( - deflate_state *s, - tree_desc *desc /* the tree descriptor */ -) -{ - ct_data *tree = desc->dyn_tree; - int max_code = desc->max_code; - const ct_data *stree = desc->stat_desc->static_tree; - const int *extra = desc->stat_desc->extra_bits; - int base = desc->stat_desc->extra_base; - int max_length = desc->stat_desc->max_length; - int h; /* heap index */ - int n, m; /* iterate over the tree elements */ - int bits; /* bit length */ - int xbits; /* extra bits */ - ush f; /* frequency */ - int overflow = 0; /* number of elements with bit length too large */ - - for (bits = 0; bits <= MAX_BITS; bits++) s->bl_count[bits] = 0; - - /* In a first pass, compute the optimal bit lengths (which may - * overflow in the case of the bit length tree). - */ - tree[s->heap[s->heap_max]].Len = 0; /* root of the heap */ - - for (h = s->heap_max+1; h < HEAP_SIZE; h++) { - n = s->heap[h]; - bits = tree[tree[n].Dad].Len + 1; - if (bits > max_length) bits = max_length, overflow++; - tree[n].Len = (ush)bits; - /* We overwrite tree[n].Dad which is no longer needed */ - - if (n > max_code) continue; /* not a leaf node */ - - s->bl_count[bits]++; - xbits = 0; - if (n >= base) xbits = extra[n-base]; - f = tree[n].Freq; - s->opt_len += (ulg)f * (bits + xbits); - if (stree) s->static_len += (ulg)f * (stree[n].Len + xbits); - } - if (overflow == 0) return; - - Trace((stderr,"\nbit length overflow\n")); - /* This happens for example on obj2 and pic of the Calgary corpus */ - - /* Find the first bit length which could increase: */ - do { - bits = max_length-1; - while (s->bl_count[bits] == 0) bits--; - s->bl_count[bits]--; /* move one leaf down the tree */ - s->bl_count[bits+1] += 2; /* move one overflow item as its brother */ - s->bl_count[max_length]--; - /* The brother of the overflow item also moves one step up, - * but this does not affect bl_count[max_length] - */ - overflow -= 2; - } while (overflow > 0); - - /* Now recompute all bit lengths, scanning in increasing frequency. - * h is still equal to HEAP_SIZE. (It is simpler to reconstruct all - * lengths instead of fixing only the wrong ones. This idea is taken - * from 'ar' written by Haruhiko Okumura.) - */ - for (bits = max_length; bits != 0; bits--) { - n = s->bl_count[bits]; - while (n != 0) { - m = s->heap[--h]; - if (m > max_code) continue; - if (tree[m].Len != (unsigned) bits) { - Trace((stderr,"code %d bits %d->%d\n", m, tree[m].Len, bits)); - s->opt_len += ((long)bits - (long)tree[m].Len) - *(long)tree[m].Freq; - tree[m].Len = (ush)bits; - } - n--; - } - } -} - -/* =========================================================================== - * Generate the codes for a given tree and bit counts (which need not be - * optimal). - * IN assertion: the array bl_count contains the bit length statistics for - * the given tree and the field len is set for all tree elements. - * OUT assertion: the field code is set for all tree elements of non - * zero code length. - */ -static void gen_codes( - ct_data *tree, /* the tree to decorate */ - int max_code, /* largest code with non zero frequency */ - ush *bl_count /* number of codes at each bit length */ -) -{ - ush next_code[MAX_BITS+1]; /* next code value for each bit length */ - ush code = 0; /* running code value */ - int bits; /* bit index */ - int n; /* code index */ - - /* The distribution counts are first used to generate the code values - * without bit reversal. 
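- *
- * Example (illustrative): with two 2-bit codes and four 3-bit codes,
- * bl_count[2] = 2 and bl_count[3] = 4, so next_code[2] = 0 and
- * next_code[3] = (0 + 2) << 1 = 4; the loop below then hands out 00 and 01
- * for the 2-bit symbols and 100, 101, 110, 111 for the 3-bit ones, the last
- * code being all ones as required.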
- */
- for (bits = 1; bits <= MAX_BITS; bits++) {
- next_code[bits] = code = (code + bl_count[bits-1]) << 1;
- }
- /* Check that the bit counts in bl_count are consistent. The last code
- * must be all ones.
- */
- Assert (code + bl_count[MAX_BITS]-1 == (1<<MAX_BITS)-1,
- "inconsistent bit counts");
- Tracev((stderr,"\ngen_codes: max_code %d ", max_code));
-
- for (n = 0; n <= max_code; n++) {
- int len = tree[n].Len;
- if (len == 0) continue;
- /* Now reverse the bits */
- tree[n].Code = bitrev32((u32)(next_code[len]++)) >> (32 - len);
-
- Tracecv(tree != static_ltree, (stderr,"\nn %3d %c l %2d c %4x (%x) ",
- n, (isgraph(n) ? n : ' '), len, tree[n].Code, next_code[len]-1));
- }
-}
-
-/* ===========================================================================
- * Construct one Huffman tree and assigns the code bit strings and lengths.
- * Update the total bit length for the current block.
- * IN assertion: the field freq is set for all tree elements.
- * OUT assertions: the fields len and code are set to the optimal bit length
- * and corresponding code. The length opt_len is updated; static_len is
- * also updated if stree is not null. The field max_code is set.
- */
-static void build_tree(
- deflate_state *s,
- tree_desc *desc /* the tree descriptor */
-)
-{
- ct_data *tree = desc->dyn_tree;
- const ct_data *stree = desc->stat_desc->static_tree;
- int elems = desc->stat_desc->elems;
- int n, m; /* iterate over heap elements */
- int max_code = -1; /* largest code with non zero frequency */
- int node; /* new node being created */
-
- /* Construct the initial heap, with least frequent element in
- * heap[SMALLEST]. The sons of heap[n] are heap[2*n] and heap[2*n+1].
- * heap[0] is not used.
- */
- s->heap_len = 0, s->heap_max = HEAP_SIZE;
-
- for (n = 0; n < elems; n++) {
- if (tree[n].Freq != 0) {
- s->heap[++(s->heap_len)] = max_code = n;
- s->depth[n] = 0;
- } else {
- tree[n].Len = 0;
- }
- }
-
- /* The pkzip format requires that at least one distance code exists,
- * and that at least one bit should be sent even if there is only one
- * possible code. So to avoid special checks later on we force at least
- * two codes of non zero frequency.
- */
- while (s->heap_len < 2) {
- node = s->heap[++(s->heap_len)] = (max_code < 2 ? ++max_code : 0);
- tree[node].Freq = 1;
- s->depth[node] = 0;
- s->opt_len--; if (stree) s->static_len -= stree[node].Len;
- /* node is 0 or 1 so it does not have extra bits */
- }
- desc->max_code = max_code;
-
- /* The elements heap[heap_len/2+1 .. heap_len] are leaves of the tree,
- * establish sub-heaps of increasing lengths:
- */
- for (n = s->heap_len/2; n >= 1; n--) pqdownheap(s, tree, n);
-
- /* Construct the Huffman tree by repeatedly combining the least two
- * frequent nodes.
- */
- node = elems; /* next internal node of the tree */
- do {
- pqremove(s, tree, n); /* n = node of least frequency */
- m = s->heap[SMALLEST]; /* m = node of next least frequency */
-
- s->heap[--(s->heap_max)] = n; /* keep the nodes sorted by frequency */
- s->heap[--(s->heap_max)] = m;
-
- /* Create a new node father of n and m */
- tree[node].Freq = tree[n].Freq + tree[m].Freq;
- s->depth[node] = (uch) (max(s->depth[n], s->depth[m]) + 1);
- tree[n].Dad = tree[m].Dad = (ush)node;
-#ifdef DUMP_BL_TREE
- if (tree == s->bl_tree) {
- fprintf(stderr,"\nnode %d(%d), sons %d(%d) %d(%d)",
- node, tree[node].Freq, n, tree[n].Freq, m, tree[m].Freq);
- }
-#endif
- /* and insert the new node in the heap */
- s->heap[SMALLEST] = node++;
- pqdownheap(s, tree, SMALLEST);
-
- } while (s->heap_len >= 2);
-
- s->heap[--(s->heap_max)] = s->heap[SMALLEST];
-
- /* At this point, the fields freq and dad are set. We can now
- * generate the bit lengths.
- */ - gen_bitlen(s, (tree_desc *)desc); - - /* The field len is now set, we can generate the bit codes */ - gen_codes ((ct_data *)tree, max_code, s->bl_count); -} - -/* =========================================================================== - * Scan a literal or distance tree to determine the frequencies of the codes - * in the bit length tree. - */ -static void scan_tree( - deflate_state *s, - ct_data *tree, /* the tree to be scanned */ - int max_code /* and its largest code of non zero frequency */ -) -{ - int n; /* iterates over all tree elements */ - int prevlen = -1; /* last emitted length */ - int curlen; /* length of current code */ - int nextlen = tree[0].Len; /* length of next code */ - int count = 0; /* repeat count of the current code */ - int max_count = 7; /* max repeat count */ - int min_count = 4; /* min repeat count */ - - if (nextlen == 0) max_count = 138, min_count = 3; - tree[max_code+1].Len = (ush)0xffff; /* guard */ - - for (n = 0; n <= max_code; n++) { - curlen = nextlen; nextlen = tree[n+1].Len; - if (++count < max_count && curlen == nextlen) { - continue; - } else if (count < min_count) { - s->bl_tree[curlen].Freq += count; - } else if (curlen != 0) { - if (curlen != prevlen) s->bl_tree[curlen].Freq++; - s->bl_tree[REP_3_6].Freq++; - } else if (count <= 10) { - s->bl_tree[REPZ_3_10].Freq++; - } else { - s->bl_tree[REPZ_11_138].Freq++; - } - count = 0; prevlen = curlen; - if (nextlen == 0) { - max_count = 138, min_count = 3; - } else if (curlen == nextlen) { - max_count = 6, min_count = 3; - } else { - max_count = 7, min_count = 4; - } - } -} - -/* =========================================================================== - * Send a literal or distance tree in compressed form, using the codes in - * bl_tree. - */ -static void send_tree( - deflate_state *s, - ct_data *tree, /* the tree to be scanned */ - int max_code /* and its largest code of non zero frequency */ -) -{ - int n; /* iterates over all tree elements */ - int prevlen = -1; /* last emitted length */ - int curlen; /* length of current code */ - int nextlen = tree[0].Len; /* length of next code */ - int count = 0; /* repeat count of the current code */ - int max_count = 7; /* max repeat count */ - int min_count = 4; /* min repeat count */ - - /* tree[max_code+1].Len = -1; */ /* guard already set */ - if (nextlen == 0) max_count = 138, min_count = 3; - - for (n = 0; n <= max_code; n++) { - curlen = nextlen; nextlen = tree[n+1].Len; - if (++count < max_count && curlen == nextlen) { - continue; - } else if (count < min_count) { - do { send_code(s, curlen, s->bl_tree); } while (--count != 0); - - } else if (curlen != 0) { - if (curlen != prevlen) { - send_code(s, curlen, s->bl_tree); count--; - } - Assert(count >= 3 && count <= 6, " 3_6?"); - send_code(s, REP_3_6, s->bl_tree); send_bits(s, count-3, 2); - - } else if (count <= 10) { - send_code(s, REPZ_3_10, s->bl_tree); send_bits(s, count-3, 3); - - } else { - send_code(s, REPZ_11_138, s->bl_tree); send_bits(s, count-11, 7); - } - count = 0; prevlen = curlen; - if (nextlen == 0) { - max_count = 138, min_count = 3; - } else if (curlen == nextlen) { - max_count = 6, min_count = 3; - } else { - max_count = 7, min_count = 4; - } - } -} - -/* =========================================================================== - * Construct the Huffman tree for the bit lengths and return the index in - * bl_order of the last bit length code to send. 
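- *
- * (To make scan_tree()/send_tree() above concrete: a run of code lengths
- * 4 4 4 4 4 0 0 0 8 is sent as the literal length 4 once, a REP_3_6 code
- * repeating it four more times, a REPZ_3_10 code covering the three zeros,
- * and then the lone 8.)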
- */ -static int build_bl_tree( - deflate_state *s -) -{ - int max_blindex; /* index of last bit length code of non zero freq */ - - /* Determine the bit length frequencies for literal and distance trees */ - scan_tree(s, (ct_data *)s->dyn_ltree, s->l_desc.max_code); - scan_tree(s, (ct_data *)s->dyn_dtree, s->d_desc.max_code); - - /* Build the bit length tree: */ - build_tree(s, (tree_desc *)(&(s->bl_desc))); - /* opt_len now includes the length of the tree representations, except - * the lengths of the bit lengths codes and the 5+5+4 bits for the counts. - */ - - /* Determine the number of bit length codes to send. The pkzip format - * requires that at least 4 bit length codes be sent. (appnote.txt says - * 3 but the actual value used is 4.) - */ - for (max_blindex = BL_CODES-1; max_blindex >= 3; max_blindex--) { - if (s->bl_tree[bl_order[max_blindex]].Len != 0) break; - } - /* Update opt_len to include the bit length tree and counts */ - s->opt_len += 3*(max_blindex+1) + 5+5+4; - Tracev((stderr, "\ndyn trees: dyn %ld, stat %ld", - s->opt_len, s->static_len)); - - return max_blindex; -} - -/* =========================================================================== - * Send the header for a block using dynamic Huffman trees: the counts, the - * lengths of the bit length codes, the literal tree and the distance tree. - * IN assertion: lcodes >= 257, dcodes >= 1, blcodes >= 4. - */ -static void send_all_trees( - deflate_state *s, - int lcodes, /* number of codes for each tree */ - int dcodes, /* number of codes for each tree */ - int blcodes /* number of codes for each tree */ -) -{ - int rank; /* index in bl_order */ - - Assert (lcodes >= 257 && dcodes >= 1 && blcodes >= 4, "not enough codes"); - Assert (lcodes <= L_CODES && dcodes <= D_CODES && blcodes <= BL_CODES, - "too many codes"); - Tracev((stderr, "\nbl counts: ")); - send_bits(s, lcodes-257, 5); /* not +255 as stated in appnote.txt */ - send_bits(s, dcodes-1, 5); - send_bits(s, blcodes-4, 4); /* not -3 as stated in appnote.txt */ - for (rank = 0; rank < blcodes; rank++) { - Tracev((stderr, "\nbl code %2d ", bl_order[rank])); - send_bits(s, s->bl_tree[bl_order[rank]].Len, 3); - } - Tracev((stderr, "\nbl tree: sent %ld", s->bits_sent)); - - send_tree(s, (ct_data *)s->dyn_ltree, lcodes-1); /* literal tree */ - Tracev((stderr, "\nlit tree: sent %ld", s->bits_sent)); - - send_tree(s, (ct_data *)s->dyn_dtree, dcodes-1); /* distance tree */ - Tracev((stderr, "\ndist tree: sent %ld", s->bits_sent)); -} - -/* =========================================================================== - * Send a stored block - */ -void zlib_tr_stored_block( - deflate_state *s, - char *buf, /* input block */ - ulg stored_len, /* length of input block */ - int eof /* true if this is the last block for a file */ -) -{ - send_bits(s, (STORED_BLOCK<<1)+eof, 3); /* send block type */ - s->compressed_len = (s->compressed_len + 3 + 7) & (ulg)~7L; - s->compressed_len += (stored_len + 4) << 3; - - copy_block(s, buf, (unsigned)stored_len, 1); /* with header */ -} - -/* Send just the `stored block' type code without any length bytes or data. - */ -void zlib_tr_stored_type_only( - deflate_state *s -) -{ - send_bits(s, (STORED_BLOCK << 1), 3); - bi_windup(s); - s->compressed_len = (s->compressed_len + 3) & ~7L; -} - - -/* =========================================================================== - * Send one empty static block to give enough lookahead for inflate. - * This takes 10 bits, of which 7 may remain in the bit buffer. 
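- * (Those 10 bits are the 3 bit block header -- BFINAL = 0, BTYPE = 01 for
- * "static trees" -- plus the end-of-block symbol 256, whose static code is
- * the all-zero 7 bit code.)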
- * The current inflate code requires 9 bits of lookahead. If the - * last two codes for the previous block (real code plus EOB) were coded - * on 5 bits or less, inflate may have only 5+3 bits of lookahead to decode - * the last real code. In this case we send two empty static blocks instead - * of one. (There are no problems if the previous block is stored or fixed.) - * To simplify the code, we assume the worst case of last real code encoded - * on one bit only. - */ -void zlib_tr_align( - deflate_state *s -) -{ - send_bits(s, STATIC_TREES<<1, 3); - send_code(s, END_BLOCK, static_ltree); - s->compressed_len += 10L; /* 3 for block type, 7 for EOB */ - bi_flush(s); - /* Of the 10 bits for the empty block, we have already sent - * (10 - bi_valid) bits. The lookahead for the last real code (before - * the EOB of the previous block) was thus at least one plus the length - * of the EOB plus what we have just sent of the empty static block. - */ - if (1 + s->last_eob_len + 10 - s->bi_valid < 9) { - send_bits(s, STATIC_TREES<<1, 3); - send_code(s, END_BLOCK, static_ltree); - s->compressed_len += 10L; - bi_flush(s); - } - s->last_eob_len = 7; -} - -/* =========================================================================== - * Determine the best encoding for the current block: dynamic trees, static - * trees or store, and output the encoded block to the zip file. This function - * returns the total compressed length for the file so far. - */ -ulg zlib_tr_flush_block( - deflate_state *s, - char *buf, /* input block, or NULL if too old */ - ulg stored_len, /* length of input block */ - int eof /* true if this is the last block for a file */ -) -{ - ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */ - int max_blindex = 0; /* index of last bit length code of non zero freq */ - - /* Build the Huffman trees unless a stored block is forced */ - if (s->level > 0) { - - /* Check if the file is ascii or binary */ - if (s->data_type == Z_UNKNOWN) set_data_type(s); - - /* Construct the literal and distance trees */ - build_tree(s, (tree_desc *)(&(s->l_desc))); - Tracev((stderr, "\nlit data: dyn %ld, stat %ld", s->opt_len, - s->static_len)); - - build_tree(s, (tree_desc *)(&(s->d_desc))); - Tracev((stderr, "\ndist data: dyn %ld, stat %ld", s->opt_len, - s->static_len)); - /* At this point, opt_len and static_len are the total bit lengths of - * the compressed block data, excluding the tree representations. - */ - - /* Build the bit length tree for the above two trees, and get the index - * in bl_order of the last bit length code to send. - */ - max_blindex = build_bl_tree(s); - - /* Determine the best encoding. 
Compute first the block length in bytes*/ - opt_lenb = (s->opt_len+3+7)>>3; - static_lenb = (s->static_len+3+7)>>3; - - Tracev((stderr, "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u ", - opt_lenb, s->opt_len, static_lenb, s->static_len, stored_len, - s->last_lit)); - - if (static_lenb <= opt_lenb) opt_lenb = static_lenb; - - } else { - Assert(buf != (char*)0, "lost buf"); - opt_lenb = static_lenb = stored_len + 5; /* force a stored block */ - } - - /* If compression failed and this is the first and last block, - * and if the .zip file can be seeked (to rewrite the local header), - * the whole file is transformed into a stored file: - */ -#ifdef STORED_FILE_OK -# ifdef FORCE_STORED_FILE - if (eof && s->compressed_len == 0L) { /* force stored file */ -# else - if (stored_len <= opt_lenb && eof && s->compressed_len==0L && seekable()) { -# endif - /* Since LIT_BUFSIZE <= 2*WSIZE, the input data must be there: */ - if (buf == (char*)0) error ("block vanished"); - - copy_block(s, buf, (unsigned)stored_len, 0); /* without header */ - s->compressed_len = stored_len << 3; - s->method = STORED; - } else -#endif /* STORED_FILE_OK */ - -#ifdef FORCE_STORED - if (buf != (char*)0) { /* force stored block */ -#else - if (stored_len+4 <= opt_lenb && buf != (char*)0) { - /* 4: two words for the lengths */ -#endif - /* The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE. - * Otherwise we can't have processed more than WSIZE input bytes since - * the last block flush, because compression would have been - * successful. If LIT_BUFSIZE <= WSIZE, it is never too late to - * transform a block into a stored block. - */ - zlib_tr_stored_block(s, buf, stored_len, eof); - -#ifdef FORCE_STATIC - } else if (static_lenb >= 0) { /* force static trees */ -#else - } else if (static_lenb == opt_lenb) { -#endif - send_bits(s, (STATIC_TREES<<1)+eof, 3); - compress_block(s, (ct_data *)static_ltree, (ct_data *)static_dtree); - s->compressed_len += 3 + s->static_len; - } else { - send_bits(s, (DYN_TREES<<1)+eof, 3); - send_all_trees(s, s->l_desc.max_code+1, s->d_desc.max_code+1, - max_blindex+1); - compress_block(s, (ct_data *)s->dyn_ltree, (ct_data *)s->dyn_dtree); - s->compressed_len += 3 + s->opt_len; - } - Assert (s->compressed_len == s->bits_sent, "bad compressed size"); - init_block(s); - - if (eof) { - bi_windup(s); - s->compressed_len += 7; /* align on byte boundary */ - } - Tracev((stderr,"\ncomprlen %lu(%lu) ", s->compressed_len>>3, - s->compressed_len-7*eof)); - - return s->compressed_len >> 3; -} - -/* =========================================================================== - * Save the match info and tally the frequency counts. Return true if - * the current block must be flushed. 
- */ -int zlib_tr_tally( - deflate_state *s, - unsigned dist, /* distance of matched string */ - unsigned lc /* match length-MIN_MATCH or unmatched char (if dist==0) */ -) -{ - s->d_buf[s->last_lit] = (ush)dist; - s->l_buf[s->last_lit++] = (uch)lc; - if (dist == 0) { - /* lc is the unmatched char */ - s->dyn_ltree[lc].Freq++; - } else { - s->matches++; - /* Here, lc is the match length - MIN_MATCH */ - dist--; /* dist = match distance - 1 */ - Assert((ush)dist < (ush)MAX_DIST(s) && - (ush)lc <= (ush)(MAX_MATCH-MIN_MATCH) && - (ush)d_code(dist) < (ush)D_CODES, "zlib_tr_tally: bad match"); - - s->dyn_ltree[length_code[lc]+LITERALS+1].Freq++; - s->dyn_dtree[d_code(dist)].Freq++; - } - - /* Try to guess if it is profitable to stop the current block here */ - if ((s->last_lit & 0xfff) == 0 && s->level > 2) { - /* Compute an upper bound for the compressed length */ - ulg out_length = (ulg)s->last_lit*8L; - ulg in_length = (ulg)((long)s->strstart - s->block_start); - int dcode; - for (dcode = 0; dcode < D_CODES; dcode++) { - out_length += (ulg)s->dyn_dtree[dcode].Freq * - (5L+extra_dbits[dcode]); - } - out_length >>= 3; - Tracev((stderr,"\nlast_lit %u, in %ld, out ~%ld(%ld%%) ", - s->last_lit, in_length, out_length, - 100L - out_length*100L/in_length)); - if (s->matches < s->last_lit/2 && out_length < in_length/2) return 1; - } - return (s->last_lit == s->lit_bufsize-1); - /* We avoid equality with lit_bufsize because of wraparound at 64K - * on 16 bit machines and because stored blocks are restricted to - * 64K-1 bytes. - */ -} - -/* =========================================================================== - * Send the block data compressed using the given Huffman trees - */ -static void compress_block( - deflate_state *s, - ct_data *ltree, /* literal tree */ - ct_data *dtree /* distance tree */ -) -{ - unsigned dist; /* distance of matched string */ - int lc; /* match length or unmatched char (if dist == 0) */ - unsigned lx = 0; /* running index in l_buf */ - unsigned code; /* the code to send */ - int extra; /* number of extra bits to send */ - - if (s->last_lit != 0) do { - dist = s->d_buf[lx]; - lc = s->l_buf[lx++]; - if (dist == 0) { - send_code(s, lc, ltree); /* send a literal byte */ - Tracecv(isgraph(lc), (stderr," '%c' ", lc)); - } else { - /* Here, lc is the match length - MIN_MATCH */ - code = length_code[lc]; - send_code(s, code+LITERALS+1, ltree); /* send the length code */ - extra = extra_lbits[code]; - if (extra != 0) { - lc -= base_length[code]; - send_bits(s, lc, extra); /* send the extra length bits */ - } - dist--; /* dist is now the match distance - 1 */ - code = d_code(dist); - Assert (code < D_CODES, "bad d_code"); - - send_code(s, code, dtree); /* send the distance code */ - extra = extra_dbits[code]; - if (extra != 0) { - dist -= base_dist[code]; - send_bits(s, dist, extra); /* send the extra distance bits */ - } - } /* literal or match pair ? */ - - /* Check that the overlay between pending_buf and d_buf+l_buf is ok: */ - Assert(s->pending < s->lit_bufsize + 2*lx, "pendingBuf overflow"); - - } while (lx < s->last_lit); - - send_code(s, END_BLOCK, ltree); - s->last_eob_len = ltree[END_BLOCK].Len; -} - -/* =========================================================================== - * Set the data type to ASCII or BINARY, using a crude approximation: - * binary if more than 20% of the bytes are <= 6 or >= 128, ascii otherwise. 
- * IN assertion: the fields freq of dyn_ltree are set and the total of all - * frequencies does not exceed 64K (to fit in an int on 16 bit machines). - */ -static void set_data_type( - deflate_state *s -) -{ - int n = 0; - unsigned ascii_freq = 0; - unsigned bin_freq = 0; - while (n < 7) bin_freq += s->dyn_ltree[n++].Freq; - while (n < 128) ascii_freq += s->dyn_ltree[n++].Freq; - while (n < LITERALS) bin_freq += s->dyn_ltree[n++].Freq; - s->data_type = (Byte)(bin_freq > (ascii_freq >> 2) ? Z_BINARY : Z_ASCII); -} - -/* =========================================================================== - * Copy a stored block, storing first the length and its - * one's complement if requested. - */ -static void copy_block( - deflate_state *s, - char *buf, /* the input data */ - unsigned len, /* its length */ - int header /* true if block header must be written */ -) -{ - bi_windup(s); /* align on byte boundary */ - s->last_eob_len = 8; /* enough lookahead for inflate */ - - if (header) { - put_short(s, (ush)len); - put_short(s, (ush)~len); -#ifdef DEBUG_ZLIB - s->bits_sent += 2*16; -#endif - } -#ifdef DEBUG_ZLIB - s->bits_sent += (ulg)len<<3; -#endif - /* bundle up the put_byte(s, *buf++) calls */ - memcpy(&s->pending_buf[s->pending], buf, len); - s->pending += len; -} - diff --git a/linux/zlib_deflate/defutil.h b/linux/zlib_deflate/defutil.h deleted file mode 100644 index a8c3708..0000000 --- a/linux/zlib_deflate/defutil.h +++ /dev/null @@ -1,327 +0,0 @@ - - - -#define Assert(err, str) -#define Trace(dummy) -#define Tracev(dummy) -#define Tracecv(err, dummy) -#define Tracevv(dummy) - - - -#define LENGTH_CODES 29 -/* number of length codes, not counting the special END_BLOCK code */ - -#define LITERALS 256 -/* number of literal bytes 0..255 */ - -#define L_CODES (LITERALS+1+LENGTH_CODES) -/* number of Literal or Length codes, including the END_BLOCK code */ - -#define D_CODES 30 -/* number of distance codes */ - -#define BL_CODES 19 -/* number of codes used to transfer the bit lengths */ - -#define HEAP_SIZE (2*L_CODES+1) -/* maximum heap size */ - -#define MAX_BITS 15 -/* All codes must not exceed MAX_BITS bits */ - -#define INIT_STATE 42 -#define BUSY_STATE 113 -#define FINISH_STATE 666 -/* Stream status */ - - -/* Data structure describing a single value and its code string. */ -typedef struct ct_data_s { - union { - ush freq; /* frequency count */ - ush code; /* bit string */ - } fc; - union { - ush dad; /* father node in Huffman tree */ - ush len; /* length of bit string */ - } dl; -} ct_data; - -#define Freq fc.freq -#define Code fc.code -#define Dad dl.dad -#define Len dl.len - -typedef struct static_tree_desc_s static_tree_desc; - -typedef struct tree_desc_s { - ct_data *dyn_tree; /* the dynamic tree */ - int max_code; /* largest code with non zero frequency */ - static_tree_desc *stat_desc; /* the corresponding static tree */ -} tree_desc; - -typedef ush Pos; -typedef unsigned IPos; - -/* A Pos is an index in the character window. We use short instead of int to - * save space in the various tables. IPos is used only for parameter passing. 
- */ - -typedef struct deflate_state { - z_streamp strm; /* pointer back to this zlib stream */ - int status; /* as the name implies */ - Byte *pending_buf; /* output still pending */ - ulg pending_buf_size; /* size of pending_buf */ - Byte *pending_out; /* next pending byte to output to the stream */ - int pending; /* nb of bytes in the pending buffer */ - int noheader; /* suppress zlib header and adler32 */ - Byte data_type; /* UNKNOWN, BINARY or ASCII */ - Byte method; /* STORED (for zip only) or DEFLATED */ - int last_flush; /* value of flush param for previous deflate call */ - - /* used by deflate.c: */ - - uInt w_size; /* LZ77 window size (32K by default) */ - uInt w_bits; /* log2(w_size) (8..16) */ - uInt w_mask; /* w_size - 1 */ - - Byte *window; - /* Sliding window. Input bytes are read into the second half of the window, - * and move to the first half later to keep a dictionary of at least wSize - * bytes. With this organization, matches are limited to a distance of - * wSize-MAX_MATCH bytes, but this ensures that IO is always - * performed with a length multiple of the block size. Also, it limits - * the window size to 64K, which is quite useful on MSDOS. - * To do: use the user input buffer as sliding window. - */ - - ulg window_size; - /* Actual size of window: 2*wSize, except when the user input buffer - * is directly used as sliding window. - */ - - Pos *prev; - /* Link to older string with same hash index. To limit the size of this - * array to 64K, this link is maintained only for the last 32K strings. - * An index in this array is thus a window index modulo 32K. - */ - - Pos *head; /* Heads of the hash chains or NIL. */ - - uInt ins_h; /* hash index of string to be inserted */ - uInt hash_size; /* number of elements in hash table */ - uInt hash_bits; /* log2(hash_size) */ - uInt hash_mask; /* hash_size-1 */ - - uInt hash_shift; - /* Number of bits by which ins_h must be shifted at each input - * step. It must be such that after MIN_MATCH steps, the oldest - * byte no longer takes part in the hash key, that is: - * hash_shift * MIN_MATCH >= hash_bits - */ - - long block_start; - /* Window position at the beginning of the current output block. Gets - * negative when the window is moved backwards. - */ - - uInt match_length; /* length of best match */ - IPos prev_match; /* previous match */ - int match_available; /* set if previous match exists */ - uInt strstart; /* start of string to insert */ - uInt match_start; /* start of matching string */ - uInt lookahead; /* number of valid bytes ahead in window */ - - uInt prev_length; - /* Length of the best match at previous step. Matches not greater than this - * are discarded. This is used in the lazy match evaluation. - */ - - uInt max_chain_length; - /* To speed up deflation, hash chains are never searched beyond this - * length. A higher limit improves compression ratio but degrades the - * speed. - */ - - uInt max_lazy_match; - /* Attempt to find a better match only when the current match is strictly - * smaller than this value. This mechanism is used only for compression - * levels >= 4. - */ -# define max_insert_length max_lazy_match - /* Insert new strings in the hash table only if the match length is not - * greater than this length. This saves time but degrades compression. - * max_insert_length is used only for compression levels <= 3. 
- */ - - int level; /* compression level (1..9) */ - int strategy; /* favor or force Huffman coding*/ - - uInt good_match; - /* Use a faster search when the previous match is longer than this */ - - int nice_match; /* Stop searching when current match exceeds this */ - - /* used by trees.c: */ - /* Didn't use ct_data typedef below to suppress compiler warning */ - struct ct_data_s dyn_ltree[HEAP_SIZE]; /* literal and length tree */ - struct ct_data_s dyn_dtree[2*D_CODES+1]; /* distance tree */ - struct ct_data_s bl_tree[2*BL_CODES+1]; /* Huffman tree for bit lengths */ - - struct tree_desc_s l_desc; /* desc. for literal tree */ - struct tree_desc_s d_desc; /* desc. for distance tree */ - struct tree_desc_s bl_desc; /* desc. for bit length tree */ - - ush bl_count[MAX_BITS+1]; - /* number of codes at each bit length for an optimal tree */ - - int heap[2*L_CODES+1]; /* heap used to build the Huffman trees */ - int heap_len; /* number of elements in the heap */ - int heap_max; /* element of largest frequency */ - /* The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used. - * The same heap array is used to build all trees. - */ - - uch depth[2*L_CODES+1]; - /* Depth of each subtree used as tie breaker for trees of equal frequency - */ - - uch *l_buf; /* buffer for literals or lengths */ - - uInt lit_bufsize; - /* Size of match buffer for literals/lengths. There are 4 reasons for - * limiting lit_bufsize to 64K: - * - frequencies can be kept in 16 bit counters - * - if compression is not successful for the first block, all input - * data is still in the window so we can still emit a stored block even - * when input comes from standard input. (This can also be done for - * all blocks if lit_bufsize is not greater than 32K.) - * - if compression is not successful for a file smaller than 64K, we can - * even emit a stored file instead of a stored block (saving 5 bytes). - * This is applicable only for zip (not gzip or zlib). - * - creating new Huffman trees less frequently may not provide fast - * adaptation to changes in the input data statistics. (Take for - * example a binary file with poorly compressible code followed by - * a highly compressible string table.) Smaller buffer sizes give - * fast adaptation but have of course the overhead of transmitting - * trees more frequently. - * - I can't count above 4 - */ - - uInt last_lit; /* running index in l_buf */ - - ush *d_buf; - /* Buffer for distances. To simplify the code, d_buf and l_buf have - * the same number of elements. To use different lengths, an extra flag - * array would be necessary. - */ - - ulg opt_len; /* bit length of current block with optimal trees */ - ulg static_len; /* bit length of current block with static trees */ - ulg compressed_len; /* total bit length of compressed file */ - uInt matches; /* number of string matches in current block */ - int last_eob_len; /* bit length of EOB code for last block */ - -#ifdef DEBUG_ZLIB - ulg bits_sent; /* bit length of the compressed data */ -#endif - - ush bi_buf; - /* Output buffer. bits are inserted starting at the bottom (least - * significant bits). - */ - int bi_valid; - /* Number of valid bits in bi_buf. All bits above the last valid bit - * are always zero. 
- */ - -} deflate_state; - -typedef struct deflate_workspace { - /* State memory for the deflator */ - deflate_state deflate_memory; - Byte *window_memory; - Pos *prev_memory; - Pos *head_memory; - char *overlay_memory; -} deflate_workspace; - -#define zlib_deflate_window_memsize(windowBits) \ - (2 * (1 << (windowBits)) * sizeof(Byte)) -#define zlib_deflate_prev_memsize(windowBits) \ - ((1 << (windowBits)) * sizeof(Pos)) -#define zlib_deflate_head_memsize(memLevel) \ - ((1 << ((memLevel)+7)) * sizeof(Pos)) -#define zlib_deflate_overlay_memsize(memLevel) \ - ((1 << ((memLevel)+6)) * (sizeof(ush)+2)) - -/* Output a byte on the stream. - * IN assertion: there is enough room in pending_buf. - */ -#define put_byte(s, c) {s->pending_buf[s->pending++] = (c);} - - -#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1) -/* Minimum amount of lookahead, except at the end of the input file. - * See deflate.c for comments about the MIN_MATCH+1. - */ - -#define MAX_DIST(s) ((s)->w_size-MIN_LOOKAHEAD) -/* In order to simplify the code, particularly on 16 bit machines, match - * distances are limited to MAX_DIST instead of WSIZE. - */ - - /* in trees.c */ -void zlib_tr_init (deflate_state *s); -int zlib_tr_tally (deflate_state *s, unsigned dist, unsigned lc); -ulg zlib_tr_flush_block (deflate_state *s, char *buf, ulg stored_len, - int eof); -void zlib_tr_align (deflate_state *s); -void zlib_tr_stored_block (deflate_state *s, char *buf, ulg stored_len, - int eof); -void zlib_tr_stored_type_only (deflate_state *); - - -/* =========================================================================== - * Output a short LSB first on the stream. - * IN assertion: there is enough room in pendingBuf. - */ -#define put_short(s, w) { \ - put_byte(s, (uch)((w) & 0xff)); \ - put_byte(s, (uch)((ush)(w) >> 8)); \ -} - -/* =========================================================================== - * Flush the bit buffer, keeping at most 7 bits in it. - */ -static inline void bi_flush(deflate_state *s) -{ - if (s->bi_valid == 16) { - put_short(s, s->bi_buf); - s->bi_buf = 0; - s->bi_valid = 0; - } else if (s->bi_valid >= 8) { - put_byte(s, (Byte)s->bi_buf); - s->bi_buf >>= 8; - s->bi_valid -= 8; - } -} - -/* =========================================================================== - * Flush the bit buffer and align the output on a byte boundary - */ -static inline void bi_windup(deflate_state *s) -{ - if (s->bi_valid > 8) { - put_short(s, s->bi_buf); - } else if (s->bi_valid > 0) { - put_byte(s, (Byte)s->bi_buf); - } - s->bi_buf = 0; - s->bi_valid = 0; -#ifdef DEBUG_ZLIB - s->bits_sent = (s->bits_sent+7) & ~7; -#endif -} - diff --git a/linux/zlib_inflate/inffast.c b/linux/zlib_inflate/inffast.c deleted file mode 100644 index 2c13ecc..0000000 --- a/linux/zlib_inflate/inffast.c +++ /dev/null @@ -1,363 +0,0 @@ -/* inffast.c -- fast decoding - * Copyright (C) 1995-2004 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -#include -#include "inftrees.h" -#include "inflate.h" -#include "inffast.h" - -#ifndef ASMINF - -/* Allow machine dependent optimization for post-increment or pre-increment. 
- Based on testing to date, - Pre-increment preferred for: - - PowerPC G3 (Adler) - - MIPS R5000 (Randers-Pehrson) - Post-increment preferred for: - - none - No measurable difference: - - Pentium III (Anderson) - - M68060 (Nikl) - */ -union uu { - unsigned short us; - unsigned char b[2]; -}; - -/* Endian independed version */ -static inline unsigned short -get_unaligned16(const unsigned short *p) -{ - union uu mm; - unsigned char *b = (unsigned char *)p; - - mm.b[0] = b[0]; - mm.b[1] = b[1]; - return mm.us; -} - -#ifdef POSTINC -# define OFF 0 -# define PUP(a) *(a)++ -# define UP_UNALIGNED(a) get_unaligned16((a)++) -#else -# define OFF 1 -# define PUP(a) *++(a) -# define UP_UNALIGNED(a) get_unaligned16(++(a)) -#endif - -/* - Decode literal, length, and distance codes and write out the resulting - literal and match bytes until either not enough input or output is - available, an end-of-block is encountered, or a data error is encountered. - When large enough input and output buffers are supplied to inflate(), for - example, a 16K input buffer and a 64K output buffer, more than 95% of the - inflate execution time is spent in this routine. - - Entry assumptions: - - state->mode == LEN - strm->avail_in >= 6 - strm->avail_out >= 258 - start >= strm->avail_out - state->bits < 8 - - On return, state->mode is one of: - - LEN -- ran out of enough output space or enough available input - TYPE -- reached end of block code, inflate() to interpret next block - BAD -- error in block data - - Notes: - - - The maximum input bits used by a length/distance pair is 15 bits for the - length code, 5 bits for the length extra, 15 bits for the distance code, - and 13 bits for the distance extra. This totals 48 bits, or six bytes. - Therefore if strm->avail_in >= 6, then there is enough input to avoid - checking for available input while decoding. - - - The maximum bytes that a single length/distance pair can output is 258 - bytes, which is the maximum length that can be coded. inflate_fast() - requires strm->avail_out >= 258 for each loop to avoid checking for - output space. 
- - - @start: inflate()'s starting value for strm->avail_out - */ -void inflate_fast(z_streamp strm, unsigned start) -{ - struct inflate_state *state; - const unsigned char *in; /* local strm->next_in */ - const unsigned char *last; /* while in < last, enough input available */ - unsigned char *out; /* local strm->next_out */ - unsigned char *beg; /* inflate()'s initial strm->next_out */ - unsigned char *end; /* while out < end, enough space available */ -#ifdef INFLATE_STRICT - unsigned dmax; /* maximum distance from zlib header */ -#endif - unsigned wsize; /* window size or zero if not using window */ - unsigned whave; /* valid bytes in the window */ - unsigned write; /* window write index */ - unsigned char *window; /* allocated sliding window, if wsize != 0 */ - unsigned long hold; /* local strm->hold */ - unsigned bits; /* local strm->bits */ - code const *lcode; /* local strm->lencode */ - code const *dcode; /* local strm->distcode */ - unsigned lmask; /* mask for first level of length codes */ - unsigned dmask; /* mask for first level of distance codes */ - code this; /* retrieved table entry */ - unsigned op; /* code bits, operation, extra bits, or */ - /* window position, window bytes to copy */ - unsigned len; /* match length, unused bytes */ - unsigned dist; /* match distance */ - unsigned char *from; /* where to copy match from */ - - /* copy state to local variables */ - state = (struct inflate_state *)strm->state; - in = strm->next_in - OFF; - last = in + (strm->avail_in - 5); - out = strm->next_out - OFF; - beg = out - (start - strm->avail_out); - end = out + (strm->avail_out - 257); -#ifdef INFLATE_STRICT - dmax = state->dmax; -#endif - wsize = state->wsize; - whave = state->whave; - write = state->write; - window = state->window; - hold = state->hold; - bits = state->bits; - lcode = state->lencode; - dcode = state->distcode; - lmask = (1U << state->lenbits) - 1; - dmask = (1U << state->distbits) - 1; - - /* decode literals and length/distances until end-of-block or not enough - input data or output space */ - do { - if (bits < 15) { - hold += (unsigned long)(PUP(in)) << bits; - bits += 8; - hold += (unsigned long)(PUP(in)) << bits; - bits += 8; - } - this = lcode[hold & lmask]; - dolen: - op = (unsigned)(this.bits); - hold >>= op; - bits -= op; - op = (unsigned)(this.op); - if (op == 0) { /* literal */ - PUP(out) = (unsigned char)(this.val); - } - else if (op & 16) { /* length base */ - len = (unsigned)(this.val); - op &= 15; /* number of extra bits */ - if (op) { - if (bits < op) { - hold += (unsigned long)(PUP(in)) << bits; - bits += 8; - } - len += (unsigned)hold & ((1U << op) - 1); - hold >>= op; - bits -= op; - } - if (bits < 15) { - hold += (unsigned long)(PUP(in)) << bits; - bits += 8; - hold += (unsigned long)(PUP(in)) << bits; - bits += 8; - } - this = dcode[hold & dmask]; - dodist: - op = (unsigned)(this.bits); - hold >>= op; - bits -= op; - op = (unsigned)(this.op); - if (op & 16) { /* distance base */ - dist = (unsigned)(this.val); - op &= 15; /* number of extra bits */ - if (bits < op) { - hold += (unsigned long)(PUP(in)) << bits; - bits += 8; - if (bits < op) { - hold += (unsigned long)(PUP(in)) << bits; - bits += 8; - } - } - dist += (unsigned)hold & ((1U << op) - 1); -#ifdef INFLATE_STRICT - if (dist > dmax) { - strm->msg = (char *)"invalid distance too far back"; - state->mode = BAD; - break; - } -#endif - hold >>= op; - bits -= op; - op = (unsigned)(out - beg); /* max distance in output */ - if (dist > op) { /* see if copy from window */ - op = dist - op; 
/* distance back in window */ - if (op > whave) { - strm->msg = (char *)"invalid distance too far back"; - state->mode = BAD; - break; - } - from = window - OFF; - if (write == 0) { /* very common case */ - from += wsize - op; - if (op < len) { /* some from window */ - len -= op; - do { - PUP(out) = PUP(from); - } while (--op); - from = out - dist; /* rest from output */ - } - } - else if (write < op) { /* wrap around window */ - from += wsize + write - op; - op -= write; - if (op < len) { /* some from end of window */ - len -= op; - do { - PUP(out) = PUP(from); - } while (--op); - from = window - OFF; - if (write < len) { /* some from start of window */ - op = write; - len -= op; - do { - PUP(out) = PUP(from); - } while (--op); - from = out - dist; /* rest from output */ - } - } - } - else { /* contiguous in window */ - from += write - op; - if (op < len) { /* some from window */ - len -= op; - do { - PUP(out) = PUP(from); - } while (--op); - from = out - dist; /* rest from output */ - } - } - while (len > 2) { - PUP(out) = PUP(from); - PUP(out) = PUP(from); - PUP(out) = PUP(from); - len -= 3; - } - if (len) { - PUP(out) = PUP(from); - if (len > 1) - PUP(out) = PUP(from); - } - } - else { - unsigned short *sout; - unsigned long loops; - - from = out - dist; /* copy direct from output */ - /* minimum length is three */ - /* Align out addr */ - if (!((long)(out - 1 + OFF) & 1)) { - PUP(out) = PUP(from); - len--; - } - sout = (unsigned short *)(out - OFF); - if (dist > 2) { - unsigned short *sfrom; - - sfrom = (unsigned short *)(from - OFF); - loops = len >> 1; - do -#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS - PUP(sout) = PUP(sfrom); -#else - PUP(sout) = UP_UNALIGNED(sfrom); -#endif - while (--loops); - out = (unsigned char *)sout + OFF; - from = (unsigned char *)sfrom + OFF; - } else { /* dist == 1 or dist == 2 */ - unsigned short pat16; - - pat16 = *(sout-1+OFF); - if (dist == 1) { - union uu mm; - /* copy one char pattern to both bytes */ - mm.us = pat16; - mm.b[0] = mm.b[1]; - pat16 = mm.us; - } - loops = len >> 1; - do - PUP(sout) = pat16; - while (--loops); - out = (unsigned char *)sout + OFF; - } - if (len & 1) - PUP(out) = PUP(from); - } - } - else if ((op & 64) == 0) { /* 2nd level distance code */ - this = dcode[this.val + (hold & ((1U << op) - 1))]; - goto dodist; - } - else { - strm->msg = (char *)"invalid distance code"; - state->mode = BAD; - break; - } - } - else if ((op & 64) == 0) { /* 2nd level length code */ - this = lcode[this.val + (hold & ((1U << op) - 1))]; - goto dolen; - } - else if (op & 32) { /* end-of-block */ - state->mode = TYPE; - break; - } - else { - strm->msg = (char *)"invalid literal/length code"; - state->mode = BAD; - break; - } - } while (in < last && out < end); - - /* return unused bytes (on entry, bits < 8, so in won't go too far back) */ - len = bits >> 3; - in -= len; - bits -= len << 3; - hold &= (1U << bits) - 1; - - /* update state and return */ - strm->next_in = in + OFF; - strm->next_out = out + OFF; - strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last)); - strm->avail_out = (unsigned)(out < end ? 
- 257 + (end - out) : 257 - (out - end)); - state->hold = hold; - state->bits = bits; - return; -} - -/* - inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe): - - Using bit fields for code structure - - Different op definition to avoid & for extra bits (do & for table bits) - - Three separate decoding do-loops for direct, window, and write == 0 - - Special case for distance > 1 copies to do overlapped load and store copy - - Explicit branch predictions (based on measured branch probabilities) - - Deferring match copy and interspersed it with decoding subsequent codes - - Swapping literal/length else - - Swapping window/direct else - - Larger unrolled copy loops (three is about right) - - Moving len -= 3 statement into middle of loop - */ - -#endif /* !ASMINF */ diff --git a/linux/zlib_inflate/inffast.h b/linux/zlib_inflate/inffast.h deleted file mode 100644 index 40315d9..0000000 --- a/linux/zlib_inflate/inffast.h +++ /dev/null @@ -1,11 +0,0 @@ -/* inffast.h -- header to use inffast.c - * Copyright (C) 1995-2003 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* WARNING: this file should *not* be used by applications. It is - part of the implementation of the compression library and is - subject to change. Applications should only use zlib.h. - */ - -void inflate_fast (z_streamp strm, unsigned start); diff --git a/linux/zlib_inflate/inffixed.h b/linux/zlib_inflate/inffixed.h deleted file mode 100644 index 75ed4b5..0000000 --- a/linux/zlib_inflate/inffixed.h +++ /dev/null @@ -1,94 +0,0 @@ - /* inffixed.h -- table for decoding fixed codes - * Generated automatically by makefixed(). - */ - - /* WARNING: this file should *not* be used by applications. It - is part of the implementation of the compression library and - is subject to change. Applications should only use zlib.h. 
- */ - - static const code lenfix[512] = { - {96,7,0},{0,8,80},{0,8,16},{20,8,115},{18,7,31},{0,8,112},{0,8,48}, - {0,9,192},{16,7,10},{0,8,96},{0,8,32},{0,9,160},{0,8,0},{0,8,128}, - {0,8,64},{0,9,224},{16,7,6},{0,8,88},{0,8,24},{0,9,144},{19,7,59}, - {0,8,120},{0,8,56},{0,9,208},{17,7,17},{0,8,104},{0,8,40},{0,9,176}, - {0,8,8},{0,8,136},{0,8,72},{0,9,240},{16,7,4},{0,8,84},{0,8,20}, - {21,8,227},{19,7,43},{0,8,116},{0,8,52},{0,9,200},{17,7,13},{0,8,100}, - {0,8,36},{0,9,168},{0,8,4},{0,8,132},{0,8,68},{0,9,232},{16,7,8}, - {0,8,92},{0,8,28},{0,9,152},{20,7,83},{0,8,124},{0,8,60},{0,9,216}, - {18,7,23},{0,8,108},{0,8,44},{0,9,184},{0,8,12},{0,8,140},{0,8,76}, - {0,9,248},{16,7,3},{0,8,82},{0,8,18},{21,8,163},{19,7,35},{0,8,114}, - {0,8,50},{0,9,196},{17,7,11},{0,8,98},{0,8,34},{0,9,164},{0,8,2}, - {0,8,130},{0,8,66},{0,9,228},{16,7,7},{0,8,90},{0,8,26},{0,9,148}, - {20,7,67},{0,8,122},{0,8,58},{0,9,212},{18,7,19},{0,8,106},{0,8,42}, - {0,9,180},{0,8,10},{0,8,138},{0,8,74},{0,9,244},{16,7,5},{0,8,86}, - {0,8,22},{64,8,0},{19,7,51},{0,8,118},{0,8,54},{0,9,204},{17,7,15}, - {0,8,102},{0,8,38},{0,9,172},{0,8,6},{0,8,134},{0,8,70},{0,9,236}, - {16,7,9},{0,8,94},{0,8,30},{0,9,156},{20,7,99},{0,8,126},{0,8,62}, - {0,9,220},{18,7,27},{0,8,110},{0,8,46},{0,9,188},{0,8,14},{0,8,142}, - {0,8,78},{0,9,252},{96,7,0},{0,8,81},{0,8,17},{21,8,131},{18,7,31}, - {0,8,113},{0,8,49},{0,9,194},{16,7,10},{0,8,97},{0,8,33},{0,9,162}, - {0,8,1},{0,8,129},{0,8,65},{0,9,226},{16,7,6},{0,8,89},{0,8,25}, - {0,9,146},{19,7,59},{0,8,121},{0,8,57},{0,9,210},{17,7,17},{0,8,105}, - {0,8,41},{0,9,178},{0,8,9},{0,8,137},{0,8,73},{0,9,242},{16,7,4}, - {0,8,85},{0,8,21},{16,8,258},{19,7,43},{0,8,117},{0,8,53},{0,9,202}, - {17,7,13},{0,8,101},{0,8,37},{0,9,170},{0,8,5},{0,8,133},{0,8,69}, - {0,9,234},{16,7,8},{0,8,93},{0,8,29},{0,9,154},{20,7,83},{0,8,125}, - {0,8,61},{0,9,218},{18,7,23},{0,8,109},{0,8,45},{0,9,186},{0,8,13}, - {0,8,141},{0,8,77},{0,9,250},{16,7,3},{0,8,83},{0,8,19},{21,8,195}, - {19,7,35},{0,8,115},{0,8,51},{0,9,198},{17,7,11},{0,8,99},{0,8,35}, - {0,9,166},{0,8,3},{0,8,131},{0,8,67},{0,9,230},{16,7,7},{0,8,91}, - {0,8,27},{0,9,150},{20,7,67},{0,8,123},{0,8,59},{0,9,214},{18,7,19}, - {0,8,107},{0,8,43},{0,9,182},{0,8,11},{0,8,139},{0,8,75},{0,9,246}, - {16,7,5},{0,8,87},{0,8,23},{64,8,0},{19,7,51},{0,8,119},{0,8,55}, - {0,9,206},{17,7,15},{0,8,103},{0,8,39},{0,9,174},{0,8,7},{0,8,135}, - {0,8,71},{0,9,238},{16,7,9},{0,8,95},{0,8,31},{0,9,158},{20,7,99}, - {0,8,127},{0,8,63},{0,9,222},{18,7,27},{0,8,111},{0,8,47},{0,9,190}, - {0,8,15},{0,8,143},{0,8,79},{0,9,254},{96,7,0},{0,8,80},{0,8,16}, - {20,8,115},{18,7,31},{0,8,112},{0,8,48},{0,9,193},{16,7,10},{0,8,96}, - {0,8,32},{0,9,161},{0,8,0},{0,8,128},{0,8,64},{0,9,225},{16,7,6}, - {0,8,88},{0,8,24},{0,9,145},{19,7,59},{0,8,120},{0,8,56},{0,9,209}, - {17,7,17},{0,8,104},{0,8,40},{0,9,177},{0,8,8},{0,8,136},{0,8,72}, - {0,9,241},{16,7,4},{0,8,84},{0,8,20},{21,8,227},{19,7,43},{0,8,116}, - {0,8,52},{0,9,201},{17,7,13},{0,8,100},{0,8,36},{0,9,169},{0,8,4}, - {0,8,132},{0,8,68},{0,9,233},{16,7,8},{0,8,92},{0,8,28},{0,9,153}, - {20,7,83},{0,8,124},{0,8,60},{0,9,217},{18,7,23},{0,8,108},{0,8,44}, - {0,9,185},{0,8,12},{0,8,140},{0,8,76},{0,9,249},{16,7,3},{0,8,82}, - {0,8,18},{21,8,163},{19,7,35},{0,8,114},{0,8,50},{0,9,197},{17,7,11}, - {0,8,98},{0,8,34},{0,9,165},{0,8,2},{0,8,130},{0,8,66},{0,9,229}, - {16,7,7},{0,8,90},{0,8,26},{0,9,149},{20,7,67},{0,8,122},{0,8,58}, - {0,9,213},{18,7,19},{0,8,106},{0,8,42},{0,9,181},{0,8,10},{0,8,138}, - 
{0,8,74},{0,9,245},{16,7,5},{0,8,86},{0,8,22},{64,8,0},{19,7,51}, - {0,8,118},{0,8,54},{0,9,205},{17,7,15},{0,8,102},{0,8,38},{0,9,173}, - {0,8,6},{0,8,134},{0,8,70},{0,9,237},{16,7,9},{0,8,94},{0,8,30}, - {0,9,157},{20,7,99},{0,8,126},{0,8,62},{0,9,221},{18,7,27},{0,8,110}, - {0,8,46},{0,9,189},{0,8,14},{0,8,142},{0,8,78},{0,9,253},{96,7,0}, - {0,8,81},{0,8,17},{21,8,131},{18,7,31},{0,8,113},{0,8,49},{0,9,195}, - {16,7,10},{0,8,97},{0,8,33},{0,9,163},{0,8,1},{0,8,129},{0,8,65}, - {0,9,227},{16,7,6},{0,8,89},{0,8,25},{0,9,147},{19,7,59},{0,8,121}, - {0,8,57},{0,9,211},{17,7,17},{0,8,105},{0,8,41},{0,9,179},{0,8,9}, - {0,8,137},{0,8,73},{0,9,243},{16,7,4},{0,8,85},{0,8,21},{16,8,258}, - {19,7,43},{0,8,117},{0,8,53},{0,9,203},{17,7,13},{0,8,101},{0,8,37}, - {0,9,171},{0,8,5},{0,8,133},{0,8,69},{0,9,235},{16,7,8},{0,8,93}, - {0,8,29},{0,9,155},{20,7,83},{0,8,125},{0,8,61},{0,9,219},{18,7,23}, - {0,8,109},{0,8,45},{0,9,187},{0,8,13},{0,8,141},{0,8,77},{0,9,251}, - {16,7,3},{0,8,83},{0,8,19},{21,8,195},{19,7,35},{0,8,115},{0,8,51}, - {0,9,199},{17,7,11},{0,8,99},{0,8,35},{0,9,167},{0,8,3},{0,8,131}, - {0,8,67},{0,9,231},{16,7,7},{0,8,91},{0,8,27},{0,9,151},{20,7,67}, - {0,8,123},{0,8,59},{0,9,215},{18,7,19},{0,8,107},{0,8,43},{0,9,183}, - {0,8,11},{0,8,139},{0,8,75},{0,9,247},{16,7,5},{0,8,87},{0,8,23}, - {64,8,0},{19,7,51},{0,8,119},{0,8,55},{0,9,207},{17,7,15},{0,8,103}, - {0,8,39},{0,9,175},{0,8,7},{0,8,135},{0,8,71},{0,9,239},{16,7,9}, - {0,8,95},{0,8,31},{0,9,159},{20,7,99},{0,8,127},{0,8,63},{0,9,223}, - {18,7,27},{0,8,111},{0,8,47},{0,9,191},{0,8,15},{0,8,143},{0,8,79}, - {0,9,255} - }; - - static const code distfix[32] = { - {16,5,1},{23,5,257},{19,5,17},{27,5,4097},{17,5,5},{25,5,1025}, - {21,5,65},{29,5,16385},{16,5,3},{24,5,513},{20,5,33},{28,5,8193}, - {18,5,9},{26,5,2049},{22,5,129},{64,5,0},{16,5,2},{23,5,385}, - {19,5,25},{27,5,6145},{17,5,7},{25,5,1537},{21,5,97},{29,5,24577}, - {16,5,4},{24,5,769},{20,5,49},{28,5,12289},{18,5,13},{26,5,3073}, - {22,5,193},{64,5,0} - }; diff --git a/linux/zlib_inflate/inflate.c b/linux/zlib_inflate/inflate.c deleted file mode 100644 index 58a733b..0000000 --- a/linux/zlib_inflate/inflate.c +++ /dev/null @@ -1,786 +0,0 @@ -/* inflate.c -- zlib decompression - * Copyright (C) 1995-2005 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - * - * Based on zlib 1.2.3 but modified for the Linux Kernel by - * Richard Purdie - * - * Changes mainly for static instead of dynamic memory allocation - * - */ - -#include -#include "inftrees.h" -#include "inflate.h" -#include "inffast.h" -#include "infutil.h" - -int zlib_inflate_workspacesize(void) -{ - return sizeof(struct inflate_workspace); -} - -int zlib_inflateReset(z_streamp strm) -{ - struct inflate_state *state; - - if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR; - state = (struct inflate_state *)strm->state; - strm->total_in = strm->total_out = state->total = 0; - strm->msg = NULL; - strm->adler = 1; /* to support ill-conceived Java test suite */ - state->mode = HEAD; - state->last = 0; - state->havedict = 0; - state->dmax = 32768U; - state->hold = 0; - state->bits = 0; - state->lencode = state->distcode = state->next = state->codes; - - /* Initialise Window */ - state->wsize = 1U << state->wbits; - state->write = 0; - state->whave = 0; - - return Z_OK; -} - -int zlib_inflateInit2(z_streamp strm, int windowBits) -{ - struct inflate_state *state; - - if (strm == NULL) return Z_STREAM_ERROR; - strm->msg = NULL; /* in case we return an error */ - - state 
= &WS(strm)->inflate_state; - strm->state = (struct internal_state *)state; - - if (windowBits < 0) { - state->wrap = 0; - windowBits = -windowBits; - } - else { - state->wrap = (windowBits >> 4) + 1; - } - if (windowBits < 8 || windowBits > 15) { - return Z_STREAM_ERROR; - } - state->wbits = (unsigned)windowBits; - state->window = &WS(strm)->working_window[0]; - - return zlib_inflateReset(strm); -} - -/* - Return state with length and distance decoding tables and index sizes set to - fixed code decoding. This returns fixed tables from inffixed.h. - */ -static void zlib_fixedtables(struct inflate_state *state) -{ -# include "inffixed.h" - state->lencode = lenfix; - state->lenbits = 9; - state->distcode = distfix; - state->distbits = 5; -} - - -/* - Update the window with the last wsize (normally 32K) bytes written before - returning. This is only called when a window is already in use, or when - output has been written during this inflate call, but the end of the deflate - stream has not been reached yet. It is also called to window dictionary data - when a dictionary is loaded. - - Providing output buffers larger than 32K to inflate() should provide a speed - advantage, since only the last 32K of output is copied to the sliding window - upon return from inflate(), and since all distances after the first 32K of - output will fall in the output data, making match copies simpler and faster. - The advantage may be dependent on the size of the processor's data caches. - */ -static void zlib_updatewindow(z_streamp strm, unsigned out) -{ - struct inflate_state *state; - unsigned copy, dist; - - state = (struct inflate_state *)strm->state; - - /* copy state->wsize or less output bytes into the circular window */ - copy = out - strm->avail_out; - if (copy >= state->wsize) { - memcpy(state->window, strm->next_out - state->wsize, state->wsize); - state->write = 0; - state->whave = state->wsize; - } - else { - dist = state->wsize - state->write; - if (dist > copy) dist = copy; - memcpy(state->window + state->write, strm->next_out - copy, dist); - copy -= dist; - if (copy) { - memcpy(state->window, strm->next_out - copy, copy); - state->write = copy; - state->whave = state->wsize; - } - else { - state->write += dist; - if (state->write == state->wsize) state->write = 0; - if (state->whave < state->wsize) state->whave += dist; - } - } -} - - -/* - * At the end of a Deflate-compressed PPP packet, we expect to have seen - * a `stored' block type value but not the (zero) length bytes. - */ -/* - Returns true if inflate is currently at the end of a block generated by - Z_SYNC_FLUSH or Z_FULL_FLUSH. This function is used by one PPP - implementation to provide an additional safety check. PPP uses - Z_SYNC_FLUSH but removes the length bytes of the resulting empty stored - block. When decompressing, PPP checks that at the end of input packet, - inflate is waiting for these length bytes. 
- */ -static int zlib_inflateSyncPacket(z_streamp strm) -{ - struct inflate_state *state; - - if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR; - state = (struct inflate_state *)strm->state; - - if (state->mode == STORED && state->bits == 0) { - state->mode = TYPE; - return Z_OK; - } - return Z_DATA_ERROR; -} - -/* Macros for inflate(): */ - -/* check function to use adler32() for zlib or crc32() for gzip */ -#define UPDATE(check, buf, len) zlib_adler32(check, buf, len) - -/* Load registers with state in inflate() for speed */ -#define LOAD() \ - do { \ - put = strm->next_out; \ - left = strm->avail_out; \ - next = strm->next_in; \ - have = strm->avail_in; \ - hold = state->hold; \ - bits = state->bits; \ - } while (0) - -/* Restore state from registers in inflate() */ -#define RESTORE() \ - do { \ - strm->next_out = put; \ - strm->avail_out = left; \ - strm->next_in = next; \ - strm->avail_in = have; \ - state->hold = hold; \ - state->bits = bits; \ - } while (0) - -/* Clear the input bit accumulator */ -#define INITBITS() \ - do { \ - hold = 0; \ - bits = 0; \ - } while (0) - -/* Get a byte of input into the bit accumulator, or return from inflate() - if there is no input available. */ -#define PULLBYTE() \ - do { \ - if (have == 0) goto inf_leave; \ - have--; \ - hold += (unsigned long)(*next++) << bits; \ - bits += 8; \ - } while (0) - -/* Assure that there are at least n bits in the bit accumulator. If there is - not enough available input to do that, then return from inflate(). */ -#define NEEDBITS(n) \ - do { \ - while (bits < (unsigned)(n)) \ - PULLBYTE(); \ - } while (0) - -/* Return the low n bits of the bit accumulator (n < 16) */ -#define BITS(n) \ - ((unsigned)hold & ((1U << (n)) - 1)) - -/* Remove n bits from the bit accumulator */ -#define DROPBITS(n) \ - do { \ - hold >>= (n); \ - bits -= (unsigned)(n); \ - } while (0) - -/* Remove zero to seven bits as needed to go to a byte boundary */ -#define BYTEBITS() \ - do { \ - hold >>= bits & 7; \ - bits -= bits & 7; \ - } while (0) - -/* Reverse the bytes in a 32-bit value */ -#define REVERSE(q) \ - ((((q) >> 24) & 0xff) + (((q) >> 8) & 0xff00) + \ - (((q) & 0xff00) << 8) + (((q) & 0xff) << 24)) - -/* - inflate() uses a state machine to process as much input data and generate as - much output data as possible before returning. The state machine is - structured roughly as follows: - - for (;;) switch (state) { - ... - case STATEn: - if (not enough input data or output space to make progress) - return; - ... make progress ... - state = STATEm; - break; - ... - } - - so when inflate() is called again, the same case is attempted again, and - if the appropriate resources are provided, the machine proceeds to the - next state. The NEEDBITS() macro is usually the way the state evaluates - whether it can proceed or should return. NEEDBITS() does the return if - the requested bits are not available. The typical use of the BITS macros - is: - - NEEDBITS(n); - ... do something with BITS(n) ... - DROPBITS(n); - - where NEEDBITS(n) either returns from inflate() if there isn't enough - input left to load n bits into the accumulator, or it continues. BITS(n) - gives the low n bits in the accumulator. When done, DROPBITS(n) drops - the low n bits off the accumulator. INITBITS() clears the accumulator - and sets the number of available bits to zero. BYTEBITS() discards just - enough bits to put the accumulator on a byte boundary. After BYTEBITS() - and a NEEDBITS(8), then BITS(8) would return the next byte in the stream. 
- - NEEDBITS(n) uses PULLBYTE() to get an available byte of input, or to return - if there is no input available. The decoding of variable length codes uses - PULLBYTE() directly in order to pull just enough bytes to decode the next - code, and no more. - - Some states loop until they get enough input, making sure that enough - state information is maintained to continue the loop where it left off - if NEEDBITS() returns in the loop. For example, want, need, and keep - would all have to actually be part of the saved state in case NEEDBITS() - returns: - - case STATEw: - while (want < need) { - NEEDBITS(n); - keep[want++] = BITS(n); - DROPBITS(n); - } - state = STATEx; - case STATEx: - - As shown above, if the next state is also the next case, then the break - is omitted. - - A state may also return if there is not enough output space available to - complete that state. Those states are copying stored data, writing a - literal byte, and copying a matching string. - - When returning, a "goto inf_leave" is used to update the total counters, - update the check value, and determine whether any progress has been made - during that inflate() call in order to return the proper return code. - Progress is defined as a change in either strm->avail_in or strm->avail_out. - When there is a window, goto inf_leave will update the window with the last - output written. If a goto inf_leave occurs in the middle of decompression - and there is no window currently, goto inf_leave will create one and copy - output to the window for the next call of inflate(). - - In this implementation, the flush parameter of inflate() only affects the - return code (per zlib.h). inflate() always writes as much as possible to - strm->next_out, given the space available and the provided input--the effect - documented in zlib.h of Z_SYNC_FLUSH. Furthermore, inflate() always defers - the allocation of and copying into a sliding window until necessary, which - provides the effect documented in zlib.h for Z_FINISH when the entire input - stream available. So the only thing the flush parameter actually does is: - when flush is set to Z_FINISH, inflate() cannot return Z_OK. Instead it - will return Z_BUF_ERROR if it has not reached the end of the stream. 
- */ - -int zlib_inflate(z_streamp strm, int flush) -{ - struct inflate_state *state; - const unsigned char *next; /* next input */ - unsigned char *put; /* next output */ - unsigned have, left; /* available input and output */ - unsigned long hold; /* bit buffer */ - unsigned bits; /* bits in bit buffer */ - unsigned in, out; /* save starting available input and output */ - unsigned copy; /* number of stored or match bytes to copy */ - unsigned char *from; /* where to copy match bytes from */ - code this; /* current decoding table entry */ - code last; /* parent table entry */ - unsigned len; /* length to copy for repeats, bits to drop */ - int ret; /* return code */ - static const unsigned short order[19] = /* permutation of code lengths */ - {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; - - /* Do not check for strm->next_out == NULL here as ppc zImage - inflates to strm->next_out = 0 */ - - if (strm == NULL || strm->state == NULL || - (strm->next_in == NULL && strm->avail_in != 0)) - return Z_STREAM_ERROR; - - state = (struct inflate_state *)strm->state; - - if (state->mode == TYPE) state->mode = TYPEDO; /* skip check */ - LOAD(); - in = have; - out = left; - ret = Z_OK; - for (;;) - switch (state->mode) { - case HEAD: - if (state->wrap == 0) { - state->mode = TYPEDO; - break; - } - NEEDBITS(16); - if ( - ((BITS(8) << 8) + (hold >> 8)) % 31) { - strm->msg = (char *)"incorrect header check"; - state->mode = BAD; - break; - } - if (BITS(4) != Z_DEFLATED) { - strm->msg = (char *)"unknown compression method"; - state->mode = BAD; - break; - } - DROPBITS(4); - len = BITS(4) + 8; - if (len > state->wbits) { - strm->msg = (char *)"invalid window size"; - state->mode = BAD; - break; - } - state->dmax = 1U << len; - strm->adler = state->check = zlib_adler32(0L, NULL, 0); - state->mode = hold & 0x200 ? 
DICTID : TYPE; - INITBITS(); - break; - case DICTID: - NEEDBITS(32); - strm->adler = state->check = REVERSE(hold); - INITBITS(); - state->mode = DICT; - case DICT: - if (state->havedict == 0) { - RESTORE(); - return Z_NEED_DICT; - } - strm->adler = state->check = zlib_adler32(0L, NULL, 0); - state->mode = TYPE; - case TYPE: - if (flush == Z_BLOCK) goto inf_leave; - case TYPEDO: - if (state->last) { - BYTEBITS(); - state->mode = CHECK; - break; - } - NEEDBITS(3); - state->last = BITS(1); - DROPBITS(1); - switch (BITS(2)) { - case 0: /* stored block */ - state->mode = STORED; - break; - case 1: /* fixed block */ - zlib_fixedtables(state); - state->mode = LEN; /* decode codes */ - break; - case 2: /* dynamic block */ - state->mode = TABLE; - break; - case 3: - strm->msg = (char *)"invalid block type"; - state->mode = BAD; - } - DROPBITS(2); - break; - case STORED: - BYTEBITS(); /* go to byte boundary */ - NEEDBITS(32); - if ((hold & 0xffff) != ((hold >> 16) ^ 0xffff)) { - strm->msg = (char *)"invalid stored block lengths"; - state->mode = BAD; - break; - } - state->length = (unsigned)hold & 0xffff; - INITBITS(); - state->mode = COPY; - case COPY: - copy = state->length; - if (copy) { - if (copy > have) copy = have; - if (copy > left) copy = left; - if (copy == 0) goto inf_leave; - memcpy(put, next, copy); - have -= copy; - next += copy; - left -= copy; - put += copy; - state->length -= copy; - break; - } - state->mode = TYPE; - break; - case TABLE: - NEEDBITS(14); - state->nlen = BITS(5) + 257; - DROPBITS(5); - state->ndist = BITS(5) + 1; - DROPBITS(5); - state->ncode = BITS(4) + 4; - DROPBITS(4); -#ifndef PKZIP_BUG_WORKAROUND - if (state->nlen > 286 || state->ndist > 30) { - strm->msg = (char *)"too many length or distance symbols"; - state->mode = BAD; - break; - } -#endif - state->have = 0; - state->mode = LENLENS; - case LENLENS: - while (state->have < state->ncode) { - NEEDBITS(3); - state->lens[order[state->have++]] = (unsigned short)BITS(3); - DROPBITS(3); - } - while (state->have < 19) - state->lens[order[state->have++]] = 0; - state->next = state->codes; - state->lencode = (code const *)(state->next); - state->lenbits = 7; - ret = zlib_inflate_table(CODES, state->lens, 19, &(state->next), - &(state->lenbits), state->work); - if (ret) { - strm->msg = (char *)"invalid code lengths set"; - state->mode = BAD; - break; - } - state->have = 0; - state->mode = CODELENS; - case CODELENS: - while (state->have < state->nlen + state->ndist) { - for (;;) { - this = state->lencode[BITS(state->lenbits)]; - if ((unsigned)(this.bits) <= bits) break; - PULLBYTE(); - } - if (this.val < 16) { - NEEDBITS(this.bits); - DROPBITS(this.bits); - state->lens[state->have++] = this.val; - } - else { - if (this.val == 16) { - NEEDBITS(this.bits + 2); - DROPBITS(this.bits); - if (state->have == 0) { - strm->msg = (char *)"invalid bit length repeat"; - state->mode = BAD; - break; - } - len = state->lens[state->have - 1]; - copy = 3 + BITS(2); - DROPBITS(2); - } - else if (this.val == 17) { - NEEDBITS(this.bits + 3); - DROPBITS(this.bits); - len = 0; - copy = 3 + BITS(3); - DROPBITS(3); - } - else { - NEEDBITS(this.bits + 7); - DROPBITS(this.bits); - len = 0; - copy = 11 + BITS(7); - DROPBITS(7); - } - if (state->have + copy > state->nlen + state->ndist) { - strm->msg = (char *)"invalid bit length repeat"; - state->mode = BAD; - break; - } - while (copy--) - state->lens[state->have++] = (unsigned short)len; - } - } - - /* handle error breaks in while */ - if (state->mode == BAD) break; - - /* build code tables */ - 
state->next = state->codes; - state->lencode = (code const *)(state->next); - state->lenbits = 9; - ret = zlib_inflate_table(LENS, state->lens, state->nlen, &(state->next), - &(state->lenbits), state->work); - if (ret) { - strm->msg = (char *)"invalid literal/lengths set"; - state->mode = BAD; - break; - } - state->distcode = (code const *)(state->next); - state->distbits = 6; - ret = zlib_inflate_table(DISTS, state->lens + state->nlen, state->ndist, - &(state->next), &(state->distbits), state->work); - if (ret) { - strm->msg = (char *)"invalid distances set"; - state->mode = BAD; - break; - } - state->mode = LEN; - case LEN: - if (have >= 6 && left >= 258) { - RESTORE(); - inflate_fast(strm, out); - LOAD(); - break; - } - for (;;) { - this = state->lencode[BITS(state->lenbits)]; - if ((unsigned)(this.bits) <= bits) break; - PULLBYTE(); - } - if (this.op && (this.op & 0xf0) == 0) { - last = this; - for (;;) { - this = state->lencode[last.val + - (BITS(last.bits + last.op) >> last.bits)]; - if ((unsigned)(last.bits + this.bits) <= bits) break; - PULLBYTE(); - } - DROPBITS(last.bits); - } - DROPBITS(this.bits); - state->length = (unsigned)this.val; - if ((int)(this.op) == 0) { - state->mode = LIT; - break; - } - if (this.op & 32) { - state->mode = TYPE; - break; - } - if (this.op & 64) { - strm->msg = (char *)"invalid literal/length code"; - state->mode = BAD; - break; - } - state->extra = (unsigned)(this.op) & 15; - state->mode = LENEXT; - case LENEXT: - if (state->extra) { - NEEDBITS(state->extra); - state->length += BITS(state->extra); - DROPBITS(state->extra); - } - state->mode = DIST; - case DIST: - for (;;) { - this = state->distcode[BITS(state->distbits)]; - if ((unsigned)(this.bits) <= bits) break; - PULLBYTE(); - } - if ((this.op & 0xf0) == 0) { - last = this; - for (;;) { - this = state->distcode[last.val + - (BITS(last.bits + last.op) >> last.bits)]; - if ((unsigned)(last.bits + this.bits) <= bits) break; - PULLBYTE(); - } - DROPBITS(last.bits); - } - DROPBITS(this.bits); - if (this.op & 64) { - strm->msg = (char *)"invalid distance code"; - state->mode = BAD; - break; - } - state->offset = (unsigned)this.val; - state->extra = (unsigned)(this.op) & 15; - state->mode = DISTEXT; - case DISTEXT: - if (state->extra) { - NEEDBITS(state->extra); - state->offset += BITS(state->extra); - DROPBITS(state->extra); - } -#ifdef INFLATE_STRICT - if (state->offset > state->dmax) { - strm->msg = (char *)"invalid distance too far back"; - state->mode = BAD; - break; - } -#endif - if (state->offset > state->whave + out - left) { - strm->msg = (char *)"invalid distance too far back"; - state->mode = BAD; - break; - } - state->mode = MATCH; - case MATCH: - if (left == 0) goto inf_leave; - copy = out - left; - if (state->offset > copy) { /* copy from window */ - copy = state->offset - copy; - if (copy > state->write) { - copy -= state->write; - from = state->window + (state->wsize - copy); - } - else - from = state->window + (state->write - copy); - if (copy > state->length) copy = state->length; - } - else { /* copy from output */ - from = put - state->offset; - copy = state->length; - } - if (copy > left) copy = left; - left -= copy; - state->length -= copy; - do { - *put++ = *from++; - } while (--copy); - if (state->length == 0) state->mode = LEN; - break; - case LIT: - if (left == 0) goto inf_leave; - *put++ = (unsigned char)(state->length); - left--; - state->mode = LEN; - break; - case CHECK: - if (state->wrap) { - NEEDBITS(32); - out -= left; - strm->total_out += out; - state->total += out; - 
if (out) - strm->adler = state->check = - UPDATE(state->check, put - out, out); - out = left; - if (( - REVERSE(hold)) != state->check) { - strm->msg = (char *)"incorrect data check"; - state->mode = BAD; - break; - } - INITBITS(); - } - state->mode = DONE; - case DONE: - ret = Z_STREAM_END; - goto inf_leave; - case BAD: - ret = Z_DATA_ERROR; - goto inf_leave; - case MEM: - return Z_MEM_ERROR; - case SYNC: - default: - return Z_STREAM_ERROR; - } - - /* - Return from inflate(), updating the total counts and the check value. - If there was no progress during the inflate() call, return a buffer - error. Call zlib_updatewindow() to create and/or update the window state. - */ - inf_leave: - RESTORE(); - if (state->wsize || (state->mode < CHECK && out != strm->avail_out)) - zlib_updatewindow(strm, out); - - in -= strm->avail_in; - out -= strm->avail_out; - strm->total_in += in; - strm->total_out += out; - state->total += out; - if (state->wrap && out) - strm->adler = state->check = - UPDATE(state->check, strm->next_out - out, out); - - strm->data_type = state->bits + (state->last ? 64 : 0) + - (state->mode == TYPE ? 128 : 0); - - if (flush == Z_PACKET_FLUSH && ret == Z_OK && - strm->avail_out != 0 && strm->avail_in == 0) - return zlib_inflateSyncPacket(strm); - - if (((in == 0 && out == 0) || flush == Z_FINISH) && ret == Z_OK) - ret = Z_BUF_ERROR; - - return ret; -} - -int zlib_inflateEnd(z_streamp strm) -{ - if (strm == NULL || strm->state == NULL) - return Z_STREAM_ERROR; - return Z_OK; -} - -/* - * This subroutine adds the data at next_in/avail_in to the output history - * without performing any output. The output buffer must be "caught up"; - * i.e. no pending output but this should always be the case. The state must - * be waiting on the start of a block (i.e. mode == TYPE or HEAD). On exit, - * the output will also be caught up, and the checksum will have been updated - * if need be. - */ -int zlib_inflateIncomp(z_stream *z) -{ - struct inflate_state *state = (struct inflate_state *)z->state; - Byte *saved_no = z->next_out; - uInt saved_ao = z->avail_out; - - if (state->mode != TYPE && state->mode != HEAD) - return Z_DATA_ERROR; - - /* Setup some variables to allow misuse of updateWindow */ - z->avail_out = 0; - z->next_out = (unsigned char*)z->next_in + z->avail_in; - - zlib_updatewindow(z, z->avail_in); - - /* Restore saved variables */ - z->avail_out = saved_ao; - z->next_out = saved_no; - - z->adler = state->check = - UPDATE(state->check, z->next_in, z->avail_in); - - z->total_out += z->avail_in; - z->total_in += z->avail_in; - z->next_in += z->avail_in; - state->total += z->avail_in; - z->avail_in = 0; - - return Z_OK; -} diff --git a/linux/zlib_inflate/inflate.h b/linux/zlib_inflate/inflate.h deleted file mode 100644 index 3d17b3d..0000000 --- a/linux/zlib_inflate/inflate.h +++ /dev/null @@ -1,111 +0,0 @@ -#ifndef INFLATE_H -#define INFLATE_H - -/* inflate.h -- internal inflate state definition - * Copyright (C) 1995-2004 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* WARNING: this file should *not* be used by applications. It is - part of the implementation of the compression library and is - subject to change. Applications should only use zlib.h. 
- */ - -/* Possible inflate modes between inflate() calls */ -typedef enum { - HEAD, /* i: waiting for magic header */ - FLAGS, /* i: waiting for method and flags (gzip) */ - TIME, /* i: waiting for modification time (gzip) */ - OS, /* i: waiting for extra flags and operating system (gzip) */ - EXLEN, /* i: waiting for extra length (gzip) */ - EXTRA, /* i: waiting for extra bytes (gzip) */ - NAME, /* i: waiting for end of file name (gzip) */ - COMMENT, /* i: waiting for end of comment (gzip) */ - HCRC, /* i: waiting for header crc (gzip) */ - DICTID, /* i: waiting for dictionary check value */ - DICT, /* waiting for inflateSetDictionary() call */ - TYPE, /* i: waiting for type bits, including last-flag bit */ - TYPEDO, /* i: same, but skip check to exit inflate on new block */ - STORED, /* i: waiting for stored size (length and complement) */ - COPY, /* i/o: waiting for input or output to copy stored block */ - TABLE, /* i: waiting for dynamic block table lengths */ - LENLENS, /* i: waiting for code length code lengths */ - CODELENS, /* i: waiting for length/lit and distance code lengths */ - LEN, /* i: waiting for length/lit code */ - LENEXT, /* i: waiting for length extra bits */ - DIST, /* i: waiting for distance code */ - DISTEXT, /* i: waiting for distance extra bits */ - MATCH, /* o: waiting for output space to copy string */ - LIT, /* o: waiting for output space to write literal */ - CHECK, /* i: waiting for 32-bit check value */ - LENGTH, /* i: waiting for 32-bit length (gzip) */ - DONE, /* finished check, done -- remain here until reset */ - BAD, /* got a data error -- remain here until reset */ - MEM, /* got an inflate() memory error -- remain here until reset */ - SYNC /* looking for synchronization bytes to restart inflate() */ -} inflate_mode; - -/* - State transitions between above modes - - - (most modes can go to the BAD or MEM mode -- not shown for clarity) - - Process header: - HEAD -> (gzip) or (zlib) - (gzip) -> FLAGS -> TIME -> OS -> EXLEN -> EXTRA -> NAME - NAME -> COMMENT -> HCRC -> TYPE - (zlib) -> DICTID or TYPE - DICTID -> DICT -> TYPE - Read deflate blocks: - TYPE -> STORED or TABLE or LEN or CHECK - STORED -> COPY -> TYPE - TABLE -> LENLENS -> CODELENS -> LEN - Read deflate codes: - LEN -> LENEXT or LIT or TYPE - LENEXT -> DIST -> DISTEXT -> MATCH -> LEN - LIT -> LEN - Process trailer: - CHECK -> LENGTH -> DONE - */ - -/* state maintained between inflate() calls. Approximately 7K bytes. 
*/ -struct inflate_state { - inflate_mode mode; /* current inflate mode */ - int last; /* true if processing last block */ - int wrap; /* bit 0 true for zlib, bit 1 true for gzip */ - int havedict; /* true if dictionary provided */ - int flags; /* gzip header method and flags (0 if zlib) */ - unsigned dmax; /* zlib header max distance (INFLATE_STRICT) */ - unsigned long check; /* protected copy of check value */ - unsigned long total; /* protected copy of output count */ - /* gz_headerp head; */ /* where to save gzip header information */ - /* sliding window */ - unsigned wbits; /* log base 2 of requested window size */ - unsigned wsize; /* window size or zero if not using window */ - unsigned whave; /* valid bytes in the window */ - unsigned write; /* window write index */ - unsigned char *window; /* allocated sliding window, if needed */ - /* bit accumulator */ - unsigned long hold; /* input bit accumulator */ - unsigned bits; /* number of bits in "in" */ - /* for string and stored block copying */ - unsigned length; /* literal or length of data to copy */ - unsigned offset; /* distance back to copy string from */ - /* for table and code decoding */ - unsigned extra; /* extra bits needed */ - /* fixed and dynamic code tables */ - code const *lencode; /* starting table for length/literal codes */ - code const *distcode; /* starting table for distance codes */ - unsigned lenbits; /* index bits for lencode */ - unsigned distbits; /* index bits for distcode */ - /* dynamic table building */ - unsigned ncode; /* number of code length code lengths */ - unsigned nlen; /* number of length code lengths */ - unsigned ndist; /* number of distance code lengths */ - unsigned have; /* number of code lengths in lens[] */ - code *next; /* next available space in codes[] */ - unsigned short lens[320]; /* temporary storage for code lengths */ - unsigned short work[288]; /* work area for code table building */ - code codes[ENOUGH]; /* space for code tables */ -}; -#endif diff --git a/linux/zlib_inflate/inftrees.c b/linux/zlib_inflate/inftrees.c deleted file mode 100644 index 3fe6ce5..0000000 --- a/linux/zlib_inflate/inftrees.c +++ /dev/null @@ -1,315 +0,0 @@ -/* inftrees.c -- generate Huffman trees for efficient decoding - * Copyright (C) 1995-2005 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -#include -#include "inftrees.h" - -#define MAXBITS 15 - -/* - Build a set of tables to decode the provided canonical Huffman code. - The code lengths are lens[0..codes-1]. The result starts at *table, - whose indices are 0..2^bits-1. work is a writable array of at least - lens shorts, which is used as a work area. type is the type of code - to be generated, CODES, LENS, or DISTS. On return, zero is success, - -1 is an invalid code, and +1 means that ENOUGH isn't enough. table - on return points to the next available entry's address. bits is the - requested root table index bits, and on return it is the actual root - table index bits. It will differ if the request is greater than the - longest code or if it is less than the shortest code. 
- */ -int zlib_inflate_table(codetype type, unsigned short *lens, unsigned codes, - code **table, unsigned *bits, unsigned short *work) -{ - unsigned len; /* a code's length in bits */ - unsigned sym; /* index of code symbols */ - unsigned min, max; /* minimum and maximum code lengths */ - unsigned root; /* number of index bits for root table */ - unsigned curr; /* number of index bits for current table */ - unsigned drop; /* code bits to drop for sub-table */ - int left; /* number of prefix codes available */ - unsigned used; /* code entries in table used */ - unsigned huff; /* Huffman code */ - unsigned incr; /* for incrementing code, index */ - unsigned fill; /* index for replicating entries */ - unsigned low; /* low bits for current root entry */ - unsigned mask; /* mask for low root bits */ - code this; /* table entry for duplication */ - code *next; /* next available space in table */ - const unsigned short *base; /* base value table to use */ - const unsigned short *extra; /* extra bits table to use */ - int end; /* use base and extra for symbol > end */ - unsigned short count[MAXBITS+1]; /* number of codes of each length */ - unsigned short offs[MAXBITS+1]; /* offsets in table for each length */ - static const unsigned short lbase[31] = { /* Length codes 257..285 base */ - 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, - 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0}; - static const unsigned short lext[31] = { /* Length codes 257..285 extra */ - 16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, - 19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 201, 196}; - static const unsigned short dbase[32] = { /* Distance codes 0..29 base */ - 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, - 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, - 8193, 12289, 16385, 24577, 0, 0}; - static const unsigned short dext[32] = { /* Distance codes 0..29 extra */ - 16, 16, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22, - 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, - 28, 28, 29, 29, 64, 64}; - - /* - Process a set of code lengths to create a canonical Huffman code. The - code lengths are lens[0..codes-1]. Each length corresponds to the - symbols 0..codes-1. The Huffman code is generated by first sorting the - symbols by length from short to long, and retaining the symbol order - for codes with equal lengths. Then the code starts with all zero bits - for the first code of the shortest length, and the codes are integer - increments for the same length, and zeros are appended as the length - increases. For the deflate format, these bits are stored backwards - from their more natural integer increment ordering, and so when the - decoding tables are built in the large loop below, the integer codes - are incremented backwards. - - This routine assumes, but does not check, that all of the entries in - lens[] are in the range 0..MAXBITS. The caller must assure this. - 1..MAXBITS is interpreted as that code length. zero means that that - symbol does not occur in this code. - - The codes are sorted by computing a count of codes for each length, - creating from that a table of starting indices for each length in the - sorted table, and then entering the symbols in order in the sorted - table. The sorted table is work[], with that space being provided by - the caller. - - The length counts are used for other purposes as well, i.e. 
finding - the minimum and maximum length codes, determining if there are any - codes at all, checking for a valid set of lengths, and looking ahead - at length counts to determine sub-table sizes when building the - decoding tables. - */ - - /* accumulate lengths for codes (assumes lens[] all in 0..MAXBITS) */ - for (len = 0; len <= MAXBITS; len++) - count[len] = 0; - for (sym = 0; sym < codes; sym++) - count[lens[sym]]++; - - /* bound code lengths, force root to be within code lengths */ - root = *bits; - for (max = MAXBITS; max >= 1; max--) - if (count[max] != 0) break; - if (root > max) root = max; - if (max == 0) { /* no symbols to code at all */ - this.op = (unsigned char)64; /* invalid code marker */ - this.bits = (unsigned char)1; - this.val = (unsigned short)0; - *(*table)++ = this; /* make a table to force an error */ - *(*table)++ = this; - *bits = 1; - return 0; /* no symbols, but wait for decoding to report error */ - } - for (min = 1; min <= MAXBITS; min++) - if (count[min] != 0) break; - if (root < min) root = min; - - /* check for an over-subscribed or incomplete set of lengths */ - left = 1; - for (len = 1; len <= MAXBITS; len++) { - left <<= 1; - left -= count[len]; - if (left < 0) return -1; /* over-subscribed */ - } - if (left > 0 && (type == CODES || max != 1)) - return -1; /* incomplete set */ - - /* generate offsets into symbol table for each length for sorting */ - offs[1] = 0; - for (len = 1; len < MAXBITS; len++) - offs[len + 1] = offs[len] + count[len]; - - /* sort symbols by length, by symbol order within each length */ - for (sym = 0; sym < codes; sym++) - if (lens[sym] != 0) work[offs[lens[sym]]++] = (unsigned short)sym; - - /* - Create and fill in decoding tables. In this loop, the table being - filled is at next and has curr index bits. The code being used is huff - with length len. That code is converted to an index by dropping drop - bits off of the bottom. For codes where len is less than drop + curr, - those top drop + curr - len bits are incremented through all values to - fill the table with replicated entries. - - root is the number of index bits for the root table. When len exceeds - root, sub-tables are created pointed to by the root entry with an index - of the low root bits of huff. This is saved in low to check for when a - new sub-table should be started. drop is zero when the root table is - being filled, and drop is root when sub-tables are being filled. - - When a new sub-table is needed, it is necessary to look ahead in the - code lengths to determine what size sub-table is needed. The length - counts are used for this, and so count[] is decremented as codes are - entered in the tables. - - used keeps track of how many table entries have been allocated from the - provided *table space. It is checked when a LENS table is being made - against the space in *table, ENOUGH, minus the maximum space needed by - the worst case distance code, MAXD. This should never happen, but the - sufficiency of ENOUGH has not been proven exhaustively, hence the check. - This assumes that when type == LENS, bits == 9. - - sym increments through all symbols, and the loop terminates when - all codes of length max, i.e. all codes, have been processed. This - routine permits incomplete codes, so another loop after this one fills - in the rest of the decoding tables with invalid code markers. 
- */ - - /* set up for code type */ - switch (type) { - case CODES: - base = extra = work; /* dummy value--not used */ - end = 19; - break; - case LENS: - base = lbase; - base -= 257; - extra = lext; - extra -= 257; - end = 256; - break; - default: /* DISTS */ - base = dbase; - extra = dext; - end = -1; - } - - /* initialize state for loop */ - huff = 0; /* starting code */ - sym = 0; /* starting code symbol */ - len = min; /* starting code length */ - next = *table; /* current table to fill in */ - curr = root; /* current table index bits */ - drop = 0; /* current bits to drop from code for index */ - low = (unsigned)(-1); /* trigger new sub-table when len > root */ - used = 1U << root; /* use root table entries */ - mask = used - 1; /* mask for comparing low */ - - /* check available table space */ - if (type == LENS && used >= ENOUGH - MAXD) - return 1; - - /* process all codes and make table entries */ - for (;;) { - /* create table entry */ - this.bits = (unsigned char)(len - drop); - if ((int)(work[sym]) < end) { - this.op = (unsigned char)0; - this.val = work[sym]; - } - else if ((int)(work[sym]) > end) { - this.op = (unsigned char)(extra[work[sym]]); - this.val = base[work[sym]]; - } - else { - this.op = (unsigned char)(32 + 64); /* end of block */ - this.val = 0; - } - - /* replicate for those indices with low len bits equal to huff */ - incr = 1U << (len - drop); - fill = 1U << curr; - min = fill; /* save offset to next table */ - do { - fill -= incr; - next[(huff >> drop) + fill] = this; - } while (fill != 0); - - /* backwards increment the len-bit code huff */ - incr = 1U << (len - 1); - while (huff & incr) - incr >>= 1; - if (incr != 0) { - huff &= incr - 1; - huff += incr; - } - else - huff = 0; - - /* go to next symbol, update count, len */ - sym++; - if (--(count[len]) == 0) { - if (len == max) break; - len = lens[work[sym]]; - } - - /* create new sub-table if needed */ - if (len > root && (huff & mask) != low) { - /* if first time, transition to sub-tables */ - if (drop == 0) - drop = root; - - /* increment past last table */ - next += min; /* here min is 1 << curr */ - - /* determine length of next table */ - curr = len - drop; - left = (int)(1 << curr); - while (curr + drop < max) { - left -= count[curr + drop]; - if (left <= 0) break; - curr++; - left <<= 1; - } - - /* check for enough space */ - used += 1U << curr; - if (type == LENS && used >= ENOUGH - MAXD) - return 1; - - /* point entry in root table to sub-table */ - low = huff & mask; - (*table)[low].op = (unsigned char)curr; - (*table)[low].bits = (unsigned char)root; - (*table)[low].val = (unsigned short)(next - *table); - } - } - - /* - Fill in rest of table for incomplete codes. This loop is similar to the - loop above in incrementing huff for table indices. It is assumed that - len is equal to curr + drop, so there is no loop needed to increment - through high index bits. When the current sub-table is filled, the loop - drops back to the root table to fill in any remaining entries there. 
- */ - this.op = (unsigned char)64; /* invalid code marker */ - this.bits = (unsigned char)(len - drop); - this.val = (unsigned short)0; - while (huff != 0) { - /* when done with sub-table, drop back to root table */ - if (drop != 0 && (huff & mask) != low) { - drop = 0; - len = root; - next = *table; - this.bits = (unsigned char)len; - } - - /* put invalid code marker in table */ - next[huff >> drop] = this; - - /* backwards increment the len-bit code huff */ - incr = 1U << (len - 1); - while (huff & incr) - incr >>= 1; - if (incr != 0) { - huff &= incr - 1; - huff += incr; - } - else - huff = 0; - } - - /* set return parameters */ - *table += used; - *bits = root; - return 0; -} diff --git a/linux/zlib_inflate/inftrees.h b/linux/zlib_inflate/inftrees.h deleted file mode 100644 index b70b473..0000000 --- a/linux/zlib_inflate/inftrees.h +++ /dev/null @@ -1,59 +0,0 @@ -#ifndef INFTREES_H -#define INFTREES_H - -/* inftrees.h -- header to use inftrees.c - * Copyright (C) 1995-2005 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* WARNING: this file should *not* be used by applications. It is - part of the implementation of the compression library and is - subject to change. Applications should only use zlib.h. - */ - -/* Structure for decoding tables. Each entry provides either the - information needed to do the operation requested by the code that - indexed that table entry, or it provides a pointer to another - table that indexes more bits of the code. op indicates whether - the entry is a pointer to another table, a literal, a length or - distance, an end-of-block, or an invalid code. For a table - pointer, the low four bits of op is the number of index bits of - that table. For a length or distance, the low four bits of op - is the number of extra bits to get after the code. bits is - the number of bits in this code or part of the code to drop off - of the bit buffer. val is the actual byte to output in the case - of a literal, the base length or distance, or the offset from - the current table to the next table. Each entry is four bytes. */ -typedef struct { - unsigned char op; /* operation, extra bits, table bits */ - unsigned char bits; /* bits in this part of the code */ - unsigned short val; /* offset in table or code value */ -} code; - -/* op values as set by inflate_table(): - 00000000 - literal - 0000tttt - table link, tttt != 0 is the number of table index bits - 0001eeee - length or distance, eeee is the number of extra bits - 01100000 - end of block - 01000000 - invalid code - */ - -/* Maximum size of dynamic tree. The maximum found in a long but non- - exhaustive search was 1444 code structures (852 for length/literals - and 592 for distances, the latter actually the result of an - exhaustive search). The true maximum is not known, but the value - below is more than safe. */ -#define ENOUGH 2048 -#define MAXD 592 - -/* Type of code to build for inftable() */ -typedef enum { - CODES, - LENS, - DISTS -} codetype; - -extern int zlib_inflate_table (codetype type, unsigned short *lens, - unsigned codes, code **table, - unsigned *bits, unsigned short *work); -#endif diff --git a/linux/zlib_inflate/infutil.c b/linux/zlib_inflate/infutil.c deleted file mode 100644 index 4824c2c..0000000 --- a/linux/zlib_inflate/infutil.c +++ /dev/null @@ -1,49 +0,0 @@ -#include -#include -#include -#include - -/* Utility function: initialize zlib, unpack binary blob, clean up zlib, - * return len or negative error code. 
- */ -int zlib_inflate_blob(void *gunzip_buf, unsigned int sz, - const void *buf, unsigned int len) -{ - const u8 *zbuf = buf; - struct z_stream_s *strm; - int rc; - - rc = -ENOMEM; - strm = kmalloc(sizeof(*strm), GFP_KERNEL); - if (strm == NULL) - goto gunzip_nomem1; - strm->workspace = kmalloc(zlib_inflate_workspacesize(), GFP_KERNEL); - if (strm->workspace == NULL) - goto gunzip_nomem2; - - /* gzip header (1f,8b,08... 10 bytes total + possible asciz filename) - * expected to be stripped from input - */ - strm->next_in = zbuf; - strm->avail_in = len; - strm->next_out = gunzip_buf; - strm->avail_out = sz; - - rc = zlib_inflateInit2(strm, -MAX_WBITS); - if (rc == Z_OK) { - rc = zlib_inflate(strm, Z_FINISH); - /* after Z_FINISH, only Z_STREAM_END is "we unpacked it all" */ - if (rc == Z_STREAM_END) - rc = sz - strm->avail_out; - else - rc = -EINVAL; - zlib_inflateEnd(strm); - } else - rc = -EINVAL; - - kfree(strm->workspace); -gunzip_nomem2: - kfree(strm); -gunzip_nomem1: - return rc; /* returns Z_OK (0) if successful */ -} diff --git a/linux/zlib_inflate/infutil.h b/linux/zlib_inflate/infutil.h deleted file mode 100644 index eb1a900..0000000 --- a/linux/zlib_inflate/infutil.h +++ /dev/null @@ -1,25 +0,0 @@ -/* infutil.h -- types and macros common to blocks and codes - * Copyright (C) 1995-1998 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* WARNING: this file should *not* be used by applications. It is - part of the implementation of the compression library and is - subject to change. Applications should only use zlib.h. - */ - -#ifndef _INFUTIL_H -#define _INFUTIL_H - -#include - -/* memory allocation for inflation */ - -struct inflate_workspace { - struct inflate_state inflate_state; - unsigned char working_window[1 << MAX_WBITS]; -}; - -#define WS(z) ((struct inflate_workspace *)(z->workspace)) - -#endif
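Note: the blob-gunzip operation that the deleted zlib_inflate_blob() helper performed maps directly onto the ordinary userspace zlib API. A minimal, illustrative sketch only -- it assumes the system <zlib.h> and is not the in-tree replacement code:

/*
 * Illustrative sketch (assumes the system <zlib.h>; not the in-tree code):
 * inflate a raw-deflate blob -- the same operation as the deleted
 * zlib_inflate_blob() helper -- using the regular zlib API.
 */
#include <string.h>
#include <zlib.h>

static long inflate_blob(void *dst, unsigned int dst_len,
			 const void *src, unsigned int src_len)
{
	z_stream strm;
	long ret = -1;

	memset(&strm, 0, sizeof(strm));	/* zalloc/zfree/opaque = Z_NULL */

	/* -MAX_WBITS: raw deflate data, no zlib/gzip header or trailer */
	if (inflateInit2(&strm, -MAX_WBITS) != Z_OK)
		return -1;

	strm.next_in   = (Bytef *) src;
	strm.avail_in  = src_len;
	strm.next_out  = dst;
	strm.avail_out = dst_len;

	/*
	 * Z_FINISH: the whole input is present; only Z_STREAM_END means the
	 * stream was decoded completely and its check value passed.
	 */
	if (inflate(&strm, Z_FINISH) == Z_STREAM_END)
		ret = (long) (dst_len - strm.avail_out);

	inflateEnd(&strm);
	return ret;		/* bytes produced, or -1 on error */
}

With the whole compressed blob in memory and a sufficiently large destination buffer, the single Z_FINISH call is enough; a caller that streams its input would instead loop on inflate() until it returns Z_STREAM_END.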