-BCACHE_REVISION=561f3067172cbfc63a680cfb670d558724441123
+BCACHE_REVISION=aa4471ac314a1f117957f9fc59c1bfbdf965a28c
-D_GNU_SOURCE \
-D_LGPL_SOURCE \
-DRCU_MEMBARRIER \
+ -DNO_BCACHE_ACCOUNTING \
+ -DNO_BCACHE_BLOCKDEV \
+ -DNO_BCACHE_CHARDEV \
+ -DNO_BCACHE_FS \
+ -DNO_BCACHE_NOTIFY \
+ -DNO_BCACHE_WRITEBACK \
$(EXTRA_CFLAGS)
LDFLAGS+=-O2 -g
LDFLAGS+=-flto
endif
-PKGCONFIG_LIBS="blkid uuid liburcu libsodium"
+PKGCONFIG_LIBS="blkid uuid liburcu libsodium zlib"
CFLAGS+=`pkg-config --cflags ${PKGCONFIG_LIBS}`
LDLIBS+=`pkg-config --libs ${PKGCONFIG_LIBS}` \
-lm -lpthread -lrt -lscrypt -lkeyutils
#include "libbcache.h"
#include "tools-util.h"
-/* stub out the bcache code we aren't building: */
-
-struct block_device;
-struct bcache_superblock;
-struct cache;
-struct cache_accounting;
-struct cache_set;
-struct closure;
-struct file;
-struct kobject;
-
-struct kmem_cache *bch_search_cache;
-
-const char *bch_backing_dev_register(struct bcache_superblock *sb)
-{
- return "not implemented";
-}
-void bch_blockdevs_stop(struct cache_set *c) {}
-int bch_blockdev_volumes_start(struct cache_set *c) { return 0; }
-void bch_attach_backing_devs(struct cache_set *c) {}
-bool bch_is_open_backing_dev(struct block_device *bdev) { return false; }
-void bch_blockdev_exit(void) {}
-int bch_blockdev_init(void) { return 0; }
-
-void bch_fs_exit(void) {}
-int bch_fs_init(void) { return 0; }
-
-const struct file_operations bch_chardev_fops;
-
-void bcache_dev_sectors_dirty_add(struct cache_set *c, unsigned inode,
- u64 offset, int nr_sectors) {}
-void bch_writeback_recalc_oldest_gens(struct cache_set *c) {}
-
-void bch_notify_cache_set_read_write(struct cache_set *c) {}
-void bch_notify_cache_set_read_only(struct cache_set *c) {}
-void bch_notify_cache_set_stopped(struct cache_set *c) {}
-void bch_notify_cache_read_write(struct cache *c) {}
-void bch_notify_cache_read_only(struct cache *c) {}
-void bch_notify_cache_added(struct cache *c) {}
-void bch_notify_cache_removing(struct cache *c) {}
-void bch_notify_cache_removed(struct cache *c) {}
-void bch_notify_cache_remove_failed(struct cache *c) {}
-void bch_notify_cache_error(struct cache *c, bool b) {}
-
-int bch_cache_accounting_add_kobjs(struct cache_accounting *acc,
- struct kobject *parent) { return 0; }
-void bch_cache_accounting_destroy(struct cache_accounting *acc) {}
-void bch_cache_accounting_init(struct cache_accounting *acc,
- struct closure *parent) {}
-
#define bch_fmt(_c, fmt) fmt "\n"
enum fsck_err_opts fsck_err_opt;
#define SHIM_KTYPE(type) \
struct kobj_type type ## _ktype = { .release = type ## _release, }
-static void bch_cache_set_internal_release(struct kobject *k) {}
+static void bch_fs_internal_release(struct kobject *k) {}
-static void bch_cache_set_opts_dir_release(struct kobject *k) {}
+static void bch_fs_opts_dir_release(struct kobject *k) {}
-static void bch_cache_set_time_stats_release(struct kobject *k) {}
+static void bch_fs_time_stats_release(struct kobject *k) {}
-SHIM_KTYPE(bch_cache);
-SHIM_KTYPE(bch_cache_set);
-SHIM_KTYPE(bch_cache_set_internal);
-SHIM_KTYPE(bch_cache_set_time_stats);
-SHIM_KTYPE(bch_cache_set_opts_dir);
+SHIM_KTYPE(bch_dev);
+SHIM_KTYPE(bch_fs);
+SHIM_KTYPE(bch_fs_internal);
+SHIM_KTYPE(bch_fs_time_stats);
+SHIM_KTYPE(bch_fs_opts_dir);
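For reference, each SHIM_KTYPE(x) invocation above expands to a one-member kobj_type wired to the matching release hook; e.g. the bch_fs_internal line becomes:

/* expansion of SHIM_KTYPE(bch_fs_internal), shown for illustration only */
struct kobj_type bch_fs_internal_ktype = { .release = bch_fs_internal_release, };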
int cmd_dump(int argc, char *argv[])
{
- DECLARE_COMPLETION_ONSTACK(shutdown);
- struct cache_set_opts opts = cache_set_opts_empty();
+ struct bch_opts opts = bch_opts_empty();
struct cache_set *c = NULL;
const char *err;
char *out = NULL, *buf;
buf = alloca(strlen(out) + 10);
strcpy(buf, out);
- err = bch_register_cache_set(argv + optind, argc - optind, opts, &c);
+ err = bch_fs_open(argv + optind, argc - optind, opts, &c);
if (err)
die("error opening %s: %s", argv[optind], err);
up_read(&c->gc_lock);
- c->stop_completion = &shutdown;
- bch_cache_set_stop(c);
- closure_put(&c->cl);
- wait_for_completion(&shutdown);
+ bch_fs_stop_sync(c);
return 0;
}
int cmd_list(int argc, char *argv[])
{
- DECLARE_COMPLETION_ONSTACK(shutdown);
- struct cache_set_opts opts = cache_set_opts_empty();
+ struct bch_opts opts = bch_opts_empty();
struct cache_set *c = NULL;
enum btree_id btree_id = BTREE_ID_EXTENTS;
struct bpos start = POS_MIN, end = POS_MAX;
if (optind >= argc)
die("Please supply device(s) to check");
- err = bch_register_cache_set(argv + optind, argc - optind, opts, &c);
+ err = bch_fs_open(argv + optind, argc - optind, opts, &c);
if (err)
die("error opening %s: %s", argv[optind], err);
die("Invalid mode");
}
- c->stop_completion = &shutdown;
- bch_cache_set_stop(c);
- closure_put(&c->cl);
- wait_for_completion(&shutdown);
+ bch_fs_stop_sync(c);
return 0;
}
int cmd_fsck(int argc, char *argv[])
{
- DECLARE_COMPLETION_ONSTACK(shutdown);
- struct cache_set_opts opts = cache_set_opts_empty();
+ struct bch_opts opts = bch_opts_empty();
struct cache_set *c = NULL;
const char *err;
int opt;
if (optind >= argc)
die("Please supply device(s) to check");
- err = bch_register_cache_set(argv + optind, argc - optind, opts, &c);
+ err = bch_fs_open(argv + optind, argc - optind, opts, &c);
if (err)
die("error opening %s: %s", argv[optind], err);
- c->stop_completion = &shutdown;
- bch_cache_set_stop(c);
- closure_put(&c->cl);
-
- /* Killable? */
- wait_for_completion(&shutdown);
-
+ bch_fs_stop_sync(c);
return 0;
}
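All three commands now delegate the open-coded shutdown sequence to a single helper; presumably bch_fs_stop_sync() wraps the same completion-based teardown that was removed above. A sketch of that equivalence, reconstructed only from the deleted lines (not from the library source):

/* Hypothetical body of bch_fs_stop_sync(), pieced together from the
 * boilerplate deleted above; illustration only. */
static void fs_stop_sync_sketch(struct cache_set *c)
{
	DECLARE_COMPLETION_ONSTACK(shutdown);

	c->stop_completion = &shutdown;		/* signalled when teardown finishes */
	bch_cache_set_stop(c);			/* kick off asynchronous shutdown */
	closure_put(&c->cl);			/* drop our reference */
	wait_for_completion(&shutdown);		/* block until fully stopped */
}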
Priority: optional
Standards-Version: 3.9.5
Build-Depends: debhelper (>= 9), pkg-config, libblkid-dev, uuid-dev,
- libscrypt-dev, libsodium-dev, libkeyutils-dev, liburcu-dev
+ libscrypt-dev, libsodium-dev, libkeyutils-dev, liburcu-dev, zlib1g-dev
Vcs-Browser: http://anonscm.debian.org/gitweb/?p=collab-maint/bcache-tools.git
Vcs-Git: git://anonscm.debian.org/collab-maint/bcache-tools.git
Homepage: http://bcache.evilpiepirate.org/
BCH_COMPRESSION_NR = 3,
};
-/**
- * BCH_OPT(name, choices, min, max, sb_option, sysfs_writeable)
- *
- * @name - name of mount option, sysfs attribute, and struct cache_set_opts
- * member
- *
- * @choices - array of strings that the user can select from - option is by
- * array index
- *
- * Booleans are special cased; if @choices is bch_bool_opt the mount
- * options name and noname will work as expected.
- *
- * @min, @max
- *
- * @sb_option - name of corresponding superblock option
- *
- * @sysfs_writeable - if true, option will be modifiable at runtime via sysfs
- */
-
-#define BCH_SB_OPTS() \
- BCH_OPT(errors, \
- bch_error_actions, \
- 0, BCH_NR_ERROR_ACTIONS, \
- BCH_SB_ERROR_ACTION, \
- true) \
- BCH_OPT(metadata_replicas, \
- bch_uint_opt, \
- 0, BCH_REPLICAS_MAX, \
- BCH_SB_META_REPLICAS_WANT, \
- false) \
- BCH_OPT(data_replicas, \
- bch_uint_opt, \
- 0, BCH_REPLICAS_MAX, \
- BCH_SB_DATA_REPLICAS_WANT, \
- false) \
- BCH_OPT(metadata_checksum, \
- bch_csum_types, \
- 0, BCH_CSUM_OPT_NR, \
- BCH_SB_META_CSUM_TYPE, \
- true) \
- BCH_OPT(data_checksum, \
- bch_csum_types, \
- 0, BCH_CSUM_OPT_NR, \
- BCH_SB_DATA_CSUM_TYPE, \
- true) \
- BCH_OPT(compression, \
- bch_compression_types, \
- 0, BCH_COMPRESSION_NR, \
- BCH_SB_COMPRESSION_TYPE, \
- true) \
- BCH_OPT(str_hash, \
- bch_str_hash_types, \
- 0, BCH_STR_HASH_NR, \
- BCH_SB_STR_HASH_TYPE, \
- true) \
- BCH_OPT(inodes_32bit, \
- bch_bool_opt, 0, 2, \
- BCH_SB_INODE_32BIT, \
- true) \
- BCH_OPT(gc_reserve_percent, \
- bch_uint_opt, \
- 5, 21, \
- BCH_SB_GC_RESERVE, \
- false) \
- BCH_OPT(root_reserve_percent, \
- bch_uint_opt, \
- 0, 100, \
- BCH_SB_ROOT_RESERVE, \
- false) \
- BCH_OPT(wide_macs, \
- bch_bool_opt, 0, 2, \
- BCH_SB_128_BIT_MACS, \
- true)
-
/* backing device specific stuff: */
struct backingdev_sb {
#include <linux/wait.h>
-/*
- * struct completion - structure used to maintain state for a "completion"
- *
- * This is the opaque structure used to maintain the state for a "completion".
- * Completions currently use a FIFO to queue threads that have to wait for
- * the "completion" event.
- *
- * See also: complete(), wait_for_completion() (and friends _timeout,
- * _interruptible, _interruptible_timeout, and _killable), init_completion(),
- * reinit_completion(), and macros DECLARE_COMPLETION(),
- * DECLARE_COMPLETION_ONSTACK().
- */
struct completion {
unsigned int done;
wait_queue_head_t wait;
};
-#define COMPLETION_INITIALIZER(work) \
- { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) }
+#define DECLARE_COMPLETION(work) \
+ struct completion work = { \
+ .done = 0, \
+ .wait = __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) \
+ }
-#define COMPLETION_INITIALIZER_ONSTACK(work) \
- ({ init_completion(&work); work; })
-
-#define DECLARE_COMPLETION(work) \
- struct completion work = COMPLETION_INITIALIZER(work)
#define DECLARE_COMPLETION_ONSTACK(work) DECLARE_COMPLETION(work)
-/**
- * init_completion - Initialize a dynamically allocated completion
- * @x: pointer to completion structure that is to be initialized
- *
- * This inline function will initialize a dynamically created completion
- * structure.
- */
static inline void init_completion(struct completion *x)
{
x->done = 0;
init_waitqueue_head(&x->wait);
}
-/**
- * reinit_completion - reinitialize a completion structure
- * @x: pointer to completion structure that is to be reinitialized
- *
- * This inline function should be used to reinitialize a completion structure so it can
- * be reused. This is especially important after complete_all() is used.
- */
-static inline void reinit_completion(struct completion *x)
-{
- x->done = 0;
-}
-
-extern void wait_for_completion(struct completion *);
-extern void wait_for_completion_io(struct completion *);
-extern int wait_for_completion_interruptible(struct completion *x);
-extern int wait_for_completion_killable(struct completion *x);
-extern unsigned long wait_for_completion_timeout(struct completion *x,
- unsigned long timeout);
-extern unsigned long wait_for_completion_io_timeout(struct completion *x,
- unsigned long timeout);
-extern long wait_for_completion_interruptible_timeout(
- struct completion *x, unsigned long timeout);
-extern long wait_for_completion_killable_timeout(
- struct completion *x, unsigned long timeout);
-extern bool try_wait_for_completion(struct completion *x);
-extern bool completion_done(struct completion *x);
-
-extern void complete(struct completion *);
-extern void complete_all(struct completion *);
+void complete(struct completion *);
+void wait_for_completion(struct completion *);
#endif
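For orientation, a completion is just a counter plus a wait queue; a standalone pthread-based equivalent of the two surviving entry points (illustrative only, not the shim actually used by this tree) looks like:

#include <pthread.h>

/* Standalone illustration of the completion pattern: a counter guarded by a
 * mutex, with a condition variable to block waiters. Names are hypothetical. */
struct completion_example {
	pthread_mutex_t	lock;
	pthread_cond_t	cond;
	unsigned	done;
};

static void complete_example(struct completion_example *x)
{
	pthread_mutex_lock(&x->lock);
	x->done++;
	pthread_cond_broadcast(&x->cond);
	pthread_mutex_unlock(&x->lock);
}

static void wait_for_completion_example(struct completion_example *x)
{
	pthread_mutex_lock(&x->lock);
	while (!x->done)
		pthread_cond_wait(&x->cond, &x->lock);
	x->done--;
	pthread_mutex_unlock(&x->lock);
}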
int __must_check kstrtouint(const char *s, unsigned int base, unsigned int *res);
int __must_check kstrtoint(const char *s, unsigned int base, int *res);
+static inline int __must_check kstrtou64(const char *s, unsigned int base, u64 *res)
+{
+ return kstrtoull(s, base, res);
+}
+
+static inline int __must_check kstrtos64(const char *s, unsigned int base, s64 *res)
+{
+ return kstrtoll(s, base, res);
+}
+
+static inline int __must_check kstrtou32(const char *s, unsigned int base, u32 *res)
+{
+ return kstrtouint(s, base, res);
+}
+
+static inline int __must_check kstrtos32(const char *s, unsigned int base, s32 *res)
+{
+ return kstrtoint(s, base, res);
+}
+
/* Permissions on a sysfs file: you didn't miss the 0 prefix did you? */
#define VERIFY_OCTAL_PERMISSIONS(perms) \
(BUILD_BUG_ON_ZERO((perms) < 0) + \
#include <linux/spinlock.h>
#include <linux/rcupdate.h>
-/*
- * The end of the chain is marked with a special nulls marks which has
- * the following format:
- *
- * +-------+-----------------------------------------------------+-+
- * | Base | Hash |1|
- * +-------+-----------------------------------------------------+-+
- *
- * Base (4 bits) : Reserved to distinguish between multiple tables.
- * Specified via &struct rhashtable_params.nulls_base.
- * Hash (27 bits): Full hash (unmasked) of first element added to bucket
- * 1 (1 bit) : Nulls marker (always set)
- *
- * The remaining bits of the next pointer remain unused for now.
- */
#define RHT_BASE_BITS 4
#define RHT_HASH_BITS 27
#define RHT_BASE_SHIFT RHT_HASH_BITS
-
-/* Base bits plus 1 bit for nulls marker */
#define RHT_HASH_RESERVED_SPACE (RHT_BASE_BITS + 1)
struct rhash_head {
struct rhash_head __rcu *next;
};
-/**
- * struct bucket_table - Table of hash buckets
- * @size: Number of hash buckets
- * @rehash: Current bucket being rehashed
- * @hash_rnd: Random seed to fold into hash
- * @locks_mask: Mask to apply before accessing locks[]
- * @locks: Array of spinlocks protecting individual buckets
- * @walkers: List of active walkers
- * @rcu: RCU structure for freeing the table
- * @future_tbl: Table under construction during rehashing
- * @buckets: size * hash buckets
- */
struct bucket_table {
unsigned int size;
unsigned int rehash;
struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp;
};
-/**
- * struct rhashtable_compare_arg - Key for the function rhashtable_compare
- * @ht: Hash table
- * @key: Key to compare against
- */
struct rhashtable_compare_arg {
struct rhashtable *ht;
const void *key;
typedef int (*rht_obj_cmpfn_t)(struct rhashtable_compare_arg *arg,
const void *obj);
-struct rhashtable;
-
-/**
- * struct rhashtable_params - Hash table construction parameters
- * @nelem_hint: Hint on number of elements, should be 75% of desired size
- * @key_len: Length of key
- * @key_offset: Offset of key in struct to be hashed
- * @head_offset: Offset of rhash_head in struct to be hashed
- * @insecure_max_entries: Maximum number of entries (may be exceeded)
- * @max_size: Maximum size while expanding
- * @min_size: Minimum size while shrinking
- * @nulls_base: Base value to generate nulls marker
- * @insecure_elasticity: Set to true to disable chain length checks
- * @automatic_shrinking: Enable automatic shrinking of tables
- * @locks_mul: Number of bucket locks to allocate per cpu (default: 128)
- * @hashfn: Hash function (default: jhash2 if !(key_len % 4), or jhash)
- * @obj_hashfn: Function to hash object
- * @obj_cmpfn: Function to compare key with object
- */
struct rhashtable_params {
size_t nelem_hint;
size_t key_len;
rht_obj_cmpfn_t obj_cmpfn;
};
-/**
- * struct rhashtable - Hash table handle
- * @tbl: Bucket table
- * @nelems: Number of elements in table
- * @key_len: Key length for hashfn
- * @elasticity: Maximum chain length before rehash
- * @p: Configuration parameters
- * @run_work: Deferred worker to expand/shrink asynchronously
- * @mutex: Mutex to protect current/future table swapping
- * @lock: Spin lock to protect walker list
- */
struct rhashtable {
struct bucket_table __rcu *tbl;
atomic_t nelems;
spinlock_t lock;
};
-/**
- * struct rhashtable_walker - Hash table walker
- * @list: List entry on list of walkers
- * @tbl: The table that we were walking over
- */
struct rhashtable_walker {
struct list_head list;
struct bucket_table *tbl;
};
-/**
- * struct rhashtable_iter - Hash table iterator, fits into netlink cb
- * @ht: Table to iterate through
- * @p: Current pointer
- * @walker: Associated rhashtable walker
- * @slot: Current slot
- * @skip: Number of entries to skip in slot
- */
-struct rhashtable_iter {
- struct rhashtable *ht;
- struct rhash_head *p;
- struct rhashtable_walker *walker;
- unsigned int slot;
- unsigned int skip;
-};
-
static inline unsigned long rht_marker(const struct rhashtable *ht, u32 hash)
{
return NULLS_MARKER(ht->p.nulls_base + hash);
rht_key_hashfn(ht, tbl, ptr + params.key_offset, params);
}
-/**
- * rht_grow_above_75 - returns true if nelems > 0.75 * table-size
- * @ht: hash table
- * @tbl: current table
- */
static inline bool rht_grow_above_75(const struct rhashtable *ht,
const struct bucket_table *tbl)
{
(!ht->p.max_size || tbl->size < ht->p.max_size);
}
-/**
- * rht_shrink_below_30 - returns true if nelems < 0.3 * table-size
- * @ht: hash table
- * @tbl: current table
- */
static inline bool rht_shrink_below_30(const struct rhashtable *ht,
const struct bucket_table *tbl)
{
tbl->size > ht->p.min_size;
}
-/**
- * rht_grow_above_100 - returns true if nelems > table-size
- * @ht: hash table
- * @tbl: current table
- */
static inline bool rht_grow_above_100(const struct rhashtable *ht,
const struct bucket_table *tbl)
{
(!ht->p.max_size || tbl->size < ht->p.max_size);
}
-/**
- * rht_grow_above_max - returns true if table is above maximum
- * @ht: hash table
- * @tbl: current table
- */
static inline bool rht_grow_above_max(const struct rhashtable *ht,
const struct bucket_table *tbl)
{
atomic_read(&ht->nelems) >= ht->p.insecure_max_entries;
}
-/* The bucket lock is selected based on the hash and protects mutations
- * on a group of hash buckets.
- *
- * A maximum of tbl->size/2 bucket locks is allocated. This ensures that
- * a single lock always covers both buckets which may both contains
- * entries which link to the same bucket of the old table during resizing.
- * This allows to simplify the locking as locking the bucket in both
- * tables during resize always guarantee protection.
- *
- * IMPORTANT: When holding the bucket lock of both the old and new table
- * during expansions and shrinking, the old bucket lock must always be
- * acquired first.
- */
static inline spinlock_t *rht_bucket_lock(const struct bucket_table *tbl,
unsigned int hash)
{
return &tbl->locks[hash & tbl->locks_mask];
}
-#ifdef CONFIG_PROVE_LOCKING
-int lockdep_rht_mutex_is_held(struct rhashtable *ht);
-int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash);
-#else
-static inline int lockdep_rht_mutex_is_held(struct rhashtable *ht)
-{
- return 1;
-}
-
-static inline int lockdep_rht_bucket_is_held(const struct bucket_table *tbl,
- u32 hash)
-{
- return 1;
-}
-#endif /* CONFIG_PROVE_LOCKING */
+int rhashtable_insert_rehash(struct rhashtable *, struct bucket_table *);
+struct bucket_table *rhashtable_insert_slow(struct rhashtable *,
+ const void *,
+ struct rhash_head *,
+ struct bucket_table *);
-int rhashtable_init(struct rhashtable *ht,
- const struct rhashtable_params *params);
+int rhashtable_init(struct rhashtable *, const struct rhashtable_params *);
+void rhashtable_destroy(struct rhashtable *);
-struct bucket_table *rhashtable_insert_slow(struct rhashtable *ht,
- const void *key,
- struct rhash_head *obj,
- struct bucket_table *old_tbl);
-int rhashtable_insert_rehash(struct rhashtable *ht, struct bucket_table *tbl);
-
-int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter,
- gfp_t gfp);
-void rhashtable_walk_exit(struct rhashtable_iter *iter);
-int rhashtable_walk_start(struct rhashtable_iter *iter) __acquires(RCU);
-void *rhashtable_walk_next(struct rhashtable_iter *iter);
-void rhashtable_walk_stop(struct rhashtable_iter *iter) __releases(RCU);
-
-void rhashtable_free_and_destroy(struct rhashtable *ht,
- void (*free_fn)(void *ptr, void *arg),
- void *arg);
-void rhashtable_destroy(struct rhashtable *ht);
-
-#define rht_dereference(p, ht) \
- rcu_dereference_protected(p, lockdep_rht_mutex_is_held(ht))
-
-#define rht_dereference_rcu(p, ht) \
- rcu_dereference_check(p, lockdep_rht_mutex_is_held(ht))
-
-#define rht_dereference_bucket(p, tbl, hash) \
- rcu_dereference_protected(p, lockdep_rht_bucket_is_held(tbl, hash))
-
-#define rht_dereference_bucket_rcu(p, tbl, hash) \
- rcu_dereference_check(p, lockdep_rht_bucket_is_held(tbl, hash))
+#define rht_dereference(p, ht) rcu_dereference(p)
+#define rht_dereference_rcu(p, ht) rcu_dereference(p)
+#define rht_dereference_bucket(p, tbl, hash) rcu_dereference(p)
+#define rht_dereference_bucket_rcu(p, tbl, hash) rcu_dereference(p)
#define rht_entry(tpos, pos, member) \
({ tpos = container_of(pos, typeof(*tpos), member); 1; })
-/**
- * rht_for_each_continue - continue iterating over hash chain
- * @pos: the &struct rhash_head to use as a loop cursor.
- * @head: the previous &struct rhash_head to continue from
- * @tbl: the &struct bucket_table
- * @hash: the hash value / bucket index
- */
#define rht_for_each_continue(pos, head, tbl, hash) \
for (pos = rht_dereference_bucket(head, tbl, hash); \
!rht_is_a_nulls(pos); \
pos = rht_dereference_bucket((pos)->next, tbl, hash))
-/**
- * rht_for_each - iterate over hash chain
- * @pos: the &struct rhash_head to use as a loop cursor.
- * @tbl: the &struct bucket_table
- * @hash: the hash value / bucket index
- */
#define rht_for_each(pos, tbl, hash) \
rht_for_each_continue(pos, (tbl)->buckets[hash], tbl, hash)
-/**
- * rht_for_each_entry_continue - continue iterating over hash chain
- * @tpos: the type * to use as a loop cursor.
- * @pos: the &struct rhash_head to use as a loop cursor.
- * @head: the previous &struct rhash_head to continue from
- * @tbl: the &struct bucket_table
- * @hash: the hash value / bucket index
- * @member: name of the &struct rhash_head within the hashable struct.
- */
-#define rht_for_each_entry_continue(tpos, pos, head, tbl, hash, member) \
- for (pos = rht_dereference_bucket(head, tbl, hash); \
- (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \
- pos = rht_dereference_bucket((pos)->next, tbl, hash))
-
-/**
- * rht_for_each_entry - iterate over hash chain of given type
- * @tpos: the type * to use as a loop cursor.
- * @pos: the &struct rhash_head to use as a loop cursor.
- * @tbl: the &struct bucket_table
- * @hash: the hash value / bucket index
- * @member: name of the &struct rhash_head within the hashable struct.
- */
-#define rht_for_each_entry(tpos, pos, tbl, hash, member) \
- rht_for_each_entry_continue(tpos, pos, (tbl)->buckets[hash], \
- tbl, hash, member)
-
-/**
- * rht_for_each_entry_safe - safely iterate over hash chain of given type
- * @tpos: the type * to use as a loop cursor.
- * @pos: the &struct rhash_head to use as a loop cursor.
- * @next: the &struct rhash_head to use as next in loop cursor.
- * @tbl: the &struct bucket_table
- * @hash: the hash value / bucket index
- * @member: name of the &struct rhash_head within the hashable struct.
- *
- * This hash chain list-traversal primitive allows for the looped code to
- * remove the loop cursor from the list.
- */
-#define rht_for_each_entry_safe(tpos, pos, next, tbl, hash, member) \
- for (pos = rht_dereference_bucket((tbl)->buckets[hash], tbl, hash), \
- next = !rht_is_a_nulls(pos) ? \
- rht_dereference_bucket(pos->next, tbl, hash) : NULL; \
- (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \
- pos = next, \
- next = !rht_is_a_nulls(pos) ? \
- rht_dereference_bucket(pos->next, tbl, hash) : NULL)
-
-/**
- * rht_for_each_rcu_continue - continue iterating over rcu hash chain
- * @pos: the &struct rhash_head to use as a loop cursor.
- * @head: the previous &struct rhash_head to continue from
- * @tbl: the &struct bucket_table
- * @hash: the hash value / bucket index
- *
- * This hash chain list-traversal primitive may safely run concurrently with
- * the _rcu mutation primitives such as rhashtable_insert() as long as the
- * traversal is guarded by rcu_read_lock().
- */
#define rht_for_each_rcu_continue(pos, head, tbl, hash) \
for (({barrier(); }), \
pos = rht_dereference_bucket_rcu(head, tbl, hash); \
!rht_is_a_nulls(pos); \
pos = rcu_dereference_raw(pos->next))
-/**
- * rht_for_each_rcu - iterate over rcu hash chain
- * @pos: the &struct rhash_head to use as a loop cursor.
- * @tbl: the &struct bucket_table
- * @hash: the hash value / bucket index
- *
- * This hash chain list-traversal primitive may safely run concurrently with
- * the _rcu mutation primitives such as rhashtable_insert() as long as the
- * traversal is guarded by rcu_read_lock().
- */
#define rht_for_each_rcu(pos, tbl, hash) \
rht_for_each_rcu_continue(pos, (tbl)->buckets[hash], tbl, hash)
-/**
- * rht_for_each_entry_rcu_continue - continue iterating over rcu hash chain
- * @tpos: the type * to use as a loop cursor.
- * @pos: the &struct rhash_head to use as a loop cursor.
- * @head: the previous &struct rhash_head to continue from
- * @tbl: the &struct bucket_table
- * @hash: the hash value / bucket index
- * @member: name of the &struct rhash_head within the hashable struct.
- *
- * This hash chain list-traversal primitive may safely run concurrently with
- * the _rcu mutation primitives such as rhashtable_insert() as long as the
- * traversal is guarded by rcu_read_lock().
- */
#define rht_for_each_entry_rcu_continue(tpos, pos, head, tbl, hash, member) \
for (({barrier(); }), \
pos = rht_dereference_bucket_rcu(head, tbl, hash); \
(!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \
pos = rht_dereference_bucket_rcu(pos->next, tbl, hash))
-/**
- * rht_for_each_entry_rcu - iterate over rcu hash chain of given type
- * @tpos: the type * to use as a loop cursor.
- * @pos: the &struct rhash_head to use as a loop cursor.
- * @tbl: the &struct bucket_table
- * @hash: the hash value / bucket index
- * @member: name of the &struct rhash_head within the hashable struct.
- *
- * This hash chain list-traversal primitive may safely run concurrently with
- * the _rcu mutation primitives such as rhashtable_insert() as long as the
- * traversal is guarded by rcu_read_lock().
- */
#define rht_for_each_entry_rcu(tpos, pos, tbl, hash, member) \
rht_for_each_entry_rcu_continue(tpos, pos, (tbl)->buckets[hash],\
tbl, hash, member)
return memcmp(ptr + ht->p.key_offset, arg->key, ht->p.key_len);
}
-/**
- * rhashtable_lookup_fast - search hash table, inlined version
- * @ht: hash table
- * @key: the pointer to the key
- * @params: hash table parameters
- *
- * Computes the hash value for the key and traverses the bucket chain looking
- * for a entry with an identical key. The first matching entry is returned.
- *
- * Returns the first entry on which the compare function returned true.
- */
static inline void *rhashtable_lookup_fast(
struct rhashtable *ht, const void *key,
const struct rhashtable_params params)
return NULL;
}
-/* Internal function, please use rhashtable_insert_fast() instead */
static inline int __rhashtable_insert_fast(
struct rhashtable *ht, const void *key, struct rhash_head *obj,
const struct rhashtable_params params)
return err;
}
-/**
- * rhashtable_insert_fast - insert object into hash table
- * @ht: hash table
- * @obj: pointer to hash head inside object
- * @params: hash table parameters
- *
- * Will take a per bucket spinlock to protect against mutual mutations
- * on the same bucket. Multiple insertions may occur in parallel unless
- * they map to the same bucket lock.
- *
- * It is safe to call this function from atomic context.
- *
- * Will trigger an automatic deferred table resizing if the size grows
- * beyond the watermark indicated by grow_decision() which can be passed
- * to rhashtable_init().
- */
-static inline int rhashtable_insert_fast(
- struct rhashtable *ht, struct rhash_head *obj,
- const struct rhashtable_params params)
-{
- return __rhashtable_insert_fast(ht, NULL, obj, params);
-}
-
-/**
- * rhashtable_lookup_insert_fast - lookup and insert object into hash table
- * @ht: hash table
- * @obj: pointer to hash head inside object
- * @params: hash table parameters
- *
- * Locks down the bucket chain in both the old and new table if a resize
- * is in progress to ensure that writers can't remove from the old table
- * and can't insert to the new table during the atomic operation of search
- * and insertion. Searches for duplicates in both the old and new table if
- * a resize is in progress.
- *
- * This lookup function may only be used for fixed key hash table (key_len
- * parameter set). It will BUG() if used inappropriately.
- *
- * It is safe to call this function from atomic context.
- *
- * Will trigger an automatic deferred table resizing if the size grows
- * beyond the watermark indicated by grow_decision() which can be passed
- * to rhashtable_init().
- */
static inline int rhashtable_lookup_insert_fast(
struct rhashtable *ht, struct rhash_head *obj,
const struct rhashtable_params params)
params);
}
-/**
- * rhashtable_lookup_insert_key - search and insert object to hash table
- * with explicit key
- * @ht: hash table
- * @key: key
- * @obj: pointer to hash head inside object
- * @params: hash table parameters
- *
- * Locks down the bucket chain in both the old and new table if a resize
- * is in progress to ensure that writers can't remove from the old table
- * and can't insert to the new table during the atomic operation of search
- * and insertion. Searches for duplicates in both the old and new table if
- * a resize is in progress.
- *
- * Lookups may occur in parallel with hashtable mutations and resizing.
- *
- * Will trigger an automatic deferred table resizing if the size grows
- * beyond the watermark indicated by grow_decision() which can be passed
- * to rhashtable_init().
- *
- * Returns zero on success.
- */
-static inline int rhashtable_lookup_insert_key(
- struct rhashtable *ht, const void *key, struct rhash_head *obj,
- const struct rhashtable_params params)
-{
- BUG_ON(!ht->p.obj_hashfn || !key);
-
- return __rhashtable_insert_fast(ht, key, obj, params);
-}
-
-/* Internal function, please use rhashtable_remove_fast() instead */
static inline int __rhashtable_remove_fast(
struct rhashtable *ht, struct bucket_table *tbl,
struct rhash_head *obj, const struct rhashtable_params params)
return err;
}
-/**
- * rhashtable_remove_fast - remove object from hash table
- * @ht: hash table
- * @obj: pointer to hash head inside object
- * @params: hash table parameters
- *
- * Since the hash chain is single linked, the removal operation needs to
- * walk the bucket chain upon removal. The removal operation is thus
- * considerable slow if the hash table is not correctly sized.
- *
- * Will automatically shrink the table via rhashtable_expand() if the
- * shrink_decision function specified at rhashtable_init() returns true.
- *
- * Returns zero on success, -ENOENT if the entry could not be found.
- */
static inline int rhashtable_remove_fast(
struct rhashtable *ht, struct rhash_head *obj,
const struct rhashtable_params params)
return err;
}
-/* Internal function, please use rhashtable_replace_fast() instead */
-static inline int __rhashtable_replace_fast(
- struct rhashtable *ht, struct bucket_table *tbl,
- struct rhash_head *obj_old, struct rhash_head *obj_new,
- const struct rhashtable_params params)
-{
- struct rhash_head __rcu **pprev;
- struct rhash_head *he;
- spinlock_t *lock;
- unsigned int hash;
- int err = -ENOENT;
-
- /* Minimally, the old and new objects must have same hash
- * (which should mean identifiers are the same).
- */
- hash = rht_head_hashfn(ht, tbl, obj_old, params);
- if (hash != rht_head_hashfn(ht, tbl, obj_new, params))
- return -EINVAL;
-
- lock = rht_bucket_lock(tbl, hash);
-
- spin_lock_bh(lock);
-
- pprev = &tbl->buckets[hash];
- rht_for_each(he, tbl, hash) {
- if (he != obj_old) {
- pprev = &he->next;
- continue;
- }
-
- rcu_assign_pointer(obj_new->next, obj_old->next);
- rcu_assign_pointer(*pprev, obj_new);
- err = 0;
- break;
- }
-
- spin_unlock_bh(lock);
-
- return err;
-}
-
-/**
- * rhashtable_replace_fast - replace an object in hash table
- * @ht: hash table
- * @obj_old: pointer to hash head inside object being replaced
- * @obj_new: pointer to hash head inside object which is new
- * @params: hash table parameters
- *
- * Replacing an object doesn't affect the number of elements in the hash table
- * or bucket, so we don't need to worry about shrinking or expanding the
- * table here.
- *
- * Returns zero on success, -ENOENT if the entry could not be found,
- * -EINVAL if hash is not the same for the old and new objects.
- */
-static inline int rhashtable_replace_fast(
- struct rhashtable *ht, struct rhash_head *obj_old,
- struct rhash_head *obj_new,
- const struct rhashtable_params params)
-{
- struct bucket_table *tbl;
- int err;
-
- rcu_read_lock();
-
- tbl = rht_dereference_rcu(ht->tbl, ht);
-
- /* Because we have already taken (and released) the bucket
- * lock in old_tbl, if we find that future_tbl is not yet
- * visible then that guarantees the entry to still be in
- * the old tbl if it exists.
- */
- while ((err = __rhashtable_replace_fast(ht, tbl, obj_old,
- obj_new, params)) &&
- (tbl = rht_dereference_rcu(tbl->future_tbl, ht)))
- ;
-
- rcu_read_unlock();
-
- return err;
-}
-
#endif /* _LINUX_RHASHTABLE_H */
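After the trim, the surviving rhashtable API is init/destroy plus the inline _fast lookup, insert and remove helpers; a minimal usage sketch under that assumption (object, field and function names are invented):

#include <linux/rhashtable.h>

/* Illustrative use of the retained interface; nothing here is from this tree. */
struct example_obj {
	u32			key;
	struct rhash_head	hash;
};

static const struct rhashtable_params example_params = {
	.key_len	= sizeof(u32),
	.key_offset	= offsetof(struct example_obj, key),
	.head_offset	= offsetof(struct example_obj, hash),
};

static int example(struct rhashtable *ht, struct example_obj *obj)
{
	int ret = rhashtable_init(ht, &example_params);

	if (ret)
		return ret;

	/* insert, failing if an entry with the same key already exists */
	ret = rhashtable_lookup_insert_fast(ht, &obj->hash, example_params);
	if (ret)
		goto out;

	/* lookup returns the containing object, or NULL if absent */
	if (rhashtable_lookup_fast(ht, &obj->key, example_params) != obj)
		ret = -ENOENT;

	rhashtable_remove_fast(ht, &obj->hash, example_params);
out:
	rhashtable_destroy(ht);
	return ret;
}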
#define _LINUX_WAIT_H
#include <pthread.h>
-
#include <linux/bitmap.h>
#include <linux/list.h>
-#include <linux/lockdep.h>
#include <linux/spinlock.h>
-//#include <uapi/linux/wait.h>
typedef struct __wait_queue wait_queue_t;
typedef int (*wait_queue_func_t)(wait_queue_t *wait, unsigned mode, int flags, void *key);
-int default_wake_function(wait_queue_t *wait, unsigned mode, int flags, void *key);
-/* __wait_queue::flags */
#define WQ_FLAG_EXCLUSIVE 0x01
-#define WQ_FLAG_WOKEN 0x02
struct __wait_queue {
unsigned int flags;
struct list_head task_list;
};
-struct wait_bit_key {
- void *flags;
- int bit_nr;
-#define WAIT_ATOMIC_T_BIT_NR -1
- unsigned long timeout;
-};
-
-struct wait_bit_queue {
- struct wait_bit_key key;
- wait_queue_t wait;
-};
-
-struct __wait_queue_head {
+typedef struct {
spinlock_t lock;
struct list_head task_list;
-};
-typedef struct __wait_queue_head wait_queue_head_t;
-
-struct task_struct;
+} wait_queue_head_t;
-/*
- * Macros for declaration and initialisaton of the datatypes
- */
-
-#define __WAITQUEUE_INITIALIZER(name, tsk) { \
- .private = tsk, \
- .func = default_wake_function, \
- .task_list = { NULL, NULL } }
+void wake_up(wait_queue_head_t *);
+void prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state);
+void finish_wait(wait_queue_head_t *q, wait_queue_t *wait);
+int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
+int default_wake_function(wait_queue_t *wait, unsigned mode, int flags, void *key);
#define DECLARE_WAITQUEUE(name, tsk) \
- wait_queue_t name = __WAITQUEUE_INITIALIZER(name, tsk)
+ wait_queue_t name = { \
+ .private = tsk, \
+ .func = default_wake_function, \
+ .task_list = { NULL, NULL } \
+ }
#define __WAIT_QUEUE_HEAD_INITIALIZER(name) { \
.lock = __SPIN_LOCK_UNLOCKED(name.lock), \
#define DECLARE_WAIT_QUEUE_HEAD(name) \
wait_queue_head_t name = __WAIT_QUEUE_HEAD_INITIALIZER(name)
-#define __WAIT_BIT_KEY_INITIALIZER(word, bit) \
- { .flags = word, .bit_nr = bit, }
-
-#define __WAIT_ATOMIC_T_KEY_INITIALIZER(p) \
- { .flags = p, .bit_nr = WAIT_ATOMIC_T_BIT_NR, }
-
-extern void __init_waitqueue_head(wait_queue_head_t *q, const char *name, struct lock_class_key *);
-
-#define init_waitqueue_head(q) \
- do { \
- static struct lock_class_key __key; \
- \
- __init_waitqueue_head((q), #q, &__key); \
- } while (0)
-
-#ifdef CONFIG_LOCKDEP
-# define __WAIT_QUEUE_HEAD_INIT_ONSTACK(name) \
- ({ init_waitqueue_head(&name); name; })
-# define DECLARE_WAIT_QUEUE_HEAD_ONSTACK(name) \
- wait_queue_head_t name = __WAIT_QUEUE_HEAD_INIT_ONSTACK(name)
-#else
-# define DECLARE_WAIT_QUEUE_HEAD_ONSTACK(name) DECLARE_WAIT_QUEUE_HEAD(name)
-#endif
-
-static inline void init_waitqueue_entry(wait_queue_t *q, struct task_struct *p)
-{
- q->flags = 0;
- q->private = p;
- q->func = default_wake_function;
-}
-
-static inline void
-init_waitqueue_func_entry(wait_queue_t *q, wait_queue_func_t func)
-{
- q->flags = 0;
- q->private = NULL;
- q->func = func;
-}
-
-/**
- * waitqueue_active -- locklessly test for waiters on the queue
- * @q: the waitqueue to test for waiters
- *
- * returns true if the wait list is not empty
- *
- * NOTE: this function is lockless and requires care, incorrect usage _will_
- * lead to sporadic and non-obvious failure.
- *
- * Use either while holding wait_queue_head_t::lock or when used for wakeups
- * with an extra smp_mb() like:
- *
- * CPU0 - waker CPU1 - waiter
- *
- * for (;;) {
- * @cond = true; prepare_to_wait(&wq, &wait, state);
- * smp_mb(); // smp_mb() from set_current_state()
- * if (waitqueue_active(wq)) if (@cond)
- * wake_up(wq); break;
- * schedule();
- * }
- * finish_wait(&wq, &wait);
- *
- * Because without the explicit smp_mb() it's possible for the
- * waitqueue_active() load to get hoisted over the @cond store such that we'll
- * observe an empty wait list while the waiter might not observe @cond.
- *
- * Also note that this 'optimization' trades a spin_lock() for an smp_mb(),
- * which (when the lock is uncontended) are of roughly equal cost.
- */
-static inline int waitqueue_active(wait_queue_head_t *q)
-{
- return !list_empty(&q->task_list);
-}
-
-/**
- * wq_has_sleeper - check if there are any waiting processes
- * @wq: wait queue head
- *
- * Returns true if wq has waiting processes
- *
- * Please refer to the comment for waitqueue_active.
- */
-static inline bool wq_has_sleeper(wait_queue_head_t *wq)
-{
- /*
- * We need to be sure we are in sync with the
- * add_wait_queue modifications to the wait queue.
- *
- * This memory barrier should be paired with one on the
- * waiting side.
- */
- smp_mb();
- return waitqueue_active(wq);
-}
-
-extern void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait);
-extern void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait);
-extern void remove_wait_queue(wait_queue_head_t *q, wait_queue_t *wait);
-
-static inline void __add_wait_queue(wait_queue_head_t *head, wait_queue_t *new)
-{
- list_add(&new->task_list, &head->task_list);
-}
-
-/*
- * Used for wake-one threads:
- */
-static inline void
-__add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait)
-{
- wait->flags |= WQ_FLAG_EXCLUSIVE;
- __add_wait_queue(q, wait);
-}
-
-static inline void __add_wait_queue_tail(wait_queue_head_t *head,
- wait_queue_t *new)
-{
- list_add_tail(&new->task_list, &head->task_list);
-}
-
-static inline void
-__add_wait_queue_tail_exclusive(wait_queue_head_t *q, wait_queue_t *wait)
-{
- wait->flags |= WQ_FLAG_EXCLUSIVE;
- __add_wait_queue_tail(q, wait);
-}
-
-static inline void
-__remove_wait_queue(wait_queue_head_t *head, wait_queue_t *old)
+static inline void init_waitqueue_head(wait_queue_head_t *q)
{
- list_del(&old->task_list);
+ spin_lock_init(&q->lock);
+ INIT_LIST_HEAD(&q->task_list);
}
-typedef int wait_bit_action_f(struct wait_bit_key *, int mode);
-void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key);
-void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key);
-void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, int nr, void *key);
-void __wake_up_locked(wait_queue_head_t *q, unsigned int mode, int nr);
-void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr);
-void __wake_up_bit(wait_queue_head_t *, void *, int);
-int __wait_on_bit(wait_queue_head_t *, struct wait_bit_queue *, wait_bit_action_f *, unsigned);
-int __wait_on_bit_lock(wait_queue_head_t *, struct wait_bit_queue *, wait_bit_action_f *, unsigned);
-void wake_up_bit(void *, int);
-void wake_up_atomic_t(atomic_t *);
-int out_of_line_wait_on_bit(void *, int, wait_bit_action_f *, unsigned);
-int out_of_line_wait_on_bit_timeout(void *, int, wait_bit_action_f *, unsigned, unsigned long);
-int out_of_line_wait_on_bit_lock(void *, int, wait_bit_action_f *, unsigned);
-int out_of_line_wait_on_atomic_t(atomic_t *, int (*)(atomic_t *), unsigned);
-wait_queue_head_t *bit_waitqueue(void *, int);
-
-#define wake_up(x) __wake_up(x, TASK_NORMAL, 1, NULL)
-#define wake_up_nr(x, nr) __wake_up(x, TASK_NORMAL, nr, NULL)
-#define wake_up_all(x) __wake_up(x, TASK_NORMAL, 0, NULL)
-#define wake_up_locked(x) __wake_up_locked((x), TASK_NORMAL, 1)
-#define wake_up_all_locked(x) __wake_up_locked((x), TASK_NORMAL, 0)
-
-#define wake_up_interruptible(x) __wake_up(x, TASK_INTERRUPTIBLE, 1, NULL)
-#define wake_up_interruptible_nr(x, nr) __wake_up(x, TASK_INTERRUPTIBLE, nr, NULL)
-#define wake_up_interruptible_all(x) __wake_up(x, TASK_INTERRUPTIBLE, 0, NULL)
-#define wake_up_interruptible_sync(x) __wake_up_sync((x), TASK_INTERRUPTIBLE, 1)
-
-/*
- * Wakeup macros to be used to report events to the targets.
- */
-#define wake_up_poll(x, m) \
- __wake_up(x, TASK_NORMAL, 1, (void *) (m))
-#define wake_up_locked_poll(x, m) \
- __wake_up_locked_key((x), TASK_NORMAL, (void *) (m))
-#define wake_up_interruptible_poll(x, m) \
- __wake_up(x, TASK_INTERRUPTIBLE, 1, (void *) (m))
-#define wake_up_interruptible_sync_poll(x, m) \
- __wake_up_sync_key((x), TASK_INTERRUPTIBLE, 1, (void *) (m))
+#define DEFINE_WAIT(name) \
+ wait_queue_t name = { \
+ .private = current, \
+ .func = autoremove_wake_function, \
+ .task_list = LIST_HEAD_INIT((name).task_list), \
+ }
#define ___wait_cond_timeout(condition) \
({ \
__cond || !__ret; \
})
-#define ___wait_is_interruptible(state) \
- (!__builtin_constant_p(state) || \
- state == TASK_INTERRUPTIBLE || state == TASK_KILLABLE) \
-
-/*
- * The below macro ___wait_event() has an explicit shadow of the __ret
- * variable when used from the wait_event_*() macros.
- *
- * This is so that both can use the ___wait_cond_timeout() construct
- * to wrap the condition.
- *
- * The type inconsistency of the wait_event_*() __ret variable is also
- * on purpose; we use long where we can return timeout values and int
- * otherwise.
- */
-
#define ___wait_event(wq, condition, state, exclusive, ret, cmd) \
({ \
- __label__ __out; \
- wait_queue_t __wait; \
- long __ret = ret; /* explicit shadow */ \
- \
- INIT_LIST_HEAD(&__wait.task_list); \
- if (exclusive) \
- __wait.flags = WQ_FLAG_EXCLUSIVE; \
- else \
- __wait.flags = 0; \
+ DEFINE_WAIT(__wait); \
+ long __ret = ret; \
\
for (;;) { \
- long __int = prepare_to_wait_event(&wq, &__wait, state);\
- \
+ prepare_to_wait(&wq, &__wait, state); \
if (condition) \
break; \
- \
- if (___wait_is_interruptible(state) && __int) { \
- __ret = __int; \
- if (exclusive) { \
- abort_exclusive_wait(&wq, &__wait, \
- state, NULL); \
- goto __out; \
- } \
- break; \
- } \
- \
cmd; \
} \
finish_wait(&wq, &__wait); \
-__out: __ret; \
+ __ret; \
})
#define __wait_event(wq, condition) \
(void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0, \
schedule())
-/**
- * wait_event - sleep until a condition gets true
- * @wq: the waitqueue to wait on
- * @condition: a C expression for the event to wait for
- *
- * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
- * @condition evaluates to true. The @condition is checked each time
- * the waitqueue @wq is woken up.
- *
- * wake_up() has to be called after changing any variable that could
- * change the result of the wait condition.
- */
#define wait_event(wq, condition) \
do { \
- might_sleep(); \
if (condition) \
break; \
__wait_event(wq, condition); \
} while (0)
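The retained wait_event()/wake_up() pair is used in the usual producer/consumer shape; a small sketch (identifiers invented):

/* Illustrative pairing of the retained primitives; not from this tree. */
static DECLARE_WAIT_QUEUE_HEAD(example_wq);
static bool example_done;

static void example_producer(void)
{
	example_done = true;
	wake_up(&example_wq);		/* wakes threads blocked in wait_event() */
}

static void example_consumer(void)
{
	/* sleeps (TASK_UNINTERRUPTIBLE) until the condition becomes true */
	wait_event(example_wq, example_done);
}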
-#define __io_wait_event(wq, condition) \
- (void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0, \
- io_schedule())
-
-/*
- * io_wait_event() -- like wait_event() but with io_schedule()
- */
-#define io_wait_event(wq, condition) \
-do { \
- might_sleep(); \
- if (condition) \
- break; \
- __io_wait_event(wq, condition); \
-} while (0)
-
-#define __wait_event_freezable(wq, condition) \
- ___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, 0, \
- schedule(); try_to_freeze())
-
-/**
- * wait_event_freezable - sleep (or freeze) until a condition gets true
- * @wq: the waitqueue to wait on
- * @condition: a C expression for the event to wait for
- *
- * The process is put to sleep (TASK_INTERRUPTIBLE -- so as not to contribute
- * to system load) until the @condition evaluates to true. The
- * @condition is checked each time the waitqueue @wq is woken up.
- *
- * wake_up() has to be called after changing any variable that could
- * change the result of the wait condition.
- */
-#define wait_event_freezable(wq, condition) \
-({ \
- int __ret = 0; \
- might_sleep(); \
- if (!(condition)) \
- __ret = __wait_event_freezable(wq, condition); \
- __ret; \
-})
-
#define __wait_event_timeout(wq, condition, timeout) \
___wait_event(wq, ___wait_cond_timeout(condition), \
TASK_UNINTERRUPTIBLE, 0, timeout, \
__ret = schedule_timeout(__ret))
-/**
- * wait_event_timeout - sleep until a condition gets true or a timeout elapses
- * @wq: the waitqueue to wait on
- * @condition: a C expression for the event to wait for
- * @timeout: timeout, in jiffies
- *
- * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
- * @condition evaluates to true. The @condition is checked each time
- * the waitqueue @wq is woken up.
- *
- * wake_up() has to be called after changing any variable that could
- * change the result of the wait condition.
- *
- * Returns:
- * 0 if the @condition evaluated to %false after the @timeout elapsed,
- * 1 if the @condition evaluated to %true after the @timeout elapsed,
- * or the remaining jiffies (at least 1) if the @condition evaluated
- * to %true before the @timeout elapsed.
- */
#define wait_event_timeout(wq, condition, timeout) \
({ \
long __ret = timeout; \
- might_sleep(); \
if (!___wait_cond_timeout(condition)) \
__ret = __wait_event_timeout(wq, condition, timeout); \
__ret; \
})
-#define __wait_event_freezable_timeout(wq, condition, timeout) \
- ___wait_event(wq, ___wait_cond_timeout(condition), \
- TASK_INTERRUPTIBLE, 0, timeout, \
- __ret = schedule_timeout(__ret); try_to_freeze())
-
-/*
- * like wait_event_timeout() -- except it uses TASK_INTERRUPTIBLE to avoid
- * increasing load and is freezable.
- */
-#define wait_event_freezable_timeout(wq, condition, timeout) \
-({ \
- long __ret = timeout; \
- might_sleep(); \
- if (!___wait_cond_timeout(condition)) \
- __ret = __wait_event_freezable_timeout(wq, condition, timeout); \
- __ret; \
-})
-
-#define __wait_event_exclusive_cmd(wq, condition, cmd1, cmd2) \
- (void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 1, 0, \
- cmd1; schedule(); cmd2)
-/*
- * Just like wait_event_cmd(), except it sets exclusive flag
- */
-#define wait_event_exclusive_cmd(wq, condition, cmd1, cmd2) \
-do { \
- if (condition) \
- break; \
- __wait_event_exclusive_cmd(wq, condition, cmd1, cmd2); \
-} while (0)
-
-#define __wait_event_cmd(wq, condition, cmd1, cmd2) \
- (void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0, \
- cmd1; schedule(); cmd2)
-
-/**
- * wait_event_cmd - sleep until a condition gets true
- * @wq: the waitqueue to wait on
- * @condition: a C expression for the event to wait for
- * @cmd1: the command will be executed before sleep
- * @cmd2: the command will be executed after sleep
- *
- * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
- * @condition evaluates to true. The @condition is checked each time
- * the waitqueue @wq is woken up.
- *
- * wake_up() has to be called after changing any variable that could
- * change the result of the wait condition.
- */
-#define wait_event_cmd(wq, condition, cmd1, cmd2) \
-do { \
- if (condition) \
- break; \
- __wait_event_cmd(wq, condition, cmd1, cmd2); \
-} while (0)
-
-#define __wait_event_interruptible(wq, condition) \
- ___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, 0, \
- schedule())
-
-/**
- * wait_event_interruptible - sleep until a condition gets true
- * @wq: the waitqueue to wait on
- * @condition: a C expression for the event to wait for
- *
- * The process is put to sleep (TASK_INTERRUPTIBLE) until the
- * @condition evaluates to true or a signal is received.
- * The @condition is checked each time the waitqueue @wq is woken up.
- *
- * wake_up() has to be called after changing any variable that could
- * change the result of the wait condition.
- *
- * The function will return -ERESTARTSYS if it was interrupted by a
- * signal and 0 if @condition evaluated to true.
- */
-#define wait_event_interruptible(wq, condition) \
-({ \
- int __ret = 0; \
- might_sleep(); \
- if (!(condition)) \
- __ret = __wait_event_interruptible(wq, condition); \
- __ret; \
-})
-
-#define __wait_event_interruptible_timeout(wq, condition, timeout) \
- ___wait_event(wq, ___wait_cond_timeout(condition), \
- TASK_INTERRUPTIBLE, 0, timeout, \
- __ret = schedule_timeout(__ret))
-
-/**
- * wait_event_interruptible_timeout - sleep until a condition gets true or a timeout elapses
- * @wq: the waitqueue to wait on
- * @condition: a C expression for the event to wait for
- * @timeout: timeout, in jiffies
- *
- * The process is put to sleep (TASK_INTERRUPTIBLE) until the
- * @condition evaluates to true or a signal is received.
- * The @condition is checked each time the waitqueue @wq is woken up.
- *
- * wake_up() has to be called after changing any variable that could
- * change the result of the wait condition.
- *
- * Returns:
- * 0 if the @condition evaluated to %false after the @timeout elapsed,
- * 1 if the @condition evaluated to %true after the @timeout elapsed,
- * the remaining jiffies (at least 1) if the @condition evaluated
- * to %true before the @timeout elapsed, or -%ERESTARTSYS if it was
- * interrupted by a signal.
- */
-#define wait_event_interruptible_timeout(wq, condition, timeout) \
-({ \
- long __ret = timeout; \
- might_sleep(); \
- if (!___wait_cond_timeout(condition)) \
- __ret = __wait_event_interruptible_timeout(wq, \
- condition, timeout); \
- __ret; \
-})
-
-#define __wait_event_hrtimeout(wq, condition, timeout, state) \
-({ \
- int __ret = 0; \
- struct hrtimer_sleeper __t; \
- \
- hrtimer_init_on_stack(&__t.timer, CLOCK_MONOTONIC, \
- HRTIMER_MODE_REL); \
- hrtimer_init_sleeper(&__t, current); \
- if ((timeout).tv64 != KTIME_MAX) \
- hrtimer_start_range_ns(&__t.timer, timeout, \
- current->timer_slack_ns, \
- HRTIMER_MODE_REL); \
- \
- __ret = ___wait_event(wq, condition, state, 0, 0, \
- if (!__t.task) { \
- __ret = -ETIME; \
- break; \
- } \
- schedule()); \
- \
- hrtimer_cancel(&__t.timer); \
- destroy_hrtimer_on_stack(&__t.timer); \
- __ret; \
-})
-
-/**
- * wait_event_hrtimeout - sleep until a condition gets true or a timeout elapses
- * @wq: the waitqueue to wait on
- * @condition: a C expression for the event to wait for
- * @timeout: timeout, as a ktime_t
- *
- * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
- * @condition evaluates to true or a signal is received.
- * The @condition is checked each time the waitqueue @wq is woken up.
- *
- * wake_up() has to be called after changing any variable that could
- * change the result of the wait condition.
- *
- * The function returns 0 if @condition became true, or -ETIME if the timeout
- * elapsed.
- */
-#define wait_event_hrtimeout(wq, condition, timeout) \
-({ \
- int __ret = 0; \
- might_sleep(); \
- if (!(condition)) \
- __ret = __wait_event_hrtimeout(wq, condition, timeout, \
- TASK_UNINTERRUPTIBLE); \
- __ret; \
-})
-
-/**
- * wait_event_interruptible_hrtimeout - sleep until a condition gets true or a timeout elapses
- * @wq: the waitqueue to wait on
- * @condition: a C expression for the event to wait for
- * @timeout: timeout, as a ktime_t
- *
- * The process is put to sleep (TASK_INTERRUPTIBLE) until the
- * @condition evaluates to true or a signal is received.
- * The @condition is checked each time the waitqueue @wq is woken up.
- *
- * wake_up() has to be called after changing any variable that could
- * change the result of the wait condition.
- *
- * The function returns 0 if @condition became true, -ERESTARTSYS if it was
- * interrupted by a signal, or -ETIME if the timeout elapsed.
- */
-#define wait_event_interruptible_hrtimeout(wq, condition, timeout) \
-({ \
- long __ret = 0; \
- might_sleep(); \
- if (!(condition)) \
- __ret = __wait_event_hrtimeout(wq, condition, timeout, \
- TASK_INTERRUPTIBLE); \
- __ret; \
-})
-
-#define __wait_event_interruptible_exclusive(wq, condition) \
- ___wait_event(wq, condition, TASK_INTERRUPTIBLE, 1, 0, \
- schedule())
-
-#define wait_event_interruptible_exclusive(wq, condition) \
-({ \
- int __ret = 0; \
- might_sleep(); \
- if (!(condition)) \
- __ret = __wait_event_interruptible_exclusive(wq, condition);\
- __ret; \
-})
-
-#define __wait_event_killable_exclusive(wq, condition) \
- ___wait_event(wq, condition, TASK_KILLABLE, 1, 0, \
- schedule())
-
-#define wait_event_killable_exclusive(wq, condition) \
-({ \
- int __ret = 0; \
- might_sleep(); \
- if (!(condition)) \
- __ret = __wait_event_killable_exclusive(wq, condition); \
- __ret; \
-})
-
-
-#define __wait_event_freezable_exclusive(wq, condition) \
- ___wait_event(wq, condition, TASK_INTERRUPTIBLE, 1, 0, \
- schedule(); try_to_freeze())
-
-#define wait_event_freezable_exclusive(wq, condition) \
-({ \
- int __ret = 0; \
- might_sleep(); \
- if (!(condition)) \
- __ret = __wait_event_freezable_exclusive(wq, condition);\
- __ret; \
-})
-
-
-#define __wait_event_interruptible_locked(wq, condition, exclusive, irq) \
-({ \
- int __ret = 0; \
- DEFINE_WAIT(__wait); \
- if (exclusive) \
- __wait.flags |= WQ_FLAG_EXCLUSIVE; \
- do { \
- if (likely(list_empty(&__wait.task_list))) \
- __add_wait_queue_tail(&(wq), &__wait); \
- set_current_state(TASK_INTERRUPTIBLE); \
- if (signal_pending(current)) { \
- __ret = -ERESTARTSYS; \
- break; \
- } \
- if (irq) \
- spin_unlock_irq(&(wq).lock); \
- else \
- spin_unlock(&(wq).lock); \
- schedule(); \
- if (irq) \
- spin_lock_irq(&(wq).lock); \
- else \
- spin_lock(&(wq).lock); \
- } while (!(condition)); \
- __remove_wait_queue(&(wq), &__wait); \
- __set_current_state(TASK_RUNNING); \
- __ret; \
-})
-
-
-/**
- * wait_event_interruptible_locked - sleep until a condition gets true
- * @wq: the waitqueue to wait on
- * @condition: a C expression for the event to wait for
- *
- * The process is put to sleep (TASK_INTERRUPTIBLE) until the
- * @condition evaluates to true or a signal is received.
- * The @condition is checked each time the waitqueue @wq is woken up.
- *
- * It must be called with wq.lock being held. This spinlock is
- * unlocked while sleeping but @condition testing is done while lock
- * is held and when this macro exits the lock is held.
- *
- * The lock is locked/unlocked using spin_lock()/spin_unlock()
- * functions which must match the way they are locked/unlocked outside
- * of this macro.
- *
- * wake_up_locked() has to be called after changing any variable that could
- * change the result of the wait condition.
- *
- * The function will return -ERESTARTSYS if it was interrupted by a
- * signal and 0 if @condition evaluated to true.
- */
-#define wait_event_interruptible_locked(wq, condition) \
- ((condition) \
- ? 0 : __wait_event_interruptible_locked(wq, condition, 0, 0))
-
-/**
- * wait_event_interruptible_locked_irq - sleep until a condition gets true
- * @wq: the waitqueue to wait on
- * @condition: a C expression for the event to wait for
- *
- * The process is put to sleep (TASK_INTERRUPTIBLE) until the
- * @condition evaluates to true or a signal is received.
- * The @condition is checked each time the waitqueue @wq is woken up.
- *
- * It must be called with wq.lock being held. This spinlock is
- * unlocked while sleeping but @condition testing is done while lock
- * is held and when this macro exits the lock is held.
- *
- * The lock is locked/unlocked using spin_lock_irq()/spin_unlock_irq()
- * functions which must match the way they are locked/unlocked outside
- * of this macro.
- *
- * wake_up_locked() has to be called after changing any variable that could
- * change the result of the wait condition.
- *
- * The function will return -ERESTARTSYS if it was interrupted by a
- * signal and 0 if @condition evaluated to true.
- */
-#define wait_event_interruptible_locked_irq(wq, condition) \
- ((condition) \
- ? 0 : __wait_event_interruptible_locked(wq, condition, 0, 1))
-
-/**
- * wait_event_interruptible_exclusive_locked - sleep exclusively until a condition gets true
- * @wq: the waitqueue to wait on
- * @condition: a C expression for the event to wait for
- *
- * The process is put to sleep (TASK_INTERRUPTIBLE) until the
- * @condition evaluates to true or a signal is received.
- * The @condition is checked each time the waitqueue @wq is woken up.
- *
- * It must be called with wq.lock being held. This spinlock is
- * unlocked while sleeping but @condition testing is done while lock
- * is held and when this macro exits the lock is held.
- *
- * The lock is locked/unlocked using spin_lock()/spin_unlock()
- * functions which must match the way they are locked/unlocked outside
- * of this macro.
- *
- * The process is put on the wait queue with an WQ_FLAG_EXCLUSIVE flag
- * set thus when other process waits process on the list if this
- * process is awaken further processes are not considered.
- *
- * wake_up_locked() has to be called after changing any variable that could
- * change the result of the wait condition.
- *
- * The function will return -ERESTARTSYS if it was interrupted by a
- * signal and 0 if @condition evaluated to true.
- */
-#define wait_event_interruptible_exclusive_locked(wq, condition) \
- ((condition) \
- ? 0 : __wait_event_interruptible_locked(wq, condition, 1, 0))
-
-/**
- * wait_event_interruptible_exclusive_locked_irq - sleep until a condition gets true
- * @wq: the waitqueue to wait on
- * @condition: a C expression for the event to wait for
- *
- * The process is put to sleep (TASK_INTERRUPTIBLE) until the
- * @condition evaluates to true or a signal is received.
- * The @condition is checked each time the waitqueue @wq is woken up.
- *
- * It must be called with wq.lock being held. This spinlock is
- * unlocked while sleeping but @condition testing is done while lock
- * is held and when this macro exits the lock is held.
- *
- * The lock is locked/unlocked using spin_lock_irq()/spin_unlock_irq()
- * functions which must match the way they are locked/unlocked outside
- * of this macro.
- *
- * The process is put on the wait queue with the WQ_FLAG_EXCLUSIVE flag
- * set, so if other processes are waiting on the list and this process
- * is woken, further processes are not considered.
- *
- * wake_up_locked() has to be called after changing any variable that could
- * change the result of the wait condition.
- *
- * The function will return -ERESTARTSYS if it was interrupted by a
- * signal and 0 if @condition evaluated to true.
- */
-#define wait_event_interruptible_exclusive_locked_irq(wq, condition) \
- ((condition) \
- ? 0 : __wait_event_interruptible_locked(wq, condition, 1, 1))
-
-
-#define __wait_event_killable(wq, condition) \
- ___wait_event(wq, condition, TASK_KILLABLE, 0, 0, schedule())
-
-/**
- * wait_event_killable - sleep until a condition gets true
- * @wq: the waitqueue to wait on
- * @condition: a C expression for the event to wait for
- *
- * The process is put to sleep (TASK_KILLABLE) until the
- * @condition evaluates to true or a signal is received.
- * The @condition is checked each time the waitqueue @wq is woken up.
- *
- * wake_up() has to be called after changing any variable that could
- * change the result of the wait condition.
- *
- * The function will return -ERESTARTSYS if it was interrupted by a
- * signal and 0 if @condition evaluated to true.
- */
-#define wait_event_killable(wq, condition) \
-({ \
- int __ret = 0; \
- might_sleep(); \
- if (!(condition)) \
- __ret = __wait_event_killable(wq, condition); \
- __ret; \
-})
-
-
-#define __wait_event_lock_irq(wq, condition, lock, cmd) \
- (void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0, \
- spin_unlock_irq(&lock); \
- cmd; \
- schedule(); \
- spin_lock_irq(&lock))
-
-/**
- * wait_event_lock_irq_cmd - sleep until a condition gets true. The
- * condition is checked under the lock. This
- * is expected to be called with the lock
- * taken.
- * @wq: the waitqueue to wait on
- * @condition: a C expression for the event to wait for
- * @lock: a locked spinlock_t, which will be released before cmd
- * and schedule() and reacquired afterwards.
- * @cmd: a command which is invoked outside the critical section before
- * sleep
- *
- * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
- * @condition evaluates to true. The @condition is checked each time
- * the waitqueue @wq is woken up.
- *
- * wake_up() has to be called after changing any variable that could
- * change the result of the wait condition.
- *
- * This is supposed to be called while holding the lock. The lock is
- * dropped before invoking the cmd and going to sleep and is reacquired
- * afterwards.
- */
-#define wait_event_lock_irq_cmd(wq, condition, lock, cmd) \
-do { \
- if (condition) \
- break; \
- __wait_event_lock_irq(wq, condition, lock, cmd); \
-} while (0)
-
-/**
- * wait_event_lock_irq - sleep until a condition gets true. The
- * condition is checked under the lock. This
- * is expected to be called with the lock
- * taken.
- * @wq: the waitqueue to wait on
- * @condition: a C expression for the event to wait for
- * @lock: a locked spinlock_t, which will be released before schedule()
- * and reacquired afterwards.
- *
- * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
- * @condition evaluates to true. The @condition is checked each time
- * the waitqueue @wq is woken up.
- *
- * wake_up() has to be called after changing any variable that could
- * change the result of the wait condition.
- *
- * This is supposed to be called while holding the lock. The lock is
- * dropped before going to sleep and is reacquired afterwards.
- */
-#define wait_event_lock_irq(wq, condition, lock) \
-do { \
- if (condition) \
- break; \
- __wait_event_lock_irq(wq, condition, lock, ); \
-} while (0)
-
-
-#define __wait_event_interruptible_lock_irq(wq, condition, lock, cmd) \
- ___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, 0, \
- spin_unlock_irq(&lock); \
- cmd; \
- schedule(); \
- spin_lock_irq(&lock))
-
-/**
- * wait_event_interruptible_lock_irq_cmd - sleep until a condition gets true.
- * The condition is checked under the lock. This is expected to
- * be called with the lock taken.
- * @wq: the waitqueue to wait on
- * @condition: a C expression for the event to wait for
- * @lock: a locked spinlock_t, which will be released before cmd and
- * schedule() and reacquired afterwards.
- * @cmd: a command which is invoked outside the critical section before
- * sleep
- *
- * The process is put to sleep (TASK_INTERRUPTIBLE) until the
- * @condition evaluates to true or a signal is received. The @condition is
- * checked each time the waitqueue @wq is woken up.
- *
- * wake_up() has to be called after changing any variable that could
- * change the result of the wait condition.
- *
- * This is supposed to be called while holding the lock. The lock is
- * dropped before invoking the cmd and going to sleep and is reacquired
- * afterwards.
- *
- * The macro will return -ERESTARTSYS if it was interrupted by a signal
- * and 0 if @condition evaluated to true.
- */
-#define wait_event_interruptible_lock_irq_cmd(wq, condition, lock, cmd) \
-({ \
- int __ret = 0; \
- if (!(condition)) \
- __ret = __wait_event_interruptible_lock_irq(wq, \
- condition, lock, cmd); \
- __ret; \
-})
-
-/**
- * wait_event_interruptible_lock_irq - sleep until a condition gets true.
- * The condition is checked under the lock. This is expected
- * to be called with the lock taken.
- * @wq: the waitqueue to wait on
- * @condition: a C expression for the event to wait for
- * @lock: a locked spinlock_t, which will be released before schedule()
- * and reacquired afterwards.
- *
- * The process is put to sleep (TASK_INTERRUPTIBLE) until the
- * @condition evaluates to true or signal is received. The @condition is
- * checked each time the waitqueue @wq is woken up.
- *
- * wake_up() has to be called after changing any variable that could
- * change the result of the wait condition.
- *
- * This is supposed to be called while holding the lock. The lock is
- * dropped before going to sleep and is reacquired afterwards.
- *
- * The macro will return -ERESTARTSYS if it was interrupted by a signal
- * and 0 if @condition evaluated to true.
- */
-#define wait_event_interruptible_lock_irq(wq, condition, lock) \
-({ \
- int __ret = 0; \
- if (!(condition)) \
- __ret = __wait_event_interruptible_lock_irq(wq, \
- condition, lock,); \
- __ret; \
-})
-
-#define __wait_event_interruptible_lock_irq_timeout(wq, condition, \
- lock, timeout) \
- ___wait_event(wq, ___wait_cond_timeout(condition), \
- TASK_INTERRUPTIBLE, 0, timeout, \
- spin_unlock_irq(&lock); \
- __ret = schedule_timeout(__ret); \
- spin_lock_irq(&lock));
-
-/**
- * wait_event_interruptible_lock_irq_timeout - sleep until a condition gets
- * true or a timeout elapses. The condition is checked under
- * the lock. This is expected to be called with the lock taken.
- * @wq: the waitqueue to wait on
- * @condition: a C expression for the event to wait for
- * @lock: a locked spinlock_t, which will be released before schedule()
- * and reacquired afterwards.
- * @timeout: timeout, in jiffies
- *
- * The process is put to sleep (TASK_INTERRUPTIBLE) until the
- * @condition evaluates to true or signal is received. The @condition is
- * checked each time the waitqueue @wq is woken up.
- *
- * wake_up() has to be called after changing any variable that could
- * change the result of the wait condition.
- *
- * This is supposed to be called while holding the lock. The lock is
- * dropped before going to sleep and is reacquired afterwards.
- *
- * The function returns 0 if the @timeout elapsed, -ERESTARTSYS if it
- * was interrupted by a signal, and the remaining jiffies otherwise
- * if the condition evaluated to true before the timeout elapsed.
- */
-#define wait_event_interruptible_lock_irq_timeout(wq, condition, lock, \
- timeout) \
-({ \
- long __ret = timeout; \
- if (!___wait_cond_timeout(condition)) \
- __ret = __wait_event_interruptible_lock_irq_timeout( \
- wq, condition, lock, timeout); \
- __ret; \
-})
-
-/*
- * Waitqueues which are removed from the waitqueue_head at wakeup time
- */
-void prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state);
-void prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state);
-long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_t *wait, int state);
-void finish_wait(wait_queue_head_t *q, wait_queue_t *wait);
-void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait, unsigned int mode, void *key);
-long wait_woken(wait_queue_t *wait, unsigned mode, long timeout);
-int woken_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
-int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
-int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
-
-#define DEFINE_WAIT_FUNC(name, function) \
- wait_queue_t name = { \
- .private = current, \
- .func = function, \
- .task_list = LIST_HEAD_INIT((name).task_list), \
- }
-
-#define DEFINE_WAIT(name) DEFINE_WAIT_FUNC(name, autoremove_wake_function)
-
-#define DEFINE_WAIT_BIT(name, word, bit) \
- struct wait_bit_queue name = { \
- .key = __WAIT_BIT_KEY_INITIALIZER(word, bit), \
- .wait = { \
- .private = current, \
- .func = wake_bit_function, \
- .task_list = \
- LIST_HEAD_INIT((name).wait.task_list), \
- }, \
- }
-
-#define init_wait(wait) \
- do { \
- (wait)->private = current; \
- (wait)->func = autoremove_wake_function; \
- INIT_LIST_HEAD(&(wait)->task_list); \
- (wait)->flags = 0; \
- } while (0)
-
-
-extern int bit_wait(struct wait_bit_key *, int);
-extern int bit_wait_io(struct wait_bit_key *, int);
-extern int bit_wait_timeout(struct wait_bit_key *, int);
-extern int bit_wait_io_timeout(struct wait_bit_key *, int);
+void wake_up_bit(void *, int);
+void __wait_on_bit(void *, int, unsigned);
+void __wait_on_bit_lock(void *, int, unsigned);
-/**
- * wait_on_bit - wait for a bit to be cleared
- * @word: the word being waited on, a kernel virtual address
- * @bit: the bit of the word being waited on
- * @mode: the task state to sleep in
- *
- * There is a standard hashed waitqueue table for generic use. This
- * is the part of the hashtable's accessor API that waits on a bit.
- * For instance, if one were to have waiters on a bitflag, one would
- * call wait_on_bit() in threads waiting for the bit to clear.
- * One uses wait_on_bit() where one is waiting for the bit to clear,
- * but has no intention of setting it.
- * Returned value will be zero if the bit was cleared, or non-zero
- * if the process received a signal and the mode permitted wakeup
- * on that signal.
- */
static inline int
wait_on_bit(unsigned long *word, int bit, unsigned mode)
{
- might_sleep();
if (!test_bit(bit, word))
return 0;
- return out_of_line_wait_on_bit(word, bit,
- bit_wait,
- mode);
+ __wait_on_bit(word, bit, mode);
+ return 0;
}
-/**
- * wait_on_bit_io - wait for a bit to be cleared
- * @word: the word being waited on, a kernel virtual address
- * @bit: the bit of the word being waited on
- * @mode: the task state to sleep in
- *
- * Use the standard hashed waitqueue table to wait for a bit
- * to be cleared. This is similar to wait_on_bit(), but calls
- * io_schedule() instead of schedule() for the actual waiting.
- *
- * Returned value will be zero if the bit was cleared, or non-zero
- * if the process received a signal and the mode permitted wakeup
- * on that signal.
- */
-static inline int
-wait_on_bit_io(unsigned long *word, int bit, unsigned mode)
-{
- might_sleep();
- if (!test_bit(bit, word))
- return 0;
- return out_of_line_wait_on_bit(word, bit,
- bit_wait_io,
- mode);
-}
-
-/**
- * wait_on_bit_timeout - wait for a bit to be cleared or a timeout elapses
- * @word: the word being waited on, a kernel virtual address
- * @bit: the bit of the word being waited on
- * @mode: the task state to sleep in
- * @timeout: timeout, in jiffies
- *
- * Use the standard hashed waitqueue table to wait for a bit
- * to be cleared. This is similar to wait_on_bit(), except also takes a
- * timeout parameter.
- *
- * Returned value will be zero if the bit was cleared before the
- * @timeout elapsed, or non-zero if the @timeout elapsed or process
- * received a signal and the mode permitted wakeup on that signal.
- */
-static inline int
-wait_on_bit_timeout(unsigned long *word, int bit, unsigned mode,
- unsigned long timeout)
-{
- might_sleep();
- if (!test_bit(bit, word))
- return 0;
- return out_of_line_wait_on_bit_timeout(word, bit,
- bit_wait_timeout,
- mode, timeout);
-}
-
-/**
- * wait_on_bit_action - wait for a bit to be cleared
- * @word: the word being waited on, a kernel virtual address
- * @bit: the bit of the word being waited on
- * @action: the function used to sleep, which may take special actions
- * @mode: the task state to sleep in
- *
- * Use the standard hashed waitqueue table to wait for a bit
- * to be cleared, and allow the waiting action to be specified.
- * This is like wait_on_bit() but allows fine control of how the waiting
- * is done.
- *
- * Returned value will be zero if the bit was cleared, or non-zero
- * if the process received a signal and the mode permitted wakeup
- * on that signal.
- */
-static inline int
-wait_on_bit_action(unsigned long *word, int bit, wait_bit_action_f *action,
- unsigned mode)
-{
- might_sleep();
- if (!test_bit(bit, word))
- return 0;
- return out_of_line_wait_on_bit(word, bit, action, mode);
-}
-
-/**
- * wait_on_bit_lock - wait for a bit to be cleared, when wanting to set it
- * @word: the word being waited on, a kernel virtual address
- * @bit: the bit of the word being waited on
- * @mode: the task state to sleep in
- *
- * There is a standard hashed waitqueue table for generic use. This
- * is the part of the hashtable's accessor API that waits on a bit
- * when one intends to set it, for instance, trying to lock bitflags.
- * For instance, if one were to have waiters trying to set bitflag
- * and waiting for it to clear before setting it, one would call
- * wait_on_bit() in threads waiting to be able to set the bit.
- * One uses wait_on_bit_lock() where one is waiting for the bit to
- * clear with the intention of setting it, and when done, clearing it.
- *
- * Returns zero if the bit was (eventually) found to be clear and was
- * set. Returns non-zero if a signal was delivered to the process and
- * the @mode allows that signal to wake the process.
- */
static inline int
wait_on_bit_lock(unsigned long *word, int bit, unsigned mode)
{
- might_sleep();
if (!test_and_set_bit(bit, word))
return 0;
- return out_of_line_wait_on_bit_lock(word, bit, bit_wait, mode);
+ __wait_on_bit_lock(word, bit, mode);
+ return 0;
}
-/**
- * wait_on_bit_lock_io - wait for a bit to be cleared, when wanting to set it
- * @word: the word being waited on, a kernel virtual address
- * @bit: the bit of the word being waited on
- * @mode: the task state to sleep in
- *
- * Use the standard hashed waitqueue table to wait for a bit
- * to be cleared and then to atomically set it. This is similar
- * to wait_on_bit(), but calls io_schedule() instead of schedule()
- * for the actual waiting.
- *
- * Returns zero if the bit was (eventually) found to be clear and was
- * set. Returns non-zero if a signal was delivered to the process and
- * the @mode allows that signal to wake the process.
- */
-static inline int
-wait_on_bit_lock_io(unsigned long *word, int bit, unsigned mode)
-{
- might_sleep();
- if (!test_and_set_bit(bit, word))
- return 0;
- return out_of_line_wait_on_bit_lock(word, bit, bit_wait_io, mode);
-}
-
-/**
- * wait_on_bit_lock_action - wait for a bit to be cleared, when wanting to set it
- * @word: the word being waited on, a kernel virtual address
- * @bit: the bit of the word being waited on
- * @action: the function used to sleep, which may take special actions
- * @mode: the task state to sleep in
- *
- * Use the standard hashed waitqueue table to wait for a bit
- * to be cleared and then to set it, and allow the waiting action
- * to be specified.
- * This is like wait_on_bit() but allows fine control of how the waiting
- * is done.
- *
- * Returns zero if the bit was (eventually) found to be clear and was
- * set. Returns non-zero if a signal was delivered to the process and
- * the @mode allows that signal to wake the process.
- */
-static inline int
-wait_on_bit_lock_action(unsigned long *word, int bit, wait_bit_action_f *action,
- unsigned mode)
-{
- might_sleep();
- if (!test_and_set_bit(bit, word))
- return 0;
- return out_of_line_wait_on_bit_lock(word, bit, action, mode);
-}
-
-/**
- * wait_on_atomic_t - Wait for an atomic_t to become 0
- * @val: The atomic value being waited on, a kernel virtual address
- * @action: the function used to sleep, which may take special actions
- * @mode: the task state to sleep in
- *
- * Wait for an atomic_t to become 0. We abuse the bit-wait waitqueue table for
- * the purpose of getting a waitqueue, but we set the key to a bit number
- * outside of the target 'word'.
- */
-static inline
-int wait_on_atomic_t(atomic_t *val, int (*action)(atomic_t *), unsigned mode)
-{
- might_sleep();
- if (atomic_read(val) == 0)
- return 0;
- return out_of_line_wait_on_atomic_t(val, action, mode);
-}
+#define wait_on_bit_io(w, b, m) wait_on_bit(w, b, m)
+#define wait_on_bit_lock_io(w, b, m) wait_on_bit_lock(w, b, m)
#endif /* _LINUX_WAIT_H */
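As a quick orientation (an editorial sketch, not part of the patch): the surviving wait_on_bit()/wait_on_bit_lock() shims above keep the kernel calling convention while delegating to the userspace __wait_on_bit() helpers. A minimal usage sketch follows, assuming the usual bitops/sched shims (clear_bit(), TASK_UNINTERRUPTIBLE) are available; the bit name and flush logic are invented for illustration.

/* Illustrative only: BTREE_NODE_DIRTY and the flush logic are made up. */
enum { BTREE_NODE_DIRTY };

static void flush_and_release(unsigned long *flags)
{
	/* Sleeps until the bit is clear, then sets it atomically: */
	wait_on_bit_lock(flags, BTREE_NODE_DIRTY, TASK_UNINTERRUPTIBLE);

	/* ... write out the node ... */

	clear_bit(BTREE_NODE_DIRTY, flags);
	/* Wakes anything sleeping in wait_on_bit()/wait_on_bit_lock(): */
	wake_up_bit(flags, BTREE_NODE_DIRTY);
}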
+++ /dev/null
-/* zconf.h -- configuration of the zlib compression library
- * Copyright (C) 1995-1998 Jean-loup Gailly.
- * For conditions of distribution and use, see copyright notice in zlib.h
- */
-
-/* @(#) $Id$ */
-
-#ifndef _ZCONF_H
-#define _ZCONF_H
-
-/* The memory requirements for deflate are (in bytes):
- (1 << (windowBits+2)) + (1 << (memLevel+9))
- that is: 128K for windowBits=15 + 128K for memLevel = 8 (default values)
- plus a few kilobytes for small objects. For example, if you want to reduce
- the default memory requirements from 256K to 128K, compile with
- make CFLAGS="-O -DMAX_WBITS=14 -DMAX_MEM_LEVEL=7"
- Of course this will generally degrade compression (there's no free lunch).
-
- The memory requirements for inflate are (in bytes) 1 << windowBits
- that is, 32K for windowBits=15 (default value) plus a few kilobytes
- for small objects.
-*/
-
-/* Maximum value for memLevel in deflateInit2 */
-#ifndef MAX_MEM_LEVEL
-# define MAX_MEM_LEVEL 8
-#endif
-
-/* Maximum value for windowBits in deflateInit2 and inflateInit2.
- * WARNING: reducing MAX_WBITS makes minigzip unable to extract .gz files
- * created by gzip. (Files created by minigzip can still be extracted by
- * gzip.)
- */
-#ifndef MAX_WBITS
-# define MAX_WBITS 15 /* 32K LZ77 window */
-#endif
-
-/* default windowBits for decompression. MAX_WBITS is for compression only */
-#ifndef DEF_WBITS
-# define DEF_WBITS MAX_WBITS
-#endif
-
-/* default memLevel */
-#if MAX_MEM_LEVEL >= 8
-# define DEF_MEM_LEVEL 8
-#else
-# define DEF_MEM_LEVEL MAX_MEM_LEVEL
-#endif
-
- /* Type declarations */
-
-typedef unsigned char Byte; /* 8 bits */
-typedef unsigned int uInt; /* 16 bits or more */
-typedef unsigned long uLong; /* 32 bits or more */
-typedef void *voidp;
-
-#endif /* _ZCONF_H */
-/* zlib.h -- interface of the 'zlib' general purpose compression library
-
- Copyright (C) 1995-2005 Jean-loup Gailly and Mark Adler
-
- This software is provided 'as-is', without any express or implied
- warranty. In no event will the authors be held liable for any damages
- arising from the use of this software.
-
- Permission is granted to anyone to use this software for any purpose,
- including commercial applications, and to alter it and redistribute it
- freely, subject to the following restrictions:
-
- 1. The origin of this software must not be misrepresented; you must not
- claim that you wrote the original software. If you use this software
- in a product, an acknowledgment in the product documentation would be
- appreciated but is not required.
- 2. Altered source versions must be plainly marked as such, and must not be
- misrepresented as being the original software.
- 3. This notice may not be removed or altered from any source distribution.
-
- Jean-loup Gailly Mark Adler
- jloup@gzip.org madler@alumni.caltech.edu
-
-
- The data format used by the zlib library is described by RFCs (Request for
- Comments) 1950 to 1952 in the files http://www.ietf.org/rfc/rfc1950.txt
- (zlib format), rfc1951.txt (deflate format) and rfc1952.txt (gzip format).
-*/
-
#ifndef _ZLIB_H
#define _ZLIB_H
-#include <linux/zconf.h>
-
-/* zlib deflate based on ZLIB_VERSION "1.1.3" */
-/* zlib inflate based on ZLIB_VERSION "1.2.3" */
-
-/*
- This is a modified version of zlib for use inside the Linux kernel.
- The main changes are to perform all memory allocation in advance.
-
- Inflation Changes:
- * Z_PACKET_FLUSH is added and used by ppp_deflate. Before returning
- this checks there is no more input data available and the next data
- is a STORED block. It also resets the mode to be read for the next
- data, all as per PPP requirements.
- * Addition of zlib_inflateIncomp which copies incompressible data into
- the history window and adjusts the accounting without calling
- zlib_inflate itself to inflate the data.
-*/
-
-/*
- The 'zlib' compression library provides in-memory compression and
- decompression functions, including integrity checks of the uncompressed
- data. This version of the library supports only one compression method
- (deflation) but other algorithms will be added later and will have the same
- stream interface.
-
- Compression can be done in a single step if the buffers are large
- enough (for example if an input file is mmap'ed), or can be done by
- repeated calls of the compression function. In the latter case, the
- application must provide more input and/or consume the output
- (providing more output space) before each call.
-
- The compressed data format used by default by the in-memory functions is
- the zlib format, which is a zlib wrapper documented in RFC 1950, wrapped
- around a deflate stream, which is itself documented in RFC 1951.
-
- The library also supports reading and writing files in gzip (.gz) format
- with an interface similar to that of stdio.
-
- The zlib format was designed to be compact and fast for use in memory
- and on communications channels. The gzip format was designed for single-
- file compression on file systems, has a larger header than zlib to maintain
- directory information, and uses a different, slower check method than zlib.
-
- The library does not install any signal handler. The decoder checks
- the consistency of the compressed data, so the library should never
- crash even in case of corrupted input.
-*/
-
-struct internal_state;
-
-typedef struct z_stream_s {
- const Byte *next_in; /* next input byte */
- uLong avail_in; /* number of bytes available at next_in */
- uLong total_in; /* total nb of input bytes read so far */
-
- Byte *next_out; /* next output byte should be put there */
- uLong avail_out; /* remaining free space at next_out */
- uLong total_out; /* total nb of bytes output so far */
-
- char *msg; /* last error message, NULL if no error */
- struct internal_state *state; /* not visible by applications */
-
- void *workspace; /* memory allocated for this stream */
-
- int data_type; /* best guess about the data type: ascii or binary */
- uLong adler; /* adler32 value of the uncompressed data */
- uLong reserved; /* reserved for future use */
-} z_stream;
-
-typedef z_stream *z_streamp;
-
-/*
- The application must update next_in and avail_in when avail_in has
- dropped to zero. It must update next_out and avail_out when avail_out
- has dropped to zero. The application must initialize zalloc, zfree and
- opaque before calling the init function. All other fields are set by the
- compression library and must not be updated by the application.
-
- The opaque value provided by the application will be passed as the first
- parameter for calls of zalloc and zfree. This can be useful for custom
- memory management. The compression library attaches no meaning to the
- opaque value.
-
- zalloc must return NULL if there is not enough memory for the object.
- If zlib is used in a multi-threaded application, zalloc and zfree must be
- thread safe.
-
- On 16-bit systems, the functions zalloc and zfree must be able to allocate
- exactly 65536 bytes, but will not be required to allocate more than this
- if the symbol MAXSEG_64K is defined (see zconf.h). WARNING: On MSDOS,
- pointers returned by zalloc for objects of exactly 65536 bytes *must*
- have their offset normalized to zero. The default allocation function
- provided by this library ensures this (see zutil.c). To reduce memory
- requirements and avoid any allocation of 64K objects, at the expense of
- compression ratio, compile the library with -DMAX_WBITS=14 (see zconf.h).
-
- The fields total_in and total_out can be used for statistics or
- progress reports. After compression, total_in holds the total size of
- the uncompressed data and may be saved for use in the decompressor
- (particularly if the decompressor wants to decompress everything in
- a single step).
-*/
-
- /* constants */
-
-#define Z_NO_FLUSH 0
-#define Z_PARTIAL_FLUSH 1 /* will be removed, use Z_SYNC_FLUSH instead */
-#define Z_PACKET_FLUSH 2
-#define Z_SYNC_FLUSH 3
-#define Z_FULL_FLUSH 4
-#define Z_FINISH 5
-#define Z_BLOCK 6 /* Only for inflate at present */
-/* Allowed flush values; see deflate() and inflate() below for details */
-
-#define Z_OK 0
-#define Z_STREAM_END 1
-#define Z_NEED_DICT 2
-#define Z_ERRNO (-1)
-#define Z_STREAM_ERROR (-2)
-#define Z_DATA_ERROR (-3)
-#define Z_MEM_ERROR (-4)
-#define Z_BUF_ERROR (-5)
-#define Z_VERSION_ERROR (-6)
-/* Return codes for the compression/decompression functions. Negative
- * values are errors, positive values are used for special but normal events.
- */
-
-#define Z_NO_COMPRESSION 0
-#define Z_BEST_SPEED 1
-#define Z_BEST_COMPRESSION 9
-#define Z_DEFAULT_COMPRESSION (-1)
-/* compression levels */
-
-#define Z_FILTERED 1
-#define Z_HUFFMAN_ONLY 2
-#define Z_DEFAULT_STRATEGY 0
-/* compression strategy; see deflateInit2() below for details */
-
-#define Z_BINARY 0
-#define Z_ASCII 1
-#define Z_UNKNOWN 2
-/* Possible values of the data_type field */
-
-#define Z_DEFLATED 8
-/* The deflate compression method (the only one supported in this version) */
-
- /* basic functions */
-
-extern int zlib_deflate_workspacesize (int windowBits, int memLevel);
-/*
- Returns the number of bytes that needs to be allocated for a per-
- stream workspace with the specified parameters. A pointer to this
- number of bytes should be returned in stream->workspace before
- you call zlib_deflateInit() or zlib_deflateInit2(). If you call
- zlib_deflateInit(), specify windowBits = MAX_WBITS and memLevel =
- MAX_MEM_LEVEL here. If you call zlib_deflateInit2(), the windowBits
- and memLevel parameters passed to zlib_deflateInit2() must not
- exceed those passed here.
-*/
-
-/*
-extern int deflateInit (z_streamp strm, int level);
-
- Initializes the internal stream state for compression. The fields
- zalloc, zfree and opaque must be initialized before by the caller.
- If zalloc and zfree are set to NULL, deflateInit updates them to
- use default allocation functions.
-
- The compression level must be Z_DEFAULT_COMPRESSION, or between 0 and 9:
- 1 gives best speed, 9 gives best compression, 0 gives no compression at
- all (the input data is simply copied a block at a time).
- Z_DEFAULT_COMPRESSION requests a default compromise between speed and
- compression (currently equivalent to level 6).
-
- deflateInit returns Z_OK if success, Z_MEM_ERROR if there was not
- enough memory, Z_STREAM_ERROR if level is not a valid compression level,
- Z_VERSION_ERROR if the zlib library version (zlib_version) is incompatible
- with the version assumed by the caller (ZLIB_VERSION).
- msg is set to null if there is no error message. deflateInit does not
- perform any compression: this will be done by deflate().
-*/
-
-
-extern int zlib_deflate (z_streamp strm, int flush);
-/*
- deflate compresses as much data as possible, and stops when the input
- buffer becomes empty or the output buffer becomes full. It may introduce some
- output latency (reading input without producing any output) except when
- forced to flush.
-
- The detailed semantics are as follows. deflate performs one or both of the
- following actions:
-
- - Compress more input starting at next_in and update next_in and avail_in
- accordingly. If not all input can be processed (because there is not
- enough room in the output buffer), next_in and avail_in are updated and
- processing will resume at this point for the next call of deflate().
-
- - Provide more output starting at next_out and update next_out and avail_out
- accordingly. This action is forced if the parameter flush is non zero.
- Forcing flush frequently degrades the compression ratio, so this parameter
- should be set only when necessary (in interactive applications).
- Some output may be provided even if flush is not set.
-
- Before the call of deflate(), the application should ensure that at least
- one of the actions is possible, by providing more input and/or consuming
- more output, and updating avail_in or avail_out accordingly; avail_out
- should never be zero before the call. The application can consume the
- compressed output when it wants, for example when the output buffer is full
- (avail_out == 0), or after each call of deflate(). If deflate returns Z_OK
- and with zero avail_out, it must be called again after making room in the
- output buffer because there might be more output pending.
-
- If the parameter flush is set to Z_SYNC_FLUSH, all pending output is
- flushed to the output buffer and the output is aligned on a byte boundary, so
- that the decompressor can get all input data available so far. (In particular
- avail_in is zero after the call if enough output space has been provided
- before the call.) Flushing may degrade compression for some compression
- algorithms and so it should be used only when necessary.
-
- If flush is set to Z_FULL_FLUSH, all output is flushed as with
- Z_SYNC_FLUSH, and the compression state is reset so that decompression can
- restart from this point if previous compressed data has been damaged or if
- random access is desired. Using Z_FULL_FLUSH too often can seriously degrade
- the compression.
-
- If deflate returns with avail_out == 0, this function must be called again
- with the same value of the flush parameter and more output space (updated
- avail_out), until the flush is complete (deflate returns with non-zero
- avail_out).
-
- If the parameter flush is set to Z_FINISH, pending input is processed,
- pending output is flushed and deflate returns with Z_STREAM_END if there
- was enough output space; if deflate returns with Z_OK, this function must be
- called again with Z_FINISH and more output space (updated avail_out) but no
- more input data, until it returns with Z_STREAM_END or an error. After
- deflate has returned Z_STREAM_END, the only possible operations on the
- stream are deflateReset or deflateEnd.
-
- Z_FINISH can be used immediately after deflateInit if all the compression
- is to be done in a single step. In this case, avail_out must be at least
- 0.1% larger than avail_in plus 12 bytes. If deflate does not return
- Z_STREAM_END, then it must be called again as described above.
-
- deflate() sets strm->adler to the adler32 checksum of all input read
- so far (that is, total_in bytes).
-
- deflate() may update data_type if it can make a good guess about
- the input data type (Z_ASCII or Z_BINARY). In doubt, the data is considered
- binary. This field is only for information purposes and does not affect
- the compression algorithm in any manner.
-
- deflate() returns Z_OK if some progress has been made (more input
- processed or more output produced), Z_STREAM_END if all input has been
- consumed and all output has been produced (only when flush is set to
- Z_FINISH), Z_STREAM_ERROR if the stream state was inconsistent (for example
- if next_in or next_out was NULL), Z_BUF_ERROR if no progress is possible
- (for example avail_in or avail_out was zero).
-*/
-
-
-extern int zlib_deflateEnd (z_streamp strm);
-/*
- All dynamically allocated data structures for this stream are freed.
- This function discards any unprocessed input and does not flush any
- pending output.
-
- deflateEnd returns Z_OK if success, Z_STREAM_ERROR if the
- stream state was inconsistent, Z_DATA_ERROR if the stream was freed
- prematurely (some input or output was discarded). In the error case,
- msg may be set but then points to a static string (which must not be
- deallocated).
-*/
-
-
-extern int zlib_inflate_workspacesize (void);
-/*
- Returns the number of bytes that needs to be allocated for a per-
- stream workspace. A pointer to this number of bytes should be
- returned in stream->workspace before calling zlib_inflateInit().
-*/
-
-/*
-extern int zlib_inflateInit (z_streamp strm);
-
- Initializes the internal stream state for decompression. The fields
- next_in, avail_in, and workspace must be initialized before by
- the caller. If next_in is not NULL and avail_in is large enough (the exact
- value depends on the compression method), inflateInit determines the
- compression method from the zlib header and allocates all data structures
- accordingly; otherwise the allocation will be deferred to the first call of
- inflate. If zalloc and zfree are set to NULL, inflateInit updates them to
- use default allocation functions.
-
- inflateInit returns Z_OK if success, Z_MEM_ERROR if there was not enough
- memory, Z_VERSION_ERROR if the zlib library version is incompatible with the
- version assumed by the caller. msg is set to null if there is no error
- message. inflateInit does not perform any decompression apart from reading
- the zlib header if present: this will be done by inflate(). (So next_in and
- avail_in may be modified, but next_out and avail_out are unchanged.)
-*/
-
-
-extern int zlib_inflate (z_streamp strm, int flush);
-/*
- inflate decompresses as much data as possible, and stops when the input
- buffer becomes empty or the output buffer becomes full. It may introduce
- some output latency (reading input without producing any output) except when
- forced to flush.
-
- The detailed semantics are as follows. inflate performs one or both of the
- following actions:
-
- - Decompress more input starting at next_in and update next_in and avail_in
- accordingly. If not all input can be processed (because there is not
- enough room in the output buffer), next_in is updated and processing
- will resume at this point for the next call of inflate().
-
- - Provide more output starting at next_out and update next_out and avail_out
- accordingly. inflate() provides as much output as possible, until there
- is no more input data or no more space in the output buffer (see below
- about the flush parameter).
-
- Before the call of inflate(), the application should ensure that at least
- one of the actions is possible, by providing more input and/or consuming
- more output, and updating the next_* and avail_* values accordingly.
- The application can consume the uncompressed output when it wants, for
- example when the output buffer is full (avail_out == 0), or after each
- call of inflate(). If inflate returns Z_OK and with zero avail_out, it
- must be called again after making room in the output buffer because there
- might be more output pending.
-
- The flush parameter of inflate() can be Z_NO_FLUSH, Z_SYNC_FLUSH,
- Z_FINISH, or Z_BLOCK. Z_SYNC_FLUSH requests that inflate() flush as much
- output as possible to the output buffer. Z_BLOCK requests that inflate() stop
- if and when it gets to the next deflate block boundary. When decoding the
- zlib or gzip format, this will cause inflate() to return immediately after
- the header and before the first block. When doing a raw inflate, inflate()
- will go ahead and process the first block, and will return when it gets to
- the end of that block, or when it runs out of data.
-
- The Z_BLOCK option assists in appending to or combining deflate streams.
- Also to assist in this, on return inflate() will set strm->data_type to the
- number of unused bits in the last byte taken from strm->next_in, plus 64
- if inflate() is currently decoding the last block in the deflate stream,
- plus 128 if inflate() returned immediately after decoding an end-of-block
- code or decoding the complete header up to just before the first byte of the
- deflate stream. The end-of-block will not be indicated until all of the
- uncompressed data from that block has been written to strm->next_out. The
- number of unused bits may in general be greater than seven, except when
- bit 7 of data_type is set, in which case the number of unused bits will be
- less than eight.
-
- inflate() should normally be called until it returns Z_STREAM_END or an
- error. However if all decompression is to be performed in a single step
- (a single call of inflate), the parameter flush should be set to
- Z_FINISH. In this case all pending input is processed and all pending
- output is flushed; avail_out must be large enough to hold all the
- uncompressed data. (The size of the uncompressed data may have been saved
- by the compressor for this purpose.) The next operation on this stream must
- be inflateEnd to deallocate the decompression state. The use of Z_FINISH
- is never required, but can be used to inform inflate that a faster approach
- may be used for the single inflate() call.
-
- In this implementation, inflate() always flushes as much output as
- possible to the output buffer, and always uses the faster approach on the
- first call. So the only effect of the flush parameter in this implementation
- is on the return value of inflate(), as noted below, or when it returns early
- because Z_BLOCK is used.
-
- If a preset dictionary is needed after this call (see inflateSetDictionary
- below), inflate sets strm->adler to the adler32 checksum of the dictionary
- chosen by the compressor and returns Z_NEED_DICT; otherwise it sets
- strm->adler to the adler32 checksum of all output produced so far (that is,
- total_out bytes) and returns Z_OK, Z_STREAM_END or an error code as described
- below. At the end of the stream, inflate() checks that its computed adler32
- checksum is equal to that saved by the compressor and returns Z_STREAM_END
- only if the checksum is correct.
-
- inflate() will decompress and check either zlib-wrapped or gzip-wrapped
- deflate data. The header type is detected automatically. Any information
- contained in the gzip header is not retained, so applications that need that
- information should instead use raw inflate, see inflateInit2() below, or
- inflateBack() and perform their own processing of the gzip header and
- trailer.
-
- inflate() returns Z_OK if some progress has been made (more input processed
- or more output produced), Z_STREAM_END if the end of the compressed data has
- been reached and all uncompressed output has been produced, Z_NEED_DICT if a
- preset dictionary is needed at this point, Z_DATA_ERROR if the input data was
- corrupted (input stream not conforming to the zlib format or incorrect check
- value), Z_STREAM_ERROR if the stream structure was inconsistent (for example
- if next_in or next_out was NULL), Z_MEM_ERROR if there was not enough memory,
- Z_BUF_ERROR if no progress is possible or if there was not enough room in the
- output buffer when Z_FINISH is used. Note that Z_BUF_ERROR is not fatal, and
- inflate() can be called again with more input and more output space to
- continue decompressing. If Z_DATA_ERROR is returned, the application may then
- call inflateSync() to look for a good compression block if a partial recovery
- of the data is desired.
-*/
-
-
-extern int zlib_inflateEnd (z_streamp strm);
-/*
- All dynamically allocated data structures for this stream are freed.
- This function discards any unprocessed input and does not flush any
- pending output.
-
- inflateEnd returns Z_OK if success, Z_STREAM_ERROR if the stream state
- was inconsistent. In the error case, msg may be set but then points to a
- static string (which must not be deallocated).
-*/
-
- /* Advanced functions */
-
-/*
- The following functions are needed only in some special applications.
-*/
-
-/*
-extern int deflateInit2 (z_streamp strm,
- int level,
- int method,
- int windowBits,
- int memLevel,
- int strategy);
-
- This is another version of deflateInit with more compression options. The
- fields next_in, zalloc, zfree and opaque must be initialized before by
- the caller.
-
- The method parameter is the compression method. It must be Z_DEFLATED in
- this version of the library.
-
- The windowBits parameter is the base two logarithm of the window size
- (the size of the history buffer). It should be in the range 8..15 for this
- version of the library. Larger values of this parameter result in better
- compression at the expense of memory usage. The default value is 15 if
- deflateInit is used instead.
-
- The memLevel parameter specifies how much memory should be allocated
- for the internal compression state. memLevel=1 uses minimum memory but
- is slow and reduces compression ratio; memLevel=9 uses maximum memory
- for optimal speed. The default value is 8. See zconf.h for total memory
- usage as a function of windowBits and memLevel.
-
- The strategy parameter is used to tune the compression algorithm. Use the
- value Z_DEFAULT_STRATEGY for normal data, Z_FILTERED for data produced by a
- filter (or predictor), or Z_HUFFMAN_ONLY to force Huffman encoding only (no
- string match). Filtered data consists mostly of small values with a
- somewhat random distribution. In this case, the compression algorithm is
- tuned to compress them better. The effect of Z_FILTERED is to force more
- Huffman coding and less string matching; it is somewhat intermediate
- between Z_DEFAULT and Z_HUFFMAN_ONLY. The strategy parameter only affects
- the compression ratio but not the correctness of the compressed output even
- if it is not set appropriately.
-
- deflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough
- memory, Z_STREAM_ERROR if a parameter is invalid (such as an invalid
- method). msg is set to null if there is no error message. deflateInit2 does
- not perform any compression: this will be done by deflate().
-*/
-
-extern int zlib_deflateReset (z_streamp strm);
-/*
- This function is equivalent to deflateEnd followed by deflateInit,
- but does not free and reallocate all the internal compression state.
- The stream will keep the same compression level and any other attributes
- that may have been set by deflateInit2.
-
- deflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source
- stream state was inconsistent (such as zalloc or state being NULL).
-*/
-
-static inline unsigned long deflateBound(unsigned long s)
-{
- return s + ((s + 7) >> 3) + ((s + 63) >> 6) + 11;
-}
-
-/*
-extern int inflateInit2 (z_streamp strm, int windowBits);
-
- This is another version of inflateInit with an extra parameter. The
- fields next_in, avail_in, zalloc, zfree and opaque must be initialized
- before by the caller.
-
- The windowBits parameter is the base two logarithm of the maximum window
- size (the size of the history buffer). It should be in the range 8..15 for
- this version of the library. The default value is 15 if inflateInit is used
- instead. windowBits must be greater than or equal to the windowBits value
- provided to deflateInit2() while compressing, or it must be equal to 15 if
- deflateInit2() was not used. If a compressed stream with a larger window
- size is given as input, inflate() will return with the error code
- Z_DATA_ERROR instead of trying to allocate a larger window.
-
- windowBits can also be -8..-15 for raw inflate. In this case, -windowBits
- determines the window size. inflate() will then process raw deflate data,
- not looking for a zlib or gzip header, not generating a check value, and not
- looking for any check values for comparison at the end of the stream. This
- is for use with other formats that use the deflate compressed data format
- such as zip. Those formats provide their own check values. If a custom
- format is developed using the raw deflate format for compressed data, it is
- recommended that a check value such as an adler32 or a crc32 be applied to
- the uncompressed data as is done in the zlib, gzip, and zip formats. For
- most applications, the zlib format should be used as is. Note that comments
- above on the use in deflateInit2() applies to the magnitude of windowBits.
-
- windowBits can also be greater than 15 for optional gzip decoding. Add
- 32 to windowBits to enable zlib and gzip decoding with automatic header
- detection, or add 16 to decode only the gzip format (the zlib format will
- return a Z_DATA_ERROR). If a gzip stream is being decoded, strm->adler is
- a crc32 instead of an adler32.
-
- inflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough
- memory, Z_STREAM_ERROR if a parameter is invalid (such as a null strm). msg
- is set to null if there is no error message. inflateInit2 does not perform
- any decompression apart from reading the zlib header if present: this will
- be done by inflate(). (So next_in and avail_in may be modified, but next_out
- and avail_out are unchanged.)
-*/
-
-extern int zlib_inflateReset (z_streamp strm);
-/*
- This function is equivalent to inflateEnd followed by inflateInit,
- but does not free and reallocate all the internal decompression state.
- The stream will keep attributes that may have been set by inflateInit2.
-
- inflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source
- stream state was inconsistent (such as zalloc or state being NULL).
-*/
-
-extern int zlib_inflateIncomp (z_stream *strm);
-/*
- This function adds the data at next_in (avail_in bytes) to the output
- history without performing any output. There must be no pending output,
- and the decompressor must be expecting to see the start of a block.
- Calling this function is equivalent to decompressing a stored block
- containing the data at next_in (except that the data is not output).
-*/
+#include <zlib.h>
-#define zlib_deflateInit(strm, level) \
- zlib_deflateInit2((strm), (level), Z_DEFLATED, MAX_WBITS, \
- DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY)
-#define zlib_inflateInit(strm) \
- zlib_inflateInit2((strm), DEF_WBITS)
+#define zlib_inflate_workspacesize() 0
+#define zlib_deflate_workspacesize(windowBits, memLevel) 0
-extern int zlib_deflateInit2(z_streamp strm, int level, int method,
- int windowBits, int memLevel,
- int strategy);
-extern int zlib_inflateInit2(z_streamp strm, int windowBits);
+#define zlib_inflateInit2 inflateInit2
+#define zlib_inflate inflate
-#if !defined(_Z_UTIL_H) && !defined(NO_DUMMY_DECL)
- struct internal_state {int dummy;}; /* hack for buggy compilers */
-#endif
+#define zlib_deflateInit2 deflateInit2
+#define zlib_deflate deflate
+#define zlib_deflateEnd deflateEnd
-/* Utility function: initialize zlib, unpack binary blob, clean up zlib,
- * return len or negative error code. */
-extern int zlib_inflate_blob(void *dst, unsigned dst_sz, const void *src, unsigned src_sz);
+#define DEF_MEM_LEVEL 8
#endif /* _ZLIB_H */
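For context (an editorial sketch, not part of the patch): with zlib.h reduced to the thin wrapper above, the zlib_* names resolve to the ordinary userspace zlib entry points, so callers can drive them exactly like stock zlib. The helper below is illustrative; the raw (-MAX_WBITS) window choice is an assumption, not something this hunk dictates.

/* Illustrative decompression helper built on the shimmed names above. */
#include <string.h>
#include <zlib.h>

static int decompress_buf(void *dst, size_t dst_len,
			  const void *src, size_t src_len)
{
	z_stream strm;
	int ret;

	memset(&strm, 0, sizeof(strm));
	strm.next_in   = (void *) src;
	strm.avail_in  = src_len;
	strm.next_out  = dst;
	strm.avail_out = dst_len;

	/* Negative windowBits selects a raw deflate stream: */
	if (zlib_inflateInit2(&strm, -MAX_WBITS) != Z_OK)
		return -1;

	ret = zlib_inflate(&strm, Z_FINISH);
	inflateEnd(&strm);

	return ret == Z_STREAM_END ? 0 : -1;
}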
/* Device state changes */
-DEFINE_EVENT(cache_set, bcache_cache_set_read_only,
+DEFINE_EVENT(cache_set, fs_read_only,
TP_PROTO(struct cache_set *c),
TP_ARGS(c)
);
-DEFINE_EVENT(cache_set, bcache_cache_set_read_only_done,
+DEFINE_EVENT(cache_set, fs_read_only_done,
TP_PROTO(struct cache_set *c),
TP_ARGS(c)
);
TP_ARGS(ca, reserve)
);
-DECLARE_EVENT_CLASS(cache_set_bucket_alloc,
+TRACE_EVENT(bcache_freelist_empty_fail,
TP_PROTO(struct cache_set *c, enum alloc_reserve reserve,
struct closure *cl),
TP_ARGS(c, reserve, cl),
__entry->cl)
);
-DEFINE_EVENT(cache_set_bucket_alloc, bcache_freelist_empty_fail,
- TP_PROTO(struct cache_set *c, enum alloc_reserve reserve,
- struct closure *cl),
- TP_ARGS(c, reserve, cl)
-);
-
DECLARE_EVENT_CLASS(open_bucket_alloc,
TP_PROTO(struct cache_set *c, struct closure *cl),
TP_ARGS(c, cl),
last_mount ? ctime(&last_mount) : "(never)",
BCH_MEMBER_STATE(m) < BCH_MEMBER_STATE_NR
- ? bch_cache_state[BCH_MEMBER_STATE(m)]
+ ? bch_dev_state[BCH_MEMBER_STATE(m)]
: "unknown",
BCH_MEMBER_TIER(m),
/* Allocation groups: */
-void bch_cache_group_remove_cache(struct cache_group *grp, struct cache *ca)
+void bch_dev_group_remove(struct cache_group *grp, struct cache *ca)
{
unsigned i;
spin_unlock(&grp->lock);
}
-void bch_cache_group_add_cache(struct cache_group *grp, struct cache *ca)
+void bch_dev_group_add(struct cache_group *grp, struct cache *ca)
{
unsigned i;
bucket_bytes(ca) - sizeof(p->csum));
ret = prio_io(ca, r, REQ_OP_WRITE);
- if (cache_fatal_io_err_on(ret, ca,
+ if (bch_dev_fatal_io_err_on(ret, ca,
"prio write to bucket %zu", r) ||
bch_meta_write_fault("prio"))
return ret;
bucket_nr++;
ret = prio_io(ca, bucket, REQ_OP_READ);
- if (cache_fatal_io_err_on(ret, ca,
+ if (bch_dev_fatal_io_err_on(ret, ca,
"prior read from bucket %llu",
bucket) ||
bch_meta_read_fault("prio"))
}
/* device goes ro: */
-void bch_cache_allocator_stop(struct cache *ca)
+void bch_dev_allocator_stop(struct cache *ca)
{
struct cache_set *c = ca->set;
struct cache_group *tier = &c->cache_tiers[ca->mi.tier];
/* First, remove device from allocation groups: */
- bch_cache_group_remove_cache(tier, ca);
- bch_cache_group_remove_cache(&c->cache_all, ca);
+ bch_dev_group_remove(tier, ca);
+ bch_dev_group_remove(&c->cache_all, ca);
bch_recalc_capacity(c);
/*
* Startup the allocator thread for transition to RW mode:
*/
-int bch_cache_allocator_start(struct cache *ca)
+int bch_dev_allocator_start(struct cache *ca)
{
struct cache_set *c = ca->set;
struct cache_group *tier = &c->cache_tiers[ca->mi.tier];
get_task_struct(k);
ca->alloc_thread = k;
- bch_cache_group_add_cache(tier, ca);
- bch_cache_group_add_cache(&c->cache_all, ca);
+ bch_dev_group_add(tier, ca);
+ bch_dev_group_add(&c->cache_all, ca);
bch_recalc_capacity(c);
return DIV_ROUND_UP((size_t) (ca)->mi.nbuckets, prios_per_bucket(ca));
}
-void bch_cache_group_remove_cache(struct cache_group *, struct cache *);
-void bch_cache_group_add_cache(struct cache_group *, struct cache *);
+void bch_dev_group_remove(struct cache_group *, struct cache *);
+void bch_dev_group_add(struct cache_group *, struct cache *);
int bch_prio_read(struct cache *);
((_ca) = __open_bucket_next_online_device(_c, _ob, _ptr, _ca));\
(_ptr)++)
-void bch_cache_allocator_stop(struct cache *);
-int bch_cache_allocator_start(struct cache *);
+void bch_dev_allocator_stop(struct cache *);
+int bch_dev_allocator_start(struct cache *);
void bch_open_buckets_init(struct cache_set *);
#endif /* _BCACHE_ALLOC_H */
#include <linux/dynamic_fault.h>
-#define cache_set_init_fault(name) \
- dynamic_fault("bcache:cache_set_init:" name)
+#define bch_fs_init_fault(name) \
+ dynamic_fault("bcache:bch_fs_init:" name)
#define bch_meta_read_fault(name) \
dynamic_fault("bcache:meta:read:" name)
#define bch_meta_write_fault(name) \
/* cache->flags: */
enum {
- CACHE_DEV_REMOVING,
- CACHE_DEV_FORCE_REMOVE,
+ BCH_DEV_REMOVING,
+ BCH_DEV_FORCE_REMOVE,
};
struct cache {
u8 dev_idx;
/*
* Cached version of this device's member info from superblock
- * Committed by bch_write_super() -> bch_cache_set_mi_update()
+ * Committed by bch_write_super() -> bch_fs_mi_update()
*/
struct cache_member_cpu mi;
uuid_le uuid;
* Flag bits for what phase of startup/shutdown the cache set is at, how we're
* shutting down, etc.:
*
- * CACHE_SET_UNREGISTERING means we're not just shutting down, we're detaching
+ * BCH_FS_UNREGISTERING means we're not just shutting down, we're detaching
* all the backing devices first (their cached data gets invalidated, and they
* won't automatically reattach).
*
- * CACHE_SET_STOPPING always gets set first when we're closing down a cache set;
- * we'll continue to run normally for awhile with CACHE_SET_STOPPING set (i.e.
+ * BCH_FS_STOPPING always gets set first when we're closing down a cache set;
+ * we'll continue to run normally for awhile with BCH_FS_STOPPING set (i.e.
* flushing dirty data).
*
- * CACHE_SET_RUNNING means all cache devices have been registered and journal
+ * BCH_FS_RUNNING means all cache devices have been registered and journal
* replay is complete.
*/
enum {
/* Startup: */
- CACHE_SET_INITIAL_GC_DONE,
- CACHE_SET_RUNNING,
+ BCH_FS_INITIAL_GC_DONE,
+ BCH_FS_RUNNING,
/* Shutdown: */
- CACHE_SET_UNREGISTERING,
- CACHE_SET_STOPPING,
- CACHE_SET_RO,
- CACHE_SET_RO_COMPLETE,
- CACHE_SET_EMERGENCY_RO,
- CACHE_SET_WRITE_DISABLE_COMPLETE,
- CACHE_SET_GC_STOPPING,
- CACHE_SET_GC_FAILURE,
- CACHE_SET_BDEV_MOUNTED,
- CACHE_SET_ERROR,
- CACHE_SET_FSCK_FIXED_ERRORS,
+ BCH_FS_DETACHING,
+ BCH_FS_STOPPING,
+ BCH_FS_RO,
+ BCH_FS_RO_COMPLETE,
+ BCH_FS_EMERGENCY_RO,
+ BCH_FS_WRITE_DISABLE_COMPLETE,
+ BCH_FS_GC_STOPPING,
+ BCH_FS_GC_FAILURE,
+ BCH_FS_BDEV_MOUNTED,
+ BCH_FS_ERROR,
+ BCH_FS_FSCK_FIXED_ERRORS,
};
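An editorial aside, not from the patch: the renamed BCH_FS_* bits live in the cache set's flags word and are tested with the usual atomic bitops, as in the illustrative helper below (the helper name is invented).

/* Illustrative only; bch_fs_wants_io() is a made-up helper. */
static bool bch_fs_wants_io(struct cache_set *c)
{
	return test_bit(BCH_FS_RUNNING, &c->flags) &&
	       !test_bit(BCH_FS_RO, &c->flags) &&
	       !test_bit(BCH_FS_EMERGENCY_RO, &c->flags);
}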
struct btree_debug {
struct cache __rcu *cache[BCH_SB_MEMBERS_MAX];
- struct cache_set_opts opts;
+ struct bch_opts opts;
/*
* Cached copy in native endianness:
- * Set by bch_cache_set_mi_update():
+ * Set by bch_fs_mi_update():
*/
struct cache_member_rcu __rcu *members;
char buf[160];
bch_bkey_val_to_text(c, type, buf, sizeof(buf), k);
- cache_set_bug(c, "invalid bkey %s: %s", buf, invalid);
+ bch_fs_bug(c, "invalid bkey %s: %s", buf, invalid);
return;
}
static int bch_blockdev_major;
static DEFINE_IDA(bch_blockdev_minor);
static LIST_HEAD(uncached_devices);
-struct kmem_cache *bch_search_cache;
+static struct kmem_cache *bch_search_cache;
static void write_bdev_super_endio(struct bio *bio)
{
struct cache_set *c, *tc;
struct cached_dev *dc, *t;
- list_for_each_entry_safe(c, tc, &bch_cache_sets, list)
+ list_for_each_entry_safe(c, tc, &bch_fs_list, list)
list_for_each_entry_safe(dc, t, &c->cached_devs, list)
if (dc->disk_sb.bdev == bdev)
return true;
return -EINVAL;
}
- if (!test_bit(CACHE_SET_RUNNING, &c->flags))
+ if (!test_bit(BCH_FS_RUNNING, &c->flags))
return 0;
- if (test_bit(CACHE_SET_STOPPING, &c->flags)) {
+ if (test_bit(BCH_FS_STOPPING, &c->flags)) {
pr_err("Can't attach %s: shutting down", buf);
return -EINVAL;
}
bdevname(dc->disk_sb.bdev, name));
list_add(&dc->list, &uncached_devices);
- list_for_each_entry(c, &bch_cache_sets, list)
+ list_for_each_entry(c, &bch_fs_list, list)
bch_cached_dev_attach(dc, c);
if (BDEV_STATE(dc->disk_sb.sb) == BDEV_STATE_NONE ||
struct bkey_s_c_inode_blockdev inode;
int ret = 0;
- if (test_bit(CACHE_SET_STOPPING, &c->flags))
+ if (test_bit(BCH_FS_STOPPING, &c->flags))
return -EINVAL;
for_each_btree_key(&iter, c, BTREE_ID_INODES, POS_MIN, k) {
d = radix_tree_deref_slot(slot);
if (CACHED_DEV(&d->inode.v) &&
- test_bit(CACHE_SET_UNREGISTERING, &c->flags)) {
+ test_bit(BCH_FS_DETACHING, &c->flags)) {
dc = container_of(d, struct cached_dev, disk);
bch_cached_dev_detach(dc);
} else {
mutex_unlock(&bch_register_lock);
}
+void bch_fs_blockdev_exit(struct cache_set *c)
+{
+ mempool_exit(&c->search);
+}
+
+int bch_fs_blockdev_init(struct cache_set *c)
+{
+ return mempool_init_slab_pool(&c->search, 1, bch_search_cache);
+}
+
void bch_blockdev_exit(void)
{
kmem_cache_destroy(bch_search_cache);
#include "blockdev_types.h"
#include "io_types.h"
+struct search {
+ /* Stack frame for bio_complete */
+ struct closure cl;
+
+ union {
+ struct bch_read_bio rbio;
+ struct bch_write_bio wbio;
+ };
+ /* Not modified */
+ struct bio *orig_bio;
+ struct bcache_device *d;
+
+ unsigned inode;
+ unsigned write:1;
+
+ /* Flags only used for reads */
+ unsigned recoverable:1;
+ unsigned read_dirty_data:1;
+ unsigned cache_miss:1;
+
+ /*
+ * For reads: bypass read from cache and insertion into cache
+ * For writes: discard key range from cache, sending the write to
+ * the backing device (if there is a backing device)
+ */
+ unsigned bypass:1;
+
+ unsigned long start_time;
+
+ /*
+ * Mostly only used for writes. For reads, we still make use of
+ * some trivial fields:
+ * - c
+ * - error
+ */
+ struct bch_write_op iop;
+};
+
+#ifndef NO_BCACHE_BLOCKDEV
+
+extern struct kobj_type bch_cached_dev_ktype;
+extern struct kobj_type bch_blockdev_volume_ktype;
+
void bch_write_bdev_super(struct cached_dev *, struct closure *);
void bch_cached_dev_release(struct kobject *);
void bch_blockdevs_stop(struct cache_set *);
+void bch_fs_blockdev_exit(struct cache_set *);
+int bch_fs_blockdev_init(struct cache_set *);
void bch_blockdev_exit(void);
int bch_blockdev_init(void);
+#else
+
+static inline void bch_write_bdev_super(struct cached_dev *dc,
+ struct closure *cl) {}
+
+static inline void bch_cached_dev_release(struct kobject *kobj) {}
+static inline void bch_blockdev_volume_release(struct kobject *kobj) {}
+
+static inline int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c)
+{
+ return 0;
+}
+static inline void bch_attach_backing_devs(struct cache_set *c) {}
+
+static inline void bch_cached_dev_detach(struct cached_dev *dc) {}
+static inline void bch_cached_dev_run(struct cached_dev *dc) {}
+static inline void bch_blockdev_stop(struct bcache_device *d) {}
+
+static inline bool bch_is_open_backing_dev(struct block_device *bdev)
+{
+ return false;
+}
+static inline const char *bch_backing_dev_register(struct bcache_superblock *sb)
+{
+ return "not implemented";
+}
+
+static inline int bch_blockdev_volume_create(struct cache_set *c, u64 s) { return 0; }
+static inline int bch_blockdev_volumes_start(struct cache_set *c) { return 0; }
+
+static inline void bch_blockdevs_stop(struct cache_set *c) {}
+static inline void bch_fs_blockdev_exit(struct cache_set *c) {}
+static inline int bch_fs_blockdev_init(struct cache_set *c) { return 0; }
+static inline void bch_blockdev_exit(void) {}
+static inline int bch_blockdev_init(void) { return 0; }
+
+#endif
+
static inline void cached_dev_put(struct cached_dev *dc)
{
if (atomic_dec_and_test(&dc->count))
return radix_tree_lookup(&c->devices, inode);
}
-struct search {
- /* Stack frame for bio_complete */
- struct closure cl;
-
- union {
- struct bch_read_bio rbio;
- struct bch_write_bio wbio;
- };
- /* Not modified */
- struct bio *orig_bio;
- struct bcache_device *d;
-
- unsigned inode;
- unsigned write:1;
-
- /* Flags only used for reads */
- unsigned recoverable:1;
- unsigned read_dirty_data:1;
- unsigned cache_miss:1;
-
- /*
- * For reads: bypass read from cache and insertion into cache
- * For writes: discard key range from cache, sending the write to
- * the backing device (if there is a backing device)
- */
- unsigned bypass:1;
-
- unsigned long start_time;
-
- /*
- * Mostly only used for writes. For reads, we still make use of
- * some trivial fields:
- * - c
- * - error
- */
- struct bch_write_op iop;
-};
-
-extern struct kmem_cache *bch_search_cache;
-
-extern struct kobj_type bch_cached_dev_ktype;
-extern struct kobj_type bch_blockdev_volume_ktype;
-
#endif /* _BCACHE_BLOCKDEV_H */
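The #ifndef NO_BCACHE_BLOCKDEV / #else split above is one instance of the stubbing pattern this patch applies to every subsystem the userspace tools omit: the header keeps the real declarations for the kernel build and supplies inline no-ops otherwise, so call sites never grow their own #ifdefs. A minimal illustration of the shape, using NO_BCACHE_EXAMPLE and bch_example_start() as hypothetical names rather than anything from the patch:

	#ifndef NO_BCACHE_EXAMPLE
	void bch_example_start(struct cache_set *);			/* real implementation */
	#else
	static inline void bch_example_start(struct cache_set *c) {}	/* compiled-out no-op */
	#endif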
? btree_type_successor(b->btree_id, l->max)
: l->max;
- cache_set_inconsistent_on(bkey_cmp(b->data->min_key,
- expected_min), c,
+ bch_fs_inconsistent_on(bkey_cmp(b->data->min_key, expected_min), c,
"btree node has incorrect min key: %llu:%llu != %llu:%llu",
b->data->min_key.inode,
b->data->min_key.offset,
if (b->level > r->depth) {
l = &r->l[b->level - 1];
- cache_set_inconsistent_on(bkey_cmp(b->data->min_key,
- l->min), c,
+ bch_fs_inconsistent_on(bkey_cmp(b->data->min_key, l->min), c,
"btree node min doesn't match min of child nodes: %llu:%llu != %llu:%llu",
b->data->min_key.inode,
b->data->min_key.offset,
l->min.inode,
l->min.offset);
- cache_set_inconsistent_on(bkey_cmp(b->data->max_key,
- l->max), c,
+ bch_fs_inconsistent_on(bkey_cmp(b->data->max_key, l->max), c,
"btree node max doesn't match max of child nodes: %llu:%llu != %llu:%llu",
b->data->max_key.inode,
b->data->max_key.offset,
&stats);
/*
* Don't apply stats - pending deletes aren't tracked in
- * cache_set_stats:
+ * bch_alloc_stats:
*/
mutex_unlock(&c->btree_interior_update_lock);
* uses, GC could skip past them
*/
- if (test_bit(CACHE_SET_GC_FAILURE, &c->flags))
+ if (test_bit(BCH_FS_GC_FAILURE, &c->flags))
return;
trace_bcache_gc_start(c);
if (ret) {
bch_err(c, "btree gc failed: %d", ret);
- set_bit(CACHE_SET_GC_FAILURE, &c->flags);
+ set_bit(BCH_FS_GC_FAILURE, &c->flags);
up_write(&c->gc_lock);
return;
}
lock_seq[0] = merge[0]->lock.state.seq;
- if (test_bit(CACHE_SET_GC_STOPPING, &c->flags)) {
+ if (test_bit(BCH_FS_GC_STOPPING, &c->flags)) {
bch_btree_iter_unlock(&iter);
return -ESHUTDOWN;
}
if (btree_gc_coalesce_disabled(c))
return;
- if (test_bit(CACHE_SET_GC_FAILURE, &c->flags))
+ if (test_bit(BCH_FS_GC_FAILURE, &c->flags))
return;
down_read(&c->gc_lock);
if (ret) {
if (ret != -ESHUTDOWN)
bch_err(c, "btree coalescing failed: %d", ret);
- set_bit(CACHE_SET_GC_FAILURE, &c->flags);
+ set_bit(BCH_FS_GC_FAILURE, &c->flags);
return;
}
}
void bch_gc_thread_stop(struct cache_set *c)
{
- set_bit(CACHE_SET_GC_STOPPING, &c->flags);
+ set_bit(BCH_FS_GC_STOPPING, &c->flags);
if (!IS_ERR_OR_NULL(c->gc_thread))
kthread_stop(c->gc_thread);
int bch_gc_thread_start(struct cache_set *c)
{
- clear_bit(CACHE_SET_GC_STOPPING, &c->flags);
+ clear_bit(BCH_FS_GC_STOPPING, &c->flags);
c->gc_thread = kthread_create(bch_gc_thread, c, "bcache_gc");
if (IS_ERR(c->gc_thread))
bch_mark_metadata(c);
gc_pos_set(c, gc_phase(GC_PHASE_DONE));
- set_bit(CACHE_SET_INITIAL_GC_DONE, &c->flags);
+ set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags);
return 0;
}
}
#define btree_node_error(b, c, ptr, fmt, ...) \
- cache_set_inconsistent(c, \
+ bch_fs_inconsistent(c, \
"btree node error at btree %u level %u/%u bucket %zu block %u u64s %u: " fmt,\
(b)->btree_id, (b)->level, btree_node_root(c, b) \
? btree_node_root(c, b)->level : -1, \
closure_init_stack(&cl);
pick = bch_btree_pick_ptr(c, b);
- if (cache_set_fatal_err_on(!pick.ca, c,
- "no cache device for btree node")) {
+ if (bch_fs_fatal_err_on(!pick.ca, c,
+ "no cache device for btree node")) {
set_btree_node_read_error(b);
return;
}
bch_generic_make_request(bio, c);
closure_sync(&cl);
- if (cache_fatal_io_err_on(bio->bi_error,
+ if (bch_dev_fatal_io_err_on(bio->bi_error,
pick.ca, "IO error reading bucket %zu",
PTR_BUCKET_NR(pick.ca, &pick.ptr)) ||
bch_meta_read_fault("btree")) {
struct closure *cl = !wbio->split ? wbio->cl : NULL;
struct cache *ca = wbio->ca;
- if (cache_fatal_io_err_on(bio->bi_error, ca, "btree write") ||
+ if (bch_dev_fatal_io_err_on(bio->bi_error, ca, "btree write") ||
bch_meta_write_fault("btree"))
set_btree_node_write_error(b);
d->index_update_done = true;
/*
- * Btree nodes are accounted as freed in cache_set_stats when they're
+ * Btree nodes are accounted as freed in bch_alloc_stats when they're
* freed from the index:
*/
stats->s[S_COMPRESSED][S_META] -= c->sb.btree_node_size;
&tmp, 0);
/*
* Don't apply tmp - pending deletes aren't tracked in
- * cache_set_stats:
+ * bch_alloc_stats:
*/
}
&stats, 0);
/*
* Don't apply stats - pending deletes aren't tracked in
- * cache_set_stats:
+ * bch_alloc_stats:
*/
}
bch_btree_node_free_index(c, NULL, old->btree_id,
bkey_i_to_s_c(&old->key),
&stats);
- bch_cache_set_stats_apply(c, &stats, &btree_reserve->disk_res,
- gc_pos_btree_root(b->btree_id));
+ bch_fs_stats_apply(c, &stats, &btree_reserve->disk_res,
+ gc_pos_btree_root(b->btree_id));
}
bch_recalc_btree_reserve(c);
bkey_disassemble(b, k, &tmp),
&stats);
- bch_cache_set_stats_apply(c, &stats, disk_res, gc_pos_btree_node(b));
+ bch_fs_stats_apply(c, &stats, disk_res, gc_pos_btree_node(b));
bch_btree_bset_insert_key(iter, b, node_iter, insert);
set_btree_node_dirty(b);
#define lg_local_lock lg_global_lock
#define lg_local_unlock lg_global_unlock
-static void bch_cache_set_stats_verify(struct cache_set *c)
+static void bch_fs_stats_verify(struct cache_set *c)
{
struct bucket_stats_cache_set stats =
__bch_bucket_stats_read_cache_set(c);
#else
-static void bch_cache_set_stats_verify(struct cache_set *c) {}
+static void bch_fs_stats_verify(struct cache_set *c) {}
#endif
return !m.owned_by_allocator && !m.dirty_sectors && !!m.cached_sectors;
}
-void bch_cache_set_stats_apply(struct cache_set *c,
- struct bucket_stats_cache_set *stats,
- struct disk_reservation *disk_res,
- struct gc_pos gc_pos)
+void bch_fs_stats_apply(struct cache_set *c,
+ struct bucket_stats_cache_set *stats,
+ struct disk_reservation *disk_res,
+ struct gc_pos gc_pos)
{
s64 added =
stats->s[S_COMPRESSED][S_META] +
if (!gc_will_visit(c, gc_pos))
bucket_stats_add(this_cpu_ptr(c->bucket_stats_percpu), stats);
- bch_cache_set_stats_verify(c);
+ bch_fs_stats_verify(c);
lg_local_unlock(&c->bucket_stats_lock);
memset(stats, 0, sizeof(*stats));
static void bucket_stats_update(struct cache *ca,
struct bucket_mark old, struct bucket_mark new,
bool may_make_unavailable,
- struct bucket_stats_cache_set *cache_set_stats)
+ struct bucket_stats_cache_set *bch_alloc_stats)
{
struct cache_set *c = ca->set;
struct bucket_stats_cache *cache_stats;
!is_available_bucket(new) &&
c->gc_pos.phase == GC_PHASE_DONE);
- if (cache_set_stats) {
- cache_set_stats->s[S_COMPRESSED][S_CACHED] +=
+ if (bch_alloc_stats) {
+ bch_alloc_stats->s[S_COMPRESSED][S_CACHED] +=
(int) new.cached_sectors - (int) old.cached_sectors;
- cache_set_stats->s[S_COMPRESSED]
+ bch_alloc_stats->s[S_COMPRESSED]
[old.is_metadata ? S_META : S_DIRTY] -=
old.dirty_sectors;
- cache_set_stats->s[S_COMPRESSED]
+ bch_alloc_stats->s[S_COMPRESSED]
[new.is_metadata ? S_META : S_DIRTY] +=
new.dirty_sectors;
}
* Ick:
*
* Only stats.sectors_cached should be nonzero: this is important
- * because in this path we modify cache_set_stats based on how the
+ * because in this path we modify bch_alloc_stats based on how the
* bucket_mark was modified, and the sector counts in bucket_mark are
* subject to (saturating) overflow - and if they did overflow, the
* cache set stats will now be off. We can tolerate this for
__bch_mark_key(c, k, sectors, metadata, false, stats,
gc_will_visit(c, gc_pos), journal_seq);
- bch_cache_set_stats_verify(c);
+ bch_fs_stats_verify(c);
lg_local_unlock(&c->bucket_stats_lock);
}
static u64 __recalc_sectors_available(struct cache_set *c)
{
- return c->capacity - cache_set_sectors_used(c);
+ return c->capacity - bch_fs_sectors_used(c);
}
/* Used by gc when it's starting: */
this_cpu_sub(c->bucket_stats_percpu->online_reserved,
res->sectors);
- bch_cache_set_stats_verify(c);
+ bch_fs_stats_verify(c);
lg_local_unlock(&c->bucket_stats_lock);
res->sectors = 0;
stats->online_reserved += sectors;
res->sectors += sectors;
- bch_cache_set_stats_verify(c);
+ bch_fs_stats_verify(c);
lg_local_unlock(&c->bucket_stats_lock);
return 0;
ret = -ENOSPC;
}
- bch_cache_set_stats_verify(c);
+ bch_fs_stats_verify(c);
lg_global_unlock(&c->bucket_stats_lock);
if (!(flags & BCH_DISK_RESERVATION_GC_LOCK_HELD))
up_read(&c->gc_lock);
struct bucket_stats_cache_set __bch_bucket_stats_read_cache_set(struct cache_set *);
struct bucket_stats_cache_set bch_bucket_stats_read_cache_set(struct cache_set *);
-void bch_cache_set_stats_apply(struct cache_set *,
- struct bucket_stats_cache_set *,
- struct disk_reservation *,
+void bch_fs_stats_apply(struct cache_set *,
+ struct bucket_stats_cache_set *,
+ struct disk_reservation *,
struct gc_pos);
-static inline u64 __cache_set_sectors_used(struct cache_set *c)
+static inline u64 __bch_fs_sectors_used(struct cache_set *c)
{
struct bucket_stats_cache_set stats = __bch_bucket_stats_read_cache_set(c);
u64 reserved = stats.persistent_reserved +
(reserved >> 7);
}
-static inline u64 cache_set_sectors_used(struct cache_set *c)
+static inline u64 bch_fs_sectors_used(struct cache_set *c)
{
- return min(c->capacity, __cache_set_sectors_used(c));
+ return min(c->capacity, __bch_fs_sectors_used(c));
}
/* XXX: kill? */
}
}
- err = bch_register_cache_set(devs, arg.nr_devs,
- cache_set_opts_empty(),
- NULL);
+ err = bch_fs_open(devs, arg.nr_devs, bch_opts_empty(), NULL);
if (err) {
pr_err("Could not register cache set: %s", err);
ret = -EINVAL;
if (!path)
return -ENOMEM;
- err = bch_register_one(path);
+ err = bch_fs_open_incremental(path);
kfree(path);
if (err) {
static long bch_ioctl_stop(struct cache_set *c)
{
- bch_cache_set_stop(c);
+ bch_fs_stop(c);
return 0;
}
if (!path)
return -ENOMEM;
- ret = bch_cache_set_add_cache(c, path);
+ ret = bch_dev_add(c, path);
kfree(path);
return ret;
if (IS_ERR(ca))
return PTR_ERR(ca);
- ret = bch_cache_remove(ca, arg.flags & BCH_FORCE_IF_DATA_MISSING)
+ ret = bch_dev_remove(ca, arg.flags & BCH_FORCE_IF_DATA_MISSING)
? 0 : -EBUSY;
percpu_ref_put(&ca->ref);
return PTR_ERR(ca);
/* XXX: failed not actually implemented yet */
- ret = bch_cache_remove(ca, true);
+ ret = bch_dev_remove(ca, true);
percpu_ref_put(&ca->ref);
return ret;
sizeof(c->sb.user_uuid));
}
-long bch_cache_set_ioctl(struct cache_set *c, unsigned cmd, void __user *arg)
+long bch_fs_ioctl(struct cache_set *c, unsigned cmd, void __user *arg)
{
/* ioctls that don't require admin cap: */
switch (cmd) {
void __user *arg = (void __user *) v;
return c
- ? bch_cache_set_ioctl(c, cmd, arg)
+ ? bch_fs_ioctl(c, cmd, arg)
: bch_global_ioctl(cmd, arg);
}
-const struct file_operations bch_chardev_fops = {
+static const struct file_operations bch_chardev_fops = {
.owner = THIS_MODULE,
.unlocked_ioctl = bch_chardev_ioctl,
.open = nonseekable_open,
};
+
+static int bch_chardev_major;
+static struct class *bch_chardev_class;
+static struct device *bch_chardev;
+static DEFINE_IDR(bch_chardev_minor);
+
+void bch_fs_chardev_exit(struct cache_set *c)
+{
+ if (!IS_ERR_OR_NULL(c->chardev))
+ device_unregister(c->chardev);
+ if (c->minor >= 0)
+ idr_remove(&bch_chardev_minor, c->minor);
+}
+
+int bch_fs_chardev_init(struct cache_set *c)
+{
+ c->minor = idr_alloc(&bch_chardev_minor, c, 0, 0, GFP_KERNEL);
+ if (c->minor < 0)
+ return c->minor;
+
+ c->chardev = device_create(bch_chardev_class, NULL,
+ MKDEV(bch_chardev_major, c->minor), NULL,
+ "bcache%u-ctl", c->minor);
+ if (IS_ERR(c->chardev))
+ return PTR_ERR(c->chardev);
+
+ return 0;
+}
+
+void bch_chardev_exit(void)
+{
+ if (!IS_ERR_OR_NULL(bch_chardev_class))
+ device_destroy(bch_chardev_class,
+			       MKDEV(bch_chardev_major, 255));
+ if (!IS_ERR_OR_NULL(bch_chardev_class))
+ class_destroy(bch_chardev_class);
+ if (bch_chardev_major > 0)
+		unregister_chrdev(bch_chardev_major, "bcache-ctl");
+}
+
+int __init bch_chardev_init(void)
+{
+ bch_chardev_major = register_chrdev(0, "bcache-ctl", &bch_chardev_fops);
+ if (bch_chardev_major < 0)
+ return bch_chardev_major;
+
+ bch_chardev_class = class_create(THIS_MODULE, "bcache");
+ if (IS_ERR(bch_chardev_class))
+ return PTR_ERR(bch_chardev_class);
+
+ bch_chardev = device_create(bch_chardev_class, NULL,
+ MKDEV(bch_chardev_major, 255),
+ NULL, "bcache-ctl");
+ if (IS_ERR(bch_chardev))
+ return PTR_ERR(bch_chardev);
+
+ return 0;
+}
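For orientation, the control nodes these helpers create (the /dev paths assume default devtmpfs naming, which the patch itself doesn't spell out; dispatch follows the c ? bch_fs_ioctl() : bch_global_ioctl() branch shown earlier):

	/*
	 *   /dev/bcache-ctl     minor 255        global ioctls (assemble, incremental add)
	 *   /dev/bcache0-ctl    minor 0 (IDR)    per-filesystem ioctls (stop, device add/remove)
	 *   /dev/bcache1-ctl    minor 1 (IDR)    ...
	 */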
#ifndef _BCACHE_CHARDEV_H
#define _BCACHE_CHARDEV_H
-long bch_cache_set_ioctl(struct cache_set *, unsigned, void __user *);
-extern const struct file_operations bch_chardev_fops;
+#ifndef NO_BCACHE_CHARDEV
+
+long bch_fs_ioctl(struct cache_set *, unsigned, void __user *);
+
+void bch_fs_chardev_exit(struct cache_set *);
+int bch_fs_chardev_init(struct cache_set *);
+
+void bch_chardev_exit(void);
+int __init bch_chardev_init(void);
+
+#else
+
+static inline long bch_fs_ioctl(struct cache_set *c,
+				unsigned cmd, void __user *arg)
+{
+ return -ENOSYS;
+}
+
+static inline void bch_fs_chardev_exit(struct cache_set *c) {}
+static inline int bch_fs_chardev_init(struct cache_set *c) { return 0; }
+
+static inline void bch_chardev_exit(void) {}
+static inline int __init bch_chardev_init(void) { return 0; }
+
+#endif
#endif /* _BCACHE_CHARDEV_H */
return ret;
}
-void bch_cache_set_encryption_free(struct cache_set *c)
+void bch_fs_encryption_free(struct cache_set *c)
{
if (!IS_ERR_OR_NULL(c->poly1305))
crypto_free_shash(c->poly1305);
crypto_free_blkcipher(c->chacha20);
}
-int bch_cache_set_encryption_init(struct cache_set *c)
+int bch_fs_encryption_init(struct cache_set *c)
{
struct bch_sb_field_crypt *crypt;
struct bch_key key;
int bch_disable_encryption(struct cache_set *);
int bch_enable_encryption(struct cache_set *, bool);
-void bch_cache_set_encryption_free(struct cache_set *);
-int bch_cache_set_encryption_init(struct cache_set *);
+void bch_fs_encryption_free(struct cache_set *);
+int bch_fs_encryption_init(struct cache_set *);
static inline unsigned bch_data_checksum_type(struct cache_set *c)
{
}
}
+static inline void zlib_set_workspace(z_stream *strm, void *workspace)
+{
+#ifdef __KERNEL__
+ strm->workspace = workspace;
+#endif
+}
+
static int __bio_uncompress(struct cache_set *c, struct bio *src,
void *dst_data, struct bch_extent_crc128 crc)
{
workspace = c->zlib_workspace;
}
- strm.workspace = workspace;
strm.next_in = src_data;
strm.avail_in = src_len;
strm.next_out = dst_data;
strm.avail_out = dst_len;
+ zlib_set_workspace(&strm, workspace);
zlib_inflateInit2(&strm, -MAX_WBITS);
ret = zlib_inflate(&strm, Z_FINISH);
workspace = c->zlib_workspace;
}
- strm.workspace = workspace;
strm.next_in = src_data;
strm.avail_in = min(src->bi_iter.bi_size,
dst->bi_iter.bi_size);
strm.next_out = dst_data;
strm.avail_out = dst->bi_iter.bi_size;
+ zlib_set_workspace(&strm, workspace);
zlib_deflateInit2(&strm, Z_DEFAULT_COMPRESSION,
Z_DEFLATED, -MAX_WBITS, DEF_MEM_LEVEL,
Z_DEFAULT_STRATEGY);
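The zlib_set_workspace() helper exists because the kernel's zlib_inflate()/zlib_deflate() operate on a caller-allocated workspace, while a userspace build against ordinary zlib has the library manage its own state (its z_stream has no workspace member). A rough sketch of how the kernel-side buffer would be sized, assuming the usual zlib_*_workspacesize() helpers (the patch itself just reuses c->zlib_workspace):

	int ws_size = max(zlib_inflate_workspacesize(),
			  zlib_deflate_workspacesize(MAX_WBITS, DEF_MEM_LEVEL));
	void *workspace = vmalloc(ws_size);	/* stored as c->zlib_workspace */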
void bch_inconsistent_error(struct cache_set *c)
{
- set_bit(CACHE_SET_ERROR, &c->flags);
+ set_bit(BCH_FS_ERROR, &c->flags);
switch (c->opts.errors) {
case BCH_ON_ERROR_CONTINUE:
break;
case BCH_ON_ERROR_RO:
- if (!test_bit(CACHE_SET_INITIAL_GC_DONE, &c->flags)) {
+ if (!test_bit(BCH_FS_INITIAL_GC_DONE, &c->flags)) {
/* XXX do something better here? */
- bch_cache_set_stop(c);
+ bch_fs_stop(c);
return;
}
- if (bch_cache_set_emergency_read_only(c))
+ if (bch_fs_emergency_read_only(c))
bch_err(c, "emergency read only");
break;
case BCH_ON_ERROR_PANIC:
void bch_fatal_error(struct cache_set *c)
{
- if (bch_cache_set_emergency_read_only(c))
+ if (bch_fs_emergency_read_only(c))
bch_err(c, "emergency read only");
}
bool dev;
if (errors < c->error_limit) {
- bch_notify_cache_error(ca, false);
+ bch_notify_dev_error(ca, false);
} else {
- bch_notify_cache_error(ca, true);
+ bch_notify_dev_error(ca, true);
mutex_lock(&bch_register_lock);
- dev = bch_cache_may_remove(ca);
+ dev = bch_dev_may_remove(ca);
if (dev
- ? bch_cache_read_only(ca)
- : bch_cache_set_emergency_read_only(c))
+ ? bch_dev_read_only(ca)
+ : bch_fs_emergency_read_only(c))
bch_err(c,
"too many IO errors on %s, setting %s RO",
bdevname(ca->disk_sb.bdev, buf),
/* Error messages: */
-#define __bch_cache_error(ca, fmt, ...) \
+#define __bch_dev_error(ca, fmt, ...) \
do { \
char _buf[BDEVNAME_SIZE]; \
bch_err((ca)->set, "%s: " fmt, \
* XXX: audit and convert to inconsistent() checks
*/
-#define cache_set_bug(c, ...) \
+#define bch_fs_bug(c, ...) \
do { \
bch_err(c, __VA_ARGS__); \
BUG(); \
} while (0)
-#define cache_set_bug_on(cond, c, ...) \
+#define bch_fs_bug_on(cond, c, ...) \
do { \
if (cond) \
- cache_set_bug(c, __VA_ARGS__); \
+ bch_fs_bug(c, __VA_ARGS__); \
} while (0)
/*
void bch_inconsistent_error(struct cache_set *);
-#define cache_set_inconsistent(c, ...) \
+#define bch_fs_inconsistent(c, ...) \
do { \
bch_err(c, __VA_ARGS__); \
bch_inconsistent_error(c); \
} while (0)
-#define cache_set_inconsistent_on(cond, c, ...) \
+#define bch_fs_inconsistent_on(cond, c, ...) \
({ \
int _ret = !!(cond); \
\
if (_ret) \
- cache_set_inconsistent(c, __VA_ARGS__); \
+ bch_fs_inconsistent(c, __VA_ARGS__); \
_ret; \
})
* entire cache set:
*/
-#define cache_inconsistent(ca, ...) \
+#define bch_dev_inconsistent(ca, ...) \
do { \
- __bch_cache_error(ca, __VA_ARGS__); \
+ __bch_dev_error(ca, __VA_ARGS__); \
bch_inconsistent_error((ca)->set); \
} while (0)
-#define cache_inconsistent_on(cond, ca, ...) \
+#define bch_dev_inconsistent_on(cond, ca, ...) \
({ \
int _ret = !!(cond); \
\
if (_ret) \
- cache_inconsistent(ca, __VA_ARGS__); \
+ bch_dev_inconsistent(ca, __VA_ARGS__); \
_ret; \
})
\
if (_can_fix && (c)->opts.fix_errors) { \
bch_err(c, msg ", fixing", ##__VA_ARGS__); \
- set_bit(CACHE_SET_FSCK_FIXED_ERRORS, &(c)->flags); \
+ set_bit(BCH_FS_FSCK_FIXED_ERRORS, &(c)->flags); \
_fix = true; \
} else if (_can_ignore && \
(c)->opts.errors == BCH_ON_ERROR_CONTINUE) { \
void bch_fatal_error(struct cache_set *);
-#define cache_set_fatal_error(c, ...) \
+#define bch_fs_fatal_error(c, ...) \
do { \
bch_err(c, __VA_ARGS__); \
bch_fatal_error(c); \
} while (0)
-#define cache_set_fatal_err_on(cond, c, ...) \
+#define bch_fs_fatal_err_on(cond, c, ...) \
({ \
int _ret = !!(cond); \
\
if (_ret) \
- cache_set_fatal_error(c, __VA_ARGS__); \
+ bch_fs_fatal_error(c, __VA_ARGS__); \
_ret; \
})
-#define cache_fatal_error(ca, ...) \
+#define bch_dev_fatal_error(ca, ...) \
do { \
- __bch_cache_error(ca, __VA_ARGS__); \
+ __bch_dev_error(ca, __VA_ARGS__); \
	bch_fatal_error((ca)->set);					\
} while (0)
-#define cache_fatal_io_error(ca, fmt, ...) \
+#define bch_dev_fatal_io_error(ca, fmt, ...) \
do { \
char _buf[BDEVNAME_SIZE]; \
\
bch_fatal_error((ca)->set); \
} while (0)
-#define cache_fatal_io_err_on(cond, ca, ...) \
+#define bch_dev_fatal_io_err_on(cond, ca, ...) \
({ \
int _ret = !!(cond); \
\
if (_ret) \
- cache_fatal_io_error(ca, __VA_ARGS__); \
+ bch_dev_fatal_io_error(ca, __VA_ARGS__); \
_ret; \
})
void bch_nonfatal_io_error(struct cache *);
#if 0
-#define cache_set_nonfatal_io_error(c, ...) \
+#define bch_fs_nonfatal_io_error(c, ...) \
do { \
bch_err(c, __VA_ARGS__); \
bch_nonfatal_io_error(c); \
#endif
/* Logs message and handles the error: */
-#define cache_nonfatal_io_error(ca, fmt, ...) \
+#define bch_dev_nonfatal_io_error(ca, fmt, ...) \
do { \
char _buf[BDEVNAME_SIZE]; \
\
bch_nonfatal_io_error(ca); \
} while (0)
-#define cache_nonfatal_io_err_on(cond, ca, ...) \
+#define bch_dev_nonfatal_io_err_on(cond, ca, ...) \
({ \
bool _ret = (cond); \
\
if (_ret) \
- cache_nonfatal_io_error(ca, __VA_ARGS__); \
+ bch_dev_nonfatal_io_error(ca, __VA_ARGS__); \
_ret; \
})
if (replicas < c->sb.meta_replicas_have) {
bch_bkey_val_to_text(c, btree_node_type(b),
buf, sizeof(buf), k);
- cache_set_bug(c,
+ bch_fs_bug(c,
"btree key bad (too few replicas, %u < %u): %s",
replicas, c->sb.meta_replicas_have, buf);
return;
return;
err:
bch_bkey_val_to_text(c, btree_node_type(b), buf, sizeof(buf), k);
- cache_set_bug(c, "%s btree pointer %s: bucket %zi prio %i "
+ bch_fs_bug(c, "%s btree pointer %s: bucket %zi prio %i "
"gen %i last_gc %i mark %08x",
err, buf, PTR_BUCKET_NR(ca, ptr),
g->read_prio, PTR_BUCKET(ca, ptr)->mark.gen,
extent_for_each_online_device_crc(c, e, crc, ptr, ca) {
struct btree *root = btree_node_root(c, b);
- if (cache_set_inconsistent_on(crc, c,
+ if (bch_fs_inconsistent_on(crc, c,
"btree node pointer with crc at btree %u level %u/%u bucket %zu",
b->btree_id, b->level, root ? root->level : -1,
PTR_BUCKET_NR(ca, ptr)))
break;
- if (cache_inconsistent_on(ptr_stale(ca, ptr), ca,
+ if (bch_dev_inconsistent_on(ptr_stale(ca, ptr), ca,
"stale btree node pointer at btree %u level %u/%u bucket %zu",
b->btree_id, b->level, root ? root->level : -1,
PTR_BUCKET_NR(ca, ptr)))
stop:
extent_insert_committed(s);
- bch_cache_set_stats_apply(c, &s->stats, s->trans->disk_res,
- gc_pos_btree_node(b));
+ bch_fs_stats_apply(c, &s->stats, s->trans->disk_res,
+ gc_pos_btree_node(b));
EBUG_ON(bkey_cmp(iter->pos, s->committed));
EBUG_ON((bkey_cmp(iter->pos, b->key.k.p) == 0) != iter->at_end_of_leaf);
bkey_start_offset(&insert->k->k),
insert->k->k.size);
- bch_cache_set_stats_apply(c, &s.stats, trans->disk_res,
- gc_pos_btree_node(b));
+ bch_fs_stats_apply(c, &s.stats, trans->disk_res,
+ gc_pos_btree_node(b));
EBUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&insert->k->k)));
EBUG_ON(bkey_cmp(iter->pos, s.committed));
stale = ptr_stale(ca, ptr);
- cache_set_bug_on(stale && !ptr->cached, c,
+ bch_fs_bug_on(stale && !ptr->cached, c,
"stale dirty pointer");
- cache_set_bug_on(stale > 96, c,
+ bch_fs_bug_on(stale > 96, c,
"key too stale: %i",
stale);
if (replicas > BCH_REPLICAS_MAX) {
bch_bkey_val_to_text(c, btree_node_type(b), buf,
sizeof(buf), e.s_c);
- cache_set_bug(c,
+ bch_fs_bug(c,
"extent key bad (too many replicas: %u): %s",
replicas, buf);
return;
replicas < c->sb.data_replicas_have) {
bch_bkey_val_to_text(c, btree_node_type(b), buf,
sizeof(buf), e.s_c);
- cache_set_bug(c,
+ bch_fs_bug(c,
"extent key bad (too few replicas, %u < %u): %s",
replicas, c->sb.data_replicas_have, buf);
return;
bad_device:
bch_bkey_val_to_text(c, btree_node_type(b), buf,
sizeof(buf), e.s_c);
- cache_set_bug(c, "extent pointer to dev %u missing device: %s",
- ptr->dev, buf);
+ bch_fs_bug(c, "extent pointer to dev %u missing device: %s",
+ ptr->dev, buf);
cache_member_info_put();
return;
bad_ptr:
bch_bkey_val_to_text(c, btree_node_type(b), buf,
sizeof(buf), e.s_c);
- cache_set_bug(c, "extent pointer bad gc mark: %s:\nbucket %zu prio %i "
- "gen %i last_gc %i mark 0x%08x",
- buf, PTR_BUCKET_NR(ca, ptr),
- g->read_prio, PTR_BUCKET(ca, ptr)->mark.gen,
- ca->oldest_gens[PTR_BUCKET_NR(ca, ptr)],
- (unsigned) g->mark.counter);
+ bch_fs_bug(c, "extent pointer bad gc mark: %s:\nbucket %zu prio %i "
+ "gen %i last_gc %i mark 0x%08x",
+ buf, PTR_BUCKET_NR(ca, ptr),
+ g->read_prio, PTR_BUCKET(ca, ptr)->mark.gen,
+ ca->oldest_gens[PTR_BUCKET_NR(ca, ptr)],
+ (unsigned) g->mark.counter);
cache_member_info_put();
return;
}
bool do_update = false;
ret = bch_inode_unpack(inode, &u);
- if (cache_set_inconsistent_on(ret, c,
+ if (bch_fs_inconsistent_on(ret, c,
"error unpacking inode %llu in fs-gc",
inode.k->p.inode))
return ret;
#define FS_IOC_GOINGDOWN _IOR ('X', 125, __u32)
-static long bch_fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+static long bch_fs_file_ioctl(struct file *filp, unsigned int cmd,
+ unsigned long arg)
{
struct inode *inode = file_inode(filp);
struct super_block *sb = inode->i_sb;
down_write(&sb->s_umount);
sb->s_flags |= MS_RDONLY;
- bch_cache_set_emergency_read_only(c);
+ bch_fs_emergency_read_only(c);
up_write(&sb->s_umount);
return 0;
default:
- return bch_cache_set_ioctl(c, cmd, (void __user *) arg);
+ return bch_fs_ioctl(c, cmd, (void __user *) arg);
}
}
default:
return -ENOIOCTLCMD;
}
- return bch_fs_ioctl(file, cmd, (unsigned long) compat_ptr(arg));
+ return bch_fs_file_ioctl(file, cmd, (unsigned long) compat_ptr(arg));
}
#endif
.splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
.fallocate = bch_fallocate_dispatch,
- .unlocked_ioctl = bch_fs_ioctl,
+ .unlocked_ioctl = bch_fs_file_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = bch_compat_fs_ioctl,
#endif
.read = generic_read_dir,
.iterate = bch_vfs_readdir,
.fsync = bch_fsync,
- .unlocked_ioctl = bch_fs_ioctl,
+ .unlocked_ioctl = bch_fs_file_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = bch_compat_fs_ioctl,
#endif
buf->f_type = BCACHE_STATFS_MAGIC;
buf->f_bsize = sb->s_blocksize;
buf->f_blocks = c->capacity >> PAGE_SECTOR_SHIFT;
- buf->f_bfree = (c->capacity - cache_set_sectors_used(c)) >> PAGE_SECTOR_SHIFT;
+ buf->f_bfree = (c->capacity - bch_fs_sectors_used(c)) >> PAGE_SECTOR_SHIFT;
buf->f_bavail = buf->f_bfree;
buf->f_files = atomic_long_read(&c->nr_inodes);
buf->f_ffree = U64_MAX;
rcu_read_lock();
- list_for_each_entry(c, &bch_cache_sets, list)
+ list_for_each_entry(c, &bch_fs_list, list)
for_each_cache_rcu(ca, c, i)
if (ca->disk_sb.bdev == bdev) {
rcu_read_unlock();
}
static struct cache_set *bch_open_as_blockdevs(const char *_dev_name,
- struct cache_set_opts opts)
+ struct bch_opts opts)
{
size_t nr_devs = 0, i = 0;
char *dev_name, *s, **devs;
(s = strchr(s, ':')) && (*s++ = '\0'))
devs[i++] = s;
- err = bch_register_cache_set(devs, nr_devs, opts, &c);
+ err = bch_fs_open(devs, nr_devs, opts, &c);
if (err) {
/*
* Already open?
if (!c)
goto err_unlock;
- if (!test_bit(CACHE_SET_RUNNING, &c->flags)) {
+ if (!test_bit(BCH_FS_RUNNING, &c->flags)) {
err = "incomplete cache set";
c = NULL;
goto err_unlock;
mutex_unlock(&bch_register_lock);
}
- set_bit(CACHE_SET_BDEV_MOUNTED, &c->flags);
+ set_bit(BCH_FS_BDEV_MOUNTED, &c->flags);
err:
kfree(devs);
kfree(dev_name);
static int bch_remount(struct super_block *sb, int *flags, char *data)
{
struct cache_set *c = sb->s_fs_info;
- struct cache_set_opts opts;
+ struct bch_opts opts = bch_opts_empty();
int ret;
- ret = bch_parse_options(&opts, *flags, data);
+ opts.read_only = (*flags & MS_RDONLY) != 0;
+
+ ret = bch_parse_mount_opts(&opts, data);
if (ret)
return ret;
const char *err = NULL;
if (opts.read_only) {
- bch_cache_set_read_only_sync(c);
+ bch_fs_read_only_sync(c);
sb->s_flags |= MS_RDONLY;
} else {
- err = bch_cache_set_read_write(c);
+ err = bch_fs_read_write(c);
if (err) {
bch_err(c, "error going rw: %s", err);
ret = -EINVAL;
struct cache *ca;
struct super_block *sb;
struct inode *inode;
- struct cache_set_opts opts;
+ struct bch_opts opts = bch_opts_empty();
unsigned i;
int ret;
- ret = bch_parse_options(&opts, flags, data);
+ opts.read_only = (flags & MS_RDONLY) != 0;
+
+ ret = bch_parse_mount_opts(&opts, data);
if (ret)
return ERR_PTR(ret);
generic_shutdown_super(sb);
- if (test_bit(CACHE_SET_BDEV_MOUNTED, &c->flags)) {
- DECLARE_COMPLETION_ONSTACK(complete);
-
- c->stop_completion = &complete;
- bch_cache_set_stop(c);
- closure_put(&c->cl);
-
- /* Killable? */
- wait_for_completion(&complete);
- } else
+ if (test_bit(BCH_FS_BDEV_MOUNTED, &c->flags))
+ bch_fs_stop_sync(c);
+ else
closure_put(&c->cl);
}
struct bch_inode_unpacked;
+#ifndef NO_BCACHE_FS
+
/* returns 0 if we want to do the update, or error is passed up */
typedef int (*inode_set_fn)(struct bch_inode_info *,
struct bch_inode_unpacked *, void *);
void bch_fs_exit(void);
int bch_fs_init(void);
+#else
+
+static inline void bch_fs_exit(void) {}
+static inline int bch_fs_init(void) { return 0; }
+
+#endif
+
#endif /* _BCACHE_FS_H */
#ifndef _BCACHE_INODE_H
#define _BCACHE_INODE_H
+#include <linux/math64.h>
+
extern const struct bkey_ops bch_bkey_inode_ops;
struct bch_inode_unpacked {
struct bio *orig = wbio->orig;
struct cache *ca = wbio->ca;
- if (cache_nonfatal_io_err_on(bio->bi_error, ca,
- "data write"))
+ if (bch_dev_nonfatal_io_err_on(bio->bi_error, ca,
+ "data write"))
set_closure_fn(cl, bch_write_io_error, index_update_wq(op));
bch_account_io_completion_time(ca, wbio->submit_time_us,
spin_lock_irqsave(&c->foreground_write_pd_lock, flags);
while ((op = c->write_wait_head)) {
- if (!test_bit(CACHE_SET_RO, &c->flags) &&
- !test_bit(CACHE_SET_STOPPING, &c->flags) &&
+ if (!test_bit(BCH_FS_RO, &c->flags) &&
+ !test_bit(BCH_FS_STOPPING, &c->flags) &&
time_after(op->expires, jiffies)) {
mod_timer(&c->foreground_write_wakeup, op->expires);
break;
}
csum = bch_checksum_bio(c, rbio->crc.csum_type, nonce, src);
- if (cache_nonfatal_io_err_on(bch_crc_cmp(rbio->crc.csum, csum), rbio->ca,
+ if (bch_dev_nonfatal_io_err_on(bch_crc_cmp(rbio->crc.csum, csum), rbio->ca,
"data checksum error, inode %llu offset %llu: expected %0llx%0llx got %0llx%0llx (type %u)",
rbio->inode, (u64) rbio->parent_iter.bi_sector << 9,
rbio->crc.csum.hi, rbio->crc.csum.lo, csum.hi, csum.lo,
}
if (rbio->promote &&
- !test_bit(CACHE_SET_RO, &c->flags) &&
- !test_bit(CACHE_SET_STOPPING, &c->flags)) {
+ !test_bit(BCH_FS_RO, &c->flags) &&
+ !test_bit(BCH_FS_STOPPING, &c->flags)) {
struct cache_promote_op *promote = rbio->promote;
struct closure *cl = &promote->cl;
bch_account_io_completion_time(rbio->ca, rbio->submit_time_us, REQ_OP_READ);
- cache_nonfatal_io_err_on(bio->bi_error, rbio->ca, "data read");
+ bch_dev_nonfatal_io_err_on(bio->bi_error, rbio->ca, "data read");
if (error) {
bch_read_error_maybe_retry(c, rbio, error);
return;
}
- if (rbio->crc.compression_type != BCH_COMPRESSION_NONE) {
+ if (rbio->crc.compression_type != BCH_COMPRESSION_NONE ||
+ bch_csum_type_is_encryption(rbio->crc.csum_type)) {
struct bio_decompress_worker *d;
preempt_disable();
	/* Interior updates aren't journalled: */
BUG_ON(b->level);
- BUG_ON(seq > journal_seq && test_bit(CACHE_SET_INITIAL_GC_DONE, &c->flags));
+ BUG_ON(seq > journal_seq && test_bit(BCH_FS_INITIAL_GC_DONE, &c->flags));
if (seq <= journal_seq) {
if (list_empty_careful(&j->seq_blacklist))
* Decrease this back to j->seq + 2 when we next rev the on disk format:
	 * increasing it temporarily to work around a bug in old kernels
*/
- cache_set_inconsistent_on(seq > journal_seq + 4, c,
+ bch_fs_inconsistent_on(seq > journal_seq + 4, c,
"bset journal seq too far in the future: %llu > %llu",
seq, journal_seq);
ret = submit_bio_wait(bio);
- if (cache_fatal_io_err_on(ret, ca,
+ if (bch_dev_fatal_io_err_on(ret, ca,
"journal read from sector %llu",
offset) ||
bch_meta_read_fault("journal"))
for_each_cache(ca, c, i)
if (is_journal_device(ca))
- bch_cache_group_add_cache(&c->journal.devs, ca);
+ bch_dev_group_add(&c->journal.devs, ca);
list_for_each_entry(bl, &j->seq_blacklist, list)
new_seq = max(new_seq, bl->seq);
return 0;
}
-int bch_cache_journal_alloc(struct cache *ca)
+int bch_dev_journal_alloc(struct cache *ca)
{
struct journal_device *ja = &ca->journal;
struct bch_sb_field_journal *journal_buckets;
j->last_flushed = jiffies;
}
- if (!test_bit(CACHE_SET_RO, &c->flags))
+ if (!test_bit(BCH_FS_RO, &c->flags))
queue_delayed_work(system_freezable_wq, &j->reclaim_work,
msecs_to_jiffies(j->reclaim_delay_ms));
}
struct cache *ca = bio->bi_private;
struct journal *j = &ca->set->journal;
- if (cache_fatal_io_err_on(bio->bi_error, ca, "journal write") ||
+ if (bch_dev_fatal_io_err_on(bio->bi_error, ca, "journal write") ||
bch_meta_write_fault("journal"))
bch_journal_halt(j);
ssize_t bch_journal_print_debug(struct journal *, char *);
-int bch_cache_journal_alloc(struct cache *);
+int bch_dev_journal_alloc(struct cache *);
static inline unsigned bch_nr_journal_buckets(struct bch_sb_field_journal *j)
{
if (ca->set->opts.nochanges)
return 0;
- if (cache_set_init_fault("moving_gc_start"))
+ if (bch_fs_init_fault("moving_gc_start"))
return -ENOMEM;
t = kthread_create(bch_moving_gc_thread, ca, "bch_copygc_read");
mutex_unlock(&c->uevent_lock);
}
-void bch_notify_cache_set_read_write(struct cache_set *c)
+void bch_notify_fs_read_write(struct cache_set *c)
{
notify_get(c);
notify_var(c, "STATE=active");
notify_put(c);
}
-void bch_notify_cache_set_read_only(struct cache_set *c)
+void bch_notify_fs_read_only(struct cache_set *c)
{
notify_get(c);
notify_var(c, "STATE=readonly");
notify_put(c);
}
-void bch_notify_cache_set_stopped(struct cache_set *c)
+void bch_notify_fs_stopped(struct cache_set *c)
{
notify_get(c);
notify_var(c, "STATE=stopped");
notify_put(c);
}
-void bch_notify_cache_read_write(struct cache *ca)
+void bch_notify_dev_read_write(struct cache *ca)
{
struct cache_set *c = ca->set;
notify_put(c);
}
-void bch_notify_cache_read_only(struct cache *ca)
+void bch_notify_dev_read_only(struct cache *ca)
{
struct cache_set *c = ca->set;
notify_put(c);
}
-void bch_notify_cache_added(struct cache *ca)
+void bch_notify_dev_added(struct cache *ca)
{
struct cache_set *c = ca->set;
notify_put(c);
}
-void bch_notify_cache_removing(struct cache *ca)
+void bch_notify_dev_removing(struct cache *ca)
{
struct cache_set *c = ca->set;
notify_put(c);
}
-void bch_notify_cache_remove_failed(struct cache *ca)
+void bch_notify_dev_remove_failed(struct cache *ca)
{
struct cache_set *c = ca->set;
notify_put(c);
}
-void bch_notify_cache_removed(struct cache *ca)
+void bch_notify_dev_removed(struct cache *ca)
{
struct cache_set *c = ca->set;
notify_put(c);
}
-void bch_notify_cache_error(struct cache *ca, bool fatal)
+void bch_notify_dev_error(struct cache *ca, bool fatal)
{
struct cache_set *c = ca->set;
#ifndef _NOTIFY_H
#define _NOTIFY_H
-void bch_notify_cache_set_read_write(struct cache_set *);
-void bch_notify_cache_set_read_only(struct cache_set *);
-void bch_notify_cache_set_stopped(struct cache_set *);
-
-void bch_notify_cache_read_write(struct cache *);
-void bch_notify_cache_read_only(struct cache *);
-void bch_notify_cache_added(struct cache *);
-void bch_notify_cache_removing(struct cache *);
-void bch_notify_cache_removed(struct cache *);
-void bch_notify_cache_remove_failed(struct cache *);
-void bch_notify_cache_error(struct cache *, bool);
+#ifndef NO_BCACHE_NOTIFY
+
+void bch_notify_fs_read_write(struct cache_set *);
+void bch_notify_fs_read_only(struct cache_set *);
+void bch_notify_fs_stopped(struct cache_set *);
+
+void bch_notify_dev_read_write(struct cache *);
+void bch_notify_dev_read_only(struct cache *);
+void bch_notify_dev_added(struct cache *);
+void bch_notify_dev_removing(struct cache *);
+void bch_notify_dev_removed(struct cache *);
+void bch_notify_dev_remove_failed(struct cache *);
+void bch_notify_dev_error(struct cache *, bool);
+
+#else
+
+static inline void bch_notify_fs_read_write(struct cache_set *c) {}
+static inline void bch_notify_fs_read_only(struct cache_set *c) {}
+static inline void bch_notify_fs_stopped(struct cache_set *c) {}
+
+static inline void bch_notify_dev_read_write(struct cache *ca) {}
+static inline void bch_notify_dev_read_only(struct cache *ca) {}
+static inline void bch_notify_dev_added(struct cache *ca) {}
+static inline void bch_notify_dev_removing(struct cache *ca) {}
+static inline void bch_notify_dev_removed(struct cache *ca) {}
+static inline void bch_notify_dev_remove_failed(struct cache *ca) {}
+static inline void bch_notify_dev_error(struct cache *ca, bool b) {}
+
+#endif
#endif /* _NOTIFY_H */
NULL
};
-const char * const bch_cache_state[] = {
+const char * const bch_dev_state[] = {
"active",
"readonly",
"failed",
NULL
};
-
-const char * const bch_bool_opt[] = {
- "0",
- "1",
- NULL
-};
-
-const char * const bch_uint_opt[] = {
- NULL
-};
-
-enum bch_opts {
-#define BCH_OPT(_name, _choices, _min, _max, _sb_opt, _perm) \
- Opt_##_name,
-
+const struct bch_option bch_opt_table[] = {
+#define OPT_BOOL() .type = BCH_OPT_BOOL
+#define OPT_UINT(_min, _max) .type = BCH_OPT_UINT, .min = _min, .max = _max
+#define OPT_STR(_choices) .type = BCH_OPT_STR, .choices = _choices
+
+#define BCH_OPT(_name, _mode, _sb_opt, _bits, _type) \
+ [Opt_##_name] = { \
+ .name = #_name, \
+ .set_sb = SET_##_sb_opt, \
+ _type \
+ },
BCH_VISIBLE_OPTS()
#undef BCH_OPT
-
- Opt_bad_opt,
};
-struct bch_option {
- const char *name;
- const char * const *opts;
- unsigned long min, max;
-};
-
-struct bch_opt_result {
- enum bch_opts opt;
- unsigned val;
-};
-
-static int parse_bool_opt(const struct bch_option *opt, const char *s)
+static enum bch_opt_id bch_opt_lookup(const char *name)
{
- if (!strcmp(opt->name, s))
- return true;
+ const struct bch_option *i;
- if (!strncmp("no", s, 2) && !strcmp(opt->name, s + 2))
- return false;
+ for (i = bch_opt_table;
+ i < bch_opt_table + ARRAY_SIZE(bch_opt_table);
+ i++)
+ if (!strcmp(name, i->name))
+ return i - bch_opt_table;
return -1;
}
-static int parse_uint_opt(const struct bch_option *opt, const char *s)
+static u64 bch_opt_get(struct bch_opts *opts, enum bch_opt_id id)
{
- unsigned long v;
- int ret;
-
- if (strncmp(opt->name, s, strlen(opt->name)))
- return -1;
+ switch (id) {
+#define BCH_OPT(_name, ...) \
+ case Opt_##_name: \
+ return opts->_name; \
- s += strlen(opt->name);
-
- if (*s != '=')
- return -1;
+ BCH_VISIBLE_OPTS()
+#undef BCH_OPT
- s++;
+ default:
+ BUG();
+ }
+}
- ret = kstrtoul(s, 10, &v);
- if (ret)
- return ret;
+void bch_opt_set(struct bch_opts *opts, enum bch_opt_id id, u64 v)
+{
+ switch (id) {
+#define BCH_OPT(_name, ...) \
+ case Opt_##_name: \
+ opts->_name = v; \
+ break;
- if (v < opt->min || v >= opt->max)
- return -ERANGE;
+ BCH_VISIBLE_OPTS()
+#undef BCH_OPT
- return 0;
+ default:
+ BUG();
+ }
}
-static int parse_string_opt(const struct bch_option *opt, const char *s)
+/*
+ * Initial options from superblock - here we don't want any options undefined;
+ * any option the superblock doesn't specify is set to 0:
+ */
+struct bch_opts bch_sb_opts(struct bch_sb *sb)
{
- if (strncmp(opt->name, s, strlen(opt->name)))
- return -1;
+ struct bch_opts opts = bch_opts_empty();
- s += strlen(opt->name);
+#define BCH_OPT(_name, _mode, _sb_opt, ...) \
+ if (_sb_opt != NO_SB_OPT) \
+ opts._name = _sb_opt(sb);
- if (*s != '=')
- return -1;
-
- s++;
+ BCH_OPTS()
+#undef BCH_OPT
- return bch_read_string_list(s, opt->opts);
+ return opts;
}
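bch_sb_opts() only covers options that have a superblock field; a sketch of how it is presumably combined with mount options at the call site (not shown in this excerpt), assuming superblock defaults are applied before user overrides:

	c->opts = bch_sb_opts(sb);		/* superblock defaults, no field left undefined */
	bch_opts_apply(&c->opts, mount_opts);	/* only options the user actually set (>= 0) win */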
-static struct bch_opt_result parse_one_opt(const char *opt)
+int parse_one_opt(enum bch_opt_id id, const char *val, u64 *res)
{
- static const struct bch_option opt_table[] = {
-#define BCH_OPT(_name, _choices, _min, _max, _sb_opt, _perm) \
- [Opt_##_name] = { \
- .name = #_name, \
- .opts = _choices, \
- .min = _min, \
- .max = _max, \
- },
- BCH_VISIBLE_OPTS()
-#undef BCH_OPT
- }, *i;
-
- for (i = opt_table;
- i < opt_table + ARRAY_SIZE(opt_table);
- i++) {
- int res = i->opts == bch_bool_opt ? parse_bool_opt(i, opt)
- : i->opts == bch_uint_opt ? parse_uint_opt(i, opt)
- : parse_string_opt(i, opt);
-
- if (res >= 0)
- return (struct bch_opt_result) {
- i - opt_table, res
- };
+ const struct bch_option *opt = &bch_opt_table[id];
+ ssize_t ret;
+
+ switch (opt->type) {
+ case BCH_OPT_BOOL:
+ ret = kstrtou64(val, 10, res);
+ if (ret < 0)
+ return ret;
+
+ if (*res > 1)
+ return -ERANGE;
+ break;
+ case BCH_OPT_UINT:
+ ret = kstrtou64(val, 10, res);
+ if (ret < 0)
+ return ret;
+
+ if (*res < opt->min || *res >= opt->max)
+ return -ERANGE;
+ break;
+ case BCH_OPT_STR:
+ ret = bch_read_string_list(val, opt->choices);
+ if (ret < 0)
+ return ret;
+
+ *res = ret;
+ break;
}
- return (struct bch_opt_result) { Opt_bad_opt };
+ return 0;
}
-int bch_parse_options(struct cache_set_opts *opts, int flags, char *options)
+int bch_parse_mount_opts(struct bch_opts *opts, char *options)
{
- char *p;
+ char *opt, *name, *val;
+ enum bch_opt_id id;
+ int ret;
+ u64 v;
+
+ while ((opt = strsep(&options, ",")) != NULL) {
+ name = strsep(&opt, "=");
+ val = opt;
+
+ if (val) {
+ id = bch_opt_lookup(name);
+ if (id < 0)
+ return -EINVAL;
+
+ ret = parse_one_opt(id, val, &v);
+ if (ret < 0)
+ return ret;
+ } else {
+ id = bch_opt_lookup(name);
+ v = 1;
+
+ if (id < 0 &&
+ !strncmp("no", name, 2)) {
+ id = bch_opt_lookup(name + 2);
+ v = 0;
+ }
+
+			if (id < 0 ||
+			    bch_opt_table[id].type != BCH_OPT_BOOL)
+				return -EINVAL;
+ }
- *opts = cache_set_opts_empty();
+ bch_opt_set(opts, id, v);
+ }
- opts->read_only = (flags & MS_RDONLY) != 0;
+ return 0;
+}
- if (!options)
- return 0;
+enum bch_opt_id bch_parse_sysfs_opt(const char *name, const char *val,
+ u64 *res)
+{
+ enum bch_opt_id id = bch_opt_lookup(name);
+ int ret;
- while ((p = strsep(&options, ",")) != NULL) {
- struct bch_opt_result res = parse_one_opt(p);
+ if (id < 0)
+ return -EINVAL;
- switch (res.opt) {
-#define BCH_OPT(_name, _choices, _min, _max, _sb_opt, _perm) \
- case Opt_##_name: \
- opts->_name = res.val; \
- break;
+ ret = parse_one_opt(id, val, res);
+ if (ret < 0)
+ return ret;
- BCH_VISIBLE_OPTS()
-#undef BCH_OPT
+ return id;
+}
- case Opt_bad_opt:
- return -EINVAL;
- default:
- BUG();
- }
- }
+ssize_t bch_opt_show(struct bch_opts *opts, const char *name,
+ char *buf, size_t size)
+{
+ enum bch_opt_id id = bch_opt_lookup(name);
+ const struct bch_option *opt;
+ u64 v;
- return 0;
+ if (id < 0)
+ return -EINVAL;
+
+ v = bch_opt_get(opts, id);
+ opt = &bch_opt_table[id];
+
+ return opt->type == BCH_OPT_STR
+ ? bch_snprint_string_list(buf, size, opt->choices, v)
+ : snprintf(buf, size, "%lli\n", v);
}
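Putting the parser together: bch_parse_mount_opts() splits the option string on commas, resolves each name via bch_opt_lookup(), parses name=value pairs with parse_one_opt(), and treats a bare "foo" or "nofoo" as the boolean 1 or 0. A small usage sketch (option names taken from the BCH_VISIBLE_OPTS() table in the header below):

	struct bch_opts opts = bch_opts_empty();
	char str[] = "fix_errors,journal_flush_disabled=0";	/* strsep() needs writable storage */

	if (!bch_parse_mount_opts(&opts, str)) {
		/* opts.fix_errors == 1, opts.journal_flush_disabled == 0,
		 * every other field is still -1 (undefined) */
	}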
extern const char * const bch_str_hash_types[];
extern const char * const bch_cache_replacement_policies[];
extern const char * const bch_cache_modes[];
-extern const char * const bch_cache_state[];
+extern const char * const bch_dev_state[];
/*
* Mount options; we also store defaults in the superblock.
* updates the superblock.
*
* We store options as signed integers, where -1 means undefined. This means we
- * can pass the mount options to cache_set_alloc() as a whole struct, and then
- * only apply the options from that struct that are defined.
+ * can pass the mount options to bch_fs_alloc() as a whole struct, and then only
+ * apply the options from that struct that are defined.
*/
-extern const char * const bch_bool_opt[];
-extern const char * const bch_uint_opt[];
-
/* dummy option, for options that aren't stored in the superblock */
LE64_BITMASK(NO_SB_OPT, struct bch_sb, flags[0], 0, 0);
-#define BCH_VISIBLE_OPTS() \
- BCH_OPT(verbose_recovery, \
- bch_bool_opt, 0, 2, \
- NO_SB_OPT, false) \
- BCH_OPT(posix_acl, \
- bch_bool_opt, 0, 2, \
- NO_SB_OPT, false) \
- BCH_OPT(journal_flush_disabled, \
- bch_bool_opt, 0, 2, \
- NO_SB_OPT, true) \
- BCH_OPT(nofsck, \
- bch_bool_opt, 0, 2, \
- NO_SB_OPT, true) \
- BCH_OPT(fix_errors, \
- bch_bool_opt, 0, 2, \
- NO_SB_OPT, true) \
- BCH_OPT(nochanges, \
- bch_bool_opt, 0, 2, \
- NO_SB_OPT, 0) \
- BCH_OPT(noreplay, \
- bch_bool_opt, 0, 2, \
- NO_SB_OPT, 0) \
- BCH_OPT(norecovery, \
- bch_bool_opt, 0, 2, \
- NO_SB_OPT, 0) \
- BCH_SB_OPTS()
-
-#define BCH_OPTS() \
- BCH_OPT(read_only, \
- bch_bool_opt, 0, 2, \
- NO_SB_OPT, 0) \
+/**
+ * BCH_OPT(name, mode, sb_opt, bits, type)
+ *
+ * @name - name of mount option, sysfs attribute, and struct bch_opts
+ *         member
+ *
+ * @mode - sysfs attr permissions
+ *
+ * @sb_opt - name of the corresponding superblock option
+ *
+ * @bits - integer type of the struct bch_opts member (currently s8 for every option)
+ *
+ * @type - one of OPT_BOOL, OPT_UINT, OPT_STR
+ */
+
+enum opt_type {
+ BCH_OPT_BOOL,
+ BCH_OPT_UINT,
+ BCH_OPT_STR,
+};
+
+#define BCH_VISIBLE_OPTS() \
+ BCH_OPT(errors, 0644, BCH_SB_ERROR_ACTION, \
+ s8, OPT_STR(bch_error_actions)) \
+ BCH_OPT(metadata_replicas, 0444, BCH_SB_META_REPLICAS_WANT,\
+ s8, OPT_UINT(0, BCH_REPLICAS_MAX)) \
+ BCH_OPT(data_replicas, 0444, BCH_SB_DATA_REPLICAS_WANT,\
+ s8, OPT_UINT(0, BCH_REPLICAS_MAX)) \
+ BCH_OPT(metadata_checksum, 0644, BCH_SB_META_CSUM_TYPE, \
+ s8, OPT_STR(bch_csum_types)) \
+ BCH_OPT(data_checksum, 0644, BCH_SB_DATA_CSUM_TYPE, \
+ s8, OPT_STR(bch_csum_types)) \
+ BCH_OPT(compression, 0644, BCH_SB_COMPRESSION_TYPE,\
+ s8, OPT_STR(bch_compression_types)) \
+ BCH_OPT(str_hash, 0644, BCH_SB_STR_HASH_TYPE, \
+ s8, OPT_STR(bch_str_hash_types)) \
+ BCH_OPT(inodes_32bit, 0644, BCH_SB_INODE_32BIT, \
+ s8, OPT_BOOL()) \
+ BCH_OPT(gc_reserve_percent, 0444, BCH_SB_GC_RESERVE, \
+ s8, OPT_UINT(5, 21)) \
+ BCH_OPT(root_reserve_percent, 0444, BCH_SB_ROOT_RESERVE, \
+ s8, OPT_UINT(0, 100)) \
+ BCH_OPT(wide_macs, 0644, BCH_SB_128_BIT_MACS, \
+ s8, OPT_BOOL()) \
+ BCH_OPT(verbose_recovery, 0444, NO_SB_OPT, \
+ s8, OPT_BOOL()) \
+ BCH_OPT(posix_acl, 0444, NO_SB_OPT, \
+ s8, OPT_BOOL()) \
+ BCH_OPT(journal_flush_disabled, 0644, NO_SB_OPT, \
+ s8, OPT_BOOL()) \
+ BCH_OPT(nofsck, 0444, NO_SB_OPT, \
+ s8, OPT_BOOL()) \
+ BCH_OPT(fix_errors, 0444, NO_SB_OPT, \
+ s8, OPT_BOOL()) \
+ BCH_OPT(nochanges, 0444, NO_SB_OPT, \
+ s8, OPT_BOOL()) \
+ BCH_OPT(noreplay, 0444, NO_SB_OPT, \
+ s8, OPT_BOOL()) \
+ BCH_OPT(norecovery, 0444, NO_SB_OPT, \
+ s8, OPT_BOOL())
+
+#define BCH_OPTS() \
+ BCH_OPT(read_only, 0444, NO_SB_OPT, \
+ s8, OPT_BOOL()) \
BCH_VISIBLE_OPTS()
-struct cache_set_opts {
-#define BCH_OPT(_name, _choices, _min, _max, _sb_opt, _perm)\
- s8 _name;
+struct bch_opts {
+#define BCH_OPT(_name, _mode, _sb_opt, _bits, ...) \
+ _bits _name;
BCH_OPTS()
#undef BCH_OPT
};
-static inline struct cache_set_opts cache_set_opts_empty(void)
+enum bch_opt_id {
+#define BCH_OPT(_name, ...) \
+ Opt_##_name,
+
+ BCH_VISIBLE_OPTS()
+#undef BCH_OPT
+};
+
+struct bch_option {
+ const char *name;
+ void (*set_sb)(struct bch_sb *, u64);
+ enum opt_type type;
+
+ union {
+ struct {
+ u64 min, max;
+ };
+ struct {
+ const char * const *choices;
+ };
+ };
+};
+
+extern const struct bch_option bch_opt_table[];
+
+static inline struct bch_opts bch_opts_empty(void)
{
- struct cache_set_opts ret;
+ struct bch_opts ret;
memset(&ret, 255, sizeof(ret));
return ret;
}
-/*
- * Initial options from superblock - here we don't want any options undefined,
- * any options the superblock doesn't specify are set to 0:
- */
-static inline struct cache_set_opts cache_superblock_opts(struct bch_sb *sb)
+static inline void bch_opts_apply(struct bch_opts *dst, struct bch_opts src)
{
- return (struct cache_set_opts) {
-#define BCH_OPT(_name, _choices, _min, _max, _sb_opt, _perm)\
- ._name = _sb_opt##_BITS ? _sb_opt(sb) : 0,
+#define BCH_OPT(_name, ...) \
+ if (src._name >= 0) \
+ dst->_name = src._name;
- BCH_SB_OPTS()
+ BCH_OPTS()
#undef BCH_OPT
- };
}
-static inline void cache_set_opts_apply(struct cache_set_opts *dst,
- struct cache_set_opts src)
-{
-#define BCH_OPT(_name, _choices, _min, _max, _sb_opt, _perm)\
- BUILD_BUG_ON(_max > S8_MAX); \
- if (src._name >= 0) \
- dst->_name = src._name;
+void bch_opt_set(struct bch_opts *, enum bch_opt_id, u64);
+struct bch_opts bch_sb_opts(struct bch_sb *);
- BCH_SB_OPTS()
-#undef BCH_OPT
-}
+int bch_parse_mount_opts(struct bch_opts *, char *);
+enum bch_opt_id bch_parse_sysfs_opt(const char *, const char *, u64 *);
-int bch_parse_options(struct cache_set_opts *, int, char *);
+ssize_t bch_opt_show(struct bch_opts *, const char *, char *, size_t);
#endif /* _BCACHE_OPTS_H */
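Because every entry in BCH_OPTS() declares its struct member as s8, the memset() to 0xff in bch_opts_empty() leaves each field at -1, the value bch_opts_apply() treats as "not set". Roughly what the X-macros above expand to (abbreviated, for illustration only):

	struct bch_opts {
		s8	read_only;
		s8	errors;
		s8	metadata_replicas;
		/* ... one s8 member per BCH_OPT() entry ... */
	};

	enum bch_opt_id {
		Opt_errors,
		Opt_metadata_replicas,
		/* ... visible options only ... */
	};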
struct cached_dev;
struct bcache_device;
+#ifndef NO_BCACHE_ACCOUNTING
+
void bch_cache_accounting_init(struct cache_accounting *, struct closure *);
int bch_cache_accounting_add_kobjs(struct cache_accounting *, struct kobject *);
void bch_cache_accounting_clear(struct cache_accounting *);
void bch_cache_accounting_destroy(struct cache_accounting *);
+#else
+
+static inline void bch_cache_accounting_init(struct cache_accounting *acc,
+ struct closure *cl) {}
+static inline int bch_cache_accounting_add_kobjs(struct cache_accounting *acc,
+ struct kobject *cl)
+{
+ return 0;
+}
+static inline void bch_cache_accounting_clear(struct cache_accounting *acc) {}
+static inline void bch_cache_accounting_destroy(struct cache_accounting *acc) {}
+
+#endif
+
static inline void mark_cache_stats(struct cache_stat_collector *stats,
bool hit, bool bypass)
{
unsigned i;
rcu_read_lock();
- list_for_each_entry(c, &bch_cache_sets, list)
+ list_for_each_entry(c, &bch_fs_list, list)
for_each_cache_rcu(ca, c, i)
if (ca->disk_sb.bdev == bdev) {
rcu_read_unlock();
}
static const char *bch_blkdev_open(const char *path, void *holder,
- struct cache_set_opts opts,
+ struct bch_opts opts,
struct block_device **ret)
{
struct block_device *bdev;
}
/* Update cached mi: */
-int bch_cache_set_mi_update(struct cache_set *c,
- struct bch_member *mi,
- unsigned nr_devices)
+int bch_fs_mi_update(struct cache_set *c, struct bch_member *mi,
+ unsigned nr_devices)
{
struct cache_member_rcu *new, *old;
struct cache *ca;
if (bch_fs_sb_realloc(c, le32_to_cpu(src->u64s) - journal_u64s))
return -ENOMEM;
- if (bch_cache_set_mi_update(c, members->members, src->nr_devices))
+ if (bch_fs_mi_update(c, members->members, src->nr_devices))
return -ENOMEM;
__copy_super(c->disk_sb, src);
}
const char *bch_read_super(struct bcache_superblock *sb,
- struct cache_set_opts opts,
+ struct bch_opts opts,
const char *path)
{
struct bch_sb_layout layout;
goto err;
err = "dynamic fault";
- if (cache_set_init_fault("read_super"))
+ if (bch_fs_init_fault("read_super"))
goto err;
err = read_one_super(sb, BCH_SB_SECTOR);
/* XXX: return errors directly */
- cache_fatal_io_err_on(bio->bi_error, ca, "superblock write");
+ bch_dev_fatal_io_err_on(bio->bi_error, ca, "superblock write");
bch_account_io_completion(ca);
} while (wrote);
/* Make new options visible after they're persistent: */
- bch_cache_set_mi_update(c, members->members, c->sb.nr_devices);
+ bch_fs_mi_update(c, members->members, c->sb.nr_devices);
bch_sb_update(c);
}
};
}
-int bch_cache_set_mi_update(struct cache_set *, struct bch_member *, unsigned);
+int bch_fs_mi_update(struct cache_set *, struct bch_member *, unsigned);
int bch_sb_to_cache_set(struct cache_set *, struct bch_sb *);
int bch_sb_from_cache_set(struct cache_set *, struct cache *);
const char *bch_validate_cache_super(struct bcache_superblock *);
const char *bch_read_super(struct bcache_superblock *,
- struct cache_set_opts, const char *);
+ struct bch_opts, const char *);
void bch_write_super(struct cache_set *);
void bch_check_mark_super_slowpath(struct cache_set *,
static struct kset *bcache_kset;
struct mutex bch_register_lock;
-LIST_HEAD(bch_cache_sets);
+LIST_HEAD(bch_fs_list);
-static int bch_chardev_major;
-static struct class *bch_chardev_class;
-static struct device *bch_chardev;
-static DEFINE_IDR(bch_chardev_minor);
static DECLARE_WAIT_QUEUE_HEAD(bch_read_only_wait);
struct workqueue_struct *bcache_io_wq;
struct crypto_shash *bch_sha256;
-static void bch_cache_stop(struct cache *);
-static int bch_cache_online(struct cache *);
+static void bch_dev_stop(struct cache *);
+static int bch_dev_online(struct cache *);
static int bch_congested_fn(void *data, int bdi_bits)
{
* - allocator depends on the journal (when it rewrites prios and gens)
*/
-static void __bch_cache_set_read_only(struct cache_set *c)
+static void __bch_fs_read_only(struct cache_set *c)
{
struct cache *ca;
unsigned i;
bch_btree_flush(c);
for_each_cache(ca, c, i)
- bch_cache_allocator_stop(ca);
+ bch_dev_allocator_stop(ca);
/*
* Write a journal entry after flushing the btree, so we don't end up
{
struct cache_set *c = container_of(writes, struct cache_set, writes);
- set_bit(CACHE_SET_WRITE_DISABLE_COMPLETE, &c->flags);
+ set_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags);
wake_up(&bch_read_only_wait);
}
-static void bch_cache_set_read_only_work(struct work_struct *work)
+static void bch_fs_read_only_work(struct work_struct *work)
{
struct cache_set *c =
container_of(work, struct cache_set, read_only_work);
c->foreground_write_pd.rate.rate = UINT_MAX;
bch_wake_delayed_writes((unsigned long) c);
- if (!test_bit(CACHE_SET_EMERGENCY_RO, &c->flags)) {
+ if (!test_bit(BCH_FS_EMERGENCY_RO, &c->flags)) {
/*
* If we're not doing an emergency shutdown, we want to wait on
* outstanding writes to complete so they don't see spurious
* errors due to shutting down the allocator:
*/
wait_event(bch_read_only_wait,
- test_bit(CACHE_SET_WRITE_DISABLE_COMPLETE, &c->flags));
+ test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags));
- __bch_cache_set_read_only(c);
+ __bch_fs_read_only(c);
if (!bch_journal_error(&c->journal) &&
- !test_bit(CACHE_SET_ERROR, &c->flags)) {
+ !test_bit(BCH_FS_ERROR, &c->flags)) {
mutex_lock(&c->sb_lock);
SET_BCH_SB_CLEAN(c->disk_sb, true);
bch_write_super(c);
* we do need to wait on them before returning and signalling
* that going RO is complete:
*/
- __bch_cache_set_read_only(c);
+ __bch_fs_read_only(c);
wait_event(bch_read_only_wait,
- test_bit(CACHE_SET_WRITE_DISABLE_COMPLETE, &c->flags));
+ test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags));
}
- bch_notify_cache_set_read_only(c);
- trace_bcache_cache_set_read_only_done(c);
+ bch_notify_fs_read_only(c);
+ trace_fs_read_only_done(c);
- set_bit(CACHE_SET_RO_COMPLETE, &c->flags);
+ set_bit(BCH_FS_RO_COMPLETE, &c->flags);
wake_up(&bch_read_only_wait);
}
-bool bch_cache_set_read_only(struct cache_set *c)
+bool bch_fs_read_only(struct cache_set *c)
{
- if (test_and_set_bit(CACHE_SET_RO, &c->flags))
+ if (test_and_set_bit(BCH_FS_RO, &c->flags))
return false;
- trace_bcache_cache_set_read_only(c);
+ trace_fs_read_only(c);
percpu_ref_get(&c->writes);
*
* (This is really blocking new _allocations_, writes to previously
* allocated space can still happen until stopping the allocator in
- * bch_cache_allocator_stop()).
+ * bch_dev_allocator_stop()).
*/
percpu_ref_kill(&c->writes);
return true;
}
-bool bch_cache_set_emergency_read_only(struct cache_set *c)
+bool bch_fs_emergency_read_only(struct cache_set *c)
{
- bool ret = !test_and_set_bit(CACHE_SET_EMERGENCY_RO, &c->flags);
+ bool ret = !test_and_set_bit(BCH_FS_EMERGENCY_RO, &c->flags);
- bch_cache_set_read_only(c);
+ bch_fs_read_only(c);
bch_journal_halt(&c->journal);
wake_up(&bch_read_only_wait);
return ret;
}
-void bch_cache_set_read_only_sync(struct cache_set *c)
+void bch_fs_read_only_sync(struct cache_set *c)
{
- /* so we don't race with bch_cache_set_read_write() */
+ /* so we don't race with bch_fs_read_write() */
lockdep_assert_held(&bch_register_lock);
- bch_cache_set_read_only(c);
+ bch_fs_read_only(c);
wait_event(bch_read_only_wait,
- test_bit(CACHE_SET_RO_COMPLETE, &c->flags) &&
- test_bit(CACHE_SET_WRITE_DISABLE_COMPLETE, &c->flags));
+ test_bit(BCH_FS_RO_COMPLETE, &c->flags) &&
+ test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags));
}
-static const char *__bch_cache_set_read_write(struct cache_set *c)
+static const char *__bch_fs_read_write(struct cache_set *c)
{
struct cache *ca;
const char *err;
err = "error starting allocator thread";
for_each_cache(ca, c, i)
if (ca->mi.state == BCH_MEMBER_STATE_ACTIVE &&
- bch_cache_allocator_start(ca)) {
+ bch_dev_allocator_start(ca)) {
percpu_ref_put(&ca->ref);
goto err;
}
return NULL;
err:
- __bch_cache_set_read_only(c);
+ __bch_fs_read_only(c);
return err;
}
-const char *bch_cache_set_read_write(struct cache_set *c)
+const char *bch_fs_read_write(struct cache_set *c)
{
const char *err;
lockdep_assert_held(&bch_register_lock);
- if (!test_bit(CACHE_SET_RO_COMPLETE, &c->flags))
+ if (!test_bit(BCH_FS_RO_COMPLETE, &c->flags))
return NULL;
- err = __bch_cache_set_read_write(c);
+ err = __bch_fs_read_write(c);
if (err)
return err;
percpu_ref_reinit(&c->writes);
- clear_bit(CACHE_SET_WRITE_DISABLE_COMPLETE, &c->flags);
- clear_bit(CACHE_SET_EMERGENCY_RO, &c->flags);
- clear_bit(CACHE_SET_RO_COMPLETE, &c->flags);
- clear_bit(CACHE_SET_RO, &c->flags);
+ clear_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags);
+ clear_bit(BCH_FS_EMERGENCY_RO, &c->flags);
+ clear_bit(BCH_FS_RO_COMPLETE, &c->flags);
+ clear_bit(BCH_FS_RO, &c->flags);
return NULL;
}
/* Cache set startup/shutdown: */
-static void cache_set_free(struct cache_set *c)
+static void bch_fs_free(struct cache_set *c)
{
del_timer_sync(&c->foreground_write_wakeup);
cancel_delayed_work_sync(&c->pd_controllers_update);
cancel_work_sync(&c->bio_submit_work);
cancel_work_sync(&c->read_retry_work);
- bch_cache_set_encryption_free(c);
+ bch_fs_encryption_free(c);
bch_btree_cache_free(c);
bch_journal_free(&c->journal);
bch_io_clock_exit(&c->io_clock[WRITE]);
bch_io_clock_exit(&c->io_clock[READ]);
bch_compress_free(c);
+ bch_fs_blockdev_exit(c);
bdi_destroy(&c->bdi);
lg_lock_free(&c->bucket_stats_lock);
free_percpu(c->bucket_stats_percpu);
mempool_exit(&c->btree_interior_update_pool);
mempool_exit(&c->btree_reserve_pool);
mempool_exit(&c->fill_iter);
- mempool_exit(&c->search);
percpu_ref_exit(&c->writes);
if (c->copygc_wq)
}
/*
- * should be __cache_set_stop4 - block devices are closed, now we can finally
+ * should be __bch_fs_stop4 - block devices are closed, now we can finally
* free it
*/
-void bch_cache_set_release(struct kobject *kobj)
+void bch_fs_release(struct kobject *kobj)
{
struct cache_set *c = container_of(kobj, struct cache_set, kobj);
struct completion *stop_completion = c->stop_completion;
- bch_notify_cache_set_stopped(c);
+ bch_notify_fs_stopped(c);
bch_info(c, "stopped");
- cache_set_free(c);
+ bch_fs_free(c);
if (stop_completion)
complete(stop_completion);
/*
* All activity on the cache_set should have stopped now - close devices:
*/
-static void __cache_set_stop3(struct closure *cl)
+static void __bch_fs_stop3(struct closure *cl)
{
struct cache_set *c = container_of(cl, struct cache_set, cl);
struct cache *ca;
mutex_lock(&bch_register_lock);
for_each_cache(ca, c, i)
- bch_cache_stop(ca);
- mutex_unlock(&bch_register_lock);
+ bch_dev_stop(ca);
- mutex_lock(&bch_register_lock);
list_del(&c->list);
- if (c->minor >= 0)
- idr_remove(&bch_chardev_minor, c->minor);
mutex_unlock(&bch_register_lock);
closure_debug_destroy(&c->cl);
* Openers (i.e. block devices) should have exited; shut down all userspace
* interfaces and wait for &c->cl to hit 0
*/
-static void __cache_set_stop2(struct closure *cl)
+static void __bch_fs_stop2(struct closure *cl)
{
struct cache_set *c = container_of(cl, struct cache_set, caching);
bch_debug_exit_cache_set(c);
-
- if (!IS_ERR_OR_NULL(c->chardev))
- device_unregister(c->chardev);
+ bch_fs_chardev_exit(c);
if (c->kobj.state_in_sysfs)
kobject_del(&c->kobj);
kobject_put(&c->internal);
mutex_lock(&bch_register_lock);
- bch_cache_set_read_only_sync(c);
+ bch_fs_read_only_sync(c);
mutex_unlock(&bch_register_lock);
closure_return(cl);
}
/*
- * First phase of the shutdown process that's kicked off by cache_set_stop(); we
+ * First phase of the shutdown process that's kicked off by bch_fs_stop(); we
* haven't waited for anything to stop yet, we're just punting to process
* context to shut down block devices:
*/
-static void __cache_set_stop1(struct closure *cl)
+static void __bch_fs_stop1(struct closure *cl)
{
struct cache_set *c = container_of(cl, struct cache_set, caching);
bch_blockdevs_stop(c);
- continue_at(cl, __cache_set_stop2, system_wq);
+ continue_at(cl, __bch_fs_stop2, system_wq);
}
-void bch_cache_set_stop(struct cache_set *c)
+void bch_fs_stop(struct cache_set *c)
{
- if (!test_and_set_bit(CACHE_SET_STOPPING, &c->flags))
+ if (!test_and_set_bit(BCH_FS_STOPPING, &c->flags))
closure_queue(&c->caching);
}
-void bch_cache_set_unregister(struct cache_set *c)
+void bch_fs_stop_sync(struct cache_set *c)
{
- if (!test_and_set_bit(CACHE_SET_UNREGISTERING, &c->flags))
- bch_cache_set_stop(c);
+ DECLARE_COMPLETION_ONSTACK(complete);
+
+ c->stop_completion = &complete;
+ bch_fs_stop(c);
+ closure_put(&c->cl);
+
+ /* Killable? */
+ wait_for_completion(&complete);
}
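/*
 * Illustrative sketch (not part of the patch): bch_fs_stop_sync() above uses
 * the usual on-stack completion pattern -- the waiter points the object at a
 * stack completion, kicks off asynchronous teardown, and blocks until the
 * release path calls complete(). Names below are hypothetical.
 */
#include <linux/completion.h>

struct obj {
	struct completion *stop_completion;
};

static void obj_release(struct obj *o)
{
	if (o->stop_completion)
		complete(o->stop_completion);	/* wakes obj_stop_sync() */
}

static void obj_stop_sync(struct obj *o)
{
	DECLARE_COMPLETION_ONSTACK(done);

	o->stop_completion = &done;
	/* ... queue asynchronous teardown here ... */
	wait_for_completion(&done);		/* returns once obj_release() ran */
}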
-static unsigned cache_set_nr_devices(struct cache_set *c)
+/* Stop, detaching from backing devices: */
+void bch_fs_detach(struct cache_set *c)
+{
+ if (!test_and_set_bit(BCH_FS_DETACHING, &c->flags))
+ bch_fs_stop(c);
+}
+
+static unsigned bch_fs_nr_devices(struct cache_set *c)
{
struct bch_sb_field_members *mi;
unsigned i, nr = 0;
return nr;
}
-static unsigned cache_set_nr_online_devices(struct cache_set *c)
+static unsigned bch_fs_nr_online_devices(struct cache_set *c)
{
unsigned i, nr = 0;
#define alloc_bucket_pages(gfp, ca) \
((void *) __get_free_pages(__GFP_ZERO|gfp, ilog2(bucket_pages(ca))))
-static struct cache_set *bch_cache_set_alloc(struct bch_sb *sb,
- struct cache_set_opts opts)
+static struct cache_set *bch_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
{
struct cache_set *c;
unsigned iter_size, journal_entry_bytes;
mutex_init(&c->btree_cache_lock);
mutex_init(&c->bucket_lock);
mutex_init(&c->btree_root_lock);
- INIT_WORK(&c->read_only_work, bch_cache_set_read_only_work);
+ INIT_WORK(&c->read_only_work, bch_fs_read_only_work);
init_rwsem(&c->gc_lock);
scnprintf(c->name, sizeof(c->name), "%pU", &c->sb.user_uuid);
- c->opts = cache_superblock_opts(sb);
- cache_set_opts_apply(&c->opts, opts);
+ bch_opts_apply(&c->opts, bch_sb_opts(sb));
+ bch_opts_apply(&c->opts, opts);
c->opts.nochanges |= c->opts.noreplay;
c->opts.read_only |= c->opts.nochanges;
c->block_bits = ilog2(c->sb.block_size);
- if (cache_set_init_fault("cache_set_alloc"))
+ if (bch_fs_init_fault("fs_alloc"))
goto err;
iter_size = (btree_blocks(c) + 1) * 2 *
!(c->copygc_wq = alloc_workqueue("bcache_copygc",
WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_HIGHPRI, 1)) ||
percpu_ref_init(&c->writes, bch_writes_disabled, 0, GFP_KERNEL) ||
- mempool_init_slab_pool(&c->search, 1, bch_search_cache) ||
mempool_init_kmalloc_pool(&c->btree_reserve_pool, 1,
sizeof(struct btree_reserve)) ||
mempool_init_kmalloc_pool(&c->btree_interior_update_pool, 1,
mempool_init_page_pool(&c->btree_bounce_pool, 1,
ilog2(btree_pages(c))) ||
bdi_setup_and_register(&c->bdi, "bcache") ||
+ bch_fs_blockdev_init(c) ||
bch_io_clock_init(&c->io_clock[READ]) ||
bch_io_clock_init(&c->io_clock[WRITE]) ||
bch_journal_alloc(&c->journal, journal_entry_bytes) ||
bch_btree_cache_alloc(c) ||
- bch_cache_set_encryption_init(c) ||
+ bch_fs_encryption_init(c) ||
bch_compress_init(c) ||
bch_check_set_has_compressed_data(c, c->opts.compression))
goto err;
closure_init(&c->cl, NULL);
c->kobj.kset = bcache_kset;
- kobject_init(&c->kobj, &bch_cache_set_ktype);
- kobject_init(&c->internal, &bch_cache_set_internal_ktype);
- kobject_init(&c->opts_dir, &bch_cache_set_opts_dir_ktype);
- kobject_init(&c->time_stats, &bch_cache_set_time_stats_ktype);
+ kobject_init(&c->kobj, &bch_fs_ktype);
+ kobject_init(&c->internal, &bch_fs_internal_ktype);
+ kobject_init(&c->opts_dir, &bch_fs_opts_dir_ktype);
+ kobject_init(&c->time_stats, &bch_fs_time_stats_ktype);
bch_cache_accounting_init(&c->accounting, &c->cl);
closure_init(&c->caching, &c->cl);
- set_closure_fn(&c->caching, __cache_set_stop1, system_wq);
+ set_closure_fn(&c->caching, __bch_fs_stop1, system_wq);
- continue_at_noreturn(&c->cl, __cache_set_stop3, system_wq);
+ continue_at_noreturn(&c->cl, __bch_fs_stop3, system_wq);
return c;
err:
- cache_set_free(c);
+ bch_fs_free(c);
return NULL;
}
-static int bch_cache_set_online(struct cache_set *c)
+static int bch_fs_online(struct cache_set *c)
{
struct cache *ca;
unsigned i;
+ int ret;
lockdep_assert_held(&bch_register_lock);
- if (c->kobj.state_in_sysfs)
+ if (!list_empty(&c->list))
return 0;
- c->minor = idr_alloc(&bch_chardev_minor, c, 0, 0, GFP_KERNEL);
- if (c->minor < 0)
- return c->minor;
+ list_add(&c->list, &bch_fs_list);
- c->chardev = device_create(bch_chardev_class, NULL,
- MKDEV(bch_chardev_major, c->minor), NULL,
- "bcache%u-ctl", c->minor);
- if (IS_ERR(c->chardev))
- return PTR_ERR(c->chardev);
+ ret = bch_fs_chardev_init(c);
+ if (ret)
+ return ret;
if (kobject_add(&c->kobj, NULL, "%pU", c->sb.user_uuid.b) ||
kobject_add(&c->internal, &c->kobj, "internal") ||
return -1;
for_each_cache(ca, c, i)
- if (bch_cache_online(ca)) {
+ if (bch_dev_online(ca)) {
percpu_ref_put(&ca->ref);
return -1;
}
- list_add(&c->list, &bch_cache_sets);
return 0;
}
-static const char *run_cache_set(struct cache_set *c)
+static const char *bch_fs_start(struct cache_set *c)
{
const char *err = "cannot allocate memory";
struct bch_sb_field_members *mi;
int ret = -EINVAL;
lockdep_assert_held(&bch_register_lock);
- BUG_ON(test_bit(CACHE_SET_RUNNING, &c->flags));
+ BUG_ON(test_bit(BCH_FS_RUNNING, &c->flags));
/* We don't want bch_fatal_error() to free underneath us */
closure_get(&c->caching);
bch_recalc_min_prio(ca, WRITE);
}
- /*
- * If bch_prio_read() fails it'll call cache_set_error and we'll
- * tear everything down right away, but if we perhaps checked
- * sooner we could avoid journal replay.
- */
-
for (id = 0; id < BTREE_ID_NR; id++) {
unsigned level;
struct bkey_i *k;
err = "error starting allocator thread";
for_each_cache(ca, c, i)
if (ca->mi.state == BCH_MEMBER_STATE_ACTIVE &&
- bch_cache_allocator_start(ca)) {
+ bch_dev_allocator_start(ca)) {
percpu_ref_put(&ca->ref);
goto err;
}
err = "unable to allocate journal buckets";
for_each_cache(ca, c, i)
- if (bch_cache_journal_alloc(ca)) {
+ if (bch_dev_journal_alloc(ca)) {
percpu_ref_put(&ca->ref);
goto err;
}
err = "error starting allocator thread";
for_each_cache(ca, c, i)
if (ca->mi.state == BCH_MEMBER_STATE_ACTIVE &&
- bch_cache_allocator_start(ca)) {
+ bch_dev_allocator_start(ca)) {
percpu_ref_put(&ca->ref);
goto err;
}
}
recovery_done:
if (c->opts.read_only) {
- bch_cache_set_read_only_sync(c);
+ bch_fs_read_only_sync(c);
} else {
- err = __bch_cache_set_read_write(c);
+ err = __bch_fs_read_write(c);
if (err)
goto err;
}
mutex_unlock(&c->sb_lock);
err = "dynamic fault";
- if (cache_set_init_fault("run_cache_set"))
+ if (bch_fs_init_fault("fs_start"))
goto err;
err = "error creating kobject";
- if (bch_cache_set_online(c))
+ if (bch_fs_online(c))
goto err;
err = "can't bring up blockdev volumes";
goto err;
bch_debug_init_cache_set(c);
- set_bit(CACHE_SET_RUNNING, &c->flags);
+ set_bit(BCH_FS_RUNNING, &c->flags);
bch_attach_backing_devs(c);
- bch_notify_cache_set_read_write(c);
+ bch_notify_fs_read_write(c);
err = NULL;
out:
bch_journal_entries_free(&journal);
}
BUG_ON(!err);
- set_bit(CACHE_SET_ERROR, &c->flags);
+ set_bit(BCH_FS_ERROR, &c->flags);
goto out;
}
-static const char *can_add_cache(struct bch_sb *sb,
- struct cache_set *c)
+static const char *bch_dev_may_add(struct bch_sb *sb, struct cache_set *c)
{
struct bch_sb_field_members *sb_mi;
return NULL;
}
-static const char *can_attach_cache(struct bch_sb *sb, struct cache_set *c)
+static const char *bch_dev_in_fs(struct bch_sb *sb, struct cache_set *c)
{
struct bch_sb_field_members *mi = bch_sb_get_members(c->disk_sb);
struct bch_sb_field_members *dev_mi = bch_sb_get_members(sb);
uuid_le dev_uuid = dev_mi->members[sb->dev_idx].uuid;
const char *err;
- err = can_add_cache(sb, c);
+ err = bch_dev_may_add(sb, c);
if (err)
return err;
/* Cache device */
-bool bch_cache_read_only(struct cache *ca)
+bool bch_dev_read_only(struct cache *ca)
{
struct cache_set *c = ca->set;
struct bch_sb_field_members *mi;
if (ca->mi.state != BCH_MEMBER_STATE_ACTIVE)
return false;
- if (!bch_cache_may_remove(ca)) {
+ if (!bch_dev_may_remove(ca)) {
bch_err(c, "required member %s going RO, forcing fs RO", buf);
- bch_cache_set_read_only_sync(c);
+ bch_fs_read_only_sync(c);
}
trace_bcache_cache_read_only(ca);
* buckets) and then waits for all existing writes to
* complete.
*/
- bch_cache_allocator_stop(ca);
+ bch_dev_allocator_stop(ca);
- bch_cache_group_remove_cache(&c->journal.devs, ca);
+ bch_dev_group_remove(&c->journal.devs, ca);
/*
* Device data write barrier -- no non-meta-data writes should
trace_bcache_cache_read_only_done(ca);
bch_notice(c, "%s read only", bdevname(ca->disk_sb.bdev, buf));
- bch_notify_cache_read_only(ca);
+ bch_notify_dev_read_only(ca);
mutex_lock(&c->sb_lock);
mi = bch_sb_get_members(c->disk_sb);
return true;
}
-static const char *__bch_cache_read_write(struct cache_set *c, struct cache *ca)
+static const char *__bch_dev_read_write(struct cache_set *c, struct cache *ca)
{
lockdep_assert_held(&bch_register_lock);
if (ca->mi.state == BCH_MEMBER_STATE_ACTIVE)
return NULL;
- if (test_bit(CACHE_DEV_REMOVING, &ca->flags))
+ if (test_bit(BCH_DEV_REMOVING, &ca->flags))
return "removing";
trace_bcache_cache_read_write(ca);
- if (bch_cache_allocator_start(ca))
+ if (bch_dev_allocator_start(ca))
return "error starting allocator thread";
if (bch_moving_gc_thread_start(ca))
return "error starting moving GC thread";
- bch_cache_group_add_cache(&c->journal.devs, ca);
+ bch_dev_group_add(&c->journal.devs, ca);
wake_up_process(c->tiering_read);
- bch_notify_cache_read_write(ca);
+ bch_notify_dev_read_write(ca);
trace_bcache_cache_read_write_done(ca);
return NULL;
}
-const char *bch_cache_read_write(struct cache *ca)
+const char *bch_dev_read_write(struct cache *ca)
{
struct cache_set *c = ca->set;
struct bch_sb_field_members *mi;
const char *err;
- err = __bch_cache_read_write(c, ca);
+ err = __bch_dev_read_write(c, ca);
if (err)
return err;
}
/*
- * bch_cache_stop has already returned, so we no longer hold the register
+ * bch_dev_stop has already returned, so we no longer hold the register
* lock at the point this is called.
*/
-void bch_cache_release(struct kobject *kobj)
+void bch_dev_release(struct kobject *kobj)
{
struct cache *ca = container_of(kobj, struct cache, kobj);
kfree(ca);
}
-static void bch_cache_free_work(struct work_struct *work)
+static void bch_dev_free_work(struct work_struct *work)
{
struct cache *ca = container_of(work, struct cache, free_work);
struct cache_set *c = ca->set;
bch_free_super(&ca->disk_sb);
/*
- * bch_cache_stop can be called in the middle of initialization
+ * bch_dev_stop can be called in the middle of initialization
* of the struct cache object.
* As such, not all the sub-structures may be initialized.
* However, they were zeroed when the object was allocated.
kobject_put(&c->kobj);
}
-static void bch_cache_percpu_ref_release(struct percpu_ref *ref)
+static void bch_dev_percpu_ref_release(struct percpu_ref *ref)
{
struct cache *ca = container_of(ref, struct cache, ref);
schedule_work(&ca->free_work);
}
-static void bch_cache_free_rcu(struct rcu_head *rcu)
+static void bch_dev_free_rcu(struct rcu_head *rcu)
{
struct cache *ca = container_of(rcu, struct cache, free_rcu);
/*
* This decrements the ref count to ca, and once the ref count
* is 0 (outstanding bios to the ca also incremented it and
- * decrement it on completion/error), bch_cache_percpu_ref_release
- * is called, and that eventually results in bch_cache_free_work
- * being called, which in turn results in bch_cache_release being
+ * decrement it on completion/error), bch_dev_percpu_ref_release
+ * is called, and that eventually results in bch_dev_free_work
+ * being called, which in turn results in bch_dev_release being
* called.
*
* In particular, these functions won't be called until there are no
percpu_ref_kill(&ca->ref);
}
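/*
 * Illustrative sketch (not part of the patch): the comment above describes a
 * chain of deferred callbacks -- percpu_ref_kill() lets in-flight IO drain,
 * the ref's release callback (which may run in atomic context) punts to a
 * workqueue, and the work item does the sleeping cleanup before dropping the
 * final kobject reference. Names and struct layout below are hypothetical.
 */
#include <linux/kobject.h>
#include <linux/percpu-refcount.h>
#include <linux/workqueue.h>

struct dev {
	struct percpu_ref	ref;
	struct work_struct	free_work;
	struct kobject		kobj;
};

static void dev_ref_release(struct percpu_ref *ref)
{
	struct dev *d = container_of(ref, struct dev, ref);

	schedule_work(&d->free_work);	/* can't sleep here */
}

static void dev_free_work(struct work_struct *work)
{
	struct dev *d = container_of(work, struct dev, free_work);

	/* sleeping cleanup goes here, then drop the last reference: */
	kobject_put(&d->kobj);		/* final put invokes ->release() */
}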
-static void bch_cache_stop(struct cache *ca)
+static void bch_dev_stop(struct cache *ca)
{
struct cache_set *c = ca->set;
rcu_assign_pointer(c->cache[ca->dev_idx], NULL);
}
- call_rcu(&ca->free_rcu, bch_cache_free_rcu);
+ call_rcu(&ca->free_rcu, bch_dev_free_rcu);
}
-static void bch_cache_remove_work(struct work_struct *work)
+static void bch_dev_remove_work(struct work_struct *work)
{
struct cache *ca = container_of(work, struct cache, remove_work);
struct bch_sb_field_members *mi;
struct cache_set *c = ca->set;
char name[BDEVNAME_SIZE];
- bool force = test_bit(CACHE_DEV_FORCE_REMOVE, &ca->flags);
+ bool force = test_bit(BCH_DEV_FORCE_REMOVE, &ca->flags);
unsigned dev_idx = ca->dev_idx;
bdevname(ca->disk_sb.bdev, name);
/*
* Device should already be RO, now migrate data off:
*
- * XXX: locking is sketchy, bch_cache_read_write() has to check
- * CACHE_DEV_REMOVING bit
+ * XXX: locking is sketchy, bch_dev_read_write() has to check
+ * BCH_DEV_REMOVING bit
*/
if (!ca->mi.has_data) {
/* Nothing to do: */
} else {
bch_err(c, "Remove of %s failed, unable to migrate data off",
name);
- clear_bit(CACHE_DEV_REMOVING, &ca->flags);
+ clear_bit(BCH_DEV_REMOVING, &ca->flags);
return;
}
} else {
bch_err(c, "Remove of %s failed, unable to migrate metadata off",
name);
- clear_bit(CACHE_DEV_REMOVING, &ca->flags);
+ clear_bit(BCH_DEV_REMOVING, &ca->flags);
return;
}
* Ok, really doing the remove:
* Drop device's prio pointer before removing it from superblock:
*/
- bch_notify_cache_removed(ca);
+ bch_notify_dev_removed(ca);
spin_lock(&c->journal.lock);
c->journal.prio_buckets[dev_idx] = 0;
closure_get(&c->cl);
mutex_lock(&bch_register_lock);
- bch_cache_stop(ca);
+ bch_dev_stop(ca);
/*
* RCU barrier between dropping from c->cache and dropping from
closure_put(&c->cl);
}
-bool bch_cache_remove(struct cache *ca, bool force)
+bool bch_dev_remove(struct cache *ca, bool force)
{
mutex_lock(&bch_register_lock);
- if (test_bit(CACHE_DEV_REMOVING, &ca->flags))
+ if (test_bit(BCH_DEV_REMOVING, &ca->flags))
return false;
- if (!bch_cache_may_remove(ca)) {
+ if (!bch_dev_may_remove(ca)) {
bch_err(ca->set, "Can't remove last device in tier %u",
ca->mi.tier);
- bch_notify_cache_remove_failed(ca);
+ bch_notify_dev_remove_failed(ca);
return false;
}
/* First, go RO before we try to migrate data off: */
- bch_cache_read_only(ca);
+ bch_dev_read_only(ca);
if (force)
- set_bit(CACHE_DEV_FORCE_REMOVE, &ca->flags);
- set_bit(CACHE_DEV_REMOVING, &ca->flags);
- bch_notify_cache_removing(ca);
+ set_bit(BCH_DEV_FORCE_REMOVE, &ca->flags);
+ set_bit(BCH_DEV_REMOVING, &ca->flags);
+ bch_notify_dev_removing(ca);
mutex_unlock(&bch_register_lock);
return true;
}
-static int bch_cache_online(struct cache *ca)
+static int bch_dev_online(struct cache *ca)
{
char buf[12];
return 0;
}
-static const char *cache_alloc(struct bcache_superblock *sb,
- struct cache_set *c,
- struct cache **ret)
+static const char *bch_dev_alloc(struct bcache_superblock *sb,
+ struct cache_set *c,
+ struct cache **ret)
{
struct bch_member *member;
size_t reserve_none, movinggc_reserve, free_inc_reserve, total_reserve;
if (c->sb.nr_devices == 1)
bdevname(sb->bdev, c->name);
- if (cache_set_init_fault("cache_alloc"))
+ if (bch_fs_init_fault("dev_alloc"))
return err;
ca = kzalloc(sizeof(*ca), GFP_KERNEL);
if (!ca)
return err;
- if (percpu_ref_init(&ca->ref, bch_cache_percpu_ref_release,
+ if (percpu_ref_init(&ca->ref, bch_dev_percpu_ref_release,
0, GFP_KERNEL)) {
kfree(ca);
return err;
}
- kobject_init(&ca->kobj, &bch_cache_ktype);
+ kobject_init(&ca->kobj, &bch_dev_ktype);
spin_lock_init(&ca->self.lock);
ca->self.nr_devices = 1;
rcu_assign_pointer(ca->self.d[0].dev, ca);
ca->dev_idx = sb->sb->dev_idx;
- INIT_WORK(&ca->free_work, bch_cache_free_work);
- INIT_WORK(&ca->remove_work, bch_cache_remove_work);
+ INIT_WORK(&ca->free_work, bch_dev_free_work);
+ INIT_WORK(&ca->remove_work, bch_dev_remove_work);
spin_lock_init(&ca->freelist_lock);
spin_lock_init(&ca->prio_buckets_lock);
mutex_init(&ca->heap_lock);
INIT_WORK(&ca->io_error_work, bch_nonfatal_io_error_work);
err = "dynamic fault";
- if (cache_set_init_fault("cache_alloc"))
+ if (bch_fs_init_fault("dev_alloc"))
goto err;
member = bch_sb_get_members(ca->disk_sb.sb)->members +
err = "error creating kobject";
if (c->kobj.state_in_sysfs &&
- bch_cache_online(ca))
+ bch_dev_online(ca))
goto err;
if (ret)
kobject_put(&ca->kobj);
return NULL;
err:
- bch_cache_stop(ca);
+ bch_dev_stop(ca);
return err;
}
-static struct cache_set *cache_set_lookup(uuid_le uuid)
+static struct cache_set *bch_fs_lookup(uuid_le uuid)
{
struct cache_set *c;
lockdep_assert_held(&bch_register_lock);
- list_for_each_entry(c, &bch_cache_sets, list)
+ list_for_each_entry(c, &bch_fs_list, list)
if (!memcmp(&c->disk_sb->uuid, &uuid, sizeof(uuid_le)))
return c;
return NULL;
}
-static const char *register_cache(struct bcache_superblock *sb,
- struct cache_set_opts opts)
-{
- char name[BDEVNAME_SIZE];
- const char *err;
- struct cache_set *c;
- bool allocated_cache_set = false;
-
- err = bch_validate_cache_super(sb);
- if (err)
- return err;
-
- bdevname(sb->bdev, name);
-
- c = cache_set_lookup(sb->sb->uuid);
- if (c) {
- err = can_attach_cache(sb->sb, c);
- if (err)
- return err;
- } else {
- c = bch_cache_set_alloc(sb->sb, opts);
- if (!c)
- return "cannot allocate memory";
-
- allocated_cache_set = true;
- }
-
- err = cache_alloc(sb, c, NULL);
- if (err)
- goto err;
-
- if (cache_set_nr_online_devices(c) == cache_set_nr_devices(c)) {
- err = run_cache_set(c);
- if (err)
- goto err;
- } else {
- err = "error creating kobject";
- if (bch_cache_set_online(c))
- goto err;
- }
-
- bch_info(c, "started");
- return NULL;
-err:
- if (allocated_cache_set)
- bch_cache_set_stop(c);
- return err;
-}
-
-int bch_cache_set_add_cache(struct cache_set *c, const char *path)
+int bch_dev_add(struct cache_set *c, const char *path)
{
struct bcache_superblock sb;
const char *err;
mutex_lock(&c->sb_lock);
- err = can_add_cache(sb.sb, c);
+ err = bch_dev_may_add(sb.sb, c);
if (err)
goto err_unlock;
if (dynamic_fault("bcache:add:no_slot"))
goto no_slot;
- if (test_bit(CACHE_SET_GC_FAILURE, &c->flags))
+ if (test_bit(BCH_FS_GC_FAILURE, &c->flags))
goto no_slot;
mi = bch_sb_get_members(c->disk_sb);
sb.sb->dev_idx = dev_idx;
sb.sb->nr_devices = nr_devices;
- if (bch_cache_set_mi_update(c, dev_mi->members, nr_devices)) {
+ if (bch_fs_mi_update(c, dev_mi->members, nr_devices)) {
err = "cannot allocate memory";
ret = -ENOMEM;
goto err_unlock;
c->disk_sb->nr_devices = nr_devices;
c->sb.nr_devices = nr_devices;
- err = cache_alloc(&sb, c, &ca);
+ err = bch_dev_alloc(&sb, c, &ca);
if (err)
goto err_unlock;
bch_write_super(c);
err = "journal alloc failed";
- if (bch_cache_journal_alloc(ca))
+ if (bch_dev_journal_alloc(ca))
goto err_put;
- bch_notify_cache_added(ca);
+ bch_notify_dev_added(ca);
if (ca->mi.state == BCH_MEMBER_STATE_ACTIVE) {
- err = __bch_cache_read_write(c, ca);
+ err = __bch_dev_read_write(c, ca);
if (err)
goto err_put;
}
mutex_unlock(&bch_register_lock);
return 0;
err_put:
- bch_cache_stop(ca);
+ bch_dev_stop(ca);
err_unlock:
mutex_unlock(&c->sb_lock);
err_unlock_register:
return ret ?: -EINVAL;
}
-const char *bch_register_cache_set(char * const *devices, unsigned nr_devices,
- struct cache_set_opts opts,
- struct cache_set **ret)
+const char *bch_fs_open(char * const *devices, unsigned nr_devices,
+ struct bch_opts opts, struct cache_set **ret)
{
const char *err;
struct cache_set *c = NULL;
}
err = "cache set already registered";
- if (cache_set_lookup(sb->sb->uuid))
+ if (bch_fs_lookup(sb->sb->uuid))
goto err_unlock;
err = "cannot allocate memory";
- c = bch_cache_set_alloc(sb[0].sb, opts);
+ c = bch_fs_alloc(sb[0].sb, opts);
if (!c)
goto err_unlock;
for (i = 0; i < nr_devices; i++) {
- err = cache_alloc(&sb[i], c, NULL);
+ err = bch_dev_alloc(&sb[i], c, NULL);
if (err)
goto err_unlock;
}
err = "insufficient devices";
- if (cache_set_nr_online_devices(c) != cache_set_nr_devices(c))
+ if (bch_fs_nr_online_devices(c) != bch_fs_nr_devices(c))
goto err_unlock;
- err = run_cache_set(c);
+ err = bch_fs_start(c);
if (err)
goto err_unlock;
err = "error creating kobject";
- if (bch_cache_set_online(c))
+ if (bch_fs_online(c))
goto err_unlock;
if (ret) {
return err;
err_unlock:
if (c)
- bch_cache_set_stop(c);
+ bch_fs_stop(c);
mutex_unlock(&bch_register_lock);
err:
for (i = 0; i < nr_devices; i++)
goto out;
}
-const char *bch_register_one(const char *path)
+static const char *__bch_fs_open_incremental(struct bcache_superblock *sb,
+ struct bch_opts opts)
+{
+ char name[BDEVNAME_SIZE];
+ const char *err;
+ struct cache_set *c;
+ bool allocated_cache_set = false;
+
+ err = bch_validate_cache_super(sb);
+ if (err)
+ return err;
+
+ bdevname(sb->bdev, name);
+
+ c = bch_fs_lookup(sb->sb->uuid);
+ if (c) {
+ err = bch_dev_in_fs(sb->sb, c);
+ if (err)
+ return err;
+ } else {
+ c = bch_fs_alloc(sb->sb, opts);
+ if (!c)
+ return "cannot allocate memory";
+
+ allocated_cache_set = true;
+ }
+
+ err = bch_dev_alloc(sb, c, NULL);
+ if (err)
+ goto err;
+
+ if (bch_fs_nr_online_devices(c) == bch_fs_nr_devices(c)) {
+ err = bch_fs_start(c);
+ if (err)
+ goto err;
+ } else {
+ err = "error creating kobject";
+ if (bch_fs_online(c))
+ goto err;
+ }
+
+ bch_info(c, "started");
+ return NULL;
+err:
+ if (allocated_cache_set)
+ bch_fs_stop(c);
+ return err;
+}
+
+const char *bch_fs_open_incremental(const char *path)
{
struct bcache_superblock sb;
- struct cache_set_opts opts = cache_set_opts_empty();
+ struct bch_opts opts = bch_opts_empty();
const char *err;
mutex_lock(&bch_register_lock);
if (__SB_IS_BDEV(le64_to_cpu(sb.sb->version)))
err = bch_backing_dev_register(&sb);
else
- err = register_cache(&sb, opts);
+ err = __bch_fs_open_incremental(&sb, opts);
bch_free_super(&sb);
err:
if (!(path = kstrndup(skip_spaces(buffer), size, GFP_KERNEL)))
goto err;
- err = bch_register_one(strim(path));
+ err = bch_fs_open_incremental(strim(path));
if (err)
goto err;
mutex_lock(&bch_register_lock);
- if (!list_empty(&bch_cache_sets))
+ if (!list_empty(&bch_fs_list))
pr_info("Setting all devices read only:");
- list_for_each_entry(c, &bch_cache_sets, list)
- bch_cache_set_read_only(c);
+ list_for_each_entry(c, &bch_fs_list, list)
+ bch_fs_read_only(c);
- list_for_each_entry(c, &bch_cache_sets, list)
- bch_cache_set_read_only_sync(c);
+ list_for_each_entry(c, &bch_fs_list, list)
+ bch_fs_read_only_sync(c);
mutex_unlock(&bch_register_lock);
}
bch_debug_exit();
bch_fs_exit();
bch_blockdev_exit();
+ bch_chardev_exit();
if (bcache_kset)
kset_unregister(bcache_kset);
if (bcache_io_wq)
destroy_workqueue(bcache_io_wq);
- if (!IS_ERR_OR_NULL(bch_chardev_class))
- device_destroy(bch_chardev_class,
- MKDEV(bch_chardev_major, 0));
- if (!IS_ERR_OR_NULL(bch_chardev_class))
- class_destroy(bch_chardev_class);
- if (bch_chardev_major > 0)
- unregister_chrdev(bch_chardev_major, "bcache");
if (!IS_ERR_OR_NULL(bch_sha256))
crypto_free_shash(bch_sha256);
unregister_reboot_notifier(&reboot);
if (IS_ERR(bch_sha256))
goto err;
- bch_chardev_major = register_chrdev(0, "bcache-ctl", &bch_chardev_fops);
- if (bch_chardev_major < 0)
- goto err;
-
- bch_chardev_class = class_create(THIS_MODULE, "bcache");
- if (IS_ERR(bch_chardev_class))
- goto err;
-
- bch_chardev = device_create(bch_chardev_class, NULL,
- MKDEV(bch_chardev_major, 255),
- NULL, "bcache-ctl");
- if (IS_ERR(bch_chardev))
- goto err;
-
if (!(bcache_io_wq = create_freezable_workqueue("bcache_io")) ||
!(bcache_kset = kset_create_and_add("bcache", NULL, fs_kobj)) ||
sysfs_create_files(&bcache_kset->kobj, files) ||
+ bch_chardev_init() ||
bch_blockdev_init() ||
bch_fs_init() ||
bch_debug_init())
(ca = bch_get_next_cache(c, &(iter))); \
percpu_ref_put(&ca->ref), (iter)++)
-static inline bool bch_cache_may_remove(struct cache *ca)
+static inline bool bch_dev_may_remove(struct cache *ca)
{
struct cache_set *c = ca->set;
struct cache_group *tier = &c->cache_tiers[ca->mi.tier];
rcu_access_pointer(tier->d[0].dev) != ca;
}
-void bch_cache_set_release(struct kobject *);
-void bch_cache_release(struct kobject *);
+void bch_dev_release(struct kobject *);
-void bch_cache_set_unregister(struct cache_set *);
-void bch_cache_set_stop(struct cache_set *);
+bool bch_dev_read_only(struct cache *);
+const char *bch_dev_read_write(struct cache *);
+bool bch_dev_remove(struct cache *, bool force);
+int bch_dev_add(struct cache_set *, const char *);
-const char *bch_register_one(const char *path);
-const char *bch_register_cache_set(char * const *, unsigned,
- struct cache_set_opts,
- struct cache_set **);
+void bch_fs_detach(struct cache_set *);
-bool bch_cache_set_read_only(struct cache_set *);
-bool bch_cache_set_emergency_read_only(struct cache_set *);
-void bch_cache_set_read_only_sync(struct cache_set *);
-const char *bch_cache_set_read_write(struct cache_set *);
+bool bch_fs_read_only(struct cache_set *);
+bool bch_fs_emergency_read_only(struct cache_set *);
+void bch_fs_read_only_sync(struct cache_set *);
+const char *bch_fs_read_write(struct cache_set *);
-bool bch_cache_read_only(struct cache *);
-const char *bch_cache_read_write(struct cache *);
-bool bch_cache_remove(struct cache *, bool force);
-int bch_cache_set_add_cache(struct cache_set *, const char *);
+void bch_fs_release(struct kobject *);
+void bch_fs_stop(struct cache_set *);
+void bch_fs_stop_sync(struct cache_set *);
+
+const char *bch_fs_open(char * const *, unsigned, struct bch_opts,
+ struct cache_set **);
+const char *bch_fs_open_incremental(const char *path);
extern struct mutex bch_register_lock;
-extern struct list_head bch_cache_sets;
-extern struct idr bch_cache_set_minor;
+extern struct list_head bch_fs_list;
extern struct workqueue_struct *bcache_io_wq;
extern struct crypto_shash *bch_sha256;
-extern struct kobj_type bch_cache_set_ktype;
-extern struct kobj_type bch_cache_set_internal_ktype;
-extern struct kobj_type bch_cache_set_time_stats_ktype;
-extern struct kobj_type bch_cache_set_opts_dir_ktype;
-extern struct kobj_type bch_cache_ktype;
+extern struct kobj_type bch_fs_ktype;
+extern struct kobj_type bch_fs_internal_ktype;
+extern struct kobj_type bch_fs_time_stats_ktype;
+extern struct kobj_type bch_fs_opts_dir_ktype;
+extern struct kobj_type bch_dev_ktype;
#endif /* _BCACHE_SUPER_H */
BCH_DEBUG_PARAMS()
#undef BCH_DEBUG_PARAM
-#define BCH_OPT(_name, _choices, _min, _max, _sb_opt, _perm) \
+#define BCH_OPT(_name, _mode, ...) \
static struct attribute sysfs_opt_##_name = { \
- .name = #_name, \
- .mode = S_IRUGO|(_perm ? S_IWUSR : 0) \
+ .name = #_name, .mode = _mode, \
};
BCH_VISIBLE_OPTS()
if (uuid_parse(buf, &dc->disk_sb.sb->user_uuid))
return -EINVAL;
- list_for_each_entry(c, &bch_cache_sets, list) {
+ list_for_each_entry(c, &bch_fs_list, list) {
v = bch_cached_dev_attach(dc, c);
if (!v)
return size;
return (bytes * 100) / btree_bytes(c);
}
-static size_t bch_cache_size(struct cache_set *c)
+static size_t bch_btree_cache_size(struct cache_set *c)
{
size_t ret = 0;
struct btree *b;
return ret;
}
-static unsigned bch_cache_available_percent(struct cache_set *c)
+static unsigned bch_fs_available_percent(struct cache_set *c)
{
return div64_u64((u64) sectors_available(c) * 100,
c->capacity ?: 1);
}
#endif
-static ssize_t show_cache_set_alloc_debug(struct cache_set *c, char *buf)
+static ssize_t show_fs_alloc_debug(struct cache_set *c, char *buf)
{
struct bucket_stats_cache_set stats = bch_bucket_stats_read_cache_set(c);
compressed_sectors_uncompressed << 9);
}
-SHOW(bch_cache_set)
+SHOW(bch_fs)
{
struct cache_set *c = container_of(kobj, struct cache_set, kobj);
sysfs_hprint(btree_node_size, c->sb.btree_node_size << 9);
sysfs_print(btree_node_size_bytes, c->sb.btree_node_size << 9);
- sysfs_hprint(btree_cache_size, bch_cache_size(c));
- sysfs_print(cache_available_percent, bch_cache_available_percent(c));
+ sysfs_hprint(btree_cache_size, bch_btree_cache_size(c));
+ sysfs_print(cache_available_percent, bch_fs_available_percent(c));
sysfs_print(btree_gc_running, c->gc_pos.phase != GC_PHASE_DONE);
BCH_DEBUG_PARAMS()
#undef BCH_DEBUG_PARAM
- if (!test_bit(CACHE_SET_RUNNING, &c->flags))
+ if (!test_bit(BCH_FS_RUNNING, &c->flags))
return -EPERM;
if (attr == &sysfs_bset_tree_stats)
return bch_bset_print_stats(c, buf);
if (attr == &sysfs_alloc_debug)
- return show_cache_set_alloc_debug(c, buf);
+ return show_fs_alloc_debug(c, buf);
sysfs_print(tree_depth, c->btree_roots[BTREE_ID_EXTENTS].b->level);
sysfs_print(root_usage_percent, bch_root_usage(c));
return 0;
}
-STORE(__bch_cache_set)
+STORE(__bch_fs)
{
struct cache_set *c = container_of(kobj, struct cache_set, kobj);
if (attr == &sysfs_unregister) {
- bch_cache_set_unregister(c);
+ bch_fs_detach(c);
return size;
}
if (attr == &sysfs_stop) {
- bch_cache_set_stop(c);
+ bch_fs_stop(c);
return size;
}
BCH_DEBUG_PARAMS()
#undef BCH_DEBUG_PARAM
- if (!test_bit(CACHE_SET_RUNNING, &c->flags))
+ if (!test_bit(BCH_FS_RUNNING, &c->flags))
return -EPERM;
- if (test_bit(CACHE_SET_STOPPING, &c->flags))
+ if (test_bit(BCH_FS_STOPPING, &c->flags))
return -EINTR;
if (attr == &sysfs_blockdev_volume_create) {
return size;
}
-STORE(bch_cache_set)
+STORE(bch_fs)
{
struct cache_set *c = container_of(kobj, struct cache_set, kobj);
mutex_lock(&bch_register_lock);
- size = __bch_cache_set_store(kobj, attr, buf, size);
+ size = __bch_fs_store(kobj, attr, buf, size);
mutex_unlock(&bch_register_lock);
if (attr == &sysfs_add_device) {
char *path = kstrdup(buf, GFP_KERNEL);
- int r = bch_cache_set_add_cache(c, strim(path));
+ int r = bch_dev_add(c, strim(path));
kfree(path);
if (r)
return size;
}
-static struct attribute *bch_cache_set_files[] = {
+static struct attribute *bch_fs_files[] = {
&sysfs_unregister,
&sysfs_stop,
&sysfs_journal_write_delay_ms,
&sysfs_journal_flush,
NULL
};
-KTYPE(bch_cache_set);
+KTYPE(bch_fs);
/* internal dir - just a wrapper */
-SHOW(bch_cache_set_internal)
+SHOW(bch_fs_internal)
{
struct cache_set *c = container_of(kobj, struct cache_set, internal);
- return bch_cache_set_show(&c->kobj, attr, buf);
+ return bch_fs_show(&c->kobj, attr, buf);
}
-STORE(bch_cache_set_internal)
+STORE(bch_fs_internal)
{
struct cache_set *c = container_of(kobj, struct cache_set, internal);
- return bch_cache_set_store(&c->kobj, attr, buf, size);
+ return bch_fs_store(&c->kobj, attr, buf, size);
}
-static void bch_cache_set_internal_release(struct kobject *k)
+static void bch_fs_internal_release(struct kobject *k)
{
}
-static struct attribute *bch_cache_set_internal_files[] = {
+static struct attribute *bch_fs_internal_files[] = {
&sysfs_journal_debug,
&sysfs_alloc_debug,
NULL
};
-KTYPE(bch_cache_set_internal);
+KTYPE(bch_fs_internal);
/* options */
-SHOW(bch_cache_set_opts_dir)
+SHOW(bch_fs_opts_dir)
{
struct cache_set *c = container_of(kobj, struct cache_set, opts_dir);
-#define BCH_OPT(_name, _choices, _min, _max, _sb_opt, _perm) \
- if (attr == &sysfs_opt_##_name) \
- return _choices == bch_bool_opt || _choices == bch_uint_opt\
- ? snprintf(buf, PAGE_SIZE, "%i\n", c->opts._name)\
- : bch_snprint_string_list(buf, PAGE_SIZE, \
- _choices, c->opts._name);\
-
- BCH_VISIBLE_OPTS()
-#undef BCH_OPT
-
- return 0;
+ return bch_opt_show(&c->opts, attr->name, buf, PAGE_SIZE);
}
-STORE(bch_cache_set_opts_dir)
+STORE(bch_fs_opts_dir)
{
struct cache_set *c = container_of(kobj, struct cache_set, opts_dir);
+ const struct bch_option *opt;
+ enum bch_opt_id id;
+ u64 v;
+
+ id = bch_parse_sysfs_opt(attr->name, buf, &v);
+ if (id < 0)
+ return id;
+
+ opt = &bch_opt_table[id];
+
+ mutex_lock(&c->sb_lock);
-#define BCH_OPT(_name, _choices, _min, _max, _sb_opt, _perm) \
- if (attr == &sysfs_opt_##_name) { \
- ssize_t v = (_choices == bch_bool_opt || \
- _choices == bch_uint_opt) \
- ? strtoul_restrict_or_return(buf, _min, _max - 1)\
- : bch_read_string_list(buf, _choices); \
- \
- if (v < 0) \
- return v; \
- \
- mutex_lock(&c->sb_lock); \
- if (attr == &sysfs_opt_compression) { \
- int ret = bch_check_set_has_compressed_data(c, v);\
- if (ret) { \
- mutex_unlock(&c->sb_lock); \
- return ret; \
- } \
- } \
- \
- if (_sb_opt##_BITS && v != _sb_opt(c->disk_sb)) { \
- SET_##_sb_opt(c->disk_sb, v); \
- bch_write_super(c); \
- } \
- \
- c->opts._name = v; \
- mutex_unlock(&c->sb_lock); \
- \
- return size; \
+ if (id == Opt_compression) {
+ int ret = bch_check_set_has_compressed_data(c, v);
+ if (ret) {
+ mutex_unlock(&c->sb_lock);
+ return ret;
+ }
}
- BCH_VISIBLE_OPTS()
-#undef BCH_OPT
+ if (opt->set_sb != SET_NO_SB_OPT) {
+ opt->set_sb(c->disk_sb, v);
+ bch_write_super(c);
+ }
+
+ bch_opt_set(&c->opts, id, v);
+
+ mutex_unlock(&c->sb_lock);
return size;
}
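/*
 * Illustrative sketch (not part of the patch): the rewritten store path is
 * table-driven -- one entry per option carries a parser and an optional
 * superblock setter, so sysfs no longer needs a per-option macro expansion.
 * The struct layout and helper below are a hypothetical approximation of
 * bch_opt_table / bch_parse_sysfs_opt, not the patch's actual definitions.
 */
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>

struct opt_entry {
	const char	*name;
	int		(*parse)(const char *buf, u64 *v);
	void		(*set_sb)(struct bch_sb *sb, u64 v);	/* NULL if not persisted */
};

static int store_opt(struct cache_set *c, const struct opt_entry *tbl,
		     size_t nr, const char *name, const char *buf)
{
	size_t i;
	u64 v;
	int ret;

	for (i = 0; i < nr; i++) {
		if (strcmp(tbl[i].name, name))
			continue;

		ret = tbl[i].parse(buf, &v);
		if (ret)
			return ret;

		if (tbl[i].set_sb)
			tbl[i].set_sb(c->disk_sb, v);	/* persist, then mirror into c->opts */
		return 0;
	}

	return -EINVAL;
}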
-static void bch_cache_set_opts_dir_release(struct kobject *k)
+static void bch_fs_opts_dir_release(struct kobject *k)
{
}
-static struct attribute *bch_cache_set_opts_dir_files[] = {
-#define BCH_OPT(_name, _choices, _min, _max, _sb_opt, _perm) \
+static struct attribute *bch_fs_opts_dir_files[] = {
+#define BCH_OPT(_name, ...) \
&sysfs_opt_##_name,
BCH_VISIBLE_OPTS()
NULL
};
-KTYPE(bch_cache_set_opts_dir);
+KTYPE(bch_fs_opts_dir);
/* time stats */
-SHOW(bch_cache_set_time_stats)
+SHOW(bch_fs_time_stats)
{
struct cache_set *c = container_of(kobj, struct cache_set, time_stats);
return 0;
}
-STORE(bch_cache_set_time_stats)
+STORE(bch_fs_time_stats)
{
struct cache_set *c = container_of(kobj, struct cache_set, time_stats);
return size;
}
-static void bch_cache_set_time_stats_release(struct kobject *k)
+static void bch_fs_time_stats_release(struct kobject *k)
{
}
-static struct attribute *bch_cache_set_time_stats_files[] = {
+static struct attribute *bch_fs_time_stats_files[] = {
#define BCH_TIME_STAT(name, frequency_units, duration_units) \
sysfs_time_stats_attribute_list(name, frequency_units, duration_units)
BCH_TIME_STATS()
NULL
};
-KTYPE(bch_cache_set_time_stats);
+KTYPE(bch_fs_time_stats);
typedef unsigned (bucket_map_fn)(struct cache *, struct bucket *, void *);
return ret;
}
-static ssize_t show_cache_alloc_debug(struct cache *ca, char *buf)
+static ssize_t show_dev_alloc_debug(struct cache *ca, char *buf)
{
struct cache_set *c = ca->set;
struct bucket_stats_cache stats = bch_bucket_stats_read_cache(ca);
return ret;
}
-SHOW(bch_cache)
+SHOW(bch_dev)
{
struct cache *ca = container_of(kobj, struct cache, kobj);
struct cache_set *c = ca->set;
if (attr == &sysfs_state_rw)
return bch_snprint_string_list(buf, PAGE_SIZE,
- bch_cache_state,
+ bch_dev_state,
ca->mi.state);
if (attr == &sysfs_read_priority_stats)
if (attr == &sysfs_reserve_stats)
return show_reserve_stats(ca, buf);
if (attr == &sysfs_alloc_debug)
- return show_cache_alloc_debug(ca, buf);
+ return show_dev_alloc_debug(ca, buf);
return 0;
}
-STORE(__bch_cache)
+STORE(__bch_dev)
{
struct cache *ca = container_of(kobj, struct cache, kobj);
struct cache_set *c = ca->set;
if (attr == &sysfs_state_rw) {
char name[BDEVNAME_SIZE];
const char *err = NULL;
- ssize_t v = bch_read_string_list(buf, bch_cache_state);
+ ssize_t v = bch_read_string_list(buf, bch_dev_state);
if (v < 0)
return v;
switch (v) {
case BCH_MEMBER_STATE_ACTIVE:
- err = bch_cache_read_write(ca);
+ err = bch_dev_read_write(ca);
break;
case BCH_MEMBER_STATE_RO:
- bch_cache_read_only(ca);
+ bch_dev_read_only(ca);
break;
case BCH_MEMBER_STATE_FAILED:
case BCH_MEMBER_STATE_SPARE:
*/
pr_err("can't set %s %s: not supported",
bdevname(ca->disk_sb.bdev, name),
- bch_cache_state[v]);
+ bch_dev_state[v]);
return -EINVAL;
}
if (err) {
pr_err("can't set %s %s: %s",
bdevname(ca->disk_sb.bdev, name),
- bch_cache_state[v], err);
+ bch_dev_state[v], err);
return -EINVAL;
}
}
if (!strncmp(buf, "force", 5) &&
(buf[5] == '\0' || buf[5] == '\n'))
force = true;
- bch_cache_remove(ca, force);
+ bch_dev_remove(ca, force);
}
if (attr == &sysfs_clear_stats) {
return size;
}
-STORE_LOCKED(bch_cache)
+STORE_LOCKED(bch_dev)
-static struct attribute *bch_cache_files[] = {
+static struct attribute *bch_dev_files[] = {
&sysfs_uuid,
&sysfs_unregister,
&sysfs_bucket_size,
sysfs_pd_controller_files(copy_gc),
NULL
};
-KTYPE(bch_cache);
+KTYPE(bch_dev);
{
struct dirty_io *io = container_of(bio, struct dirty_io, bio);
- cache_nonfatal_io_err_on(bio->bi_error, io->ca, "writeback read");
+ bch_dev_nonfatal_io_err_on(bio->bi_error, io->ca, "writeback read");
bch_account_io_completion(io->ca);
}
}
+#ifndef NO_BCACHE_WRITEBACK
+
void bcache_dev_sectors_dirty_add(struct cache_set *, unsigned, u64, int);
void bch_writeback_recalc_oldest_gens(struct cache_set *);
int bch_cached_dev_writeback_init(struct cached_dev *);
int bch_cached_dev_writeback_start(struct cached_dev *);
+#else
+
+static inline void bcache_dev_sectors_dirty_add(struct cache_set *c,
+ unsigned i, u64 o, int n) {}
+static inline void bch_writeback_recalc_oldest_gens(struct cache_set *c) {}
+static inline void bch_sectors_dirty_init(struct cached_dev *dc,
+ struct cache_set *c) {}
+static inline void bch_cached_dev_writeback_stop(struct cached_dev *dc) {}
+static inline void bch_cached_dev_writeback_free(struct cached_dev *dc) {}
+static inline int bch_cached_dev_writeback_init(struct cached_dev *dc)
+{
+ return 0;
+}
+static inline int bch_cached_dev_writeback_start(struct cached_dev *dc)
+{
+ return 0;
+}
+
+#endif
+
#endif
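/*
 * Illustrative sketch (not part of the patch): the header above follows the
 * usual compile-out pattern -- when a feature is configured out, the real
 * declarations are replaced by static inline no-ops with identical
 * signatures, so call sites need no #ifdefs and the compiler discards the
 * calls. Feature and function names below are hypothetical.
 */
#ifndef NO_FEATURE_FOO
int  foo_init(struct cache_set *c);
void foo_exit(struct cache_set *c);
#else
static inline int  foo_init(struct cache_set *c) { return 0; }
static inline void foo_exit(struct cache_set *c) {}
#endif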
return ret;
}
-int bch_xattr_set(struct cache_set *c, struct inode *inode,
+int __bch_xattr_set(struct cache_set *c, u64 inum,
+ const struct bch_hash_info *hash_info,
const char *name, const void *value, size_t size,
- int flags, int type)
+ int flags, int type, u64 *journal_seq)
{
- struct bch_inode_info *ei = to_bch_ei(inode);
struct xattr_search_key search = X_SEARCH(type, name, strlen(name));
int ret;
if (!value) {
- ret = bch_hash_delete(xattr_hash_desc, &ei->str_hash,
- c, ei->vfs_inode.i_ino,
- &ei->journal_seq, &search);
+ ret = bch_hash_delete(xattr_hash_desc, hash_info,
+ c, inum,
+ journal_seq, &search);
} else {
struct bkey_i_xattr *xattr;
unsigned u64s = BKEY_U64s +
memcpy(xattr->v.x_name, search.name.name, search.name.len);
memcpy(xattr_val(&xattr->v), value, size);
- ret = bch_hash_set(xattr_hash_desc, &ei->str_hash, c,
- ei->vfs_inode.i_ino, &ei->journal_seq,
+ ret = bch_hash_set(xattr_hash_desc, hash_info, c,
+ inum, journal_seq,
&xattr->k_i,
(flags & XATTR_CREATE ? BCH_HASH_SET_MUST_CREATE : 0)|
(flags & XATTR_REPLACE ? BCH_HASH_SET_MUST_REPLACE : 0));
return ret;
}
+int bch_xattr_set(struct cache_set *c, struct inode *inode,
+ const char *name, const void *value, size_t size,
+ int flags, int type)
+{
+ struct bch_inode_info *ei = to_bch_ei(inode);
+
+ return __bch_xattr_set(c, inode->i_ino, &ei->str_hash,
+ name, value, size, flags, type,
+ &ei->journal_seq);
+}
+
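/*
 * Illustrative sketch (not part of the patch): splitting out __bch_xattr_set()
 * lets callers that only have an inode number and hash info -- no VFS inode --
 * set or delete xattrs, while bch_xattr_set() above stays a thin VFS wrapper.
 * The helper below is hypothetical usage; flags and type are placeholders.
 */
static int example_set_xattr(struct cache_set *c, u64 inum,
			     const struct bch_hash_info *hash_info,
			     u64 *journal_seq)
{
	const char value[] = "1";

	/* passing value == NULL instead would delete the attribute */
	return __bch_xattr_set(c, inum, hash_info,
			       "example", value, sizeof(value) - 1,
			       0 /* flags */, 0 /* xattr type, placeholder */,
			       journal_seq);
}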
static const struct xattr_handler *bch_xattr_type_to_handler(unsigned);
static size_t bch_xattr_emit(struct dentry *dentry,
struct dentry;
struct xattr_handler;
+struct bch_hash_info;
int bch_xattr_get(struct cache_set *, struct inode *,
const char *, void *, size_t, int);
+int __bch_xattr_set(struct cache_set *, u64, const struct bch_hash_info *,
+ const char *, const void *, size_t, int, int, u64 *);
int bch_xattr_set(struct cache_set *, struct inode *,
const char *, const void *, size_t, int, int);
ssize_t bch_xattr_list(struct dentry *, char *, size_t);
+++ /dev/null
-/*
- * Generic wait-for-completion handler;
- *
- * It differs from semaphores in that their default case is the opposite,
- * wait_for_completion default blocks whereas semaphore default non-block. The
- * interface also makes it easy to 'complete' multiple waiting threads,
- * something which isn't entirely natural for semaphores.
- *
- * But more importantly, the primitive documents the usage. Semaphores would
- * typically be used for exclusion which gives rise to priority inversion.
- * Waiting for completion is a typically sync point, but not an exclusion point.
- */
-
-#include <linux/sched.h>
-#include <linux/completion.h>
-
-/**
- * complete: - signals a single thread waiting on this completion
- * @x: holds the state of this particular completion
- *
- * This will wake up a single thread waiting on this completion. Threads will be
- * awakened in the same order in which they were queued.
- *
- * See also complete_all(), wait_for_completion() and related routines.
- *
- * It may be assumed that this function implies a write memory barrier before
- * changing the task state if and only if any tasks are woken up.
- */
-void complete(struct completion *x)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&x->wait.lock, flags);
- x->done++;
- __wake_up_locked(&x->wait, TASK_NORMAL, 1);
- spin_unlock_irqrestore(&x->wait.lock, flags);
-}
-EXPORT_SYMBOL(complete);
-
-/**
- * complete_all: - signals all threads waiting on this completion
- * @x: holds the state of this particular completion
- *
- * This will wake up all threads waiting on this particular completion event.
- *
- * It may be assumed that this function implies a write memory barrier before
- * changing the task state if and only if any tasks are woken up.
- */
-void complete_all(struct completion *x)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&x->wait.lock, flags);
- x->done += UINT_MAX/2;
- __wake_up_locked(&x->wait, TASK_NORMAL, 0);
- spin_unlock_irqrestore(&x->wait.lock, flags);
-}
-EXPORT_SYMBOL(complete_all);
-
-static inline long __sched
-do_wait_for_common(struct completion *x,
- long (*action)(long), long timeout, int state)
-{
- if (!x->done) {
- DECLARE_WAITQUEUE(wait, current);
-
- __add_wait_queue_tail_exclusive(&x->wait, &wait);
- do {
- __set_current_state(state);
- spin_unlock_irq(&x->wait.lock);
- timeout = action(timeout);
- spin_lock_irq(&x->wait.lock);
- } while (!x->done && timeout);
- __remove_wait_queue(&x->wait, &wait);
- if (!x->done)
- return timeout;
- }
- x->done--;
- return timeout ?: 1;
-}
-
-static inline long __sched
-__wait_for_common(struct completion *x,
- long (*action)(long), long timeout, int state)
-{
- might_sleep();
-
- spin_lock_irq(&x->wait.lock);
- timeout = do_wait_for_common(x, action, timeout, state);
- spin_unlock_irq(&x->wait.lock);
- return timeout;
-}
-
-static long __sched
-wait_for_common(struct completion *x, long timeout, int state)
-{
- return __wait_for_common(x, schedule_timeout, timeout, state);
-}
-
-static long __sched
-wait_for_common_io(struct completion *x, long timeout, int state)
-{
- return __wait_for_common(x, io_schedule_timeout, timeout, state);
-}
-
-/**
- * wait_for_completion: - waits for completion of a task
- * @x: holds the state of this particular completion
- *
- * This waits to be signaled for completion of a specific task. It is NOT
- * interruptible and there is no timeout.
- *
- * See also similar routines (i.e. wait_for_completion_timeout()) with timeout
- * and interrupt capability. Also see complete().
- */
-void __sched wait_for_completion(struct completion *x)
-{
- wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE);
-}
-EXPORT_SYMBOL(wait_for_completion);
-
-/**
- * wait_for_completion_timeout: - waits for completion of a task (w/timeout)
- * @x: holds the state of this particular completion
- * @timeout: timeout value in jiffies
- *
- * This waits for either a completion of a specific task to be signaled or for a
- * specified timeout to expire. The timeout is in jiffies. It is not
- * interruptible.
- *
- * Return: 0 if timed out, and positive (at least 1, or number of jiffies left
- * till timeout) if completed.
- */
-unsigned long __sched
-wait_for_completion_timeout(struct completion *x, unsigned long timeout)
-{
- return wait_for_common(x, timeout, TASK_UNINTERRUPTIBLE);
-}
-EXPORT_SYMBOL(wait_for_completion_timeout);
-
-/**
- * wait_for_completion_io: - waits for completion of a task
- * @x: holds the state of this particular completion
- *
- * This waits to be signaled for completion of a specific task. It is NOT
- * interruptible and there is no timeout. The caller is accounted as waiting
- * for IO (which traditionally means blkio only).
- */
-void __sched wait_for_completion_io(struct completion *x)
-{
- wait_for_common_io(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE);
-}
-EXPORT_SYMBOL(wait_for_completion_io);
-
-/**
- * wait_for_completion_io_timeout: - waits for completion of a task (w/timeout)
- * @x: holds the state of this particular completion
- * @timeout: timeout value in jiffies
- *
- * This waits for either a completion of a specific task to be signaled or for a
- * specified timeout to expire. The timeout is in jiffies. It is not
- * interruptible. The caller is accounted as waiting for IO (which traditionally
- * means blkio only).
- *
- * Return: 0 if timed out, and positive (at least 1, or number of jiffies left
- * till timeout) if completed.
- */
-unsigned long __sched
-wait_for_completion_io_timeout(struct completion *x, unsigned long timeout)
-{
- return wait_for_common_io(x, timeout, TASK_UNINTERRUPTIBLE);
-}
-EXPORT_SYMBOL(wait_for_completion_io_timeout);
-
-/**
- * wait_for_completion_interruptible: - waits for completion of a task (w/intr)
- * @x: holds the state of this particular completion
- *
- * This waits for completion of a specific task to be signaled. It is
- * interruptible.
- *
- * Return: -ERESTARTSYS if interrupted, 0 if completed.
- */
-int __sched wait_for_completion_interruptible(struct completion *x)
-{
- wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_INTERRUPTIBLE);
- return 0;
-}
-EXPORT_SYMBOL(wait_for_completion_interruptible);
-
-/**
- * wait_for_completion_interruptible_timeout: - waits for completion (w/(to,intr))
- * @x: holds the state of this particular completion
- * @timeout: timeout value in jiffies
- *
- * This waits for either a completion of a specific task to be signaled or for a
- * specified timeout to expire. It is interruptible. The timeout is in jiffies.
- *
- * Return: -ERESTARTSYS if interrupted, 0 if timed out, positive (at least 1,
- * or number of jiffies left till timeout) if completed.
- */
-long __sched
-wait_for_completion_interruptible_timeout(struct completion *x,
- unsigned long timeout)
-{
- return wait_for_common(x, timeout, TASK_INTERRUPTIBLE);
-}
-EXPORT_SYMBOL(wait_for_completion_interruptible_timeout);
-
-/**
- * wait_for_completion_killable: - waits for completion of a task (killable)
- * @x: holds the state of this particular completion
- *
- * This waits to be signaled for completion of a specific task. It can be
- * interrupted by a kill signal.
- *
- * Return: -ERESTARTSYS if interrupted, 0 if completed.
- */
-int __sched wait_for_completion_killable(struct completion *x)
-{
- wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_KILLABLE);
- return 0;
-}
-EXPORT_SYMBOL(wait_for_completion_killable);
-
-/**
- * wait_for_completion_killable_timeout: - waits for completion of a task (w/(to,killable))
- * @x: holds the state of this particular completion
- * @timeout: timeout value in jiffies
- *
- * This waits for either a completion of a specific task to be
- * signaled or for a specified timeout to expire. It can be
- * interrupted by a kill signal. The timeout is in jiffies.
- *
- * Return: -ERESTARTSYS if interrupted, 0 if timed out, positive (at least 1,
- * or number of jiffies left till timeout) if completed.
- */
-long __sched
-wait_for_completion_killable_timeout(struct completion *x,
- unsigned long timeout)
-{
- return wait_for_common(x, timeout, TASK_KILLABLE);
-}
-EXPORT_SYMBOL(wait_for_completion_killable_timeout);
-
-/**
- * try_wait_for_completion - try to decrement a completion without blocking
- * @x: completion structure
- *
- * Return: 0 if a decrement cannot be done without blocking
- * 1 if a decrement succeeded.
- *
- * If a completion is being used as a counting completion,
- * attempt to decrement the counter without blocking. This
- * enables us to avoid waiting if the resource the completion
- * is protecting is not available.
- */
-bool try_wait_for_completion(struct completion *x)
-{
- unsigned long flags;
- int ret = 1;
-
- /*
- * Since x->done will need to be locked only
- * in the non-blocking case, we check x->done
- * first without taking the lock so we can
- * return early in the blocking case.
- */
- if (!READ_ONCE(x->done))
- return 0;
-
- spin_lock_irqsave(&x->wait.lock, flags);
- if (!x->done)
- ret = 0;
- else
- x->done--;
- spin_unlock_irqrestore(&x->wait.lock, flags);
- return ret;
-}
-EXPORT_SYMBOL(try_wait_for_completion);
-
-/**
- * completion_done - Test to see if a completion has any waiters
- * @x: completion structure
- *
- * Return: 0 if there are waiters (wait_for_completion() in progress)
- * 1 if there are no waiters.
- *
- */
-bool completion_done(struct completion *x)
-{
- if (!READ_ONCE(x->done))
- return false;
-
- /*
- * If ->done, we need to wait for complete() to release ->wait.lock
- * otherwise we can end up freeing the completion before complete()
- * is done referencing it.
- *
- * The RMB pairs with complete()'s RELEASE of ->wait.lock and orders
- * the loads of ->done and ->wait.lock such that we cannot observe
- * the lock before complete() acquires it while observing the ->done
- * after it's acquired the lock.
- */
- smp_rmb();
- //spin_unlock_wait(&x->wait.lock);
- spin_lock(&x->wait.lock);
- spin_unlock(&x->wait.lock);
- return true;
-}
-EXPORT_SYMBOL(completion_done);
*/
#include <linux/log2.h>
-#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/lz4.h>
#include <asm/unaligned.h>
return hash.base + offset;
}
-static inline const u8 *try_match(const struct lz4_hash_table hash,
- const u8 *ip)
-{
- const u8 *ref = hash.add(hash, ip);
-
- return ref >= ip - MAX_DISTANCE &&
- A32(ref) == A32(ip) ? ref : NULL;
-}
-
static inline const u8 *find_match(const struct lz4_hash_table hash,
const u8 **ip, const u8 *anchor,
- const u8 *start, const u8 *end)
+ const u8 *start, const u8 *mflimit)
{
-
int findmatchattempts = (1U << SKIPSTRENGTH) + 3;
- const u8 *next_ip = *ip, *ref;
-
- do {
- *ip = next_ip;
- next_ip += findmatchattempts++ >> SKIPSTRENGTH;
-
- if (unlikely(next_ip > end))
- return NULL;
- } while (!(ref = try_match(hash, *ip)));
-
- /* Catch up */
- while (*ip > anchor &&
- ref > start &&
- unlikely((*ip)[-1] == ref[-1])) {
- (*ip)--;
- ref--;
+
+ while (*ip <= mflimit) {
+ const u8 *ref = hash.add(hash, *ip);
+
+ if (ref >= *ip - MAX_DISTANCE && A32(ref) == A32(*ip)) {
+ /* found match: */
+ while (*ip > anchor &&
+ ref > start &&
+ unlikely((*ip)[-1] == ref[-1])) {
+ (*ip)--;
+ ref--;
+ }
+
+ return ref;
+ }
+
+ *ip += findmatchattempts++ >> SKIPSTRENGTH;
}
- return ref;
+ return NULL;
+}
+
+static inline int length_len(unsigned length)
+{
+ return length / 255 + 1;
}
/*
const u8 *src, size_t src_len,
u8 *dst, size_t *dst_len)
{
- const u8 *ip = src;
- const u8 *anchor = ip, *ref;
+ const u8 *ip = src, *anchor = ip, *ref;
const u8 *const iend = ip + src_len;
const u8 *const mflimit = iend - MFLIMIT;
const u8 *const matchlimit = iend - LASTLITERALS;
- size_t maxoutputsize = *dst_len;
- u8 *op = dst;
- u8 *const oend = op + maxoutputsize;
- int length;
- u8 *token;
+ u8 *op = dst, *token;
+ u8 *const oend = op + *dst_len;
+ size_t literal_len, match_len, match_offset;
/* Init */
- if (src_len < MINLENGTH)
- goto _last_literals;
-
memset(hash.ctx, 0, LZ4_MEM_COMPRESS);
hash.add(hash, ip);
- /* Main Loop */
- while (1) {
- /* Starting a literal: */
- anchor = ip++;
- ref = find_match(hash, &ip, anchor, src, mflimit);
- if (!ref)
- goto _last_literals;
+ /* Always start with a literal: */
+ ip++;
+ while ((ref = find_match(hash, &ip, anchor, src, mflimit))) {
/*
* We found a match; @ip now points to the match and @ref points
* to the prior part of the input we matched with. Everything up
* to @anchor has been encoded; the range from @anchor to @ip
* didn't match and now has to be encoded as a literal:
*/
- length = ip - anchor;
- token = op++;
-
- /* check output limit */
- if (unlikely(op + length + (2 + 1 + LASTLITERALS) +
- (length >> 8) > oend))
- return -(ip - src);
-
- *token = encode_length(&op, length) << ML_BITS;
-
- /* Copy Literals */
- MEMCPY_ADVANCE_CHUNKED(op, anchor, length);
-
- /* Encode matches: */
- while (1) {
- /* Match offset: */
- PUT_LE16_ADVANCE(op, ip - ref);
-
- /* MINMATCH bytes already matched from find_match(): */
- ip += MINMATCH;
- ref += MINMATCH;
-
- length = common_length(ip, ref, matchlimit);
+ literal_len = ip - anchor;
+ match_offset = ip - ref;
- /* Check output limit */
- if (unlikely(op + (1 + LASTLITERALS) +
- (length >> 8) > oend))
- return -(ip - src);
+ /* MINMATCH bytes already matched from find_match(): */
+ ip += MINMATCH;
+ ref += MINMATCH;
+ match_len = common_length(ip, ref, matchlimit);
+ ip += match_len;
- ip += length;
+ /* check output limit */
+ if (unlikely(op +
+ 1 + /* token */
+ 2 + /* match offset */
+ literal_len +
+ length_len(literal_len) +
+ length_len(match_len) +
+ LASTLITERALS > oend))
+ break;
- *token += encode_length(&op, length);
+ token = op++;
+ *token = encode_length(&op, literal_len) << ML_BITS;
+ MEMCPY_ADVANCE_CHUNKED(op, anchor, literal_len);
+ PUT_LE16_ADVANCE(op, match_offset);
+ *token += encode_length(&op, match_len);
- /* Test end of chunk */
- if (ip > mflimit) {
- anchor = ip;
- break;
- }
+ anchor = ip;
+ }
- /* Fill table */
- hash.add(hash, ip - 2);
+ /* Encode remaining input as literal: */
+ literal_len = iend - anchor;
+ if (unlikely(op +
+ 1 +
+ literal_len +
+ length_len(literal_len) > oend)) {
+ /* Return how much of the input would have fit: */
+ ssize_t remaining = oend - op;
+ ssize_t encoded = anchor - src;
- /* Test next position */
- ref = try_match(hash, ip);
- if (!ref)
- break;
+ remaining -= length_len(remaining) + 1;
- token = op++;
- *token = 0;
- }
+ return -max(encoded + remaining, 1L);
}
-_last_literals:
- /* Encode Last Literals */
- length = iend - anchor;
- if ((op - dst) + length + 1 +
- ((length + 255 - RUN_MASK) / 255) > (u32)maxoutputsize)
- return -(ip - src);
-
token = op++;
- *token = encode_length(&op, length) << ML_BITS;
- MEMCPY_ADVANCE(op, anchor, iend - anchor);
+ *token = encode_length(&op, literal_len) << ML_BITS;
+ MEMCPY_ADVANCE(op, anchor, literal_len);
/* End */
+ BUG_ON(op > oend);
*dst_len = op - dst;
return 0;
}
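/*
 * Illustrative note (not part of the patch): the output-limit check in the
 * rewritten loop budgets one token byte, the 2-byte little-endian match
 * offset, the literal bytes, and worst-case length-extension bytes for both
 * lengths. length_len() is a conservative bound: a length needs at most
 * length/255 extra 0xff bytes plus one final byte once it overflows the
 * token's 4-bit field.
 *
 * Hypothetical worked example, literal_len = 300 and match_len = 20:
 *   length_len(300) = 300/255 + 1 = 2    (actual literal extension: 2 bytes)
 *   length_len(20)  =  20/255 + 1 = 1    (actual match extension: 1 byte)
 *   budget = 1 (token) + 2 (offset) + 300 (literals) + 2 + 1 + LASTLITERALS
 */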
return lz4_compressctx(hash, src, src_len, dst, dst_len);
}
}
-EXPORT_SYMBOL(lz4_compress);
-
-MODULE_LICENSE("Dual BSD/GPL");
-MODULE_DESCRIPTION("LZ4 compressor");
#include <linux/random.h>
#include <linux/rhashtable.h>
#include <linux/err.h>
-#include <linux/export.h>
#define HASH_DEFAULT_SIZE 64UL
#define HASH_MIN_SIZE 4U
return rht_head_hashfn(ht, tbl, he, ht->p);
}
-#ifdef CONFIG_PROVE_LOCKING
-#define ASSERT_RHT_MUTEX(HT) BUG_ON(!lockdep_rht_mutex_is_held(HT))
-
-int lockdep_rht_mutex_is_held(struct rhashtable *ht)
-{
- return (debug_locks) ? lockdep_is_held(&ht->mutex) : 1;
-}
-EXPORT_SYMBOL_GPL(lockdep_rht_mutex_is_held);
-
-int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash)
-{
- spinlock_t *lock = rht_bucket_lock(tbl, hash);
-
- return (debug_locks) ? lockdep_is_held(lock) : 1;
-}
-EXPORT_SYMBOL_GPL(lockdep_rht_bucket_is_held);
-#else
-#define ASSERT_RHT_MUTEX(HT)
-#endif
-
-
static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl,
gfp_t gfp)
{
unsigned int i, size;
-#if defined(CONFIG_PROVE_LOCKING)
- unsigned int nr_pcpus = 2;
-#else
unsigned int nr_pcpus = num_possible_cpus();
-#endif
nr_pcpus = min_t(unsigned int, nr_pcpus, 64UL);
size = roundup_pow_of_two(nr_pcpus * ht->p.locks_mul);
if (sizeof(spinlock_t) != 0) {
tbl->locks = NULL;
-#ifdef CONFIG_NUMA
- if (size * sizeof(spinlock_t) > PAGE_SIZE &&
- gfp == GFP_KERNEL)
- tbl->locks = vmalloc(size * sizeof(spinlock_t));
-#endif
if (gfp != GFP_KERNEL)
gfp |= __GFP_NOWARN | __GFP_NORETRY;
return rht_dereference(new_tbl->future_tbl, ht) ? -EAGAIN : 0;
}
-/**
- * rhashtable_expand - Expand hash table while allowing concurrent lookups
- * @ht: the hash table to expand
- *
- * A secondary bucket array is allocated and the hash entries are migrated.
- *
- * This function may only be called in a context where it is safe to call
- * synchronize_rcu(), e.g. not within a rcu_read_lock() section.
- *
- * The caller must ensure that no concurrent resizing occurs by holding
- * ht->mutex.
- *
- * It is valid to have concurrent insertions and deletions protected by per
- * bucket locks or concurrent RCU protected lookups and traversals.
- */
static int rhashtable_expand(struct rhashtable *ht)
{
struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht);
int err;
- ASSERT_RHT_MUTEX(ht);
-
old_tbl = rhashtable_last_table(ht, old_tbl);
new_tbl = bucket_table_alloc(ht, old_tbl->size * 2, GFP_KERNEL);
return err;
}
-/**
- * rhashtable_shrink - Shrink hash table while allowing concurrent lookups
- * @ht: the hash table to shrink
- *
- * This function shrinks the hash table to fit, i.e., the smallest
- * size would not cause it to expand right away automatically.
- *
- * The caller must ensure that no concurrent resizing occurs by holding
- * ht->mutex.
- *
- * The caller must ensure that no concurrent table mutations take place.
- * It is however valid to have concurrent lookups if they are RCU protected.
- *
- * It is valid to have concurrent insertions and deletions protected by per
- * bucket locks or concurrent RCU protected lookups and traversals.
- */
static int rhashtable_shrink(struct rhashtable *ht)
{
struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht);
unsigned int size = 0;
int err;
- ASSERT_RHT_MUTEX(ht);
-
if (nelems)
size = roundup_pow_of_two(nelems * 3 / 2);
if (size < ht->p.min_size)
return err;
}
-EXPORT_SYMBOL_GPL(rhashtable_insert_rehash);
struct bucket_table *rhashtable_insert_slow(struct rhashtable *ht,
const void *key,
else
return ERR_PTR(err);
}
-EXPORT_SYMBOL_GPL(rhashtable_insert_slow);
-
-/**
- * rhashtable_walk_init - Initialise an iterator
- * @ht: Table to walk over
- * @iter: Hash table Iterator
- * @gfp: GFP flags for allocations
- *
- * This function prepares a hash table walk.
- *
- * Note that if you restart a walk after rhashtable_walk_stop you
- * may see the same object twice. Also, you may miss objects if
- * there are removals in between rhashtable_walk_stop and the next
- * call to rhashtable_walk_start.
- *
- * For a completely stable walk you should construct your own data
- * structure outside the hash table.
- *
- * This function may sleep so you must not call it from interrupt
- * context or with spin locks held.
- *
- * You must call rhashtable_walk_exit if this function returns
- * successfully.
- */
-int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter,
- gfp_t gfp)
-{
- iter->ht = ht;
- iter->p = NULL;
- iter->slot = 0;
- iter->skip = 0;
-
- iter->walker = kmalloc(sizeof(*iter->walker), gfp);
- if (!iter->walker)
- return -ENOMEM;
-
- spin_lock(&ht->lock);
- iter->walker->tbl =
- rcu_dereference_protected(ht->tbl, lockdep_is_held(&ht->lock));
- list_add(&iter->walker->list, &iter->walker->tbl->walkers);
- spin_unlock(&ht->lock);
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(rhashtable_walk_init);
-
-/**
- * rhashtable_walk_exit - Free an iterator
- * @iter: Hash table Iterator
- *
- * This function frees resources allocated by rhashtable_walk_init.
- */
-void rhashtable_walk_exit(struct rhashtable_iter *iter)
-{
- spin_lock(&iter->ht->lock);
- if (iter->walker->tbl)
- list_del(&iter->walker->list);
- spin_unlock(&iter->ht->lock);
- kfree(iter->walker);
-}
-EXPORT_SYMBOL_GPL(rhashtable_walk_exit);
-
-/**
- * rhashtable_walk_start - Start a hash table walk
- * @iter: Hash table iterator
- *
- * Start a hash table walk. Note that we take the RCU lock in all
- * cases including when we return an error. So you must always call
- * rhashtable_walk_stop to clean up.
- *
- * Returns zero if successful.
- *
- * Returns -EAGAIN if resize event occured. Note that the iterator
- * will rewind back to the beginning and you may use it immediately
- * by calling rhashtable_walk_next.
- */
-int rhashtable_walk_start(struct rhashtable_iter *iter)
- __acquires(RCU)
-{
- struct rhashtable *ht = iter->ht;
-
- rcu_read_lock();
-
- spin_lock(&ht->lock);
- if (iter->walker->tbl)
- list_del(&iter->walker->list);
- spin_unlock(&ht->lock);
-
- if (!iter->walker->tbl) {
- iter->walker->tbl = rht_dereference_rcu(ht->tbl, ht);
- return -EAGAIN;
- }
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(rhashtable_walk_start);
-
-/**
- * rhashtable_walk_next - Return the next object and advance the iterator
- * @iter: Hash table iterator
- *
- * Note that you must call rhashtable_walk_stop when you are finished
- * with the walk.
- *
- * Returns the next object or NULL when the end of the table is reached.
- *
- * Returns -EAGAIN if resize event occured. Note that the iterator
- * will rewind back to the beginning and you may continue to use it.
- */
-void *rhashtable_walk_next(struct rhashtable_iter *iter)
-{
- struct bucket_table *tbl = iter->walker->tbl;
- struct rhashtable *ht = iter->ht;
- struct rhash_head *p = iter->p;
-
- if (p) {
- p = rht_dereference_bucket_rcu(p->next, tbl, iter->slot);
- goto next;
- }
-
- for (; iter->slot < tbl->size; iter->slot++) {
- int skip = iter->skip;
-
- rht_for_each_rcu(p, tbl, iter->slot) {
- if (!skip)
- break;
- skip--;
- }
-
-next:
- if (!rht_is_a_nulls(p)) {
- iter->skip++;
- iter->p = p;
- return rht_obj(ht, p);
- }
-
- iter->skip = 0;
- }
-
- iter->p = NULL;
-
- /* Ensure we see any new tables. */
- smp_rmb();
-
- iter->walker->tbl = rht_dereference_rcu(tbl->future_tbl, ht);
- if (iter->walker->tbl) {
- iter->slot = 0;
- iter->skip = 0;
- return ERR_PTR(-EAGAIN);
- }
-
- return NULL;
-}
-EXPORT_SYMBOL_GPL(rhashtable_walk_next);
-
-/**
- * rhashtable_walk_stop - Finish a hash table walk
- * @iter: Hash table iterator
- *
- * Finish a hash table walk.
- */
-void rhashtable_walk_stop(struct rhashtable_iter *iter)
- __releases(RCU)
-{
- struct rhashtable *ht;
- struct bucket_table *tbl = iter->walker->tbl;
-
- if (!tbl)
- goto out;
-
- ht = iter->ht;
-
- spin_lock(&ht->lock);
- if (tbl->rehash < tbl->size)
- list_add(&iter->walker->list, &tbl->walkers);
- else
- iter->walker->tbl = NULL;
- spin_unlock(&ht->lock);
-
- iter->p = NULL;
-
-out:
- rcu_read_unlock();
-}
-EXPORT_SYMBOL_GPL(rhashtable_walk_stop);
static size_t rounded_hashtable_size(const struct rhashtable_params *params)
{
return jhash2(key, length, seed);
}
-/**
- * rhashtable_init - initialize a new hash table
- * @ht: hash table to be initialized
- * @params: configuration parameters
- *
- * Initializes a new hash table based on the provided configuration
- * parameters. A table can be configured either with a variable or
- * fixed length key:
- *
- * Configuration Example 1: Fixed length keys
- * struct test_obj {
- * int key;
- * void * my_member;
- * struct rhash_head node;
- * };
- *
- * struct rhashtable_params params = {
- * .head_offset = offsetof(struct test_obj, node),
- * .key_offset = offsetof(struct test_obj, key),
- * .key_len = sizeof(int),
- * .hashfn = jhash,
- * .nulls_base = (1U << RHT_BASE_SHIFT),
- * };
- *
- * Configuration Example 2: Variable length keys
- * struct test_obj {
- * [...]
- * struct rhash_head node;
- * };
- *
- * u32 my_hash_fn(const void *data, u32 len, u32 seed)
- * {
- * struct test_obj *obj = data;
- *
- * return [... hash ...];
- * }
- *
- * struct rhashtable_params params = {
- * .head_offset = offsetof(struct test_obj, node),
- * .hashfn = jhash,
- * .obj_hashfn = my_hash_fn,
- * };
- */
int rhashtable_init(struct rhashtable *ht,
const struct rhashtable_params *params)
{
return 0;
}
-EXPORT_SYMBOL_GPL(rhashtable_init);
-/**
- * rhashtable_free_and_destroy - free elements and destroy hash table
- * @ht: the hash table to destroy
- * @free_fn: callback to release resources of element
- * @arg: pointer passed to free_fn
- *
- * Stops an eventual async resize. If defined, invokes free_fn for each
- * element to releasal resources. Please note that RCU protected
- * readers may still be accessing the elements. Releasing of resources
- * must occur in a compatible manner. Then frees the bucket array.
- *
- * This function will eventually sleep to wait for an async resize
- * to complete. The caller is responsible that no further write operations
- * occurs in parallel.
- */
-void rhashtable_free_and_destroy(struct rhashtable *ht,
- void (*free_fn)(void *ptr, void *arg),
- void *arg)
+void rhashtable_destroy(struct rhashtable *ht)
{
struct bucket_table *tbl;
- unsigned int i;
cancel_work_sync(&ht->run_work);
mutex_lock(&ht->mutex);
tbl = rht_dereference(ht->tbl, ht);
- if (free_fn) {
- for (i = 0; i < tbl->size; i++) {
- struct rhash_head *pos, *next;
-
- for (pos = rht_dereference(tbl->buckets[i], ht),
- next = !rht_is_a_nulls(pos) ?
- rht_dereference(pos->next, ht) : NULL;
- !rht_is_a_nulls(pos);
- pos = next,
- next = !rht_is_a_nulls(pos) ?
- rht_dereference(pos->next, ht) : NULL)
- free_fn(rht_obj(ht, pos), arg);
- }
- }
-
bucket_table_free(tbl);
mutex_unlock(&ht->mutex);
}
-EXPORT_SYMBOL_GPL(rhashtable_free_and_destroy);
-
-void rhashtable_destroy(struct rhashtable *ht)
-{
- return rhashtable_free_and_destroy(ht, NULL, NULL);
-}
-EXPORT_SYMBOL_GPL(rhashtable_destroy);
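With rhashtable_free_and_destroy() gone, rhashtable_destroy() only frees the bucket table; it no longer walks the chains invoking a free_fn on each element. Any caller that depended on that callback now has to release its objects itself before destroying the table. A hedged sketch, where example_obj, my_objects and my_table stand in for the caller's own state:

	struct example_obj {
		struct rhash_head	hash;
		struct list_head	list;
	};

	static void example_teardown(struct list_head *my_objects,
				     struct rhashtable *my_table)
	{
		struct example_obj *obj, *next;

		/* free the elements first; the table only frees its buckets */
		list_for_each_entry_safe(obj, next, my_objects, list)
			kfree(obj);

		rhashtable_destroy(my_table);
	}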
*
* (C) 2004 Nadia Yvette Chambers, Oracle
*/
-#include <linux/export.h>
+
+#include <linux/completion.h>
#include <linux/sched.h>
-#include <linux/mm.h>
#include <linux/wait.h>
-#include <linux/hash.h>
-#include <linux/kthread.h>
-void __init_waitqueue_head(wait_queue_head_t *q, const char *name, struct lock_class_key *key)
+static inline int waitqueue_active(wait_queue_head_t *q)
{
- spin_lock_init(&q->lock);
- lockdep_set_class_and_name(&q->lock, key, name);
- INIT_LIST_HEAD(&q->task_list);
+ return !list_empty(&q->task_list);
}
-EXPORT_SYMBOL(__init_waitqueue_head);
-
-void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
+static inline void __add_wait_queue(wait_queue_head_t *head, wait_queue_t *new)
{
- unsigned long flags;
-
- wait->flags &= ~WQ_FLAG_EXCLUSIVE;
- spin_lock_irqsave(&q->lock, flags);
- __add_wait_queue(q, wait);
- spin_unlock_irqrestore(&q->lock, flags);
+ list_add(&new->task_list, &head->task_list);
}
-EXPORT_SYMBOL(add_wait_queue);
-void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait)
+static inline void __add_wait_queue_tail(wait_queue_head_t *head,
+ wait_queue_t *new)
{
- unsigned long flags;
+ list_add_tail(&new->task_list, &head->task_list);
+}
+static inline void
+__add_wait_queue_tail_exclusive(wait_queue_head_t *q, wait_queue_t *wait)
+{
wait->flags |= WQ_FLAG_EXCLUSIVE;
- spin_lock_irqsave(&q->lock, flags);
__add_wait_queue_tail(q, wait);
- spin_unlock_irqrestore(&q->lock, flags);
}
-EXPORT_SYMBOL(add_wait_queue_exclusive);
-void remove_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
+static inline void
+__remove_wait_queue(wait_queue_head_t *head, wait_queue_t *old)
{
- unsigned long flags;
-
- spin_lock_irqsave(&q->lock, flags);
- __remove_wait_queue(q, wait);
- spin_unlock_irqrestore(&q->lock, flags);
+ list_del(&old->task_list);
}
-EXPORT_SYMBOL(remove_wait_queue);
-
-/*
- * The core wakeup function. Non-exclusive wakeups (nr_exclusive == 0) just
- * wake everything up. If it's an exclusive wakeup (nr_exclusive == small +ve
- * number) then we wake all the non-exclusive tasks and one exclusive task.
- *
- * There are circumstances in which we can try to wake a task which has already
- * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns
- * zero in this (rare) case, and we handle it by continuing to scan the queue.
- */
static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
- int nr_exclusive, int wake_flags, void *key)
+ int nr_exclusive, int wake_flags, void *key)
{
wait_queue_t *curr, *next;
}
}
-/**
- * __wake_up - wake up threads blocked on a waitqueue.
- * @q: the waitqueue
- * @mode: which threads
- * @nr_exclusive: how many wake-one or wake-many threads to wake up
- * @key: is directly passed to the wakeup function
- *
- * It may be assumed that this function implies a write memory barrier before
- * changing the task state if and only if any tasks are woken up.
- */
-void __wake_up(wait_queue_head_t *q, unsigned int mode,
- int nr_exclusive, void *key)
+static void __wake_up(wait_queue_head_t *q, unsigned int mode,
+ int nr_exclusive, void *key)
{
unsigned long flags;
__wake_up_common(q, mode, nr_exclusive, 0, key);
spin_unlock_irqrestore(&q->lock, flags);
}
-EXPORT_SYMBOL(__wake_up);
-/*
- * Same as __wake_up but called with the spinlock in wait_queue_head_t held.
- */
-void __wake_up_locked(wait_queue_head_t *q, unsigned int mode, int nr)
+void wake_up(wait_queue_head_t *q)
{
- __wake_up_common(q, mode, nr, 0, NULL);
+ __wake_up(q, TASK_NORMAL, 1, NULL);
}
-EXPORT_SYMBOL_GPL(__wake_up_locked);
-void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key)
+static void __wake_up_locked(wait_queue_head_t *q, unsigned int mode, int nr)
{
- __wake_up_common(q, mode, 1, 0, key);
-}
-EXPORT_SYMBOL_GPL(__wake_up_locked_key);
-
-/**
- * __wake_up_sync_key - wake up threads blocked on a waitqueue.
- * @q: the waitqueue
- * @mode: which threads
- * @nr_exclusive: how many wake-one or wake-many threads to wake up
- * @key: opaque value to be passed to wakeup targets
- *
- * The sync wakeup differs that the waker knows that it will schedule
- * away soon, so while the target thread will be woken up, it will not
- * be migrated to another CPU - ie. the two threads are 'synchronized'
- * with each other. This can prevent needless bouncing between CPUs.
- *
- * On UP it can prevent extra preemption.
- *
- * It may be assumed that this function implies a write memory barrier before
- * changing the task state if and only if any tasks are woken up.
- */
-void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode,
- int nr_exclusive, void *key)
-{
- unsigned long flags;
- int wake_flags = 1; /* XXX WF_SYNC */
-
- if (unlikely(!q))
- return;
-
- if (unlikely(nr_exclusive != 1))
- wake_flags = 0;
-
- spin_lock_irqsave(&q->lock, flags);
- __wake_up_common(q, mode, nr_exclusive, wake_flags, key);
- spin_unlock_irqrestore(&q->lock, flags);
-}
-EXPORT_SYMBOL_GPL(__wake_up_sync_key);
-
-/*
- * __wake_up_sync - see __wake_up_sync_key()
- */
-void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
-{
- __wake_up_sync_key(q, mode, nr_exclusive, NULL);
+ __wake_up_common(q, mode, nr, 0, NULL);
}
-EXPORT_SYMBOL_GPL(__wake_up_sync); /* For internal use only */
-/*
- * Note: we use "set_current_state()" _after_ the wait-queue add,
- * because we need a memory barrier there on SMP, so that any
- * wake-function that tests for the wait-queue being active
- * will be guaranteed to see waitqueue addition _or_ subsequent
- * tests in this thread will see the wakeup having taken place.
- *
- * The spin_unlock() itself is semi-permeable and only protects
- * one way (it only protects stuff inside the critical region and
- * stops them from bleeding out - it would still allow subsequent
- * loads to move into the critical region).
- */
void
prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state)
{
set_current_state(state);
spin_unlock_irqrestore(&q->lock, flags);
}
-EXPORT_SYMBOL(prepare_to_wait);
-void
+static void
prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state)
{
unsigned long flags;
set_current_state(state);
spin_unlock_irqrestore(&q->lock, flags);
}
-EXPORT_SYMBOL(prepare_to_wait_exclusive);
-
-long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_t *wait, int state)
-{
- unsigned long flags;
-
- wait->private = current;
- wait->func = autoremove_wake_function;
-
- spin_lock_irqsave(&q->lock, flags);
- if (list_empty(&wait->task_list)) {
- if (wait->flags & WQ_FLAG_EXCLUSIVE)
- __add_wait_queue_tail(q, wait);
- else
- __add_wait_queue(q, wait);
- }
- set_current_state(state);
- spin_unlock_irqrestore(&q->lock, flags);
-
- return 0;
-}
-EXPORT_SYMBOL(prepare_to_wait_event);
-/**
- * finish_wait - clean up after waiting in a queue
- * @q: waitqueue waited on
- * @wait: wait descriptor
- *
- * Sets current thread back to running state and removes
- * the wait descriptor from the given waitqueue if still
- * queued.
- */
void finish_wait(wait_queue_head_t *q, wait_queue_t *wait)
{
unsigned long flags;
spin_unlock_irqrestore(&q->lock, flags);
}
}
-EXPORT_SYMBOL(finish_wait);
-
-/**
- * abort_exclusive_wait - abort exclusive waiting in a queue
- * @q: waitqueue waited on
- * @wait: wait descriptor
- * @mode: runstate of the waiter to be woken
- * @key: key to identify a wait bit queue or %NULL
- *
- * Sets current thread back to running state and removes
- * the wait descriptor from the given waitqueue if still
- * queued.
- *
- * Wakes up the next waiter if the caller is concurrently
- * woken up through the queue.
- *
- * This prevents waiter starvation where an exclusive waiter
- * aborts and is woken up concurrently and no one wakes up
- * the next waiter.
- */
-void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait,
- unsigned int mode, void *key)
-{
- unsigned long flags;
-
- __set_current_state(TASK_RUNNING);
- spin_lock_irqsave(&q->lock, flags);
- if (!list_empty(&wait->task_list))
- list_del_init(&wait->task_list);
- else if (waitqueue_active(q))
- __wake_up_locked_key(q, mode, key);
- spin_unlock_irqrestore(&q->lock, flags);
-}
-EXPORT_SYMBOL(abort_exclusive_wait);
int default_wake_function(wait_queue_t *curr, unsigned mode, int wake_flags,
void *key)
list_del_init(&wait->task_list);
return ret;
}
-EXPORT_SYMBOL(autoremove_wake_function);
-static inline bool is_kthread_should_stop(void)
-{
- return (current->flags & PF_KTHREAD) && kthread_should_stop();
-}
+struct wait_bit_key {
+ void *flags;
+ int bit_nr;
+ unsigned long timeout;
+};
-/*
- * DEFINE_WAIT_FUNC(wait, woken_wake_func);
- *
- * add_wait_queue(&wq, &wait);
- * for (;;) {
- * if (condition)
- * break;
- *
- * p->state = mode; condition = true;
- * smp_mb(); // A smp_wmb(); // C
- * if (!wait->flags & WQ_FLAG_WOKEN) wait->flags |= WQ_FLAG_WOKEN;
- * schedule() try_to_wake_up();
- * p->state = TASK_RUNNING; ~~~~~~~~~~~~~~~~~~
- * wait->flags &= ~WQ_FLAG_WOKEN; condition = true;
- * smp_mb() // B smp_wmb(); // C
- * wait->flags |= WQ_FLAG_WOKEN;
- * }
- * remove_wait_queue(&wq, &wait);
- *
- */
-long wait_woken(wait_queue_t *wait, unsigned mode, long timeout)
-{
- set_current_state(mode); /* A */
- /*
- * The above implies an smp_mb(), which matches with the smp_wmb() from
- * woken_wake_function() such that if we observe WQ_FLAG_WOKEN we must
- * also observe all state before the wakeup.
- */
- if (!(wait->flags & WQ_FLAG_WOKEN) && !is_kthread_should_stop())
- timeout = schedule_timeout(timeout);
- __set_current_state(TASK_RUNNING);
-
- /*
- * The below implies an smp_mb(), it too pairs with the smp_wmb() from
- * woken_wake_function() such that we must either observe the wait
- * condition being true _OR_ WQ_FLAG_WOKEN such that we will not miss
- * an event.
- */
- smp_store_mb(wait->flags, wait->flags & ~WQ_FLAG_WOKEN); /* B */
+struct wait_bit_queue {
+ struct wait_bit_key key;
+ wait_queue_t wait;
+};
- return timeout;
-}
-EXPORT_SYMBOL(wait_woken);
-
-int woken_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
+static int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *arg)
{
- /*
- * Although this function is called under waitqueue lock, LOCK
- * doesn't imply write barrier and the users expects write
- * barrier semantics on wakeup functions. The following
- * smp_wmb() is equivalent to smp_wmb() in try_to_wake_up()
- * and is paired with smp_store_mb() in wait_woken().
- */
- smp_wmb(); /* C */
- wait->flags |= WQ_FLAG_WOKEN;
+ struct wait_bit_key *key = arg;
+ struct wait_bit_queue *wait_bit =
+ container_of(wait, struct wait_bit_queue, wait);
- return default_wake_function(wait, mode, sync, key);
+ return (wait_bit->key.flags == key->flags &&
+ wait_bit->key.bit_nr == key->bit_nr &&
+ !test_bit(key->bit_nr, key->flags))
+ ? autoremove_wake_function(wait, mode, sync, key) : 0;
}
-EXPORT_SYMBOL(woken_wake_function);
-int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *arg)
-{
- struct wait_bit_key *key = arg;
- struct wait_bit_queue *wait_bit
- = container_of(wait, struct wait_bit_queue, wait);
-
- if (wait_bit->key.flags != key->flags ||
- wait_bit->key.bit_nr != key->bit_nr ||
- test_bit(key->bit_nr, key->flags))
- return 0;
- else
- return autoremove_wake_function(wait, mode, sync, key);
-}
-EXPORT_SYMBOL(wake_bit_function);
+static DECLARE_WAIT_QUEUE_HEAD(bit_wq);
-/*
- * To allow interruptible waiting and asynchronous (i.e. nonblocking)
- * waiting, the actions of __wait_on_bit() and __wait_on_bit_lock() are
- * permitted return codes. Nonzero return codes halt waiting and return.
- */
-int __sched
-__wait_on_bit(wait_queue_head_t *wq, struct wait_bit_queue *q,
- wait_bit_action_f *action, unsigned mode)
-{
- int ret = 0;
+#define __WAIT_BIT_KEY_INITIALIZER(word, bit) \
+ { .flags = word, .bit_nr = bit, }
- do {
- prepare_to_wait(wq, &q->wait, mode);
- if (test_bit(q->key.bit_nr, q->key.flags))
- ret = (*action)(&q->key, mode);
- } while (test_bit(q->key.bit_nr, q->key.flags) && !ret);
- finish_wait(wq, &q->wait);
- return ret;
-}
-EXPORT_SYMBOL(__wait_on_bit);
+#define DEFINE_WAIT_BIT(name, word, bit) \
+ struct wait_bit_queue name = { \
+ .key = __WAIT_BIT_KEY_INITIALIZER(word, bit), \
+ .wait = { \
+ .private = current, \
+ .func = wake_bit_function, \
+ .task_list = \
+ LIST_HEAD_INIT((name).wait.task_list), \
+ }, \
+ }
-int __sched out_of_line_wait_on_bit(void *word, int bit,
- wait_bit_action_f *action, unsigned mode)
+void wake_up_bit(void *word, int bit)
{
- wait_queue_head_t *wq = bit_waitqueue(word, bit);
- DEFINE_WAIT_BIT(wait, word, bit);
+ struct wait_bit_key key = __WAIT_BIT_KEY_INITIALIZER(word, bit);
- return __wait_on_bit(wq, &wait, action, mode);
+ if (waitqueue_active(&bit_wq))
+ __wake_up(&bit_wq, TASK_NORMAL, 1, &key);
}
-EXPORT_SYMBOL(out_of_line_wait_on_bit);
-int __sched out_of_line_wait_on_bit_timeout(
- void *word, int bit, wait_bit_action_f *action,
- unsigned mode, unsigned long timeout)
+void __wait_on_bit(void *word, int bit, unsigned mode)
{
- wait_queue_head_t *wq = bit_waitqueue(word, bit);
DEFINE_WAIT_BIT(wait, word, bit);
- wait.key.timeout = jiffies + timeout;
- return __wait_on_bit(wq, &wait, action, mode);
-}
-EXPORT_SYMBOL_GPL(out_of_line_wait_on_bit_timeout);
-
-int __sched
-__wait_on_bit_lock(wait_queue_head_t *wq, struct wait_bit_queue *q,
- wait_bit_action_f *action, unsigned mode)
-{
do {
- int ret;
+ prepare_to_wait(&bit_wq, &wait.wait, mode);
+ if (test_bit(wait.key.bit_nr, wait.key.flags))
+ schedule();
+ } while (test_bit(wait.key.bit_nr, wait.key.flags));
- prepare_to_wait_exclusive(wq, &q->wait, mode);
- if (!test_bit(q->key.bit_nr, q->key.flags))
- continue;
- ret = action(&q->key, mode);
- if (!ret)
- continue;
- abort_exclusive_wait(wq, &q->wait, mode, &q->key);
- return ret;
- } while (test_and_set_bit(q->key.bit_nr, q->key.flags));
- finish_wait(wq, &q->wait);
- return 0;
+ finish_wait(&bit_wq, &wait.wait);
}
-EXPORT_SYMBOL(__wait_on_bit_lock);
-int __sched out_of_line_wait_on_bit_lock(void *word, int bit,
- wait_bit_action_f *action, unsigned mode)
+void __wait_on_bit_lock(void *word, int bit, unsigned mode)
{
- wait_queue_head_t *wq = bit_waitqueue(word, bit);
DEFINE_WAIT_BIT(wait, word, bit);
- return __wait_on_bit_lock(wq, &wait, action, mode);
-}
-EXPORT_SYMBOL(out_of_line_wait_on_bit_lock);
-
-void __wake_up_bit(wait_queue_head_t *wq, void *word, int bit)
-{
- struct wait_bit_key key = __WAIT_BIT_KEY_INITIALIZER(word, bit);
- if (waitqueue_active(wq))
- __wake_up(wq, TASK_NORMAL, 1, &key);
-}
-EXPORT_SYMBOL(__wake_up_bit);
-
-/**
- * wake_up_bit - wake up a waiter on a bit
- * @word: the word being waited on, a kernel virtual address
- * @bit: the bit of the word being waited on
- *
- * There is a standard hashed waitqueue table for generic use. This
- * is the part of the hashtable's accessor API that wakes up waiters
- * on a bit. For instance, if one were to have waiters on a bitflag,
- * one would call wake_up_bit() after clearing the bit.
- *
- * In order for this to function properly, as it uses waitqueue_active()
- * internally, some kind of memory barrier must be done prior to calling
- * this. Typically, this will be smp_mb__after_atomic(), but in some
- * cases where bitflags are manipulated non-atomically under a lock, one
- * may need to use a less regular barrier, such fs/inode.c's smp_mb(),
- * because spin_unlock() does not guarantee a memory barrier.
- */
-void wake_up_bit(void *word, int bit)
-{
- __wake_up_bit(bit_waitqueue(word, bit), word, bit);
-}
-EXPORT_SYMBOL(wake_up_bit);
-
-static DECLARE_WAIT_QUEUE_HEAD(__bit_waitqueue);
-
-wait_queue_head_t *bit_waitqueue(void *word, int bit)
-{
- return &__bit_waitqueue;
-}
-EXPORT_SYMBOL(bit_waitqueue);
-
-/*
- * Manipulate the atomic_t address to produce a better bit waitqueue table hash
- * index (we're keying off bit -1, but that would produce a horrible hash
- * value).
- */
-static inline wait_queue_head_t *atomic_t_waitqueue(atomic_t *p)
-{
- if (BITS_PER_LONG == 64) {
- unsigned long q = (unsigned long)p;
- return bit_waitqueue((void *)(q & ~1), q & 1);
- }
- return bit_waitqueue(p, 0);
-}
-
-static int wake_atomic_t_function(wait_queue_t *wait, unsigned mode, int sync,
- void *arg)
-{
- struct wait_bit_key *key = arg;
- struct wait_bit_queue *wait_bit
- = container_of(wait, struct wait_bit_queue, wait);
- atomic_t *val = key->flags;
-
- if (wait_bit->key.flags != key->flags ||
- wait_bit->key.bit_nr != key->bit_nr ||
- atomic_read(val) != 0)
- return 0;
- return autoremove_wake_function(wait, mode, sync, key);
-}
-
-/*
- * To allow interruptible waiting and asynchronous (i.e. nonblocking) waiting,
- * the actions of __wait_on_atomic_t() are permitted return codes. Nonzero
- * return codes halt waiting and return.
- */
-static __sched
-int __wait_on_atomic_t(wait_queue_head_t *wq, struct wait_bit_queue *q,
- int (*action)(atomic_t *), unsigned mode)
-{
- atomic_t *val;
- int ret = 0;
-
do {
- prepare_to_wait(wq, &q->wait, mode);
- val = q->key.flags;
- if (atomic_read(val) == 0)
- break;
- ret = (*action)(val);
- } while (!ret && atomic_read(val) != 0);
- finish_wait(wq, &q->wait);
- return ret;
+ prepare_to_wait_exclusive(&bit_wq, &wait.wait, mode);
+ if (!test_bit(wait.key.bit_nr, wait.key.flags))
+ continue;
+ schedule();
+ } while (test_and_set_bit(wait.key.bit_nr, wait.key.flags));
+ finish_wait(&bit_wq, &wait.wait);
}
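These shims funnel every bit-wait through the single bit_wq head above rather than the kernel's hashed bit-waitqueue table. A waker/waiter pair then looks roughly like this (a hedged sketch; the flags word and bit number are purely illustrative):

	static unsigned long example_flags;

	/* waiter: sleep until bit 0 is cleared */
	__wait_on_bit(&example_flags, 0, TASK_UNINTERRUPTIBLE);

	/* waker: clear the bit, then wake anyone waiting on it; the
	 * barrier pairs with the waitqueue_active() check in wake_up_bit() */
	clear_bit(0, &example_flags);
	smp_mb__after_atomic();
	wake_up_bit(&example_flags, 0);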
-#define DEFINE_WAIT_ATOMIC_T(name, p) \
- struct wait_bit_queue name = { \
- .key = __WAIT_ATOMIC_T_KEY_INITIALIZER(p), \
- .wait = { \
- .private = current, \
- .func = wake_atomic_t_function, \
- .task_list = \
- LIST_HEAD_INIT((name).wait.task_list), \
- }, \
- }
-
-__sched int out_of_line_wait_on_atomic_t(atomic_t *p, int (*action)(atomic_t *),
- unsigned mode)
+void complete(struct completion *x)
{
- wait_queue_head_t *wq = atomic_t_waitqueue(p);
- DEFINE_WAIT_ATOMIC_T(wait, p);
+ unsigned long flags;
- return __wait_on_atomic_t(wq, &wait, action, mode);
+ spin_lock_irqsave(&x->wait.lock, flags);
+ x->done++;
+ __wake_up_locked(&x->wait, TASK_NORMAL, 1);
+ spin_unlock_irqrestore(&x->wait.lock, flags);
}
-EXPORT_SYMBOL(out_of_line_wait_on_atomic_t);
-/**
- * wake_up_atomic_t - Wake up a waiter on a atomic_t
- * @p: The atomic_t being waited on, a kernel virtual address
- *
- * Wake up anyone waiting for the atomic_t to go to zero.
- *
- * Abuse the bit-waker function and its waitqueue hash table set (the atomic_t
- * check is done by the waiter's wake function, not the by the waker itself).
- */
-void wake_up_atomic_t(atomic_t *p)
+void wait_for_completion(struct completion *x)
{
- __wake_up_bit(atomic_t_waitqueue(p), p, WAIT_ATOMIC_T_BIT_NR);
-}
-EXPORT_SYMBOL(wake_up_atomic_t);
+ spin_lock_irq(&x->wait.lock);
-__sched int bit_wait(struct wait_bit_key *word, int mode)
-{
- schedule();
- return 0;
-}
-EXPORT_SYMBOL(bit_wait);
-
-__sched int bit_wait_io(struct wait_bit_key *word, int mode)
-{
- io_schedule();
- return 0;
-}
-EXPORT_SYMBOL(bit_wait_io);
+ if (!x->done) {
+ DECLARE_WAITQUEUE(wait, current);
-__sched int bit_wait_timeout(struct wait_bit_key *word, int mode)
-{
- unsigned long now = jiffies;
- if (time_after_eq(now, word->timeout))
- return -EAGAIN;
- schedule_timeout(word->timeout - now);
- return 0;
-}
-EXPORT_SYMBOL_GPL(bit_wait_timeout);
+ __add_wait_queue_tail_exclusive(&x->wait, &wait);
+ do {
+ __set_current_state(TASK_UNINTERRUPTIBLE);
+ spin_unlock_irq(&x->wait.lock);
-__sched int bit_wait_io_timeout(struct wait_bit_key *word, int mode)
-{
- unsigned long now = jiffies;
- if (time_after_eq(now, word->timeout))
- return -EAGAIN;
- io_schedule_timeout(word->timeout - now);
- return 0;
+ schedule();
+ spin_lock_irq(&x->wait.lock);
+ } while (!x->done);
+ __remove_wait_queue(&x->wait, &wait);
+ if (!x->done)
+ goto out;
+ }
+ x->done--;
+out:
+ spin_unlock_irq(&x->wait.lock);
}
-EXPORT_SYMBOL_GPL(bit_wait_io_timeout);
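The completion shim keeps the usual semantics: complete() bumps done and wakes one exclusive waiter under the waitqueue lock, and wait_for_completion() sleeps until done is nonzero, then consumes one count. Typical usage, as a hedged sketch (the struct and function names are illustrative):

	struct example_work {
		struct completion	done;
	};

	static void example_worker(struct example_work *w)
	{
		/* ... produce the result ... */
		complete(&w->done);
	}

	static void example_wait(struct example_work *w)
	{
		init_completion(&w->done);
		/* hand w to the worker, then block until it signals */
		wait_for_completion(&w->done);
	}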
+++ /dev/null
-/* +++ deflate.c */
-/* deflate.c -- compress data using the deflation algorithm
- * Copyright (C) 1995-1996 Jean-loup Gailly.
- * For conditions of distribution and use, see copyright notice in zlib.h
- */
-
-/*
- * ALGORITHM
- *
- * The "deflation" process depends on being able to identify portions
- * of the input text which are identical to earlier input (within a
- * sliding window trailing behind the input currently being processed).
- *
- * The most straightforward technique turns out to be the fastest for
- * most input files: try all possible matches and select the longest.
- * The key feature of this algorithm is that insertions into the string
- * dictionary are very simple and thus fast, and deletions are avoided
- * completely. Insertions are performed at each input character, whereas
- * string matches are performed only when the previous match ends. So it
- * is preferable to spend more time in matches to allow very fast string
- * insertions and avoid deletions. The matching algorithm for small
- * strings is inspired from that of Rabin & Karp. A brute force approach
- * is used to find longer strings when a small match has been found.
- * A similar algorithm is used in comic (by Jan-Mark Wams) and freeze
- * (by Leonid Broukhis).
- * A previous version of this file used a more sophisticated algorithm
- * (by Fiala and Greene) which is guaranteed to run in linear amortized
- * time, but has a larger average cost, uses more memory and is patented.
- * However the F&G algorithm may be faster for some highly redundant
- * files if the parameter max_chain_length (described below) is too large.
- *
- * ACKNOWLEDGEMENTS
- *
- * The idea of lazy evaluation of matches is due to Jan-Mark Wams, and
- * I found it in 'freeze' written by Leonid Broukhis.
- * Thanks to many people for bug reports and testing.
- *
- * REFERENCES
- *
- * Deutsch, L.P.,"DEFLATE Compressed Data Format Specification".
- * Available in ftp://ds.internic.net/rfc/rfc1951.txt
- *
- * A description of the Rabin and Karp algorithm is given in the book
- * "Algorithms" by R. Sedgewick, Addison-Wesley, p252.
- *
- * Fiala,E.R., and Greene,D.H.
- * Data Compression with Finite Windows, Comm.ACM, 32,4 (1989) 490-595
- *
- */
-
-#include <linux/module.h>
-#include <linux/zutil.h>
-#include "defutil.h"
-
-
-/* ===========================================================================
- * Function prototypes.
- */
-typedef enum {
- need_more, /* block not completed, need more input or more output */
- block_done, /* block flush performed */
- finish_started, /* finish started, need only more output at next deflate */
- finish_done /* finish done, accept no more input or output */
-} block_state;
-
-typedef block_state (*compress_func) (deflate_state *s, int flush);
-/* Compression function. Returns the block state after the call. */
-
-static void fill_window (deflate_state *s);
-static block_state deflate_stored (deflate_state *s, int flush);
-static block_state deflate_fast (deflate_state *s, int flush);
-static block_state deflate_slow (deflate_state *s, int flush);
-static void lm_init (deflate_state *s);
-static void putShortMSB (deflate_state *s, uInt b);
-static void flush_pending (z_streamp strm);
-static int read_buf (z_streamp strm, Byte *buf, unsigned size);
-static uInt longest_match (deflate_state *s, IPos cur_match);
-
-#ifdef DEBUG_ZLIB
-static void check_match (deflate_state *s, IPos start, IPos match,
- int length);
-#endif
-
-/* ===========================================================================
- * Local data
- */
-
-#define NIL 0
-/* Tail of hash chains */
-
-#ifndef TOO_FAR
-# define TOO_FAR 4096
-#endif
-/* Matches of length 3 are discarded if their distance exceeds TOO_FAR */
-
-#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1)
-/* Minimum amount of lookahead, except at the end of the input file.
- * See deflate.c for comments about the MIN_MATCH+1.
- */
-
-/* Values for max_lazy_match, good_match and max_chain_length, depending on
- * the desired pack level (0..9). The values given below have been tuned to
- * exclude worst case performance for pathological files. Better values may be
- * found for specific files.
- */
-typedef struct config_s {
- ush good_length; /* reduce lazy search above this match length */
- ush max_lazy; /* do not perform lazy search above this match length */
- ush nice_length; /* quit search above this match length */
- ush max_chain;
- compress_func func;
-} config;
-
-static const config configuration_table[10] = {
-/* good lazy nice chain */
-/* 0 */ {0, 0, 0, 0, deflate_stored}, /* store only */
-/* 1 */ {4, 4, 8, 4, deflate_fast}, /* maximum speed, no lazy matches */
-/* 2 */ {4, 5, 16, 8, deflate_fast},
-/* 3 */ {4, 6, 32, 32, deflate_fast},
-
-/* 4 */ {4, 4, 16, 16, deflate_slow}, /* lazy matches */
-/* 5 */ {8, 16, 32, 32, deflate_slow},
-/* 6 */ {8, 16, 128, 128, deflate_slow},
-/* 7 */ {8, 32, 128, 256, deflate_slow},
-/* 8 */ {32, 128, 258, 1024, deflate_slow},
-/* 9 */ {32, 258, 258, 4096, deflate_slow}}; /* maximum compression */
-
-/* Note: the deflate() code requires max_lazy >= MIN_MATCH and max_chain >= 4
- * For deflate_fast() (levels <= 3) good is ignored and lazy has a different
- * meaning.
- */
-
-#define EQUAL 0
-/* result of memcmp for equal strings */
-
-/* ===========================================================================
- * Update a hash value with the given input byte
- * IN assertion: all calls to UPDATE_HASH are made with consecutive
- * input characters, so that a running hash key can be computed from the
- * previous key instead of complete recalculation each time.
- */
-#define UPDATE_HASH(s,h,c) (h = (((h)<<s->hash_shift) ^ (c)) & s->hash_mask)
-
-
-/* ===========================================================================
- * Insert string str in the dictionary and set match_head to the previous head
- * of the hash chain (the most recent string with same hash key). Return
- * the previous length of the hash chain.
- * IN assertion: all calls to INSERT_STRING are made with consecutive
- * input characters and the first MIN_MATCH bytes of str are valid
- * (except for the last MIN_MATCH-1 bytes of the input file).
- */
-#define INSERT_STRING(s, str, match_head) \
- (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \
- s->prev[(str) & s->w_mask] = match_head = s->head[s->ins_h], \
- s->head[s->ins_h] = (Pos)(str))
-
-/* ===========================================================================
- * Initialize the hash table (avoiding 64K overflow for 16 bit systems).
- * prev[] will be initialized on the fly.
- */
-#define CLEAR_HASH(s) \
- s->head[s->hash_size-1] = NIL; \
- memset((char *)s->head, 0, (unsigned)(s->hash_size-1)*sizeof(*s->head));
-
-/* ========================================================================= */
-int zlib_deflateInit2(
- z_streamp strm,
- int level,
- int method,
- int windowBits,
- int memLevel,
- int strategy
-)
-{
- deflate_state *s;
- int noheader = 0;
- deflate_workspace *mem;
- char *next;
-
- ush *overlay;
- /* We overlay pending_buf and d_buf+l_buf. This works since the average
- * output size for (length,distance) codes is <= 24 bits.
- */
-
- if (strm == NULL) return Z_STREAM_ERROR;
-
- strm->msg = NULL;
-
- if (level == Z_DEFAULT_COMPRESSION) level = 6;
-
- mem = (deflate_workspace *) strm->workspace;
-
- if (windowBits < 0) { /* undocumented feature: suppress zlib header */
- noheader = 1;
- windowBits = -windowBits;
- }
- if (memLevel < 1 || memLevel > MAX_MEM_LEVEL || method != Z_DEFLATED ||
- windowBits < 9 || windowBits > 15 || level < 0 || level > 9 ||
- strategy < 0 || strategy > Z_HUFFMAN_ONLY) {
- return Z_STREAM_ERROR;
- }
-
- /*
- * Direct the workspace's pointers to the chunks that were allocated
- * along with the deflate_workspace struct.
- */
- next = (char *) mem;
- next += sizeof(*mem);
- mem->window_memory = (Byte *) next;
- next += zlib_deflate_window_memsize(windowBits);
- mem->prev_memory = (Pos *) next;
- next += zlib_deflate_prev_memsize(windowBits);
- mem->head_memory = (Pos *) next;
- next += zlib_deflate_head_memsize(memLevel);
- mem->overlay_memory = next;
-
- s = (deflate_state *) &(mem->deflate_memory);
- strm->state = (struct internal_state *)s;
- s->strm = strm;
-
- s->noheader = noheader;
- s->w_bits = windowBits;
- s->w_size = 1 << s->w_bits;
- s->w_mask = s->w_size - 1;
-
- s->hash_bits = memLevel + 7;
- s->hash_size = 1 << s->hash_bits;
- s->hash_mask = s->hash_size - 1;
- s->hash_shift = ((s->hash_bits+MIN_MATCH-1)/MIN_MATCH);
-
- s->window = (Byte *) mem->window_memory;
- s->prev = (Pos *) mem->prev_memory;
- s->head = (Pos *) mem->head_memory;
-
- s->lit_bufsize = 1 << (memLevel + 6); /* 16K elements by default */
-
- overlay = (ush *) mem->overlay_memory;
- s->pending_buf = (uch *) overlay;
- s->pending_buf_size = (ulg)s->lit_bufsize * (sizeof(ush)+2L);
-
- s->d_buf = overlay + s->lit_bufsize/sizeof(ush);
- s->l_buf = s->pending_buf + (1+sizeof(ush))*s->lit_bufsize;
-
- s->level = level;
- s->strategy = strategy;
- s->method = (Byte)method;
-
- return zlib_deflateReset(strm);
-}
-
-/* ========================================================================= */
-int zlib_deflateReset(
- z_streamp strm
-)
-{
- deflate_state *s;
-
- if (strm == NULL || strm->state == NULL)
- return Z_STREAM_ERROR;
-
- strm->total_in = strm->total_out = 0;
- strm->msg = NULL;
- strm->data_type = Z_UNKNOWN;
-
- s = (deflate_state *)strm->state;
- s->pending = 0;
- s->pending_out = s->pending_buf;
-
- if (s->noheader < 0) {
- s->noheader = 0; /* was set to -1 by deflate(..., Z_FINISH); */
- }
- s->status = s->noheader ? BUSY_STATE : INIT_STATE;
- strm->adler = 1;
- s->last_flush = Z_NO_FLUSH;
-
- zlib_tr_init(s);
- lm_init(s);
-
- return Z_OK;
-}
-
-/* =========================================================================
- * Put a short in the pending buffer. The 16-bit value is put in MSB order.
- * IN assertion: the stream state is correct and there is enough room in
- * pending_buf.
- */
-static void putShortMSB(
- deflate_state *s,
- uInt b
-)
-{
- put_byte(s, (Byte)(b >> 8));
- put_byte(s, (Byte)(b & 0xff));
-}
-
-/* =========================================================================
- * Flush as much pending output as possible. All deflate() output goes
- * through this function so some applications may wish to modify it
- * to avoid allocating a large strm->next_out buffer and copying into it.
- * (See also read_buf()).
- */
-static void flush_pending(
- z_streamp strm
-)
-{
- deflate_state *s = (deflate_state *) strm->state;
- unsigned len = s->pending;
-
- if (len > strm->avail_out) len = strm->avail_out;
- if (len == 0) return;
-
- if (strm->next_out != NULL) {
- memcpy(strm->next_out, s->pending_out, len);
- strm->next_out += len;
- }
- s->pending_out += len;
- strm->total_out += len;
- strm->avail_out -= len;
- s->pending -= len;
- if (s->pending == 0) {
- s->pending_out = s->pending_buf;
- }
-}
-
-/* ========================================================================= */
-int zlib_deflate(
- z_streamp strm,
- int flush
-)
-{
- int old_flush; /* value of flush param for previous deflate call */
- deflate_state *s;
-
- if (strm == NULL || strm->state == NULL ||
- flush > Z_FINISH || flush < 0) {
- return Z_STREAM_ERROR;
- }
- s = (deflate_state *) strm->state;
-
- if ((strm->next_in == NULL && strm->avail_in != 0) ||
- (s->status == FINISH_STATE && flush != Z_FINISH)) {
- return Z_STREAM_ERROR;
- }
- if (strm->avail_out == 0) return Z_BUF_ERROR;
-
- s->strm = strm; /* just in case */
- old_flush = s->last_flush;
- s->last_flush = flush;
-
- /* Write the zlib header */
- if (s->status == INIT_STATE) {
-
- uInt header = (Z_DEFLATED + ((s->w_bits-8)<<4)) << 8;
- uInt level_flags = (s->level-1) >> 1;
-
- if (level_flags > 3) level_flags = 3;
- header |= (level_flags << 6);
- if (s->strstart != 0) header |= PRESET_DICT;
- header += 31 - (header % 31);
-
- s->status = BUSY_STATE;
- putShortMSB(s, header);
-
- /* Save the adler32 of the preset dictionary: */
- if (s->strstart != 0) {
- putShortMSB(s, (uInt)(strm->adler >> 16));
- putShortMSB(s, (uInt)(strm->adler & 0xffff));
- }
- strm->adler = 1L;
- }
-
- /* Flush as much pending output as possible */
- if (s->pending != 0) {
- flush_pending(strm);
- if (strm->avail_out == 0) {
- /* Since avail_out is 0, deflate will be called again with
- * more output space, but possibly with both pending and
- * avail_in equal to zero. There won't be anything to do,
- * but this is not an error situation so make sure we
- * return OK instead of BUF_ERROR at next call of deflate:
- */
- s->last_flush = -1;
- return Z_OK;
- }
-
- /* Make sure there is something to do and avoid duplicate consecutive
- * flushes. For repeated and useless calls with Z_FINISH, we keep
- * returning Z_STREAM_END instead of Z_BUFF_ERROR.
- */
- } else if (strm->avail_in == 0 && flush <= old_flush &&
- flush != Z_FINISH) {
- return Z_BUF_ERROR;
- }
-
- /* User must not provide more input after the first FINISH: */
- if (s->status == FINISH_STATE && strm->avail_in != 0) {
- return Z_BUF_ERROR;
- }
-
- /* Start a new block or continue the current one.
- */
- if (strm->avail_in != 0 || s->lookahead != 0 ||
- (flush != Z_NO_FLUSH && s->status != FINISH_STATE)) {
- block_state bstate;
-
- bstate = (*(configuration_table[s->level].func))(s, flush);
-
- if (bstate == finish_started || bstate == finish_done) {
- s->status = FINISH_STATE;
- }
- if (bstate == need_more || bstate == finish_started) {
- if (strm->avail_out == 0) {
- s->last_flush = -1; /* avoid BUF_ERROR next call, see above */
- }
- return Z_OK;
- /* If flush != Z_NO_FLUSH && avail_out == 0, the next call
- * of deflate should use the same flush parameter to make sure
- * that the flush is complete. So we don't have to output an
- * empty block here, this will be done at next call. This also
- * ensures that for a very small output buffer, we emit at most
- * one empty block.
- */
- }
- if (bstate == block_done) {
- if (flush == Z_PARTIAL_FLUSH) {
- zlib_tr_align(s);
- } else if (flush == Z_PACKET_FLUSH) {
- /* Output just the 3-bit `stored' block type value,
- but not a zero length. */
- zlib_tr_stored_type_only(s);
- } else { /* FULL_FLUSH or SYNC_FLUSH */
- zlib_tr_stored_block(s, (char*)0, 0L, 0);
- /* For a full flush, this empty block will be recognized
- * as a special marker by inflate_sync().
- */
- if (flush == Z_FULL_FLUSH) {
- CLEAR_HASH(s); /* forget history */
- }
- }
- flush_pending(strm);
- if (strm->avail_out == 0) {
- s->last_flush = -1; /* avoid BUF_ERROR at next call, see above */
- return Z_OK;
- }
- }
- }
- Assert(strm->avail_out > 0, "bug2");
-
- if (flush != Z_FINISH) return Z_OK;
- if (s->noheader) return Z_STREAM_END;
-
- /* Write the zlib trailer (adler32) */
- putShortMSB(s, (uInt)(strm->adler >> 16));
- putShortMSB(s, (uInt)(strm->adler & 0xffff));
- flush_pending(strm);
- /* If avail_out is zero, the application will call deflate again
- * to flush the rest.
- */
- s->noheader = -1; /* write the trailer only once! */
- return s->pending != 0 ? Z_OK : Z_STREAM_END;
-}
-
-/* ========================================================================= */
-int zlib_deflateEnd(
- z_streamp strm
-)
-{
- int status;
- deflate_state *s;
-
- if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR;
- s = (deflate_state *) strm->state;
-
- status = s->status;
- if (status != INIT_STATE && status != BUSY_STATE &&
- status != FINISH_STATE) {
- return Z_STREAM_ERROR;
- }
-
- strm->state = NULL;
-
- return status == BUSY_STATE ? Z_DATA_ERROR : Z_OK;
-}
-
-/* ===========================================================================
- * Read a new buffer from the current input stream, update the adler32
- * and total number of bytes read. All deflate() input goes through
- * this function so some applications may wish to modify it to avoid
- * allocating a large strm->next_in buffer and copying from it.
- * (See also flush_pending()).
- */
-static int read_buf(
- z_streamp strm,
- Byte *buf,
- unsigned size
-)
-{
- unsigned len = strm->avail_in;
-
- if (len > size) len = size;
- if (len == 0) return 0;
-
- strm->avail_in -= len;
-
- if (!((deflate_state *)(strm->state))->noheader) {
- strm->adler = zlib_adler32(strm->adler, strm->next_in, len);
- }
- memcpy(buf, strm->next_in, len);
- strm->next_in += len;
- strm->total_in += len;
-
- return (int)len;
-}
-
-/* ===========================================================================
- * Initialize the "longest match" routines for a new zlib stream
- */
-static void lm_init(
- deflate_state *s
-)
-{
- s->window_size = (ulg)2L*s->w_size;
-
- CLEAR_HASH(s);
-
- /* Set the default configuration parameters:
- */
- s->max_lazy_match = configuration_table[s->level].max_lazy;
- s->good_match = configuration_table[s->level].good_length;
- s->nice_match = configuration_table[s->level].nice_length;
- s->max_chain_length = configuration_table[s->level].max_chain;
-
- s->strstart = 0;
- s->block_start = 0L;
- s->lookahead = 0;
- s->match_length = s->prev_length = MIN_MATCH-1;
- s->match_available = 0;
- s->ins_h = 0;
-}
-
-/* ===========================================================================
- * Set match_start to the longest match starting at the given string and
- * return its length. Matches shorter or equal to prev_length are discarded,
- * in which case the result is equal to prev_length and match_start is
- * garbage.
- * IN assertions: cur_match is the head of the hash chain for the current
- * string (strstart) and its distance is <= MAX_DIST, and prev_length >= 1
- * OUT assertion: the match length is not greater than s->lookahead.
- */
-/* For 80x86 and 680x0, an optimized version will be provided in match.asm or
- * match.S. The code will be functionally equivalent.
- */
-static uInt longest_match(
- deflate_state *s,
- IPos cur_match /* current match */
-)
-{
- unsigned chain_length = s->max_chain_length;/* max hash chain length */
- register Byte *scan = s->window + s->strstart; /* current string */
- register Byte *match; /* matched string */
- register int len; /* length of current match */
- int best_len = s->prev_length; /* best match length so far */
- int nice_match = s->nice_match; /* stop if match long enough */
- IPos limit = s->strstart > (IPos)MAX_DIST(s) ?
- s->strstart - (IPos)MAX_DIST(s) : NIL;
- /* Stop when cur_match becomes <= limit. To simplify the code,
- * we prevent matches with the string of window index 0.
- */
- Pos *prev = s->prev;
- uInt wmask = s->w_mask;
-
-#ifdef UNALIGNED_OK
- /* Compare two bytes at a time. Note: this is not always beneficial.
- * Try with and without -DUNALIGNED_OK to check.
- */
- register Byte *strend = s->window + s->strstart + MAX_MATCH - 1;
- register ush scan_start = *(ush*)scan;
- register ush scan_end = *(ush*)(scan+best_len-1);
-#else
- register Byte *strend = s->window + s->strstart + MAX_MATCH;
- register Byte scan_end1 = scan[best_len-1];
- register Byte scan_end = scan[best_len];
-#endif
-
- /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16.
- * It is easy to get rid of this optimization if necessary.
- */
- Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever");
-
- /* Do not waste too much time if we already have a good match: */
- if (s->prev_length >= s->good_match) {
- chain_length >>= 2;
- }
- /* Do not look for matches beyond the end of the input. This is necessary
- * to make deflate deterministic.
- */
- if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead;
-
- Assert((ulg)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead");
-
- do {
- Assert(cur_match < s->strstart, "no future");
- match = s->window + cur_match;
-
- /* Skip to next match if the match length cannot increase
- * or if the match length is less than 2:
- */
-#if (defined(UNALIGNED_OK) && MAX_MATCH == 258)
- /* This code assumes sizeof(unsigned short) == 2. Do not use
- * UNALIGNED_OK if your compiler uses a different size.
- */
- if (*(ush*)(match+best_len-1) != scan_end ||
- *(ush*)match != scan_start) continue;
-
- /* It is not necessary to compare scan[2] and match[2] since they are
- * always equal when the other bytes match, given that the hash keys
- * are equal and that HASH_BITS >= 8. Compare 2 bytes at a time at
- * strstart+3, +5, ... up to strstart+257. We check for insufficient
- * lookahead only every 4th comparison; the 128th check will be made
- * at strstart+257. If MAX_MATCH-2 is not a multiple of 8, it is
- * necessary to put more guard bytes at the end of the window, or
- * to check more often for insufficient lookahead.
- */
- Assert(scan[2] == match[2], "scan[2]?");
- scan++, match++;
- do {
- } while (*(ush*)(scan+=2) == *(ush*)(match+=2) &&
- *(ush*)(scan+=2) == *(ush*)(match+=2) &&
- *(ush*)(scan+=2) == *(ush*)(match+=2) &&
- *(ush*)(scan+=2) == *(ush*)(match+=2) &&
- scan < strend);
- /* The funny "do {}" generates better code on most compilers */
-
- /* Here, scan <= window+strstart+257 */
- Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan");
- if (*scan == *match) scan++;
-
- len = (MAX_MATCH - 1) - (int)(strend-scan);
- scan = strend - (MAX_MATCH-1);
-
-#else /* UNALIGNED_OK */
-
- if (match[best_len] != scan_end ||
- match[best_len-1] != scan_end1 ||
- *match != *scan ||
- *++match != scan[1]) continue;
-
- /* The check at best_len-1 can be removed because it will be made
- * again later. (This heuristic is not always a win.)
- * It is not necessary to compare scan[2] and match[2] since they
- * are always equal when the other bytes match, given that
- * the hash keys are equal and that HASH_BITS >= 8.
- */
- scan += 2, match++;
- Assert(*scan == *match, "match[2]?");
-
- /* We check for insufficient lookahead only every 8th comparison;
- * the 256th check will be made at strstart+258.
- */
- do {
- } while (*++scan == *++match && *++scan == *++match &&
- *++scan == *++match && *++scan == *++match &&
- *++scan == *++match && *++scan == *++match &&
- *++scan == *++match && *++scan == *++match &&
- scan < strend);
-
- Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan");
-
- len = MAX_MATCH - (int)(strend - scan);
- scan = strend - MAX_MATCH;
-
-#endif /* UNALIGNED_OK */
-
- if (len > best_len) {
- s->match_start = cur_match;
- best_len = len;
- if (len >= nice_match) break;
-#ifdef UNALIGNED_OK
- scan_end = *(ush*)(scan+best_len-1);
-#else
- scan_end1 = scan[best_len-1];
- scan_end = scan[best_len];
-#endif
- }
- } while ((cur_match = prev[cur_match & wmask]) > limit
- && --chain_length != 0);
-
- if ((uInt)best_len <= s->lookahead) return best_len;
- return s->lookahead;
-}
-
-#ifdef DEBUG_ZLIB
-/* ===========================================================================
- * Check that the match at match_start is indeed a match.
- */
-static void check_match(
- deflate_state *s,
- IPos start,
- IPos match,
- int length
-)
-{
- /* check that the match is indeed a match */
- if (memcmp((char *)s->window + match,
- (char *)s->window + start, length) != EQUAL) {
- fprintf(stderr, " start %u, match %u, length %d\n",
- start, match, length);
- do {
- fprintf(stderr, "%c%c", s->window[match++], s->window[start++]);
- } while (--length != 0);
- z_error("invalid match");
- }
- if (z_verbose > 1) {
- fprintf(stderr,"\\[%d,%d]", start-match, length);
- do { putc(s->window[start++], stderr); } while (--length != 0);
- }
-}
-#else
-# define check_match(s, start, match, length)
-#endif
-
-/* ===========================================================================
- * Fill the window when the lookahead becomes insufficient.
- * Updates strstart and lookahead.
- *
- * IN assertion: lookahead < MIN_LOOKAHEAD
- * OUT assertions: strstart <= window_size-MIN_LOOKAHEAD
- * At least one byte has been read, or avail_in == 0; reads are
- * performed for at least two bytes (required for the zip translate_eol
- * option -- not supported here).
- */
-static void fill_window(
- deflate_state *s
-)
-{
- register unsigned n, m;
- register Pos *p;
- unsigned more; /* Amount of free space at the end of the window. */
- uInt wsize = s->w_size;
-
- do {
- more = (unsigned)(s->window_size -(ulg)s->lookahead -(ulg)s->strstart);
-
- /* Deal with !@#$% 64K limit: */
- if (more == 0 && s->strstart == 0 && s->lookahead == 0) {
- more = wsize;
-
- } else if (more == (unsigned)(-1)) {
- /* Very unlikely, but possible on 16 bit machine if strstart == 0
- * and lookahead == 1 (input done one byte at time)
- */
- more--;
-
- /* If the window is almost full and there is insufficient lookahead,
- * move the upper half to the lower one to make room in the upper half.
- */
- } else if (s->strstart >= wsize+MAX_DIST(s)) {
-
- memcpy((char *)s->window, (char *)s->window+wsize,
- (unsigned)wsize);
- s->match_start -= wsize;
- s->strstart -= wsize; /* we now have strstart >= MAX_DIST */
- s->block_start -= (long) wsize;
-
- /* Slide the hash table (could be avoided with 32 bit values
- at the expense of memory usage). We slide even when level == 0
- to keep the hash table consistent if we switch back to level > 0
- later. (Using level 0 permanently is not an optimal usage of
- zlib, so we don't care about this pathological case.)
- */
- n = s->hash_size;
- p = &s->head[n];
- do {
- m = *--p;
- *p = (Pos)(m >= wsize ? m-wsize : NIL);
- } while (--n);
-
- n = wsize;
- p = &s->prev[n];
- do {
- m = *--p;
- *p = (Pos)(m >= wsize ? m-wsize : NIL);
- /* If n is not on any hash chain, prev[n] is garbage but
- * its value will never be used.
- */
- } while (--n);
- more += wsize;
- }
- if (s->strm->avail_in == 0) return;
-
- /* If there was no sliding:
- * strstart <= WSIZE+MAX_DIST-1 && lookahead <= MIN_LOOKAHEAD - 1 &&
- * more == window_size - lookahead - strstart
- * => more >= window_size - (MIN_LOOKAHEAD-1 + WSIZE + MAX_DIST-1)
- * => more >= window_size - 2*WSIZE + 2
- * In the BIG_MEM or MMAP case (not yet supported),
- * window_size == input_size + MIN_LOOKAHEAD &&
- * strstart + s->lookahead <= input_size => more >= MIN_LOOKAHEAD.
- * Otherwise, window_size == 2*WSIZE so more >= 2.
- * If there was sliding, more >= WSIZE. So in all cases, more >= 2.
- */
- Assert(more >= 2, "more < 2");
-
- n = read_buf(s->strm, s->window + s->strstart + s->lookahead, more);
- s->lookahead += n;
-
- /* Initialize the hash value now that we have some input: */
- if (s->lookahead >= MIN_MATCH) {
- s->ins_h = s->window[s->strstart];
- UPDATE_HASH(s, s->ins_h, s->window[s->strstart+1]);
-#if MIN_MATCH != 3
- Call UPDATE_HASH() MIN_MATCH-3 more times
-#endif
- }
- /* If the whole input has less than MIN_MATCH bytes, ins_h is garbage,
- * but this is not important since only literal bytes will be emitted.
- */
-
- } while (s->lookahead < MIN_LOOKAHEAD && s->strm->avail_in != 0);
-}
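As a concrete check of the more >= 2 assertion, assuming the default 32 KiB window (w_size = 32768, window_size = 65536) and the usual MAX_MATCH = 258 and MIN_MATCH = 3, so MIN_LOOKAHEAD = 262 and MAX_DIST = 32506: with no slide, strstart <= 32768 + 32506 - 1 = 65273 and lookahead <= 261, so more = 65536 - 261 - 65273 = 2; a slide adds a full w_size on top of that, so the read_buf() call is always offered at least two bytes of room.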
-
-/* ===========================================================================
- * Flush the current block, with given end-of-file flag.
- * IN assertion: strstart is set to the end of the current match.
- */
-#define FLUSH_BLOCK_ONLY(s, eof) { \
- zlib_tr_flush_block(s, (s->block_start >= 0L ? \
- (char *)&s->window[(unsigned)s->block_start] : \
- NULL), \
- (ulg)((long)s->strstart - s->block_start), \
- (eof)); \
- s->block_start = s->strstart; \
- flush_pending(s->strm); \
- Tracev((stderr,"[FLUSH]")); \
-}
-
-/* Same but force premature exit if necessary. */
-#define FLUSH_BLOCK(s, eof) { \
- FLUSH_BLOCK_ONLY(s, eof); \
- if (s->strm->avail_out == 0) return (eof) ? finish_started : need_more; \
-}
-
-/* ===========================================================================
- * Copy without compression as much as possible from the input stream, return
- * the current block state.
- * This function does not insert new strings in the dictionary since
- * incompressible data is probably not useful. This function is used
- * only for the level=0 compression option.
- * NOTE: this function should be optimized to avoid extra copying from
- * window to pending_buf.
- */
-static block_state deflate_stored(
- deflate_state *s,
- int flush
-)
-{
- /* Stored blocks are limited to 0xffff bytes, pending_buf is limited
- * to pending_buf_size, and each stored block has a 5 byte header:
- */
- ulg max_block_size = 0xffff;
- ulg max_start;
-
- if (max_block_size > s->pending_buf_size - 5) {
- max_block_size = s->pending_buf_size - 5;
- }
-
- /* Copy as much as possible from input to output: */
- for (;;) {
- /* Fill the window as much as possible: */
- if (s->lookahead <= 1) {
-
- Assert(s->strstart < s->w_size+MAX_DIST(s) ||
- s->block_start >= (long)s->w_size, "slide too late");
-
- fill_window(s);
- if (s->lookahead == 0 && flush == Z_NO_FLUSH) return need_more;
-
- if (s->lookahead == 0) break; /* flush the current block */
- }
- Assert(s->block_start >= 0L, "block gone");
-
- s->strstart += s->lookahead;
- s->lookahead = 0;
-
- /* Emit a stored block if pending_buf will be full: */
- max_start = s->block_start + max_block_size;
- if (s->strstart == 0 || (ulg)s->strstart >= max_start) {
- /* strstart == 0 is possible when wraparound on 16-bit machine */
- s->lookahead = (uInt)(s->strstart - max_start);
- s->strstart = (uInt)max_start;
- FLUSH_BLOCK(s, 0);
- }
- /* Flush if we may have to slide, otherwise block_start may become
- * negative and the data will be gone:
- */
- if (s->strstart - (uInt)s->block_start >= MAX_DIST(s)) {
- FLUSH_BLOCK(s, 0);
- }
- }
- FLUSH_BLOCK(s, flush == Z_FINISH);
- return flush == Z_FINISH ? finish_done : block_done;
-}
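The 5-byte header budgeted for each stored block above is the plain RFC 1951 layout that zlib_tr_stored_block() and copy_block() emit further down: after byte alignment, three block-type bits, then LEN and its one's complement NLEN as 16-bit LSB-first values, followed by the raw bytes. A minimal self-contained sketch, with an illustrative helper name that is not taken from this code:

#include <stddef.h>
#include <string.h>

/* Sketch of one byte-aligned stored block: BFINAL in bit 0, BTYPE = 00,
 * then LEN and NLEN = ~LEN (both little-endian), then the raw data.
 * len must be at most 0xffff, matching the limit noted above. */
static size_t emit_stored_block(unsigned char *out, const unsigned char *src,
                                unsigned len, int last)
{
    out[0] = last ? 1 : 0;          /* 3 header bits fit in one byte here  */
    out[1] = len & 0xff;            /* LEN, least significant byte first   */
    out[2] = (len >> 8) & 0xff;
    out[3] = ~len & 0xff;           /* NLEN, one's complement of LEN       */
    out[4] = (~len >> 8) & 0xff;
    memcpy(out + 5, src, len);      /* the uncompressed payload            */
    return 5 + (size_t)len;
}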
-
-/* ===========================================================================
- * Compress as much as possible from the input stream, return the current
- * block state.
- * This function does not perform lazy evaluation of matches and inserts
- * new strings in the dictionary only for unmatched strings or for short
- * matches. It is used only for the fast compression options.
- */
-static block_state deflate_fast(
- deflate_state *s,
- int flush
-)
-{
- IPos hash_head = NIL; /* head of the hash chain */
- int bflush; /* set if current block must be flushed */
-
- for (;;) {
- /* Make sure that we always have enough lookahead, except
- * at the end of the input file. We need MAX_MATCH bytes
- * for the next match, plus MIN_MATCH bytes to insert the
- * string following the next match.
- */
- if (s->lookahead < MIN_LOOKAHEAD) {
- fill_window(s);
- if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) {
- return need_more;
- }
- if (s->lookahead == 0) break; /* flush the current block */
- }
-
- /* Insert the string window[strstart .. strstart+2] in the
- * dictionary, and set hash_head to the head of the hash chain:
- */
- if (s->lookahead >= MIN_MATCH) {
- INSERT_STRING(s, s->strstart, hash_head);
- }
-
- /* Find the longest match, discarding those <= prev_length.
- * At this point we always have match_length < MIN_MATCH
- */
- if (hash_head != NIL && s->strstart - hash_head <= MAX_DIST(s)) {
- /* To simplify the code, we prevent matches with the string
- * of window index 0 (in particular we have to avoid a match
- * of the string with itself at the start of the input file).
- */
- if (s->strategy != Z_HUFFMAN_ONLY) {
- s->match_length = longest_match (s, hash_head);
- }
- /* longest_match() sets match_start */
- }
- if (s->match_length >= MIN_MATCH) {
- check_match(s, s->strstart, s->match_start, s->match_length);
-
- bflush = zlib_tr_tally(s, s->strstart - s->match_start,
- s->match_length - MIN_MATCH);
-
- s->lookahead -= s->match_length;
-
- /* Insert new strings in the hash table only if the match length
- * is not too large. This saves time but degrades compression.
- */
- if (s->match_length <= s->max_insert_length &&
- s->lookahead >= MIN_MATCH) {
- s->match_length--; /* string at strstart already in hash table */
- do {
- s->strstart++;
- INSERT_STRING(s, s->strstart, hash_head);
- /* strstart never exceeds WSIZE-MAX_MATCH, so there are
- * always MIN_MATCH bytes ahead.
- */
- } while (--s->match_length != 0);
- s->strstart++;
- } else {
- s->strstart += s->match_length;
- s->match_length = 0;
- s->ins_h = s->window[s->strstart];
- UPDATE_HASH(s, s->ins_h, s->window[s->strstart+1]);
-#if MIN_MATCH != 3
- Call UPDATE_HASH() MIN_MATCH-3 more times
-#endif
- /* If lookahead < MIN_MATCH, ins_h is garbage, but it does not
- * matter since it will be recomputed at next deflate call.
- */
- }
- } else {
- /* No match, output a literal byte */
- Tracevv((stderr,"%c", s->window[s->strstart]));
- bflush = zlib_tr_tally (s, 0, s->window[s->strstart]);
- s->lookahead--;
- s->strstart++;
- }
- if (bflush) FLUSH_BLOCK(s, 0);
- }
- FLUSH_BLOCK(s, flush == Z_FINISH);
- return flush == Z_FINISH ? finish_done : block_done;
-}
-
-/* ===========================================================================
- * Same as above, but achieves better compression. We use a lazy
- * evaluation for matches: a match is finally adopted only if there is
- * no better match at the next window position.
- */
-static block_state deflate_slow(
- deflate_state *s,
- int flush
-)
-{
- IPos hash_head = NIL; /* head of hash chain */
- int bflush; /* set if current block must be flushed */
-
- /* Process the input block. */
- for (;;) {
- /* Make sure that we always have enough lookahead, except
- * at the end of the input file. We need MAX_MATCH bytes
- * for the next match, plus MIN_MATCH bytes to insert the
- * string following the next match.
- */
- if (s->lookahead < MIN_LOOKAHEAD) {
- fill_window(s);
- if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) {
- return need_more;
- }
- if (s->lookahead == 0) break; /* flush the current block */
- }
-
- /* Insert the string window[strstart .. strstart+2] in the
- * dictionary, and set hash_head to the head of the hash chain:
- */
- if (s->lookahead >= MIN_MATCH) {
- INSERT_STRING(s, s->strstart, hash_head);
- }
-
- /* Find the longest match, discarding those <= prev_length.
- */
- s->prev_length = s->match_length, s->prev_match = s->match_start;
- s->match_length = MIN_MATCH-1;
-
- if (hash_head != NIL && s->prev_length < s->max_lazy_match &&
- s->strstart - hash_head <= MAX_DIST(s)) {
- /* To simplify the code, we prevent matches with the string
- * of window index 0 (in particular we have to avoid a match
- * of the string with itself at the start of the input file).
- */
- if (s->strategy != Z_HUFFMAN_ONLY) {
- s->match_length = longest_match (s, hash_head);
- }
- /* longest_match() sets match_start */
-
- if (s->match_length <= 5 && (s->strategy == Z_FILTERED ||
- (s->match_length == MIN_MATCH &&
- s->strstart - s->match_start > TOO_FAR))) {
-
- /* If prev_match is also MIN_MATCH, match_start is garbage
- * but we will ignore the current match anyway.
- */
- s->match_length = MIN_MATCH-1;
- }
- }
- /* If there was a match at the previous step and the current
- * match is not better, output the previous match:
- */
- if (s->prev_length >= MIN_MATCH && s->match_length <= s->prev_length) {
- uInt max_insert = s->strstart + s->lookahead - MIN_MATCH;
- /* Do not insert strings in hash table beyond this. */
-
- check_match(s, s->strstart-1, s->prev_match, s->prev_length);
-
- bflush = zlib_tr_tally(s, s->strstart -1 - s->prev_match,
- s->prev_length - MIN_MATCH);
-
- /* Insert in hash table all strings up to the end of the match.
- * strstart-1 and strstart are already inserted. If there is not
- * enough lookahead, the last two strings are not inserted in
- * the hash table.
- */
- s->lookahead -= s->prev_length-1;
- s->prev_length -= 2;
- do {
- if (++s->strstart <= max_insert) {
- INSERT_STRING(s, s->strstart, hash_head);
- }
- } while (--s->prev_length != 0);
- s->match_available = 0;
- s->match_length = MIN_MATCH-1;
- s->strstart++;
-
- if (bflush) FLUSH_BLOCK(s, 0);
-
- } else if (s->match_available) {
- /* If there was no match at the previous position, output a
- * single literal. If there was a match but the current match
- * is longer, truncate the previous match to a single literal.
- */
- Tracevv((stderr,"%c", s->window[s->strstart-1]));
- if (zlib_tr_tally (s, 0, s->window[s->strstart-1])) {
- FLUSH_BLOCK_ONLY(s, 0);
- }
- s->strstart++;
- s->lookahead--;
- if (s->strm->avail_out == 0) return need_more;
- } else {
- /* There is no previous match to compare with, wait for
- * the next step to decide.
- */
- s->match_available = 1;
- s->strstart++;
- s->lookahead--;
- }
- }
- Assert (flush != Z_NO_FLUSH, "no flush?");
- if (s->match_available) {
- Tracevv((stderr,"%c", s->window[s->strstart-1]));
- zlib_tr_tally (s, 0, s->window[s->strstart-1]);
- s->match_available = 0;
- }
- FLUSH_BLOCK(s, flush == Z_FINISH);
- return flush == Z_FINISH ? finish_done : block_done;
-}
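For instance, suppose position i starts a 3-byte match "abc" while position i + 1 starts a 4-byte match "bcde". At i the code only records the candidate (match_available = 1); at i + 1 the longer match wins, so the byte at i is emitted as a single literal and the 4-byte match becomes the pending previous match, to be emitted on a later iteration unless an even longer match supersedes it -- exactly the truncate-to-literal case described above.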
-
-int zlib_deflate_workspacesize(int windowBits, int memLevel)
-{
- if (windowBits < 0) /* undocumented feature: suppress zlib header */
- windowBits = -windowBits;
-
- /* Since the return value is typically passed to vmalloc() unchecked... */
- BUG_ON(memLevel < 1 || memLevel > MAX_MEM_LEVEL || windowBits < 9 ||
- windowBits > 15);
-
- return sizeof(deflate_workspace)
- + zlib_deflate_window_memsize(windowBits)
- + zlib_deflate_prev_memsize(windowBits)
- + zlib_deflate_head_memsize(memLevel)
- + zlib_deflate_overlay_memsize(memLevel);
-}
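As a worked example, with windowBits = 15 and memLevel = 8, and assuming the usual one-byte Byte and two-byte Pos/ush, the memsize macros from defutil.h give 2 * 32768 = 65536 bytes for the window, 32768 * 2 = 65536 for prev, (1 << 15) * 2 = 65536 for head and (1 << 14) * 4 = 65536 for the overlay, so the workspace comes to roughly 256 KiB plus sizeof(deflate_workspace).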
+++ /dev/null
-/* +++ trees.c */
-/* trees.c -- output deflated data using Huffman coding
- * Copyright (C) 1995-1996 Jean-loup Gailly
- * For conditions of distribution and use, see copyright notice in zlib.h
- */
-
-/*
- * ALGORITHM
- *
- * The "deflation" process uses several Huffman trees. The more
- * common source values are represented by shorter bit sequences.
- *
- * Each code tree is stored in a compressed form which is itself
- * a Huffman encoding of the lengths of all the code strings (in
- * ascending order by source values). The actual code strings are
- * reconstructed from the lengths in the inflate process, as described
- * in the deflate specification.
- *
- * REFERENCES
- *
- * Deutsch, L.P., "'Deflate' Compressed Data Format Specification".
- * Available in ftp.uu.net:/pub/archiving/zip/doc/deflate-1.1.doc
- *
- * Storer, James A.
- * Data Compression: Methods and Theory, pp. 49-50.
- * Computer Science Press, 1988. ISBN 0-7167-8156-5.
- *
- * Sedgewick, R.
- * Algorithms, p290.
- * Addison-Wesley, 1983. ISBN 0-201-06672-6.
- */
-
-/* From: trees.c,v 1.11 1996/07/24 13:41:06 me Exp $ */
-
-/* #include "deflate.h" */
-
-#include <linux/zutil.h>
-#include <linux/bitrev.h>
-#include "defutil.h"
-
-#ifdef DEBUG_ZLIB
-# include <ctype.h>
-#endif
-
-/* ===========================================================================
- * Constants
- */
-
-#define MAX_BL_BITS 7
-/* Bit length codes must not exceed MAX_BL_BITS bits */
-
-#define END_BLOCK 256
-/* end of block literal code */
-
-#define REP_3_6 16
-/* repeat previous bit length 3-6 times (2 bits of repeat count) */
-
-#define REPZ_3_10 17
-/* repeat a zero length 3-10 times (3 bits of repeat count) */
-
-#define REPZ_11_138 18
-/* repeat a zero length 11-138 times (7 bits of repeat count) */
-
-static const int extra_lbits[LENGTH_CODES] /* extra bits for each length code */
- = {0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0};
-
-static const int extra_dbits[D_CODES] /* extra bits for each distance code */
- = {0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13};
-
-static const int extra_blbits[BL_CODES]/* extra bits for each bit length code */
- = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,7};
-
-static const uch bl_order[BL_CODES]
- = {16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15};
-/* The lengths of the bit length codes are sent in order of decreasing
- * probability, to avoid transmitting the lengths for unused bit length codes.
- */
-
-#define Buf_size (8 * 2*sizeof(char))
-/* Number of bits used within bi_buf. (bi_buf might be implemented on
- * more than 16 bits on some systems.)
- */
-
-/* ===========================================================================
- * Local data. These are initialized only once.
- */
-
-static ct_data static_ltree[L_CODES+2];
-/* The static literal tree. Since the bit lengths are imposed, there is no
- * need for the L_CODES extra codes used during heap construction. However
- * The codes 286 and 287 are needed to build a canonical tree (see zlib_tr_init
- * below).
- */
-
-static ct_data static_dtree[D_CODES];
-/* The static distance tree. (Actually a trivial tree since all codes use
- * 5 bits.)
- */
-
-static uch dist_code[512];
-/* distance codes. The first 256 values correspond to the distances
- * 3 .. 258, the last 256 values correspond to the top 8 bits of
- * the 15 bit distances.
- */
-
-static uch length_code[MAX_MATCH-MIN_MATCH+1];
-/* length code for each normalized match length (0 == MIN_MATCH) */
-
-static int base_length[LENGTH_CODES];
-/* First normalized length for each code (0 = MIN_MATCH) */
-
-static int base_dist[D_CODES];
-/* First normalized distance for each code (0 = distance of 1) */
-
-struct static_tree_desc_s {
- const ct_data *static_tree; /* static tree or NULL */
- const int *extra_bits; /* extra bits for each code or NULL */
- int extra_base; /* base index for extra_bits */
- int elems; /* max number of elements in the tree */
- int max_length; /* max bit length for the codes */
-};
-
-static static_tree_desc static_l_desc =
-{static_ltree, extra_lbits, LITERALS+1, L_CODES, MAX_BITS};
-
-static static_tree_desc static_d_desc =
-{static_dtree, extra_dbits, 0, D_CODES, MAX_BITS};
-
-static static_tree_desc static_bl_desc =
-{(const ct_data *)0, extra_blbits, 0, BL_CODES, MAX_BL_BITS};
-
-/* ===========================================================================
- * Local (static) routines in this file.
- */
-
-static void tr_static_init (void);
-static void init_block (deflate_state *s);
-static void pqdownheap (deflate_state *s, ct_data *tree, int k);
-static void gen_bitlen (deflate_state *s, tree_desc *desc);
-static void gen_codes (ct_data *tree, int max_code, ush *bl_count);
-static void build_tree (deflate_state *s, tree_desc *desc);
-static void scan_tree (deflate_state *s, ct_data *tree, int max_code);
-static void send_tree (deflate_state *s, ct_data *tree, int max_code);
-static int build_bl_tree (deflate_state *s);
-static void send_all_trees (deflate_state *s, int lcodes, int dcodes,
- int blcodes);
-static void compress_block (deflate_state *s, ct_data *ltree,
- ct_data *dtree);
-static void set_data_type (deflate_state *s);
-static void bi_windup (deflate_state *s);
-static void bi_flush (deflate_state *s);
-static void copy_block (deflate_state *s, char *buf, unsigned len,
- int header);
-
-#ifndef DEBUG_ZLIB
-# define send_code(s, c, tree) send_bits(s, tree[c].Code, tree[c].Len)
- /* Send a code of the given tree. c and tree must not have side effects */
-
-#else /* DEBUG_ZLIB */
-# define send_code(s, c, tree) \
- { if (z_verbose>2) fprintf(stderr,"\ncd %3d ",(c)); \
- send_bits(s, tree[c].Code, tree[c].Len); }
-#endif
-
-#define d_code(dist) \
- ((dist) < 256 ? dist_code[dist] : dist_code[256+((dist)>>7)])
-/* Mapping from a distance to a distance code. dist is the distance - 1 and
- * must not have side effects. dist_code[256] and dist_code[257] are never
- * used.
- */
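A worked example of the mapping: a match at distance 300 is tallied with dist = 299, so d_code(299) = dist_code[256 + (299 >> 7)] = dist_code[258], which tr_static_init() below fills in as code 16, the distance code covering distances 257..384 with 7 extra bits.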
-
-/* ===========================================================================
- * Send a value on a given number of bits.
- * IN assertion: length <= 16 and value fits in length bits.
- */
-#ifdef DEBUG_ZLIB
-static void send_bits (deflate_state *s, int value, int length);
-
-static void send_bits(
- deflate_state *s,
- int value, /* value to send */
- int length /* number of bits */
-)
-{
- Tracevv((stderr," l %2d v %4x ", length, value));
- Assert(length > 0 && length <= 15, "invalid length");
- s->bits_sent += (ulg)length;
-
- /* If not enough room in bi_buf, use (valid) bits from bi_buf and
- * (16 - bi_valid) bits from value, leaving (width - (16-bi_valid))
- * unused bits in value.
- */
- if (s->bi_valid > (int)Buf_size - length) {
- s->bi_buf |= (value << s->bi_valid);
- put_short(s, s->bi_buf);
- s->bi_buf = (ush)value >> (Buf_size - s->bi_valid);
- s->bi_valid += length - Buf_size;
- } else {
- s->bi_buf |= value << s->bi_valid;
- s->bi_valid += length;
- }
-}
-#else /* !DEBUG_ZLIB */
-
-#define send_bits(s, value, length) \
-{ int len = length;\
- if (s->bi_valid > (int)Buf_size - len) {\
- int val = value;\
- s->bi_buf |= (val << s->bi_valid);\
- put_short(s, s->bi_buf);\
- s->bi_buf = (ush)val >> (Buf_size - s->bi_valid);\
- s->bi_valid += len - Buf_size;\
- } else {\
- s->bi_buf |= (value) << s->bi_valid;\
- s->bi_valid += len;\
- }\
-}
-#endif /* DEBUG_ZLIB */
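As a worked example with Buf_size = 16: if bi_valid is 14 and a 5-bit value v is sent, then 14 > 16 - 5, so the low two bits of v are ORed into the top of bi_buf, the full 16-bit buffer is written out with put_short(), and the remaining three bits (v >> 2) are kept with bi_valid = 14 + 5 - 16 = 3.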
-
-/* ===========================================================================
- * Initialize the various 'constant' tables. In a multi-threaded environment,
- * this function may be called by two threads concurrently, but this is
- * harmless since both invocations do exactly the same thing.
- */
-static void tr_static_init(void)
-{
- static int static_init_done;
- int n; /* iterates over tree elements */
- int bits; /* bit counter */
- int length; /* length value */
- int code; /* code value */
- int dist; /* distance index */
- ush bl_count[MAX_BITS+1];
- /* number of codes at each bit length for an optimal tree */
-
- if (static_init_done) return;
-
- /* Initialize the mapping length (0..255) -> length code (0..28) */
- length = 0;
- for (code = 0; code < LENGTH_CODES-1; code++) {
- base_length[code] = length;
- for (n = 0; n < (1<<extra_lbits[code]); n++) {
- length_code[length++] = (uch)code;
- }
- }
- Assert (length == 256, "tr_static_init: length != 256");
- /* Note that the length 255 (match length 258) can be represented
- * in two different ways: code 284 + 5 bits or code 285, so we
- * overwrite length_code[255] to use the best encoding:
- */
- length_code[length-1] = (uch)code;
-
- /* Initialize the mapping dist (0..32K) -> dist code (0..29) */
- dist = 0;
- for (code = 0 ; code < 16; code++) {
- base_dist[code] = dist;
- for (n = 0; n < (1<<extra_dbits[code]); n++) {
- dist_code[dist++] = (uch)code;
- }
- }
- Assert (dist == 256, "tr_static_init: dist != 256");
- dist >>= 7; /* from now on, all distances are divided by 128 */
- for ( ; code < D_CODES; code++) {
- base_dist[code] = dist << 7;
- for (n = 0; n < (1<<(extra_dbits[code]-7)); n++) {
- dist_code[256 + dist++] = (uch)code;
- }
- }
- Assert (dist == 256, "tr_static_init: 256+dist != 512");
-
- /* Construct the codes of the static literal tree */
- for (bits = 0; bits <= MAX_BITS; bits++) bl_count[bits] = 0;
- n = 0;
- while (n <= 143) static_ltree[n++].Len = 8, bl_count[8]++;
- while (n <= 255) static_ltree[n++].Len = 9, bl_count[9]++;
- while (n <= 279) static_ltree[n++].Len = 7, bl_count[7]++;
- while (n <= 287) static_ltree[n++].Len = 8, bl_count[8]++;
- /* Codes 286 and 287 do not exist, but we must include them in the
- * tree construction to get a canonical Huffman tree (longest code
- * all ones)
- */
- gen_codes((ct_data *)static_ltree, L_CODES+1, bl_count);
-
- /* The static distance tree is trivial: */
- for (n = 0; n < D_CODES; n++) {
- static_dtree[n].Len = 5;
- static_dtree[n].Code = bitrev32((u32)n) >> (32 - 5);
- }
- static_init_done = 1;
-}
-
-/* ===========================================================================
- * Initialize the tree data structures for a new zlib stream.
- */
-void zlib_tr_init(
- deflate_state *s
-)
-{
- tr_static_init();
-
- s->compressed_len = 0L;
-
- s->l_desc.dyn_tree = s->dyn_ltree;
- s->l_desc.stat_desc = &static_l_desc;
-
- s->d_desc.dyn_tree = s->dyn_dtree;
- s->d_desc.stat_desc = &static_d_desc;
-
- s->bl_desc.dyn_tree = s->bl_tree;
- s->bl_desc.stat_desc = &static_bl_desc;
-
- s->bi_buf = 0;
- s->bi_valid = 0;
- s->last_eob_len = 8; /* enough lookahead for inflate */
-#ifdef DEBUG_ZLIB
- s->bits_sent = 0L;
-#endif
-
- /* Initialize the first block of the first file: */
- init_block(s);
-}
-
-/* ===========================================================================
- * Initialize a new block.
- */
-static void init_block(
- deflate_state *s
-)
-{
- int n; /* iterates over tree elements */
-
- /* Initialize the trees. */
- for (n = 0; n < L_CODES; n++) s->dyn_ltree[n].Freq = 0;
- for (n = 0; n < D_CODES; n++) s->dyn_dtree[n].Freq = 0;
- for (n = 0; n < BL_CODES; n++) s->bl_tree[n].Freq = 0;
-
- s->dyn_ltree[END_BLOCK].Freq = 1;
- s->opt_len = s->static_len = 0L;
- s->last_lit = s->matches = 0;
-}
-
-#define SMALLEST 1
-/* Index within the heap array of least frequent node in the Huffman tree */
-
-
-/* ===========================================================================
- * Remove the smallest element from the heap and recreate the heap with
- * one less element. Updates heap and heap_len.
- */
-#define pqremove(s, tree, top) \
-{\
- top = s->heap[SMALLEST]; \
- s->heap[SMALLEST] = s->heap[s->heap_len--]; \
- pqdownheap(s, tree, SMALLEST); \
-}
-
-/* ===========================================================================
- * Compares two subtrees, using the tree depth as tie breaker when
- * the subtrees have equal frequency. This minimizes the worst case length.
- */
-#define smaller(tree, n, m, depth) \
- (tree[n].Freq < tree[m].Freq || \
- (tree[n].Freq == tree[m].Freq && depth[n] <= depth[m]))
-
-/* ===========================================================================
- * Restore the heap property by moving down the tree starting at node k,
- * exchanging a node with the smallest of its two sons if necessary, stopping
- * when the heap property is re-established (each father smaller than its
- * two sons).
- */
-static void pqdownheap(
- deflate_state *s,
- ct_data *tree, /* the tree to restore */
- int k /* node to move down */
-)
-{
- int v = s->heap[k];
- int j = k << 1; /* left son of k */
- while (j <= s->heap_len) {
- /* Set j to the smallest of the two sons: */
- if (j < s->heap_len &&
- smaller(tree, s->heap[j+1], s->heap[j], s->depth)) {
- j++;
- }
- /* Exit if v is smaller than both sons */
- if (smaller(tree, v, s->heap[j], s->depth)) break;
-
- /* Exchange v with the smallest son */
- s->heap[k] = s->heap[j]; k = j;
-
- /* And continue down the tree, setting j to the left son of k */
- j <<= 1;
- }
- s->heap[k] = v;
-}
-
-/* ===========================================================================
- * Compute the optimal bit lengths for a tree and update the total bit length
- * for the current block.
- * IN assertion: the fields freq and dad are set, heap[heap_max] and
- * above are the tree nodes sorted by increasing frequency.
- * OUT assertions: the field len is set to the optimal bit length, the
- * array bl_count contains the frequencies for each bit length.
- * The length opt_len is updated; static_len is also updated if stree is
- * not null.
- */
-static void gen_bitlen(
- deflate_state *s,
- tree_desc *desc /* the tree descriptor */
-)
-{
- ct_data *tree = desc->dyn_tree;
- int max_code = desc->max_code;
- const ct_data *stree = desc->stat_desc->static_tree;
- const int *extra = desc->stat_desc->extra_bits;
- int base = desc->stat_desc->extra_base;
- int max_length = desc->stat_desc->max_length;
- int h; /* heap index */
- int n, m; /* iterate over the tree elements */
- int bits; /* bit length */
- int xbits; /* extra bits */
- ush f; /* frequency */
- int overflow = 0; /* number of elements with bit length too large */
-
- for (bits = 0; bits <= MAX_BITS; bits++) s->bl_count[bits] = 0;
-
- /* In a first pass, compute the optimal bit lengths (which may
- * overflow in the case of the bit length tree).
- */
- tree[s->heap[s->heap_max]].Len = 0; /* root of the heap */
-
- for (h = s->heap_max+1; h < HEAP_SIZE; h++) {
- n = s->heap[h];
- bits = tree[tree[n].Dad].Len + 1;
- if (bits > max_length) bits = max_length, overflow++;
- tree[n].Len = (ush)bits;
- /* We overwrite tree[n].Dad which is no longer needed */
-
- if (n > max_code) continue; /* not a leaf node */
-
- s->bl_count[bits]++;
- xbits = 0;
- if (n >= base) xbits = extra[n-base];
- f = tree[n].Freq;
- s->opt_len += (ulg)f * (bits + xbits);
- if (stree) s->static_len += (ulg)f * (stree[n].Len + xbits);
- }
- if (overflow == 0) return;
-
- Trace((stderr,"\nbit length overflow\n"));
- /* This happens for example on obj2 and pic of the Calgary corpus */
-
- /* Find the first bit length which could increase: */
- do {
- bits = max_length-1;
- while (s->bl_count[bits] == 0) bits--;
- s->bl_count[bits]--; /* move one leaf down the tree */
- s->bl_count[bits+1] += 2; /* move one overflow item as its brother */
- s->bl_count[max_length]--;
- /* The brother of the overflow item also moves one step up,
- * but this does not affect bl_count[max_length]
- */
- overflow -= 2;
- } while (overflow > 0);
-
- /* Now recompute all bit lengths, scanning in increasing frequency.
- * h is still equal to HEAP_SIZE. (It is simpler to reconstruct all
- * lengths instead of fixing only the wrong ones. This idea is taken
- * from 'ar' written by Haruhiko Okumura.)
- */
- for (bits = max_length; bits != 0; bits--) {
- n = s->bl_count[bits];
- while (n != 0) {
- m = s->heap[--h];
- if (m > max_code) continue;
- if (tree[m].Len != (unsigned) bits) {
- Trace((stderr,"code %d bits %d->%d\n", m, tree[m].Len, bits));
- s->opt_len += ((long)bits - (long)tree[m].Len)
- *(long)tree[m].Freq;
- tree[m].Len = (ush)bits;
- }
- n--;
- }
- }
-}
-
-/* ===========================================================================
- * Generate the codes for a given tree and bit counts (which need not be
- * optimal).
- * IN assertion: the array bl_count contains the bit length statistics for
- * the given tree and the field len is set for all tree elements.
- * OUT assertion: the field code is set for all tree elements of non
- * zero code length.
- */
-static void gen_codes(
- ct_data *tree, /* the tree to decorate */
- int max_code, /* largest code with non zero frequency */
- ush *bl_count /* number of codes at each bit length */
-)
-{
- ush next_code[MAX_BITS+1]; /* next code value for each bit length */
- ush code = 0; /* running code value */
- int bits; /* bit index */
- int n; /* code index */
-
- /* The distribution counts are first used to generate the code values
- * without bit reversal.
- */
- for (bits = 1; bits <= MAX_BITS; bits++) {
- next_code[bits] = code = (code + bl_count[bits-1]) << 1;
- }
- /* Check that the bit counts in bl_count are consistent. The last code
- * must be all ones.
- */
- Assert (code + bl_count[MAX_BITS]-1 == (1<<MAX_BITS)-1,
- "inconsistent bit counts");
- Tracev((stderr,"\ngen_codes: max_code %d ", max_code));
-
- for (n = 0; n <= max_code; n++) {
- int len = tree[n].Len;
- if (len == 0) continue;
- /* Now reverse the bits */
- tree[n].Code = bitrev32((u32)(next_code[len]++)) >> (32 - len);
-
- Tracecv(tree != static_ltree, (stderr,"\nn %3d %c l %2d c %4x (%x) ",
- n, (isgraph(n) ? n : ' '), len, tree[n].Code, next_code[len]-1));
- }
-}
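A small worked example of the canonical construction: for code lengths {2, 2, 2, 3, 3}, bl_count[2] = 3 and bl_count[3] = 2, so next_code[2] = 0 and next_code[3] = (0 + 3) << 1 = 6. Assigned in symbol order, the codes (before bit reversal) are 00, 01, 10, 110 and 111; a complete code always ends on the all-ones pattern, which is the consistency property the Assert above verifies.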
-
-/* ===========================================================================
- * Construct one Huffman tree and assign the code bit strings and lengths.
- * Update the total bit length for the current block.
- * IN assertion: the field freq is set for all tree elements.
- * OUT assertions: the fields len and code are set to the optimal bit length
- * and corresponding code. The length opt_len is updated; static_len is
- * also updated if stree is not null. The field max_code is set.
- */
-static void build_tree(
- deflate_state *s,
- tree_desc *desc /* the tree descriptor */
-)
-{
- ct_data *tree = desc->dyn_tree;
- const ct_data *stree = desc->stat_desc->static_tree;
- int elems = desc->stat_desc->elems;
- int n, m; /* iterate over heap elements */
- int max_code = -1; /* largest code with non zero frequency */
- int node; /* new node being created */
-
- /* Construct the initial heap, with least frequent element in
- * heap[SMALLEST]. The sons of heap[n] are heap[2*n] and heap[2*n+1].
- * heap[0] is not used.
- */
- s->heap_len = 0, s->heap_max = HEAP_SIZE;
-
- for (n = 0; n < elems; n++) {
- if (tree[n].Freq != 0) {
- s->heap[++(s->heap_len)] = max_code = n;
- s->depth[n] = 0;
- } else {
- tree[n].Len = 0;
- }
- }
-
- /* The pkzip format requires that at least one distance code exists,
- * and that at least one bit should be sent even if there is only one
- * possible code. So to avoid special checks later on we force at least
- * two codes of non zero frequency.
- */
- while (s->heap_len < 2) {
- node = s->heap[++(s->heap_len)] = (max_code < 2 ? ++max_code : 0);
- tree[node].Freq = 1;
- s->depth[node] = 0;
- s->opt_len--; if (stree) s->static_len -= stree[node].Len;
- /* node is 0 or 1 so it does not have extra bits */
- }
- desc->max_code = max_code;
-
- /* The elements heap[heap_len/2+1 .. heap_len] are leaves of the tree,
- * establish sub-heaps of increasing lengths:
- */
- for (n = s->heap_len/2; n >= 1; n--) pqdownheap(s, tree, n);
-
- /* Construct the Huffman tree by repeatedly combining the least two
- * frequent nodes.
- */
- node = elems; /* next internal node of the tree */
- do {
- pqremove(s, tree, n); /* n = node of least frequency */
- m = s->heap[SMALLEST]; /* m = node of next least frequency */
-
- s->heap[--(s->heap_max)] = n; /* keep the nodes sorted by frequency */
- s->heap[--(s->heap_max)] = m;
-
- /* Create a new node father of n and m */
- tree[node].Freq = tree[n].Freq + tree[m].Freq;
- s->depth[node] = (uch) (max(s->depth[n], s->depth[m]) + 1);
- tree[n].Dad = tree[m].Dad = (ush)node;
-#ifdef DUMP_BL_TREE
- if (tree == s->bl_tree) {
- fprintf(stderr,"\nnode %d(%d), sons %d(%d) %d(%d)",
- node, tree[node].Freq, n, tree[n].Freq, m, tree[m].Freq);
- }
-#endif
- /* and insert the new node in the heap */
- s->heap[SMALLEST] = node++;
- pqdownheap(s, tree, SMALLEST);
-
- } while (s->heap_len >= 2);
-
- s->heap[--(s->heap_max)] = s->heap[SMALLEST];
-
- /* At this point, the fields freq and dad are set. We can now
- * generate the bit lengths.
- */
- gen_bitlen(s, (tree_desc *)desc);
-
- /* The field len is now set, we can generate the bit codes */
- gen_codes ((ct_data *)tree, max_code, s->bl_count);
-}
-
-/* ===========================================================================
- * Scan a literal or distance tree to determine the frequencies of the codes
- * in the bit length tree.
- */
-static void scan_tree(
- deflate_state *s,
- ct_data *tree, /* the tree to be scanned */
- int max_code /* and its largest code of non zero frequency */
-)
-{
- int n; /* iterates over all tree elements */
- int prevlen = -1; /* last emitted length */
- int curlen; /* length of current code */
- int nextlen = tree[0].Len; /* length of next code */
- int count = 0; /* repeat count of the current code */
- int max_count = 7; /* max repeat count */
- int min_count = 4; /* min repeat count */
-
- if (nextlen == 0) max_count = 138, min_count = 3;
- tree[max_code+1].Len = (ush)0xffff; /* guard */
-
- for (n = 0; n <= max_code; n++) {
- curlen = nextlen; nextlen = tree[n+1].Len;
- if (++count < max_count && curlen == nextlen) {
- continue;
- } else if (count < min_count) {
- s->bl_tree[curlen].Freq += count;
- } else if (curlen != 0) {
- if (curlen != prevlen) s->bl_tree[curlen].Freq++;
- s->bl_tree[REP_3_6].Freq++;
- } else if (count <= 10) {
- s->bl_tree[REPZ_3_10].Freq++;
- } else {
- s->bl_tree[REPZ_11_138].Freq++;
- }
- count = 0; prevlen = curlen;
- if (nextlen == 0) {
- max_count = 138, min_count = 3;
- } else if (curlen == nextlen) {
- max_count = 6, min_count = 3;
- } else {
- max_count = 7, min_count = 4;
- }
- }
-}
-
-/* ===========================================================================
- * Send a literal or distance tree in compressed form, using the codes in
- * bl_tree.
- */
-static void send_tree(
- deflate_state *s,
- ct_data *tree, /* the tree to be scanned */
- int max_code /* and its largest code of non zero frequency */
-)
-{
- int n; /* iterates over all tree elements */
- int prevlen = -1; /* last emitted length */
- int curlen; /* length of current code */
- int nextlen = tree[0].Len; /* length of next code */
- int count = 0; /* repeat count of the current code */
- int max_count = 7; /* max repeat count */
- int min_count = 4; /* min repeat count */
-
- /* tree[max_code+1].Len = -1; */ /* guard already set */
- if (nextlen == 0) max_count = 138, min_count = 3;
-
- for (n = 0; n <= max_code; n++) {
- curlen = nextlen; nextlen = tree[n+1].Len;
- if (++count < max_count && curlen == nextlen) {
- continue;
- } else if (count < min_count) {
- do { send_code(s, curlen, s->bl_tree); } while (--count != 0);
-
- } else if (curlen != 0) {
- if (curlen != prevlen) {
- send_code(s, curlen, s->bl_tree); count--;
- }
- Assert(count >= 3 && count <= 6, " 3_6?");
- send_code(s, REP_3_6, s->bl_tree); send_bits(s, count-3, 2);
-
- } else if (count <= 10) {
- send_code(s, REPZ_3_10, s->bl_tree); send_bits(s, count-3, 3);
-
- } else {
- send_code(s, REPZ_11_138, s->bl_tree); send_bits(s, count-11, 7);
- }
- count = 0; prevlen = curlen;
- if (nextlen == 0) {
- max_count = 138, min_count = 3;
- } else if (curlen == nextlen) {
- max_count = 6, min_count = 3;
- } else {
- max_count = 7, min_count = 4;
- }
- }
-}
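For example, a run of five equal code lengths of value 5 (following a different length) is sent as one literal length 5 plus REP_3_6 with two extra bits encoding count - 3 = 1, i.e. four more repeats; a run of twelve zero lengths collapses to a single REPZ_11_138 with seven extra bits encoding count - 11 = 1.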
-
-/* ===========================================================================
- * Construct the Huffman tree for the bit lengths and return the index in
- * bl_order of the last bit length code to send.
- */
-static int build_bl_tree(
- deflate_state *s
-)
-{
- int max_blindex; /* index of last bit length code of non zero freq */
-
- /* Determine the bit length frequencies for literal and distance trees */
- scan_tree(s, (ct_data *)s->dyn_ltree, s->l_desc.max_code);
- scan_tree(s, (ct_data *)s->dyn_dtree, s->d_desc.max_code);
-
- /* Build the bit length tree: */
- build_tree(s, (tree_desc *)(&(s->bl_desc)));
- /* opt_len now includes the length of the tree representations, except
- * the lengths of the bit length codes and the 5+5+4 bits for the counts.
- */
-
- /* Determine the number of bit length codes to send. The pkzip format
- * requires that at least 4 bit length codes be sent. (appnote.txt says
- * 3 but the actual value used is 4.)
- */
- for (max_blindex = BL_CODES-1; max_blindex >= 3; max_blindex--) {
- if (s->bl_tree[bl_order[max_blindex]].Len != 0) break;
- }
- /* Update opt_len to include the bit length tree and counts */
- s->opt_len += 3*(max_blindex+1) + 5+5+4;
- Tracev((stderr, "\ndyn trees: dyn %ld, stat %ld",
- s->opt_len, s->static_len));
-
- return max_blindex;
-}
-
-/* ===========================================================================
- * Send the header for a block using dynamic Huffman trees: the counts, the
- * lengths of the bit length codes, the literal tree and the distance tree.
- * IN assertion: lcodes >= 257, dcodes >= 1, blcodes >= 4.
- */
-static void send_all_trees(
- deflate_state *s,
- int lcodes, /* number of codes for each tree */
- int dcodes, /* number of codes for each tree */
- int blcodes /* number of codes for each tree */
-)
-{
- int rank; /* index in bl_order */
-
- Assert (lcodes >= 257 && dcodes >= 1 && blcodes >= 4, "not enough codes");
- Assert (lcodes <= L_CODES && dcodes <= D_CODES && blcodes <= BL_CODES,
- "too many codes");
- Tracev((stderr, "\nbl counts: "));
- send_bits(s, lcodes-257, 5); /* not +255 as stated in appnote.txt */
- send_bits(s, dcodes-1, 5);
- send_bits(s, blcodes-4, 4); /* not -3 as stated in appnote.txt */
- for (rank = 0; rank < blcodes; rank++) {
- Tracev((stderr, "\nbl code %2d ", bl_order[rank]));
- send_bits(s, s->bl_tree[bl_order[rank]].Len, 3);
- }
- Tracev((stderr, "\nbl tree: sent %ld", s->bits_sent));
-
- send_tree(s, (ct_data *)s->dyn_ltree, lcodes-1); /* literal tree */
- Tracev((stderr, "\nlit tree: sent %ld", s->bits_sent));
-
- send_tree(s, (ct_data *)s->dyn_dtree, dcodes-1); /* distance tree */
- Tracev((stderr, "\ndist tree: sent %ld", s->bits_sent));
-}
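As a concrete header example: a block using 280 literal/length codes, 20 distance codes and 15 bit length codes sends 280 - 257 = 23 in 5 bits, 20 - 1 = 19 in 5 bits and 15 - 4 = 11 in 4 bits, then fifteen 3-bit code lengths in bl_order, and only after that the two compressed trees.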
-
-/* ===========================================================================
- * Send a stored block
- */
-void zlib_tr_stored_block(
- deflate_state *s,
- char *buf, /* input block */
- ulg stored_len, /* length of input block */
- int eof /* true if this is the last block for a file */
-)
-{
- send_bits(s, (STORED_BLOCK<<1)+eof, 3); /* send block type */
- s->compressed_len = (s->compressed_len + 3 + 7) & (ulg)~7L;
- s->compressed_len += (stored_len + 4) << 3;
-
- copy_block(s, buf, (unsigned)stored_len, 1); /* with header */
-}
-
-/* Send just the `stored block' type code without any length bytes or data.
- */
-void zlib_tr_stored_type_only(
- deflate_state *s
-)
-{
- send_bits(s, (STORED_BLOCK << 1), 3);
- bi_windup(s);
- s->compressed_len = (s->compressed_len + 3) & ~7L;
-}
-
-
-/* ===========================================================================
- * Send one empty static block to give enough lookahead for inflate.
- * This takes 10 bits, of which 7 may remain in the bit buffer.
- * The current inflate code requires 9 bits of lookahead. If the
- * last two codes for the previous block (real code plus EOB) were coded
- * on 5 bits or less, inflate may have only 5+3 bits of lookahead to decode
- * the last real code. In this case we send two empty static blocks instead
- * of one. (There are no problems if the previous block is stored or fixed.)
- * To simplify the code, we assume the worst case of last real code encoded
- * on one bit only.
- */
-void zlib_tr_align(
- deflate_state *s
-)
-{
- send_bits(s, STATIC_TREES<<1, 3);
- send_code(s, END_BLOCK, static_ltree);
- s->compressed_len += 10L; /* 3 for block type, 7 for EOB */
- bi_flush(s);
- /* Of the 10 bits for the empty block, we have already sent
- * (10 - bi_valid) bits. The lookahead for the last real code (before
- * the EOB of the previous block) was thus at least one plus the length
- * of the EOB plus what we have just sent of the empty static block.
- */
- if (1 + s->last_eob_len + 10 - s->bi_valid < 9) {
- send_bits(s, STATIC_TREES<<1, 3);
- send_code(s, END_BLOCK, static_ltree);
- s->compressed_len += 10L;
- bi_flush(s);
- }
- s->last_eob_len = 7;
-}
-
-/* ===========================================================================
- * Determine the best encoding for the current block: dynamic trees, static
- * trees or store, and output the encoded block to the zip file. This function
- * returns the total compressed length for the file so far.
- */
-ulg zlib_tr_flush_block(
- deflate_state *s,
- char *buf, /* input block, or NULL if too old */
- ulg stored_len, /* length of input block */
- int eof /* true if this is the last block for a file */
-)
-{
- ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */
- int max_blindex = 0; /* index of last bit length code of non zero freq */
-
- /* Build the Huffman trees unless a stored block is forced */
- if (s->level > 0) {
-
- /* Check if the file is ascii or binary */
- if (s->data_type == Z_UNKNOWN) set_data_type(s);
-
- /* Construct the literal and distance trees */
- build_tree(s, (tree_desc *)(&(s->l_desc)));
- Tracev((stderr, "\nlit data: dyn %ld, stat %ld", s->opt_len,
- s->static_len));
-
- build_tree(s, (tree_desc *)(&(s->d_desc)));
- Tracev((stderr, "\ndist data: dyn %ld, stat %ld", s->opt_len,
- s->static_len));
- /* At this point, opt_len and static_len are the total bit lengths of
- * the compressed block data, excluding the tree representations.
- */
-
- /* Build the bit length tree for the above two trees, and get the index
- * in bl_order of the last bit length code to send.
- */
- max_blindex = build_bl_tree(s);
-
- /* Determine the best encoding. Compute first the block length in bytes*/
- opt_lenb = (s->opt_len+3+7)>>3;
- static_lenb = (s->static_len+3+7)>>3;
-
- Tracev((stderr, "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u ",
- opt_lenb, s->opt_len, static_lenb, s->static_len, stored_len,
- s->last_lit));
-
- if (static_lenb <= opt_lenb) opt_lenb = static_lenb;
-
- } else {
- Assert(buf != (char*)0, "lost buf");
- opt_lenb = static_lenb = stored_len + 5; /* force a stored block */
- }
-
- /* If compression failed and this is the first and last block,
- * and if the .zip file supports seeking (to rewrite the local header),
- * the whole file is transformed into a stored file:
- */
-#ifdef STORED_FILE_OK
-# ifdef FORCE_STORED_FILE
- if (eof && s->compressed_len == 0L) { /* force stored file */
-# else
- if (stored_len <= opt_lenb && eof && s->compressed_len==0L && seekable()) {
-# endif
- /* Since LIT_BUFSIZE <= 2*WSIZE, the input data must be there: */
- if (buf == (char*)0) error ("block vanished");
-
- copy_block(s, buf, (unsigned)stored_len, 0); /* without header */
- s->compressed_len = stored_len << 3;
- s->method = STORED;
- } else
-#endif /* STORED_FILE_OK */
-
-#ifdef FORCE_STORED
- if (buf != (char*)0) { /* force stored block */
-#else
- if (stored_len+4 <= opt_lenb && buf != (char*)0) {
- /* 4: two words for the lengths */
-#endif
- /* The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE.
- * Otherwise we can't have processed more than WSIZE input bytes since
- * the last block flush, because compression would have been
- * successful. If LIT_BUFSIZE <= WSIZE, it is never too late to
- * transform a block into a stored block.
- */
- zlib_tr_stored_block(s, buf, stored_len, eof);
-
-#ifdef FORCE_STATIC
- } else if (static_lenb >= 0) { /* force static trees */
-#else
- } else if (static_lenb == opt_lenb) {
-#endif
- send_bits(s, (STATIC_TREES<<1)+eof, 3);
- compress_block(s, (ct_data *)static_ltree, (ct_data *)static_dtree);
- s->compressed_len += 3 + s->static_len;
- } else {
- send_bits(s, (DYN_TREES<<1)+eof, 3);
- send_all_trees(s, s->l_desc.max_code+1, s->d_desc.max_code+1,
- max_blindex+1);
- compress_block(s, (ct_data *)s->dyn_ltree, (ct_data *)s->dyn_dtree);
- s->compressed_len += 3 + s->opt_len;
- }
- Assert (s->compressed_len == s->bits_sent, "bad compressed size");
- init_block(s);
-
- if (eof) {
- bi_windup(s);
- s->compressed_len += 7; /* align on byte boundary */
- }
- Tracev((stderr,"\ncomprlen %lu(%lu) ", s->compressed_len>>3,
- s->compressed_len-7*eof));
-
- return s->compressed_len >> 3;
-}
-
-/* ===========================================================================
- * Save the match info and tally the frequency counts. Return true if
- * the current block must be flushed.
- */
-int zlib_tr_tally(
- deflate_state *s,
- unsigned dist, /* distance of matched string */
- unsigned lc /* match length-MIN_MATCH or unmatched char (if dist==0) */
-)
-{
- s->d_buf[s->last_lit] = (ush)dist;
- s->l_buf[s->last_lit++] = (uch)lc;
- if (dist == 0) {
- /* lc is the unmatched char */
- s->dyn_ltree[lc].Freq++;
- } else {
- s->matches++;
- /* Here, lc is the match length - MIN_MATCH */
- dist--; /* dist = match distance - 1 */
- Assert((ush)dist < (ush)MAX_DIST(s) &&
- (ush)lc <= (ush)(MAX_MATCH-MIN_MATCH) &&
- (ush)d_code(dist) < (ush)D_CODES, "zlib_tr_tally: bad match");
-
- s->dyn_ltree[length_code[lc]+LITERALS+1].Freq++;
- s->dyn_dtree[d_code(dist)].Freq++;
- }
-
- /* Try to guess if it is profitable to stop the current block here */
- if ((s->last_lit & 0xfff) == 0 && s->level > 2) {
- /* Compute an upper bound for the compressed length */
- ulg out_length = (ulg)s->last_lit*8L;
- ulg in_length = (ulg)((long)s->strstart - s->block_start);
- int dcode;
- for (dcode = 0; dcode < D_CODES; dcode++) {
- out_length += (ulg)s->dyn_dtree[dcode].Freq *
- (5L+extra_dbits[dcode]);
- }
- out_length >>= 3;
- Tracev((stderr,"\nlast_lit %u, in %ld, out ~%ld(%ld%%) ",
- s->last_lit, in_length, out_length,
- 100L - out_length*100L/in_length));
- if (s->matches < s->last_lit/2 && out_length < in_length/2) return 1;
- }
- return (s->last_lit == s->lit_bufsize-1);
- /* We avoid equality with lit_bufsize because of wraparound at 64K
- * on 16 bit machines and because stored blocks are restricted to
- * 64K-1 bytes.
- */
-}
-
-/* ===========================================================================
- * Send the block data compressed using the given Huffman trees
- */
-static void compress_block(
- deflate_state *s,
- ct_data *ltree, /* literal tree */
- ct_data *dtree /* distance tree */
-)
-{
- unsigned dist; /* distance of matched string */
- int lc; /* match length or unmatched char (if dist == 0) */
- unsigned lx = 0; /* running index in l_buf */
- unsigned code; /* the code to send */
- int extra; /* number of extra bits to send */
-
- if (s->last_lit != 0) do {
- dist = s->d_buf[lx];
- lc = s->l_buf[lx++];
- if (dist == 0) {
- send_code(s, lc, ltree); /* send a literal byte */
- Tracecv(isgraph(lc), (stderr," '%c' ", lc));
- } else {
- /* Here, lc is the match length - MIN_MATCH */
- code = length_code[lc];
- send_code(s, code+LITERALS+1, ltree); /* send the length code */
- extra = extra_lbits[code];
- if (extra != 0) {
- lc -= base_length[code];
- send_bits(s, lc, extra); /* send the extra length bits */
- }
- dist--; /* dist is now the match distance - 1 */
- code = d_code(dist);
- Assert (code < D_CODES, "bad d_code");
-
- send_code(s, code, dtree); /* send the distance code */
- extra = extra_dbits[code];
- if (extra != 0) {
- dist -= base_dist[code];
- send_bits(s, dist, extra); /* send the extra distance bits */
- }
- } /* literal or match pair ? */
-
- /* Check that the overlay between pending_buf and d_buf+l_buf is ok: */
- Assert(s->pending < s->lit_bufsize + 2*lx, "pendingBuf overflow");
-
- } while (lx < s->last_lit);
-
- send_code(s, END_BLOCK, ltree);
- s->last_eob_len = ltree[END_BLOCK].Len;
-}
-
-/* ===========================================================================
- * Set the data type to ASCII or BINARY, using a crude approximation:
- * binary if more than 20% of the bytes are <= 6 or >= 128, ascii otherwise.
- * IN assertion: the fields freq of dyn_ltree are set and the total of all
- * frequencies does not exceed 64K (to fit in an int on 16 bit machines).
- */
-static void set_data_type(
- deflate_state *s
-)
-{
- int n = 0;
- unsigned ascii_freq = 0;
- unsigned bin_freq = 0;
- while (n < 7) bin_freq += s->dyn_ltree[n++].Freq;
- while (n < 128) ascii_freq += s->dyn_ltree[n++].Freq;
- while (n < LITERALS) bin_freq += s->dyn_ltree[n++].Freq;
- s->data_type = (Byte)(bin_freq > (ascii_freq >> 2) ? Z_BINARY : Z_ASCII);
-}
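Up to integer truncation, bin_freq > (ascii_freq >> 2) is equivalent to bin_freq > (bin_freq + ascii_freq) / 5, i.e. the block is classified as binary when more than 20% of the tallied bytes fall in the binary ranges, matching the comment above.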
-
-/* ===========================================================================
- * Copy a stored block, storing first the length and its
- * one's complement if requested.
- */
-static void copy_block(
- deflate_state *s,
- char *buf, /* the input data */
- unsigned len, /* its length */
- int header /* true if block header must be written */
-)
-{
- bi_windup(s); /* align on byte boundary */
- s->last_eob_len = 8; /* enough lookahead for inflate */
-
- if (header) {
- put_short(s, (ush)len);
- put_short(s, (ush)~len);
-#ifdef DEBUG_ZLIB
- s->bits_sent += 2*16;
-#endif
- }
-#ifdef DEBUG_ZLIB
- s->bits_sent += (ulg)len<<3;
-#endif
- /* bundle up the put_byte(s, *buf++) calls */
- memcpy(&s->pending_buf[s->pending], buf, len);
- s->pending += len;
-}
-
+++ /dev/null
-
-
-
-#define Assert(err, str)
-#define Trace(dummy)
-#define Tracev(dummy)
-#define Tracecv(err, dummy)
-#define Tracevv(dummy)
-
-
-
-#define LENGTH_CODES 29
-/* number of length codes, not counting the special END_BLOCK code */
-
-#define LITERALS 256
-/* number of literal bytes 0..255 */
-
-#define L_CODES (LITERALS+1+LENGTH_CODES)
-/* number of Literal or Length codes, including the END_BLOCK code */
-
-#define D_CODES 30
-/* number of distance codes */
-
-#define BL_CODES 19
-/* number of codes used to transfer the bit lengths */
-
-#define HEAP_SIZE (2*L_CODES+1)
-/* maximum heap size */
-
-#define MAX_BITS 15
-/* All codes must not exceed MAX_BITS bits */
-
-#define INIT_STATE 42
-#define BUSY_STATE 113
-#define FINISH_STATE 666
-/* Stream status */
-
-
-/* Data structure describing a single value and its code string. */
-typedef struct ct_data_s {
- union {
- ush freq; /* frequency count */
- ush code; /* bit string */
- } fc;
- union {
- ush dad; /* father node in Huffman tree */
- ush len; /* length of bit string */
- } dl;
-} ct_data;
-
-#define Freq fc.freq
-#define Code fc.code
-#define Dad dl.dad
-#define Len dl.len
-
-typedef struct static_tree_desc_s static_tree_desc;
-
-typedef struct tree_desc_s {
- ct_data *dyn_tree; /* the dynamic tree */
- int max_code; /* largest code with non zero frequency */
- static_tree_desc *stat_desc; /* the corresponding static tree */
-} tree_desc;
-
-typedef ush Pos;
-typedef unsigned IPos;
-
-/* A Pos is an index in the character window. We use short instead of int to
- * save space in the various tables. IPos is used only for parameter passing.
- */
-
-typedef struct deflate_state {
- z_streamp strm; /* pointer back to this zlib stream */
- int status; /* as the name implies */
- Byte *pending_buf; /* output still pending */
- ulg pending_buf_size; /* size of pending_buf */
- Byte *pending_out; /* next pending byte to output to the stream */
- int pending; /* nb of bytes in the pending buffer */
- int noheader; /* suppress zlib header and adler32 */
- Byte data_type; /* UNKNOWN, BINARY or ASCII */
- Byte method; /* STORED (for zip only) or DEFLATED */
- int last_flush; /* value of flush param for previous deflate call */
-
- /* used by deflate.c: */
-
- uInt w_size; /* LZ77 window size (32K by default) */
- uInt w_bits; /* log2(w_size) (8..16) */
- uInt w_mask; /* w_size - 1 */
-
- Byte *window;
- /* Sliding window. Input bytes are read into the second half of the window,
- * and move to the first half later to keep a dictionary of at least wSize
- * bytes. With this organization, matches are limited to a distance of
- * wSize-MAX_MATCH bytes, but this ensures that IO is always
- * performed with a length multiple of the block size. Also, it limits
- * the window size to 64K, which is quite useful on MSDOS.
- * To do: use the user input buffer as sliding window.
- */
-
- ulg window_size;
- /* Actual size of window: 2*wSize, except when the user input buffer
- * is directly used as sliding window.
- */
-
- Pos *prev;
- /* Link to older string with same hash index. To limit the size of this
- * array to 64K, this link is maintained only for the last 32K strings.
- * An index in this array is thus a window index modulo 32K.
- */
-
- Pos *head; /* Heads of the hash chains or NIL. */
-
- uInt ins_h; /* hash index of string to be inserted */
- uInt hash_size; /* number of elements in hash table */
- uInt hash_bits; /* log2(hash_size) */
- uInt hash_mask; /* hash_size-1 */
-
- uInt hash_shift;
- /* Number of bits by which ins_h must be shifted at each input
- * step. It must be such that after MIN_MATCH steps, the oldest
- * byte no longer takes part in the hash key, that is:
- * hash_shift * MIN_MATCH >= hash_bits
- */
-
- long block_start;
- /* Window position at the beginning of the current output block. Gets
- * negative when the window is moved backwards.
- */
-
- uInt match_length; /* length of best match */
- IPos prev_match; /* previous match */
- int match_available; /* set if previous match exists */
- uInt strstart; /* start of string to insert */
- uInt match_start; /* start of matching string */
- uInt lookahead; /* number of valid bytes ahead in window */
-
- uInt prev_length;
- /* Length of the best match at previous step. Matches not greater than this
- * are discarded. This is used in the lazy match evaluation.
- */
-
- uInt max_chain_length;
- /* To speed up deflation, hash chains are never searched beyond this
- * length. A higher limit improves compression ratio but degrades the
- * speed.
- */
-
- uInt max_lazy_match;
- /* Attempt to find a better match only when the current match is strictly
- * smaller than this value. This mechanism is used only for compression
- * levels >= 4.
- */
-# define max_insert_length max_lazy_match
- /* Insert new strings in the hash table only if the match length is not
- * greater than this length. This saves time but degrades compression.
- * max_insert_length is used only for compression levels <= 3.
- */
-
- int level; /* compression level (1..9) */
- int strategy; /* favor or force Huffman coding*/
-
- uInt good_match;
- /* Use a faster search when the previous match is longer than this */
-
- int nice_match; /* Stop searching when current match exceeds this */
-
- /* used by trees.c: */
- /* Didn't use ct_data typedef below to suppress compiler warning */
- struct ct_data_s dyn_ltree[HEAP_SIZE]; /* literal and length tree */
- struct ct_data_s dyn_dtree[2*D_CODES+1]; /* distance tree */
- struct ct_data_s bl_tree[2*BL_CODES+1]; /* Huffman tree for bit lengths */
-
- struct tree_desc_s l_desc; /* desc. for literal tree */
- struct tree_desc_s d_desc; /* desc. for distance tree */
- struct tree_desc_s bl_desc; /* desc. for bit length tree */
-
- ush bl_count[MAX_BITS+1];
- /* number of codes at each bit length for an optimal tree */
-
- int heap[2*L_CODES+1]; /* heap used to build the Huffman trees */
- int heap_len; /* number of elements in the heap */
- int heap_max; /* element of largest frequency */
- /* The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used.
- * The same heap array is used to build all trees.
- */
-
- uch depth[2*L_CODES+1];
- /* Depth of each subtree used as tie breaker for trees of equal frequency
- */
-
- uch *l_buf; /* buffer for literals or lengths */
-
- uInt lit_bufsize;
- /* Size of match buffer for literals/lengths. There are 4 reasons for
- * limiting lit_bufsize to 64K:
- * - frequencies can be kept in 16 bit counters
- * - if compression is not successful for the first block, all input
- * data is still in the window so we can still emit a stored block even
- * when input comes from standard input. (This can also be done for
- * all blocks if lit_bufsize is not greater than 32K.)
- * - if compression is not successful for a file smaller than 64K, we can
- * even emit a stored file instead of a stored block (saving 5 bytes).
- * This is applicable only for zip (not gzip or zlib).
- * - creating new Huffman trees less frequently may not provide fast
- * adaptation to changes in the input data statistics. (Take for
- * example a binary file with poorly compressible code followed by
- * a highly compressible string table.) Smaller buffer sizes give
- * fast adaptation but have of course the overhead of transmitting
- * trees more frequently.
- * - I can't count above 4
- */
-
- uInt last_lit; /* running index in l_buf */
-
- ush *d_buf;
- /* Buffer for distances. To simplify the code, d_buf and l_buf have
- * the same number of elements. To use different lengths, an extra flag
- * array would be necessary.
- */
-
- ulg opt_len; /* bit length of current block with optimal trees */
- ulg static_len; /* bit length of current block with static trees */
- ulg compressed_len; /* total bit length of compressed file */
- uInt matches; /* number of string matches in current block */
- int last_eob_len; /* bit length of EOB code for last block */
-
-#ifdef DEBUG_ZLIB
- ulg bits_sent; /* bit length of the compressed data */
-#endif
-
- ush bi_buf;
- /* Output buffer. bits are inserted starting at the bottom (least
- * significant bits).
- */
- int bi_valid;
- /* Number of valid bits in bi_buf. All bits above the last valid bit
- * are always zero.
- */
-
-} deflate_state;
-
-typedef struct deflate_workspace {
- /* State memory for the deflator */
- deflate_state deflate_memory;
- Byte *window_memory;
- Pos *prev_memory;
- Pos *head_memory;
- char *overlay_memory;
-} deflate_workspace;
-
-#define zlib_deflate_window_memsize(windowBits) \
- (2 * (1 << (windowBits)) * sizeof(Byte))
-#define zlib_deflate_prev_memsize(windowBits) \
- ((1 << (windowBits)) * sizeof(Pos))
-#define zlib_deflate_head_memsize(memLevel) \
- ((1 << ((memLevel)+7)) * sizeof(Pos))
-#define zlib_deflate_overlay_memsize(memLevel) \
- ((1 << ((memLevel)+6)) * (sizeof(ush)+2))
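As a hedged aside (not part of the original header), the four memsize macros above add up to the per-stream buffer budget a caller has to provide. Assuming Byte is one byte and Pos/ush are two bytes, as the typedefs used by this code imply, the default parameters come to 256 KiB of buffers plus the deflate_state itself:

    #include <stdio.h>

    /* Same arithmetic as the macros above, with the type sizes spelled out. */
    #define WINDOW_MEM(wb)   (2UL * (1UL << (wb)) * 1)        /* window_memsize  */
    #define PREV_MEM(wb)     ((1UL << (wb)) * 2)              /* prev_memsize    */
    #define HEAD_MEM(ml)     ((1UL << ((ml) + 7)) * 2)        /* head_memsize    */
    #define OVERLAY_MEM(ml)  ((1UL << ((ml) + 6)) * (2 + 2))  /* overlay_memsize */

    int main(void)
    {
        int wb = 15, ml = 8;    /* the usual defaults */
        unsigned long total = WINDOW_MEM(wb) + PREV_MEM(wb)
                            + HEAD_MEM(ml) + OVERLAY_MEM(ml);

        /* 4 * 65536 = 262144 bytes of buffers */
        printf("deflate buffers for windowBits=%d memLevel=%d: %lu bytes\n",
               wb, ml, total);
        return 0;
    }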
-
-/* Output a byte on the stream.
- * IN assertion: there is enough room in pending_buf.
- */
-#define put_byte(s, c) {s->pending_buf[s->pending++] = (c);}
-
-
-#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1)
-/* Minimum amount of lookahead, except at the end of the input file.
- * See deflate.c for comments about the MIN_MATCH+1.
- */
-
-#define MAX_DIST(s) ((s)->w_size-MIN_LOOKAHEAD)
-/* In order to simplify the code, particularly on 16 bit machines, match
- * distances are limited to MAX_DIST instead of WSIZE.
- */
-
- /* in trees.c */
-void zlib_tr_init (deflate_state *s);
-int zlib_tr_tally (deflate_state *s, unsigned dist, unsigned lc);
-ulg zlib_tr_flush_block (deflate_state *s, char *buf, ulg stored_len,
- int eof);
-void zlib_tr_align (deflate_state *s);
-void zlib_tr_stored_block (deflate_state *s, char *buf, ulg stored_len,
- int eof);
-void zlib_tr_stored_type_only (deflate_state *);
-
-
-/* ===========================================================================
- * Output a short LSB first on the stream.
- * IN assertion: there is enough room in pendingBuf.
- */
-#define put_short(s, w) { \
- put_byte(s, (uch)((w) & 0xff)); \
- put_byte(s, (uch)((ush)(w) >> 8)); \
-}
-
-/* ===========================================================================
- * Flush the bit buffer, keeping at most 7 bits in it.
- */
-static inline void bi_flush(deflate_state *s)
-{
- if (s->bi_valid == 16) {
- put_short(s, s->bi_buf);
- s->bi_buf = 0;
- s->bi_valid = 0;
- } else if (s->bi_valid >= 8) {
- put_byte(s, (Byte)s->bi_buf);
- s->bi_buf >>= 8;
- s->bi_valid -= 8;
- }
-}
-
-/* ===========================================================================
- * Flush the bit buffer and align the output on a byte boundary
- */
-static inline void bi_windup(deflate_state *s)
-{
- if (s->bi_valid > 8) {
- put_short(s, s->bi_buf);
- } else if (s->bi_valid > 0) {
- put_byte(s, (Byte)s->bi_buf);
- }
- s->bi_buf = 0;
- s->bi_valid = 0;
-#ifdef DEBUG_ZLIB
- s->bits_sent = (s->bits_sent+7) & ~7;
-#endif
-}
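A stand-alone toy model (not the original deflate_state) of the bi_buf/bi_valid scheme that bi_flush() and bi_windup() manage above: codes are ORed in at the current bit position and complete bytes leave the buffer LSB-first.

    #include <stdio.h>

    static unsigned short bi_buf;   /* pending bits, filled from the bottom */
    static int bi_valid;            /* number of valid bits in bi_buf       */

    /* Append `length' low-order bits of `value', flushing whole bytes. */
    static void send_bits(unsigned value, int length)
    {
        bi_buf |= (unsigned short)(value << bi_valid);
        bi_valid += length;
        while (bi_valid >= 8) {
            printf("emit byte 0x%02x\n", bi_buf & 0xff);
            bi_buf >>= 8;
            bi_valid -= 8;
        }
    }

    int main(void)
    {
        send_bits(0x5, 3);    /* 101 -- stays buffered                */
        send_bits(0x3f, 6);   /* 111111 -- crosses the byte boundary  */
        /* one byte (0xfd) is emitted; a final bit stays in bi_buf,
         * which is what bi_windup() would pad out and flush          */
        return 0;
    }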
-
+++ /dev/null
-/* inffast.c -- fast decoding
- * Copyright (C) 1995-2004 Mark Adler
- * For conditions of distribution and use, see copyright notice in zlib.h
- */
-
-#include <linux/zutil.h>
-#include "inftrees.h"
-#include "inflate.h"
-#include "inffast.h"
-
-#ifndef ASMINF
-
-/* Allow machine dependent optimization for post-increment or pre-increment.
- Based on testing to date,
- Pre-increment preferred for:
- - PowerPC G3 (Adler)
- - MIPS R5000 (Randers-Pehrson)
- Post-increment preferred for:
- - none
- No measurable difference:
- - Pentium III (Anderson)
- - M68060 (Nikl)
- */
-union uu {
- unsigned short us;
- unsigned char b[2];
-};
-
-/* Endian-independent version */
-static inline unsigned short
-get_unaligned16(const unsigned short *p)
-{
- union uu mm;
- unsigned char *b = (unsigned char *)p;
-
- mm.b[0] = b[0];
- mm.b[1] = b[1];
- return mm.us;
-}
-
-#ifdef POSTINC
-# define OFF 0
-# define PUP(a) *(a)++
-# define UP_UNALIGNED(a) get_unaligned16((a)++)
-#else
-# define OFF 1
-# define PUP(a) *++(a)
-# define UP_UNALIGNED(a) get_unaligned16(++(a))
-#endif
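A stand-alone illustration (not from this file) of why the OFF/PUP pair exists: in the pre-increment flavour the pointers simply start one element early, so both variants touch exactly the same bytes and the rest of inflate_fast() can be written once.

    #include <assert.h>
    #include <string.h>

    int main(void)
    {
        const unsigned char srcbuf[5] = { 0, 1, 2, 3, 4 };
        const unsigned char *src = srcbuf + 1;   /* the "real" data: 1,2,3,4 */
        unsigned char abuf[5] = { 0 }, bbuf[5] = { 0 };
        const unsigned char *in;
        unsigned char *out;
        int i;

        /* POSTINC flavour (OFF == 0): pointers start on the data itself */
        in = src;
        out = abuf + 1;
        for (i = 0; i < 4; i++)
            *(out)++ = *(in)++;              /* PUP(a) is *(a)++ */

        /* default flavour (OFF == 1): pointers start one element early */
        in = src - 1;                        /* == srcbuf, still a valid pointer */
        out = bbuf;
        for (i = 0; i < 4; i++)
            *++(out) = *++(in);              /* PUP(a) is *++(a) */

        assert(memcmp(abuf + 1, bbuf + 1, 4) == 0);   /* identical copies */
        return 0;
    }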
-
-/*
- Decode literal, length, and distance codes and write out the resulting
- literal and match bytes until either not enough input or output is
- available, an end-of-block is encountered, or a data error is encountered.
- When large enough input and output buffers are supplied to inflate(), for
- example, a 16K input buffer and a 64K output buffer, more than 95% of the
- inflate execution time is spent in this routine.
-
- Entry assumptions:
-
- state->mode == LEN
- strm->avail_in >= 6
- strm->avail_out >= 258
- start >= strm->avail_out
- state->bits < 8
-
- On return, state->mode is one of:
-
- LEN -- ran out of enough output space or enough available input
- TYPE -- reached end of block code, inflate() to interpret next block
- BAD -- error in block data
-
- Notes:
-
- - The maximum input bits used by a length/distance pair is 15 bits for the
- length code, 5 bits for the length extra, 15 bits for the distance code,
- and 13 bits for the distance extra. This totals 48 bits, or six bytes.
- Therefore if strm->avail_in >= 6, then there is enough input to avoid
- checking for available input while decoding.
-
- - The maximum bytes that a single length/distance pair can output is 258
- bytes, which is the maximum length that can be coded. inflate_fast()
- requires strm->avail_out >= 258 for each loop to avoid checking for
- output space.
-
- - @start: inflate()'s starting value for strm->avail_out
- */
-void inflate_fast(z_streamp strm, unsigned start)
-{
- struct inflate_state *state;
- const unsigned char *in; /* local strm->next_in */
- const unsigned char *last; /* while in < last, enough input available */
- unsigned char *out; /* local strm->next_out */
- unsigned char *beg; /* inflate()'s initial strm->next_out */
- unsigned char *end; /* while out < end, enough space available */
-#ifdef INFLATE_STRICT
- unsigned dmax; /* maximum distance from zlib header */
-#endif
- unsigned wsize; /* window size or zero if not using window */
- unsigned whave; /* valid bytes in the window */
- unsigned write; /* window write index */
- unsigned char *window; /* allocated sliding window, if wsize != 0 */
- unsigned long hold; /* local strm->hold */
- unsigned bits; /* local strm->bits */
- code const *lcode; /* local strm->lencode */
- code const *dcode; /* local strm->distcode */
- unsigned lmask; /* mask for first level of length codes */
- unsigned dmask; /* mask for first level of distance codes */
- code this; /* retrieved table entry */
- unsigned op; /* code bits, operation, extra bits, or */
- /* window position, window bytes to copy */
- unsigned len; /* match length, unused bytes */
- unsigned dist; /* match distance */
- unsigned char *from; /* where to copy match from */
-
- /* copy state to local variables */
- state = (struct inflate_state *)strm->state;
- in = strm->next_in - OFF;
- last = in + (strm->avail_in - 5);
- out = strm->next_out - OFF;
- beg = out - (start - strm->avail_out);
- end = out + (strm->avail_out - 257);
-#ifdef INFLATE_STRICT
- dmax = state->dmax;
-#endif
- wsize = state->wsize;
- whave = state->whave;
- write = state->write;
- window = state->window;
- hold = state->hold;
- bits = state->bits;
- lcode = state->lencode;
- dcode = state->distcode;
- lmask = (1U << state->lenbits) - 1;
- dmask = (1U << state->distbits) - 1;
-
- /* decode literals and length/distances until end-of-block or not enough
- input data or output space */
- do {
- if (bits < 15) {
- hold += (unsigned long)(PUP(in)) << bits;
- bits += 8;
- hold += (unsigned long)(PUP(in)) << bits;
- bits += 8;
- }
- this = lcode[hold & lmask];
- dolen:
- op = (unsigned)(this.bits);
- hold >>= op;
- bits -= op;
- op = (unsigned)(this.op);
- if (op == 0) { /* literal */
- PUP(out) = (unsigned char)(this.val);
- }
- else if (op & 16) { /* length base */
- len = (unsigned)(this.val);
- op &= 15; /* number of extra bits */
- if (op) {
- if (bits < op) {
- hold += (unsigned long)(PUP(in)) << bits;
- bits += 8;
- }
- len += (unsigned)hold & ((1U << op) - 1);
- hold >>= op;
- bits -= op;
- }
- if (bits < 15) {
- hold += (unsigned long)(PUP(in)) << bits;
- bits += 8;
- hold += (unsigned long)(PUP(in)) << bits;
- bits += 8;
- }
- this = dcode[hold & dmask];
- dodist:
- op = (unsigned)(this.bits);
- hold >>= op;
- bits -= op;
- op = (unsigned)(this.op);
- if (op & 16) { /* distance base */
- dist = (unsigned)(this.val);
- op &= 15; /* number of extra bits */
- if (bits < op) {
- hold += (unsigned long)(PUP(in)) << bits;
- bits += 8;
- if (bits < op) {
- hold += (unsigned long)(PUP(in)) << bits;
- bits += 8;
- }
- }
- dist += (unsigned)hold & ((1U << op) - 1);
-#ifdef INFLATE_STRICT
- if (dist > dmax) {
- strm->msg = (char *)"invalid distance too far back";
- state->mode = BAD;
- break;
- }
-#endif
- hold >>= op;
- bits -= op;
- op = (unsigned)(out - beg); /* max distance in output */
- if (dist > op) { /* see if copy from window */
- op = dist - op; /* distance back in window */
- if (op > whave) {
- strm->msg = (char *)"invalid distance too far back";
- state->mode = BAD;
- break;
- }
- from = window - OFF;
- if (write == 0) { /* very common case */
- from += wsize - op;
- if (op < len) { /* some from window */
- len -= op;
- do {
- PUP(out) = PUP(from);
- } while (--op);
- from = out - dist; /* rest from output */
- }
- }
- else if (write < op) { /* wrap around window */
- from += wsize + write - op;
- op -= write;
- if (op < len) { /* some from end of window */
- len -= op;
- do {
- PUP(out) = PUP(from);
- } while (--op);
- from = window - OFF;
- if (write < len) { /* some from start of window */
- op = write;
- len -= op;
- do {
- PUP(out) = PUP(from);
- } while (--op);
- from = out - dist; /* rest from output */
- }
- }
- }
- else { /* contiguous in window */
- from += write - op;
- if (op < len) { /* some from window */
- len -= op;
- do {
- PUP(out) = PUP(from);
- } while (--op);
- from = out - dist; /* rest from output */
- }
- }
- while (len > 2) {
- PUP(out) = PUP(from);
- PUP(out) = PUP(from);
- PUP(out) = PUP(from);
- len -= 3;
- }
- if (len) {
- PUP(out) = PUP(from);
- if (len > 1)
- PUP(out) = PUP(from);
- }
- }
- else {
- unsigned short *sout;
- unsigned long loops;
-
- from = out - dist; /* copy direct from output */
- /* minimum length is three */
- /* Align out addr */
- if (!((long)(out - 1 + OFF) & 1)) {
- PUP(out) = PUP(from);
- len--;
- }
- sout = (unsigned short *)(out - OFF);
- if (dist > 2) {
- unsigned short *sfrom;
-
- sfrom = (unsigned short *)(from - OFF);
- loops = len >> 1;
- do
-#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
- PUP(sout) = PUP(sfrom);
-#else
- PUP(sout) = UP_UNALIGNED(sfrom);
-#endif
- while (--loops);
- out = (unsigned char *)sout + OFF;
- from = (unsigned char *)sfrom + OFF;
- } else { /* dist == 1 or dist == 2 */
- unsigned short pat16;
-
- pat16 = *(sout-1+OFF);
- if (dist == 1) {
- union uu mm;
- /* copy one char pattern to both bytes */
- mm.us = pat16;
- mm.b[0] = mm.b[1];
- pat16 = mm.us;
- }
- loops = len >> 1;
- do
- PUP(sout) = pat16;
- while (--loops);
- out = (unsigned char *)sout + OFF;
- }
- if (len & 1)
- PUP(out) = PUP(from);
- }
- }
- else if ((op & 64) == 0) { /* 2nd level distance code */
- this = dcode[this.val + (hold & ((1U << op) - 1))];
- goto dodist;
- }
- else {
- strm->msg = (char *)"invalid distance code";
- state->mode = BAD;
- break;
- }
- }
- else if ((op & 64) == 0) { /* 2nd level length code */
- this = lcode[this.val + (hold & ((1U << op) - 1))];
- goto dolen;
- }
- else if (op & 32) { /* end-of-block */
- state->mode = TYPE;
- break;
- }
- else {
- strm->msg = (char *)"invalid literal/length code";
- state->mode = BAD;
- break;
- }
- } while (in < last && out < end);
-
- /* return unused bytes (on entry, bits < 8, so in won't go too far back) */
- len = bits >> 3;
- in -= len;
- bits -= len << 3;
- hold &= (1U << bits) - 1;
-
- /* update state and return */
- strm->next_in = in + OFF;
- strm->next_out = out + OFF;
- strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last));
- strm->avail_out = (unsigned)(out < end ?
- 257 + (end - out) : 257 - (out - end));
- state->hold = hold;
- state->bits = bits;
- return;
-}
-
-/*
- inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe):
- - Using bit fields for code structure
- - Different op definition to avoid & for extra bits (do & for table bits)
- - Three separate decoding do-loops for direct, window, and write == 0
- - Special case for distance > 1 copies to do overlapped load and store copy
- - Explicit branch predictions (based on measured branch probabilities)
- - Deferring match copy and interspersed it with decoding subsequent codes
- - Swapping literal/length else
- - Swapping window/direct else
- - Larger unrolled copy loops (three is about right)
- - Moving len -= 3 statement into middle of loop
- */
-
-#endif /* !ASMINF */
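A hedged sanity check (not part of the original) of where the entry assumptions in the header comment of inflate_fast() come from: the 6-byte input guarantee is the worst-case bit cost of one length/distance pair, and the 258-byte output guarantee is the longest match deflate can encode.

    #include <stdio.h>

    int main(void)
    {
        int worst_pair_bits = 15    /* length code    */
                            + 5     /* length extra   */
                            + 15    /* distance code  */
                            + 13;   /* distance extra */

        printf("worst-case bits per length/distance pair: %d (= %d bytes)\n",
               worst_pair_bits, (worst_pair_bits + 7) / 8);   /* 48 bits, 6 bytes */
        printf("largest single copy: %d bytes\n", 258);       /* max match length */
        return 0;
    }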
+++ /dev/null
-/* inffast.h -- header to use inffast.c
- * Copyright (C) 1995-2003 Mark Adler
- * For conditions of distribution and use, see copyright notice in zlib.h
- */
-
-/* WARNING: this file should *not* be used by applications. It is
- part of the implementation of the compression library and is
- subject to change. Applications should only use zlib.h.
- */
-
-void inflate_fast (z_streamp strm, unsigned start);
+++ /dev/null
- /* inffixed.h -- table for decoding fixed codes
- * Generated automatically by makefixed().
- */
-
- /* WARNING: this file should *not* be used by applications. It
- is part of the implementation of the compression library and
- is subject to change. Applications should only use zlib.h.
- */
-
- static const code lenfix[512] = {
- {96,7,0},{0,8,80},{0,8,16},{20,8,115},{18,7,31},{0,8,112},{0,8,48},
- {0,9,192},{16,7,10},{0,8,96},{0,8,32},{0,9,160},{0,8,0},{0,8,128},
- {0,8,64},{0,9,224},{16,7,6},{0,8,88},{0,8,24},{0,9,144},{19,7,59},
- {0,8,120},{0,8,56},{0,9,208},{17,7,17},{0,8,104},{0,8,40},{0,9,176},
- {0,8,8},{0,8,136},{0,8,72},{0,9,240},{16,7,4},{0,8,84},{0,8,20},
- {21,8,227},{19,7,43},{0,8,116},{0,8,52},{0,9,200},{17,7,13},{0,8,100},
- {0,8,36},{0,9,168},{0,8,4},{0,8,132},{0,8,68},{0,9,232},{16,7,8},
- {0,8,92},{0,8,28},{0,9,152},{20,7,83},{0,8,124},{0,8,60},{0,9,216},
- {18,7,23},{0,8,108},{0,8,44},{0,9,184},{0,8,12},{0,8,140},{0,8,76},
- {0,9,248},{16,7,3},{0,8,82},{0,8,18},{21,8,163},{19,7,35},{0,8,114},
- {0,8,50},{0,9,196},{17,7,11},{0,8,98},{0,8,34},{0,9,164},{0,8,2},
- {0,8,130},{0,8,66},{0,9,228},{16,7,7},{0,8,90},{0,8,26},{0,9,148},
- {20,7,67},{0,8,122},{0,8,58},{0,9,212},{18,7,19},{0,8,106},{0,8,42},
- {0,9,180},{0,8,10},{0,8,138},{0,8,74},{0,9,244},{16,7,5},{0,8,86},
- {0,8,22},{64,8,0},{19,7,51},{0,8,118},{0,8,54},{0,9,204},{17,7,15},
- {0,8,102},{0,8,38},{0,9,172},{0,8,6},{0,8,134},{0,8,70},{0,9,236},
- {16,7,9},{0,8,94},{0,8,30},{0,9,156},{20,7,99},{0,8,126},{0,8,62},
- {0,9,220},{18,7,27},{0,8,110},{0,8,46},{0,9,188},{0,8,14},{0,8,142},
- {0,8,78},{0,9,252},{96,7,0},{0,8,81},{0,8,17},{21,8,131},{18,7,31},
- {0,8,113},{0,8,49},{0,9,194},{16,7,10},{0,8,97},{0,8,33},{0,9,162},
- {0,8,1},{0,8,129},{0,8,65},{0,9,226},{16,7,6},{0,8,89},{0,8,25},
- {0,9,146},{19,7,59},{0,8,121},{0,8,57},{0,9,210},{17,7,17},{0,8,105},
- {0,8,41},{0,9,178},{0,8,9},{0,8,137},{0,8,73},{0,9,242},{16,7,4},
- {0,8,85},{0,8,21},{16,8,258},{19,7,43},{0,8,117},{0,8,53},{0,9,202},
- {17,7,13},{0,8,101},{0,8,37},{0,9,170},{0,8,5},{0,8,133},{0,8,69},
- {0,9,234},{16,7,8},{0,8,93},{0,8,29},{0,9,154},{20,7,83},{0,8,125},
- {0,8,61},{0,9,218},{18,7,23},{0,8,109},{0,8,45},{0,9,186},{0,8,13},
- {0,8,141},{0,8,77},{0,9,250},{16,7,3},{0,8,83},{0,8,19},{21,8,195},
- {19,7,35},{0,8,115},{0,8,51},{0,9,198},{17,7,11},{0,8,99},{0,8,35},
- {0,9,166},{0,8,3},{0,8,131},{0,8,67},{0,9,230},{16,7,7},{0,8,91},
- {0,8,27},{0,9,150},{20,7,67},{0,8,123},{0,8,59},{0,9,214},{18,7,19},
- {0,8,107},{0,8,43},{0,9,182},{0,8,11},{0,8,139},{0,8,75},{0,9,246},
- {16,7,5},{0,8,87},{0,8,23},{64,8,0},{19,7,51},{0,8,119},{0,8,55},
- {0,9,206},{17,7,15},{0,8,103},{0,8,39},{0,9,174},{0,8,7},{0,8,135},
- {0,8,71},{0,9,238},{16,7,9},{0,8,95},{0,8,31},{0,9,158},{20,7,99},
- {0,8,127},{0,8,63},{0,9,222},{18,7,27},{0,8,111},{0,8,47},{0,9,190},
- {0,8,15},{0,8,143},{0,8,79},{0,9,254},{96,7,0},{0,8,80},{0,8,16},
- {20,8,115},{18,7,31},{0,8,112},{0,8,48},{0,9,193},{16,7,10},{0,8,96},
- {0,8,32},{0,9,161},{0,8,0},{0,8,128},{0,8,64},{0,9,225},{16,7,6},
- {0,8,88},{0,8,24},{0,9,145},{19,7,59},{0,8,120},{0,8,56},{0,9,209},
- {17,7,17},{0,8,104},{0,8,40},{0,9,177},{0,8,8},{0,8,136},{0,8,72},
- {0,9,241},{16,7,4},{0,8,84},{0,8,20},{21,8,227},{19,7,43},{0,8,116},
- {0,8,52},{0,9,201},{17,7,13},{0,8,100},{0,8,36},{0,9,169},{0,8,4},
- {0,8,132},{0,8,68},{0,9,233},{16,7,8},{0,8,92},{0,8,28},{0,9,153},
- {20,7,83},{0,8,124},{0,8,60},{0,9,217},{18,7,23},{0,8,108},{0,8,44},
- {0,9,185},{0,8,12},{0,8,140},{0,8,76},{0,9,249},{16,7,3},{0,8,82},
- {0,8,18},{21,8,163},{19,7,35},{0,8,114},{0,8,50},{0,9,197},{17,7,11},
- {0,8,98},{0,8,34},{0,9,165},{0,8,2},{0,8,130},{0,8,66},{0,9,229},
- {16,7,7},{0,8,90},{0,8,26},{0,9,149},{20,7,67},{0,8,122},{0,8,58},
- {0,9,213},{18,7,19},{0,8,106},{0,8,42},{0,9,181},{0,8,10},{0,8,138},
- {0,8,74},{0,9,245},{16,7,5},{0,8,86},{0,8,22},{64,8,0},{19,7,51},
- {0,8,118},{0,8,54},{0,9,205},{17,7,15},{0,8,102},{0,8,38},{0,9,173},
- {0,8,6},{0,8,134},{0,8,70},{0,9,237},{16,7,9},{0,8,94},{0,8,30},
- {0,9,157},{20,7,99},{0,8,126},{0,8,62},{0,9,221},{18,7,27},{0,8,110},
- {0,8,46},{0,9,189},{0,8,14},{0,8,142},{0,8,78},{0,9,253},{96,7,0},
- {0,8,81},{0,8,17},{21,8,131},{18,7,31},{0,8,113},{0,8,49},{0,9,195},
- {16,7,10},{0,8,97},{0,8,33},{0,9,163},{0,8,1},{0,8,129},{0,8,65},
- {0,9,227},{16,7,6},{0,8,89},{0,8,25},{0,9,147},{19,7,59},{0,8,121},
- {0,8,57},{0,9,211},{17,7,17},{0,8,105},{0,8,41},{0,9,179},{0,8,9},
- {0,8,137},{0,8,73},{0,9,243},{16,7,4},{0,8,85},{0,8,21},{16,8,258},
- {19,7,43},{0,8,117},{0,8,53},{0,9,203},{17,7,13},{0,8,101},{0,8,37},
- {0,9,171},{0,8,5},{0,8,133},{0,8,69},{0,9,235},{16,7,8},{0,8,93},
- {0,8,29},{0,9,155},{20,7,83},{0,8,125},{0,8,61},{0,9,219},{18,7,23},
- {0,8,109},{0,8,45},{0,9,187},{0,8,13},{0,8,141},{0,8,77},{0,9,251},
- {16,7,3},{0,8,83},{0,8,19},{21,8,195},{19,7,35},{0,8,115},{0,8,51},
- {0,9,199},{17,7,11},{0,8,99},{0,8,35},{0,9,167},{0,8,3},{0,8,131},
- {0,8,67},{0,9,231},{16,7,7},{0,8,91},{0,8,27},{0,9,151},{20,7,67},
- {0,8,123},{0,8,59},{0,9,215},{18,7,19},{0,8,107},{0,8,43},{0,9,183},
- {0,8,11},{0,8,139},{0,8,75},{0,9,247},{16,7,5},{0,8,87},{0,8,23},
- {64,8,0},{19,7,51},{0,8,119},{0,8,55},{0,9,207},{17,7,15},{0,8,103},
- {0,8,39},{0,9,175},{0,8,7},{0,8,135},{0,8,71},{0,9,239},{16,7,9},
- {0,8,95},{0,8,31},{0,9,159},{20,7,99},{0,8,127},{0,8,63},{0,9,223},
- {18,7,27},{0,8,111},{0,8,47},{0,9,191},{0,8,15},{0,8,143},{0,8,79},
- {0,9,255}
- };
-
- static const code distfix[32] = {
- {16,5,1},{23,5,257},{19,5,17},{27,5,4097},{17,5,5},{25,5,1025},
- {21,5,65},{29,5,16385},{16,5,3},{24,5,513},{20,5,33},{28,5,8193},
- {18,5,9},{26,5,2049},{22,5,129},{64,5,0},{16,5,2},{23,5,385},
- {19,5,25},{27,5,6145},{17,5,7},{25,5,1537},{21,5,97},{29,5,24577},
- {16,5,4},{24,5,769},{20,5,49},{28,5,12289},{18,5,13},{26,5,3073},
- {22,5,193},{64,5,0}
- };
+++ /dev/null
-/* inflate.c -- zlib decompression
- * Copyright (C) 1995-2005 Mark Adler
- * For conditions of distribution and use, see copyright notice in zlib.h
- *
- * Based on zlib 1.2.3 but modified for the Linux Kernel by
- * Richard Purdie <richard@openedhand.com>
- *
- * Changes mainly for static instead of dynamic memory allocation
- *
- */
-
-#include <linux/zutil.h>
-#include "inftrees.h"
-#include "inflate.h"
-#include "inffast.h"
-#include "infutil.h"
-
-int zlib_inflate_workspacesize(void)
-{
- return sizeof(struct inflate_workspace);
-}
-
-int zlib_inflateReset(z_streamp strm)
-{
- struct inflate_state *state;
-
- if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR;
- state = (struct inflate_state *)strm->state;
- strm->total_in = strm->total_out = state->total = 0;
- strm->msg = NULL;
- strm->adler = 1; /* to support ill-conceived Java test suite */
- state->mode = HEAD;
- state->last = 0;
- state->havedict = 0;
- state->dmax = 32768U;
- state->hold = 0;
- state->bits = 0;
- state->lencode = state->distcode = state->next = state->codes;
-
- /* Initialise Window */
- state->wsize = 1U << state->wbits;
- state->write = 0;
- state->whave = 0;
-
- return Z_OK;
-}
-
-int zlib_inflateInit2(z_streamp strm, int windowBits)
-{
- struct inflate_state *state;
-
- if (strm == NULL) return Z_STREAM_ERROR;
- strm->msg = NULL; /* in case we return an error */
-
- state = &WS(strm)->inflate_state;
- strm->state = (struct internal_state *)state;
-
- if (windowBits < 0) {
- state->wrap = 0;
- windowBits = -windowBits;
- }
- else {
- state->wrap = (windowBits >> 4) + 1;
- }
- if (windowBits < 8 || windowBits > 15) {
- return Z_STREAM_ERROR;
- }
- state->wbits = (unsigned)windowBits;
- state->window = &WS(strm)->working_window[0];
-
- return zlib_inflateReset(strm);
-}
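A hedged usage fragment (the `ws' buffer is hypothetical; the pattern mirrors zlib_inflate_blob() further down): the sign of windowBits selects the framing, the same convention stock zlib uses for inflateInit2().

    /* strm->workspace must point at zlib_inflate_workspacesize() bytes */
    strm->workspace = ws;

    zlib_inflateInit2(strm, 15);           /* zlib-wrapped: header + adler32 trailer */
    /* ...or, for raw deflate data with no wrapper: */
    zlib_inflateInit2(strm, -MAX_WBITS);   /* negative windowBits clears `wrap' above */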
-
-/*
- Return state with length and distance decoding tables and index sizes set to
- fixed code decoding. This returns fixed tables from inffixed.h.
- */
-static void zlib_fixedtables(struct inflate_state *state)
-{
-# include "inffixed.h"
- state->lencode = lenfix;
- state->lenbits = 9;
- state->distcode = distfix;
- state->distbits = 5;
-}
-
-
-/*
- Update the window with the last wsize (normally 32K) bytes written before
- returning. This is only called when a window is already in use, or when
- output has been written during this inflate call, but the end of the deflate
- stream has not been reached yet. It is also called to window dictionary data
- when a dictionary is loaded.
-
- Providing output buffers larger than 32K to inflate() should provide a speed
- advantage, since only the last 32K of output is copied to the sliding window
- upon return from inflate(), and since all distances after the first 32K of
- output will fall in the output data, making match copies simpler and faster.
- The advantage may be dependent on the size of the processor's data caches.
- */
-static void zlib_updatewindow(z_streamp strm, unsigned out)
-{
- struct inflate_state *state;
- unsigned copy, dist;
-
- state = (struct inflate_state *)strm->state;
-
- /* copy state->wsize or less output bytes into the circular window */
- copy = out - strm->avail_out;
- if (copy >= state->wsize) {
- memcpy(state->window, strm->next_out - state->wsize, state->wsize);
- state->write = 0;
- state->whave = state->wsize;
- }
- else {
- dist = state->wsize - state->write;
- if (dist > copy) dist = copy;
- memcpy(state->window + state->write, strm->next_out - copy, dist);
- copy -= dist;
- if (copy) {
- memcpy(state->window, strm->next_out - copy, copy);
- state->write = copy;
- state->whave = state->wsize;
- }
- else {
- state->write += dist;
- if (state->write == state->wsize) state->write = 0;
- if (state->whave < state->wsize) state->whave += dist;
- }
- }
-}
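A stand-alone sketch (not the original) of the same wrap-around bookkeeping: keep only the last WSIZE bytes of output in a circular buffer, tracking the write index and how much of the window is valid, just as zlib_updatewindow() does with state->window, state->write and state->whave.

    #include <string.h>

    #define WSIZE 32768U

    struct window {
        unsigned char buf[WSIZE];
        unsigned write;     /* next write position        */
        unsigned have;      /* valid bytes, at most WSIZE */
    };

    /* Record the n bytes at `out' as the most recent decompressor output. */
    static void window_add(struct window *w, const unsigned char *out, unsigned n)
    {
        unsigned dist;

        if (n >= WSIZE) {                       /* only the tail can matter */
            memcpy(w->buf, out + n - WSIZE, WSIZE);
            w->write = 0;
            w->have = WSIZE;
            return;
        }
        dist = WSIZE - w->write;                /* room before wrapping */
        if (dist > n)
            dist = n;
        memcpy(w->buf + w->write, out, dist);
        if (n - dist) {                         /* wrapped: the rest goes to the front */
            memcpy(w->buf, out + dist, n - dist);
            w->write = n - dist;
            w->have = WSIZE;
        } else {
            w->write += dist;
            if (w->write == WSIZE)
                w->write = 0;
            if (w->have < WSIZE)
                w->have += dist;
        }
    }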
-
-
-/*
- * At the end of a Deflate-compressed PPP packet, we expect to have seen
- * a `stored' block type value but not the (zero) length bytes.
- */
-/*
- Returns true if inflate is currently at the end of a block generated by
- Z_SYNC_FLUSH or Z_FULL_FLUSH. This function is used by one PPP
- implementation to provide an additional safety check. PPP uses
- Z_SYNC_FLUSH but removes the length bytes of the resulting empty stored
- block. When decompressing, PPP checks that at the end of input packet,
- inflate is waiting for these length bytes.
- */
-static int zlib_inflateSyncPacket(z_streamp strm)
-{
- struct inflate_state *state;
-
- if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR;
- state = (struct inflate_state *)strm->state;
-
- if (state->mode == STORED && state->bits == 0) {
- state->mode = TYPE;
- return Z_OK;
- }
- return Z_DATA_ERROR;
-}
-
-/* Macros for inflate(): */
-
-/* check function to use adler32() for zlib or crc32() for gzip */
-#define UPDATE(check, buf, len) zlib_adler32(check, buf, len)
-
-/* Load registers with state in inflate() for speed */
-#define LOAD() \
- do { \
- put = strm->next_out; \
- left = strm->avail_out; \
- next = strm->next_in; \
- have = strm->avail_in; \
- hold = state->hold; \
- bits = state->bits; \
- } while (0)
-
-/* Restore state from registers in inflate() */
-#define RESTORE() \
- do { \
- strm->next_out = put; \
- strm->avail_out = left; \
- strm->next_in = next; \
- strm->avail_in = have; \
- state->hold = hold; \
- state->bits = bits; \
- } while (0)
-
-/* Clear the input bit accumulator */
-#define INITBITS() \
- do { \
- hold = 0; \
- bits = 0; \
- } while (0)
-
-/* Get a byte of input into the bit accumulator, or return from inflate()
- if there is no input available. */
-#define PULLBYTE() \
- do { \
- if (have == 0) goto inf_leave; \
- have--; \
- hold += (unsigned long)(*next++) << bits; \
- bits += 8; \
- } while (0)
-
-/* Assure that there are at least n bits in the bit accumulator. If there is
- not enough available input to do that, then return from inflate(). */
-#define NEEDBITS(n) \
- do { \
- while (bits < (unsigned)(n)) \
- PULLBYTE(); \
- } while (0)
-
-/* Return the low n bits of the bit accumulator (n < 16) */
-#define BITS(n) \
- ((unsigned)hold & ((1U << (n)) - 1))
-
-/* Remove n bits from the bit accumulator */
-#define DROPBITS(n) \
- do { \
- hold >>= (n); \
- bits -= (unsigned)(n); \
- } while (0)
-
-/* Remove zero to seven bits as needed to go to a byte boundary */
-#define BYTEBITS() \
- do { \
- hold >>= bits & 7; \
- bits -= bits & 7; \
- } while (0)
-
-/* Reverse the bytes in a 32-bit value */
-#define REVERSE(q) \
- ((((q) >> 24) & 0xff) + (((q) >> 8) & 0xff00) + \
- (((q) & 0xff00) << 8) + (((q) & 0xff) << 24))
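A stand-alone toy (not the original macros) showing the same LSB-first accumulator discipline as PULLBYTE()/NEEDBITS()/BITS()/DROPBITS(): bytes are stacked on top of `hold' and codes are peeled off the bottom. There is no end-of-input handling here; the real PULLBYTE() returns from inflate() instead.

    #include <stdio.h>

    static const unsigned char input[] = { 0x8b, 0x1f };  /* arbitrary test bytes */
    static unsigned pos;
    static unsigned long hold;   /* bit accumulator         */
    static unsigned bits;        /* valid bits held in hold */

    static void pullbyte(void)
    {
        hold += (unsigned long)input[pos++] << bits;   /* new byte lands on top */
        bits += 8;
    }

    static unsigned getbits(unsigned n)
    {
        unsigned v;

        while (bits < n)                        /* NEEDBITS(n) */
            pullbyte();
        v = (unsigned)hold & ((1U << n) - 1);   /* BITS(n)     */
        hold >>= n;                             /* DROPBITS(n) */
        bits -= n;
        return v;
    }

    int main(void)
    {
        unsigned a = getbits(3);   /* low 3 bits of 0x8b -> 3  */
        unsigned b = getbits(5);   /* next 5 bits        -> 17 */
        unsigned c = getbits(4);   /* 4 bits from 0x1f   -> 15 */

        printf("%u %u %u\n", a, b, c);
        return 0;
    }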
-
-/*
- inflate() uses a state machine to process as much input data and generate as
- much output data as possible before returning. The state machine is
- structured roughly as follows:
-
- for (;;) switch (state) {
- ...
- case STATEn:
- if (not enough input data or output space to make progress)
- return;
- ... make progress ...
- state = STATEm;
- break;
- ...
- }
-
- so when inflate() is called again, the same case is attempted again, and
- if the appropriate resources are provided, the machine proceeds to the
- next state. The NEEDBITS() macro is usually the way the state evaluates
- whether it can proceed or should return. NEEDBITS() does the return if
- the requested bits are not available. The typical use of the BITS macros
- is:
-
- NEEDBITS(n);
- ... do something with BITS(n) ...
- DROPBITS(n);
-
- where NEEDBITS(n) either returns from inflate() if there isn't enough
- input left to load n bits into the accumulator, or it continues. BITS(n)
- gives the low n bits in the accumulator. When done, DROPBITS(n) drops
- the low n bits off the accumulator. INITBITS() clears the accumulator
- and sets the number of available bits to zero. BYTEBITS() discards just
- enough bits to put the accumulator on a byte boundary. After BYTEBITS()
- and a NEEDBITS(8), then BITS(8) would return the next byte in the stream.
-
- NEEDBITS(n) uses PULLBYTE() to get an available byte of input, or to return
- if there is no input available. The decoding of variable length codes uses
- PULLBYTE() directly in order to pull just enough bytes to decode the next
- code, and no more.
-
- Some states loop until they get enough input, making sure that enough
- state information is maintained to continue the loop where it left off
- if NEEDBITS() returns in the loop. For example, want, need, and keep
- would all have to actually be part of the saved state in case NEEDBITS()
- returns:
-
- case STATEw:
- while (want < need) {
- NEEDBITS(n);
- keep[want++] = BITS(n);
- DROPBITS(n);
- }
- state = STATEx;
- case STATEx:
-
- As shown above, if the next state is also the next case, then the break
- is omitted.
-
- A state may also return if there is not enough output space available to
- complete that state. Those states are copying stored data, writing a
- literal byte, and copying a matching string.
-
- When returning, a "goto inf_leave" is used to update the total counters,
- update the check value, and determine whether any progress has been made
- during that inflate() call in order to return the proper return code.
- Progress is defined as a change in either strm->avail_in or strm->avail_out.
- When there is a window, goto inf_leave will update the window with the last
- output written. If a goto inf_leave occurs in the middle of decompression
- and there is no window currently, goto inf_leave will create one and copy
- output to the window for the next call of inflate().
-
- In this implementation, the flush parameter of inflate() only affects the
- return code (per zlib.h). inflate() always writes as much as possible to
- strm->next_out, given the space available and the provided input--the effect
- documented in zlib.h of Z_SYNC_FLUSH. Furthermore, inflate() always defers
- the allocation of and copying into a sliding window until necessary, which
- provides the effect documented in zlib.h for Z_FINISH when the entire input
-   stream is available. So the only thing the flush parameter actually does is:
- when flush is set to Z_FINISH, inflate() cannot return Z_OK. Instead it
- will return Z_BUF_ERROR if it has not reached the end of the stream.
- */
-
-int zlib_inflate(z_streamp strm, int flush)
-{
- struct inflate_state *state;
- const unsigned char *next; /* next input */
- unsigned char *put; /* next output */
- unsigned have, left; /* available input and output */
- unsigned long hold; /* bit buffer */
- unsigned bits; /* bits in bit buffer */
- unsigned in, out; /* save starting available input and output */
- unsigned copy; /* number of stored or match bytes to copy */
- unsigned char *from; /* where to copy match bytes from */
- code this; /* current decoding table entry */
- code last; /* parent table entry */
- unsigned len; /* length to copy for repeats, bits to drop */
- int ret; /* return code */
- static const unsigned short order[19] = /* permutation of code lengths */
- {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
-
- /* Do not check for strm->next_out == NULL here as ppc zImage
- inflates to strm->next_out = 0 */
-
- if (strm == NULL || strm->state == NULL ||
- (strm->next_in == NULL && strm->avail_in != 0))
- return Z_STREAM_ERROR;
-
- state = (struct inflate_state *)strm->state;
-
- if (state->mode == TYPE) state->mode = TYPEDO; /* skip check */
- LOAD();
- in = have;
- out = left;
- ret = Z_OK;
- for (;;)
- switch (state->mode) {
- case HEAD:
- if (state->wrap == 0) {
- state->mode = TYPEDO;
- break;
- }
- NEEDBITS(16);
- if (
- ((BITS(8) << 8) + (hold >> 8)) % 31) {
- strm->msg = (char *)"incorrect header check";
- state->mode = BAD;
- break;
- }
- if (BITS(4) != Z_DEFLATED) {
- strm->msg = (char *)"unknown compression method";
- state->mode = BAD;
- break;
- }
- DROPBITS(4);
- len = BITS(4) + 8;
- if (len > state->wbits) {
- strm->msg = (char *)"invalid window size";
- state->mode = BAD;
- break;
- }
- state->dmax = 1U << len;
- strm->adler = state->check = zlib_adler32(0L, NULL, 0);
- state->mode = hold & 0x200 ? DICTID : TYPE;
- INITBITS();
- break;
- case DICTID:
- NEEDBITS(32);
- strm->adler = state->check = REVERSE(hold);
- INITBITS();
- state->mode = DICT;
- case DICT:
- if (state->havedict == 0) {
- RESTORE();
- return Z_NEED_DICT;
- }
- strm->adler = state->check = zlib_adler32(0L, NULL, 0);
- state->mode = TYPE;
- case TYPE:
- if (flush == Z_BLOCK) goto inf_leave;
- case TYPEDO:
- if (state->last) {
- BYTEBITS();
- state->mode = CHECK;
- break;
- }
- NEEDBITS(3);
- state->last = BITS(1);
- DROPBITS(1);
- switch (BITS(2)) {
- case 0: /* stored block */
- state->mode = STORED;
- break;
- case 1: /* fixed block */
- zlib_fixedtables(state);
- state->mode = LEN; /* decode codes */
- break;
- case 2: /* dynamic block */
- state->mode = TABLE;
- break;
- case 3:
- strm->msg = (char *)"invalid block type";
- state->mode = BAD;
- }
- DROPBITS(2);
- break;
- case STORED:
- BYTEBITS(); /* go to byte boundary */
- NEEDBITS(32);
- if ((hold & 0xffff) != ((hold >> 16) ^ 0xffff)) {
- strm->msg = (char *)"invalid stored block lengths";
- state->mode = BAD;
- break;
- }
- state->length = (unsigned)hold & 0xffff;
- INITBITS();
- state->mode = COPY;
- case COPY:
- copy = state->length;
- if (copy) {
- if (copy > have) copy = have;
- if (copy > left) copy = left;
- if (copy == 0) goto inf_leave;
- memcpy(put, next, copy);
- have -= copy;
- next += copy;
- left -= copy;
- put += copy;
- state->length -= copy;
- break;
- }
- state->mode = TYPE;
- break;
- case TABLE:
- NEEDBITS(14);
- state->nlen = BITS(5) + 257;
- DROPBITS(5);
- state->ndist = BITS(5) + 1;
- DROPBITS(5);
- state->ncode = BITS(4) + 4;
- DROPBITS(4);
-#ifndef PKZIP_BUG_WORKAROUND
- if (state->nlen > 286 || state->ndist > 30) {
- strm->msg = (char *)"too many length or distance symbols";
- state->mode = BAD;
- break;
- }
-#endif
- state->have = 0;
- state->mode = LENLENS;
- case LENLENS:
- while (state->have < state->ncode) {
- NEEDBITS(3);
- state->lens[order[state->have++]] = (unsigned short)BITS(3);
- DROPBITS(3);
- }
- while (state->have < 19)
- state->lens[order[state->have++]] = 0;
- state->next = state->codes;
- state->lencode = (code const *)(state->next);
- state->lenbits = 7;
- ret = zlib_inflate_table(CODES, state->lens, 19, &(state->next),
- &(state->lenbits), state->work);
- if (ret) {
- strm->msg = (char *)"invalid code lengths set";
- state->mode = BAD;
- break;
- }
- state->have = 0;
- state->mode = CODELENS;
- case CODELENS:
- while (state->have < state->nlen + state->ndist) {
- for (;;) {
- this = state->lencode[BITS(state->lenbits)];
- if ((unsigned)(this.bits) <= bits) break;
- PULLBYTE();
- }
- if (this.val < 16) {
- NEEDBITS(this.bits);
- DROPBITS(this.bits);
- state->lens[state->have++] = this.val;
- }
- else {
- if (this.val == 16) {
- NEEDBITS(this.bits + 2);
- DROPBITS(this.bits);
- if (state->have == 0) {
- strm->msg = (char *)"invalid bit length repeat";
- state->mode = BAD;
- break;
- }
- len = state->lens[state->have - 1];
- copy = 3 + BITS(2);
- DROPBITS(2);
- }
- else if (this.val == 17) {
- NEEDBITS(this.bits + 3);
- DROPBITS(this.bits);
- len = 0;
- copy = 3 + BITS(3);
- DROPBITS(3);
- }
- else {
- NEEDBITS(this.bits + 7);
- DROPBITS(this.bits);
- len = 0;
- copy = 11 + BITS(7);
- DROPBITS(7);
- }
- if (state->have + copy > state->nlen + state->ndist) {
- strm->msg = (char *)"invalid bit length repeat";
- state->mode = BAD;
- break;
- }
- while (copy--)
- state->lens[state->have++] = (unsigned short)len;
- }
- }
-
- /* handle error breaks in while */
- if (state->mode == BAD) break;
-
- /* build code tables */
- state->next = state->codes;
- state->lencode = (code const *)(state->next);
- state->lenbits = 9;
- ret = zlib_inflate_table(LENS, state->lens, state->nlen, &(state->next),
- &(state->lenbits), state->work);
- if (ret) {
- strm->msg = (char *)"invalid literal/lengths set";
- state->mode = BAD;
- break;
- }
- state->distcode = (code const *)(state->next);
- state->distbits = 6;
- ret = zlib_inflate_table(DISTS, state->lens + state->nlen, state->ndist,
- &(state->next), &(state->distbits), state->work);
- if (ret) {
- strm->msg = (char *)"invalid distances set";
- state->mode = BAD;
- break;
- }
- state->mode = LEN;
- case LEN:
- if (have >= 6 && left >= 258) {
- RESTORE();
- inflate_fast(strm, out);
- LOAD();
- break;
- }
- for (;;) {
- this = state->lencode[BITS(state->lenbits)];
- if ((unsigned)(this.bits) <= bits) break;
- PULLBYTE();
- }
- if (this.op && (this.op & 0xf0) == 0) {
- last = this;
- for (;;) {
- this = state->lencode[last.val +
- (BITS(last.bits + last.op) >> last.bits)];
- if ((unsigned)(last.bits + this.bits) <= bits) break;
- PULLBYTE();
- }
- DROPBITS(last.bits);
- }
- DROPBITS(this.bits);
- state->length = (unsigned)this.val;
- if ((int)(this.op) == 0) {
- state->mode = LIT;
- break;
- }
- if (this.op & 32) {
- state->mode = TYPE;
- break;
- }
- if (this.op & 64) {
- strm->msg = (char *)"invalid literal/length code";
- state->mode = BAD;
- break;
- }
- state->extra = (unsigned)(this.op) & 15;
- state->mode = LENEXT;
- case LENEXT:
- if (state->extra) {
- NEEDBITS(state->extra);
- state->length += BITS(state->extra);
- DROPBITS(state->extra);
- }
- state->mode = DIST;
- case DIST:
- for (;;) {
- this = state->distcode[BITS(state->distbits)];
- if ((unsigned)(this.bits) <= bits) break;
- PULLBYTE();
- }
- if ((this.op & 0xf0) == 0) {
- last = this;
- for (;;) {
- this = state->distcode[last.val +
- (BITS(last.bits + last.op) >> last.bits)];
- if ((unsigned)(last.bits + this.bits) <= bits) break;
- PULLBYTE();
- }
- DROPBITS(last.bits);
- }
- DROPBITS(this.bits);
- if (this.op & 64) {
- strm->msg = (char *)"invalid distance code";
- state->mode = BAD;
- break;
- }
- state->offset = (unsigned)this.val;
- state->extra = (unsigned)(this.op) & 15;
- state->mode = DISTEXT;
- case DISTEXT:
- if (state->extra) {
- NEEDBITS(state->extra);
- state->offset += BITS(state->extra);
- DROPBITS(state->extra);
- }
-#ifdef INFLATE_STRICT
- if (state->offset > state->dmax) {
- strm->msg = (char *)"invalid distance too far back";
- state->mode = BAD;
- break;
- }
-#endif
- if (state->offset > state->whave + out - left) {
- strm->msg = (char *)"invalid distance too far back";
- state->mode = BAD;
- break;
- }
- state->mode = MATCH;
- case MATCH:
- if (left == 0) goto inf_leave;
- copy = out - left;
- if (state->offset > copy) { /* copy from window */
- copy = state->offset - copy;
- if (copy > state->write) {
- copy -= state->write;
- from = state->window + (state->wsize - copy);
- }
- else
- from = state->window + (state->write - copy);
- if (copy > state->length) copy = state->length;
- }
- else { /* copy from output */
- from = put - state->offset;
- copy = state->length;
- }
- if (copy > left) copy = left;
- left -= copy;
- state->length -= copy;
- do {
- *put++ = *from++;
- } while (--copy);
- if (state->length == 0) state->mode = LEN;
- break;
- case LIT:
- if (left == 0) goto inf_leave;
- *put++ = (unsigned char)(state->length);
- left--;
- state->mode = LEN;
- break;
- case CHECK:
- if (state->wrap) {
- NEEDBITS(32);
- out -= left;
- strm->total_out += out;
- state->total += out;
- if (out)
- strm->adler = state->check =
- UPDATE(state->check, put - out, out);
- out = left;
- if ((
- REVERSE(hold)) != state->check) {
- strm->msg = (char *)"incorrect data check";
- state->mode = BAD;
- break;
- }
- INITBITS();
- }
- state->mode = DONE;
- case DONE:
- ret = Z_STREAM_END;
- goto inf_leave;
- case BAD:
- ret = Z_DATA_ERROR;
- goto inf_leave;
- case MEM:
- return Z_MEM_ERROR;
- case SYNC:
- default:
- return Z_STREAM_ERROR;
- }
-
- /*
- Return from inflate(), updating the total counts and the check value.
- If there was no progress during the inflate() call, return a buffer
- error. Call zlib_updatewindow() to create and/or update the window state.
- */
- inf_leave:
- RESTORE();
- if (state->wsize || (state->mode < CHECK && out != strm->avail_out))
- zlib_updatewindow(strm, out);
-
- in -= strm->avail_in;
- out -= strm->avail_out;
- strm->total_in += in;
- strm->total_out += out;
- state->total += out;
- if (state->wrap && out)
- strm->adler = state->check =
- UPDATE(state->check, strm->next_out - out, out);
-
- strm->data_type = state->bits + (state->last ? 64 : 0) +
- (state->mode == TYPE ? 128 : 0);
-
- if (flush == Z_PACKET_FLUSH && ret == Z_OK &&
- strm->avail_out != 0 && strm->avail_in == 0)
- return zlib_inflateSyncPacket(strm);
-
- if (((in == 0 && out == 0) || flush == Z_FINISH) && ret == Z_OK)
- ret = Z_BUF_ERROR;
-
- return ret;
-}
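A hedged sketch of a streaming caller for the contract described in the long comment above: hand inflate() whatever input and output space is available and keep calling until it reports the end of the stream. The inbuf/outbuf buffers and the read_chunk()/consume() helpers are hypothetical; only the zlib_* names come from this code.

    strm->avail_in = 0;
    do {
        if (strm->avail_in == 0) {
            strm->next_in  = inbuf;
            strm->avail_in = read_chunk(inbuf, sizeof(inbuf));   /* hypothetical */
        }
        strm->next_out  = outbuf;
        strm->avail_out = sizeof(outbuf);

        ret = zlib_inflate(strm, Z_NO_FLUSH);

        consume(outbuf, sizeof(outbuf) - strm->avail_out);       /* hypothetical */
    } while (ret == Z_OK);
    /* Z_STREAM_END: the whole stream was decoded and the check value matched.
     * Z_BUF_ERROR: no progress was possible (e.g. truncated input), per the
     * note on the flush parameter above.  Anything else is an error.         */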
-
-int zlib_inflateEnd(z_streamp strm)
-{
- if (strm == NULL || strm->state == NULL)
- return Z_STREAM_ERROR;
- return Z_OK;
-}
-
-/*
- * This subroutine adds the data at next_in/avail_in to the output history
- * without performing any output. The output buffer must be "caught up";
- * i.e. no pending output (but this should always be the case).  The state must
- * be waiting on the start of a block (i.e. mode == TYPE or HEAD). On exit,
- * the output will also be caught up, and the checksum will have been updated
- * if need be.
- */
-int zlib_inflateIncomp(z_stream *z)
-{
- struct inflate_state *state = (struct inflate_state *)z->state;
- Byte *saved_no = z->next_out;
- uInt saved_ao = z->avail_out;
-
- if (state->mode != TYPE && state->mode != HEAD)
- return Z_DATA_ERROR;
-
- /* Setup some variables to allow misuse of updateWindow */
- z->avail_out = 0;
- z->next_out = (unsigned char*)z->next_in + z->avail_in;
-
- zlib_updatewindow(z, z->avail_in);
-
- /* Restore saved variables */
- z->avail_out = saved_ao;
- z->next_out = saved_no;
-
- z->adler = state->check =
- UPDATE(state->check, z->next_in, z->avail_in);
-
- z->total_out += z->avail_in;
- z->total_in += z->avail_in;
- z->next_in += z->avail_in;
- state->total += z->avail_in;
- z->avail_in = 0;
-
- return Z_OK;
-}
+++ /dev/null
-#ifndef INFLATE_H
-#define INFLATE_H
-
-/* inflate.h -- internal inflate state definition
- * Copyright (C) 1995-2004 Mark Adler
- * For conditions of distribution and use, see copyright notice in zlib.h
- */
-
-/* WARNING: this file should *not* be used by applications. It is
- part of the implementation of the compression library and is
- subject to change. Applications should only use zlib.h.
- */
-
-/* Possible inflate modes between inflate() calls */
-typedef enum {
- HEAD, /* i: waiting for magic header */
- FLAGS, /* i: waiting for method and flags (gzip) */
- TIME, /* i: waiting for modification time (gzip) */
- OS, /* i: waiting for extra flags and operating system (gzip) */
- EXLEN, /* i: waiting for extra length (gzip) */
- EXTRA, /* i: waiting for extra bytes (gzip) */
- NAME, /* i: waiting for end of file name (gzip) */
- COMMENT, /* i: waiting for end of comment (gzip) */
- HCRC, /* i: waiting for header crc (gzip) */
- DICTID, /* i: waiting for dictionary check value */
- DICT, /* waiting for inflateSetDictionary() call */
- TYPE, /* i: waiting for type bits, including last-flag bit */
- TYPEDO, /* i: same, but skip check to exit inflate on new block */
- STORED, /* i: waiting for stored size (length and complement) */
- COPY, /* i/o: waiting for input or output to copy stored block */
- TABLE, /* i: waiting for dynamic block table lengths */
- LENLENS, /* i: waiting for code length code lengths */
- CODELENS, /* i: waiting for length/lit and distance code lengths */
- LEN, /* i: waiting for length/lit code */
- LENEXT, /* i: waiting for length extra bits */
- DIST, /* i: waiting for distance code */
- DISTEXT, /* i: waiting for distance extra bits */
- MATCH, /* o: waiting for output space to copy string */
- LIT, /* o: waiting for output space to write literal */
- CHECK, /* i: waiting for 32-bit check value */
- LENGTH, /* i: waiting for 32-bit length (gzip) */
- DONE, /* finished check, done -- remain here until reset */
- BAD, /* got a data error -- remain here until reset */
- MEM, /* got an inflate() memory error -- remain here until reset */
- SYNC /* looking for synchronization bytes to restart inflate() */
-} inflate_mode;
-
-/*
- State transitions between above modes -
-
- (most modes can go to the BAD or MEM mode -- not shown for clarity)
-
- Process header:
- HEAD -> (gzip) or (zlib)
- (gzip) -> FLAGS -> TIME -> OS -> EXLEN -> EXTRA -> NAME
- NAME -> COMMENT -> HCRC -> TYPE
- (zlib) -> DICTID or TYPE
- DICTID -> DICT -> TYPE
- Read deflate blocks:
- TYPE -> STORED or TABLE or LEN or CHECK
- STORED -> COPY -> TYPE
- TABLE -> LENLENS -> CODELENS -> LEN
- Read deflate codes:
- LEN -> LENEXT or LIT or TYPE
- LENEXT -> DIST -> DISTEXT -> MATCH -> LEN
- LIT -> LEN
- Process trailer:
- CHECK -> LENGTH -> DONE
- */
-
-/* state maintained between inflate() calls. Approximately 7K bytes. */
-struct inflate_state {
- inflate_mode mode; /* current inflate mode */
- int last; /* true if processing last block */
- int wrap; /* bit 0 true for zlib, bit 1 true for gzip */
- int havedict; /* true if dictionary provided */
- int flags; /* gzip header method and flags (0 if zlib) */
- unsigned dmax; /* zlib header max distance (INFLATE_STRICT) */
- unsigned long check; /* protected copy of check value */
- unsigned long total; /* protected copy of output count */
- /* gz_headerp head; */ /* where to save gzip header information */
- /* sliding window */
- unsigned wbits; /* log base 2 of requested window size */
- unsigned wsize; /* window size or zero if not using window */
- unsigned whave; /* valid bytes in the window */
- unsigned write; /* window write index */
- unsigned char *window; /* allocated sliding window, if needed */
- /* bit accumulator */
- unsigned long hold; /* input bit accumulator */
- unsigned bits; /* number of bits in "in" */
- /* for string and stored block copying */
- unsigned length; /* literal or length of data to copy */
- unsigned offset; /* distance back to copy string from */
- /* for table and code decoding */
- unsigned extra; /* extra bits needed */
- /* fixed and dynamic code tables */
- code const *lencode; /* starting table for length/literal codes */
- code const *distcode; /* starting table for distance codes */
- unsigned lenbits; /* index bits for lencode */
- unsigned distbits; /* index bits for distcode */
- /* dynamic table building */
- unsigned ncode; /* number of code length code lengths */
- unsigned nlen; /* number of length code lengths */
- unsigned ndist; /* number of distance code lengths */
- unsigned have; /* number of code lengths in lens[] */
- code *next; /* next available space in codes[] */
- unsigned short lens[320]; /* temporary storage for code lengths */
- unsigned short work[288]; /* work area for code table building */
- code codes[ENOUGH]; /* space for code tables */
-};
-#endif
+++ /dev/null
-/* inftrees.c -- generate Huffman trees for efficient decoding
- * Copyright (C) 1995-2005 Mark Adler
- * For conditions of distribution and use, see copyright notice in zlib.h
- */
-
-#include <linux/zutil.h>
-#include "inftrees.h"
-
-#define MAXBITS 15
-
-/*
- Build a set of tables to decode the provided canonical Huffman code.
- The code lengths are lens[0..codes-1]. The result starts at *table,
- whose indices are 0..2^bits-1. work is a writable array of at least
- lens shorts, which is used as a work area. type is the type of code
- to be generated, CODES, LENS, or DISTS. On return, zero is success,
- -1 is an invalid code, and +1 means that ENOUGH isn't enough. table
- on return points to the next available entry's address. bits is the
- requested root table index bits, and on return it is the actual root
- table index bits. It will differ if the request is greater than the
- longest code or if it is less than the shortest code.
- */
-int zlib_inflate_table(codetype type, unsigned short *lens, unsigned codes,
- code **table, unsigned *bits, unsigned short *work)
-{
- unsigned len; /* a code's length in bits */
- unsigned sym; /* index of code symbols */
- unsigned min, max; /* minimum and maximum code lengths */
- unsigned root; /* number of index bits for root table */
- unsigned curr; /* number of index bits for current table */
- unsigned drop; /* code bits to drop for sub-table */
- int left; /* number of prefix codes available */
- unsigned used; /* code entries in table used */
- unsigned huff; /* Huffman code */
- unsigned incr; /* for incrementing code, index */
- unsigned fill; /* index for replicating entries */
- unsigned low; /* low bits for current root entry */
- unsigned mask; /* mask for low root bits */
- code this; /* table entry for duplication */
- code *next; /* next available space in table */
- const unsigned short *base; /* base value table to use */
- const unsigned short *extra; /* extra bits table to use */
- int end; /* use base and extra for symbol > end */
- unsigned short count[MAXBITS+1]; /* number of codes of each length */
- unsigned short offs[MAXBITS+1]; /* offsets in table for each length */
- static const unsigned short lbase[31] = { /* Length codes 257..285 base */
- 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
- 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0};
- static const unsigned short lext[31] = { /* Length codes 257..285 extra */
- 16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18,
- 19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 201, 196};
- static const unsigned short dbase[32] = { /* Distance codes 0..29 base */
- 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
- 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
- 8193, 12289, 16385, 24577, 0, 0};
- static const unsigned short dext[32] = { /* Distance codes 0..29 extra */
- 16, 16, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22,
- 23, 23, 24, 24, 25, 25, 26, 26, 27, 27,
- 28, 28, 29, 29, 64, 64};
-
- /*
- Process a set of code lengths to create a canonical Huffman code. The
- code lengths are lens[0..codes-1]. Each length corresponds to the
- symbols 0..codes-1. The Huffman code is generated by first sorting the
- symbols by length from short to long, and retaining the symbol order
- for codes with equal lengths. Then the code starts with all zero bits
- for the first code of the shortest length, and the codes are integer
- increments for the same length, and zeros are appended as the length
- increases. For the deflate format, these bits are stored backwards
- from their more natural integer increment ordering, and so when the
- decoding tables are built in the large loop below, the integer codes
- are incremented backwards.
-
- This routine assumes, but does not check, that all of the entries in
- lens[] are in the range 0..MAXBITS. The caller must assure this.
- 1..MAXBITS is interpreted as that code length. zero means that that
- symbol does not occur in this code.
-
- The codes are sorted by computing a count of codes for each length,
- creating from that a table of starting indices for each length in the
- sorted table, and then entering the symbols in order in the sorted
- table. The sorted table is work[], with that space being provided by
- the caller.
-
- The length counts are used for other purposes as well, i.e. finding
- the minimum and maximum length codes, determining if there are any
- codes at all, checking for a valid set of lengths, and looking ahead
- at length counts to determine sub-table sizes when building the
- decoding tables.
- */
-
- /* accumulate lengths for codes (assumes lens[] all in 0..MAXBITS) */
- for (len = 0; len <= MAXBITS; len++)
- count[len] = 0;
- for (sym = 0; sym < codes; sym++)
- count[lens[sym]]++;
-
- /* bound code lengths, force root to be within code lengths */
- root = *bits;
- for (max = MAXBITS; max >= 1; max--)
- if (count[max] != 0) break;
- if (root > max) root = max;
- if (max == 0) { /* no symbols to code at all */
- this.op = (unsigned char)64; /* invalid code marker */
- this.bits = (unsigned char)1;
- this.val = (unsigned short)0;
- *(*table)++ = this; /* make a table to force an error */
- *(*table)++ = this;
- *bits = 1;
- return 0; /* no symbols, but wait for decoding to report error */
- }
- for (min = 1; min <= MAXBITS; min++)
- if (count[min] != 0) break;
- if (root < min) root = min;
-
- /* check for an over-subscribed or incomplete set of lengths */
- left = 1;
- for (len = 1; len <= MAXBITS; len++) {
- left <<= 1;
- left -= count[len];
- if (left < 0) return -1; /* over-subscribed */
- }
- if (left > 0 && (type == CODES || max != 1))
- return -1; /* incomplete set */
-
- /* generate offsets into symbol table for each length for sorting */
- offs[1] = 0;
- for (len = 1; len < MAXBITS; len++)
- offs[len + 1] = offs[len] + count[len];
-
- /* sort symbols by length, by symbol order within each length */
- for (sym = 0; sym < codes; sym++)
- if (lens[sym] != 0) work[offs[lens[sym]]++] = (unsigned short)sym;
-
- /*
- Create and fill in decoding tables. In this loop, the table being
- filled is at next and has curr index bits. The code being used is huff
- with length len. That code is converted to an index by dropping drop
- bits off of the bottom. For codes where len is less than drop + curr,
- those top drop + curr - len bits are incremented through all values to
- fill the table with replicated entries.
-
- root is the number of index bits for the root table. When len exceeds
- root, sub-tables are created pointed to by the root entry with an index
- of the low root bits of huff. This is saved in low to check for when a
- new sub-table should be started. drop is zero when the root table is
- being filled, and drop is root when sub-tables are being filled.
-
- When a new sub-table is needed, it is necessary to look ahead in the
- code lengths to determine what size sub-table is needed. The length
- counts are used for this, and so count[] is decremented as codes are
- entered in the tables.
-
- used keeps track of how many table entries have been allocated from the
- provided *table space. It is checked when a LENS table is being made
- against the space in *table, ENOUGH, minus the maximum space needed by
- the worst case distance code, MAXD. This should never happen, but the
- sufficiency of ENOUGH has not been proven exhaustively, hence the check.
- This assumes that when type == LENS, bits == 9.
-
- sym increments through all symbols, and the loop terminates when
- all codes of length max, i.e. all codes, have been processed. This
- routine permits incomplete codes, so another loop after this one fills
- in the rest of the decoding tables with invalid code markers.
- */
-
- /* set up for code type */
- switch (type) {
- case CODES:
- base = extra = work; /* dummy value--not used */
- end = 19;
- break;
- case LENS:
- base = lbase;
- base -= 257;
- extra = lext;
- extra -= 257;
- end = 256;
- break;
- default: /* DISTS */
- base = dbase;
- extra = dext;
- end = -1;
- }
-
- /* initialize state for loop */
- huff = 0; /* starting code */
- sym = 0; /* starting code symbol */
- len = min; /* starting code length */
- next = *table; /* current table to fill in */
- curr = root; /* current table index bits */
- drop = 0; /* current bits to drop from code for index */
- low = (unsigned)(-1); /* trigger new sub-table when len > root */
- used = 1U << root; /* use root table entries */
- mask = used - 1; /* mask for comparing low */
-
- /* check available table space */
- if (type == LENS && used >= ENOUGH - MAXD)
- return 1;
-
- /* process all codes and make table entries */
- for (;;) {
- /* create table entry */
- this.bits = (unsigned char)(len - drop);
- if ((int)(work[sym]) < end) {
- this.op = (unsigned char)0;
- this.val = work[sym];
- }
- else if ((int)(work[sym]) > end) {
- this.op = (unsigned char)(extra[work[sym]]);
- this.val = base[work[sym]];
- }
- else {
- this.op = (unsigned char)(32 + 64); /* end of block */
- this.val = 0;
- }
-
- /* replicate for those indices with low len bits equal to huff */
- incr = 1U << (len - drop);
- fill = 1U << curr;
- min = fill; /* save offset to next table */
- do {
- fill -= incr;
- next[(huff >> drop) + fill] = this;
- } while (fill != 0);
-
- /* backwards increment the len-bit code huff */
- incr = 1U << (len - 1);
- while (huff & incr)
- incr >>= 1;
- if (incr != 0) {
- huff &= incr - 1;
- huff += incr;
- }
- else
- huff = 0;
-
- /* go to next symbol, update count, len */
- sym++;
- if (--(count[len]) == 0) {
- if (len == max) break;
- len = lens[work[sym]];
- }
-
- /* create new sub-table if needed */
- if (len > root && (huff & mask) != low) {
- /* if first time, transition to sub-tables */
- if (drop == 0)
- drop = root;
-
- /* increment past last table */
- next += min; /* here min is 1 << curr */
-
- /* determine length of next table */
- curr = len - drop;
- left = (int)(1 << curr);
- while (curr + drop < max) {
- left -= count[curr + drop];
- if (left <= 0) break;
- curr++;
- left <<= 1;
- }
-
- /* check for enough space */
- used += 1U << curr;
- if (type == LENS && used >= ENOUGH - MAXD)
- return 1;
-
- /* point entry in root table to sub-table */
- low = huff & mask;
- (*table)[low].op = (unsigned char)curr;
- (*table)[low].bits = (unsigned char)root;
- (*table)[low].val = (unsigned short)(next - *table);
- }
- }
-
- /*
- Fill in rest of table for incomplete codes. This loop is similar to the
- loop above in incrementing huff for table indices. It is assumed that
- len is equal to curr + drop, so there is no loop needed to increment
- through high index bits. When the current sub-table is filled, the loop
- drops back to the root table to fill in any remaining entries there.
- */
- this.op = (unsigned char)64; /* invalid code marker */
- this.bits = (unsigned char)(len - drop);
- this.val = (unsigned short)0;
- while (huff != 0) {
- /* when done with sub-table, drop back to root table */
- if (drop != 0 && (huff & mask) != low) {
- drop = 0;
- len = root;
- next = *table;
- this.bits = (unsigned char)len;
- }
-
- /* put invalid code marker in table */
- next[huff >> drop] = this;
-
- /* backwards increment the len-bit code huff */
- incr = 1U << (len - 1);
- while (huff & incr)
- incr >>= 1;
- if (incr != 0) {
- huff &= incr - 1;
- huff += incr;
- }
- else
- huff = 0;
- }
-
- /* set return parameters */
- *table += used;
- *bits = root;
- return 0;
-}
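A hedged worked example (not part of the original) of the canonical construction the comment above describes, for the toy length set {2, 1, 3, 3} over four symbols: shorter codes come first, codes of equal length are consecutive integers, and a zero is appended each time the length grows. Deflate then transmits these bits low-order-first, which is why the table-building loop above increments `huff' "backwards".

    #include <stdio.h>

    #define MAXB 15

    int main(void)
    {
        const unsigned lens[4] = { 2, 1, 3, 3 };   /* code lengths for symbols 0..3 */
        unsigned count[MAXB + 1] = { 0 };
        unsigned next_code[MAXB + 1] = { 0 };
        unsigned code = 0, len, sym, i;

        for (sym = 0; sym < 4; sym++)              /* count codes of each length   */
            count[lens[sym]]++;

        for (len = 1; len <= MAXB; len++) {        /* first code of each length    */
            code = (code + count[len - 1]) << 1;
            next_code[len] = code;
        }

        for (sym = 0; sym < 4; sym++) {            /* assign codes in symbol order */
            unsigned c = next_code[lens[sym]]++;

            printf("symbol %u: length %u, code ", sym, lens[sym]);
            for (i = lens[sym]; i-- > 0; )
                putchar('0' + ((c >> i) & 1));
            putchar('\n');
        }
        /* prints 10, 0, 110, 111 -- a prefix-free canonical code */
        return 0;
    }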
+++ /dev/null
-#ifndef INFTREES_H
-#define INFTREES_H
-
-/* inftrees.h -- header to use inftrees.c
- * Copyright (C) 1995-2005 Mark Adler
- * For conditions of distribution and use, see copyright notice in zlib.h
- */
-
-/* WARNING: this file should *not* be used by applications. It is
- part of the implementation of the compression library and is
- subject to change. Applications should only use zlib.h.
- */
-
-/* Structure for decoding tables. Each entry provides either the
- information needed to do the operation requested by the code that
- indexed that table entry, or it provides a pointer to another
- table that indexes more bits of the code. op indicates whether
- the entry is a pointer to another table, a literal, a length or
- distance, an end-of-block, or an invalid code. For a table
- pointer, the low four bits of op is the number of index bits of
- that table. For a length or distance, the low four bits of op
- is the number of extra bits to get after the code. bits is
- the number of bits in this code or part of the code to drop off
- of the bit buffer. val is the actual byte to output in the case
- of a literal, the base length or distance, or the offset from
- the current table to the next table. Each entry is four bytes. */
-typedef struct {
- unsigned char op; /* operation, extra bits, table bits */
- unsigned char bits; /* bits in this part of the code */
- unsigned short val; /* offset in table or code value */
-} code;
-
-/* op values as set by inflate_table():
- 00000000 - literal
- 0000tttt - table link, tttt != 0 is the number of table index bits
- 0001eeee - length or distance, eeee is the number of extra bits
- 01100000 - end of block
- 01000000 - invalid code
- */
-
-/* Maximum size of dynamic tree. The maximum found in a long but non-
- exhaustive search was 1444 code structures (852 for length/literals
- and 592 for distances, the latter actually the result of an
- exhaustive search). The true maximum is not known, but the value
- below is more than safe. */
-#define ENOUGH 2048
-#define MAXD 592
-
-/* Type of code to build for inftable() */
-typedef enum {
- CODES,
- LENS,
- DISTS
-} codetype;
-
-extern int zlib_inflate_table (codetype type, unsigned short *lens,
- unsigned codes, code **table,
- unsigned *bits, unsigned short *work);
-#endif
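For context only (editorial, not part of the patch): a rough sketch of how an inflate loop typically interprets the op field documented above, assuming the standard zlib decode convention; struct zlib_code and describe() are stand-in names, not identifiers from the removed header.

struct zlib_code {		/* mirrors the removed 'code' layout */
	unsigned char op;	/* operation, extra bits, table bits */
	unsigned char bits;	/* bits consumed by this entry */
	unsigned short val;	/* literal, base value, or sub-table offset */
};

static const char *describe(struct zlib_code e)
{
	if (e.op == 0)
		return "literal: val is the byte to output";
	if (e.op & 16)
		return "length/distance: val is the base, op & 15 extra bits follow";
	if ((e.op & 64) == 0)
		return "table link: val is the sub-table offset, op & 15 index bits";
	if (e.op & 32)
		return "end of block";
	return "invalid code";
}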
+++ /dev/null
-#include <linux/zutil.h>
-#include <linux/errno.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-
-/* Utility function: initialize zlib, unpack binary blob, clean up zlib,
- * return len or negative error code.
- */
-int zlib_inflate_blob(void *gunzip_buf, unsigned int sz,
- const void *buf, unsigned int len)
-{
- const u8 *zbuf = buf;
- struct z_stream_s *strm;
- int rc;
-
- rc = -ENOMEM;
- strm = kmalloc(sizeof(*strm), GFP_KERNEL);
- if (strm == NULL)
- goto gunzip_nomem1;
- strm->workspace = kmalloc(zlib_inflate_workspacesize(), GFP_KERNEL);
- if (strm->workspace == NULL)
- goto gunzip_nomem2;
-
- /* gzip header (1f,8b,08... 10 bytes total + possible asciz filename)
- * expected to be stripped from input
- */
- strm->next_in = zbuf;
- strm->avail_in = len;
- strm->next_out = gunzip_buf;
- strm->avail_out = sz;
-
- rc = zlib_inflateInit2(strm, -MAX_WBITS);
- if (rc == Z_OK) {
- rc = zlib_inflate(strm, Z_FINISH);
- /* after Z_FINISH, only Z_STREAM_END is "we unpacked it all" */
- if (rc == Z_STREAM_END)
- rc = sz - strm->avail_out;
- else
- rc = -EINVAL;
- zlib_inflateEnd(strm);
- } else
- rc = -EINVAL;
-
- kfree(strm->workspace);
-gunzip_nomem2:
- kfree(strm);
-gunzip_nomem1:
- return rc; /* returns Z_OK (0) if successful */
-}
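With the bundled kernel copy going away, the same job can be done against stock zlib. A hypothetical userspace sketch (editorial, not part of the patch; inflate_blob() is an illustrative name): like the removed helper, it inflates a raw deflate stream, hence the negative windowBits.

#include <string.h>
#include <zlib.h>

static int inflate_blob(void *dst, unsigned dst_len,
			const void *src, unsigned src_len)
{
	z_stream strm;
	int ret;

	memset(&strm, 0, sizeof(strm));
	strm.next_in   = (Bytef *)src;
	strm.avail_in  = src_len;
	strm.next_out  = dst;
	strm.avail_out = dst_len;

	/* raw deflate data, no gzip/zlib header */
	if (inflateInit2(&strm, -MAX_WBITS) != Z_OK)
		return -1;

	ret = inflate(&strm, Z_FINISH);
	/* only Z_STREAM_END means the whole blob fit and was unpacked */
	ret = (ret == Z_STREAM_END) ? (int)(dst_len - strm.avail_out) : -1;
	inflateEnd(&strm);
	return ret;
}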
+++ /dev/null
-/* infutil.h -- types and macros common to blocks and codes
- * Copyright (C) 1995-1998 Mark Adler
- * For conditions of distribution and use, see copyright notice in zlib.h
- */
-
-/* WARNING: this file should *not* be used by applications. It is
- part of the implementation of the compression library and is
- subject to change. Applications should only use zlib.h.
- */
-
-#ifndef _INFUTIL_H
-#define _INFUTIL_H
-
-#include <linux/zlib.h>
-
-/* memory allocation for inflation */
-
-struct inflate_workspace {
- struct inflate_state inflate_state;
- unsigned char working_window[1 << MAX_WBITS];
-};
-
-#define WS(z) ((struct inflate_workspace *)(z->workspace))
-
-#endif
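For context (editorial, not part of the patch): the workspace that zlib_inflate_blob() allocated above is sized by the kernel helper sketched below. The definition is shown as an assumption for illustration; the point is simply that the decoder state and the 32 KiB sliding window are preallocated as one block.

/* assumed definition, for illustration: workspace = inflate state plus the
 * (1 << MAX_WBITS) = 32 KiB LZ77 window declared in the struct above */
int zlib_inflate_workspacesize(void)
{
	return sizeof(struct inflate_workspace);
}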