2 * LZ4 HC - High Compression Mode of LZ4
3 * Copyright (C) 2011-2012, Yann Collet.
4 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are
10 * * Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * * Redistributions in binary form must reproduce the above
13 * copyright notice, this list of conditions and the following disclaimer
14 * in the documentation and/or other materials provided with the
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 * You can contact the author at :
30 * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
31 * - LZ4 source repository : http://code.google.com/p/lz4/
33 * Changed for kernel use by:
34 * Chanho Min <chanho.min@lge.com>
37 #include <linux/module.h>
38 #include <linux/kernel.h>
39 #include <linux/lz4.h>
40 #include <asm/unaligned.h>
/* Hash buckets: HASHTABLESIZE entries of type HTYPE, indexed with HASH_VALUE(). */
45 HTYPE hashtable[HASHTABLESIZE];
/* First input position that lz4hc_insert() has not processed yet. */
47 const u8 *nexttoupdate;
/* Packed attribute: the compiler inserts no padding between the members. */
48 } __attribute__((__packed__));
/*
 * lz4hc_init() - reset the match-finder state before a compression run.
 * @hc4:  state whose tables and update cursor are reset
 * @base: start of the input buffer; used to seed hc4->nexttoupdate
 */
50 static inline int lz4hc_init(struct lz4hc_data *hc4, const u8 *base)
/* Clear every hash bucket to zero. */
52 memset((void *)hc4->hashtable, 0, sizeof(hc4->hashtable));
/* Fill the chain table with 0xFF bytes, so every u16 entry reads 0xFFFF. */
53 memset(hc4->chaintable, 0xFF, sizeof(hc4->chaintable));
/*
 * NOTE(review): two alternative seeds for the update cursor follow
 * (base + 1 and base). Presumably a preprocessor conditional selects
 * exactly one of them; confirm which condition picks which.
 */
56 hc4->nexttoupdate = base + 1;
58 hc4->nexttoupdate = base;
/*
 * lz4hc_insert() - index input positions from hc4->nexttoupdate up to @ip.
 * @hc4: match-finder state holding the hash table and the chain table
 * @ip:  exclusive upper bound of the positions to index
 */
64 /* Update chains up to ip (excluded) */
65 static inline void lz4hc_insert(struct lz4hc_data *hc4, const u8 *ip)
67 u16 *chaintable = hc4->chaintable;
68 HTYPE *hashtable = hc4->hashtable;
70 const u8 * const base = hc4->base;
/* Visit each position that has not been indexed yet and lies below ip. */
75 while (hc4->nexttoupdate < ip) {
76 const u8 *p = hc4->nexttoupdate;
/*
 * Distance from p back to the position last recorded in the hash bucket
 * of p (buckets hold offsets relative to base).
 */
77 size_t delta = p - (hashtable[HASH_VALUE(p)] + base);
/*
 * NOTE(review): distances above MAX_DISTANCE get special handling in the
 * guarded statement; presumably delta is clamped - confirm.
 */
78 if (delta > MAX_DISTANCE)
/*
 * The chain slot for p is selected by the low bits of its address
 * (MAXD_MASK) and stores the distance to the previous position found in
 * the same bucket, truncated to u16.
 */
80 chaintable[(size_t)(p) & MAXD_MASK] = (u16)delta;
/* The bucket now refers to p, stored as an offset from base. */
81 hashtable[HASH_VALUE(p)] = (p) - base;
/*
 * lz4hc_insertandfindbestmatch() - index positions up to @ip, then walk the
 * hash chain of @ip looking for earlier data that matches the bytes at @ip.
 * @hc4:        match-finder state (hash table, chain table, base)
 * @ip:         current input position
 * @matchlimit: limit handed to common_length() when measuring a match
 * @matchpos:   NOTE(review): presumably receives the position of the best
 *              match; the store is not among the statements here - confirm
 */
86 static inline int lz4hc_insertandfindbestmatch(struct lz4hc_data *hc4,
87 const u8 *ip, const u8 *const matchlimit, const u8 **matchpos)
89 u16 *const chaintable = hc4->chaintable;
90 HTYPE *const hashtable = hc4->hashtable;
93 const u8 * const base = hc4->base;
/* The chain walk below continues only while nbattempts is non-zero. */
97 int nbattempts = MAX_NB_ATTEMPTS;
/* ml: best match length so far; repl: length of the first-candidate match. */
98 size_t repl = 0, ml = 0;
101 /* HC4 match finder */
/* Bring the tables up to date, then start from the bucket entry for ip. */
102 lz4hc_insert(hc4, ip);
103 ref = hashtable[HASH_VALUE(ip)] + base;
105 /* potential repetition */
/* First candidate: taken when A32() of ref equals A32() of ip. */
108 if (A32(ref) == A32(ip)) {
/* Distance between ip and the candidate, truncated to u16. */
109 delta = (u16)(ip-ref);
/*
 * Measure the match past the first MINMATCH bytes and add MINMATCH back,
 * recording the result as both repl and the current best ml.
 */
110 repl = ml = common_length(ip + MINMATCH,
111 ref + MINMATCH, matchlimit) + MINMATCH;
/* Step to the previous candidate through the chain entry of ref. */
114 ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK];
/* Walk the chain while candidates stay within MAX_DISTANCE of ip. */
117 while ((ref >= ip - MAX_DISTANCE) && nbattempts) {
/*
 * Cheap filter: a candidate can only be longer than ml if it also matches
 * at offset ml, so that single byte is compared before anything else.
 */
119 if (*(ref + ml) == *(ip + ml)) {
120 if (A32(ref) == A32(ip)) {
122 common_length(ip + MINMATCH,
123 ref + MINMATCH, matchlimit) + MINMATCH;
/* Follow the chain to the next older candidate. */
130 ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK];
/*
 * The statements below write delta into the chain slots of successive ptr
 * positions and refresh hash buckets, covering the range that ends at
 * ip + repl - (MINMATCH-1).
 */
137 end = ip + repl - (MINMATCH-1);
139 while (ptr < end - delta) {
140 chaintable[(size_t)(ptr) & MAXD_MASK] = delta;
144 chaintable[(size_t)(ptr) & MAXD_MASK] = delta;
146 hashtable[HASH_VALUE(ptr)] = (ptr) - base;
/* Later lz4hc_insert() calls resume from end instead of re-indexing. */
149 hc4->nexttoupdate = end;
/*
 * lz4hc_insertandgetwidermatch() - index positions up to @ip, then walk the
 * hash chain of @ip looking for a match longer than @longest, extending
 * candidates backwards as far as @startlimit.
 * @hc4:        match-finder state (hash table, chain table, base)
 * @ip:         position whose hash chain is searched
 * @startlimit: lowest position the backward extension may reach
 * @matchlimit: NOTE(review): presumably the limit for forward extension; the
 *              rest of the common_length() call is not shown - confirm
 * @longest:    length a candidate has to exceed
 * @matchpos:   NOTE(review): presumably receives the match position - confirm
 * @startpos:   NOTE(review): presumably receives the match start - confirm
 */
155 static inline int lz4hc_insertandgetwidermatch(struct lz4hc_data *hc4,
156 const u8 *ip, const u8 *startlimit, const u8 *matchlimit, int longest,
157 const u8 **matchpos, const u8 **startpos)
159 u16 *const chaintable = hc4->chaintable;
160 HTYPE *const hashtable = hc4->hashtable;
162 const u8 * const base = hc4->base;
167 int nbattempts = MAX_NB_ATTEMPTS;
/* How far ip lies ahead of startlimit. */
168 int delta = (int)(ip - startlimit);
/* Bring the tables up to date, then start from the bucket entry for ip. */
171 lz4hc_insert(hc4, ip);
172 ref = hashtable[HASH_VALUE(ip)] + base;
/* Candidates must be within MAX_DISTANCE of ip and not before hc4->base. */
174 while ((ref >= ip - MAX_DISTANCE) && (ref >= hc4->base)
/*
 * Cheap filter: compare the byte at offset longest, with the candidate
 * shifted back by delta so that it lines up with startlimit.
 */
177 if (*(startlimit + longest) == *(ref - delta + longest)) {
178 if (A32(ref) == A32(ip)) {
/* Working copies used for the backward extension of this candidate. */
179 const u8 *reft = ref;
180 const u8 *startt = ip;
182 common_length(ip + MINMATCH,
/*
 * Backward extension: continue while the preceding bytes are equal and
 * neither startlimit nor hc4->base has been reached.
 */
186 while ((startt > startlimit)
187 && (reft > hc4->base)
188 && (startt[-1] == reft[-1])) {
/* Only a candidate strictly longer than longest is taken. */
194 if (length > longest) {
/* Follow the chain to the next older candidate. */
201 ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK];
/*
 * lz4_encodesequence() - write one sequence (pending literals plus a match)
 * to the output.
 * @ip:     in/out current input position
 * @op:     in/out output write position
 * @anchor: in/out start of the literals that have not been written yet
 * @ml:     match length
 * @ref:    position of the matched data; *ip - ref is written as the offset
 */
206 static inline int lz4_encodesequence(const u8 **ip, u8 **op, const u8 **anchor,
207 int ml, const u8 *ref)
212 /* Encode Literal length */
/* Number of literal bytes between the anchor and the current position. */
213 length = *ip - *anchor;
/* The value returned by encode_length() goes above the low ML_BITS of the token. */
215 *token = encode_length(op, length) << ML_BITS;
/*
 * NOTE(review): per the macro name this presumably copies length bytes
 * from *anchor to *op and advances the pointers - confirm in the macro.
 */
218 MEMCPY_ADVANCE_CHUNKED(*op, *anchor, length);
/*
 * Match offset, truncated to 16 bits. NOTE(review): presumably stored in
 * little-endian order with *op advanced, per the macro name - confirm.
 */
221 PUT_LE16_ADVANCE(*op, (u16)(*ip - ref));
/* The match length is encoded relative to MINMATCH and added to the token. */
223 *token += encode_length(op, ml - MINMATCH);
225 /* Prepare next loop */
/*
 * lz4_compresshcctx() - compress the input using the match-finder state @ctx.
 * @ctx: match-finder state handed to the search helpers
 *
 * The body reads the input through source and isize, writes through op, and
 * returns the number of bytes between dest and the final value of op.
 * Up to three overlapping match candidates (ml/ref, ml2/ref2/start2,
 * ml3/ref3/start3) are weighed against each other before sequences are
 * written with lz4_encodesequence().
 */
232 static int lz4_compresshcctx(struct lz4hc_data *ctx,
/* ip: current input position; anchor: start of the literals not yet written. */
237 const u8 *ip = (const u8 *)source;
238 const u8 *anchor = ip;
239 const u8 *const iend = ip + isize;
/* The main loop only searches for matches while ip is below mflimit. */
240 const u8 *const mflimit = iend - MFLIMIT;
/* Passed to the match finders as their match limit. */
241 const u8 *const matchlimit = (iend - LASTLITERALS);
245 int ml, ml2, ml3, ml0;
246 const u8 *ref = NULL;
247 const u8 *start2 = NULL;
248 const u8 *ref2 = NULL;
249 const u8 *start3 = NULL;
250 const u8 *ref3 = NULL;
258 while (ip < mflimit) {
/*
 * Best match at ip. ref is passed by address; NOTE(review): presumably it
 * receives the match position - confirm in the helper.
 */
259 ml = lz4hc_insertandfindbestmatch(ctx, ip, matchlimit, (&ref));
265 /* saved, in case we would skip too much */
/*
 * Look for a wider second match: the chain of ip + ml - 2 is searched,
 * ip + 1 is the start limit and ml is the length to beat.
 */
271 ml2 = lz4hc_insertandgetwidermatch(ctx, ip + ml - 2,
272 ip + 1, matchlimit, ml, &ref2, &start2);
275 /* No better match */
277 lz4_encodesequence(&ip, &op, &anchor, ml, ref);
283 if (start2 < ip + ml0) {
291 * First Match too small : removed
293 if ((start2 - ip) < 3) {
302 * Currently we have :
304 * ip1+3 <= ip2 (usually < ip1+ml1)
306 if ((start2 - ip) < OPTIMAL_ML) {
309 if (new_ml > OPTIMAL_ML)
/* Keep new_ml from leaving the second match with fewer than MINMATCH bytes. */
311 if (ip + new_ml > start2 + ml2 - MINMATCH)
312 new_ml = (int)(start2 - ip) + ml2 - MINMATCH;
/* A positive correction moves the start of the second match forward. */
313 correction = new_ml - (int)(start2 - ip);
314 if (correction > 0) {
315 start2 += correction;
321 * Now, we have start2 = ip+new_ml,
322 * with new_ml=min(ml, OPTIMAL_ML=18)
/* A third search is made only while the second match ends below mflimit. */
324 if (start2 + ml2 < mflimit)
325 ml3 = lz4hc_insertandgetwidermatch(ctx,
326 start2 + ml2 - 3, start2, matchlimit,
327 ml2, &ref3, &start3);
331 /* No better match : 2 sequences to encode */
333 /* ip & ref are known; Now for ml */
/* The first match is cut so that it ends where the second one starts. */
335 ml = (int)(start2 - ip);
337 /* Now, encode 2 sequences */
338 lz4_encodesequence(&ip, &op, &anchor, ml, ref);
340 lz4_encodesequence(&ip, &op, &anchor, ml2, ref2);
344 /* Not enough space for match 2 : remove it */
345 if (start3 < ip + ml + 3) {
347 * can write Seq1 immediately ==> Seq2 is removed,
348 * so Seq3 becomes Seq1
350 if (start3 >= (ip + ml)) {
351 if (start2 < ip + ml) {
353 (int)(ip + ml - start2);
354 start2 += correction;
357 if (ml2 < MINMATCH) {
364 lz4_encodesequence(&ip, &op, &anchor, ml, ref);
382 * OK, now we have 3 ascending matches; let's write at least
383 * the first one ip & ref are known; Now for ml
385 if (start2 < ip + ml) {
386 if ((start2 - ip) < (int)ML_MASK) {
390 if (ip + ml > start2 + ml2 - MINMATCH)
391 ml = (int)(start2 - ip) + ml2
393 correction = ml - (int)(start2 - ip);
394 if (correction > 0) {
395 start2 += correction;
400 ml = (int)(start2 - ip);
402 lz4_encodesequence(&ip, &op, &anchor, ml, ref);
415 /* Encode Last Literals */
/* Everything from anchor to the end of the input is written as literals. */
416 lastrun = (int)(iend - anchor);
/*
 * A run of RUN_MASK or more puts RUN_MASK in the bits above ML_BITS of the
 * token and needs extra length bytes; the loop steps lastrun down by 255
 * per iteration and the final byte holds what is left. A shorter run goes
 * into the token directly.
 */
417 if (lastrun >= (int)RUN_MASK) {
418 *op++ = (RUN_MASK << ML_BITS);
420 for (; lastrun > 254 ; lastrun -= 255)
422 *op++ = (u8) lastrun;
424 *op++ = (lastrun << ML_BITS);
/* Copy the trailing literals themselves. */
425 memcpy(op, anchor, iend - anchor);
/* Result: distance between the final write position and dest, in bytes. */
428 return (int) (((char *)op) - dest);
/*
 * lz4hc_compress() - exported entry point of the LZ4HC compressor.
 * @src:     input data
 * @src_len: input length in bytes; narrowed to int for the worker
 * @dst:     output buffer
 * @dst_len: NOTE(review): presumably receives the compressed size; the store
 *           is not among the statements here - confirm
 * @wrkmem:  caller-supplied memory used as the struct lz4hc_data state
 */
431 int lz4hc_compress(const unsigned char *src, size_t src_len,
432 unsigned char *dst, size_t *dst_len, void *wrkmem)
/* The work memory is used directly as the match-finder state. */
437 struct lz4hc_data *hc4 = (struct lz4hc_data *)wrkmem;
/* Reset the tables, with the start of the input as the base. */
438 lz4hc_init(hc4, (const u8 *)src);
/* Run the compressor; out_len takes the value returned by the worker. */
439 out_len = lz4_compresshcctx((struct lz4hc_data *)hc4, (const u8 *)src,
440 (char *)dst, (int)src_len);
/* Make lz4hc_compress() available to other kernel code. */
451 EXPORT_SYMBOL(lz4hc_compress);
/* Module metadata: licence string and a one-line description. */
453 MODULE_LICENSE("Dual BSD/GPL");
454 MODULE_DESCRIPTION("LZ4HC compressor");