3 `sdefl` is a small bare bone lossless compression library in ANSI C (ISO C90)
4 which implements the Deflate (RFC 1951) compressed data format specification standard.
5 It is mainly tuned to get as much speed and compression ratio from as little code
6 as needed to keep the implementation as concise as possible.
9 - Portable single header and source file duo written in ANSI C (ISO C90)
10 - Dual license with either MIT or public domain
11 - Small implementation
15 - Deflate ~3.7 KB (~2.2KB compressed)
16 - Inflate ~3.6 KB (~2.2KB compressed)
19 This file behaves differently depending on what symbols you define
23 If you do not define `SINFL_IMPLEMENTATION` before including this file, it
24 will operate in header only mode. In this mode it declares all used structs
25 and the API of the library without including the implementation of the library.
28 If you define `SINFL_IMPLEMENTATION` before including this file, it will
29 compile the implementation. Make sure that you only include
30 this file implementation in *one* C or C++ file to prevent collisions.
34 | Compressor name | Compression| Decompress.| Compr. size | Ratio |
35 | ------------------------| -----------| -----------| ----------- | ----- |
36 | sdefl 1.0 -0 | 127 MB/s | 233 MB/s | 40004116 | 39.88 |
37 | sdefl 1.0 -1 | 111 MB/s | 259 MB/s | 38940674 | 38.82 |
38 | sdefl 1.0 -5 | 45 MB/s | 275 MB/s | 36577183 | 36.46 |
39 | sdefl 1.0 -7 | 38 MB/s | 276 MB/s | 36523781 | 36.41 |
40 | zlib 1.2.11 -1 | 72 MB/s | 307 MB/s | 42298774 | 42.30 |
41 | zlib 1.2.11 -6 | 24 MB/s | 313 MB/s | 36548921 | 36.55 |
42 | zlib 1.2.11 -9 | 20 MB/s | 314 MB/s | 36475792 | 36.48 |
43 | miniz 1.0 -1 | 122 MB/s | 208 MB/s | 48510028 | 48.51 |
44 | miniz 1.0 -6 | 27 MB/s | 260 MB/s | 36513697 | 36.51 |
45 | miniz 1.0 -9 | 23 MB/s | 261 MB/s | 36460101 | 36.46 |
46 | libdeflate 1.3 -1 | 147 MB/s | 667 MB/s | 39597378 | 39.60 |
47 | libdeflate 1.3 -6 | 69 MB/s | 689 MB/s | 36648318 | 36.65 |
48 | libdeflate 1.3 -9 | 13 MB/s | 672 MB/s | 35197141 | 35.20 |
49 | libdeflate 1.3 -12 | 8.13 MB/s | 670 MB/s | 35100568 | 35.10 |
52 Results on the [Silesia compression corpus](http://sun.aei.polsl.pl/~sdeor/index.php?page=silesia):
54 | File | Original | `sdefl 0` | `sdefl 5` | `sdefl 7` |
55 | :------ | ---------: | -----------------: | ---------: | ----------: |
56 | dickens | 10.192.446 | 4,260,187| 3,845,261| 3,833,657 |
57 | mozilla | 51.220.480 | 20,774,706 | 19,607,009 | 19,565,867 |
58 | mr | 9.970.564 | 3,860,531 | 3,673,460 | 3,665,627 |
59 | nci | 33.553.445 | 4,030,283 | 3,094,526 | 3,006,075 |
60 | ooffice | 6.152.192 | 3,320,063 | 3,186,373 | 3,183,815 |
61 | osdb | 10.085.684 | 3,919,646 | 3,649,510 | 3,649,477 |
62 | reymont | 6.627.202 | 2,263,378 | 1,857,588 | 1,827,237 |
63 | samba | 21.606.400 | 6,121,797 | 5,462,670 | 5,450,762 |
64 | sao | 7.251.944 | 5,612,421 | 5,485,380 | 5,481,765 |
65 | webster | 41.458.703 | 13,972,648 | 12,059,432 | 11,991,421 |
66 | xml | 5.345.280 | 886,620| 674,009 | 662,141 |
67 | x-ray | 8.474.240 | 6,304,655 | 6,244,779 | 6,244,779 |
71 ------------------------------------------------------------------------------
72 This software is available under 2 licenses -- choose whichever you prefer.
73 ------------------------------------------------------------------------------
74 ALTERNATIVE A - MIT License
75 Copyright (c) 2020 Micha Mettke
76 Permission is hereby granted, free of charge, to any person obtaining a copy of
77 this software and associated documentation files (the "Software"), to deal in
78 the Software without restriction, including without limitation the rights to
79 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
80 of the Software, and to permit persons to whom the Software is furnished to do
81 so, subject to the following conditions:
82 The above copyright notice and this permission notice shall be included in all
83 copies or substantial portions of the Software.
84 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
85 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
86 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
87 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
88 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
89 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
91 ------------------------------------------------------------------------------
92 ALTERNATIVE B - Public Domain (www.unlicense.org)
93 This is free and unencumbered software released into the public domain.
94 Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
95 software, either in source code form or as a compiled binary, for any purpose,
96 commercial or non-commercial, and by any means.
97 In jurisdictions that recognize copyright laws, the author or authors of this
98 software dedicate any and all copyright interest in the software to the public
99 domain. We make this dedication for the benefit of the public at large and to
100 the detriment of our heirs and successors. We intend this dedication to be an
101 overt act of relinquishment in perpetuity of all present and future rights to
102 this software under copyright law.
103 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
104 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
105 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
106 AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
107 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
108 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
109 ------------------------------------------------------------------------------
112 #ifndef SINFL_H_INCLUDED
113 #define SINFL_H_INCLUDED
119 #define SINFL_PRE_TBL_SIZE 128
120 #define SINFL_LIT_TBL_SIZE 1334
121 #define SINFL_OFF_TBL_SIZE 402
125 unsigned lits[SINFL_LIT_TBL_SIZE];
126 unsigned dsts[SINFL_OFF_TBL_SIZE];
128 extern int sinflate(void *out, const void *in, int size);
129 extern int zsinflate(void *out, const void *in, int size);
135 #endif /* SINFL_H_INCLUDED */
137 #ifdef SINFL_IMPLEMENTATION
139 #include <string.h> /* memcpy, memset */
142 sinfl_bsr(unsigned n) {
144 _BitScanReverse(&n, n);
146 #elif defined(__GNUC__) || defined(__clang__)
147 return 31 - __builtin_clz(n);
151 sinfl_get(const unsigned char **src, const unsigned char *end, struct sinfl *s,
153 const unsigned char *in = *src;
154 int v = s->bits & ((1 << n)-1);
156 s->bitcnt = s->bitcnt - n;
157 s->bitcnt = s->bitcnt < 0 ? 0 : s->bitcnt;
158 while (s->bitcnt < 16 && in < end) {
159 s->bits |= (*in++) << s->bitcnt;
172 sinfl_build_tbl(struct sinfl_gen *gen, unsigned *tbl, int tbl_bits,
175 while (!(gen->cnt = cnt[gen->len])) {
178 tbl_end = 1 << gen->len;
179 while (gen->len <= tbl_bits) {
180 do {unsigned bit = 0;
181 tbl[gen->word] = (*gen->sorted++ << 16) | gen->len;
182 if (gen->word == tbl_end - 1) {
183 for (; gen->len < tbl_bits; gen->len++) {
184 memcpy(&tbl[tbl_end], tbl, (size_t)tbl_end * sizeof(tbl[0]));
189 bit = 1 << sinfl_bsr((unsigned)(gen->word ^ (tbl_end - 1)));
190 gen->word &= bit - 1;
192 } while (--gen->cnt);
194 if (++gen->len <= tbl_bits) {
195 memcpy(&tbl[tbl_end], tbl, (size_t)tbl_end * sizeof(tbl[0]));
198 } while (!(gen->cnt = cnt[gen->len]));
203 sinfl_build_subtbl(struct sinfl_gen *gen, unsigned *tbl, int tbl_bits,
208 int tbl_end = 1 << tbl_bits;
212 /* start new subtable */
213 if ((gen->word & ((1 << tbl_bits)-1)) != sub_prefix) {
215 sub_prefix = gen->word & ((1 << tbl_bits)-1);
217 sub_bits = gen->len - tbl_bits;
219 while (used < (1 << sub_bits)) {
221 used = (used << 1) + cnt[tbl_bits + sub_bits];
223 tbl_end = sub_start + (1 << sub_bits);
224 tbl[sub_prefix] = (sub_start << 16) | 0x10 | (sub_bits & 0xf);
227 entry = (*gen->sorted << 16) | ((gen->len - tbl_bits) & 0xf);
229 i = sub_start + (gen->word >> tbl_bits);
230 stride = 1 << (gen->len - tbl_bits);
234 } while (i < tbl_end);
235 if (gen->word == (1 << gen->len)-1) {
238 bit = 1 << sinfl_bsr(gen->word ^ ((1 << gen->len) - 1));
239 gen->word &= bit - 1;
243 gen->cnt = cnt[++gen->len];
248 sinfl_build(unsigned *tbl, unsigned char *lens, int tbl_bits, int maxlen,
252 int cnt[16] = {0}, off[16]= {0};
253 struct sinfl_gen gen = {0};
257 for (i = 0; i < symcnt; ++i)
260 for (i = 1; i < maxlen; ++i) {
261 off[i + 1] = off[i] + cnt[i];
262 used = (used << 1) + cnt[i];
264 used = (used << 1) + cnt[i];
265 for (i = 0; i < symcnt; ++i)
266 gen.sorted[off[lens[i]]++] = (short)i;
267 gen.sorted += off[0];
269 if (used < (1 << maxlen)){
270 for (i = 0; i < 1 << tbl_bits; ++i)
271 tbl[i] = (0 << 16u) | 1;
274 if (!sinfl_build_tbl(&gen, tbl, tbl_bits, cnt)){
275 sinfl_build_subtbl(&gen, tbl, tbl_bits, cnt);
279 sinfl_decode(const unsigned char **in, const unsigned char *end,
280 struct sinfl *s, const unsigned *tbl, int bit_len) {
281 int idx = s->bits & ((1 << bit_len) - 1);
282 unsigned key = tbl[idx];
284 /* sub-table lookup */
285 int len = key & 0x0f;
286 sinfl_get(in, end, s, bit_len);
287 idx = s->bits & ((1 << len)-1);
288 key = tbl[((key >> 16) & 0xffff) + (unsigned)idx];
290 sinfl_get(in, end, s, key & 0x0f);
291 return (key >> 16) & 0x0fff;
294 sinfl_decompress(unsigned char *out, const unsigned char *in, int size) {
295 static const unsigned char order[] = {16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15};
296 static const short dbase[30+2] = {1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,
297 257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577};
298 static const unsigned char dbits[30+2] = {0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,
299 10,10,11,11,12,12,13,13,0,0};
300 static const short lbase[29+2] = {3,4,5,6,7,8,9,10,11,13,15,17,19,23,27,31,35,
301 43,51,59,67,83,99,115,131,163,195,227,258,0,0};
302 static const unsigned char lbits[29+2] = {0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,
303 4,4,4,5,5,5,5,0,0,0};
305 const unsigned char *e = in + size, *o = out;
306 enum sinfl_states {hdr,stored,fixed,dyn,blk};
307 enum sinfl_states state = hdr;
308 struct sinfl s = {0};
311 sinfl_get(&in,e,&s,0); /* buffer input */
312 while (in < e || s.bitcnt) {
315 int type = 0; /* block header */
316 last = sinfl_get(&in,e,&s,1);
317 type = sinfl_get(&in,e,&s,2);
319 switch (type) {default: return (int)(out-o);
320 case 0x00: state = stored; break;
321 case 0x01: state = fixed; break;
322 case 0x02: state = dyn; break;}
325 int len; /* uncompressed block */
326 sinfl_get(&in,e,&s,s.bitcnt & 7);
327 len = sinfl_get(&in,e,&s,16);
328 //int nlen = sinfl_get(&in,e,&s,16);
329 in -= 2; s.bitcnt = 0;
331 if (len > (e-in) || !len)
333 memcpy(out, in, (size_t)len);
334 in += len, out += len;
338 /* fixed huffman codes */
339 int n; unsigned char lens[288+32];
340 for (n = 0; n <= 143; n++) lens[n] = 8;
341 for (n = 144; n <= 255; n++) lens[n] = 9;
342 for (n = 256; n <= 279; n++) lens[n] = 7;
343 for (n = 280; n <= 287; n++) lens[n] = 8;
344 for (n = 0; n < 32; n++) lens[288+n] = 5;
346 /* build lit/dist tables */
347 sinfl_build(s.lits, lens, 10, 15, 288);
348 sinfl_build(s.dsts, lens + 288, 8, 15, 32);
352 /* dynamic huffman codes */
354 unsigned hlens[SINFL_PRE_TBL_SIZE];
355 unsigned char nlens[19] = {0}, lens[288+32];
356 int nlit = 257 + sinfl_get(&in,e,&s,5);
357 int ndist = 1 + sinfl_get(&in,e,&s,5);
358 int nlen = 4 + sinfl_get(&in,e,&s,4);
359 for (n = 0; n < nlen; n++)
360 nlens[order[n]] = (unsigned char)sinfl_get(&in,e,&s,3);
361 sinfl_build(hlens, nlens, 7, 7, 19);
363 /* decode code lengths */
364 for (n = 0; n < nlit + ndist;) {
365 int sym = sinfl_decode(&in, e, &s, hlens, 7);
366 switch (sym) {default: lens[n++] = (unsigned char)sym; break;
367 case 16: for (i=3+sinfl_get(&in,e,&s,2);i;i--,n++) lens[n]=lens[n-1]; break;
368 case 17: for (i=3+sinfl_get(&in,e,&s,3);i;i--,n++) lens[n]=0; break;
369 case 18: for (i=11+sinfl_get(&in,e,&s,7);i;i--,n++) lens[n]=0; break;}
371 /* build lit/dist tables */
372 sinfl_build(s.lits, lens, 10, 15, nlit);
373 sinfl_build(s.dsts, lens + nlit, 8, 15, ndist);
377 /* decompress block */
378 int i, sym = sinfl_decode(&in, e, &s, s.lits, 10);
379 if (sym > 256) {sym -= 257; /* match symbol */
380 {int len = sinfl_get(&in, e, &s, lbits[sym]) + lbase[sym];
381 int dsym = sinfl_decode(&in, e, &s, s.dsts, 8);
382 int offs = sinfl_get(&in, e, &s, dbits[dsym]) + dbase[dsym];
383 if (offs > (int)(out-o)) {
385 } else if (offs == 1) {
386 /* rle match copying */
387 unsigned char c = *(out - offs);
388 unsigned long w = (c << 24) | (c << 16) | (c << 8) | c;
389 for (i = 0; i < len >> 2; ++i) {
394 } else if (offs >= 4) {
397 for (i = 0; i < wcnt; ++i) {
399 memcpy(&w, out - offs, 4);
405 for (i = 0; i < len; ++i)
406 {*out = *(out-offs), out++;}
408 } else if (sym == 256) {
410 if (last) return (int)(out-o);
414 } else *out++ = (unsigned char)sym;
420 sinflate(void *out, const void *in, int size) {
421 return sinfl_decompress((unsigned char*)out, (const unsigned char*)in, size);
424 sinfl_adler32(unsigned adler32, const unsigned char *in, int in_len) {
425 const unsigned ADLER_MOD = 65521;
426 unsigned s1 = adler32 & 0xffff;
427 unsigned s2 = adler32 >> 16;
430 blk_len = in_len % 5552;
432 for (i=0; i + 7 < blk_len; i += 8) {
433 s1 += in[0]; s2 += s1;
434 s1 += in[1]; s2 += s1;
435 s1 += in[2]; s2 += s1;
436 s1 += in[3]; s2 += s1;
437 s1 += in[4]; s2 += s1;
438 s1 += in[5]; s2 += s1;
439 s1 += in[6]; s2 += s1;
440 s1 += in[7]; s2 += s1;
443 for (; i < blk_len; ++i)
444 s1 += *in++, s2 += s1;
445 s1 %= ADLER_MOD; s2 %= ADLER_MOD;
448 } return (unsigned)(s2 << 16) + (unsigned)s1;
451 zsinflate(void *out, const void *mem, int size) {
452 const unsigned char *in = (const unsigned char*)mem;
454 const unsigned char *eob = in + size - 4;
455 int n = sinfl_decompress((unsigned char*)out, in + 2u, size);
456 unsigned a = sinfl_adler32(1u, (unsigned char*)out, n);
457 unsigned h = eob[0] << 24 | eob[1] << 16 | eob[2] << 8 | eob[3] << 0;
458 return a == h ? n : -1;