From 618548d1f2e076a3da21368e708cf887dcbd20d2 Mon Sep 17 00:00:00 2001 From: "Steinar H. Gunderson" Date: Fri, 2 Jan 2009 21:54:48 +0100 Subject: [PATCH] Add code for efficient (?) Huffman decoding. --- Makefile | 4 +-- dehuff.c | 46 ++++++++++++++++++++++++++++++ dehuff.h | 41 +++++++++++++++++++++++++++ dehuff_test.c | 77 +++++++++++++++++++++++++++++++++++++++------------ input.h | 6 ++++ 5 files changed, 153 insertions(+), 21 deletions(-) diff --git a/Makefile b/Makefile index ecee87c..37dfc95 100644 --- a/Makefile +++ b/Makefile @@ -12,12 +12,10 @@ INPUT_TEST_OBJS=input.o input_test.o input_test: $(INPUT_TEST_OBJS) $(CC) $(LDFLAGS) -o $@ $(INPUT_TEST_OBJS) -DEHUFF_TEST_OBJS=dehuff.o dehuff_test.o +DEHUFF_TEST_OBJS=dehuff.o input.o dehuff_test.o dehuff_test: $(DEHUFF_TEST_OBJS) $(CC) $(LDFLAGS) -o $@ $(DEHUFF_TEST_OBJS) -OBJS=$(UNSTUFF_TEST_OBJS) - tests: unstuff_test input_test dehuff_test clean: $(RM) $(UNSTUFF_TEST_OBJS) unstuff_test diff --git a/dehuff.c b/dehuff.c index 3e0fd6c..0a9fbbc 100644 --- a/dehuff.c +++ b/dehuff.c @@ -121,7 +121,53 @@ void read_huffman_tables(huffman_tables_t* dst, raw_input_func_t* input_func, vo j += tbl->num_codes[i]; tbl->maxcode[i] = tbl->huffcode[j - 1]; } + + // Generate the lookup tables + for (unsigned i = 0; i < DEHUF_TABLE_SIZE; ++i) { + tbl->lookup_table_codes[i] = DEHUF_SLOW_PATH; + tbl->lookup_table_length[i] = DEHUF_SLOW_PATH; + } + + unsigned k = 0; + for (unsigned i = 0; i < 16; ++i) { + for (unsigned j = 0; j < tbl->num_codes[i]; ++j, ++k) { + const unsigned code = tbl->huffcode[k]; + const unsigned length = tbl->huffsize[k]; + if (length > DEHUF_TABLE_BITS) { + continue; + } + const unsigned prefix_min = code << (DEHUF_TABLE_BITS - length); + const unsigned prefix_max = ((code + 1) << (DEHUF_TABLE_BITS - length)); + + for (unsigned elem = prefix_min; elem < prefix_max; ++elem) { + assert(tbl->lookup_table_codes[elem] == DEHUF_SLOW_PATH); + assert(tbl->lookup_table_length[elem] == DEHUF_SLOW_PATH); + tbl->lookup_table_codes[elem] = k; + tbl->lookup_table_length[elem] = length; + } + } + } } free(buf); } + +unsigned read_huffman_symbol_slow_path(const struct huffman_table* table, + struct bit_source* source) { + possibly_refill(source, 1); + unsigned code = read_bits(source, 1); + int i = 0; + + while (code > table->maxcode[i] || table->maxcode[i] == -1) { + possibly_refill(source, 1); + code = (code << 1) | read_bits(source, 1); + ++i; + + if (i > 15) { + fprintf(stderr, "Error in Huffman decoding: Too long code (%x)\n", code); + exit(1); + } + } + + return table->codes[table->valptr[i] + code - table->mincode[i]]; +} diff --git a/dehuff.h b/dehuff.h index b79a79a..d93e793 100644 --- a/dehuff.h +++ b/dehuff.h @@ -5,6 +5,14 @@ #include #include +#include "input.h" + +// About 99% of all Huffman codes are <= 8 bits long (see codelen.txt), +// and it's what libjpeg uses. Thus, it seems like a reasonable size. +#define DEHUF_TABLE_BITS 8 +#define DEHUF_TABLE_SIZE (1 << DEHUF_TABLE_BITS) +static const int DEHUF_SLOW_PATH = -1; + // A function to read bytes from some input source. The bytes should be // already unstuffed (and thus without markers). // A return value of -1 indicates error, a return value of 0 indicates EOF. @@ -20,6 +28,15 @@ struct huffman_table { int maxcode[16]; int mincode[16]; unsigned valptr[16]; + + // Lookup table for fast decoding; given eight bits, + // return the symbol and length in bits. For longer codes, + // DEHUF_SLOW_PATH is returned. + + // Note that the codes we return are 8-bit, but the type of + // the lookup tables is int to avoid extra zero extending. + int lookup_table_codes[DEHUF_TABLE_SIZE]; + int lookup_table_length[DEHUF_TABLE_SIZE]; }; enum coefficient_class { @@ -29,6 +46,30 @@ enum coefficient_class { }; typedef struct huffman_table huffman_tables_t[NUM_COEFF_CLASSES][4]; +// Read Huffman tables from a stream, and compute the derived values. void read_huffman_tables(huffman_tables_t* dst, raw_input_func_t* input_func, void* userdata); +unsigned read_huffman_symbol_slow_path(const struct huffman_table* table, + struct bit_source* source); + +#include + +static inline unsigned read_huffman_symbol(const struct huffman_table* table, + struct bit_source* source) +{ + // FIXME: We can read past the end of the stream here in some edge + // cases. We need to define some guarantees in the layers above. + possibly_refill(source, DEHUF_TABLE_BITS); + unsigned lookup = peek_bits(source, DEHUF_TABLE_BITS); + int code = table->lookup_table_codes[lookup]; + int length = table->lookup_table_length[lookup]; + + if (code == DEHUF_SLOW_PATH) { + return read_huffman_symbol_slow_path(table, source); + } + + read_bits(source, length); + return code; +} + #endif /* !defined(_DEHUFF_H) */ diff --git a/dehuff_test.c b/dehuff_test.c index e383a1d..0212244 100644 --- a/dehuff_test.c +++ b/dehuff_test.c @@ -19,23 +19,36 @@ ssize_t custom_read(void* userdata, uint8_t* buf, size_t count) return num_to_read; } -// Uses the example from section K.3.1 from the JPEG standard. -void test_table_gen() -{ - uint8_t bytes[] = { - // Chunk length: Chunk length (2) + coefficient class (1) + - // code lengths (16) + code words (12) - 0, 31, +// The example from section K.3.1 from the JPEG standard. +uint8_t example_table_bytes[] = { + // Chunk length: Chunk length (2) + coefficient class (1) + + // code lengths (16) + code words (12) + 0, 31, - // DC coefficient, table 0 - 0x00, + // DC coefficient, table 0 + 0x00, - // List of code lengths - 0x00, 0x01, 0x05, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + // List of code lengths + 0x00, 0x01, 0x05, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - // Code words - 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, - }; + // Code words + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, +}; + +void read_example_tables(huffman_tables_t* tables) +{ + struct custom_read_userdata ud; + ud.bytes = example_table_bytes; + ud.bytes_left = sizeof(example_table_bytes); + + read_huffman_tables(tables, custom_read, &ud); +} + +// Test that the generated code tables are what we expect. +void test_table_gen() +{ + huffman_tables_t tables; + read_example_tables(&tables); // Expected results (table K.3) struct { @@ -55,19 +68,44 @@ void test_table_gen() { 8, 0xfe }, { 9, 0x1fe }, }; + + struct huffman_table* tbl = &tables[DC_CLASS][0]; + for (unsigned i = 0; i < 12; ++i) { + assert(tbl->huffsize[i] == expected_table[i].code_length); + assert(tbl->huffcode[i] == expected_table[i].code_word); + } +} + +// Test that we can decode a simple bit stream. +// Note that since we end on a long code, we won't crash into +// the end-of-stream problems we currently have. +void test_decoding() +{ + huffman_tables_t tables; + read_example_tables(&tables); + + // Our stream looks like this: + // + // 0 1 2 3 4 5 6 7 8 9 10 11 + // 00 010 011 100 101 110 1110 11110 111110 1111110 11111110 111111110 + uint8_t bytes[] = { + 0x13, 0x97, 0x77, 0xbe, 0xfd, 0xfd, 0xfe + }; struct custom_read_userdata ud; ud.bytes = bytes; ud.bytes_left = sizeof(bytes); - huffman_tables_t tables; - read_huffman_tables(&tables, custom_read, &ud); + struct bit_source source; + init_bit_source(&source, custom_read, &ud); struct huffman_table* tbl = &tables[DC_CLASS][0]; for (unsigned i = 0; i < 12; ++i) { - assert(tbl->huffsize[i] == expected_table[i].code_length); - assert(tbl->huffcode[i] == expected_table[i].code_word); + unsigned symbol = read_huffman_symbol(tbl, &source); + assert(symbol == i); } + + assert(source.bits_available == 0); } int main(void) @@ -75,6 +113,9 @@ int main(void) printf("test_table_gen()\n"); test_table_gen(); + printf("test_decoding()\n"); + test_decoding(); + printf("All tests pass.\n"); return 0; } diff --git a/input.h b/input.h index aa4c337..19315e5 100644 --- a/input.h +++ b/input.h @@ -87,4 +87,10 @@ static inline unsigned read_bits(struct bit_source* source, unsigned num_bits) return ret; } +static inline unsigned peek_bits(struct bit_source* source, unsigned num_bits) +{ + assert(source->bits_available >= num_bits); + return (source->bits >> (BITRESERVOIR_SIZE - num_bits)); +} + #endif /* !defined(_INPUT_H) */ -- 2.39.2