Add code for efficient (?) Huffman decoding.
authorSteinar H. Gunderson <sesse@debian.org>
Fri, 2 Jan 2009 20:54:48 +0000 (21:54 +0100)
committerSteinar H. Gunderson <sesse@debian.org>
Fri, 2 Jan 2009 20:54:48 +0000 (21:54 +0100)
Makefile
dehuff.c
dehuff.h
dehuff_test.c
input.h

index ecee87c..37dfc95 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -12,12 +12,10 @@ INPUT_TEST_OBJS=input.o input_test.o
 input_test: $(INPUT_TEST_OBJS)
        $(CC) $(LDFLAGS) -o $@ $(INPUT_TEST_OBJS)
 
-DEHUFF_TEST_OBJS=dehuff.o dehuff_test.o
+DEHUFF_TEST_OBJS=dehuff.o input.o dehuff_test.o
 dehuff_test: $(DEHUFF_TEST_OBJS)
        $(CC) $(LDFLAGS) -o $@ $(DEHUFF_TEST_OBJS)
 
-OBJS=$(UNSTUFF_TEST_OBJS)
-
 tests: unstuff_test input_test dehuff_test
 clean:
        $(RM) $(UNSTUFF_TEST_OBJS) unstuff_test
index 3e0fd6c..0a9fbbc 100644 (file)
--- a/dehuff.c
+++ b/dehuff.c
@@ -121,7 +121,53 @@ void read_huffman_tables(huffman_tables_t* dst, raw_input_func_t* input_func, vo
                        j += tbl->num_codes[i];
                        tbl->maxcode[i] = tbl->huffcode[j - 1];
                }
+
+               // Generate the lookup tables
+               for (unsigned i = 0; i < DEHUF_TABLE_SIZE; ++i) {
+                       tbl->lookup_table_codes[i] = DEHUF_SLOW_PATH;
+                       tbl->lookup_table_length[i] = DEHUF_SLOW_PATH;
+               }
+       
+               unsigned k = 0; 
+               for (unsigned i = 0; i < 16; ++i) {
+                       for (unsigned j = 0; j < tbl->num_codes[i]; ++j, ++k) {
+                               const unsigned code = tbl->huffcode[k];
+                               const unsigned length = tbl->huffsize[k];
+                               if (length > DEHUF_TABLE_BITS) {
+                                       continue;
+                               }
+                               const unsigned prefix_min = code << (DEHUF_TABLE_BITS - length);
+                               const unsigned prefix_max = ((code + 1) << (DEHUF_TABLE_BITS - length));
+
+                               for (unsigned elem = prefix_min; elem < prefix_max; ++elem) {
+                                       assert(tbl->lookup_table_codes[elem] == DEHUF_SLOW_PATH);
+                                       assert(tbl->lookup_table_length[elem] == DEHUF_SLOW_PATH);
+                                       tbl->lookup_table_codes[elem] = k;
+                                       tbl->lookup_table_length[elem] = length;
+                               }
+                       }
+               }       
        }
 
        free(buf);
 }
+
+unsigned read_huffman_symbol_slow_path(const struct huffman_table* table,
+                                       struct bit_source* source) {
+       possibly_refill(source, 1);
+       unsigned code = read_bits(source, 1);
+       int i = 0;
+
+       while (code > table->maxcode[i] || table->maxcode[i] == -1) {
+               possibly_refill(source, 1);
+               code = (code << 1) | read_bits(source, 1);
+               ++i;
+
+               if (i > 15) {
+                       fprintf(stderr, "Error in Huffman decoding: Too long code (%x)\n", code);
+                       exit(1);
+               }
+       }
+
+       return table->codes[table->valptr[i] + code - table->mincode[i]];
+}
index b79a79a..d93e793 100644 (file)
--- a/dehuff.h
+++ b/dehuff.h
@@ -5,6 +5,14 @@
 #include <stdint.h>
 #include <sys/types.h>
 
+#include "input.h"
+
+// About 99% of all Huffman codes are <= 8 bits long (see codelen.txt),
+// and it's what libjpeg uses. Thus, it seems like a reasonable size.
+#define DEHUF_TABLE_BITS 8
+#define DEHUF_TABLE_SIZE (1 << DEHUF_TABLE_BITS)
+static const int DEHUF_SLOW_PATH = -1;
+
 // A function to read bytes from some input source. The bytes should be
 // already unstuffed (and thus without markers).
 // A return value of -1 indicates error, a return value of 0 indicates EOF.
@@ -20,6 +28,15 @@ struct huffman_table {
        int maxcode[16];
        int mincode[16];
        unsigned valptr[16];
+
+       // Lookup table for fast decoding; given eight bits,
+       // return the symbol and length in bits. For longer codes,
+       // DEHUF_SLOW_PATH is returned.
+
+       // Note that the codes we return are 8-bit, but the type of
+       // the lookup tables is int to avoid extra zero extending. 
+       int lookup_table_codes[DEHUF_TABLE_SIZE]; 
+       int lookup_table_length[DEHUF_TABLE_SIZE]; 
 };
 
 enum coefficient_class {
@@ -29,6 +46,30 @@ enum coefficient_class {
 };
 typedef struct huffman_table huffman_tables_t[NUM_COEFF_CLASSES][4];
 
+// Read Huffman tables from a stream, and compute the derived values.
 void read_huffman_tables(huffman_tables_t* dst, raw_input_func_t* input_func, void* userdata);
 
+unsigned read_huffman_symbol_slow_path(const struct huffman_table* table,
+                                       struct bit_source* source);
+
+#include <stdio.h>
+
+static inline unsigned read_huffman_symbol(const struct huffman_table* table,
+                                           struct bit_source* source)
+{
+       // FIXME: We can read past the end of the stream here in some edge
+       // cases. We need to define some guarantees in the layers above.
+       possibly_refill(source, DEHUF_TABLE_BITS);
+       unsigned lookup = peek_bits(source, DEHUF_TABLE_BITS);
+       int code = table->lookup_table_codes[lookup];
+       int length = table->lookup_table_length[lookup];
+
+       if (code == DEHUF_SLOW_PATH) {
+               return read_huffman_symbol_slow_path(table, source);
+       }
+
+       read_bits(source, length);
+       return code;
+}
+
 #endif /* !defined(_DEHUFF_H) */
index e383a1d..0212244 100644 (file)
@@ -19,23 +19,36 @@ ssize_t custom_read(void* userdata, uint8_t* buf, size_t count)
        return num_to_read;     
 }
 
-// Uses the example from section K.3.1 from the JPEG standard.
-void test_table_gen()
-{
-       uint8_t bytes[] = {
-               // Chunk length: Chunk length (2) + coefficient class (1) + 
-               // code lengths (16) + code words (12)
-               0, 31, 
+// The example from section K.3.1 from the JPEG standard.
+uint8_t example_table_bytes[] = {
+       // Chunk length: Chunk length (2) + coefficient class (1) + 
+       // code lengths (16) + code words (12)
+       0, 31, 
 
-               // DC coefficient, table 0
-               0x00,
+       // DC coefficient, table 0
+       0x00,
 
-               // List of code lengths
-               0x00, 0x01, 0x05, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+       // List of code lengths
+       0x00, 0x01, 0x05, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
 
-               // Code words
-               0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b,
-       };
+       // Code words
+       0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b,
+};
+
+void read_example_tables(huffman_tables_t* tables)
+{
+       struct custom_read_userdata ud;
+       ud.bytes = example_table_bytes;
+       ud.bytes_left = sizeof(example_table_bytes);
+
+       read_huffman_tables(tables, custom_read, &ud);
+}      
+
+// Test that the generated code tables are what we expect.
+void test_table_gen()
+{
+       huffman_tables_t tables;
+       read_example_tables(&tables);
 
        // Expected results (table K.3)
        struct {
@@ -55,19 +68,44 @@ void test_table_gen()
                { 8, 0xfe },
                { 9, 0x1fe },
        };
+       
+       struct huffman_table* tbl = &tables[DC_CLASS][0];       
+       for (unsigned i = 0; i < 12; ++i) {
+               assert(tbl->huffsize[i] == expected_table[i].code_length);
+               assert(tbl->huffcode[i] == expected_table[i].code_word);
+       }
+}
+
+// Test that we can decode a simple bit stream.
+// Note that since we end on a long code, we won't crash into
+// the end-of-stream problems we currently have.
+void test_decoding()
+{
+       huffman_tables_t tables;
+       read_example_tables(&tables);
+       
+       // Our stream looks like this:
+       //
+       //  0   1   2   3   4   5    6     7      8       9       10        11
+       // 00 010 011 100 101 110 1110 11110 111110 1111110 11111110 111111110
+       uint8_t bytes[] = {
+               0x13, 0x97, 0x77, 0xbe, 0xfd, 0xfd, 0xfe
+       };
 
        struct custom_read_userdata ud;
        ud.bytes = bytes;
        ud.bytes_left = sizeof(bytes);
 
-       huffman_tables_t tables;
-       read_huffman_tables(&tables, custom_read, &ud);
+       struct bit_source source;
+       init_bit_source(&source, custom_read, &ud);
                
        struct huffman_table* tbl = &tables[DC_CLASS][0];       
        for (unsigned i = 0; i < 12; ++i) {
-               assert(tbl->huffsize[i] == expected_table[i].code_length);
-               assert(tbl->huffcode[i] == expected_table[i].code_word);
+               unsigned symbol = read_huffman_symbol(tbl, &source);
+               assert(symbol == i);
        }
+
+       assert(source.bits_available == 0);
 }
 
 int main(void)
@@ -75,6 +113,9 @@ int main(void)
        printf("test_table_gen()\n");
        test_table_gen();
        
+       printf("test_decoding()\n");
+       test_decoding();
+       
        printf("All tests pass.\n");
        return 0;
 }
diff --git a/input.h b/input.h
index aa4c337..19315e5 100644 (file)
--- a/input.h
+++ b/input.h
@@ -87,4 +87,10 @@ static inline unsigned read_bits(struct bit_source* source, unsigned num_bits)
        return ret;
 }
 
+static inline unsigned peek_bits(struct bit_source* source, unsigned num_bits)
+{
+       assert(source->bits_available >= num_bits);
+       return (source->bits >> (BITRESERVOIR_SIZE - num_bits));
+}
+
 #endif /* !defined(_INPUT_H) */