X-Git-Url: https://git.sesse.net/?p=fjl;a=blobdiff_plain;f=dehuff.h;h=7621f6095e99791feb6138e81c2574ae9dda6bce;hp=6865dae0d7169bbe62b390e86c662d8c6439b13a;hb=a4009687c73083dd0290285a065740a83e27e855;hpb=30860bda5fd2474a3b45b05f6b89dcf7230a75a8

Note: line breaks and indentation below were reconstructed from a flattened
copy of this patch. The header names of the two angle-bracket #include lines
could not be recovered and are left blank.

diff --git a/dehuff.h b/dehuff.h
index 6865dae..7621f60 100644
--- a/dehuff.h
+++ b/dehuff.h
@@ -6,7 +6,7 @@
 #include
 
 #include "bytesource.h"
-#include "input.h"
+#include "bitsource.h"
 
 // About 99% of all Huffman codes are <= 8 bits long (see codelen.txt),
 // and it's what libjpeg uses. Thus, it seems like a reasonable size.
@@ -43,19 +43,20 @@ enum coefficient_class {
 typedef struct huffman_table huffman_tables_t[NUM_COEFF_CLASSES][4];
 
 // Read Huffman tables from a stream, and compute the derived values.
-void read_huffman_tables(huffman_tables_t* dst, raw_input_func_t* input_func, void* userdata);
+void read_huffman_tables(huffman_tables_t* dst, input_func_t* input_func, void* userdata);
 
 unsigned read_huffman_symbol_slow_path(const struct huffman_table* table,
                                        struct bit_source* source);
 
-#include
-
-static inline unsigned read_huffman_symbol(const struct huffman_table* table,
-                                           struct bit_source* source)
+// Decode one symbol without refilling <source> first: peek DEHUF_TABLE_BITS
+// bits, look up code and length in <table>, then consume <length> bits.
+// Entries marked DEHUF_SLOW_PATH go to read_huffman_symbol_slow_path().
+// The caller must have DEHUF_TABLE_BITS bits available (assert-checked only).
+static inline unsigned read_huffman_symbol_no_refill(
+	const struct huffman_table* table,
+	struct bit_source* source)
 {
-	// FIXME: We can read past the end of the stream here in some edge
-	// cases. We need to define some guarantees in the layers above.
-	possibly_refill(source, DEHUF_TABLE_BITS);
+	assert(source->bits_available >= DEHUF_TABLE_BITS);
 	unsigned lookup = peek_bits(source, DEHUF_TABLE_BITS);
 	int code = table->lookup_table_codes[lookup];
 	int length = table->lookup_table_length[lookup];
@@ -63,9 +64,53 @@ static inline unsigned read_huffman_symbol(const struct huffman_table* table,
 	if (code == DEHUF_SLOW_PATH) {
 		return read_huffman_symbol_slow_path(table, source);
 	}
-	
+
 	read_bits(source, length);
 	return code;
 }
 
+// Calls possibly_refill() for DEHUF_TABLE_BITS bits, then decodes one symbol.
+static inline unsigned read_huffman_symbol(const struct huffman_table* table,
+                                           struct bit_source* source)
+{
+	possibly_refill(source, DEHUF_TABLE_BITS);
+	return read_huffman_symbol_no_refill(table, source);
+}
+
+// procedure EXTEND (figure F.12)
+
+// Fast lookup table for (1 << (bits - 1)).
+// The table actually helps, since the load can go in parallel with the shift
+// operation below.
+static const int bit_thresholds[16] = {
+	0, 1 << 0, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7, 1 << 8, 1 << 9, 1 << 10, 1 << 11, 1 << 12, 1 << 13, 1 << 14
+};
+
+// Returns <val> unchanged when it is >= bit_thresholds[bits]; otherwise
+// returns val - 2^bits + 1 (a negative number, converted to unsigned by the
+// return type). <bits> indexes bit_thresholds[], so it must be below 16;
+// this is not checked here.
+// The offset is spelled -(1 << bits) instead of (-1 << bits), because
+// left-shifting a negative value is undefined behavior in C.
+static inline unsigned extend(int val, unsigned bits)
+{
+#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
+	// GCC should ideally be able to figure out that the conditional move is better, but
+	// it doesn't for various reasons, and this is pretty important for speed, so we hardcode.
+	asm("cmp %2, %0 ; cmovl %3, %0"
+		: "=r" (val)
+		: "0" (val),
+		  "g" (bit_thresholds[bits]),
+		  "r" (val - (1 << bits) + 1)
+		: "cc");
+	return val;
+#else
+	if (val < bit_thresholds[bits]) {
+		return val - (1 << bits) + 1;
+	} else {
+		return val;
+	}
+#endif
+}
+
 #endif /* !defined(_DEHUFF_H) */