#define DEHUF_TABLE_SIZE (1 << DEHUF_TABLE_BITS)
static const int DEHUF_SLOW_PATH = -1;
+// About 98% of all AC coefficients (control byte + coefficient) are <= 10 bits
+// long; again, see codelen.txt. This will cost us about 6 kB of data to store
+// in L1 cache (1024 entries x (4 + 1 + 1) bytes, assuming a 4-byte int).
+#define DEHUF_AC_TABLE_BITS 10
+#define DEHUF_AC_TABLE_SIZE (1 << DEHUF_AC_TABLE_BITS)
+static const int AC_DEHUF_SLOW_PATH = 0xf0000000;  // NOTE(review): these three values exceed INT_MAX if int is 32 bits, so they rely on the unsigned-to-int conversion yielding a negative value; confirm for target compilers.
+static const int AC_END_OF_BLOCK = 0xf0000001;
+static const int AC_SIXTEEN_ZEROS = 0xf0000002;
+
struct huffman_table {  // One Huffman table together with its precomputed decoding lookup tables.
unsigned num_codes[17]; // BITS
unsigned char codes[256]; // HUFFVAL
// The lookup tables are int to avoid extra zero extension.
int lookup_table_codes[DEHUF_TABLE_SIZE];
int lookup_table_length[DEHUF_TABLE_SIZE];
+
+ // Further lookup tables for decoding AC coefficients.
+ // (Generated but obviously not used for DC coefficients.)
+ // Maps from 10-bit (DEHUF_AC_TABLE_BITS) lookahead values to the signed
+ // coefficient (_codes), number of bits to skip (_length) and the number of
+ // zero coefficients after this one (_skip).
+ int ac_table_codes[DEHUF_AC_TABLE_SIZE];
+ uint8_t ac_table_length[DEHUF_AC_TABLE_SIZE];
+ uint8_t ac_table_skip[DEHUF_AC_TABLE_SIZE];
};
enum coefficient_class {
#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
// GCC should ideally be able to figure out that the conditional move is better, but
// it doesn't for various reasons, and this is pretty important for speed, so we hardcode.
- asm("cmp %2, %0 ; cmovl %3, %0"
- : "=r" (val)
- : "0" (val),
- "g" (bit_thresholds[bits]),
+ asm("cmp %1, %0 ; cmovl %2, %0"
+ : "+r" (val)
+ : "g" (bit_thresholds[bits]),
"r" (val + (-1 << bits) + 1)
: "cc");
return val;