X-Git-Url: https://git.sesse.net/?p=fjl;a=blobdiff_plain;f=dehuff.h;h=0f86c3b9c4f4e6703e13751dbb9d5306267ed264;hp=7621f6095e99791feb6138e81c2574ae9dda6bce;hb=bbe665e5ef6a103657f4d921ddd4b540f38daf3d;hpb=a4009687c73083dd0290285a065740a83e27e855 diff --git a/dehuff.h b/dehuff.h index 7621f60..0f86c3b 100644 --- a/dehuff.h +++ b/dehuff.h @@ -14,6 +14,15 @@ #define DEHUF_TABLE_SIZE (1 << DEHUF_TABLE_BITS) static const int DEHUF_SLOW_PATH = -1; +// About 98% of all AC coefficients (control byte + coefficient) are <= 10 bits +// long; again, see codelen.txt. This will cost us about 6 kB of data to store +// in L1 cache. +#define DEHUF_AC_TABLE_BITS 10 +#define DEHUF_AC_TABLE_SIZE (1 << DEHUF_AC_TABLE_BITS) +static const int AC_DEHUF_SLOW_PATH = 0xf0000000; +static const int AC_END_OF_BLOCK = 0xf0000001; +static const int AC_SIXTEEN_ZEROS = 0xf0000002; + struct huffman_table { unsigned num_codes[17]; // BITS unsigned char codes[256]; // HUFFVAL @@ -33,6 +42,15 @@ struct huffman_table { // the lookup tables is int to avoid extra zero extending. int lookup_table_codes[DEHUF_TABLE_SIZE]; int lookup_table_length[DEHUF_TABLE_SIZE]; + + // Further lookup tables for decoding AC coefficients. + // (Generated but obviously not used for DC coefficients.) + // Maps from 10-bit lookahead values to the signed coefficient (_codes), + // number of bits to skip (_length) and the number of zero coefficients + // after this one (_skip). + int ac_table_codes[DEHUF_AC_TABLE_SIZE]; + uint8_t ac_table_length[DEHUF_AC_TABLE_SIZE]; + uint8_t ac_table_skip[DEHUF_AC_TABLE_SIZE]; }; enum coefficient_class { @@ -86,10 +104,9 @@ static inline unsigned extend(int val, unsigned bits) #if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) // GCC should ideally be able to figure out that the conditional move is better, but // it doesn't for various reasons, and this is pretty important for speed, so we hardcode. 
- asm("cmp %2, %0 ; cmovl %3, %0" - : "=r" (val) - : "0" (val), - "g" (bit_thresholds[bits]), + asm("cmp %1, %0 ; cmovl %2, %0" + : "+r" (val) + : "g" (bit_thresholds[bits]), "r" (val + (-1 << bits) + 1) : "cc"); return val;