X-Git-Url: https://git.sesse.net/?p=fjl;a=blobdiff_plain;f=dehuff.h;h=0f86c3b9c4f4e6703e13751dbb9d5306267ed264;hp=7621f6095e99791feb6138e81c2574ae9dda6bce;hb=bbe665e5ef6a103657f4d921ddd4b540f38daf3d;hpb=a4009687c73083dd0290285a065740a83e27e855

diff --git a/dehuff.h b/dehuff.h
index 7621f60..0f86c3b 100644
--- a/dehuff.h
+++ b/dehuff.h
@@ -14,6 +14,15 @@
 #define DEHUF_TABLE_SIZE (1 << DEHUF_TABLE_BITS)
 static const int DEHUF_SLOW_PATH = -1;
 
+// About 98% of all AC coefficients (control byte + coefficient) are <= 10 bits
+// long; again, see codelen.txt. This will cost us about 6 kB of data to store
+// in L1 cache (one int plus two uint8_t per entry; assumes 4-byte int).
+#define DEHUF_AC_TABLE_BITS 10  // width in bits of the lookahead used to index the ac_table_* arrays
+#define DEHUF_AC_TABLE_SIZE (1 << DEHUF_AC_TABLE_BITS)  // number of entries in each ac_table_* array
+static const int AC_DEHUF_SLOW_PATH = 0xf0000000;  // NOTE(review): these 0xf... values exceed INT_MAX for 32-bit int; conversion to int is implementation-defined
+static const int AC_END_OF_BLOCK = 0xf0000001;  // presumably these three are marker values stored in ac_table_codes -- TODO confirm against the decoder
+static const int AC_SIXTEEN_ZEROS = 0xf0000002;  // presumably corresponds to a run of sixteen zero coefficients -- TODO confirm
+
 struct huffman_table {
 	unsigned num_codes[17];     // BITS
 	unsigned char codes[256];   // HUFFVAL
@@ -33,6 +42,15 @@ struct huffman_table {
 	// the lookup tables is int to avoid extra zero extending. 
 	int lookup_table_codes[DEHUF_TABLE_SIZE]; 
 	int lookup_table_length[DEHUF_TABLE_SIZE]; 
+	
+	// Further lookup tables for decoding AC coefficients.
+	// (Generated but obviously not used for DC coefficients.)
+	// Maps from 10-bit lookahead values to the signed coefficient (_codes),
+	// number of bits to skip (_length) and the number of zero coefficients
+	// after this one (_skip).
+	int ac_table_codes[DEHUF_AC_TABLE_SIZE]; 
+	uint8_t ac_table_length[DEHUF_AC_TABLE_SIZE]; 
+	uint8_t ac_table_skip[DEHUF_AC_TABLE_SIZE]; 
 };
 
 enum coefficient_class {
@@ -86,10 +104,9 @@ static inline unsigned extend(int val, unsigned bits)
 #if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
 	// GCC should ideally be able to figure out that the conditional move is better, but
 	// it doesn't for various reasons, and this is pretty important for speed, so we hardcode.
-	asm("cmp %2, %0 ; cmovl %3, %0"
-		: "=r" (val)
-		: "0" (val),
-		  "g" (bit_thresholds[bits]),
+	asm("cmp %1, %0 ; cmovl %2, %0"
+		: "+r" (val)
+		: "g" (bit_thresholds[bits]),
  		  "r" (val + (-1 << bits) + 1)
 		: "cc");
 	return val;