Add an x86 optimized version of extend().
author Steinar H. Gunderson <sesse@debian.org>
Mon, 1 Jun 2009 00:37:26 +0000 (02:37 +0200)
committer Steinar H. Gunderson <sesse@debian.org>
Mon, 1 Jun 2009 00:37:26 +0000 (02:37 +0200)
dehuff.h

index f29b815..7621f60 100644 (file)
--- a/dehuff.h
+++ b/dehuff.h
@@ -83,11 +83,23 @@ static const int bit_thresholds[16] = {
 
 static inline unsigned extend(int val, unsigned bits)
 {
+#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
+       // Branch-free equivalent of the C fallback in the #else arm: GCC does not reliably emit a
+       // conditional move here, and this function is important for speed, so cmp/cmovl is hardcoded.
+       asm("cmp %2, %0 ; cmovl %3, %0"
+               : "=r" (val)                      // %0: result, held in a register
+               : "0" (val),                      // %1: incoming val, tied to the same location as %0
+                 "g" (bit_thresholds[bits]),     // %2: threshold that val is compared against
+                 "r" (val + (-1 << bits) + 1)    // %3: value moved into %0 when val < threshold (signed)
+               : "cc");                          // cmp modifies the condition flags
+       return val;
+#else
         if (val < bit_thresholds[bits]) {
                 return val + (-1 << bits) + 1;
         } else {
                 return val;
         }
+#endif
 }
 
 #endif /* !defined(_DEHUFF_H) */