#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
// GCC should ideally be able to figure out that the conditional move is better, but
// it doesn't for various reasons, and this is pretty important for speed, so we hardcode.
- asm("cmp %2, %0 ; cmovl %3, %0"
- : "=r" (val)
- : "0" (val),
- "g" (bit_thresholds[bits]),
+ asm("cmp %1, %0 ; cmovl %2, %0"
+ : "+r" (val)
+ : "g" (bit_thresholds[bits]),
"r" (val + (-1 << bits) + 1)
: "cc");
return val;