From: Steinar H. Gunderson Date: Sun, 31 May 2009 23:54:31 +0000 (+0200) Subject: Microoptimization: Use a form of byteswapping that gcc realizes is auto-zero... X-Git-Url: https://git.sesse.net/?p=fjl;a=commitdiff_plain;h=51c6a589f88aea02b49673ec40745d3a753976cd;hp=0932a33d5fa817a07778f85c05681ed77131bac6 Microoptimization: Use a form of byteswapping that gcc realizes is auto-zero-extending. --- diff --git a/bitsource.h b/bitsource.h index 60744b3..f2f164d 100644 --- a/bitsource.h +++ b/bitsource.h @@ -15,9 +15,16 @@ typedef uint64_t bitreservoir_t; typedef uint32_t bitreservoir_fill_t; -static inline bitreservoir_fill_t read_bitreservoir_fill(uint8_t* source) +// Note: We return bitreservoir_t here, so we can get implicit zero extension on amd64. +static inline bitreservoir_t read_bitreservoir_fill(uint8_t* source) { +#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) + bitreservoir_t ret; + asm("bswapl %1" : "=r" (ret) : "0" (*(bitreservoir_fill_t*)(source))); + return ret; +#else return ntohl(*(bitreservoir_fill_t*)(source)); +#endif } static const unsigned BITRESERVOIR_SIZE = 8 * sizeof(bitreservoir_t); @@ -71,7 +78,7 @@ static inline void possibly_refill(struct bit_source* source, unsigned num_bits) // Slower path (~99% of remaining invocations?) assert(source->bits_available + BITRESERVOIR_FILL_SIZE < BITRESERVOIR_SIZE); if (source->bytes_available >= sizeof(bitreservoir_fill_t)) { - bitreservoir_fill_t fill = read_bitreservoir_fill(source->byte_read_ptr); + bitreservoir_t fill = read_bitreservoir_fill(source->byte_read_ptr); source->byte_read_ptr += sizeof(bitreservoir_fill_t); source->bytes_available -= sizeof(bitreservoir_fill_t); source->bits |= (bitreservoir_t)fill << (BITRESERVOIR_SIZE - BITRESERVOIR_FILL_SIZE - source->bits_available);