On 64-bit systems we can use the bsfq instruction to find
the least significant set bit in a bitboard.
This is a patch for the GCC and Intel compilers to take advantage
of that and get a 2% speedup.
Original patch from Heinz van Saanen, adapted to current tree
by me.
No functional change.
Signed-off-by: Marco Costalba <mcostalba@gmail.com>
21, 22, 22, 22, 22, 22, 22, 21, 20, 21, 21, 21, 21, 21, 21, 20
};
21, 22, 22, 22, 22, 22, 22, 21, 20, 21, 21, 21, 21, 21, 21, 20
};
+#endif // defined(IS_64BIT)
/// pop_1st_bit() finds and clears the least significant nonzero bit in a
/// nonzero bitboard.
/// pop_1st_bit() finds and clears the least significant nonzero bit in a
/// nonzero bitboard.
+#if defined(IS_64BIT) && !defined(USE_BSFQ)
-Square pop_1st_bit(Bitboard *b) {
+Square pop_1st_bit(Bitboard* b) {
Bitboard bb = *b ^ (*b - 1);
uint32_t fold = int(bb) ^ int(bb >> 32);
*b &= (*b - 1);
return Square(BitTable[(fold * 0x783a9b23) >> 26]);
}
Bitboard bb = *b ^ (*b - 1);
uint32_t fold = int(bb) ^ int(bb >> 32);
*b &= (*b - 1);
return Square(BitTable[(fold * 0x783a9b23) >> 26]);
}
+#elif !defined(USE_BSFQ)
// Use type-punning
union b_union {
// Use type-punning
union b_union {
};
// WARNING: Needs -fno-strict-aliasing compiler option
};
// WARNING: Needs -fno-strict-aliasing compiler option
-Square pop_1st_bit(Bitboard *bb) {
+Square pop_1st_bit(Bitboard* bb) {
+#if defined(IS_64BIT) && (defined(__GNUC__) || defined(__INTEL_COMPILER))
+#define USE_BSFQ
+#endif
+
+
/// first_1() finds the least significant nonzero bit in a nonzero bitboard.
/// first_1() finds the least significant nonzero bit in a nonzero bitboard.
+/// pop_1st_bit() finds and clears the least significant nonzero bit in a
+/// nonzero bitboard.
+#if defined(USE_BSFQ) // Assembly code by Heinz van Saanen
-inline Square first_1(Bitboard b) {
- return Square(BitTable[((b & -b) * 0x218a392cd3d5dbfULL) >> 58]);
+inline Square __attribute__((always_inline)) first_1(Bitboard b) { // always_inline attribute requests inlining at every call site
+ Bitboard dummy; // output operand of the asm statement below; written by bsfq
+ __asm__("bsfq %1, %0": "=r"(dummy): "rm"(b) ); // %0 = dummy (output, register), %1 = b (input, register or memory)
+ return (Square)(dummy); // the value bsfq produced, converted to Square
+}
+
+inline Square __attribute__((always_inline)) pop_1st_bit(Bitboard* b) {
+ const Square s = first_1(*b);
+ *b &= ~(1ULL<<s);
+ return s;
+#else // if !defined(USE_BSFQ)
inline Square first_1(Bitboard b) {
b ^= (b - 1);
inline Square first_1(Bitboard b) {
b ^= (b - 1);
return Square(BitTable[(fold * 0x783a9b23) >> 26]);
}
return Square(BitTable[(fold * 0x783a9b23) >> 26]);
}
+extern Square pop_1st_bit(Bitboard* b);
+
extern void print_bitboard(Bitboard b);
extern void init_bitboards();
extern void print_bitboard(Bitboard b);
extern void init_bitboards();
-extern Square pop_1st_bit(Bitboard *b);
#endif // !defined(BITBOARD_H_INCLUDED)
#endif // !defined(BITBOARD_H_INCLUDED)