#endif
-#else
+#else // defined(USE_FOLDED_BITSCAN)
static const int BitTable[64] = {
0, 1, 2, 7, 3, 13, 8, 19, 4, 25, 14, 28, 9, 34, 20, 40, 5, 17, 26, 38, 15,
}
#define POPCNT_INTRINSIC(x) __popcnt64(x)
+#define BITSCAN_INTRINSIC(idx, x) _BitScanForward64(idx, x)
#elif defined(__INTEL_COMPILER) && (defined(__x86_64) || defined(_M_X64)) // Intel compiler
}
#define POPCNT_INTRINSIC(x) _mm_popcnt_u64(x)
+#define BITSCAN_INTRINSIC(idx, x) _BitScanForward64(idx, x)
#else // Safe fallback for unsupported compilers
inline bool cpu_has_popcnt() { return false; }
#define POPCNT_INTRINSIC(x) sw_count_1s(x)
+#define BITSCAN_INTRINSIC(idx, x) sw_count_1s(x) // dummy
#endif
const bool CpuHas64BitPath = false;
#endif
+
+/// pop_1st_bit() finds and clears the least significant nonzero bit in a
+/// nonzero bitboard. If template parameter is true an intrinsic is called,
+/// otherwise we fallback on a software implementation.
+
+/// Generic (UseIntrinsic == false) version. The call in the body is NOT
+/// infinite recursion: the bool template argument cannot be deduced from the
+/// argument list, so this template is not a viable candidate and overload
+/// resolution selects the plain, non-template pop_1st_bit(Bitboard*) — the
+/// software fallback declared elsewhere in the project.
+/// NOTE(review): confirm that non-template overload is declared before this
+/// point wherever the header is included, or this will fail to compile.
+template<bool UseIntrinsic>
+inline Square pop_1st_bit(Bitboard *b) {
+
+  return pop_1st_bit(b);
+}
+
+/// Intrinsic specialization: uses the compiler's bit-scan-forward intrinsic
+/// (mapped by the BITSCAN_INTRINSIC macro above) to locate and clear the
+/// least significant set bit of *b. Precondition: *b must be nonzero —
+/// the intrinsic's output is undefined for a zero input.
+template<>
+inline Square pop_1st_bit<true>(Bitboard *b) {
+
+  unsigned long idx;
+  Bitboard bb = *b;
+  BITSCAN_INTRINSIC(&idx, bb); // idx <- index of least significant 1 bit
+  *b &= (bb - 1); // classic LSB-clear trick: bb & (bb - 1)
+  return Square(idx);
+}
+
#endif // !defined(BITCOUNT_H_INCLUDED)
// Simple macro to wrap a very common while loop, nothing fancy, no flexibility,
// hardcoded list name 'mlist' and from square 'from'.
-#define SERIALIZE_MOVES(b) while (b) (*mlist++).move = make_move(from, pop_1st_bit(&b))
+// Explicitly selects pop_1st_bit<false>, i.e. the software (non-intrinsic)
+// path per the pop_1st_bit header comment above — presumably so this macro
+// does not depend on compiler intrinsic availability; TODO(review) confirm.
+#define SERIALIZE_MOVES(b) while (b) (*mlist++).move = make_move(from, pop_1st_bit<false>(&b))
////
//// Local definitions