%endif
%endmacro
-; Always use long nops (reduces 0x90 spam in disassembly on x86_32)
-CPUNOP amdnop
-
; Macros to eliminate most code duplication between x86_32 and x86_64:
; Currently this works only for leaf functions which load all their arguments
; into registers at the start, and make no other use of the stack. Luckily that
%define cpuflag(x) ((cpuflags & (cpuflags_ %+ x)) == (cpuflags_ %+ x))
%define notcpuflag(x) ((cpuflags & (cpuflags_ %+ x)) != (cpuflags_ %+ x))
-; Takes up to 2 cpuflags from the above list.
+; Takes an arbitrary number of cpuflags from the above list.
; All subsequent functions (up to the next INIT_CPUFLAGS) is built for the specified cpu.
; You shouldn't need to invoke this macro directly, it's a subroutine for INIT_MMX &co.
-%macro INIT_CPUFLAGS 0-2
- CPUNOP amdnop
+%macro INIT_CPUFLAGS 0-*
+ %xdefine SUFFIX
+ %undef cpuname
+ %assign cpuflags 0
+
%if %0 >= 1
- %xdefine cpuname %1
- %assign cpuflags cpuflags_%1
- %if %0 >= 2
- %xdefine cpuname %1_%2
- %assign cpuflags cpuflags | cpuflags_%2
- %endif
+ %rep %0
+ %ifdef cpuname
+ %xdefine cpuname cpuname %+ _%1
+ %else
+ %xdefine cpuname %1
+ %endif
+ %assign cpuflags cpuflags | cpuflags_%1
+ %rotate 1
+ %endrep
%xdefine SUFFIX _ %+ cpuname
+
%if cpuflag(avx)
%assign avx_enabled 1
%endif
%endif
%if cpuflag(aligned)
%define movu mova
- %elifidn %1, sse3
+ %elif cpuflag(sse3) && notcpuflag(ssse3)
%define movu lddqu
%endif
- %if notcpuflag(sse2)
- CPUNOP basicnop
- %endif
+ %endif
+
+ %if cpuflag(sse2)
+ CPUNOP amdnop
%else
- %xdefine SUFFIX
- %undef cpuname
- %undef cpuflags
+ CPUNOP basicnop
%endif
%endmacro
INIT_CPUFLAGS %1
%endmacro
+; FIXME: INIT_AVX can be replaced by INIT_XMM avx
+%macro INIT_AVX 0
+ INIT_XMM
+ %assign avx_enabled 1
+ %define PALIGNR PALIGNR_SSSE3
+ %define RESET_MM_PERMUTATION INIT_AVX
+%endmacro
+
%macro INIT_YMM 0-1+
%assign avx_enabled 1
%define RESET_MM_PERMUTATION INIT_YMM %1
%undef i
%undef j
-%macro FMA_INSTR 3
- %macro %1 4-7 %1, %2, %3
- %if cpuflag(xop)
- v%5 %1, %2, %3, %4
- %else
- %6 %1, %2, %3
- %7 %1, %4
- %endif
- %endmacro
-%endmacro
-
-FMA_INSTR pmacsdd, pmulld, paddd
-FMA_INSTR pmacsww, pmullw, paddw
-FMA_INSTR pmadcswd, pmaddwd, paddd
-
; tzcnt is equivalent to "rep bsf" and is backwards-compatible with bsf.
; This lets us use tzcnt without bumping the yasm version requirement yet.
%define tzcnt rep bsf