%endif
%endif
+%define FORMAT_ELF 0
+%ifidn __OUTPUT_FORMAT__,elf
+ %define FORMAT_ELF 1
+%elifidn __OUTPUT_FORMAT__,elf32
+ %define FORMAT_ELF 1
+%elifidn __OUTPUT_FORMAT__,elf64
+ %define FORMAT_ELF 1
+%endif
+
%ifdef PREFIX
%define mangle(x) _ %+ x
%else
SECTION .rodata align=%1
%endmacro
-%macro SECTION_TEXT 0-1 16
- SECTION .text align=%1
-%endmacro
-
%if WIN64
%define PIC
%elif ARCH_X86_64 == 0
default rel
%endif
+%ifdef __NASM_VER__
+ %use smartalign
+%endif
+
; Macros to eliminate most code duplication between x86_32 and x86_64:
; Currently this works only for leaf functions which load all their arguments
; into registers at the start, and make no other use of the stack. Luckily that
CAT_XDEFINE cglobaled_, %2, 1
%endif
%xdefine current_function %2
- %ifidn __OUTPUT_FORMAT__,elf
+ %if FORMAT_ELF
global %2:function %%VISIBILITY
%else
global %2
; like cextern, but without the prefix
%macro cextern_naked 1
- %xdefine %1 mangle(%1)
+ %ifdef PREFIX
+ %xdefine %1 mangle(%1)
+ %endif
CAT_XDEFINE cglobaled_, %1, 1
extern %1
%endmacro
%macro const 1-2+
%xdefine %1 mangle(private_prefix %+ _ %+ %1)
- %ifidn __OUTPUT_FORMAT__,elf
+ %if FORMAT_ELF
global %1:data hidden
%else
global %1
%1: %2
%endmacro
-; This is needed for ELF, otherwise the GNU linker assumes the stack is
-; executable by default.
-%ifidn __OUTPUT_FORMAT__,elf
-SECTION .note.GNU-stack noalloc noexec nowrite progbits
+; This is needed for ELF, otherwise the GNU linker assumes the stack is executable by default.
+%if FORMAT_ELF
+ [SECTION .note.GNU-stack noalloc noexec nowrite progbits]
%endif
; cpuflags
%endif
%if ARCH_X86_64 || cpuflag(sse2)
- CPU amdnop
+ %ifdef __NASM_VER__
+ ALIGNMODE k8
+ %else
+ CPU amdnop
+ %endif
%else
- CPU basicnop
+ %ifdef __NASM_VER__
+ ALIGNMODE nop
+ %else
+ CPU basicnop
+ %endif
%endif
%endmacro
%assign %%i 0
%rep num_mmregs
CAT_XDEFINE m, %%i, ymm %+ %%i
- CAT_XDEFINE nymm, %%i, %%i
+ CAT_XDEFINE nnymm, %%i, %%i
%assign %%i %%i+1
%endrep
INIT_CPUFLAGS %1
%ifdef cpuname
%if notcpuflag(%2)
%error use of ``%1'' %2 instruction in cpuname function: current_function
+ %elif cpuflags_%2 < cpuflags_sse && notcpuflag(sse2) && __sizeofreg > 8
+ %error use of ``%1'' sse2 instruction in cpuname function: current_function
%endif
%endif
%endif
AVX_INSTR minss, sse, 1, 0, 1
AVX_INSTR movapd, sse2
AVX_INSTR movaps, sse
-AVX_INSTR movd
+AVX_INSTR movd, mmx
AVX_INSTR movddup, sse3
AVX_INSTR movdqa, sse2
AVX_INSTR movdqu, sse2
AVX_INSTR movntdqa, sse4
AVX_INSTR movntpd, sse2
AVX_INSTR movntps, sse
-AVX_INSTR movq
+AVX_INSTR movq, mmx
AVX_INSTR movsd, sse2, 1, 0, 0
AVX_INSTR movshdup, sse3
AVX_INSTR movsldup, sse3
FMA4_INSTR fnmsubsd, fnmsub132sd, fnmsub213sd, fnmsub231sd
FMA4_INSTR fnmsubss, fnmsub132ss, fnmsub213ss, fnmsub231ss
-; workaround: vpbroadcastq is broken in x86_32 due to a yasm bug
-%if ARCH_X86_64 == 0
-%macro vpbroadcastq 2
-%if sizeof%1 == 16
- movddup %1, %2
-%else
- vbroadcastsd %1, %2
-%endif
-%endmacro
+; workaround: vpbroadcastq is broken in x86_32 due to a yasm bug (fixed in 1.3.0)
+%ifdef __YASM_VER__
+ %if __YASM_VERSION_ID__ < 0x01030000 && ARCH_X86_64 == 0
+ %macro vpbroadcastq 2
+ %if sizeof%1 == 16
+ movddup %1, %2
+ %else
+ vbroadcastsd %1, %2
+ %endif
+ %endmacro
+ %endif
%endif