%define public_prefix private_prefix
%endif
+%ifndef STACK_ALIGNMENT ; keep any value predefined by the build (e.g. -DSTACK_ALIGNMENT=n)
+ %if ARCH_X86_64
+ %define STACK_ALIGNMENT 16 ; fallback stack alignment, in bytes, for x86-64 builds
+ %else
+ %define STACK_ALIGNMENT 4 ; fallback stack alignment, in bytes, for non-x86-64 builds
+ %endif
+%endif
+
%define WIN64 0
%define UNIX64 0
%if ARCH_X86_64
; %1 = number of arguments. loads them from stack if needed.
; %2 = number of registers used. pushes callee-saved regs if needed.
; %3 = number of xmm registers used. pushes callee-saved xmm regs if needed.
-; %4 = (optional) stack size to be allocated. If not aligned (x86-32 ICC 10.x,
-; MSVC or YMM), the stack will be manually aligned (to 16 or 32 bytes),
+; %4 = (optional) stack size to be allocated. The stack will be aligned before
+; allocating the specified stack size. If the required stack alignment is
+; larger than the known stack alignment (STACK_ALIGNMENT), the stack will be
+; manually aligned
; and an extra register will be allocated to hold the original stack
; pointer (to not invalidate r0m etc.). To prevent the use of an extra
; register as stack pointer, request a negative stack size.
; PROLOGUE can also be invoked by adding the same options to cglobal
; e.g.
-; cglobal foo, 2,3,0, dst, src, tmp
-; declares a function (foo), taking two args (dst and src) and one local variable (tmp)
+; cglobal foo, 2,3,7,0x40, dst, src, tmp
+; declares a function (foo) that automatically loads two arguments (dst and
+; src) into registers, uses one additional register (tmp) plus 7 vector
+; registers (m0-m6) and allocates 0x40 bytes of stack space.
; TODO Some functions can use some args directly from the stack. If they're the
; last args then you can just not declare them, but if they're in the middle
%assign n_arg_names %0
%endmacro
+%define required_stack_alignment ((mmsize + 15) & ~15) ; mmsize rounded up to a multiple of 16; compared against STACK_ALIGNMENT
+
%macro ALLOC_STACK 1-2 0 ; stack_size, n_xmm_regs (for win64 only)
%ifnum %1
%if %1 != 0
- %assign %%stack_alignment ((mmsize + 15) & ~15)
+ %assign %%pad 0
%assign stack_size %1
%if stack_size < 0
%assign stack_size -stack_size
%endif
- %assign stack_size_padded stack_size
%if WIN64
- %assign stack_size_padded stack_size_padded + 32 ; reserve 32 bytes for shadow space
+ %assign %%pad %%pad + 32 ; shadow space
%if mmsize != 8
%assign xmm_regs_used %2
%if xmm_regs_used > 8
- %assign stack_size_padded stack_size_padded + (xmm_regs_used-8)*16
+ %assign %%pad %%pad + (xmm_regs_used-8)*16 ; callee-saved xmm registers
%endif
%endif
%endif
- %if mmsize <= 16 && HAVE_ALIGNED_STACK
- %assign stack_size_padded stack_size_padded + %%stack_alignment - gprsize - (stack_offset & (%%stack_alignment - 1))
+ %if required_stack_alignment <= STACK_ALIGNMENT
+ ; maintain the current stack alignment
+ %assign stack_size_padded stack_size + %%pad + ((-%%pad-stack_offset-gprsize) & (STACK_ALIGNMENT-1))
SUB rsp, stack_size_padded
%else
%assign %%reg_num (regs_used - 1)
; it, i.e. in rstkm ([rsp + stack_size + %%pad]), so we can restore the
; stack in a single instruction (i.e. mov rsp, rstk or mov
; rsp, rstkm)
- mov rstk, rsp
%if %1 < 0 ; need to store rsp on stack
- sub rsp, gprsize+stack_size_padded
- and rsp, ~(%%stack_alignment-1)
- %xdefine rstkm [rsp+stack_size_padded]
- mov rstkm, rstk
+ %xdefine rstkm [rsp + stack_size + %%pad]
+ %assign %%pad %%pad + gprsize
%else ; can keep rsp in rstk during whole function
- sub rsp, stack_size_padded
- and rsp, ~(%%stack_alignment-1)
%xdefine rstkm rstk
%endif
+ %assign stack_size_padded stack_size + ((%%pad + required_stack_alignment-1) & ~(required_stack_alignment-1))
+ mov rstk, rsp
+ and rsp, ~(required_stack_alignment-1)
+ sub rsp, stack_size_padded
+ movifnidn rstkm, rstk
%endif
WIN64_PUSH_XMM
%endif
%macro SETUP_STACK_POINTER 1
%ifnum %1
- %if %1 != 0 && (HAVE_ALIGNED_STACK == 0 || mmsize == 32)
+ %if %1 != 0 && required_stack_alignment > STACK_ALIGNMENT
%if %1 > 0
%assign regs_used (regs_used + 1)
%elif ARCH_X86_64 && regs_used == num_args && num_args <= 4 + UNIX64 * 2
%assign xmm_regs_used %1
ASSERT xmm_regs_used <= 16
%if xmm_regs_used > 8
- %assign stack_size_padded (xmm_regs_used-8)*16 + (~stack_offset&8) + 32
+ ; Allocate stack space for callee-saved xmm registers plus shadow space and align the stack.
+ %assign %%pad (xmm_regs_used-8)*16 + 32
+ %assign stack_size_padded %%pad + ((-%%pad-stack_offset-gprsize) & (STACK_ALIGNMENT-1))
SUB rsp, stack_size_padded
%endif
WIN64_PUSH_XMM
%endrep
%endif
%if stack_size_padded > 0
- %if stack_size > 0 && (mmsize == 32 || HAVE_ALIGNED_STACK == 0)
+ %if stack_size > 0 && required_stack_alignment > STACK_ALIGNMENT
mov rsp, rstkm
%else
add %1, stack_size_padded
%macro RET 0
%if stack_size_padded > 0
-%if mmsize == 32 || HAVE_ALIGNED_STACK == 0
+%if required_stack_alignment > STACK_ALIGNMENT
mov rsp, rstkm
%else
add rsp, stack_size_padded
%macro RET 0
%if stack_size_padded > 0
-%if mmsize == 32 || HAVE_ALIGNED_STACK == 0
+%if required_stack_alignment > STACK_ALIGNMENT
mov rsp, rstkm
%else
add rsp, stack_size_padded
LDFLAGS="$LDFLAGS $libm"
-aligned_stack=1
+stack_alignment=16
case $host_cpu in
i*86)
ARCH="X86"
if [ $SYS = LINUX ]; then
# < 11 is completely incapable of keeping a mod16 stack
if cpp_check "" "" "__INTEL_COMPILER < 1100" ; then
- define BROKEN_STACK_ALIGNMENT
- aligned_stack=0
+ stack_alignment=4
# 11 <= x < 12 is capable of keeping a mod16 stack, but defaults to not doing so.
elif cpp_check "" "" "__INTEL_COMPILER < 1200" ; then
CFLAGS="$CFLAGS -falign-stack=assume-16-byte"
# >= 12 defaults to a mod16 stack
fi
# icl on windows has no mod16 stack support
- [ $SYS = WINDOWS ] && define BROKEN_STACK_ALIGNMENT && aligned_stack=0
+ [ $SYS = WINDOWS ] && stack_alignment=4
fi
if [ "$SYS" = MACOSX ]; then
ASFLAGS="$ASFLAGS -f macho -DPREFIX"
ARCH="$(echo $host_cpu | tr a-z A-Z)"
;;
esac
-ASFLAGS="$ASFLAGS -DHAVE_ALIGNED_STACK=${aligned_stack}"
if [ $SYS = WINDOWS ]; then
if ! rc_check "0 RCDATA {0}" ; then
fi
ASFLAGS="$ASFLAGS -Worphan-labels"
define HAVE_MMX
- if cc_check '' -mpreferred-stack-boundary=5 ; then
+ if [ $compiler = GNU ] && cc_check '' -mpreferred-stack-boundary=5 ; then
CFLAGS="$CFLAGS -mpreferred-stack-boundary=5"
- define HAVE_32B_STACK_ALIGNMENT
+ stack_alignment=32
fi
fi
define ARCH_$ARCH
define SYS_$SYS
+define STACK_ALIGNMENT $stack_alignment
+ASFLAGS="$ASFLAGS -DSTACK_ALIGNMENT=$stack_alignment"
+
# Skip the endianness check for the Intel Compiler, as all supported platforms are little-endian. The -ipo flag would also cause the check to fail.
if [ $compiler = GNU ]; then
echo "int i[2] = {0x42494745,0}; double f[2] = {0x1.0656e6469616ep+102,0};" > conftest.c