void x264_cpu_mask_misalign_sse( void );
void x264_safe_intel_cpu_indicator_init( void );
-/* kluge:
+/* kludge:
* gcc can't give variables any greater alignment than the stack frame has.
- * We need 16 byte alignment for SSE2, so here we make sure that the stack is
- * aligned to 16 bytes.
+ * We need 32 byte alignment for AVX2, so here we make sure that the stack is
+ * aligned to 32 bytes.
* gcc 4.2 introduced __attribute__((force_align_arg_pointer)) to fix this
* problem, but I don't want to require such a new version.
- * This applies only to x86_32, since other architectures that need alignment
- * either have ABIs that ensure aligned stack, or don't support it at all. */
-#if ARCH_X86 && HAVE_MMX
+ * Aligning to 32 bytes only works if the compiler supports keeping that
+ * alignment between functions (osdep.h handles manual alignment of arrays
+ * if it doesn't).
+ */
+#if (ARCH_X86 || HAVE_32B_STACK_ALIGNMENT) && HAVE_MMX
int x264_stack_align( void (*func)(), ... );
#define x264_stack_align(func,...) x264_stack_align((void (*)())func, __VA_ARGS__)
#else
#define EXPAND(x) x
+#if HAVE_32B_STACK_ALIGNMENT /* compiler keeps the stack 32-byte aligned: declare the array directly */
+#define ALIGNED_ARRAY_32( type, name, sub1, ... )\
+ ALIGNED_32( type name sub1 __VA_ARGS__ )
+#else /* no such guarantee: fall back to the ALIGNED_ARRAY_EMU variant */
#define ALIGNED_ARRAY_32( ... ) EXPAND( ALIGNED_ARRAY_EMU( 31, __VA_ARGS__ ) )
+#endif /* HAVE_32B_STACK_ALIGNMENT */
+
#define ALIGNED_ARRAY_64( ... ) EXPAND( ALIGNED_ARRAY_EMU( 63, __VA_ARGS__ ) )
/* For AVX2 */
mov [r4], edx
RET
-%if ARCH_X86_64 == 0
+%if ARCH_X86_64
+
+;-----------------------------------------------------------------------------
+; int stack_align( void (*func)(), ... );
+;
+; Calls func with rsp rounded down to a 32-byte boundary, then restores the
+; caller's stack and returns. The values in r1-r3 (the arguments that follow
+; func, assuming r0-r3 name the first four argument registers -- TODO confirm
+; against x86inc) are moved into r0-r2 for the callee; nothing is copied from
+; the caller's stack. rax is not modified after the call, so whatever func
+; returned in rax is passed through unchanged.
+;-----------------------------------------------------------------------------
+cglobal stack_align
+ push rbp ; save the caller's frame pointer
+ mov rbp, rsp ; rbp = pre-alignment rsp, restored by `leave` below
+%if WIN64
+ sub rsp, 32 ; shadow space
+%endif
+ and rsp, ~31 ; round rsp down to a multiple of 32 (only ever lowers it)
+ mov rax, r0 ; rax = func; read it before r0 is overwritten
+ mov r0, r1 ; shift the remaining arguments down one slot each,
+ mov r1, r2 ; in ascending order so that no value is overwritten
+ mov r2, r3 ; before it has been copied
+ call rax
+ leave ; rsp = rbp, then pop rbp: undoes the alignment adjustment
+ ret
+
+%else
;-----------------------------------------------------------------------------
; int cpu_cpuid_test( void )
popfd
ret
-;-----------------------------------------------------------------------------
-; void stack_align( void (*func)(void*), void *arg );
-;-----------------------------------------------------------------------------
cglobal stack_align
push ebp
mov ebp, esp
sub esp, 12
- and esp, ~15
+ and esp, ~31
mov ecx, [ebp+8]
mov edx, [ebp+12]
mov [esp], edx
%if WIN64
sub rsp, 32 ; shadow space
%endif
- and rsp, ~15
+ and rsp, ~31
call intel_cpu_indicator_init
leave
%if ARCH_X86_64
exit 1
fi
define HAVE_MMX
+ if cc_check '' -mpreferred-stack-boundary=5 ; then # boundary is 2^5 = 32 bytes
+ CFLAGS="$CFLAGS -mpreferred-stack-boundary=5"
+ define HAVE_32B_STACK_ALIGNMENT # checked by the #if HAVE_32B_STACK_ALIGNMENT tests in the C sources
+ fi
fi
if [ $asm = auto -a $ARCH = ARM ] ; then