From: Henrik Gramner Date: Sat, 23 May 2015 17:44:16 +0000 (+0200) Subject: x86: Experimental nasm support X-Git-Url: https://git.sesse.net/?a=commitdiff_plain;h=b568a256b9bc6c500d7b1ffe4b9c3311ee5ff337;p=x264 x86: Experimental nasm support Enables the use of nasm as an alternative to yasm. Note that nasm cannot assemble x264 with PIC enabled since it currently doesn't support [symbol-$$] addressing which is used extensively by x264's PIC code. This includes all 64-bit Windows and 64-bit OS X builds, even non-shared. For the above reason nasm is currently intentionally not auto-detected, instead the assembler must be explicitly specified using "AS=nasm ./configure". Also drop -O2 from ASFLAGS since it's simply ignored anyway. --- diff --git a/common/x86/x86inc.asm b/common/x86/x86inc.asm index 9c5193d4..c0fa7230 100644 --- a/common/x86/x86inc.asm +++ b/common/x86/x86inc.asm @@ -64,6 +64,15 @@ %endif %endif +%define FORMAT_ELF 0 +%ifidn __OUTPUT_FORMAT__,elf + %define FORMAT_ELF 1 +%elifidn __OUTPUT_FORMAT__,elf32 + %define FORMAT_ELF 1 +%elifidn __OUTPUT_FORMAT__,elf64 + %define FORMAT_ELF 1 +%endif + %ifdef PREFIX %define mangle(x) _ %+ x %else @@ -86,6 +95,10 @@ default rel %endif +%ifdef __NASM_VER__ + %use smartalign +%endif + ; Macros to eliminate most code duplication between x86_32 and x86_64: ; Currently this works only for leaf functions which load all their arguments ; into registers at the start, and make no other use of the stack. Luckily that @@ -671,7 +684,7 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae, CAT_XDEFINE cglobaled_, %2, 1 %endif %xdefine current_function %2 - %ifidn __OUTPUT_FORMAT__,elf + %if FORMAT_ELF global %2:function %%VISIBILITY %else global %2 @@ -697,14 +710,16 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae, ; like cextern, but without the prefix %macro cextern_naked 1 - %xdefine %1 mangle(%1) + %ifdef PREFIX + %xdefine %1 mangle(%1) + %endif CAT_XDEFINE cglobaled_, %1, 1 extern %1 %endmacro %macro const 1-2+ %xdefine %1 mangle(private_prefix %+ _ %+ %1) - %ifidn __OUTPUT_FORMAT__,elf + %if FORMAT_ELF global %1:data hidden %else global %1 @@ -712,9 +727,8 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae, %1: %2 %endmacro -; This is needed for ELF, otherwise the GNU linker assumes the stack is -; executable by default. -%ifidn __OUTPUT_FORMAT__,elf +; This is needed for ELF, otherwise the GNU linker assumes the stack is executable by default. +%if FORMAT_ELF [SECTION .note.GNU-stack noalloc noexec nowrite progbits] %endif @@ -785,9 +799,17 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae, %endif %if ARCH_X86_64 || cpuflag(sse2) - CPU amdnop + %ifdef __NASM_VER__ + ALIGNMODE k8 + %else + CPU amdnop + %endif %else - CPU basicnop + %ifdef __NASM_VER__ + ALIGNMODE nop + %else + CPU basicnop + %endif %endif %endmacro @@ -1467,12 +1489,14 @@ FMA4_INSTR fnmsubsd, fnmsub132sd, fnmsub213sd, fnmsub231sd FMA4_INSTR fnmsubss, fnmsub132ss, fnmsub213ss, fnmsub231ss ; workaround: vpbroadcastq is broken in x86_32 due to a yasm bug (fixed in 1.3.0) -%if __YASM_VERSION_ID__ < 0x01030000 && ARCH_X86_64 == 0 - %macro vpbroadcastq 2 - %if sizeof%1 == 16 - movddup %1, %2 - %else - vbroadcastsd %1, %2 - %endif - %endmacro +%ifdef __YASM_VER__ + %if __YASM_VERSION_ID__ < 0x01030000 && ARCH_X86_64 == 0 + %macro vpbroadcastq 2 + %if sizeof%1 == 16 + movddup %1, %2 + %else + vbroadcastsd %1, %2 + %endif + %endmacro + %endif %endif diff --git a/configure b/configure index db116598..2c6cfe86 100755 --- a/configure +++ b/configure @@ -649,9 +649,9 @@ stack_alignment=16 case $host_cpu in i*86) ARCH="X86" - AS="yasm" + AS="${AS-yasm}" AS_EXT=".asm" - ASFLAGS="$ASFLAGS -O2 -DARCH_X86_64=0 -I\$(SRCPATH)/common/x86/" + ASFLAGS="$ASFLAGS -DARCH_X86_64=0 -I\$(SRCPATH)/common/x86/" if [ $compiler = GNU ]; then if [[ "$asm" == auto && "$CFLAGS" != *-march* ]]; then CFLAGS="$CFLAGS -march=i686" @@ -678,36 +678,36 @@ case $host_cpu in stack_alignment=4 fi if [ "$SYS" = MACOSX ]; then - ASFLAGS="$ASFLAGS -f macho -DPREFIX" + ASFLAGS="$ASFLAGS -f macho32 -DPREFIX" elif [ "$SYS" = WINDOWS -o "$SYS" = CYGWIN ]; then ASFLAGS="$ASFLAGS -f win32 -DPREFIX" LDFLAGS="$LDFLAGS -Wl,--large-address-aware" [ $compiler = GNU ] && LDFLAGS="$LDFLAGS -Wl,--nxcompat -Wl,--dynamicbase" [ $compiler = GNU ] && RCFLAGS="--target=pe-i386 $RCFLAGS" else - ASFLAGS="$ASFLAGS -f elf" + ASFLAGS="$ASFLAGS -f elf32" fi ;; x86_64) ARCH="X86_64" - AS="yasm" + AS="${AS-yasm}" AS_EXT=".asm" ASFLAGS="$ASFLAGS -DARCH_X86_64=1 -I\$(SRCPATH)/common/x86/" [ $compiler = GNU ] && CFLAGS="-m64 $CFLAGS" && LDFLAGS="-m64 $LDFLAGS" if [ "$SYS" = MACOSX ]; then - ASFLAGS="$ASFLAGS -f macho64 -m amd64 -DPIC -DPREFIX" + ASFLAGS="$ASFLAGS -f macho64 -DPIC -DPREFIX" if cc_check '' "-arch x86_64"; then CFLAGS="$CFLAGS -arch x86_64" LDFLAGS="$LDFLAGS -arch x86_64" fi elif [ "$SYS" = WINDOWS -o "$SYS" = CYGWIN ]; then - ASFLAGS="$ASFLAGS -f win32 -m amd64" + ASFLAGS="$ASFLAGS -f win64" # only the GNU toolchain is inconsistent in prefixing function names with _ [ $compiler = GNU ] && cc_check "" "-S" && grep -q "_main:" conftest && ASFLAGS="$ASFLAGS -DPREFIX" [ $compiler = GNU ] && LDFLAGS="$LDFLAGS -Wl,--nxcompat -Wl,--dynamicbase" [ $compiler = GNU ] && RCFLAGS="--target=pe-x86-64 $RCFLAGS" else - ASFLAGS="$ASFLAGS -f elf -m amd64" + ASFLAGS="$ASFLAGS -f elf64" fi ;; powerpc|powerpc64) diff --git a/tools/checkasm-a.asm b/tools/checkasm-a.asm index fbe1291c..51f7ab51 100644 --- a/tools/checkasm-a.asm +++ b/tools/checkasm-a.asm @@ -33,24 +33,24 @@ error_message: db "failed to preserve register", 0 %if ARCH_X86_64 ; just random numbers to reduce the chance of incidental match ALIGN 16 -x6: ddq 0x79445c159ce790641a1b2550a612b48c -x7: ddq 0x86b2536fcd8cf6362eed899d5a28ddcd -x8: ddq 0x3f2bf84fc0fcca4eb0856806085e7943 -x9: ddq 0xd229e1f5b281303facbd382dcf5b8de2 -x10: ddq 0xab63e2e11fa38ed971aeaff20b095fd9 -x11: ddq 0x77d410d5c42c882d89b0c0765892729a -x12: ddq 0x24b3c1d2a024048bc45ea11a955d8dd5 -x13: ddq 0xdd7b8919edd427862e8ec680de14b47c -x14: ddq 0x11e53e2b2ac655ef135ce6888fa02cbf -x15: ddq 0x6de8f4c914c334d5011ff554472a7a10 -n7: dq 0x21f86d66c8ca00ce -n8: dq 0x75b6ba21077c48ad -n9: dq 0xed56bb2dcb3c7736 -n10: dq 0x8bda43d3fd1a7e06 -n11: dq 0xb64a9c9e5d318408 -n12: dq 0xdf9a54b303f1d3a3 -n13: dq 0x4a75479abd64e097 -n14: dq 0x249214109d5d1c88 +x6: dq 0x1a1b2550a612b48c,0x79445c159ce79064 +x7: dq 0x2eed899d5a28ddcd,0x86b2536fcd8cf636 +x8: dq 0xb0856806085e7943,0x3f2bf84fc0fcca4e +x9: dq 0xacbd382dcf5b8de2,0xd229e1f5b281303f +x10: dq 0x71aeaff20b095fd9,0xab63e2e11fa38ed9 +x11: dq 0x89b0c0765892729a,0x77d410d5c42c882d +x12: dq 0xc45ea11a955d8dd5,0x24b3c1d2a024048b +x13: dq 0x2e8ec680de14b47c,0xdd7b8919edd42786 +x14: dq 0x135ce6888fa02cbf,0x11e53e2b2ac655ef +x15: dq 0x011ff554472a7a10,0x6de8f4c914c334d5 +n7: dq 0x21f86d66c8ca00ce +n8: dq 0x75b6ba21077c48ad +n9: dq 0xed56bb2dcb3c7736 +n10: dq 0x8bda43d3fd1a7e06 +n11: dq 0xb64a9c9e5d318408 +n12: dq 0xdf9a54b303f1d3a3 +n13: dq 0x4a75479abd64e097 +n14: dq 0x249214109d5d1c88 %endif SECTION .text