Switch the default assembler to yasm. It will still fall back to nasm if you don't have yasm.
git-svn-id: svn://svn.videolan.org/x264/trunk@645
df754926-b1dd-0310-bc7b-
ec298dee348c
pw_1: times 8 dw 1
ssim_c1: times 4 dd 416 ; .01*.01*255*255*64
ssim_c2: times 4 dd 235963 ; .03*.03*255*255*64*63
pw_1: times 8 dw 1
ssim_c1: times 4 dd 416 ; .01*.01*255*255*64
ssim_c2: times 4 dd 235963 ; .03*.03*255*255*64*63
SECTION .text align=16
fakegot:
%else
SECTION .text align=16
fakegot:
%else
- SECTION .rodata data align=16
+ SECTION .rodata align=16
; This is needed for ELF, otherwise the GNU linker assumes the stack is
; executable by default.
%ifidn __OUTPUT_FORMAT__,elf
; This is needed for ELF, otherwise the GNU linker assumes the stack is
; executable by default.
%ifidn __OUTPUT_FORMAT__,elf
-SECTION .note.GNU-stack noalloc noexec nowrite progbits
+SECTION ".note.GNU-stack" noalloc noexec nowrite progbits
-;;; two SUM4x4_SSE2 running side-by-side
-%macro SUM4x4_TWO_SSE2 7 ; a02 a13 junk1 b02 b13 junk2 (1=4 2=5 3=6) sum
+%macro SUM8x4_SSE2 7 ; a02 a13 junk1 b02 b13 junk2 (1=4 2=5 3=6) sum
pxor %3, %3
pxor %6, %6
psubw %3, %1
pxor %3, %3
pxor %6, %6
psubw %3, %1
+%macro SUM8x4_SSSE3 7 ; args: a02 a13 . b02 b13 . sum -- adds |%1|+|%2|+|%4|+|%5| into %7; '.' (%3, %6) are unused here, kept so the 7-arg call via SUM8x4 matches SUM8x4_SSE2
+ pabsw %1, %1 ; %1 = per-word absolute value of a02 (in place)
+ pabsw %2, %2 ; %2 = per-word absolute value of a13 (in place)
+ pabsw %4, %4 ; %4 = per-word absolute value of b02 (in place)
+ pabsw %5, %5 ; %5 = per-word absolute value of b13 (in place)
+ paddusw %1, %2 ; %1 = |a02| + |a13|, unsigned saturating word add
+ paddusw %4, %5 ; %4 = |b02| + |b13|, unsigned saturating word add
+ paddusw %7, %1 ; accumulate the 'a' partial sum into the running total %7
+ paddusw %7, %4 ; accumulate the 'b' partial sum into the running total %7
+%endmacro ; clobbers %1, %2, %4, %5; result is left in %7
+
%macro SATD_TWO_SSE2 0
LOAD_DIFF_8P xmm0, xmm4, [eax], [ecx]
LOAD_DIFF_8P xmm1, xmm5, [eax+ebx], [ecx+edx]
%macro SATD_TWO_SSE2 0
LOAD_DIFF_8P xmm0, xmm4, [eax], [ecx]
LOAD_DIFF_8P xmm1, xmm5, [eax+ebx], [ecx+edx]
HADAMARD1x4 xmm0, xmm1, xmm2, xmm3
TRANSPOSE2x4x4W xmm0, xmm1, xmm2, xmm3, xmm4
HADAMARD1x4 xmm0, xmm1, xmm2, xmm3
HADAMARD1x4 xmm0, xmm1, xmm2, xmm3
TRANSPOSE2x4x4W xmm0, xmm1, xmm2, xmm3, xmm4
HADAMARD1x4 xmm0, xmm1, xmm2, xmm3
- SUM4x4_TWO_SSE2 xmm0, xmm1, xmm4, xmm2, xmm3, xmm5, xmm6
+ SUM8x4 xmm0, xmm1, xmm4, xmm2, xmm3, xmm5, xmm6
%endmacro
%macro SATD_START 0
%endmacro
%macro SATD_START 0
;-----------------------------------------------------------------------------
; int __cdecl x264_pixel_satd_16x16_sse2 (uint8_t *, int, uint8_t *, int )
;-----------------------------------------------------------------------------
;-----------------------------------------------------------------------------
; int __cdecl x264_pixel_satd_16x16_sse2 (uint8_t *, int, uint8_t *, int )
;-----------------------------------------------------------------------------
-cglobal x264_pixel_satd_16x16_sse2
+cglobal x264_pixel_satd_16x16_%1
SATD_TWO_SSE2
SATD_TWO_SSE2
SATD_TWO_SSE2
SATD_TWO_SSE2
SATD_TWO_SSE2
SATD_TWO_SSE2
SATD_TWO_SSE2
SATD_TWO_SSE2
mov eax, [esp+ 8]
mov ecx, [esp+16]
add eax, 8
add ecx, 8
mov eax, [esp+ 8]
mov ecx, [esp+16]
add eax, 8
add ecx, 8
SATD_TWO_SSE2
SATD_TWO_SSE2
SATD_TWO_SSE2
SATD_TWO_SSE2
SATD_TWO_SSE2
SATD_TWO_SSE2
SATD_TWO_SSE2
SATD_TWO_SSE2
SATD_END
;-----------------------------------------------------------------------------
; int __cdecl x264_pixel_satd_8x16_sse2 (uint8_t *, int, uint8_t *, int )
;-----------------------------------------------------------------------------
SATD_END
;-----------------------------------------------------------------------------
; int __cdecl x264_pixel_satd_8x16_sse2 (uint8_t *, int, uint8_t *, int )
;-----------------------------------------------------------------------------
-cglobal x264_pixel_satd_8x16_sse2
+cglobal x264_pixel_satd_8x16_%1
SATD_TWO_SSE2
SATD_TWO_SSE2
SATD_TWO_SSE2
SATD_TWO_SSE2
SATD_TWO_SSE2
SATD_TWO_SSE2
SATD_TWO_SSE2
SATD_TWO_SSE2
SATD_END
;-----------------------------------------------------------------------------
; int __cdecl x264_pixel_satd_16x8_sse2 (uint8_t *, int, uint8_t *, int )
;-----------------------------------------------------------------------------
SATD_END
;-----------------------------------------------------------------------------
; int __cdecl x264_pixel_satd_16x8_sse2 (uint8_t *, int, uint8_t *, int )
;-----------------------------------------------------------------------------
-cglobal x264_pixel_satd_16x8_sse2
+cglobal x264_pixel_satd_16x8_%1
SATD_TWO_SSE2
SATD_TWO_SSE2
SATD_TWO_SSE2
SATD_TWO_SSE2
mov eax, [esp+ 8]
mov ecx, [esp+16]
add eax, 8
add ecx, 8
mov eax, [esp+ 8]
mov ecx, [esp+16]
add eax, 8
add ecx, 8
SATD_TWO_SSE2
SATD_TWO_SSE2
SATD_TWO_SSE2
SATD_TWO_SSE2
SATD_END
;-----------------------------------------------------------------------------
; int __cdecl x264_pixel_satd_8x8_sse2 (uint8_t *, int, uint8_t *, int )
;-----------------------------------------------------------------------------
SATD_END
;-----------------------------------------------------------------------------
; int __cdecl x264_pixel_satd_8x8_sse2 (uint8_t *, int, uint8_t *, int )
;-----------------------------------------------------------------------------
-cglobal x264_pixel_satd_8x8_sse2
+cglobal x264_pixel_satd_8x8_%1
SATD_TWO_SSE2
SATD_TWO_SSE2
SATD_TWO_SSE2
SATD_TWO_SSE2
SATD_END
;-----------------------------------------------------------------------------
; int __cdecl x264_pixel_satd_8x4_sse2 (uint8_t *, int, uint8_t *, int )
;-----------------------------------------------------------------------------
SATD_END
;-----------------------------------------------------------------------------
; int __cdecl x264_pixel_satd_8x4_sse2 (uint8_t *, int, uint8_t *, int )
;-----------------------------------------------------------------------------
-cglobal x264_pixel_satd_8x4_sse2
+cglobal x264_pixel_satd_8x4_%1
+%endmacro ; SATDS
+
+%define SUM8x4 SUM8x4_SSE2
+SATDS sse2
+%ifdef HAVE_SSE3
+%define SUM8x4 SUM8x4_SSSE3
+SATDS ssse3
+%endif
if( cpu&X264_CPU_SSSE3 )
{
if( cpu&X264_CPU_SSSE3 )
{
-#if defined(ARCH_X86_64) && defined(HAVE_SSE3)
pixf->satd[PIXEL_16x16]= x264_pixel_satd_16x16_ssse3;
pixf->satd[PIXEL_16x8] = x264_pixel_satd_16x8_ssse3;
pixf->satd[PIXEL_8x16] = x264_pixel_satd_8x16_ssse3;
pixf->satd[PIXEL_8x8] = x264_pixel_satd_8x8_ssse3;
pixf->satd[PIXEL_8x4] = x264_pixel_satd_8x4_ssse3;
pixf->satd[PIXEL_16x16]= x264_pixel_satd_16x16_ssse3;
pixf->satd[PIXEL_16x8] = x264_pixel_satd_16x8_ssse3;
pixf->satd[PIXEL_8x16] = x264_pixel_satd_8x16_ssse3;
pixf->satd[PIXEL_8x8] = x264_pixel_satd_8x8_ssse3;
pixf->satd[PIXEL_8x4] = x264_pixel_satd_8x4_ssse3;
pixf->sa8d[PIXEL_16x16]= x264_pixel_sa8d_16x16_ssse3;
pixf->sa8d[PIXEL_8x8] = x264_pixel_sa8d_8x8_ssse3;
pixf->sa8d[PIXEL_16x16]= x264_pixel_sa8d_16x16_ssse3;
pixf->sa8d[PIXEL_8x8] = x264_pixel_sa8d_8x8_ssse3;
#endif
}
#endif //HAVE_MMX
#endif
}
#endif //HAVE_MMX
LDFLAGS="$LDFLAGS"
HAVE_GETOPT_LONG=1
LDFLAGS="$LDFLAGS"
HAVE_GETOPT_LONG=1
i*86)
ARCH="X86"
CFLAGS="$CFLAGS -DHAVE_MMX"
i*86)
ARCH="X86"
CFLAGS="$CFLAGS -DHAVE_MMX"
ASFLAGS="-O2"
if [ "$SYS" = MACOSX ]; then
ASFLAGS="$ASFLAGS -f macho -DPREFIX"
ASFLAGS="-O2"
if [ "$SYS" = MACOSX ]; then
ASFLAGS="$ASFLAGS -f macho -DPREFIX"
else
ASFLAGS="$ASFLAGS -f elf"
fi
else
ASFLAGS="$ASFLAGS -f elf"
fi
-if [ $ARCH = X86_64 ] ; then
+if [ $ARCH = X86 -o $ARCH = X86_64 ] ; then
if ! as_check ; then
echo "No assembler. Please install yasm."
exit 1
if ! as_check ; then
echo "No assembler. Please install yasm."
exit 1