From 84676d2eba9fc18d62e168b60d7d1118d1c232d3 Mon Sep 17 00:00:00 2001 From: Loren Merritt Date: Thu, 5 Apr 2007 16:11:03 +0000 Subject: [PATCH] 32bit version of ssse3 satd. switch default assembler to yasm. it will still fallback to nasm if you don't have yasm. git-svn-id: svn://svn.videolan.org/x264/trunk@645 df754926-b1dd-0310-bc7b-ec298dee348c --- common/amd64/pixel-sse2.asm | 1 - common/i386/i386inc.asm | 4 +-- common/i386/pixel-sse2.asm | 49 ++++++++++++++++++++----------------- common/pixel.c | 4 ++- configure | 7 +++--- 5 files changed, 36 insertions(+), 29 deletions(-) diff --git a/common/amd64/pixel-sse2.asm b/common/amd64/pixel-sse2.asm index bc1888be..f8128dce 100644 --- a/common/amd64/pixel-sse2.asm +++ b/common/amd64/pixel-sse2.asm @@ -31,7 +31,6 @@ BITS 64 SECTION .rodata align=16 -pb_1: times 16 db 1 pw_1: times 8 dw 1 ssim_c1: times 4 dd 416 ; .01*.01*255*255*64 ssim_c2: times 4 dd 235963 ; .03*.03*255*255*64*63 diff --git a/common/i386/i386inc.asm b/common/i386/i386inc.asm index deda8591..dedfb1f7 100644 --- a/common/i386/i386inc.asm +++ b/common/i386/i386inc.asm @@ -46,7 +46,7 @@ BITS 32 SECTION .text align=16 fakegot: %else - SECTION .rodata data align=16 + SECTION .rodata align=16 %endif %endmacro @@ -140,6 +140,6 @@ BITS 32 ; This is needed for ELF, otherwise the GNU linker assumes the stack is ; executable by default. %ifidn __OUTPUT_FORMAT__,elf -SECTION .note.GNU-stack noalloc noexec nowrite progbits +SECTION ".note.GNU-stack" noalloc noexec nowrite progbits %endif diff --git a/common/i386/pixel-sse2.asm b/common/i386/pixel-sse2.asm index e4aa7858..84509b61 100644 --- a/common/i386/pixel-sse2.asm +++ b/common/i386/pixel-sse2.asm @@ -463,8 +463,7 @@ cglobal x264_pixel_ssd_16x8_sse2 paddusw %4, %2 %endmacro -;;; two SUM4x4_SSE2 running side-by-side -%macro SUM4x4_TWO_SSE2 7 ; a02 a13 junk1 b02 b13 junk2 (1=4 2=5 3=6) sum +%macro SUM8x4_SSE2 7 ; a02 a13 junk1 b02 b13 junk2 (1=4 2=5 3=6) sum pxor %3, %3 pxor %6, %6 psubw %3, %1 @@ -483,6 +482,17 @@ cglobal x264_pixel_ssd_16x8_sse2 paddusw %7, %4 %endmacro +%macro SUM8x4_SSSE3 7 ; a02 a13 . b02 b13 . sum + pabsw %1, %1 + pabsw %2, %2 + pabsw %4, %4 + pabsw %5, %5 + paddusw %1, %2 + paddusw %4, %5 + paddusw %7, %1 + paddusw %7, %4 +%endmacro + %macro SATD_TWO_SSE2 0 LOAD_DIFF_8P xmm0, xmm4, [eax], [ecx] LOAD_DIFF_8P xmm1, xmm5, [eax+ebx], [ecx+edx] @@ -496,7 +506,7 @@ cglobal x264_pixel_ssd_16x8_sse2 HADAMARD1x4 xmm0, xmm1, xmm2, xmm3 TRANSPOSE2x4x4W xmm0, xmm1, xmm2, xmm3, xmm4 HADAMARD1x4 xmm0, xmm1, xmm2, xmm3 - SUM4x4_TWO_SSE2 xmm0, xmm1, xmm4, xmm2, xmm3, xmm5, xmm6 + SUM8x4 xmm0, xmm1, xmm4, xmm2, xmm3, xmm5, xmm6 %endmacro %macro SATD_START 0 @@ -519,81 +529,76 @@ cglobal x264_pixel_ssd_16x8_sse2 ret %endmacro +%macro SATDS 1 ;----------------------------------------------------------------------------- ; int __cdecl x264_pixel_satd_16x16_sse2 (uint8_t *, int, uint8_t *, int ) ;----------------------------------------------------------------------------- -cglobal x264_pixel_satd_16x16_sse2 +cglobal x264_pixel_satd_16x16_%1 SATD_START - SATD_TWO_SSE2 SATD_TWO_SSE2 SATD_TWO_SSE2 SATD_TWO_SSE2 - mov eax, [esp+ 8] mov ecx, [esp+16] add eax, 8 add ecx, 8 - SATD_TWO_SSE2 SATD_TWO_SSE2 SATD_TWO_SSE2 SATD_TWO_SSE2 - SATD_END ;----------------------------------------------------------------------------- ; int __cdecl x264_pixel_satd_8x16_sse2 (uint8_t *, int, uint8_t *, int ) ;----------------------------------------------------------------------------- -cglobal x264_pixel_satd_8x16_sse2 +cglobal x264_pixel_satd_8x16_%1 SATD_START - SATD_TWO_SSE2 SATD_TWO_SSE2 SATD_TWO_SSE2 SATD_TWO_SSE2 - SATD_END ;----------------------------------------------------------------------------- ; int __cdecl x264_pixel_satd_16x8_sse2 (uint8_t *, int, uint8_t *, int ) ;----------------------------------------------------------------------------- -cglobal x264_pixel_satd_16x8_sse2 +cglobal x264_pixel_satd_16x8_%1 SATD_START - SATD_TWO_SSE2 SATD_TWO_SSE2 - mov eax, [esp+ 8] mov ecx, [esp+16] add eax, 8 add ecx, 8 - SATD_TWO_SSE2 SATD_TWO_SSE2 - SATD_END ;----------------------------------------------------------------------------- ; int __cdecl x264_pixel_satd_8x8_sse2 (uint8_t *, int, uint8_t *, int ) ;----------------------------------------------------------------------------- -cglobal x264_pixel_satd_8x8_sse2 +cglobal x264_pixel_satd_8x8_%1 SATD_START - SATD_TWO_SSE2 SATD_TWO_SSE2 - SATD_END ;----------------------------------------------------------------------------- ; int __cdecl x264_pixel_satd_8x4_sse2 (uint8_t *, int, uint8_t *, int ) ;----------------------------------------------------------------------------- -cglobal x264_pixel_satd_8x4_sse2 +cglobal x264_pixel_satd_8x4_%1 SATD_START - SATD_TWO_SSE2 - SATD_END +%endmacro ; SATDS + +%define SUM8x4 SUM8x4_SSE2 +SATDS sse2 +%ifdef HAVE_SSE3 +%define SUM8x4 SUM8x4_SSSE3 +SATDS ssse3 +%endif diff --git a/common/pixel.c b/common/pixel.c index e4d813a6..2625ad54 100644 --- a/common/pixel.c +++ b/common/pixel.c @@ -539,14 +539,16 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf ) if( cpu&X264_CPU_SSSE3 ) { -#if defined(ARCH_X86_64) && defined(HAVE_SSE3) +#ifdef HAVE_SSE3 pixf->satd[PIXEL_16x16]= x264_pixel_satd_16x16_ssse3; pixf->satd[PIXEL_16x8] = x264_pixel_satd_16x8_ssse3; pixf->satd[PIXEL_8x16] = x264_pixel_satd_8x16_ssse3; pixf->satd[PIXEL_8x8] = x264_pixel_satd_8x8_ssse3; pixf->satd[PIXEL_8x4] = x264_pixel_satd_8x4_ssse3; +#ifdef ARCH_X86_64 pixf->sa8d[PIXEL_16x16]= x264_pixel_sa8d_16x16_ssse3; pixf->sa8d[PIXEL_8x8] = x264_pixel_sa8d_8x8_ssse3; +#endif #endif } #endif //HAVE_MMX diff --git a/configure b/configure index b6e0d9e8..3d192054 100755 --- a/configure +++ b/configure @@ -68,7 +68,7 @@ CFLAGS="$CFLAGS -Wall -I." LDFLAGS="$LDFLAGS" HAVE_GETOPT_LONG=1 -AS="nasm" +AS="yasm" ASFLAGS="" EXE="" @@ -148,7 +148,7 @@ case "${MACHINE%%-*}" in i*86) ARCH="X86" CFLAGS="$CFLAGS -DHAVE_MMX" - AS="nasm" + AS="yasm" ASFLAGS="-O2" if [ "$SYS" = MACOSX ]; then ASFLAGS="$ASFLAGS -f macho -DPREFIX" @@ -159,6 +159,7 @@ case "${MACHINE%%-*}" in else ASFLAGS="$ASFLAGS -f elf" fi + as_check || AS="nasm" ;; x86_64) ARCH="X86_64" @@ -218,7 +219,7 @@ then fi fi -if [ $ARCH = X86_64 ] ; then +if [ $ARCH = X86 -o $ARCH = X86_64 ] ; then if ! as_check ; then echo "No assembler. Please install yasm." exit 1 -- 2.39.2