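Fix inappropriate instruction use (retag the MMX dct4x4dc, quant_4x4 and quant_8x8 routines as MMX2, and build the SATDS_SSE2 pixel_satd_4x4 variant only when SSSE3 is available)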

author Anton Mitrofanov <BugMaster@narod.ru>

Thu, 28 Aug 2014 16:13:13 +0000 (20:13 +0400)

committer Fiona Glaser <fiona@x264.com>

Tue, 16 Sep 2014 19:11:42 +0000 (12:11 -0700)
author Anton Mitrofanov <BugMaster@narod.ru>
Thu, 28 Aug 2014 16:13:13 +0000 (20:13 +0400)
committer Fiona Glaser <fiona@x264.com>
Tue, 16 Sep 2014 19:11:42 +0000 (12:11 -0700)
diff --git a/common/dct.c b/common/dct.c

index f5900efd9372cab2352e1ec2ba905190780267cc..08f4e893147be8f1271e99b8f6e9a984c445c1e6 100644 (file)
--- a/common/dct.c
+++ b/common/dct.c
@@ -611,7 +611,6 @@ void x264_dct_init( int cpu, x264_dct_function_t *dctf )
      {
          dctf->sub4x4_dct    = x264_sub4x4_dct_mmx;
          dctf->add4x4_idct   = x264_add4x4_idct_mmx;
-        dctf->dct4x4dc      = x264_dct4x4dc_mmx;
          dctf->idct4x4dc     = x264_idct4x4dc_mmx;
          dctf->sub8x8_dct_dc = x264_sub8x8_dct_dc_mmx2;
  
@@ -630,6 +629,7 @@ void x264_dct_init( int cpu, x264_dct_function_t *dctf )
  
      if( cpu&X264_CPU_MMX2 )
      {
+        dctf->dct4x4dc         = x264_dct4x4dc_mmx2;
          dctf->add8x8_idct_dc   = x264_add8x8_idct_dc_mmx2;
          dctf->add16x16_idct_dc = x264_add16x16_idct_dc_mmx2;
      }
diff --git a/common/quant.c b/common/quant.c

index d7b69115a82b1798154415f835693b6674c9d425..31d8901dcba6d378f6ff5c4102cd8751249a194f 100644 (file)
--- a/common/quant.c
+++ b/common/quant.c
@@ -558,8 +558,6 @@ void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf )
      if( cpu&X264_CPU_MMX )
      {
  #if ARCH_X86
-        pf->quant_4x4 = x264_quant_4x4_mmx;
-        pf->quant_8x8 = x264_quant_8x8_mmx;
          pf->dequant_4x4 = x264_dequant_4x4_mmx;
          pf->dequant_4x4_dc = x264_dequant_4x4dc_mmx2;
          pf->dequant_8x8 = x264_dequant_8x8_mmx;
@@ -576,6 +574,8 @@ void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf )
      {
          pf->quant_2x2_dc = x264_quant_2x2_dc_mmx2;
  #if ARCH_X86
+        pf->quant_4x4 = x264_quant_4x4_mmx2;
+        pf->quant_8x8 = x264_quant_8x8_mmx2;
          pf->quant_4x4_dc = x264_quant_4x4_dc_mmx2;
          pf->decimate_score15 = x264_decimate_score15_mmx2;
          pf->decimate_score16 = x264_decimate_score16_mmx2;
diff --git a/common/x86/dct-a.asm b/common/x86/dct-a.asm

index 4376e369c430f9df1c795f18b936b44c47370337..bc82ff632d5a8ad88fefa4f7ddbf71494cff2255 100644 (file)
--- a/common/x86/dct-a.asm
+++ b/common/x86/dct-a.asm
@@ -143,7 +143,7 @@ INIT_XMM avx
  DCT4x4_DC
  %else
  
-INIT_MMX mmx
+INIT_MMX mmx2
  cglobal dct4x4dc, 1,1
      movq   m3, [r0+24]
      movq   m2, [r0+16]
diff --git a/common/x86/dct.h b/common/x86/dct.h

index 337a63271483e0abe89d0f86eab069db8cfdf2b3..f22a979a16f569068d817019e6512606472cb5d2 100644 (file)
--- a/common/x86/dct.h
+++ b/common/x86/dct.h
@@ -70,7 +70,7 @@ void x264_add8x8_idct_dc_avx    ( pixel   *p_dst, dctcoef dct    [ 4] );
  void x264_add16x16_idct_dc_avx  ( pixel   *p_dst, dctcoef dct    [16] );
  void x264_add16x16_idct_dc_avx2 ( uint8_t *p_dst, int16_t dct    [16] );
  
-void x264_dct4x4dc_mmx       ( int16_t d[16] );
+void x264_dct4x4dc_mmx2      ( int16_t d[16] );
  void x264_dct4x4dc_sse2      ( int32_t d[16] );
  void x264_dct4x4dc_avx       ( int32_t d[16] );
  void x264_idct4x4dc_mmx      ( int16_t d[16] );
diff --git a/common/x86/pixel-a.asm b/common/x86/pixel-a.asm

index 262c5377bde920ff106fa3605ebd1f14b2e820b6..f5f6a82e16ecf9e95f2d3aa7936dc5859e9cc454 100644 (file)
--- a/common/x86/pixel-a.asm
+++ b/common/x86/pixel-a.asm
@@ -1600,7 +1600,7 @@ cglobal pixel_satd_4x4, 4,6
  %macro SATDS_SSE2 0
  %define vertical ((notcpuflag(ssse3) || cpuflag(atom)) || HIGH_BIT_DEPTH)
  
-%if vertical==0 || HIGH_BIT_DEPTH
+%if cpuflag(ssse3) && (vertical==0 || HIGH_BIT_DEPTH)
  cglobal pixel_satd_4x4, 4, 6, 6
      SATD_START_MMX
      mova m4, [hmul_4p]
diff --git a/common/x86/quant-a.asm b/common/x86/quant-a.asm

index fb588d361c4b11b7bbda9f6758d92f9f50090315..731f7d155a1c2b295c214e579dc548d05f75c69f 100644 (file)
--- a/common/x86/quant-a.asm
+++ b/common/x86/quant-a.asm
@@ -453,7 +453,7 @@ INIT_MMX mmx2
  QUANT_DC quant_2x2_dc, 1
  %if ARCH_X86_64 == 0 ; not needed because sse2 is faster
  QUANT_DC quant_4x4_dc, 4
-INIT_MMX mmx
+INIT_MMX mmx2
  QUANT_AC quant_4x4, 4
  QUANT_AC quant_8x8, 16
  %endif
diff --git a/common/x86/quant.h b/common/x86/quant.h

index 1fcb80014db708f5dffb1809e66c2f99e9b688e2..c6a8a9b114f24257fcbb19ec7dc18f4fcc413378 100644 (file)
--- a/common/x86/quant.h
+++ b/common/x86/quant.h
@@ -30,8 +30,8 @@
  
  int x264_quant_2x2_dc_mmx2( dctcoef dct[4], int mf, int bias );
  int x264_quant_4x4_dc_mmx2( dctcoef dct[16], int mf, int bias );
-int x264_quant_4x4_mmx( dctcoef dct[16], udctcoef mf[16], udctcoef bias[16] );
-int x264_quant_8x8_mmx( dctcoef dct[64], udctcoef mf[64], udctcoef bias[64] );
+int x264_quant_4x4_mmx2( dctcoef dct[16], udctcoef mf[16], udctcoef bias[16] );
+int x264_quant_8x8_mmx2( dctcoef dct[64], udctcoef mf[64], udctcoef bias[64] );
  int x264_quant_2x2_dc_sse2( dctcoef dct[16], int mf, int bias );
  int x264_quant_4x4_dc_sse2( dctcoef dct[16], int mf, int bias );
  int x264_quant_4x4_sse2( dctcoef dct[16], udctcoef mf[16], udctcoef bias[16] );
author	Anton Mitrofanov <BugMaster@narod.ru>
	Thu, 28 Aug 2014 16:13:13 +0000 (20:13 +0400)
committer	Fiona Glaser <fiona@x264.com>
	Tue, 16 Sep 2014 19:11:42 +0000 (12:11 -0700)
common/dct.c		patch | blob | history
common/quant.c		patch | blob | history
common/x86/dct-a.asm		patch | blob | history
common/x86/dct.h		patch | blob | history
common/x86/pixel-a.asm		patch | blob | history
common/x86/quant-a.asm		patch | blob | history
common/x86/quant.h		patch | blob | history