Merge remote-tracking branch 'qatar/master'

[ffmpeg] / libavcodec / x86 / ac3dsp.asm
diff --git a/libavcodec/x86/ac3dsp.asm b/libavcodec/x86/ac3dsp.asm

index 59157b7219bea917a246a4b79ec5f1dfe5951397..a7380f9bd808c0dd704ecdcb45c368227947619b 100644 (file)
--- a/libavcodec/x86/ac3dsp.asm
+++ b/libavcodec/x86/ac3dsp.asm
@@ -35,7 +35,6 @@ pw_bap_mul2: dw 5, 7, 0, 7, 5, 7, 0, 7
  ; used in ff_ac3_extract_exponents()
  pd_1:   times 4 dd 1
  pd_151: times 4 dd 151
-pb_shuf_4dwb: db 0, 4, 8, 12
  
  SECTION .text
  
@@ -69,12 +68,12 @@ cglobal ac3_exponent_min_%1, 3,4,2, exp, reuse_blks, expn, offset
  %define LOOP_ALIGN
  INIT_MMX
  AC3_EXPONENT_MIN mmx
-%ifdef HAVE_MMX2
+%if HAVE_MMX2
  %define PMINUB PMINUB_MMXEXT
  %define LOOP_ALIGN ALIGN 16
  AC3_EXPONENT_MIN mmxext
  %endif
-%ifdef HAVE_SSE
+%if HAVE_SSE
  INIT_XMM
  AC3_EXPONENT_MIN sse2
  %endif
@@ -367,7 +366,7 @@ cglobal ac3_compute_mantissa_size_sse2, 1,2,4, mant_cnt, sum
      pabsd    %1, %1
  %endmacro
  
-%ifdef HAVE_AMD3DNOW
+%if HAVE_AMD3DNOW
  INIT_MMX
  cglobal ac3_extract_exponents_3dnow, 3,3,0, exp, coef, len
      add      expq, lenq
@@ -404,15 +403,12 @@ cglobal ac3_extract_exponents_3dnow, 3,3,0, exp, coef, len
  %endif
  
  %macro AC3_EXTRACT_EXPONENTS 1
-cglobal ac3_extract_exponents_%1, 3,3,5, exp, coef, len
+cglobal ac3_extract_exponents_%1, 3,3,4, exp, coef, len
      add     expq, lenq
      lea    coefq, [coefq+4*lenq]
      neg     lenq
      mova      m2, [pd_1]
      mova      m3, [pd_151]
-%ifidn %1, ssse3 ;
-    movd      m4, [pb_shuf_4dwb]
-%endif
  .loop:
      ; move 4 32-bit coefs to xmm0
      mova      m0, [coefq+4*lenq]
@@ -426,12 +422,11 @@ cglobal ac3_extract_exponents_%1, 3,3,5, exp, coef, len
      mova      m0, m3
      psubd     m0, m1
      ; move the lowest byte in each of 4 dwords to the low dword
-%ifidn %1, ssse3
-    pshufb    m0, m4
-%else
+    ; NOTE: We cannot just extract the low bytes with pshufb because the dword
+    ;       result for 16777215 is -1 due to float inaccuracy. Using packuswb
+    ;       clips this to 0, which is the correct exponent.
      packssdw  m0, m0
      packuswb  m0, m0
-%endif
      movd  [expq+lenq], m0
  
      add     lenq, 4
@@ -439,11 +434,11 @@ cglobal ac3_extract_exponents_%1, 3,3,5, exp, coef, len
      REP_RET
  %endmacro
  
-%ifdef HAVE_SSE
+%if HAVE_SSE
  INIT_XMM
  %define PABSD PABSD_MMX
  AC3_EXTRACT_EXPONENTS sse2
-%ifdef HAVE_SSSE3
+%if HAVE_SSSE3
  %define PABSD PABSD_SSSE3
  AC3_EXTRACT_EXPONENTS ssse3
  %endif