Add apply_window_int16() to DSPContext with x86-optimized versions and use it

[ffmpeg] / libavcodec / x86 / mpegvideo_mmx.c
diff --git a/libavcodec/x86/mpegvideo_mmx.c b/libavcodec/x86/mpegvideo_mmx.c

index 406d3b8c076a2fa95b9baca56ae00fbd53d119f1..7dd9a66783c4d4e9d48378ed654cbd892e28aebe 100644 (file)
--- a/libavcodec/x86/mpegvideo_mmx.c
+++ b/libavcodec/x86/mpegvideo_mmx.c
@@ -1,27 +1,28 @@
  /*
   * The simplest mpeg encoder (well, it was the simplest!)
- * Copyright (c) 2000,2001 Fabrice Bellard.
+ * Copyright (c) 2000,2001 Fabrice Bellard
   *
   * Optimized for ia32 CPUs by Nick Kurshev <nickols_k@mail.ru>
   * h263, mpeg1, mpeg2 dequantizer & draw_edges by Michael Niedermayer <michaelni@gmx.at>
   *
- * This file is part of FFmpeg.
+ * This file is part of Libav.
   *
- * FFmpeg is free software; you can redistribute it and/or
+ * Libav is free software; you can redistribute it and/or
   * modify it under the terms of the GNU Lesser General Public
   * License as published by the Free Software Foundation; either
   * version 2.1 of the License, or (at your option) any later version.
   *
- * FFmpeg is distributed in the hope that it will be useful,
+ * Libav is distributed in the hope that it will be useful,
   * but WITHOUT ANY WARRANTY; without even the implied warranty of
   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   * Lesser General Public License for more details.
   *
   * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
+ * License along with Libav; if not, write to the Free Software
   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
   */
  
+#include "libavutil/cpu.h"
  #include "libavutil/x86_cpu.h"
  #include "libavcodec/avcodec.h"
  #include "libavcodec/dsputil.h"
@@ -65,7 +66,7 @@ __asm__ volatile(
                  "packssdw %%mm5, %%mm5          \n\t"
                  "psubw %%mm5, %%mm7             \n\t"
                  "pxor %%mm4, %%mm4              \n\t"
-                ASMALIGN(4)
+                ".p2align 4                     \n\t"
                  "1:                             \n\t"
                  "movq (%0, %3), %%mm0           \n\t"
                  "movq 8(%0, %3), %%mm1          \n\t"
@@ -128,7 +129,7 @@ __asm__ volatile(
                  "packssdw %%mm5, %%mm5          \n\t"
                  "psubw %%mm5, %%mm7             \n\t"
                  "pxor %%mm4, %%mm4              \n\t"
-                ASMALIGN(4)
+                ".p2align 4                     \n\t"
                  "1:                             \n\t"
                  "movq (%0, %3), %%mm0           \n\t"
                  "movq 8(%0, %3), %%mm1          \n\t"
@@ -221,7 +222,7 @@ __asm__ volatile(
                  "packssdw %%mm6, %%mm6          \n\t"
                  "packssdw %%mm6, %%mm6          \n\t"
                  "mov %3, %%"REG_a"              \n\t"
-                ASMALIGN(4)
+                ".p2align 4                     \n\t"
                  "1:                             \n\t"
                  "movq (%0, %%"REG_a"), %%mm0    \n\t"
                  "movq 8(%0, %%"REG_a"), %%mm1   \n\t"
@@ -284,7 +285,7 @@ __asm__ volatile(
                  "packssdw %%mm6, %%mm6          \n\t"
                  "packssdw %%mm6, %%mm6          \n\t"
                  "mov %3, %%"REG_a"              \n\t"
-                ASMALIGN(4)
+                ".p2align 4                     \n\t"
                  "1:                             \n\t"
                  "movq (%0, %%"REG_a"), %%mm0    \n\t"
                  "movq 8(%0, %%"REG_a"), %%mm1   \n\t"
@@ -356,7 +357,7 @@ __asm__ volatile(
                  "packssdw %%mm6, %%mm6          \n\t"
                  "packssdw %%mm6, %%mm6          \n\t"
                  "mov %3, %%"REG_a"              \n\t"
-                ASMALIGN(4)
+                ".p2align 4                     \n\t"
                  "1:                             \n\t"
                  "movq (%0, %%"REG_a"), %%mm0    \n\t"
                  "movq 8(%0, %%"REG_a"), %%mm1   \n\t"
@@ -417,7 +418,7 @@ __asm__ volatile(
                  "packssdw %%mm6, %%mm6          \n\t"
                  "packssdw %%mm6, %%mm6          \n\t"
                  "mov %3, %%"REG_a"              \n\t"
-                ASMALIGN(4)
+                ".p2align 4                     \n\t"
                  "1:                             \n\t"
                  "movq (%0, %%"REG_a"), %%mm0    \n\t"
                  "movq 8(%0, %%"REG_a"), %%mm1   \n\t"
@@ -580,28 +581,35 @@ static void  denoise_dct_sse2(MpegEncContext *s, DCTELEM *block){
              " jb 1b                             \n\t"
          : "+r" (block), "+r" (sum), "+r" (offset)
          : "r"(block+64)
+          XMM_CLOBBERS_ONLY("%xmm0", "%xmm1", "%xmm2", "%xmm3",
+                            "%xmm4", "%xmm5", "%xmm6", "%xmm7")
      );
  }
  
-#ifdef HAVE_SSSE3
+#if HAVE_SSSE3
  #define HAVE_SSSE3_BAK
  #endif
  #undef HAVE_SSSE3
+#define HAVE_SSSE3 0
  
  #undef HAVE_SSE2
  #undef HAVE_MMX2
+#define HAVE_SSE2 0
+#define HAVE_MMX2 0
  #define RENAME(a) a ## _MMX
  #define RENAMEl(a) a ## _mmx
  #include "mpegvideo_mmx_template.c"
  
-#define HAVE_MMX2
+#undef HAVE_MMX2
+#define HAVE_MMX2 1
  #undef RENAME
  #undef RENAMEl
  #define RENAME(a) a ## _MMX2
  #define RENAMEl(a) a ## _mmx2
  #include "mpegvideo_mmx_template.c"
  
-#define HAVE_SSE2
+#undef HAVE_SSE2
+#define HAVE_SSE2 1
  #undef RENAME
  #undef RENAMEl
  #define RENAME(a) a ## _SSE2
@@ -609,7 +617,8 @@ static void  denoise_dct_sse2(MpegEncContext *s, DCTELEM *block){
  #include "mpegvideo_mmx_template.c"
  
  #ifdef HAVE_SSSE3_BAK
-#define HAVE_SSSE3
+#undef HAVE_SSSE3
+#define HAVE_SSSE3 1
  #undef RENAME
  #undef RENAMEl
  #define RENAME(a) a ## _SSSE3
@@ -619,7 +628,9 @@ static void  denoise_dct_sse2(MpegEncContext *s, DCTELEM *block){
  
  void MPV_common_init_mmx(MpegEncContext *s)
  {
-    if (mm_flags & FF_MM_MMX) {
+    int mm_flags = av_get_cpu_flags();
+
+    if (mm_flags & AV_CPU_FLAG_MMX) {
          const int dct_algo = s->avctx->dct_algo;
  
          s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_mmx;
@@ -630,21 +641,21 @@ void MPV_common_init_mmx(MpegEncContext *s)
              s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_mmx;
          s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_mmx;
  
-        if (mm_flags & FF_MM_SSE2) {
+        if (mm_flags & AV_CPU_FLAG_SSE2) {
              s->denoise_dct= denoise_dct_sse2;
          } else {
                  s->denoise_dct= denoise_dct_mmx;
          }
  
          if(dct_algo==FF_DCT_AUTO || dct_algo==FF_DCT_MMX){
-#ifdef HAVE_SSSE3
-            if(mm_flags & FF_MM_SSSE3){
+#if HAVE_SSSE3
+            if(mm_flags & AV_CPU_FLAG_SSSE3){
                  s->dct_quantize= dct_quantize_SSSE3;
              } else
  #endif
-            if(mm_flags & FF_MM_SSE2){
+            if(mm_flags & AV_CPU_FLAG_SSE2){
                  s->dct_quantize= dct_quantize_SSE2;
-            } else if(mm_flags & FF_MM_MMXEXT){
+            } else if(mm_flags & AV_CPU_FLAG_MMX2){
                  s->dct_quantize= dct_quantize_MMX2;
              } else {
                  s->dct_quantize= dct_quantize_MMX;