Make 8SVX codec names just a single word for consistency with other codec names.

[ffmpeg] / libavcodec / i386 / mpegvideo_mmx.c
diff --git a/libavcodec/i386/mpegvideo_mmx.c b/libavcodec/i386/mpegvideo_mmx.c

index e3ab24b4e8ed25b90965eb6ebd61c21dc87da624..0823fddebc50e788c42d34a6b472de976c3fc5e6 100644 (file)
--- a/libavcodec/i386/mpegvideo_mmx.c
+++ b/libavcodec/i386/mpegvideo_mmx.c
@@ -2,6 +2,9 @@
   * The simplest mpeg encoder (well, it was the simplest!)
   * Copyright (c) 2000,2001 Fabrice Bellard.
   *
+ * Optimized for ia32 CPUs by Nick Kurshev <nickols_k@mail.ru>
+ * h263, mpeg1, mpeg2 dequantizer & draw_edges by Michael Niedermayer <michaelni@gmx.at>
+ *
   * This file is part of FFmpeg.
   *
   * FFmpeg is free software; you can redistribute it and/or
@@ -17,20 +20,15 @@
   * You should have received a copy of the GNU Lesser General Public
   * License along with FFmpeg; if not, write to the Free Software
   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- *
- * Optimized for ia32 cpus by Nick Kurshev <nickols_k@mail.ru>
- * h263, mpeg1, mpeg2 dequantizer & draw_edges by Michael Niedermayer <michaelni@gmx.at>
   */
  
-#include "../dsputil.h"
-#include "../mpegvideo.h"
-#include "../avcodec.h"
+#include "dsputil.h"
+#include "dsputil_mmx.h"
+#include "mpegvideo.h"
+#include "avcodec.h"
  #include "x86_cpu.h"
  
-extern uint16_t ff_inv_zigzag_direct16[64];
-
-static const unsigned long long int mm_wabs __attribute__ ((aligned(8))) = 0xffffffffffffffffULL;
-static const unsigned long long int mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001ULL;
+extern uint16_t inv_zigzag_direct16[64];
  
  
  static void dct_unquantize_h263_intra_mmx(MpegEncContext *s,
@@ -179,7 +177,7 @@ asm volatile(
                  if (level < -2048 || level > 2047)
                      fprintf(stderr, "unquant error %d %d\n", i, level);
  #endif
-  We can suppose that result of two multiplications can't be greate of 0xFFFF
+  We can suppose that the result of two multiplications can't be greater than 0xFFFF
    i.e. is 16-bit, so we use here only PMULLW instruction and can avoid
    a complex multiplication.
  =====================================================
@@ -397,7 +395,7 @@ asm volatile(
                  : "%"REG_a, "memory"
          );
      block[0]= block0;
-        //Note, we dont do mismatch control for intra as errors cannot accumulate
+        //Note: we do not perform mismatch control for intra, as errors cannot accumulate
  }
  
  static void dct_unquantize_mpeg2_inter_mmx(MpegEncContext *s,
@@ -477,94 +475,6 @@ asm volatile(
          );
  }
  
-/* draw the edges of width 'w' of an image of size width, height
-   this mmx version can only handle w==8 || w==16 */
-static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w)
-{
-    uint8_t *ptr, *last_line;
-    int i;
-
-    last_line = buf + (height - 1) * wrap;
-    /* left and right */
-    ptr = buf;
-    if(w==8)
-    {
-        asm volatile(
-                "1:                             \n\t"
-                "movd (%0), %%mm0               \n\t"
-                "punpcklbw %%mm0, %%mm0         \n\t"
-                "punpcklwd %%mm0, %%mm0         \n\t"
-                "punpckldq %%mm0, %%mm0         \n\t"
-                "movq %%mm0, -8(%0)             \n\t"
-                "movq -8(%0, %2), %%mm1         \n\t"
-                "punpckhbw %%mm1, %%mm1         \n\t"
-                "punpckhwd %%mm1, %%mm1         \n\t"
-                "punpckhdq %%mm1, %%mm1         \n\t"
-                "movq %%mm1, (%0, %2)           \n\t"
-                "add %1, %0                     \n\t"
-                "cmp %3, %0                     \n\t"
-                " jb 1b                         \n\t"
-                : "+r" (ptr)
-                : "r" ((long)wrap), "r" ((long)width), "r" (ptr + wrap*height)
-        );
-    }
-    else
-    {
-        asm volatile(
-                "1:                             \n\t"
-                "movd (%0), %%mm0               \n\t"
-                "punpcklbw %%mm0, %%mm0         \n\t"
-                "punpcklwd %%mm0, %%mm0         \n\t"
-                "punpckldq %%mm0, %%mm0         \n\t"
-                "movq %%mm0, -8(%0)             \n\t"
-                "movq %%mm0, -16(%0)            \n\t"
-                "movq -8(%0, %2), %%mm1         \n\t"
-                "punpckhbw %%mm1, %%mm1         \n\t"
-                "punpckhwd %%mm1, %%mm1         \n\t"
-                "punpckhdq %%mm1, %%mm1         \n\t"
-                "movq %%mm1, (%0, %2)           \n\t"
-                "movq %%mm1, 8(%0, %2)          \n\t"
-                "add %1, %0                     \n\t"
-                "cmp %3, %0                     \n\t"
-                " jb 1b                         \n\t"
-                : "+r" (ptr)
-                : "r" ((long)wrap), "r" ((long)width), "r" (ptr + wrap*height)
-        );
-    }
-
-    for(i=0;i<w;i+=4) {
-        /* top and bottom (and hopefully also the corners) */
-        ptr= buf - (i + 1) * wrap - w;
-        asm volatile(
-                "1:                             \n\t"
-                "movq (%1, %0), %%mm0           \n\t"
-                "movq %%mm0, (%0)               \n\t"
-                "movq %%mm0, (%0, %2)           \n\t"
-                "movq %%mm0, (%0, %2, 2)        \n\t"
-                "movq %%mm0, (%0, %3)           \n\t"
-                "add $8, %0                     \n\t"
-                "cmp %4, %0                     \n\t"
-                " jb 1b                         \n\t"
-                : "+r" (ptr)
-                : "r" ((long)buf - (long)ptr - w), "r" ((long)-wrap), "r" ((long)-wrap*3), "r" (ptr+width+2*w)
-        );
-        ptr= last_line + (i + 1) * wrap - w;
-        asm volatile(
-                "1:                             \n\t"
-                "movq (%1, %0), %%mm0           \n\t"
-                "movq %%mm0, (%0)               \n\t"
-                "movq %%mm0, (%0, %2)           \n\t"
-                "movq %%mm0, (%0, %2, 2)        \n\t"
-                "movq %%mm0, (%0, %3)           \n\t"
-                "add $8, %0                     \n\t"
-                "cmp %4, %0                     \n\t"
-                " jb 1b                         \n\t"
-                : "+r" (ptr)
-                : "r" ((long)last_line - (long)ptr - w), "r" ((long)wrap), "r" ((long)wrap*3), "r" (ptr+width+2*w)
-        );
-    }
-}
-
  static void  denoise_dct_mmx(MpegEncContext *s, DCTELEM *block){
      const int intra= s->mb_intra;
      int *sum= s->dct_error_sum[intra];
@@ -673,6 +583,12 @@ static void  denoise_dct_sse2(MpegEncContext *s, DCTELEM *block){
      );
  }
  
+#ifdef HAVE_SSSE3
+#define HAVE_SSSE3_BAK
+#endif
+#undef HAVE_SSSE3
+
+#undef HAVE_SSE2
  #undef HAVE_MMX2
  #define RENAME(a) a ## _MMX
  #define RENAMEl(a) a ## _mmx
@@ -685,15 +601,25 @@ static void  denoise_dct_sse2(MpegEncContext *s, DCTELEM *block){
  #define RENAMEl(a) a ## _mmx2
  #include "mpegvideo_mmx_template.c"
  
+#define HAVE_SSE2
  #undef RENAME
  #undef RENAMEl
  #define RENAME(a) a ## _SSE2
  #define RENAMEl(a) a ## _sse2
  #include "mpegvideo_mmx_template.c"
  
+#ifdef HAVE_SSSE3_BAK
+#define HAVE_SSSE3
+#undef RENAME
+#undef RENAMEl
+#define RENAME(a) a ## _SSSE3
+#define RENAMEl(a) a ## _sse2
+#include "mpegvideo_mmx_template.c"
+#endif
+
  void MPV_common_init_mmx(MpegEncContext *s)
  {
-    if (ff_mm_flags & MM_MMX) {
+    if (mm_flags & MM_MMX) {
          const int dct_algo = s->avctx->dct_algo;
  
          s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_mmx;
@@ -704,18 +630,21 @@ void MPV_common_init_mmx(MpegEncContext *s)
              s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_mmx;
          s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_mmx;
  
-        draw_edges = draw_edges_mmx;
-
-        if (ff_mm_flags & MM_SSE2) {
+        if (mm_flags & MM_SSE2) {
              s->denoise_dct= denoise_dct_sse2;
          } else {
                  s->denoise_dct= denoise_dct_mmx;
          }
  
          if(dct_algo==FF_DCT_AUTO || dct_algo==FF_DCT_MMX){
-            if(ff_mm_flags & MM_SSE2){
+#ifdef HAVE_SSSE3
+            if(mm_flags & MM_SSSE3){
+                s->dct_quantize= dct_quantize_SSSE3;
+            } else
+#endif
+            if(mm_flags & MM_SSE2){
                  s->dct_quantize= dct_quantize_SSE2;
-            } else if(ff_mm_flags & MM_MMXEXT){
+            } else if(mm_flags & MM_MMXEXT){
                  s->dct_quantize= dct_quantize_MMX2;
              } else {
                  s->dct_quantize= dct_quantize_MMX;