Remove useless intermediate var

[ffmpeg] / libavcodec / i386 / dsputil_mmx.c
diff --git a/libavcodec/i386/dsputil_mmx.c b/libavcodec/i386/dsputil_mmx.c

index d4010ca0ac2220a17eba9c925e67397b8b4f11c0..53cfe9d848f8e71f9ed6d73ef3ef14a34f341895 100644
--- a/libavcodec/i386/dsputil_mmx.c
+++ b/libavcodec/i386/dsputil_mmx.c
@@ -22,22 +22,20 @@
   * MMX optimization by Nick Kurshev <nickols_k@mail.ru>
   */
  
-#include "dsputil.h"
+#include "libavutil/x86_cpu.h"
+#include "libavcodec/dsputil.h"
+#include "libavcodec/h263.h"
+#include "libavcodec/mpegvideo.h"
+#include "libavcodec/simple_idct.h"
  #include "dsputil_mmx.h"
-#include "simple_idct.h"
-#include "mpegvideo.h"
-#include "x86_cpu.h"
  #include "mmx.h"
  #include "vp3dsp_mmx.h"
  #include "vp3dsp_sse2.h"
-#include "h263.h"
+#include "idct_xvid.h"
  
  //#undef NDEBUG
  //#include <assert.h>
  
-extern void ff_idct_xvid_mmx(short *block);
-extern void ff_idct_xvid_mmx2(short *block);
-
  int mm_flags; /* multimedia extension flags */
  
  /* pixel operations */
@@ -54,6 +52,7 @@ DECLARE_ALIGNED_8 (const uint64_t, ff_pw_8  ) = 0x0008000800080008ULL;
  DECLARE_ALIGNED_8 (const uint64_t, ff_pw_15 ) = 0x000F000F000F000FULL;
  DECLARE_ALIGNED_16(const xmm_t,    ff_pw_16 ) = {0x0010001000100010ULL, 0x0010001000100010ULL};
  DECLARE_ALIGNED_8 (const uint64_t, ff_pw_20 ) = 0x0014001400140014ULL;
+DECLARE_ALIGNED_16(const xmm_t,    ff_pw_28 ) = {0x001C001C001C001CULL, 0x001C001C001C001CULL};
  DECLARE_ALIGNED_16(const xmm_t,    ff_pw_32 ) = {0x0020002000200020ULL, 0x0020002000200020ULL};
  DECLARE_ALIGNED_8 (const uint64_t, ff_pw_42 ) = 0x002A002A002A002AULL;
  DECLARE_ALIGNED_8 (const uint64_t, ff_pw_64 ) = 0x0040004000400040ULL;
@@ -241,7 +240,7 @@ void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size
                  "movq   %%mm2, (%0, %1)         \n\t"
                  "movq   %%mm4, (%0, %1, 2)      \n\t"
                  "movq   %%mm6, (%0, %2)         \n\t"
-                ::"r" (pix), "r" ((long)line_size), "r" ((long)line_size*3), "m"(*p)
+                ::"r" (pix), "r" ((x86_reg)line_size), "r" ((x86_reg)line_size*3), "m"(*p)
                  :"memory");
          pix += line_size*4;
          p += 32;
@@ -266,7 +265,7 @@ void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size
              "movq       %%mm2, (%0, %1)         \n\t"
              "movq       %%mm4, (%0, %1, 2)      \n\t"
              "movq       %%mm6, (%0, %2)         \n\t"
-            ::"r" (pix), "r" ((long)line_size), "r" ((long)line_size*3), "r"(p)
+            ::"r" (pix), "r" ((x86_reg)line_size), "r" ((x86_reg)line_size*3), "r"(p)
              :"memory");
  }
  
@@ -350,7 +349,7 @@ static void put_pixels4_mmx(uint8_t *block, const uint8_t *pixels, int line_size
           "subl $4, %0                   \n\t"
           "jnz 1b                        \n\t"
           : "+g"(h), "+r" (pixels),  "+r" (block)
-         : "r"((long)line_size)
+         : "r"((x86_reg)line_size)
           : "%"REG_a, "memory"
          );
  }
@@ -376,7 +375,7 @@ static void put_pixels8_mmx(uint8_t *block, const uint8_t *pixels, int line_size
           "subl $4, %0                   \n\t"
           "jnz 1b                        \n\t"
           : "+g"(h), "+r" (pixels),  "+r" (block)
-         : "r"((long)line_size)
+         : "r"((x86_reg)line_size)
           : "%"REG_a, "memory"
          );
  }
@@ -410,7 +409,7 @@ static void put_pixels16_mmx(uint8_t *block, const uint8_t *pixels, int line_siz
           "subl $4, %0                   \n\t"
           "jnz 1b                        \n\t"
           : "+g"(h), "+r" (pixels),  "+r" (block)
-         : "r"((long)line_size)
+         : "r"((x86_reg)line_size)
           : "%"REG_a, "memory"
          );
  }
@@ -432,7 +431,7 @@ static void put_pixels16_sse2(uint8_t *block, const uint8_t *pixels, int line_si
           "lea (%2,%3,4), %2             \n\t"
           "jnz 1b                        \n\t"
           : "+g"(h), "+r" (pixels),  "+r" (block)
-         : "r"((long)line_size), "r"(3L*line_size)
+         : "r"((x86_reg)line_size), "r"((x86_reg)3L*line_size)
           : "memory"
          );
  }
@@ -458,7 +457,7 @@ static void avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels, int line_si
           "lea (%2,%3,4), %2             \n\t"
           "jnz 1b                        \n\t"
           : "+g"(h), "+r" (pixels),  "+r" (block)
-         : "r"((long)line_size), "r"(3L*line_size)
+         : "r"((x86_reg)line_size), "r"((x86_reg)3L*line_size)
           : "memory"
          );
  }
@@ -481,7 +480,7 @@ static void clear_blocks_mmx(DCTELEM *blocks)
  }
  
  static void add_bytes_mmx(uint8_t *dst, uint8_t *src, int w){
-    long i=0;
+    x86_reg i=0;
      asm volatile(
          "1:                             \n\t"
          "movq  (%1, %0), %%mm0          \n\t"
@@ -496,14 +495,14 @@ static void add_bytes_mmx(uint8_t *dst, uint8_t *src, int w){
          "cmp %3, %0                     \n\t"
          " jb 1b                         \n\t"
          : "+r" (i)
-        : "r"(src), "r"(dst), "r"((long)w-15)
+        : "r"(src), "r"(dst), "r"((x86_reg)w-15)
      );
      for(; i<w; i++)
          dst[i+0] += src[i+0];
  }
  
  static void add_bytes_l2_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
-    long i=0;
+    x86_reg i=0;
      asm volatile(
          "1:                             \n\t"
          "movq   (%2, %0), %%mm0         \n\t"
@@ -516,7 +515,7 @@ static void add_bytes_l2_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
          "cmp %4, %0                     \n\t"
          " jb 1b                         \n\t"
          : "+r" (i)
-        : "r"(dst), "r"(src1), "r"(src2), "r"((long)w-15)
+        : "r"(dst), "r"(src1), "r"(src2), "r"((x86_reg)w-15)
      );
      for(; i<w; i++)
          dst[i] = src1[i] + src2[i];
@@ -690,8 +689,8 @@ static void h263_h_loop_filter_mmx(uint8_t *src, int stride, int qscale){
          "movd %%mm6, (%1,%3)            \n\t"
          :: "r" (src),
             "r" (src + 4*stride),
-           "r" ((long)   stride ),
-           "r" ((long)(3*stride))
+           "r" ((x86_reg)   stride ),
+           "r" ((x86_reg)(3*stride))
      );
      }
  }
@@ -724,7 +723,7 @@ static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w)
                  "cmp %3, %0                     \n\t"
                  " jb 1b                         \n\t"
                  : "+r" (ptr)
-                : "r" ((long)wrap), "r" ((long)width), "r" (ptr + wrap*height)
+                : "r" ((x86_reg)wrap), "r" ((x86_reg)width), "r" (ptr + wrap*height)
          );
      }
      else
@@ -747,7 +746,7 @@ static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w)
                  "cmp %3, %0                     \n\t"
                  " jb 1b                         \n\t"
                  : "+r" (ptr)
-                : "r" ((long)wrap), "r" ((long)width), "r" (ptr + wrap*height)
+                : "r" ((x86_reg)wrap), "r" ((x86_reg)width), "r" (ptr + wrap*height)
          );
      }
  
@@ -765,7 +764,7 @@ static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w)
                  "cmp %4, %0                     \n\t"
                  " jb 1b                         \n\t"
                  : "+r" (ptr)
-                : "r" ((long)buf - (long)ptr - w), "r" ((long)-wrap), "r" ((long)-wrap*3), "r" (ptr+width+2*w)
+                : "r" ((x86_reg)buf - (x86_reg)ptr - w), "r" ((x86_reg)-wrap), "r" ((x86_reg)-wrap*3), "r" (ptr+width+2*w)
          );
          ptr= last_line + (i + 1) * wrap - w;
          asm volatile(
@@ -779,7 +778,7 @@ static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w)
                  "cmp %4, %0                     \n\t"
                  " jb 1b                         \n\t"
                  : "+r" (ptr)
-                : "r" ((long)last_line - (long)ptr - w), "r" ((long)wrap), "r" ((long)wrap*3), "r" (ptr+width+2*w)
+                : "r" ((x86_reg)last_line - (x86_reg)ptr - w), "r" ((x86_reg)wrap), "r" ((x86_reg)wrap*3), "r" (ptr+width+2*w)
          );
      }
  }
@@ -787,8 +786,8 @@ static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w)
  #define PAETH(cpu, abs3)\
  void add_png_paeth_prediction_##cpu(uint8_t *dst, uint8_t *src, uint8_t *top, int w, int bpp)\
  {\
-    long i = -bpp;\
-    long end = w-3;\
+    x86_reg i = -bpp;\
+    x86_reg end = w-3;\
      asm volatile(\
          "pxor      %%mm7, %%mm7 \n"\
          "movd    (%1,%0), %%mm0 \n"\
@@ -831,7 +830,7 @@ void add_png_paeth_prediction_##cpu(uint8_t *dst, uint8_t *src, uint8_t *top, in
          "cmp       %5, %0 \n"\
          "jle 1b \n"\
          :"+r"(i)\
-        :"r"(dst), "r"(top), "r"(src), "r"((long)bpp), "g"(end),\
+        :"r"(dst), "r"(top), "r"(src), "r"((x86_reg)bpp), "g"(end),\
           "m"(ff_pw_255)\
          :"memory"\
      );\
@@ -995,7 +994,7 @@ static void OPNAME ## mpeg4_qpel16_h_lowpass_mmx2(uint8_t *dst, uint8_t *src, in
          "decl %2                          \n\t"\
          " jnz 1b                          \n\t"\
          : "+a"(src), "+c"(dst), "+D"(h)\
-        : "d"((long)srcStride), "S"((long)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(temp), "m"(ROUNDER)\
+        : "d"((x86_reg)srcStride), "S"((x86_reg)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(temp), "m"(ROUNDER)\
          : "memory"\
      );\
  }\
@@ -1106,7 +1105,7 @@ static void OPNAME ## mpeg4_qpel8_h_lowpass_mmx2(uint8_t *dst, uint8_t *src, int
          "decl %2                          \n\t"\
          " jnz 1b                          \n\t"\
          : "+a"(src), "+c"(dst), "+d"(h)\
-        : "S"((long)srcStride), "D"((long)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(ROUNDER)\
+        : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(ROUNDER)\
          : "memory"\
      );\
  }\
@@ -1170,7 +1169,7 @@ static void OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src,
          "decl %2                        \n\t"\
          " jnz 1b                        \n\t"\
          : "+r" (src), "+r" (temp_ptr), "+r"(count)\
-        : "r" ((long)srcStride)\
+        : "r" ((x86_reg)srcStride)\
          : "memory"\
      );\
      \
@@ -1217,7 +1216,7 @@ static void OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src,
          " jnz 1b                        \n\t"\
          \
          : "+r"(temp_ptr), "+r"(dst), "+g"(count)\
-        : "r"((long)dstStride), "r"(2*(long)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(ROUNDER), "g"(4-14*(long)dstStride)\
+        : "r"((x86_reg)dstStride), "r"(2*(x86_reg)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(ROUNDER), "g"(4-14*(x86_reg)dstStride)\
          :"memory"\
      );\
  }\
@@ -1242,7 +1241,7 @@ static void OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src,
          "decl %2                        \n\t"\
          " jnz 1b                        \n\t"\
          : "+r" (src), "+r" (temp_ptr), "+r"(count)\
-        : "r" ((long)srcStride)\
+        : "r" ((x86_reg)srcStride)\
          : "memory"\
      );\
      \
@@ -1277,7 +1276,7 @@ static void OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src,
          " jnz 1b                        \n\t"\
           \
          : "+r"(temp_ptr), "+r"(dst), "+g"(count)\
-        : "r"((long)dstStride), "r"(2*(long)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(ROUNDER), "g"(4-6*(long)dstStride)\
+        : "r"((x86_reg)dstStride), "r"(2*(x86_reg)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(ROUNDER), "g"(4-6*(x86_reg)dstStride)\
          : "memory"\
     );\
  }\
@@ -1840,7 +1839,7 @@ static void vorbis_inverse_coupling_sse(float *mag, float *ang, int blocksize)
  }
  
  static void vector_fmul_3dnow(float *dst, const float *src, int len){
-    long i = (len-4)*4;
+    x86_reg i = (len-4)*4;
      asm volatile(
          "1: \n\t"
          "movq    (%1,%0), %%mm0 \n\t"
@@ -1858,7 +1857,7 @@ static void vector_fmul_3dnow(float *dst, const float *src, int len){
      );
  }
  static void vector_fmul_sse(float *dst, const float *src, int len){
-    long i = (len-8)*4;
+    x86_reg i = (len-8)*4;
      asm volatile(
          "1: \n\t"
          "movaps    (%1,%0), %%xmm0 \n\t"
@@ -1876,7 +1875,7 @@ static void vector_fmul_sse(float *dst, const float *src, int len){
  }
  
  static void vector_fmul_reverse_3dnow2(float *dst, const float *src0, const float *src1, int len){
-    long i = len*4-16;
+    x86_reg i = len*4-16;
      asm volatile(
          "1: \n\t"
          "pswapd   8(%1), %%mm0 \n\t"
@@ -1894,7 +1893,7 @@ static void vector_fmul_reverse_3dnow2(float *dst, const float *src0, const floa
      asm volatile("femms");
  }
  static void vector_fmul_reverse_sse(float *dst, const float *src0, const float *src1, int len){
-    long i = len*4-32;
+    x86_reg i = len*4-32;
      asm volatile(
          "1: \n\t"
          "movaps        16(%1), %%xmm0 \n\t"
@@ -1915,7 +1914,7 @@ static void vector_fmul_reverse_sse(float *dst, const float *src0, const float *
  
  static void vector_fmul_add_add_3dnow(float *dst, const float *src0, const float *src1,
                                        const float *src2, int src3, int len, int step){
-    long i = (len-4)*4;
+    x86_reg i = (len-4)*4;
      if(step == 2 && src3 == 0){
          dst += (len-4)*2;
          asm volatile(
@@ -1964,7 +1963,7 @@ static void vector_fmul_add_add_3dnow(float *dst, const float *src0, const float
  }
  static void vector_fmul_add_add_sse(float *dst, const float *src0, const float *src1,
                                      const float *src2, int src3, int len, int step){
-    long i = (len-8)*4;
+    x86_reg i = (len-8)*4;
      if(step == 2 && src3 == 0){
          dst += (len-8)*2;
          asm volatile(
@@ -2125,7 +2124,12 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
              }else if(idct_algo==FF_IDCT_CAVS){
                      c->idct_permutation_type= FF_TRANSPOSE_IDCT_PERM;
              }else if(idct_algo==FF_IDCT_XVIDMMX){
-                if(mm_flags & MM_MMXEXT){
+                if(mm_flags & MM_SSE2){
+                    c->idct_put= ff_idct_xvid_sse2_put;
+                    c->idct_add= ff_idct_xvid_sse2_add;
+                    c->idct    = ff_idct_xvid_sse2;
+                    c->idct_permutation_type= FF_SSE2_IDCT_PERM;
+                }else if(mm_flags & MM_MMXEXT){
                      c->idct_put= ff_idct_xvid_mmx2_put;
                      c->idct_add= ff_idct_xvid_mmx2_add;
                      c->idct    = ff_idct_xvid_mmx2;
@@ -2376,6 +2380,11 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
              H264_QPEL_FUNCS(3, 1, ssse3);
              H264_QPEL_FUNCS(3, 2, ssse3);
              H264_QPEL_FUNCS(3, 3, ssse3);
+            c->put_no_rnd_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_ssse3_nornd;
+            c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_ssse3_rnd;
+            c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_ssse3_rnd;
+            c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_ssse3;
+            c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_ssse3;
              c->add_png_paeth_prediction= add_png_paeth_prediction_ssse3;
          }
  #endif