git.sesse.net Git - vlc/blob - extras/contrib/src/Patches/ffmpeg-macosx-intel-mmx.patch

   1 Index: libavcodec/x86/motion_est_mmx.c
   2 ===================================================================
   3 --- libavcodec/x86/motion_est_mmx.c     (revision 17470)
   4 +++ libavcodec/x86/motion_est_mmx.c     (working copy)
   5 @@ -168,7 +168,7 @@
   6  static inline void sad8_4_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
   7  {
   8      __asm__ volatile(
   9 -        "movq "MANGLE(bone)", %%mm5     \n\t"
  10 +        "movq %4, %%mm5                 \n\t"
  11          "movq (%1), %%mm0               \n\t"
  12          "pavgb 1(%1), %%mm0             \n\t"
  13          "add %3, %1                     \n\t"
  14 @@ -191,7 +191,7 @@
  15          "sub $2, %0                     \n\t"
  16          " jg 1b                         \n\t"
  17          : "+r" (h), "+r" (blk1), "+r" (blk2)
  18 -        : "r" ((x86_reg)stride)
  19 +        : "r" ((x86_reg)stride), "m" (bone)
  20      );
  21  }
  22
  23 @@ -259,7 +259,7 @@
  24          "punpckhbw %%mm7, %%mm5         \n\t"
  25          "paddw %%mm4, %%mm2             \n\t"
  26          "paddw %%mm5, %%mm3             \n\t"
  27 -        "movq 16+"MANGLE(round_tab)", %%mm5 \n\t"
  28 +        "movq 16+%5, %%mm5              \n\t"
  29          "paddw %%mm2, %%mm0             \n\t"
  30          "paddw %%mm3, %%mm1             \n\t"
  31          "paddw %%mm5, %%mm0             \n\t"
  32 @@ -282,7 +282,7 @@
  33          "add %4, %%"REG_a"              \n\t"
  34          " js 1b                         \n\t"
  35          : "+a" (len)
  36 -        : "r" (blk1 - len), "r" (blk1 -len + stride), "r" (blk2 - len), "r" ((x86_reg)stride)
  37 +        : "r" (blk1 - len), "r" (blk1 -len + stride), "r" (blk2 - len), "r" ((x86_reg)stride), "m" (round_tab[0])
  38      );
  39  }
  40
  41 Index: libavcodec/x86/simple_idct_mmx.c
  42 ===================================================================
  43 --- libavcodec/x86/simple_idct_mmx.c    (revision 17470)
  44 +++ libavcodec/x86/simple_idct_mmx.c    (working copy)
  45 @@ -364,7 +364,7 @@
  46          "movq " #src4 ", %%mm1          \n\t" /* R6     R2      r6      r2 */\
  47          "movq " #src1 ", %%mm2          \n\t" /* R3     R1      r3      r1 */\
  48          "movq " #src5 ", %%mm3          \n\t" /* R7     R5      r7      r5 */\
  49 -        "movq "MANGLE(wm1010)", %%mm4   \n\t"\
  50 +        "movq %3, %%mm4                 \n\t"\
  51          "pand %%mm0, %%mm4              \n\t"\
  52          "por %%mm1, %%mm4               \n\t"\
  53          "por %%mm2, %%mm4               \n\t"\
  54 @@ -438,7 +438,7 @@
  55          "jmp 2f                         \n\t"\
  56          "1:                             \n\t"\
  57          "pslld $16, %%mm0               \n\t"\
  58 -        "#paddd "MANGLE(d40000)", %%mm0 \n\t"\
  59 +        "#paddd %4, %%mm0               \n\t"\
  60          "psrad $13, %%mm0               \n\t"\
  61          "packssdw %%mm0, %%mm0          \n\t"\
  62          "movq %%mm0, " #dst "           \n\t"\
  63 @@ -472,7 +472,7 @@
  64          "movq " #src4 ", %%mm1          \n\t" /* R6     R2      r6      r2 */\
  65          "movq " #src1 ", %%mm2          \n\t" /* R3     R1      r3      r1 */\
  66          "movq " #src5 ", %%mm3          \n\t" /* R7     R5      r7      r5 */\
  67 -        "movq "MANGLE(wm1010)", %%mm4   \n\t"\
  68 +        "movq %3, %%mm4                 \n\t"\
  69          "pand %%mm0, %%mm4              \n\t"\
  70          "por %%mm1, %%mm4               \n\t"\
  71          "por %%mm2, %%mm4               \n\t"\
  72 @@ -546,7 +546,7 @@
  73          "jmp 2f                         \n\t"\
  74          "1:                             \n\t"\
  75          "pslld $16, %%mm0               \n\t"\
  76 -        "paddd "MANGLE(d40000)", %%mm0  \n\t"\
  77 +        "paddd %4, %%mm0                \n\t"\
  78          "psrad $13, %%mm0               \n\t"\
  79          "packssdw %%mm0, %%mm0          \n\t"\
  80          "movq %%mm0, " #dst "           \n\t"\
  81 @@ -1271,7 +1271,7 @@
  82  */
  83
  84  "9: \n\t"
  85 -                :: "r" (block), "r" (temp), "r" (coeffs)
  86 +                :: "r" (block), "r" (temp), "r" (coeffs), "m" (wm1010), "m"(d40000)
  87                  : "%eax"
  88          );
  89  }
  90 Index: libavcodec/x86/cavsdsp_mmx.c
  91 ===================================================================
  92 --- libavcodec/x86/cavsdsp_mmx.c        (revision 17470)
  93 +++ libavcodec/x86/cavsdsp_mmx.c        (working copy)
  94 @@ -25,8 +25,30 @@
  95  #include "libavutil/common.h"
  96  #include "libavutil/x86_cpu.h"
  97  #include "libavcodec/dsputil.h"
  98 -#include "dsputil_mmx.h"
  99
  100 +#define SUMSUB_BA( a, b ) /* asm snippet (AT&T operand order): afterwards a = a+b and b = b-a, both in terms of the incoming values */ \
  101 +"paddw "#b", "#a" \n\t" /* a += b */\
  102 +"paddw "#b", "#b" \n\t" /* b += b */\
  103 +"psubw "#a", "#b" \n\t" /* b -= a, i.e. incoming b minus incoming a */
 104 +
  105 +#define SBUTTERFLY(a,b,t,n,m) /* asm snippet: m is the mov suffix, n the punpckl/punpckh suffix; b is only used as a source, t is overwritten */\
  106 +"mov" #m " " #a ", " #t "         \n\t" /* t = copy of a: abcd */\
  107 +"punpckl" #n " " #b ", " #a "     \n\t" /* a = low interleave of a and b: aebf */\
  108 +"punpckh" #n " " #b ", " #t "     \n\t" /* t = high interleave of old a and b: cgdh */\
  109 +
  110 +#define TRANSPOSE4(a,b,c,d,t) /* four SBUTTERFLY steps; per the step comments the results end up in a, d, t, c (in that order) and b is overwritten as scratch */\
  111 +SBUTTERFLY(a,b,t,wd,q) /* word interleave of a,b: a=aebf t=cgdh */\
  112 +SBUTTERFLY(c,d,b,wd,q) /* word interleave of c,d: c=imjn b=kolp */\
  113 +SBUTTERFLY(a,c,d,dq,q) /* dword interleave: a=aeim d=bfjn */\
  114 +SBUTTERFLY(t,b,c,dq,q) /* dword interleave: t=cgko c=dhlp */
 115 +
  116 +DECLARE_ALIGNED_8(static const uint64_t,ff_pw_4 ) = 0x0004000400040004ULL; /* file-local; 16-bit value 4 repeated in all four words */
  117 +DECLARE_ALIGNED_8(static const uint64_t,ff_pw_5 ) = 0x0005000500050005ULL; /* 16-bit value 5 repeated in all four words */
  118 +DECLARE_ALIGNED_8(static const uint64_t,ff_pw_7 ) = 0x0007000700070007ULL; /* 16-bit value 7 repeated in all four words */
  119 +DECLARE_ALIGNED_8(static const uint64_t,ff_pw_42) = 0x002A002A002A002AULL; /* 16-bit value 42 (0x2A) repeated in all four words */
  120 +DECLARE_ALIGNED_8(static const uint64_t,ff_pw_64) = 0x0040004000400040ULL; /* 16-bit value 64 (0x40) repeated in all four words */
  121 +DECLARE_ALIGNED_8(static const uint64_t,ff_pw_96) = 0x0060006000600060ULL; /* 16-bit value 96 (0x60) repeated in all four words */
 122 +
 123  /*****************************************************************************
 124   *
 125   * inverse transform
 126 @@ -148,7 +170,7 @@
 127      }
 128
 129      for(i=0; i<2; i++){
 130 -        cavs_idct8_1d(b2+4*i, ff_pw_64.a);
 131 +        cavs_idct8_1d(b2+4*i, ff_pw_64);
 132
 133          __asm__ volatile(
 134              "psraw     $7, %%mm7  \n\t"
 135 Index: libavcodec/x86/flacdsp_mmx.c
 136 ===================================================================
 137 --- libavcodec/x86/flacdsp_mmx.c        (revision 17470)
 138 +++ libavcodec/x86/flacdsp_mmx.c        (working copy)
 139 @@ -27,7 +27,6 @@
 140      double c = 2.0 / (len-1.0);
 141      int n2 = len>>1;
 142      x86_reg i = -n2*sizeof(int32_t);
 143 -    x86_reg j =  n2*sizeof(int32_t);
 144      __asm__ volatile(
 145          "movsd   %0,     %%xmm7                \n\t"
 146          "movapd  "MANGLE(ff_pd_1)", %%xmm6     \n\t"
  147 @@ -55,7 +54,7 @@
  148          "sub      $8,      %1                  \n\t"\
  149          "add      $8,      %0                  \n\t"\
  150          "jl 1b                                 \n\t"\
  151 -        :"+&r"(i), "+&r"(j)\
  152 +        :"+&r"(i) /* NOTE(review): with j dropped, operand numbers shift down by one, so %1 in the sub $8 line above now names the input operand w_data+n2 (an input being modified) instead of j; the rest of the asm body is outside this hunk - TODO confirm its %N references still match */\
  153          :"r"(w_data+n2), "r"(data+n2)\
  154      );
  155      if(len&1)
 156 @@ -88,6 +87,8 @@
 157                  "movsd    "MANGLE(ff_pd_1)", %%xmm0 \n\t"
 158                  "movsd    "MANGLE(ff_pd_1)", %%xmm1 \n\t"
 159                  "movsd    "MANGLE(ff_pd_1)", %%xmm2 \n\t"
 160 +                :: "m"(*ff_pd_1) );
 161 +            __asm__ volatile(
 162                  "1:                                 \n\t"
 163                  "movapd   (%4,%0), %%xmm3           \n\t"
 164                  "movupd -8(%5,%0), %%xmm4           \n\t"
 165 @@ -116,6 +117,8 @@
 166              __asm__ volatile(
 167                  "movsd    "MANGLE(ff_pd_1)", %%xmm0 \n\t"
 168                  "movsd    "MANGLE(ff_pd_1)", %%xmm1 \n\t"
 169 +                :: "m"(*ff_pd_1) );
 170 +            __asm__ volatile(
 171                  "1:                                 \n\t"
 172                  "movapd   (%3,%0), %%xmm3           \n\t"
 173                  "movupd -8(%4,%0), %%xmm4           \n\t"