git-svn-id: https://casparcg.svn.sourceforge.net/svnroot/casparcg/server/branches...

author ronag <ronag@362d55ac-95cf-4e76-9f9a-cbaa9c17b72d>

Fri, 10 Feb 2012 12:28:21 +0000 (12:28 +0000)

committer ronag <ronag@362d55ac-95cf-4e76-9f9a-cbaa9c17b72d>

Fri, 10 Feb 2012 12:28:21 +0000 (12:28 +0000)
author ronag <ronag@362d55ac-95cf-4e76-9f9a-cbaa9c17b72d>
Fri, 10 Feb 2012 12:28:21 +0000 (12:28 +0000)
committer ronag <ronag@362d55ac-95cf-4e76-9f9a-cbaa9c17b72d>
Fri, 10 Feb 2012 12:28:21 +0000 (12:28 +0000)
diff --git a/accelerator/cpu/image/image_mixer.cpp b/accelerator/cpu/image/image_mixer.cpp

index 054f7f59dcded928ac448bdb44a98c6d96661739..e7fd98cd502bf059f86a1dd822dfc7f79259ccb1 100644 (file)
--- a/accelerator/cpu/image/image_mixer.cpp
+++ b/accelerator/cpu/image/image_mixer.cpp
@@ -94,32 +94,23 @@ bool operator!=(const item& lhs, const item& rhs)
         return !(lhs == rhs);\r
  }\r
         \r
-inline xmm::s8_x blend(xmm::s8_x dest, xmm::s8_x source)\r
+inline xmm::s8_x blend(xmm::s8_x d, xmm::s8_x s)\r
  {      \r
         using namespace xmm;\r
-\r
-       auto s = s16_x(source);\r
-       auto d = dest;\r
-\r
-       const s16_x round       = 128;\r
-       const s16_x lomask      = 0x00FF;\r
-\r
+               \r
         // T(S, D) = S * D[A] + 0x80\r
         auto aaaa   = s8_x::shuffle(d, s8_x(15, 15, 15, 15, 11, 11, 11, 11, 7, 7, 7, 7, 3, 3, 3, 3));\r
         d                       = s8_x(u8_x::min(u8_x(d), u8_x(aaaa))); // overflow guard\r
  \r
-       auto xaxa       = s16_x(aaaa) & lomask;         \r
+       auto xaxa       = s16_x(aaaa) >> 8;             \r
                               \r
-       auto xrxb       = s & lomask;\r
-       auto t1         = s16_x::multiply_low(xrxb, xaxa) + round;    \r
-                       \r
-       auto xaxg       = s >> 8;\r
-       auto t2         = s16_x::multiply_low(xaxg, xaxa) + round;\r
+       auto t1         = s16_x::multiply_low(s16_x(s) & 0x00FF, xaxa) + 0x80;    \r
+       auto t2         = s16_x::multiply_low(s16_x(s) >> 8    , xaxa) + 0x80;\r
                 \r
         // C(S, D) = S + D - (((T >> 8) + T) >> 8);\r
-       auto rxbx       = s8_x(((t1 >> 8) + t1) >> 8);      \r
-       auto axgx       = s8_x((t2 >> 8) + t2);    \r
-       auto argb   = s8_x::blend(rxbx, axgx, s8_x(-1, 0, -1, 0));\r
+       auto xyxy       = s8_x(((t1 >> 8) + t1) >> 8);      \r
+       auto yxyx       = s8_x((t2 >> 8) + t2);    \r
+       auto argb   = s8_x::blend(xyxy, yxyx, s8_x(-1, 0, -1, 0));\r
  \r
         return s8_x(s) + (d - argb);\r
  }\r
diff --git a/accelerator/cpu/util/xmm.h b/accelerator/cpu/util/xmm.h

index 662a62eeea0509eebd14cf27f4bde3c8d0f048d1..5e9cd0565fdb45bdc64d573bf84a0fa0132a1cac 100644 (file)
--- a/accelerator/cpu/util/xmm.h
+++ b/accelerator/cpu/util/xmm.h
@@ -49,9 +49,9 @@ public:
         typedef s32_x xmm_epi_tag;\r
  \r
         s32_x();\r
-       s32_x(const s16_x& other);\r
-       s32_x(const s8_x& other);\r
-       s32_x(const u8_x& other);\r
+       explicit s32_x(const s16_x& other);\r
+       explicit s32_x(const s8_x& other);\r
+       explicit s32_x(const u8_x& other);\r
         s32_x(const __m128i& value);\r
  \r
         s32_x& operator>>=(int count);\r
@@ -75,9 +75,9 @@ public:
         typedef s16_x xmm_epi_tag;\r
  \r
         s16_x();\r
-       s16_x(const s32_x& other);\r
-       s16_x(const s8_x& other);\r
-       s16_x(const u8_x& other);\r
+       explicit s16_x(const s32_x& other);\r
+       explicit s16_x(const s8_x& other);\r
+       explicit s16_x(const u8_x& other);\r
         s16_x(const __m128i& value);\r
         s16_x(short value);\r
  \r
@@ -104,6 +104,14 @@ public:
         static s16_x min(const s16_x& lhs, const s16_x& rhs);\r
  };\r
  \r
+template<typename T>\r
+class base8_x : public base_x<s8_x>\r
+{\r
+                                                                       \r
+       char operator[](int index) const;\r
+       char& operator[](int index);\r
+};\r
+\r
  class s8_x : public base_x<s8_x>\r
  {\r
         __m128i value_;\r
@@ -116,9 +124,9 @@ public:
         typedef s8_x xmm_epi_tag;\r
  \r
         s8_x();\r
-       s8_x(const s32_x& other);\r
-       s8_x(const s16_x& other);\r
-       s8_x(const u8_x& other);\r
+       explicit s8_x(const s32_x& other);\r
+       explicit s8_x(const s16_x& other);\r
+       explicit s8_x(const u8_x& other);\r
         s8_x(const __m128i& value);     \r
         s8_x(char b);\r
         s8_x(char b3,  char b2,  char b1,  char b0);\r
@@ -128,18 +136,18 @@ public:
                  char b3,  char b2,  char b1,  char b0);\r
  \r
         s8_x& operator+=(const s8_x& other);\r
-       s8_x& operator-=(const s8_x& other);                                                                    \r
+       s8_x& operator-=(const s8_x& other);    \r
         char operator[](int index) const;\r
         char& operator[](int index);\r
         \r
         static s8_x upack(const s16_x& lhs, const s16_x& rhs);\r
  \r
-       static s16_x multiply_add(const s8_x& lhs, const s8_x& rhs);\r
-       static s8_x shuffle(const s8_x& lhs, const s8_x& rhs);\r
+       static s16_x multiply_add(const u8_x& lhs, const s8_x& rhs);\r
         static s8_x max(const s8_x& lhs, const s8_x& rhs);\r
         static s8_x min(const s8_x& lhs, const s8_x& rhs);\r
+\r
+       static s8_x shuffle(const s8_x& lhs, const s8_x& rhs);\r
         static s8_x blend(const s8_x& lhs, const s8_x& rhs, const s8_x& mask);\r
-       static s8_x zero();\r
  };\r
  \r
  class u8_x : public base_x<u8_x>\r
@@ -154,9 +162,9 @@ public:
         typedef u8_x xmm_epu_tag;\r
  \r
         u8_x();\r
-       u8_x(const s32_x& other);\r
-       u8_x(const s16_x& other);\r
-       u8_x(const s8_x& other);\r
+       explicit u8_x(const s32_x& other);\r
+       explicit u8_x(const s16_x& other);\r
+       explicit u8_x(const s8_x& other);\r
         u8_x(const __m128i& value);     \r
         u8_x(char b);\r
         u8_x(char b3,  char b2,  char b1,  char b0);\r
@@ -170,6 +178,9 @@ public:
                         \r
         static u8_x max(const u8_x& lhs, const u8_x& rhs);\r
         static u8_x min(const u8_x& lhs, const u8_x& rhs);\r
+               \r
+       static u8_x shuffle(const u8_x& lhs, const u8_x& rhs);\r
+       static u8_x blend(const u8_x& lhs, const u8_x& rhs, const u8_x& mask);\r
  };\r
  \r
  // base_x\r
@@ -528,15 +539,10 @@ s8_x s8_x::upack(const s16_x& lhs, const s16_x& rhs)
         return _mm_packus_epi16(lhs.value_, rhs.value_);\r
  }\r
  \r
-s16_x s8_x::multiply_add(const s8_x& lhs, const s8_x& rhs)\r
+s16_x s8_x::multiply_add(const u8_x& lhs, const s8_x& rhs)\r
  {              \r
         return _mm_maddubs_epi16(lhs.value_, rhs.value_);\r
  }\r
-\r
-s8_x s8_x::shuffle(const s8_x& lhs, const s8_x& rhs)\r
-{              \r
-       return _mm_shuffle_epi8(lhs.value_, rhs.value_);\r
-}\r
         \r
  s8_x s8_x::max(const s8_x& lhs, const s8_x& rhs)\r
  {              \r
@@ -547,11 +553,6 @@ s8_x s8_x::min(const s8_x& lhs, const s8_x& rhs)
  {              \r
         return _mm_min_epi8(lhs.value_, rhs.value_);\r
  }\r
-       \r
-s8_x s8_x::blend(const s8_x& lhs, const s8_x& rhs, const s8_x& mask)\r
-{              \r
-       return _mm_blendv_epi8(lhs.value_, rhs.value_, mask.value_);\r
-}\r
  \r
  inline s8_x operator+(const s8_x& lhs, const s8_x& rhs)\r
  {\r
@@ -562,6 +563,16 @@ inline s8_x operator-(const s8_x& lhs, const s8_x& rhs)
  {\r
         return s8_x(lhs) -= rhs;\r
  }\r
+       \r
+s8_x s8_x::shuffle(const s8_x& lhs, const s8_x& rhs)\r
+{              \r
+       return _mm_shuffle_epi8(lhs.value_, rhs.value_);\r
+}\r
+\r
+s8_x s8_x::blend(const s8_x& lhs, const s8_x& rhs, const s8_x& mask)\r
+{              \r
+       return _mm_blendv_epi8(lhs.value_, rhs.value_, mask.value_);\r
+}\r
  \r
  // u8_x\r
  \r
@@ -627,6 +638,15 @@ u8_x u8_x::min(const u8_x& lhs, const u8_x& rhs)
         return _mm_min_epu8(lhs.value_, rhs.value_);\r
  }\r
  \r
+u8_x u8_x::shuffle(const u8_x& lhs, const u8_x& rhs)\r
+{              \r
+       return _mm_shuffle_epi8(lhs.value_, rhs.value_);\r
+}\r
+\r
+u8_x u8_x::blend(const u8_x& lhs, const u8_x& rhs, const u8_x& mask)\r
+{              \r
+       return _mm_blendv_epi8(lhs.value_, rhs.value_, mask.value_);\r
+}\r
  \r
  // xmm_cast\r
  \r
author	ronag <ronag@362d55ac-95cf-4e76-9f9a-cbaa9c17b72d>
	Fri, 10 Feb 2012 12:28:21 +0000 (12:28 +0000)
committer	ronag <ronag@362d55ac-95cf-4e76-9f9a-cbaa9c17b72d>
	Fri, 10 Feb 2012 12:28:21 +0000 (12:28 +0000)
accelerator/cpu/image/image_mixer.cpp		patch \| blob \| history
accelerator/cpu/util/xmm.h		patch \| blob \| history