return !(lhs == rhs);\r
}\r
\r
-inline xmm::s8_x blend(xmm::s8_x dest, xmm::s8_x source)
+inline xmm::s8_x blend(xmm::s8_x d, xmm::s8_x s) // Returns s + (d - T) per byte, where T is s scaled by the byte of d that the shuffle below replicates (called D[A] in the comments).
{ 
using namespace xmm;
-
- auto s = s16_x(source);
- auto d = dest;
-
- const s16_x round = 128;
- const s16_x lomask = 0x00FF;
-
+ 
// T(S, D) = S * D[A] + 0x80
auto aaaa = s8_x::shuffle(d, s8_x(15, 15, 15, 15, 11, 11, 11, 11, 7, 7, 7, 7, 3, 3, 3, 3));
d = s8_x(u8_x::min(u8_x(d), u8_x(aaaa))); // overflow guard

- auto xaxa = s16_x(aaaa) & lomask; 
+ auto xaxa = s16_x(aaaa) >> 8; // Replaces the removed `& 0x00FF` mask. NOTE(review): equivalent only if s16_x's >> is a logical per-lane shift; an arithmetic shift would sign-extend bytes >= 0x80 - confirm against s16_x::operator>>.

- auto xrxb = s & lomask;
- auto t1 = s16_x::multiply_low(xrxb, xaxa) + round; 
- 
- auto xaxg = s >> 8;
- auto t2 = s16_x::multiply_low(xaxg, xaxa) + round;
+ auto t1 = s16_x::multiply_low(s16_x(s) & 0x00FF, xaxa) + 0x80; // Low byte of every 16-bit lane of s times xaxa, plus 0x80 (the removed `round` constant); s is converted explicitly to s16_x.
+ auto t2 = s16_x::multiply_low(s16_x(s) >> 8 , xaxa) + 0x80; // Same computation for the high byte of every 16-bit lane of s.

// C(S, D) = S + D - (((T >> 8) + T) >> 8);
- auto rxbx = s8_x(((t1 >> 8) + t1) >> 8); 
- auto axgx = s8_x((t2 >> 8) + t2); 
- auto argb = s8_x::blend(rxbx, axgx, s8_x(-1, 0, -1, 0));
+ auto xyxy = s8_x(((t1 >> 8) + t1) >> 8); // ((T >> 8) + T) >> 8 for the t1 products; the final shift moves the value into the low byte of each lane.
+ auto yxyx = s8_x((t2 >> 8) + t2); // No final shift for the t2 products, so the value stays in the high byte of each lane.
+ auto argb = s8_x::blend(xyxy, yxyx, s8_x(-1, 0, -1, 0)); // Per-byte select between the two partial results (s8_x::blend wraps _mm_blendv_epi8); presumably the 4-argument mask repeats across the register - constructor not shown here.

return s8_x(s) + (d - argb);
}
typedef s32_x xmm_epi_tag;

s32_x();
- s32_x(const s16_x& other);
- s32_x(const s8_x& other);
- s32_x(const u8_x& other);
+ explicit s32_x(const s16_x& other); // explicit: an s16_x no longer converts to s32_x implicitly; callers must write s32_x(value).
+ explicit s32_x(const s8_x& other); // explicit: same restriction for s8_x operands.
+ explicit s32_x(const u8_x& other); // explicit: same restriction for u8_x operands.
s32_x(const __m128i& value);

s32_x& operator>>=(int count);
typedef s16_x xmm_epi_tag;

s16_x();
- s16_x(const s32_x& other);
- s16_x(const s8_x& other);
- s16_x(const u8_x& other);
+ explicit s16_x(const s32_x& other); // explicit: an s32_x no longer converts to s16_x implicitly; callers must write s16_x(value).
+ explicit s16_x(const s8_x& other); // explicit: same restriction for s8_x operands.
+ explicit s16_x(const u8_x& other); // explicit: same restriction for u8_x operands.
s16_x(const __m128i& value);
s16_x(short value);

static s16_x min(const s16_x& lhs, const s16_x& rhs);
};
\r
+template<typename T>
+class base8_x : public base_x<s8_x> // NOTE(review): T is never used; the base is fixed to base_x<s8_x> rather than base_x<T> - confirm whether base_x<T> was intended.
+{
+ 
+ char operator[](int index) const; // Declared before any access specifier, so private by default in a class; no definition is visible in this patch.
+ char& operator[](int index); // Mutable counterpart; also private by default.
+};
+
+\r
class s8_x : public base_x<s8_x>
{
__m128i value_;
typedef s8_x xmm_epi_tag;

s8_x();
- s8_x(const s32_x& other);
- s8_x(const s16_x& other);
- s8_x(const u8_x& other);
+ explicit s8_x(const s32_x& other); // explicit: an s32_x no longer converts to s8_x implicitly; callers must write s8_x(value).
+ explicit s8_x(const s16_x& other); // explicit: same restriction for s16_x operands.
+ explicit s8_x(const u8_x& other); // explicit: same restriction for u8_x operands.
s8_x(const __m128i& value); 
s8_x(char b);
s8_x(char b3, char b2, char b1, char b0);
char b3, char b2, char b1, char b0);

s8_x& operator+=(const s8_x& other);
- s8_x& operator-=(const s8_x& other); 
+ s8_x& operator-=(const s8_x& other); 
char operator[](int index) const;
char& operator[](int index);

static s8_x upack(const s16_x& lhs, const s16_x& rhs);

- static s16_x multiply_add(const s8_x& lhs, const s8_x& rhs);
- static s8_x shuffle(const s8_x& lhs, const s8_x& rhs);
+ static s16_x multiply_add(const u8_x& lhs, const s8_x& rhs); // lhs is now u8_x; the definition forwards to _mm_maddubs_epi16, which reads its first operand as unsigned bytes.
static s8_x max(const s8_x& lhs, const s8_x& rhs);
static s8_x min(const s8_x& lhs, const s8_x& rhs);
+
+ static s8_x shuffle(const s8_x& lhs, const s8_x& rhs); // Declaration moved next to blend; signature unchanged.
static s8_x blend(const s8_x& lhs, const s8_x& rhs, const s8_x& mask);
- static s8_x zero();
};
\r
class u8_x : public base_x<u8_x>
typedef u8_x xmm_epu_tag;

u8_x();
- u8_x(const s32_x& other);
- u8_x(const s16_x& other);
- u8_x(const s8_x& other);
+ explicit u8_x(const s32_x& other); // explicit: an s32_x no longer converts to u8_x implicitly; callers must write u8_x(value).
+ explicit u8_x(const s16_x& other); // explicit: same restriction for s16_x operands.
+ explicit u8_x(const s8_x& other); // explicit: same restriction for s8_x operands.
u8_x(const __m128i& value); 
u8_x(char b);
u8_x(char b3, char b2, char b1, char b0);

static u8_x max(const u8_x& lhs, const u8_x& rhs);
static u8_x min(const u8_x& lhs, const u8_x& rhs);
+ 
+ static u8_x shuffle(const u8_x& lhs, const u8_x& rhs); // New: u8_x counterpart of s8_x::shuffle.
+ static u8_x blend(const u8_x& lhs, const u8_x& rhs, const u8_x& mask); // New: u8_x counterpart of s8_x::blend.
};
\r
// base_x\r
return _mm_packus_epi16(lhs.value_, rhs.value_);\r
}\r
\r
-s16_x s8_x::multiply_add(const s8_x& lhs, const s8_x& rhs)
+s16_x s8_x::multiply_add(const u8_x& lhs, const s8_x& rhs) // Wraps _mm_maddubs_epi16: unsigned bytes of lhs times signed bytes of rhs, adjacent products summed into 16-bit lanes with signed saturation.
{ 
return _mm_maddubs_epi16(lhs.value_, rhs.value_);
}
-\r
-s8_x s8_x::shuffle(const s8_x& lhs, const s8_x& rhs)\r
-{ \r
- return _mm_shuffle_epi8(lhs.value_, rhs.value_);\r
-}\r
\r
s8_x s8_x::max(const s8_x& lhs, const s8_x& rhs)\r
{ \r
{ \r
return _mm_min_epi8(lhs.value_, rhs.value_);\r
}\r
- \r
-s8_x s8_x::blend(const s8_x& lhs, const s8_x& rhs, const s8_x& mask)\r
-{ \r
- return _mm_blendv_epi8(lhs.value_, rhs.value_, mask.value_);\r
-}\r
\r
inline s8_x operator+(const s8_x& lhs, const s8_x& rhs)\r
{\r
{\r
return s8_x(lhs) -= rhs;\r
}\r
+ \r
+s8_x s8_x::shuffle(const s8_x& lhs, const s8_x& rhs) // Wraps _mm_shuffle_epi8: each result byte is the byte of lhs indexed by the low 4 bits of the matching rhs byte, or zero when that rhs byte has its high bit set.
+{ 
+ return _mm_shuffle_epi8(lhs.value_, rhs.value_);
+}
+\r
+s8_x s8_x::blend(const s8_x& lhs, const s8_x& rhs, const s8_x& mask) // Wraps _mm_blendv_epi8: each result byte comes from rhs when the matching mask byte has its high bit set, otherwise from lhs.
+{ 
+ return _mm_blendv_epi8(lhs.value_, rhs.value_, mask.value_);
+}
\r
// u8_x\r
\r
return _mm_min_epu8(lhs.value_, rhs.value_);\r
}\r
\r
+u8_x u8_x::shuffle(const u8_x& lhs, const u8_x& rhs) // Wraps _mm_shuffle_epi8: each result byte is the byte of lhs indexed by the low 4 bits of the matching rhs byte, or zero when that rhs byte has its high bit set.
+{ 
+ return _mm_shuffle_epi8(lhs.value_, rhs.value_);
+}
+\r
+u8_x u8_x::blend(const u8_x& lhs, const u8_x& rhs, const u8_x& mask) // Wraps _mm_blendv_epi8: each result byte comes from rhs when the matching mask byte has its high bit set, otherwise from lhs.
+{ 
+ return _mm_blendv_epi8(lhs.value_, rhs.value_, mask.value_);
+}
\r
// xmm_cast\r
\r