#include "image_mixer.h"\r
\r
#include "../util/write_frame.h"\r
-#include "../util/simd.h"\r
+#include "../util/xmm.h"\r
\r
#include <common/assert.h>\r
#include <common/gl/gl_check.h>\r
return !(lhs == rhs);\r
}\r
\r
// blend: combines two vectors of packed 8-bit channel data and returns the\r
// per-byte result of the formulas in the comments below:\r
//   T(S, D) = S * D[A] + 0x80\r
//   C(S, D) = S + D - (((T >> 8) + T) >> 8)\r
// where S is `source`, D is `dest` (first clamped to its own alpha) and D[A] is\r
// the byte at index 3 of each 4-byte group of `dest`.\r
// The -/+ pairs in this hunk only swap the xmm_epi8/xmm_epi16/xmm_cast spellings\r
// for the xmm::s8_x/s16_x/u8_x ones; the arithmetic is line-for-line the same.\r
// NOTE(review): lane semantics of shuffle/min/blend/multiply_low are inferred\r
// from their names (PSHUFB/PMINUB/PBLENDVB/PMULLW-like) -- confirm in ../util/xmm.h.\r
-inline xmm_epi8 blend(xmm_epi8 dest, xmm_epi8 source)\r
+inline xmm::s8_x blend(xmm::s8_x dest, xmm::s8_x source)\r
{ \r
- auto s = xmm_cast<xmm_epi16>(source);\r
+ using namespace xmm;\r
+\r
+ auto s = s16_x(source);\r
auto d = dest;\r
\r
// round (128 == 0x80) is the bias added to every product before the shifts;\r
// lomask keeps only the low byte of each 16-bit lane.\r
- const xmm_epi16 round = 128;\r
- const xmm_epi16 lomask = 0x00FF;\r
+ const s16_x round = 128;\r
+ const s16_x lomask = 0x00FF;\r
\r
// T(S, D) = S * D[A] + 0x80\r
// aaaa: byte 3/7/11/15 of d repeated across its own 4-byte group, i.e. D[A]\r
// in every channel slot. d is then limited to at most that value per byte\r
// (unsigned min), so the final S + (D - T') is computed from a clamped D.\r
- auto aaaa = xmm_epi8::shuffle(d, xmm_epi8(15, 15, 15, 15, 11, 11, 11, 11, 7, 7, 7, 7, 3, 3, 3, 3));\r
- d = xmm_epi8::umin(d, aaaa); // overflow guard\r
+ auto aaaa = s8_x::shuffle(d, s8_x(15, 15, 15, 15, 11, 11, 11, 11, 7, 7, 7, 7, 3, 3, 3, 3));\r
+ d = s8_x(u8_x::min(u8_x(d), u8_x(aaaa))); // overflow guard\r
\r
// xaxa: D[A] widened to 16 bits (one copy in the low byte of every 16-bit lane).\r
- auto xaxa = xmm_cast<xmm_epi16>(aaaa) & lomask; \r
+ auto xaxa = s16_x(aaaa) & lomask; \r
\r
// xrxb: the low (even-index) byte of each 16-bit lane of the source.\r
auto xrxb = s & lomask;\r
- auto t1 = xmm_epi16::multiply_low(xrxb, xaxa) + round; \r
+ auto t1 = s16_x::multiply_low(xrxb, xaxa) + round; \r
\r
// xaxg: the high (odd-index) byte of each 16-bit lane of the source.\r
auto xaxg = s >> 8;\r
- auto t2 = xmm_epi16::multiply_low(xaxg, xaxa) + round;\r
+ auto t2 = s16_x::multiply_low(xaxg, xaxa) + round;\r
\r
// C(S, D) = S + D - (((T >> 8) + T) >> 8);\r
// rxbx carries the fully shifted result in the low byte of each lane. axgx\r
// omits the final >> 8, which leaves its result in the high byte of each lane;\r
// presumably the (-1, 0, -1, 0) mask then takes those high bytes from axgx and\r
// the low bytes from rxbx -- TODO confirm the blend mask convention.\r
- auto rxbx = xmm_cast<xmm_epi8>(((t1 >> 8) + t1) >> 8); \r
- auto axgx = xmm_cast<xmm_epi8>((t2 >> 8) + t2); \r
- auto argb = xmm_epi8::blend(rxbx, axgx, xmm_epi8(-1, 0, -1, 0));\r
+ auto rxbx = s8_x(((t1 >> 8) + t1) >> 8); \r
+ auto axgx = s8_x((t2 >> 8) + t2); \r
+ auto argb = s8_x::blend(rxbx, axgx, s8_x(-1, 0, -1, 0));\r
\r
- return xmm_cast<xmm_epi8>(s) + (d - argb);\r
+ return s8_x(s) + (d - argb);\r
}\r
\r
// kernel: walks `count` bytes in steps of 32, loading two vectors (offsets +0\r
// and +16) from `dest` and two from `source`, passing each pair through blend(),\r
// and writing both results back to `dest` at the same offsets.\r
// The template argument selects how the result is written: the old code passed\r
// it as write<write_op>(...), the new code passes a default-constructed tag\r
// object as a third argument to write(...).\r
// NOTE(review): the vectors loaded from `dest` (s0/s1) are handed to blend() as\r
// its `source` parameter and those loaded from `source` (d0/d1) as its `dest`\r
// parameter -- confirm this ordering is intentional.\r
// NOTE(review): `transform` is not used anywhere in the visible body.\r
// NOTE(review): there is no tail handling; if count is not a multiple of 32 the\r
// last iteration touches bytes past `count`. Presumably callers guarantee the\r
// size (and whatever alignment load/write require) -- TODO confirm.\r
// NOTE(review): `n` deduces to int but is compared against the size_t `count`.\r
-template<typename write_op>\r
+template<typename write_tag>\r
static void kernel(uint8_t* dest, const uint8_t* source, size_t count, const core::frame_transform& transform)\r
-{ \r
+{ \r
+ using namespace xmm;\r
+\r
for(auto n = 0; n < count; n += 32) \r
{\r
// s0/s1: the two vectors read from the dest buffer for this step.\r
- auto s0 = xmm_epi8::load(dest+n+0);\r
- auto s1 = xmm_epi8::load(dest+n+16);\r
+ auto s0 = s8_x::load(dest+n+0);\r
+ auto s1 = s8_x::load(dest+n+16);\r
\r
// d0/d1: the two vectors read from the source buffer for this step.\r
- auto d0 = xmm_epi8::load(source+n+0);\r
- auto d1 = xmm_epi8::load(source+n+16);\r
+ auto d0 = s8_x::load(source+n+0);\r
+ auto d1 = s8_x::load(source+n+16);\r
\r
auto argb0 = blend(d0, s0);\r
auto argb1 = blend(d1, s1);\r
\r
// Results overwrite the dest buffer in place.\r
- xmm_epi8::write<write_op>(argb0, dest+n+0);\r
- xmm_epi8::write<write_op>(argb1, dest+n+16);\r
+ s8_x::write(argb0, dest+n+0 , write_tag());\r
+ s8_x::write(argb1, dest+n+16, write_tag());\r
} \r
}\r
\r
\r
auto it = items.begin();\r
for(; it != items.end()-1; ++it) \r
- kernel<store_write>(dest + y*width*4, it->buffers.at(0)->data() + y*width*4, width*4, it->transform);\r
+ kernel<xmm::store_tag>(dest + y*width*4, it->buffers.at(0)->data() + y*width*4, width*4, it->transform);\r
\r
- kernel<stream_write>(dest + y*width*4, it->buffers.at(0)->data() + y*width*4, width*4, it->transform);\r
+ kernel<xmm::stream_tag>(dest + y*width*4, it->buffers.at(0)->data() + y*width*4, width*4, it->transform);\r
}\r
});\r
}\r