2.1.0: array: Added "cacheable" property.
diff --git a/accelerator/ogl/util/device.cpp b/accelerator/ogl/util/device.cpp
index 5df73f3f0f2f20e874d7e2a95cbed38b1097e402..706e6119a8c18721a1668606f9fc70d1c4adfc7f 100644
--- a/accelerator/ogl/util/device.cpp
+++ b/accelerator/ogl/util/device.cpp
 \r
 #include "device.h"\r
 \r
+#include "buffer.h"\r
+#include "texture.h"\r
 #include "shader.h"\r
 \r
 #include <common/assert.h>\r
 #include <common/except.h>\r
+#include <common/concurrency/async.h>\r
+#include <common/memory/array.h>\r
 #include <common/gl/gl_check.h>\r
+#include <common/os/windows/windows.h>\r
+\r
 \r
 #include <boost/foreach.hpp>\r
 \r
 #include <gl/glew.h>\r
 \r
-#include <windows.h>\r
-\r
 #include <SFML/Window/Context.hpp>\r
 \r
-#include <array>\r
-#include <unordered_map>\r
-\r
 #include <tbb/concurrent_unordered_map.h>\r
+#include <tbb/concurrent_hash_map.h>\r
 #include <tbb/concurrent_queue.h>\r
-#include <tbb/atomic.h>\r
+\r
+#include <boost/utility/declval.hpp>\r
+\r
+#include <array>\r
+#include <unordered_map>\r
 \r
 tbb::atomic<int> g_count = tbb::atomic<int>();\r
 \r
 namespace caspar { namespace accelerator { namespace ogl {\r
                \r
 struct device::impl : public std::enable_shared_from_this<impl>\r
-{\r
-       std::map<host_buffer*, spl::shared_ptr<device_buffer>> write_buffer_transfer_cache_;\r
+{      \r
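+       // impl hands out shared_from_this, so device must own it through a shared_ptr; verified at compile time below.\r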
+       static_assert(std::is_same<decltype(boost::declval<device>().impl_), spl::shared_ptr<impl>>::value, "impl_ must be shared_ptr");\r
+\r
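+       // Maps a host buffer to the texture it was last uploaded to, so copy_async can reuse an upload (see copy_async below).\r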
+       tbb::concurrent_hash_map<buffer*, std::shared_ptr<texture>> texture_mapping_;\r
 \r
        std::unique_ptr<sf::Context> device_;\r
        std::unique_ptr<sf::Context> host_alloc_device_;\r
        \r
-       std::array<tbb::concurrent_unordered_map<int, tbb::concurrent_bounded_queue<std::shared_ptr<device_buffer>>>, 4>        device_pools_;\r
-       std::array<tbb::concurrent_unordered_map<int, tbb::concurrent_bounded_queue<std::shared_ptr<host_buffer>>>, 2>          host_pools_;\r
+       std::array<tbb::concurrent_unordered_map<int, tbb::concurrent_bounded_queue<std::shared_ptr<texture>>>, 4>      device_pools_;\r
+       std::array<tbb::concurrent_unordered_map<int, tbb::concurrent_bounded_queue<std::shared_ptr<buffer>>>, 2>       host_pools_;\r
        \r
        GLuint fbo_;\r
 \r
-       executor& executor_;\r
-       executor  host_alloc_executor_;\r
+       executor& render_executor_;\r
+       executor  alloc_executor_;\r
                                \r
        impl(executor& executor) \r
-               : executor_(executor)\r
-               , host_alloc_executor_(L"OpenGL allocation device")\r
+               : render_executor_(executor)\r
+               , alloc_executor_(L"OpenGL allocation context.")\r
        {\r
                if(g_count++ > 1)\r
                        CASPAR_LOG(warning) << L"Multiple OGL devices.";\r
 \r
                CASPAR_LOG(info) << L"Initializing OpenGL Device.";\r
                \r
-               auto ctx1 = executor_.invoke([=]() -> HGLRC \r
+               auto ctx1 = render_executor_.invoke([=]() -> HGLRC \r
                {\r
                        device_.reset(new sf::Context());\r
                        device_->SetActive(true);               \r
@@ -95,7 +103,7 @@ struct device::impl : public std::enable_shared_from_this<impl>
                        return ctx1;\r
                });\r
 \r
-               host_alloc_executor_.invoke([=]\r
+               alloc_executor_.invoke([=]\r
                {\r
                        host_alloc_device_.reset(new sf::Context());\r
                        host_alloc_device_->SetActive(true);    \r
@@ -105,7 +113,7 @@ struct device::impl : public std::enable_shared_from_this<impl>
                                BOOST_THROW_EXCEPTION(gl::ogl_exception() << msg_info("Failed to share OpenGL devices."));\r
                });\r
 \r
-               executor_.invoke([=]\r
+               render_executor_.invoke([=]\r
                {               \r
                        device_->SetActive(true);\r
                });\r
@@ -115,14 +123,14 @@ struct device::impl : public std::enable_shared_from_this<impl>
 \r
        ~impl()\r
        {\r
-               host_alloc_executor_.invoke([=]\r
+               alloc_executor_.invoke([=]\r
                {\r
                        host_alloc_device_.reset();\r
                        BOOST_FOREACH(auto& pool, host_pools_)\r
                                pool.clear();\r
                });\r
 \r
-               executor_.invoke([=]\r
+               render_executor_.invoke([=]\r
                {\r
                        BOOST_FOREACH(auto& pool, device_pools_)\r
                                pool.clear();\r
@@ -131,135 +139,131 @@ struct device::impl : public std::enable_shared_from_this<impl>
                        device_.reset();\r
                });\r
        }\r
-\r
-       spl::shared_ptr<device_buffer> allocate_device_buffer(int width, int height, int stride)\r
-       {\r
-               return executor_.invoke([&]() -> spl::shared_ptr<device_buffer>\r
+               \r
+       std::wstring version()\r
+       {       \r
+               try\r
                {\r
-                       try\r
-                       {\r
-                               return spl::make_shared<device_buffer>(width, height, stride);\r
-                       }\r
-                       catch(...)\r
+                       return alloc_executor_.invoke([]\r
                        {\r
-                               CASPAR_LOG(error) << L"ogl: create_device_buffer failed!";\r
-                               throw;\r
-                       }\r
-               });\r
+                               return u16(reinterpret_cast<const char*>(GL2(glGetString(GL_VERSION)))) + L" " + u16(reinterpret_cast<const char*>(GL2(glGetString(GL_VENDOR))));\r
+                       });     \r
+               }\r
+               catch(...)\r
+               {\r
+                       return L"Not found";\r
+               }\r
        }\r
-                               \r
-       spl::shared_ptr<device_buffer> create_device_buffer(int width, int height, int stride)\r
+                                                       \r
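+       // Textures are pooled per stride and per packed (width << 16 | height) key; the returned deleter recycles the texture into its pool instead of destroying it.\r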
+       spl::shared_ptr<texture> create_texture(int width, int height, int stride)\r
        {\r
                CASPAR_VERIFY(stride > 0 && stride < 5);\r
                CASPAR_VERIFY(width > 0 && height > 0);\r
                \r
                auto pool = &device_pools_[stride-1][((width << 16) & 0xFFFF0000) | (height & 0x0000FFFF)];\r
                \r
-               std::shared_ptr<device_buffer> buffer;\r
+               std::shared_ptr<texture> buffer;\r
                if(!pool->try_pop(buffer))              \r
-                       buffer = allocate_device_buffer(width, height, stride);         \r
+                       buffer = spl::make_shared<texture>(width, height, stride);\r
        \r
-               auto self = shared_from_this();\r
-               return spl::shared_ptr<device_buffer>(buffer.get(), [self, buffer, pool](device_buffer*) mutable\r
+               return spl::shared_ptr<texture>(buffer.get(), [buffer, pool](texture*) mutable\r
                {               \r
                        pool->push(buffer);     \r
                });\r
        }\r
-\r
-       spl::shared_ptr<host_buffer> allocate_host_buffer(int size, host_buffer::usage usage)\r
-       {\r
-               return host_alloc_executor_.invoke([=]() -> spl::shared_ptr<host_buffer>\r
-               {\r
-                       try\r
-                       {\r
-                               auto buffer = spl::make_shared<host_buffer>(size, usage);\r
-                               if(usage == host_buffer::usage::write_only)\r
-                                       buffer->map();\r
-                               else\r
-                                       buffer->unmap();        \r
-\r
-                               return buffer;\r
-                       }\r
-                       catch(...)\r
-                       {\r
-                               CASPAR_LOG(error) << L"ogl: create_host_buffer failed!";\r
-                               throw;  \r
-                       }\r
-               });\r
-       }\r
-       \r
-       spl::shared_ptr<host_buffer> create_host_buffer(int size, host_buffer::usage usage)\r
+               \r
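+       // Host buffers are pooled per usage and size. On release the buffer is re-mapped (write) or unmapped (read) on the allocation executor before going back into its pool.\r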
+       spl::shared_ptr<buffer> create_buffer(int size, buffer::usage usage)\r
        {\r
-               CASPAR_VERIFY(usage == host_buffer::usage::write_only || usage == host_buffer::usage::read_only);\r
                CASPAR_VERIFY(size > 0);\r
                \r
                auto pool = &host_pools_[usage.value()][size];\r
                \r
-               std::shared_ptr<host_buffer> buffer;\r
-               if(!pool->try_pop(buffer))      \r
-                       buffer = allocate_host_buffer(size, usage);     \r
-       \r
-               bool is_write = (usage == host_buffer::usage::write_only);\r
-\r
-               auto self = shared_from_this();\r
-               return spl::shared_ptr<host_buffer>(buffer.get(), [self, is_write, buffer, pool](host_buffer*) mutable\r
+               std::shared_ptr<buffer> buf;\r
+               if(!pool->try_pop(buf)) \r
                {\r
-                       self->host_alloc_executor_.begin_invoke([=]() mutable\r
-                       {               \r
-                               if(is_write)                            \r
-                                       buffer->map();                          \r
-                               else\r
-                                       buffer->unmap();\r
+                       buf = alloc_executor_.invoke([&]\r
+                       {\r
+                               return spl::make_shared<buffer>(size, usage);\r
+                       });\r
+               }\r
+                               \r
+               auto ptr = buf->data();\r
+               auto self = shared_from_this(); // buffers can leave the device context, take a hold on life-time.\r
 \r
-                               pool->push(buffer);\r
-                       }, task_priority::high_priority);       \r
+               auto on_release = [self, buf, ptr, usage, pool]() mutable\r
+               {               \r
+                       if(usage == buffer::usage::write_only)                                  \r
+                               buf->map();                                     \r
+                       else\r
+                               buf->unmap();\r
 \r
-                       self->executor_.begin_invoke([=]\r
-                       {\r
-                               write_buffer_transfer_cache_.erase(buffer.get());                               \r
-                       }, task_priority::high_priority);       \r
+                       self->texture_mapping_.erase(buf.get());\r
+\r
+                       pool->push(buf);\r
+               };\r
+               \r
+               return spl::shared_ptr<buffer>(buf.get(), [=](buffer*) mutable\r
+               {\r
+                       self->alloc_executor_.begin_invoke(on_release); \r
                });\r
        }\r
-               \r
-       std::wstring version()\r
-       {       \r
-               static std::wstring ver = L"Not found";\r
-               try\r
+\r
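+       // Wraps a pooled write-only buffer in a mutable_array; the buffer (and the device holding it) stays alive as long as the array's storage does.\r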
+       core::mutable_array create_array(int size)\r
+       {               \r
+               auto buf = create_buffer(size, buffer::usage::write_only);\r
+               return core::mutable_array(buf->data(), buf->size(), false, buf);\r
+       }\r
+\r
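+       // Upload: copies a host-side array into a texture on the render executor, caching the result in texture_mapping_ so the same buffer is only uploaded once.\r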
+       boost::unique_future<spl::shared_ptr<texture>> copy_async(const core::const_array& source, int width, int height, int stride)\r
+       {\r
+               auto buf = source.storage<spl::shared_ptr<buffer>>();\r
+                               \r
+               return render_executor_.begin_invoke([=]() -> spl::shared_ptr<texture>\r
                {\r
-                       ver = u16(executor_.invoke([]{return std::string(reinterpret_cast<const char*>(glGetString(GL_VERSION)));})\r
-                       + " "   + executor_.invoke([]{return std::string(reinterpret_cast<const char*>(glGetString(GL_VENDOR)));}));                    \r
-               }\r
-               catch(...){}\r
+                       tbb::concurrent_hash_map<buffer*, std::shared_ptr<texture>>::const_accessor a;\r
+                       if(texture_mapping_.find(a, buf.get()))\r
+                               return spl::make_shared_ptr(a->second);\r
+\r
+                       auto texture = create_texture(width, height, stride);\r
+                       texture->copy_from(*buf);       \r
+\r
+                       texture_mapping_.insert(std::make_pair(buf.get(), texture));\r
+\r
+                       return texture;\r
 \r
-               return ver;\r
+               }, task_priority::high_priority);\r
        }\r
-                       \r
-       boost::unique_future<spl::shared_ptr<device_buffer>> copy_async(spl::shared_ptr<host_buffer>& source, int width, int height, int stride)\r
+\r
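+       // Download: copies a texture into a read-only host buffer on the render executor; the blocking map is deferred until the returned array's data is first accessed.\r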
+       boost::unique_future<core::const_array> copy_async(const spl::shared_ptr<texture>& source)\r
        {\r
-               return executor_.begin_invoke([=]() -> spl::shared_ptr<device_buffer>\r
+               return flatten(render_executor_.begin_invoke([=]() -> boost::shared_future<core::const_array>\r
                {\r
-                       auto buffer_it = write_buffer_transfer_cache_.find(source.get());\r
-                       if(buffer_it == write_buffer_transfer_cache_.end())\r
+                       auto buffer = create_buffer(source->size(), buffer::usage::read_only); \r
+                       source->copy_to(*buffer);       \r
+\r
+                       return make_shared(async(launch::deferred, [=]() mutable -> core::const_array\r
                        {\r
-                               auto result = create_device_buffer(width, height, stride);\r
-                               result->copy_from(*source);\r
-                               buffer_it = write_buffer_transfer_cache_.insert(std::make_pair(source.get(), result)).first;\r
-                       }\r
-                       return buffer_it->second;\r
-               }, task_priority::high_priority);\r
+                               const auto& buf = buffer.get();\r
+                               if(!buf->data())\r
+                                       alloc_executor_.invoke(std::bind(&buffer::map, std::ref(buf))); // Defer blocking "map" call until data is needed.\r
+\r
+                               return core::const_array(buf->data(), buf->size(), true, buffer);\r
+                       }));\r
+               }, task_priority::high_priority));\r
        }\r
 };\r
 \r
 device::device() \r
-       : executor_(L"device")\r
+       : executor_(L"OpenGL Rendering Context.")\r
        , impl_(new impl(executor_))\r
 {\r
 }\r
-       \r
-spl::shared_ptr<device_buffer>                                                 device::create_device_buffer(int width, int height, int stride){return impl_->create_device_buffer(width, height, stride);}\r
-spl::shared_ptr<host_buffer>                                                   device::create_host_buffer(int size, host_buffer::usage usage){return impl_->create_host_buffer(size, usage);}\r
-boost::unique_future<spl::shared_ptr<device_buffer>>   device::copy_async(spl::shared_ptr<host_buffer>& source, int width, int height, int stride){return impl_->copy_async(source, width, height, stride);}\r
-std::wstring                                                                                   device::version(){return impl_->version();}\r
+device::~device(){}    \r
+spl::shared_ptr<texture>                                                       device::create_texture(int width, int height, int stride){return impl_->create_texture(width, height, stride);}\r
+core::mutable_array                                                                    device::create_array(int size){return impl_->create_array(size);}\r
+boost::unique_future<spl::shared_ptr<texture>>         device::copy_async(const core::const_array& source, int width, int height, int stride){return impl_->copy_async(source, width, height, stride);}\r
+boost::unique_future<core::const_array>                                device::copy_async(const spl::shared_ptr<texture>& source){return impl_->copy_async(source);}\r
+std::wstring                                                                           device::version(){return impl_->version();}\r
 \r
 \r
 }}}\r