#include "tbb_avcodec.h"\r
\r
#include <common/log/log.h>\r
+#include <common/env.h>\r
+#include <common/utility/assert.h>\r
\r
#include <tbb/task.h>\r
#include <tbb/atomic.h>\r
+#include <tbb/parallel_for.h>\r
+#include <tbb/tbb_thread.h>\r
\r
+#if defined(_MSC_VER)\r
+#pragma warning (push)\r
+#pragma warning (disable : 4244)\r
+#endif\r
extern "C" \r
{\r
#define __STDC_CONSTANT_MACROS\r
#define __STDC_LIMIT_MACROS\r
#include <libavformat/avformat.h>\r
}\r
+#if defined(_MSC_VER)\r
+#pragma warning (pop)\r
+#endif\r
\r
-namespace caspar {\r
-\r
-struct thread_context{};\r
-\r
-int task_execute(AVCodecContext* s, const std::function<int(void* arg, int arg_size, int jobnr, int threadnr)>& func, void* arg, int* ret, int count, int size)\r
-{ \r
- // jobnr global for all threads? Doesn't order matter?\r
- tbb::atomic<int> counter;\r
- counter = 0;\r
+namespace caspar { namespace ffmpeg {\r
\r
- // Execute s->thread_count number of tasks in parallel.\r
- tbb::parallel_for(0, s->thread_count, 1, [&](int threadnr) \r
+int thread_execute(AVCodecContext* s, int (*func)(AVCodecContext *c2, void *arg2), void* arg, int* ret, int count, int size)\r
+{\r
+ tbb::parallel_for(tbb::blocked_range<size_t>(0, count), [&](const tbb::blocked_range<size_t>& r)\r
{\r
- while(true)\r
+ for(size_t n = r.begin(); n != r.end(); ++n) \r
{\r
- int jobnr = counter++;\r
- if(jobnr >= count)\r
- break;\r
-\r
- int r = func(arg, size, jobnr, threadnr);\r
- if (ret)\r
- ret[jobnr] = r;\r
+ int r = func(s, reinterpret_cast<uint8_t*>(arg) + n*size);\r
+ if(ret)\r
+ ret[n] = r;\r
}\r
});\r
- \r
- return 0;\r
-}\r
- \r
-int thread_execute(AVCodecContext* s, int (*func)(AVCodecContext *c2, void *arg2), void* arg, int* ret, int count, int size)\r
-{\r
- return task_execute(s, [&](void* arg, int arg_size, int jobnr, int threadnr) -> int\r
- {\r
- return func(s, reinterpret_cast<uint8_t*>(arg) + jobnr*size);\r
- }, arg, ret, count, size);\r
+\r
+ return 0;\r
}\r
\r
int thread_execute2(AVCodecContext* s, int (*func)(AVCodecContext* c2, void* arg2, int, int), void* arg, int* ret, int count)\r
-{\r
- return task_execute(s, [&](void* arg, int arg_size, int jobnr, int threadnr) -> int\r
- {\r
- return func(s, arg, jobnr, threadnr);\r
- }, arg, ret, count, 0);\r
+{ \r
+ tbb::atomic<int> counter; \r
+ counter = 0; \r
+\r
+ CASPAR_ASSERT(tbb::tbb_thread::hardware_concurrency() < 16);\r
+ // Note: this will probably only work when tbb::task_scheduler_init::num_threads() < 16.\r
+ tbb::parallel_for(tbb::blocked_range<int>(0, count, 2), [&](const tbb::blocked_range<int> &r) \r
+ { \r
+ int threadnr = counter++; \r
+ for(int jobnr = r.begin(); jobnr != r.end(); ++jobnr)\r
+ { \r
+ int r = func(s, arg, jobnr, threadnr); \r
+ if (ret) \r
+ ret[jobnr] = r; \r
+ }\r
+ --counter;\r
+ }); \r
+\r
+ return 0; \r
}\r
\r
-int thread_init(AVCodecContext *s)\r
+void thread_init(AVCodecContext* s)\r
{\r
- // Only makes sense for slicing since decode is already called through task scheduler.\r
- if(!(s->thread_type & FF_THREAD_SLICE)) \r
- return 0; \r
-\r
static const size_t MAX_THREADS = 16; // See mpegvideo.h\r
+ static int dummy_opaque;\r
\r
s->active_thread_type = FF_THREAD_SLICE;\r
- s->thread_opaque = malloc(sizeof(thread_context)); \r
+ s->thread_opaque = &dummy_opaque; \r
s->execute = thread_execute;\r
s->execute2 = thread_execute2;\r
- s->thread_count = MAX_THREADS; // We are using a taskscheduler, so use as many "threads/tasks" as possible. \r
+ s->thread_count = MAX_THREADS; // We are using a task-scheduler, so use as many "threads/tasks" as possible. \r
\r
CASPAR_LOG(info) << "Initialized ffmpeg tbb context.";\r
-\r
- return 0;\r
}\r
\r
void thread_free(AVCodecContext* s)\r
{\r
if(!s->thread_opaque)\r
return;\r
- \r
- free(s->thread_opaque); \r
- s->thread_opaque = nullptr;\r
\r
+ s->thread_opaque = nullptr;\r
+ \r
CASPAR_LOG(info) << "Released ffmpeg tbb context.";\r
}\r
\r
int tbb_avcodec_open(AVCodecContext* avctx, AVCodec* codec)\r
{\r
- thread_init(avctx);\r
- // ff_thread_init will not be executed since thread_opaque != nullptr.\r
+ avctx->thread_count = 1;\r
+ // Some codecs don't like to have multiple multithreaded decoding instances. Only enable for those we know work.\r
+ if((codec->id == CODEC_ID_MPEG2VIDEO) && \r
+ (codec->capabilities & CODEC_CAP_SLICE_THREADS) && \r
+ (avctx->thread_type & FF_THREAD_SLICE))\r
+ {\r
+ thread_init(avctx);\r
+ } \r
+ // ff_thread_init will not be executed since thread_opaque != nullptr || thread_count == 1.\r
return avcodec_open(avctx, codec); \r
}\r
\r
return avcodec_close(avctx); \r
}\r
\r
-}
\ No newline at end of file
+}}
\ No newline at end of file