Besides the obvious benefit of spending less time copying, this has two positive effects:
- The VA-API thread is no longer a choke point; uploading can happen from
multiple cores.
- With one copy less, we seem to be reducing L3 cache pressure a bit;
at some point between five and six 1080p sources, we “fall off a cliff”
wrt. the L3 and start thrashing. This doesn't fix the issue, but alleviates
it somewhat.
All in all, we seem to go down from ~2.6 to ~2.1–2.2 cores used with one
720p channel and five 1080p channels. I haven't tried saturating channels
yet to see how many we can actually encode.
-Subproject commit 5163d25c65c3028090db1aea6587ec2fb4cb823e
+Subproject commit 03e38890b599efe6ac906fdb70b43cda63f11d01
assert(stride == width * 2);
}
- current_video_frame = video_frame_allocator->alloc_frame();
+ current_video_frame = video_frame_allocator->create_frame(width, height, stride);
if (current_video_frame.data != nullptr) {
const uint8_t *src;
video_frame->GetBytes((void **)&src);
card->capture->set_frame_callback(bind(&Mixer::bm_frame, this, card_index, _1, _2, _3, _4, _5, _6, _7));
if (card->frame_allocator == nullptr) {
- card->frame_allocator.reset(new PBOFrameAllocator(pixel_format, 8 << 20, global_flags.width, global_flags.height)); // 8 MB.
+ card->frame_allocator.reset(new PBOFrameAllocator(pixel_format, 8 << 20, global_flags.width, global_flags.height, card_index, mjpeg_encoder.get())); // 8 MB.
}
card->capture->set_video_frame_allocator(card->frame_allocator.get());
if (card->surface == nullptr) {
new_frame->upload_func = nullptr;
}
- // Only bother doing MJPEG encoding if there are any connected clients
- // that want the stream. FIXME: We should also stop memcpy-ing if there are none!
- if (httpd.get_num_connected_multicam_clients() > 0) {
- auto stream_it = global_flags.card_to_mjpeg_stream_export.find(card_index);
- if (stream_it != global_flags.card_to_mjpeg_stream_export.end()) {
- mjpeg_encoder->upload_frame(pts_int, stream_it->second, new_frame->frame, new_frame->video_format, new_frame->y_offset, new_frame->cbcr_offset);
+ if (new_frame->frame->data_copy != nullptr) {
+ int mjpeg_card_index = mjpeg_encoder->get_mjpeg_stream_for_card(card_index);
+ if (mjpeg_card_index == -1) {
+ mjpeg_encoder->finish_frame(new_frame->frame);
+ } else {
+ mjpeg_encoder->upload_frame(pts_int, mjpeg_card_index, new_frame->frame, new_frame->video_format, new_frame->y_offset, new_frame->cbcr_offset);
}
}
}
extern void memcpy_with_pitch(uint8_t *dst, const uint8_t *src, size_t src_width, size_t dst_pitch, size_t height);
-#define CHECK_VASTATUS(va_status, func) \
- if (va_status != VA_STATUS_SUCCESS) { \
- fprintf(stderr, "%s:%d (%s) failed with %d\n", __func__, __LINE__, func, va_status); \
- exit(1); \
- }
-
// From libjpeg (although it's of course identical between implementations).
static const int jpeg_natural_order[DCTSIZE2] = {
0, 1, 8, 16, 9, 2, 3, 10,
any_frames_to_be_encoded.notify_all();
}
+// Release the VA-API state bound to a frame that was set up for direct
+// VA-API upload in PBOFrameAllocator::create_frame() (data_copy pointing
+// into a mapped VAImage), but that will never go through upload_frame()
+// — e.g. because MJPEG export for the card was switched off between
+// allocation and delivery.
+void MJPEGEncoder::finish_frame(RefCountedFrame frame)
+{
+	PBOFrameAllocator::Userdata *userdata = (PBOFrameAllocator::Userdata *)frame->userdata;
+
+	if (userdata->data_copy_current_src == PBOFrameAllocator::Userdata::FROM_VA_API) {
+		// Take ownership back from the frame's userdata; when “release”
+		// goes out of scope at the end of this block, it gives the
+		// resources back (RAII).
+		VAResources resources __attribute__((unused)) = move(userdata->va_resources);
+		ReleaseVAResources release = move(userdata->va_resources_release);
+		VAImage image = move(userdata->va_image);
+
+		// Undo the vaMapBuffer()/vaDeriveImage() done in create_frame().
+		VAStatus va_status = vaUnmapBuffer(va_dpy->va_dpy, image.buf);
+		CHECK_VASTATUS(va_status, "vaUnmapBuffer");
+		va_status = vaDestroyImage(va_dpy->va_dpy, image.image_id);
+		CHECK_VASTATUS(va_status, "vaDestroyImage");
+	}
+	// FROM_MALLOC frames hold no VA-API state, so there is nothing to do.
+}
+
+// Map a capture card index to the MJPEG stream its frames should be
+// exported as, or return -1 if no encoding should happen for this card
+// right now (so callers can skip the copy/upload work entirely).
+int MJPEGEncoder::get_mjpeg_stream_for_card(unsigned card_index)
+{
+	// Only bother doing MJPEG encoding if there are any connected clients
+	// that want the stream.
+	if (httpd->get_num_connected_multicam_clients() == 0) {
+		return -1;
+	}
+
+	// The card must also be explicitly mapped to an export stream.
+	auto it = global_flags.card_to_mjpeg_stream_export.find(card_index);
+	if (it == global_flags.card_to_mjpeg_stream_export.end()) {
+		return -1;
+	}
+	return it->second;
+}
+
void MJPEGEncoder::encoder_thread_func()
{
pthread_setname_np(pthread_self(), "MJPEG_Encode");
void MJPEGEncoder::encode_jpeg_va(QueuedFrame &&qf)
{
+ PBOFrameAllocator::Userdata *userdata = (PBOFrameAllocator::Userdata *)qf.frame->userdata;
unsigned width = qf.video_format.width;
unsigned height = qf.video_format.height;
- VAResources resources = get_va_resources(width, height);
- ReleaseVAResources release(this, resources);
+ VAResources resources;
+ ReleaseVAResources release;
+ if (userdata->data_copy_current_src == PBOFrameAllocator::Userdata::FROM_VA_API) {
+ resources = move(userdata->va_resources);
+ release = move(userdata->va_resources_release);
+ } else {
+ assert(userdata->data_copy_current_src == PBOFrameAllocator::Userdata::FROM_MALLOC);
+ resources = get_va_resources(width, height);
+ release = ReleaseVAResources(this, resources);
+ }
VAData va_data = get_va_data_for_resolution(width, height);
va_data.pic_param.coded_buf = resources.data_buffer;
VABufferDestroyer destroy_slice_param(va_dpy->va_dpy, slice_param_buffer);
VAImage image;
- va_status = vaDeriveImage(va_dpy->va_dpy, resources.surface, &image);
- CHECK_VASTATUS(va_status, "vaDeriveImage");
+ if (userdata->data_copy_current_src == PBOFrameAllocator::Userdata::FROM_VA_API) {
+ // The pixel data is already uploaded by the caller.
+ image = move(userdata->va_image);
+ } else {
+ assert(userdata->data_copy_current_src == PBOFrameAllocator::Userdata::FROM_MALLOC);
- // Upload the pixel data.
- uint8_t *surface_p = nullptr;
- vaMapBuffer(va_dpy->va_dpy, image.buf, (void **)&surface_p);
+ // Upload the pixel data.
+ va_status = vaDeriveImage(va_dpy->va_dpy, resources.surface, &image);
+ CHECK_VASTATUS(va_status, "vaDeriveImage");
- size_t field_start_line = qf.video_format.extra_lines_top; // No interlacing support.
- size_t field_start = qf.cbcr_offset * 2 + qf.video_format.width * field_start_line * 2;
+ uint8_t *surface_p = nullptr;
+ vaMapBuffer(va_dpy->va_dpy, image.buf, (void **)&surface_p);
- {
- const uint8_t *src = qf.frame->data_copy + field_start;
- uint8_t *dst = (unsigned char *)surface_p + image.offsets[0];
- memcpy_with_pitch(dst, src, qf.video_format.width * 2, image.pitches[0], qf.video_format.height);
+ size_t field_start_line = qf.video_format.extra_lines_top; // No interlacing support.
+ size_t field_start = qf.cbcr_offset * 2 + qf.video_format.width * field_start_line * 2;
+
+ {
+ const uint8_t *src = qf.frame->data_copy + field_start;
+ uint8_t *dst = (unsigned char *)surface_p + image.offsets[0];
+ memcpy_with_pitch(dst, src, qf.video_format.width * 2, image.pitches[0], qf.video_format.height);
+ }
}
va_status = vaUnmapBuffer(va_dpy->va_dpy, image.buf);
struct VADisplayWithCleanup;
struct VectorDestinationManager;
+// Abort with a diagnostic if a VA-API call failed; “func” names the failed
+// call for the error message. Kept as a macro (not a function) so that
+// __func__ and __LINE__ refer to the call site.
+#define CHECK_VASTATUS(va_status, func) \
+	if (va_status != VA_STATUS_SUCCESS) { \
+		fprintf(stderr, "%s:%d (%s) failed with %d\n", __func__, __LINE__, func, va_status); \
+		exit(1); \
+	}
+
class MJPEGEncoder {
public:
MJPEGEncoder(HTTPD *httpd, const std::string &va_display);
void stop();
void upload_frame(int64_t pts, unsigned card_index, RefCountedFrame frame, const bmusb::VideoFormat &video_format, size_t y_offset, size_t cbcr_offset);
+ // If the frame was started (data_copy != nullptr) but will not be finished
+	// (MJPEG encoding was turned off in the meantime), you'll need to call finish_frame()
+ // to release any VA-API resources.
+ void finish_frame(RefCountedFrame frame);
+
+ bool using_vaapi() const { return va_dpy != nullptr; }
+
+ // Returns -1 for inactive (ie., don't encode frames for this card right now).
+ int get_mjpeg_stream_for_card(unsigned card_index);
+
private:
static constexpr int quality = 90;
std::atomic<int64_t> metric_mjpeg_frames_oversized_dropped{0};
std::atomic<int64_t> metric_mjpeg_overrun_dropped{0};
std::atomic<int64_t> metric_mjpeg_overrun_submitted{0};
+
+ friend class PBOFrameAllocator; // FIXME
};
#endif // !defined(_MJPEG_ENCODER_H)
#include <cstddef>
#include "flags.h"
+#include "mjpeg_encoder.h"
#include "v210_converter.h"
+#include "va_display_with_cleanup.h"
using namespace std;
} // namespace
-PBOFrameAllocator::PBOFrameAllocator(bmusb::PixelFormat pixel_format, size_t frame_size, GLuint width, GLuint height, size_t num_queued_frames, GLenum buffer, GLenum permissions, GLenum map_bits)
- : pixel_format(pixel_format), buffer(buffer)
+PBOFrameAllocator::PBOFrameAllocator(bmusb::PixelFormat pixel_format, size_t frame_size, GLuint width, GLuint height, unsigned card_index, MJPEGEncoder *mjpeg_encoder, size_t num_queued_frames, GLenum buffer, GLenum permissions, GLenum map_bits)
+ : card_index(card_index), mjpeg_encoder(mjpeg_encoder), pixel_format(pixel_format), buffer(buffer)
{
userdata.reset(new Userdata[num_queued_frames]);
for (size_t i = 0; i < num_queued_frames; ++i) {
Frame frame;
frame.data = (uint8_t *)glMapBufferRange(buffer, 0, frame_size, permissions | map_bits | GL_MAP_PERSISTENT_BIT);
frame.data2 = frame.data + frame_size / 2;
- frame.data_copy = new uint8_t[frame_size];
check_error();
frame.size = frame_size;
Userdata *ud = &userdata[frame_idx];
frame.userdata = ud;
ud->pbo = pbo;
ud->pixel_format = pixel_format;
+ ud->data_copy_malloc = new uint8_t[frame_size];
frame.owner = this;
// For 8-bit non-planar Y'CbCr, we ask the driver to split Y' and Cb/Cr
void PBOFrameAllocator::destroy_frame(Frame *frame)
{
Userdata *ud = (Userdata *)frame->userdata;
- delete[] frame->data_copy;
+ delete[] ud->data_copy_malloc;
GLuint pbo = ud->pbo;
glBindBuffer(buffer, pbo);
}
vf.len = 0;
vf.overflow = 0;
+
+ if (mjpeg_encoder != nullptr && mjpeg_encoder->using_vaapi() &&
+ mjpeg_encoder->get_mjpeg_stream_for_card(card_index) != -1) {
+ Userdata *ud = (Userdata *)vf.userdata;
+ vf.data_copy = ud->data_copy_malloc;
+ ud->data_copy_current_src = Userdata::FROM_MALLOC;
+ } else {
+ vf.data_copy = nullptr;
+ }
+
+ return vf;
+}
+
+// Like alloc_frame(), but for callers that know the frame geometry
+// (width/height/stride) up front. If MJPEG export is active for this
+// card and VA-API is in use, point data_copy directly into a mapped
+// VA-API image, so that the capture thread writes the MJPEG copy
+// straight into the encode buffer — saving one memcpy on the VA-API
+// thread (see MJPEGEncoder::encode_jpeg_va()).
+bmusb::FrameAllocator::Frame PBOFrameAllocator::create_frame(size_t width, size_t height, size_t stride)
+{
+	Frame vf;
+
+	{
+		lock_guard<mutex> lock(freelist_mutex);
+		if (freelist.empty()) {
+			printf("Frame overrun (no more spare PBO frames), dropping frame!\n");
+			vf.len = 0;
+			vf.overflow = 0;
+			return vf;
+		} else {
+			vf = freelist.front();
+			freelist.pop();
+		}
+	}
+	vf.len = 0;
+	vf.overflow = 0;
+
+	Userdata *userdata = (Userdata *)vf.userdata;
+
+	if (mjpeg_encoder != nullptr && mjpeg_encoder->using_vaapi() &&
+	    mjpeg_encoder->get_mjpeg_stream_for_card(card_index) != -1) {
+		VADisplay va_dpy = mjpeg_encoder->va_dpy->va_dpy;
+		MJPEGEncoder::VAResources resources = mjpeg_encoder->get_va_resources(width, height);
+		MJPEGEncoder::ReleaseVAResources release(mjpeg_encoder, resources);
+
+		VAImage image;
+		VAStatus va_status = vaDeriveImage(va_dpy, resources.surface, &image);
+		CHECK_VASTATUS(va_status, "vaDeriveImage");
+
+		// We can only write directly if the producer's stride matches
+		// the derived image's; otherwise, fall back to staging in RAM.
+		if (image.pitches[0] == stride) {
+			// The frame now owns the VA resources; MJPEGEncoder takes
+			// them back in encode_jpeg_va() or finish_frame(). “release”
+			// is moved-from here, so it does nothing at scope exit.
+			userdata->va_resources = move(resources);
+			userdata->va_resources_release = move(release);
+			userdata->va_image = move(image);
+
+			va_status = vaMapBuffer(va_dpy, image.buf, (void **)&vf.data_copy);
+			CHECK_VASTATUS(va_status, "vaMapBuffer");
+			vf.data_copy += image.offsets[0];
+			userdata->data_copy_current_src = Userdata::FROM_VA_API;
+		} else {
+			printf("WARNING: Could not copy directly into VA-API MJPEG buffer for %zu x %zu, since producer and consumer disagreed on stride (%zu != %d).\n", width, height, stride, image.pitches[0]);
+			// Stage in the malloc-ed buffer instead; MJPEGEncoder will
+			// memcpy it into a VA-API surface later. Undo the derive,
+			// and let “release” hand back the resources at scope exit.
+			vf.data_copy = userdata->data_copy_malloc;
+			userdata->data_copy_current_src = Userdata::FROM_MALLOC;
+
+			va_status = vaDestroyImage(va_dpy, image.image_id);
+			CHECK_VASTATUS(va_status, "vaDestroyImage");
+		}
+	} else {
+		// MJPEG encoding inactive for this card; no copy needed at all.
+		vf.data_copy = nullptr;
+	}
+
	return vf;
}
#include <movit/ycbcr.h>
#include "bmusb/bmusb.h"
+#include "mjpeg_encoder.h"
+
+class MJPEGEncoder;
// An allocator that allocates straight into OpenGL pinned memory.
// Meant for video frames only. We use a queue rather than a stack,
PBOFrameAllocator(bmusb::PixelFormat pixel_format,
size_t frame_size,
GLuint width, GLuint height,
+ unsigned card_index,
+ MJPEGEncoder *mjpeg_encoder = nullptr,
size_t num_queued_frames = 16,
GLenum buffer = GL_PIXEL_UNPACK_BUFFER_ARB,
GLenum permissions = GL_MAP_WRITE_BIT,
GLenum map_bits = GL_MAP_FLUSH_EXPLICIT_BIT);
~PBOFrameAllocator() override;
Frame alloc_frame() override;
+ Frame create_frame(size_t width, size_t height, size_t stride) override;
void release_frame(Frame frame) override;
struct Userdata {
unsigned last_frame_rate_nom, last_frame_rate_den;
bool has_last_subtitle = false;
std::string last_subtitle;
+
+ // These are the source of the “data_copy” member in Frame,
+ // used for MJPEG encoding. There are three possibilities:
+ //
+ // - MJPEG encoding is not active (at all, or for this specific
+ // card). Then data_copy is nullptr, and what's in here
+ // does not matter at all.
+ // - We can encode directly into VA-API buffers (ie., VA-API
+ // is active, and nothing strange happened wrt. strides);
+ // then va_resources, va_resources_release and va_image
+ // are fetched from MJPEGEncoder at create_frame() and released
+ // back when the frame is uploaded (or would have been).
+ // In this case, data_copy points into the mapped VAImage.
+ // - If not, data_copy points to data_copy_malloc, and is copied
+ // from there into VA-API buffers (by MJPEGEncoder) if needed.
+ enum { FROM_MALLOC, FROM_VA_API } data_copy_current_src;
+ uint8_t *data_copy_malloc;
+ MJPEGEncoder::VAResources va_resources;
+ MJPEGEncoder::ReleaseVAResources va_resources_release;
+ VAImage va_image;
};
private:
void init_frame(size_t frame_idx, size_t frame_size, GLuint width, GLuint height, GLenum permissions, GLenum map_bits);
void destroy_frame(Frame *frame);
+ unsigned card_index;
+ MJPEGEncoder *mjpeg_encoder;
bmusb::PixelFormat pixel_format;
std::mutex freelist_mutex;
std::queue<Frame> freelist;