git.sesse.net Git - nageru/commitdiff
Move texture uploading out of a lambda.
author Steinar H. Gunderson <steinar+nageru@gunderson.no>
Sun, 24 May 2020 19:16:00 +0000 (21:16 +0200)
committer Steinar H. Gunderson <steinar+nageru@gunderson.no>
Sun, 24 May 2020 19:16:00 +0000 (21:16 +0200)
This doesn't make all that much sense anymore, and having it outside
std::function helps debuggability. (Also, it probably saves a few
allocations.)
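
The shape of the change, as a minimal hypothetical sketch (QueuedFrame, FrameUploader and use_frame() below are illustrative names, not the actual Nageru types): instead of capturing every upload parameter into a std::function stored on the queued frame and invoked later, the parameters are stored as plain members and a named member function is called with them when the frame is first used. A named function is easier to step into in a debugger, and a plain struct of parameters avoids the heap allocation that a large lambda capture can force on std::function.

#include <cstddef>
#include <functional>

// Before: the queued frame carries a type-erased callable with its captured state.
struct QueuedFrameBefore {
	std::function<void()> upload_func;  // Captures field, offsets, userdata, ...
};

// After: the queued frame carries the parameters themselves plus a "done" flag.
struct QueuedFrame {
	int field = 0;
	std::size_t video_offset = 0, y_offset = 0, cbcr_offset = 0;
	bool texture_uploaded = false;
};

class FrameUploader {
public:
	// Stand-in with a simplified signature; the real Mixer::upload_texture_for_frame()
	// also takes the video format and the frame's userdata.
	void upload_texture_for_frame(int field, std::size_t video_offset,
	                              std::size_t y_offset, std::size_t cbcr_offset) {}
};

// Consumer side: upload lazily, exactly once, on first use.
inline void use_frame(FrameUploader &uploader, QueuedFrame &frame)
{
	if (!frame.texture_uploaded) {
		uploader.upload_texture_for_frame(frame.field, frame.video_offset,
		                                  frame.y_offset, frame.cbcr_offset);
		frame.texture_uploaded = true;
	}
}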

nageru/mixer.cpp
nageru/mixer.h

diff --git a/nageru/mixer.cpp b/nageru/mixer.cpp
index a5206c9b3ea48edf27c9d8db2462a8933465927f..3f5119a43d74c7de00a58d1152290315837e77b8 100644 (file)
@@ -1019,7 +1019,7 @@ void Mixer::bm_frame(unsigned card_index, uint16_t timecode,
        }
 #endif
 
-       size_t cbcr_width, cbcr_height, cbcr_offset, y_offset;
+       size_t y_offset, cbcr_offset;
        size_t expected_length = video_format.stride * (video_format.height + video_format.extra_lines_top + video_format.extra_lines_bottom);
        if (userdata != nullptr && userdata->pixel_format == PixelFormat_8BitYCbCrPlanar) {
                // The calculation above is wrong for planar Y'CbCr, so just override it.
@@ -1028,16 +1028,12 @@ void Mixer::bm_frame(unsigned card_index, uint16_t timecode,
                expected_length = video_frame.len;
 
                userdata->ycbcr_format = (static_cast<FFmpegCapture *>(card->capture.get()))->get_current_frame_ycbcr_format();
-               cbcr_width = video_format.width / userdata->ycbcr_format.chroma_subsampling_x;
-               cbcr_height = video_format.height / userdata->ycbcr_format.chroma_subsampling_y;
-               cbcr_offset = video_format.width * video_format.height;
                y_offset = 0;
+               cbcr_offset = video_format.width * video_format.height;
        } else {
                // All the other Y'CbCr formats are 4:2:2.
-               cbcr_width = video_format.width / 2;
-               cbcr_height = video_format.height;
-               cbcr_offset = video_offset / 2;
                y_offset = video_frame.size / 2 + video_offset / 2;
+               cbcr_offset = video_offset / 2;
        }
        if (video_frame.len - video_offset == 0 ||
            video_frame.len - video_offset != expected_length) {
@@ -1068,19 +1064,14 @@ void Mixer::bm_frame(unsigned card_index, uint16_t timecode,
 
        unsigned num_fields = video_format.interlaced ? 2 : 1;
        steady_clock::time_point frame_upload_start;
-       bool interlaced_stride = false;
        if (video_format.interlaced) {
                // Send the two fields along as separate frames; the other side will need to add
                // a deinterlacer to actually get this right.
                assert(video_format.height % 2 == 0);
                video_format.height /= 2;
-               cbcr_height /= 2;
                assert(frame_length % 2 == 0);
                frame_length /= 2;
                num_fields = 2;
-               if (video_format.second_field_start == 1) {
-                       interlaced_stride = true;
-               }
                frame_upload_start = steady_clock::now();
        }
        assert(userdata != nullptr);
@@ -1091,79 +1082,15 @@ void Mixer::bm_frame(unsigned card_index, uint16_t timecode,
        userdata->last_frame_rate_den = video_format.frame_rate_den;
        RefCountedFrame frame(video_frame);
 
-       // Upload the textures.
+       // Send the frames on to the main thread, which will upload and process them.
+       // It is entirely possible to upload them in the same thread (and it might even be
+       // faster, depending on the GPU and driver), but it appears to be trickling
+       // driver bugs very easily.
+       //
+       // Note that this means we must hold on to the actual frame data in <userdata>
+       // until the upload is done, but we hold on to <frame> much longer than that
+       // (in fact, all the way until we no longer use the texture in rendering).
        for (unsigned field = 0; field < num_fields; ++field) {
-               // Put the actual texture upload in a lambda that is executed in the main thread.
-               // It is entirely possible to do this in the same thread (and it might even be
-               // faster, depending on the GPU and driver), but it appears to be trickling
-               // driver bugs very easily.
-               //
-               // Note that this means we must hold on to the actual frame data in <userdata>
-               // until the upload command is run, but we hold on to <frame> much longer than that
-               // (in fact, all the way until we no longer use the texture in rendering).
-               auto upload_func = [this, field, video_format, y_offset, video_offset, cbcr_offset, cbcr_width, cbcr_height, interlaced_stride, userdata]() {
-                       unsigned field_start_line;
-                       if (field == 1) {
-                               field_start_line = video_format.second_field_start;
-                       } else {
-                               field_start_line = video_format.extra_lines_top;
-                       }
-
-                       // For anything not FRAME_FORMAT_YCBCR_10BIT, v210_width will be nonsensical but not used.
-                       size_t v210_width = video_format.stride / sizeof(uint32_t);
-                       ensure_texture_resolution(userdata, field, video_format.width, video_format.height, cbcr_width, cbcr_height, v210_width);
-
-                       glBindBuffer(GL_PIXEL_UNPACK_BUFFER, userdata->pbo);
-                       check_error();
-
-                       switch (userdata->pixel_format) {
-                       case PixelFormat_10BitYCbCr: {
-                               size_t field_start = video_offset + video_format.stride * field_start_line;
-                               upload_texture(userdata->tex_v210[field], v210_width, video_format.height, video_format.stride, interlaced_stride, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, field_start);
-                               v210_converter->convert(userdata->tex_v210[field], userdata->tex_444[field], video_format.width, video_format.height);
-                               break;
-                       }
-                       case PixelFormat_8BitYCbCr: {
-                               size_t field_y_start = y_offset + video_format.width * field_start_line;
-                               size_t field_cbcr_start = cbcr_offset + cbcr_width * field_start_line * sizeof(uint16_t);
-
-                               // Make up our own strides, since we are interleaving.
-                               upload_texture(userdata->tex_y[field], video_format.width, video_format.height, video_format.width, interlaced_stride, GL_RED, GL_UNSIGNED_BYTE, field_y_start);
-                               upload_texture(userdata->tex_cbcr[field], cbcr_width, cbcr_height, cbcr_width * sizeof(uint16_t), interlaced_stride, GL_RG, GL_UNSIGNED_BYTE, field_cbcr_start);
-                               break;
-                       }
-                       case PixelFormat_8BitYCbCrPlanar: {
-                               assert(field_start_line == 0);  // We don't really support interlaced here.
-                               size_t field_y_start = y_offset;
-                               size_t field_cb_start = cbcr_offset;
-                               size_t field_cr_start = cbcr_offset + cbcr_width * cbcr_height;
-
-                               // Make up our own strides, since we are interleaving.
-                               upload_texture(userdata->tex_y[field], video_format.width, video_format.height, video_format.width, interlaced_stride, GL_RED, GL_UNSIGNED_BYTE, field_y_start);
-                               upload_texture(userdata->tex_cb[field], cbcr_width, cbcr_height, cbcr_width, interlaced_stride, GL_RED, GL_UNSIGNED_BYTE, field_cb_start);
-                               upload_texture(userdata->tex_cr[field], cbcr_width, cbcr_height, cbcr_width, interlaced_stride, GL_RED, GL_UNSIGNED_BYTE, field_cr_start);
-                               break;
-                       }
-                       case PixelFormat_8BitBGRA: {
-                               size_t field_start = video_offset + video_format.stride * field_start_line;
-                               upload_texture(userdata->tex_rgba[field], video_format.width, video_format.height, video_format.stride, interlaced_stride, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, field_start);
-                               // These could be asked to deliver mipmaps at any time.
-                               glBindTexture(GL_TEXTURE_2D, userdata->tex_rgba[field]);
-                               check_error();
-                               glGenerateMipmap(GL_TEXTURE_2D);
-                               check_error();
-                               glBindTexture(GL_TEXTURE_2D, 0);
-                               check_error();
-                               break;
-                       }
-                       default:
-                               assert(false);
-                       }
-
-                       glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
-                       check_error();
-               };
-
                if (field == 1) {
                        // Don't upload the second field as fast as we can; wait until
                        // the field time has approximately passed. (Otherwise, we could
@@ -1183,12 +1110,13 @@ void Mixer::bm_frame(unsigned card_index, uint16_t timecode,
                        new_frame.length = frame_length;
                        new_frame.field = field;
                        new_frame.interlaced = video_format.interlaced;
-                       new_frame.upload_func = upload_func;
                        new_frame.dropped_frames = dropped_frames;
                        new_frame.received_timestamp = video_frame.received_timestamp;  // Ignore the audio timestamp.
                        new_frame.video_format = video_format;
+                       new_frame.video_offset = video_offset;
                        new_frame.y_offset = y_offset;
                        new_frame.cbcr_offset = cbcr_offset;
+                       new_frame.texture_uploaded = false;
                        if (card->type == CardType::FFMPEG_INPUT) {
                                FFmpegCapture *ffmpeg_capture = static_cast<FFmpegCapture *>(card->capture.get());
                                new_frame.neutral_color = ffmpeg_capture->get_last_neutral_color();
@@ -1201,6 +1129,87 @@ void Mixer::bm_frame(unsigned card_index, uint16_t timecode,
        }
 }
 
+void Mixer::upload_texture_for_frame(
+       int field, bmusb::VideoFormat video_format,
+       size_t y_offset, size_t cbcr_offset, size_t video_offset, PBOFrameAllocator::Userdata *userdata)
+{
+       size_t cbcr_width, cbcr_height;
+       if (userdata != nullptr && userdata->pixel_format == PixelFormat_8BitYCbCrPlanar) {
+               cbcr_width = video_format.width / userdata->ycbcr_format.chroma_subsampling_x;
+               cbcr_height = video_format.height / userdata->ycbcr_format.chroma_subsampling_y;
+       } else {
+               // All the other Y'CbCr formats are 4:2:2.
+               cbcr_width = video_format.width / 2;
+               cbcr_height = video_format.height;
+       }
+
+       bool interlaced_stride = video_format.interlaced && (video_format.second_field_start == 1);
+       if (video_format.interlaced) {
+               cbcr_height /= 2;
+       }
+
+       unsigned field_start_line;
+       if (field == 1) {
+               field_start_line = video_format.second_field_start;
+       } else {
+               field_start_line = video_format.extra_lines_top;
+       }
+
+       // For anything not FRAME_FORMAT_YCBCR_10BIT, v210_width will be nonsensical but not used.
+       size_t v210_width = video_format.stride / sizeof(uint32_t);
+       ensure_texture_resolution(userdata, field, video_format.width, video_format.height, cbcr_width, cbcr_height, v210_width);
+
+       glBindBuffer(GL_PIXEL_UNPACK_BUFFER, userdata->pbo);
+       check_error();
+
+       switch (userdata->pixel_format) {
+               case PixelFormat_10BitYCbCr: {
+                       size_t field_start = video_offset + video_format.stride * field_start_line;
+                       upload_texture(userdata->tex_v210[field], v210_width, video_format.height, video_format.stride, interlaced_stride, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, field_start);
+                       v210_converter->convert(userdata->tex_v210[field], userdata->tex_444[field], video_format.width, video_format.height);
+                       break;
+               }
+               case PixelFormat_8BitYCbCr: {
+                       size_t field_y_start = y_offset + video_format.width * field_start_line;
+                       size_t field_cbcr_start = cbcr_offset + cbcr_width * field_start_line * sizeof(uint16_t);
+
+                       // Make up our own strides, since we are interleaving.
+                       upload_texture(userdata->tex_y[field], video_format.width, video_format.height, video_format.width, interlaced_stride, GL_RED, GL_UNSIGNED_BYTE, field_y_start);
+                       upload_texture(userdata->tex_cbcr[field], cbcr_width, cbcr_height, cbcr_width * sizeof(uint16_t), interlaced_stride, GL_RG, GL_UNSIGNED_BYTE, field_cbcr_start);
+                       break;
+               }
+               case PixelFormat_8BitYCbCrPlanar: {
+                       assert(field_start_line == 0);  // We don't really support interlaced here.
+                       size_t field_y_start = y_offset;
+                       size_t field_cb_start = cbcr_offset;
+                       size_t field_cr_start = cbcr_offset + cbcr_width * cbcr_height;
+
+                       // Make up our own strides, since we are interleaving.
+                       upload_texture(userdata->tex_y[field], video_format.width, video_format.height, video_format.width, interlaced_stride, GL_RED, GL_UNSIGNED_BYTE, field_y_start);
+                       upload_texture(userdata->tex_cb[field], cbcr_width, cbcr_height, cbcr_width, interlaced_stride, GL_RED, GL_UNSIGNED_BYTE, field_cb_start);
+                       upload_texture(userdata->tex_cr[field], cbcr_width, cbcr_height, cbcr_width, interlaced_stride, GL_RED, GL_UNSIGNED_BYTE, field_cr_start);
+                       break;
+               }
+               case PixelFormat_8BitBGRA: {
+                       size_t field_start = video_offset + video_format.stride * field_start_line;
+                       upload_texture(userdata->tex_rgba[field], video_format.width, video_format.height, video_format.stride, interlaced_stride, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, field_start);
+                       // These could be asked to deliver mipmaps at any time.
+                       glBindTexture(GL_TEXTURE_2D, userdata->tex_rgba[field]);
+                       check_error();
+                       glGenerateMipmap(GL_TEXTURE_2D);
+                       check_error();
+                       glBindTexture(GL_TEXTURE_2D, 0);
+                       check_error();
+                       break;
+               }
+               default:
+                       assert(false);
+       }
+
+       glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
+       check_error();
+}
+
 void Mixer::bm_hotplug_add(libusb_device *dev)
 {
        lock_guard<mutex> lock(hotplug_mutex);
@@ -1299,9 +1308,10 @@ void Mixer::thread_func()
                        check_error();
 
                        // The new texture might need uploading before use.
-                       if (new_frame->upload_func) {
-                               new_frame->upload_func();
-                               new_frame->upload_func = nullptr;
+                       if (!new_frame->texture_uploaded) {
+                               upload_texture_for_frame(new_frame->field, new_frame->video_format, new_frame->y_offset, new_frame->cbcr_offset,
+                                       new_frame->video_offset, (PBOFrameAllocator::Userdata *)new_frame->frame->userdata);
+                               new_frame->texture_uploaded = true;
                        }
 
                        // Only set the white balance if it actually changed. This means that the user
diff --git a/nageru/mixer.h b/nageru/mixer.h
index af07f8896665044291a281918af5bbd51961cd0a..1852e48ed8196fa79938a5d4527e9e55f7f7a14a 100644 (file)
@@ -450,6 +450,10 @@ private:
        void bm_frame(unsigned card_index, uint16_t timecode,
                bmusb::FrameAllocator::Frame video_frame, size_t video_offset, bmusb::VideoFormat video_format,
                bmusb::FrameAllocator::Frame audio_frame, size_t audio_offset, bmusb::AudioFormat audio_format);
+       void upload_texture_for_frame(
+               int field, bmusb::VideoFormat video_format,
+               size_t y_offset, size_t cbcr_offset, size_t video_offset,
+               PBOFrameAllocator::Userdata *userdata);
        void bm_hotplug_add(libusb_device *dev);
        void bm_hotplug_remove(unsigned card_index);
        void place_rectangle(movit::Effect *resample_effect, movit::Effect *padding_effect, float x0, float y0, float x1, float y1);
@@ -552,16 +556,15 @@ private:
                        int64_t length;  // In TIMEBASE units.
                        bool interlaced;
                        unsigned field;  // Which field (0 or 1) of the frame to use. Always 0 for progressive.
-                       std::function<void()> upload_func;  // Needs to be called to actually upload the texture to OpenGL.
+                       bool texture_uploaded = false;
                        unsigned dropped_frames = 0;  // Number of dropped frames before this one.
                        std::chrono::steady_clock::time_point received_timestamp = std::chrono::steady_clock::time_point::min();
                        movit::RGBTriplet neutral_color{1.0f, 1.0f, 1.0f};
 
-                       // Used for MJPEG encoding. (upload_func packs everything it needs
-                       // into the functor, but would otherwise also use these.)
+                       // Used for MJPEG encoding, and texture upload.
                        // width=0 or height=0 means a broken frame, ie., do not upload.
                        bmusb::VideoFormat video_format;
-                       size_t y_offset, cbcr_offset;
+                       size_t video_offset, y_offset, cbcr_offset;
                };
                std::deque<NewFrame> new_frames;
                std::condition_variable new_frames_changed;  // Set whenever new_frames is changed.
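
For context on how the new texture_uploaded flag and the video/y/cbcr offsets are meant to be used across threads, here is a simplified, hypothetical sketch of the hand-off (PendingFrame, queue_frame() and consume_one() are illustrative names, not the Mixer members, and the real NewFrame also carries dropped-frame counts, timestamps and neutral color): the capture thread only queues a description of the frame, and the thread that owns the OpenGL context performs the upload the first time it picks the frame up.

#include <condition_variable>
#include <cstddef>
#include <deque>
#include <mutex>

struct PendingFrame {
	int field = 0;
	std::size_t video_offset = 0, y_offset = 0, cbcr_offset = 0;
	bool texture_uploaded = false;  // Set by the consumer after the GL upload.
};

std::mutex frame_mutex;
std::condition_variable frame_arrived;
std::deque<PendingFrame> pending_frames;

// Producer (capture thread): no GL calls here, just queue the description.
void queue_frame(const PendingFrame &frame)
{
	std::lock_guard<std::mutex> lock(frame_mutex);
	pending_frames.push_back(frame);
	frame_arrived.notify_one();
}

// Consumer (the thread owning the GL context): upload on first use.
void consume_one(void (*upload)(const PendingFrame &))
{
	std::unique_lock<std::mutex> lock(frame_mutex);
	frame_arrived.wait(lock, [] { return !pending_frames.empty(); });
	PendingFrame frame = pending_frames.front();
	pending_frames.pop_front();
	lock.unlock();

	if (!frame.texture_uploaded) {
		upload(frame);  // In the real code, a call like Mixer::upload_texture_for_frame().
		frame.texture_uploaded = true;
	}
}

Keeping the upload on the consumer side matches the comment added to bm_frame() above: uploading from the capture thread would also work, but has been seen to provoke driver bugs more easily.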