git.sesse.net Git - nageru/commitdiff
Move texture uploading out of a lambda.
author Steinar H. Gunderson <steinar+nageru@gunderson.no>
Sun, 24 May 2020 19:16:00 +0000 (21:16 +0200)
committer Steinar H. Gunderson <steinar+nageru@gunderson.no>
Sun, 24 May 2020 19:16:00 +0000 (21:16 +0200)
This doesn't make all that much sense anymore, and having it outside
std::function helps debuggability. (Also, it probably saves a few
allocations.)
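
The shape of the change, as a minimal hypothetical sketch (QueuedFrame, FrameUploader and use_frame() below are illustrative names, not the actual Nageru types): instead of capturing every upload parameter into a std::function stored on the queued frame and invoked later, the parameters are stored as plain members and a named member function is called with them when the frame is first used. A named function is easier to step into in a debugger, and a plain struct of parameters avoids the heap allocation that a large lambda capture can force on std::function.

#include <cstddef>
#include <functional>

// Before: the queued frame carries a type-erased callable with its captured state.
struct QueuedFrameBefore {
	std::function<void()> upload_func;  // Captures field, offsets, userdata, ...
};

// After: the queued frame carries the parameters themselves plus a "done" flag.
struct QueuedFrame {
	int field = 0;
	std::size_t video_offset = 0, y_offset = 0, cbcr_offset = 0;
	bool texture_uploaded = false;
};

class FrameUploader {
public:
	// Stand-in with a simplified signature; the real Mixer::upload_texture_for_frame()
	// also takes the video format and the frame's userdata.
	void upload_texture_for_frame(int field, std::size_t video_offset,
	                              std::size_t y_offset, std::size_t cbcr_offset) {}
};

// Consumer side: upload lazily, exactly once, on first use.
inline void use_frame(FrameUploader &uploader, QueuedFrame &frame)
{
	if (!frame.texture_uploaded) {
		uploader.upload_texture_for_frame(frame.field, frame.video_offset,
		                                  frame.y_offset, frame.cbcr_offset);
		frame.texture_uploaded = true;
	}
}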

nageru/mixer.cpp
nageru/mixer.h

diff --git a/nageru/mixer.cpp b/nageru/mixer.cpp
index a5206c9b3ea48edf27c9d8db2462a8933465927f..3f5119a43d74c7de00a58d1152290315837e77b8 100644 (file)
@@ -1019,7 +1019,7 @@ void Mixer::bm_frame(unsigned card_index, uint16_t timecode,
        }
 #endif
 
-       size_t cbcr_width, cbcr_height, cbcr_offset, y_offset;
+       size_t y_offset, cbcr_offset;
        size_t expected_length = video_format.stride * (video_format.height + video_format.extra_lines_top + video_format.extra_lines_bottom);
        if (userdata != nullptr && userdata->pixel_format == PixelFormat_8BitYCbCrPlanar) {
                // The calculation above is wrong for planar Y'CbCr, so just override it.
@@ -1028,16 +1028,12 @@ void Mixer::bm_frame(unsigned card_index, uint16_t timecode,
                expected_length = video_frame.len;
 
                userdata->ycbcr_format = (static_cast<FFmpegCapture *>(card->capture.get()))->get_current_frame_ycbcr_format();
-               cbcr_width = video_format.width / userdata->ycbcr_format.chroma_subsampling_x;
-               cbcr_height = video_format.height / userdata->ycbcr_format.chroma_subsampling_y;
-               cbcr_offset = video_format.width * video_format.height;
                y_offset = 0;
+               cbcr_offset = video_format.width * video_format.height;
        } else {
                // All the other Y'CbCr formats are 4:2:2.
-               cbcr_width = video_format.width / 2;
-               cbcr_height = video_format.height;
-               cbcr_offset = video_offset / 2;
                y_offset = video_frame.size / 2 + video_offset / 2;
+               cbcr_offset = video_offset / 2;
        }
        if (video_frame.len - video_offset == 0 ||
            video_frame.len - video_offset != expected_length) {
@@ -1068,19 +1064,14 @@ void Mixer::bm_frame(unsigned card_index, uint16_t timecode,
 
        unsigned num_fields = video_format.interlaced ? 2 : 1;
        steady_clock::time_point frame_upload_start;
-       bool interlaced_stride = false;
        if (video_format.interlaced) {
                // Send the two fields along as separate frames; the other side will need to add
                // a deinterlacer to actually get this right.
                assert(video_format.height % 2 == 0);
                video_format.height /= 2;
-               cbcr_height /= 2;
                assert(frame_length % 2 == 0);
                frame_length /= 2;
                num_fields = 2;
-               if (video_format.second_field_start == 1) {
-                       interlaced_stride = true;
-               }
                frame_upload_start = steady_clock::now();
        }
        assert(userdata != nullptr);
@@ -1091,79 +1082,15 @@ void Mixer::bm_frame(unsigned card_index, uint16_t timecode,
        userdata->last_frame_rate_den = video_format.frame_rate_den;
        RefCountedFrame frame(video_frame);
 
-       // Upload the textures.
+       // Send the frames on to the main thread, which will upload and process them.
+       // It is entirely possible to upload them in the same thread (and it might even be
+       // faster, depending on the GPU and driver), but it appears to be trickling
+       // driver bugs very easily.
+       //
+       // Note that this means we must hold on to the actual frame data in <userdata>
+       // until the upload is done, but we hold on to <frame> much longer than that
+       // (in fact, all the way until we no longer use the texture in rendering).
        for (unsigned field = 0; field < num_fields; ++field) {
-               // Put the actual texture upload in a lambda that is executed in the main thread.
-               // It is entirely possible to do this in the same thread (and it might even be
-               // faster, depending on the GPU and driver), but it appears to be trickling
-               // driver bugs very easily.
-               //
-               // Note that this means we must hold on to the actual frame data in <userdata>
-               // until the upload command is run, but we hold on to <frame> much longer than that
-               // (in fact, all the way until we no longer use the texture in rendering).
-               auto upload_func = [this, field, video_format, y_offset, video_offset, cbcr_offset, cbcr_width, cbcr_height, interlaced_stride, userdata]() {
-                       unsigned field_start_line;
-                       if (field == 1) {
-                               field_start_line = video_format.second_field_start;
-                       } else {
-                               field_start_line = video_format.extra_lines_top;
-                       }
-
-                       // For anything not FRAME_FORMAT_YCBCR_10BIT, v210_width will be nonsensical but not used.
-                       size_t v210_width = video_format.stride / sizeof(uint32_t);
-                       ensure_texture_resolution(userdata, field, video_format.width, video_format.height, cbcr_width, cbcr_height, v210_width);
-
-                       glBindBuffer(GL_PIXEL_UNPACK_BUFFER, userdata->pbo);
-                       check_error();
-
-                       switch (userdata->pixel_format) {
-                       case PixelFormat_10BitYCbCr: {
-                               size_t field_start = video_offset + video_format.stride * field_start_line;
-                               upload_texture(userdata->tex_v210[field], v210_width, video_format.height, video_format.stride, interlaced_stride, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, field_start);
-                               v210_converter->convert(userdata->tex_v210[field], userdata->tex_444[field], video_format.width, video_format.height);
-                               break;
-                       }
-                       case PixelFormat_8BitYCbCr: {
-                               size_t field_y_start = y_offset + video_format.width * field_start_line;
-                               size_t field_cbcr_start = cbcr_offset + cbcr_width * field_start_line * sizeof(uint16_t);
-
-                               // Make up our own strides, since we are interleaving.
-                               upload_texture(userdata->tex_y[field], video_format.width, video_format.height, video_format.width, interlaced_stride, GL_RED, GL_UNSIGNED_BYTE, field_y_start);
-                               upload_texture(userdata->tex_cbcr[field], cbcr_width, cbcr_height, cbcr_width * sizeof(uint16_t), interlaced_stride, GL_RG, GL_UNSIGNED_BYTE, field_cbcr_start);
-                               break;
-                       }
-                       case PixelFormat_8BitYCbCrPlanar: {
-                               assert(field_start_line == 0);  // We don't really support interlaced here.
-                               size_t field_y_start = y_offset;
-                               size_t field_cb_start = cbcr_offset;
-                               size_t field_cr_start = cbcr_offset + cbcr_width * cbcr_height;
-
-                               // Make up our own strides, since we are interleaving.
-                               upload_texture(userdata->tex_y[field], video_format.width, video_format.height, video_format.width, interlaced_stride, GL_RED, GL_UNSIGNED_BYTE, field_y_start);
-                               upload_texture(userdata->tex_cb[field], cbcr_width, cbcr_height, cbcr_width, interlaced_stride, GL_RED, GL_UNSIGNED_BYTE, field_cb_start);
-                               upload_texture(userdata->tex_cr[field], cbcr_width, cbcr_height, cbcr_width, interlaced_stride, GL_RED, GL_UNSIGNED_BYTE, field_cr_start);
-                               break;
-                       }
-                       case PixelFormat_8BitBGRA: {
-                               size_t field_start = video_offset + video_format.stride * field_start_line;
-                               upload_texture(userdata->tex_rgba[field], video_format.width, video_format.height, video_format.stride, interlaced_stride, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, field_start);
-                               // These could be asked to deliver mipmaps at any time.
-                               glBindTexture(GL_TEXTURE_2D, userdata->tex_rgba[field]);
-                               check_error();
-                               glGenerateMipmap(GL_TEXTURE_2D);
-                               check_error();
-                               glBindTexture(GL_TEXTURE_2D, 0);
-                               check_error();
-                               break;
-                       }
-                       default:
-                               assert(false);
-                       }
-
-                       glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
-                       check_error();
-               };
-
                if (field == 1) {
                        // Don't upload the second field as fast as we can; wait until
                        // the field time has approximately passed. (Otherwise, we could
@@ -1183,12 +1110,13 @@ void Mixer::bm_frame(unsigned card_index, uint16_t timecode,
                        new_frame.length = frame_length;
                        new_frame.field = field;
                        new_frame.interlaced = video_format.interlaced;
-                       new_frame.upload_func = upload_func;
                        new_frame.dropped_frames = dropped_frames;
                        new_frame.received_timestamp = video_frame.received_timestamp;  // Ignore the audio timestamp.
                        new_frame.video_format = video_format;
+                       new_frame.video_offset = video_offset;
                        new_frame.y_offset = y_offset;
                        new_frame.cbcr_offset = cbcr_offset;
+                       new_frame.texture_uploaded = false;
                        if (card->type == CardType::FFMPEG_INPUT) {
                                FFmpegCapture *ffmpeg_capture = static_cast<FFmpegCapture *>(card->capture.get());
                                new_frame.neutral_color = ffmpeg_capture->get_last_neutral_color();
@@ -1201,6 +1129,87 @@ void Mixer::bm_frame(unsigned card_index, uint16_t timecode,
        }
 }
 
+void Mixer::upload_texture_for_frame(
+       int field, bmusb::VideoFormat video_format,
+       size_t y_offset, size_t cbcr_offset, size_t video_offset, PBOFrameAllocator::Userdata *userdata)
+{
+       size_t cbcr_width, cbcr_height;
+       if (userdata != nullptr && userdata->pixel_format == PixelFormat_8BitYCbCrPlanar) {
+               cbcr_width = video_format.width / userdata->ycbcr_format.chroma_subsampling_x;
+               cbcr_height = video_format.height / userdata->ycbcr_format.chroma_subsampling_y;
+       } else {
+               // All the other Y'CbCr formats are 4:2:2.
+               cbcr_width = video_format.width / 2;
+               cbcr_height = video_format.height;
+       }
+
+       bool interlaced_stride = video_format.interlaced && (video_format.second_field_start == 1);
+       if (video_format.interlaced) {
+               cbcr_height /= 2;
+       }
+
+       unsigned field_start_line;
+       if (field == 1) {
+               field_start_line = video_format.second_field_start;
+       } else {
+               field_start_line = video_format.extra_lines_top;
+       }
+
+       // For anything not FRAME_FORMAT_YCBCR_10BIT, v210_width will be nonsensical but not used.
+       size_t v210_width = video_format.stride / sizeof(uint32_t);
+       ensure_texture_resolution(userdata, field, video_format.width, video_format.height, cbcr_width, cbcr_height, v210_width);
+
+       glBindBuffer(GL_PIXEL_UNPACK_BUFFER, userdata->pbo);
+       check_error();
+
+       switch (userdata->pixel_format) {
+               case PixelFormat_10BitYCbCr: {
+                       size_t field_start = video_offset + video_format.stride * field_start_line;
+                       upload_texture(userdata->tex_v210[field], v210_width, video_format.height, video_format.stride, interlaced_stride, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, field_start);
+                       v210_converter->convert(userdata->tex_v210[field], userdata->tex_444[field], video_format.width, video_format.height);
+                       break;
+               }
+               case PixelFormat_8BitYCbCr: {
+                       size_t field_y_start = y_offset + video_format.width * field_start_line;
+                       size_t field_cbcr_start = cbcr_offset + cbcr_width * field_start_line * sizeof(uint16_t);
+
+                       // Make up our own strides, since we are interleaving.
+                       upload_texture(userdata->tex_y[field], video_format.width, video_format.height, video_format.width, interlaced_stride, GL_RED, GL_UNSIGNED_BYTE, field_y_start);
+                       upload_texture(userdata->tex_cbcr[field], cbcr_width, cbcr_height, cbcr_width * sizeof(uint16_t), interlaced_stride, GL_RG, GL_UNSIGNED_BYTE, field_cbcr_start);
+                       break;
+               }
+               case PixelFormat_8BitYCbCrPlanar: {
+                       assert(field_start_line == 0);  // We don't really support interlaced here.
+                       size_t field_y_start = y_offset;
+                       size_t field_cb_start = cbcr_offset;
+                       size_t field_cr_start = cbcr_offset + cbcr_width * cbcr_height;
+
+                       // Make up our own strides, since we are interleaving.
+                       upload_texture(userdata->tex_y[field], video_format.width, video_format.height, video_format.width, interlaced_stride, GL_RED, GL_UNSIGNED_BYTE, field_y_start);
+                       upload_texture(userdata->tex_cb[field], cbcr_width, cbcr_height, cbcr_width, interlaced_stride, GL_RED, GL_UNSIGNED_BYTE, field_cb_start);
+                       upload_texture(userdata->tex_cr[field], cbcr_width, cbcr_height, cbcr_width, interlaced_stride, GL_RED, GL_UNSIGNED_BYTE, field_cr_start);
+                       break;
+               }
+               case PixelFormat_8BitBGRA: {
+                       size_t field_start = video_offset + video_format.stride * field_start_line;
+                       upload_texture(userdata->tex_rgba[field], video_format.width, video_format.height, video_format.stride, interlaced_stride, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, field_start);
+                       // These could be asked to deliver mipmaps at any time.
+                       glBindTexture(GL_TEXTURE_2D, userdata->tex_rgba[field]);
+                       check_error();
+                       glGenerateMipmap(GL_TEXTURE_2D);
+                       check_error();
+                       glBindTexture(GL_TEXTURE_2D, 0);
+                       check_error();
+                       break;
+               }
+               default:
+                       assert(false);
+       }
+
+       glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
+       check_error();
+}
+
 void Mixer::bm_hotplug_add(libusb_device *dev)
 {
        lock_guard<mutex> lock(hotplug_mutex);
@@ -1299,9 +1308,10 @@ void Mixer::thread_func()
                        check_error();
 
                        // The new texture might need uploading before use.
-                       if (new_frame->upload_func) {
-                               new_frame->upload_func();
-                               new_frame->upload_func = nullptr;
+                       if (!new_frame->texture_uploaded) {
+                               upload_texture_for_frame(new_frame->field, new_frame->video_format, new_frame->y_offset, new_frame->cbcr_offset,
+                                       new_frame->video_offset, (PBOFrameAllocator::Userdata *)new_frame->frame->userdata);
+                               new_frame->texture_uploaded = true;
                        }
 
                        // Only set the white balance if it actually changed. This means that the user
diff --git a/nageru/mixer.h b/nageru/mixer.h
index af07f8896665044291a281918af5bbd51961cd0a..1852e48ed8196fa79938a5d4527e9e55f7f7a14a 100644 (file)
@@ -450,6 +450,10 @@ private:
        void bm_frame(unsigned card_index, uint16_t timecode,
                bmusb::FrameAllocator::Frame video_frame, size_t video_offset, bmusb::VideoFormat video_format,
                bmusb::FrameAllocator::Frame audio_frame, size_t audio_offset, bmusb::AudioFormat audio_format);
+       void upload_texture_for_frame(
+               int field, bmusb::VideoFormat video_format,
+               size_t y_offset, size_t cbcr_offset, size_t video_offset,
+               PBOFrameAllocator::Userdata *userdata);
        void bm_hotplug_add(libusb_device *dev);
        void bm_hotplug_remove(unsigned card_index);
        void place_rectangle(movit::Effect *resample_effect, movit::Effect *padding_effect, float x0, float y0, float x1, float y1);
@@ -552,16 +556,15 @@ private:
                        int64_t length;  // In TIMEBASE units.
                        bool interlaced;
                        unsigned field;  // Which field (0 or 1) of the frame to use. Always 0 for progressive.
-                       std::function<void()> upload_func;  // Needs to be called to actually upload the texture to OpenGL.
+                       bool texture_uploaded = false;
                        unsigned dropped_frames = 0;  // Number of dropped frames before this one.
                        std::chrono::steady_clock::time_point received_timestamp = std::chrono::steady_clock::time_point::min();
                        movit::RGBTriplet neutral_color{1.0f, 1.0f, 1.0f};
 
-                       // Used for MJPEG encoding. (upload_func packs everything it needs
-                       // into the functor, but would otherwise also use these.)
+                       // Used for MJPEG encoding, and texture upload.
                        // width=0 or height=0 means a broken frame, ie., do not upload.
                        bmusb::VideoFormat video_format;
-                       size_t y_offset, cbcr_offset;
+                       size_t video_offset, y_offset, cbcr_offset;
                };
                std::deque<NewFrame> new_frames;
                std::condition_variable new_frames_changed;  // Set whenever new_frames is changed.
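
For context on how the new texture_uploaded flag and the video/y/cbcr offsets are meant to be used across threads, here is a simplified, hypothetical sketch of the hand-off (PendingFrame, queue_frame() and consume_one() are illustrative names, not the Mixer members, and the real NewFrame also carries dropped-frame counts, timestamps and neutral color): the capture thread only queues a description of the frame, and the thread that owns the OpenGL context performs the upload the first time it picks the frame up.

#include <condition_variable>
#include <cstddef>
#include <deque>
#include <mutex>

struct PendingFrame {
	int field = 0;
	std::size_t video_offset = 0, y_offset = 0, cbcr_offset = 0;
	bool texture_uploaded = false;  // Set by the consumer after the GL upload.
};

std::mutex frame_mutex;
std::condition_variable frame_arrived;
std::deque<PendingFrame> pending_frames;

// Producer (capture thread): no GL calls here, just queue the description.
void queue_frame(const PendingFrame &frame)
{
	std::lock_guard<std::mutex> lock(frame_mutex);
	pending_frames.push_back(frame);
	frame_arrived.notify_one();
}

// Consumer (the thread owning the GL context): upload on first use.
void consume_one(void (*upload)(const PendingFrame &))
{
	std::unique_lock<std::mutex> lock(frame_mutex);
	frame_arrived.wait(lock, [] { return !pending_frames.empty(); });
	PendingFrame frame = pending_frames.front();
	pending_frames.pop_front();
	lock.unlock();

	if (!frame.texture_uploaded) {
		upload(frame);  // In the real code, a call like Mixer::upload_texture_for_frame().
		frame.texture_uploaded = true;
	}
}

Keeping the upload on the consumer side matches the comment added to bm_frame() above: uploading from the capture thread would also work, but has been seen to provoke driver bugs more easily.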