git.sesse.net Git - pkanalytics/commitdiff
Make persistent PBOs for faster texture upload.
author Steinar H. Gunderson <sgunderson@bigfoot.com>
Sat, 22 Jul 2023 17:48:32 +0000 (19:48 +0200)
committer Steinar H. Gunderson <sgunderson@bigfoot.com>
Sat, 22 Jul 2023 18:01:49 +0000 (20:01 +0200)
video_widget.cpp
video_widget.h

index 9c37cd79bb770dbd631d8eb51fe5984ebe142a84..6d21e1b983648d443412eda57cbe35b8b2c43e5d 100644 (file)
@@ -36,6 +36,8 @@ extern "C" {
 #include <QWheelEvent>
 #include <QMouseEvent>
 
+#define BUFFER_OFFSET(i) ((char *)nullptr + (i))
+
 using namespace std;
 using namespace std::chrono;
 
@@ -403,15 +405,25 @@ void VideoWidget::paintGL()
                }
        }
 
-       glTextureSubImage2D(tex[0], 0, 0, 0, frame->width, frame->height, GL_RED, GL_UNSIGNED_BYTE, frame->data.get());
+       glBindBuffer(GL_PIXEL_UNPACK_BUFFER, frame->pbo);
+
+       if (frame->need_flush_len > 0) {
+               glFlushMappedNamedBufferRange(frame->pbo, 0, frame->need_flush_len);
+               frame->need_flush_len = 0;
+       }
+
+       glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
+       glTextureSubImage2D(tex[0], 0, 0, 0, frame->width, frame->height, GL_RED, GL_UNSIGNED_BYTE, BUFFER_OFFSET(0));
        glGenerateTextureMipmap(tex[0]);
 
-       glTextureSubImage2D(tex[1], 0, 0, 0, frame->chroma_width, frame->chroma_height, GL_RED, GL_UNSIGNED_BYTE, frame->data.get() + frame->width * frame->height);
+       glTextureSubImage2D(tex[1], 0, 0, 0, frame->chroma_width, frame->chroma_height, GL_RED, GL_UNSIGNED_BYTE, BUFFER_OFFSET(frame->width * frame->height));
        glGenerateTextureMipmap(tex[1]);
 
-       glTextureSubImage2D(tex[2], 0, 0, 0, frame->chroma_width, frame->chroma_height, GL_RED, GL_UNSIGNED_BYTE, frame->data.get() + frame->width * frame->height + frame->chroma_width * frame->chroma_height);
+       glTextureSubImage2D(tex[2], 0, 0, 0, frame->chroma_width, frame->chroma_height, GL_RED, GL_UNSIGNED_BYTE, BUFFER_OFFSET(frame->width * frame->height + frame->chroma_width * frame->chroma_height));
        glGenerateTextureMipmap(tex[2]);
 
+       glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
+
        glBindTextureUnit(0, tex[0]);
        glBindTextureUnit(1, tex[1]);
        glBindTextureUnit(2, tex[2]);
@@ -1020,7 +1032,27 @@ shared_ptr<VideoWidget::Frame> VideoWidget::alloc_frame(unsigned width, unsigned
        frame->chroma_height = chroma_height;
 
        size_t len = frame->width * frame->height + 2 * frame->chroma_width * frame->chroma_height;
-       frame->data.reset(new uint8_t[len]);
+
+       // Augh :-)
+       mutex mu;
+       condition_variable done_cv;
+       bool done = false;
+
+       post_to_main_thread([this, &frame, len, &done, &mu, &done_cv]{
+               makeCurrent();
+               glCreateBuffers(1, &frame->pbo);
+               glNamedBufferStorage(frame->pbo, len, nullptr, GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT);
+               frame->data = (uint8_t *)glMapNamedBufferRange(frame->pbo, 0, len, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT | GL_MAP_PERSISTENT_BIT);
+               doneCurrent();
+
+               lock_guard lock(mu);
+               done = true;
+               done_cv.notify_all();
+       });
+       {
+               unique_lock lock(mu);
+               done_cv.wait(lock, [&done]{ return done; });
+       }
 
        return shared_ptr<Frame>{frame, free_frame};
 }
@@ -1030,6 +1062,13 @@ void VideoWidget::free_frame(VideoWidget::Frame *frame)
        VideoWidget *self = frame->owner;
        lock_guard lock(self->freelist_mu);
        if (self->frame_freelist.size() >= 16) {
+               GLuint pbo = frame->pbo;
+               post_to_main_thread([self, pbo]{
+                       self->makeCurrent();
+                       glUnmapNamedBuffer(pbo);
+                       glDeleteBuffers(1, &pbo);
+                       self->doneCurrent();
+               });
                delete self->frame_freelist.front();
                self->frame_freelist.pop_front();
        }
@@ -1088,7 +1127,7 @@ shared_ptr<VideoWidget::Frame> VideoWidget::make_video_frame(const AVFrame *fram
        cbcr_offset[0] = compute_chroma_offset(0.0f, 1 << desc->log2_chroma_w, video_frame->chroma_width);
        cbcr_offset[1] = compute_chroma_offset(0.5f, 1 << desc->log2_chroma_h, video_frame->chroma_height);
 
-       pic_data[0] = video_frame->data.get();
+       pic_data[0] = video_frame->data;
        linesizes[0] = frame->width;
 
        pic_data[1] = pic_data[0] + frame->width * frame->height;
@@ -1099,6 +1138,8 @@ shared_ptr<VideoWidget::Frame> VideoWidget::make_video_frame(const AVFrame *fram
 
        sws_scale(sws_ctx.get(), frame->data, frame->linesize, 0, frame->height, pic_data, linesizes);
 
+       video_frame->need_flush_len = video_frame->width * video_frame->height + 2 * video_frame->chroma_width * video_frame->chroma_height;
+
        return video_frame;
 }
 
index 24aaea3b6a8129e179ff6c7c23343b7dc4872874..30b4b9b4747343258cc3da68aa7ba8148e72a67b 100644 (file)
@@ -44,13 +44,14 @@ public:
        void mouseReleaseEvent(QMouseEvent *e);
        void mouseMoveEvent(QMouseEvent *e);
 
-       // Should really have a PBO, but this is OK for now.
        // public due to shared_ptr.
        struct Frame {
                unsigned width, height;
                unsigned chroma_width, chroma_height;
-               std::unique_ptr<uint8_t[]> data;  // Y, followed by Cb, followed by Cr.
                VideoWidget *owner;  // For the freelist.
+               GLuint pbo;
+               uint8_t *data;  // Persistently mapped into the PBO. Y, followed by Cb, followed by Cr.
+               size_t need_flush_len;  // 0 = no flush needed.
        };
 
 signals: