Let the application manage PBOs.
authorSteinar H. Gunderson <sgunderson@bigfoot.com>
Sat, 11 Jan 2014 23:48:41 +0000 (00:48 +0100)
committerSteinar H. Gunderson <sgunderson@bigfoot.com>
Sun, 12 Jan 2014 00:07:20 +0000 (01:07 +0100)
Creating a PBO to hold the texture data just before upload (like we
did before this patch) is pointless; if that would help at all, the driver
could just do it itself. Instead, we expose the PBOs to the application
(in a way such that applications that don't care can continue to use
the simple interface). This means that a client that needs to do
e.g. a fade can optimize its texture upload by a process like this:

  1. Decode frame from input A.
  2. Upload frame from input A to GPU (by putting it into a PBO).
     Texture upload starts in the background.
  3. Decode frame from input B.
  4. Upload frame from input B to GPU. (This time, there will be
     no parallelism, though.)
  5. Render.

With correct use of ping-ponging PBOs, it is also possible to overlap
step 4/5 with operations from the _next_ frame in the fade.

More information can be found in this presentation:

  http://on-demand.gputechconf.com/gtc/2012/presentations/S0356-GTC2012-Texture-Transfers.pdf

flat_input.cpp
flat_input.h
flat_input_test.cpp
ycbcr_input.cpp
ycbcr_input.h
ycbcr_input_test.cpp

index 40fb733..28380f4 100644 (file)
@@ -28,10 +28,6 @@ FlatInput::FlatInput(ImageFormat image_format, MovitPixelFormat pixel_format, GL
 
 FlatInput::~FlatInput()
 {
-       if (pbo != 0) {
-               glDeleteBuffers(1, &pbo);
-               check_error();
-       }
        if (texture_num != 0) {
                glDeleteTextures(1, &texture_num);
                check_error();
@@ -53,48 +49,27 @@ void FlatInput::finalize()
        }
        if (pixel_format == FORMAT_RGB) {
                format = GL_RGB;
-               bytes_per_pixel = 3;
        } else if (pixel_format == FORMAT_RGBA_PREMULTIPLIED_ALPHA ||
                   pixel_format == FORMAT_RGBA_POSTMULTIPLIED_ALPHA) {
                format = GL_RGBA;
-               bytes_per_pixel = 4;
        } else if (pixel_format == FORMAT_BGR) {
                format = GL_BGR;
-               bytes_per_pixel = 3;
        } else if (pixel_format == FORMAT_BGRA_PREMULTIPLIED_ALPHA ||
                   pixel_format == FORMAT_BGRA_POSTMULTIPLIED_ALPHA) {
                format = GL_BGRA;
-               bytes_per_pixel = 4;
        } else if (pixel_format == FORMAT_GRAYSCALE) {
                format = GL_LUMINANCE;
-               bytes_per_pixel = 1;
        } else {
                assert(false);
        }
-       if (type == GL_FLOAT) {
-               bytes_per_pixel *= sizeof(float);
-       }
 
-       // Create PBO to hold the texture holding the input image, and then the texture itself.
-       glGenBuffers(1, &pbo);
-       check_error();
-       glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, pbo);
-       check_error();
-       glBufferData(GL_PIXEL_UNPACK_BUFFER_ARB, pitch * height * bytes_per_pixel, NULL, GL_STREAM_DRAW);
-       check_error();
-       glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0);
-       check_error();
-       
+       // Create the texture itself.
        glGenTextures(1, &texture_num);
        check_error();
        glBindTexture(GL_TEXTURE_2D, texture_num);
        check_error();
-       glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
-       check_error();
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, needs_mipmaps ? GL_LINEAR_MIPMAP_NEAREST : GL_LINEAR);
        check_error();
-       glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch);
-       check_error();
        // Intel/Mesa seems to have a broken glGenerateMipmap() for non-FBO textures, so do it here
        // instead of calling glGenerateMipmap().
        glTexParameteri(GL_TEXTURE_2D, GL_GENERATE_MIPMAP, needs_mipmaps ? GL_TRUE : GL_FALSE);
@@ -103,6 +78,8 @@ void FlatInput::finalize()
        check_error();
        glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
        check_error();
+       glBindTexture(GL_TEXTURE_2D, 0);
+       check_error();
 
        needs_update = true;
        finalized = true;
@@ -116,18 +93,14 @@ void FlatInput::set_gl_state(GLuint glsl_program_num, const std::string& prefix,
        check_error();
 
        if (needs_update) {
-               // Copy the pixel data into the PBO.
+               // Re-upload the texture.
                glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, pbo);
                check_error();
-               void *mapped_pbo = glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_WRITE_ONLY);
-               memcpy(mapped_pbo, pixel_data, pitch * height * bytes_per_pixel);
-               glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB);
+               glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
                check_error();
-
-               // Re-upload the texture from the PBO.
                glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch);
                check_error();
-               glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, width, height, format, type, BUFFER_OFFSET(0));
+               glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, width, height, format, type, pixel_data);
                check_error();
                glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
                check_error();
index 4728fb0..9fcaf53 100644 (file)
@@ -61,17 +61,25 @@ public:
        // this data, you must either call set_pixel_data() again (using the same pointer
        // is fine), or invalidate_pixel_data(). Otherwise, the texture won't be re-uploaded
        // on subsequent frames.
-       void set_pixel_data(const unsigned char *pixel_data)
+       //
+       // The data can either be a regular pointer (if pbo==0), or a byte offset
+       // into a PBO. The latter will allow you to start uploading the texture data
+       // asynchronously to the GPU, if you have any CPU-intensive work between the
+       // call to set_pixel_data() and the actual rendering. In either case,
+       // the pointer (and PBO, if set) has to be valid at the time of the render call.
+       void set_pixel_data(const unsigned char *pixel_data, GLuint pbo = 0)
        {
                assert(this->type == GL_UNSIGNED_BYTE);
                this->pixel_data = pixel_data;
+               this->pbo = pbo;
                invalidate_pixel_data();
        }
 
-       void set_pixel_data(const float *pixel_data)
+       void set_pixel_data(const float *pixel_data, GLuint pbo = 0)
        {
                assert(this->type == GL_FLOAT);
                this->pixel_data = pixel_data;
+               this->pbo = pbo;
                invalidate_pixel_data();
        }
 
@@ -92,7 +100,7 @@ private:
        GLuint pbo, texture_num;
        bool needs_update, finalized;
        int output_linear_gamma, needs_mipmaps;
-       unsigned width, height, pitch, bytes_per_pixel;
+       unsigned width, height, pitch;
        const void *pixel_data;
 };
 
index c8df483..a297623 100644 (file)
@@ -1,11 +1,13 @@
 // Unit tests for FlatInput.
 
+#include <GL/glew.h>
 #include <stddef.h>
 
 #include "effect_chain.h"
 #include "flat_input.h"
 #include "gtest/gtest.h"
 #include "test_util.h"
+#include "util.h"
 
 TEST(FlatInput, SimpleGrayscale) {
        const int size = 4;
@@ -226,3 +228,39 @@ TEST(FlatInput, UpdatedData) {
        tester.run(out_data, GL_RED, COLORSPACE_sRGB, GAMMA_LINEAR);
        expect_equal(data, out_data, width, height);
 }
+
+TEST(FlatInput, PBO) {
+       const int width = 3;
+       const int height = 2;
+
+       float data[width * height] = {
+               0.0, 1.0, 0.5,
+               0.5, 0.5, 0.2,
+       };
+       float expected_data[4 * width * height] = {
+               0.0, 0.0, 0.0, 1.0,  1.0, 1.0, 1.0, 1.0,  0.5, 0.5, 0.5, 1.0,
+               0.5, 0.5, 0.5, 1.0,  0.5, 0.5, 0.5, 1.0,  0.2, 0.2, 0.2, 1.0,
+       };
+       float out_data[4 * width * height];
+
+       GLuint pbo;
+       glGenBuffers(1, &pbo);
+       glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, pbo);
+       glBufferData(GL_PIXEL_UNPACK_BUFFER_ARB, width * height * sizeof(float), data, GL_STREAM_DRAW);
+       glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0);
+
+       EffectChainTester tester(NULL, width, height);
+
+       ImageFormat format;
+       format.color_space = COLORSPACE_sRGB;
+       format.gamma_curve = GAMMA_LINEAR;
+
+       FlatInput *input = new FlatInput(format, FORMAT_GRAYSCALE, GL_FLOAT, width, height);
+       input->set_pixel_data((float *)BUFFER_OFFSET(0), pbo);
+       tester.get_chain()->add_input(input);
+
+       tester.run(out_data, GL_RGBA, COLORSPACE_sRGB, GAMMA_LINEAR);
+       expect_equal(expected_data, out_data, 4 * width, height);
+
+       glDeleteBuffers(1, &pbo);
+}
index d1d849a..e089e51 100644 (file)
@@ -62,7 +62,6 @@ YCbCrInput::YCbCrInput(const ImageFormat &image_format,
        : image_format(image_format),
          ycbcr_format(ycbcr_format),
          needs_update(false),
-         needs_pbo_recreate(false),
          finalized(false),
          needs_mipmaps(false),
          width(width),
@@ -88,10 +87,6 @@ YCbCrInput::YCbCrInput(const ImageFormat &image_format,
 
 YCbCrInput::~YCbCrInput()
 {
-       if (pbos[0] != 0) {
-               glDeleteBuffers(3, pbos);
-               check_error();
-       }
        if (texture_num[0] != 0) {
                glDeleteTextures(3, texture_num);
                check_error();
@@ -100,33 +95,17 @@ YCbCrInput::~YCbCrInput()
 
 void YCbCrInput::finalize()
 {
-       glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
-       check_error();
-
-       // Create PBOs to hold the textures holding the input image, and then the texture itself.
-       glGenBuffers(3, pbos);
-       check_error();
+       // Create the textures themselves.
        glGenTextures(3, texture_num);
        check_error();
 
        for (unsigned channel = 0; channel < 3; ++channel) {
-               glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, pbos[channel]);
-               check_error();
-               glBufferData(GL_PIXEL_UNPACK_BUFFER_ARB, pitch[channel] * heights[channel], NULL, GL_STREAM_DRAW);
-               check_error();
-               glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0);
-               check_error();
-               
                glBindTexture(GL_TEXTURE_2D, texture_num[channel]);
                check_error();
                glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
                check_error();
-               glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch[channel]);
-               check_error();
                glTexImage2D(GL_TEXTURE_2D, 0, GL_LUMINANCE8, widths[channel], heights[channel], 0, GL_LUMINANCE, GL_UNSIGNED_BYTE, NULL);
                check_error();
-               glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
-               check_error();
        }
 
        needs_update = true;
@@ -141,27 +120,16 @@ void YCbCrInput::set_gl_state(GLuint glsl_program_num, const std::string& prefix
                glBindTexture(GL_TEXTURE_2D, texture_num[channel]);
                check_error();
 
-               if (needs_update || needs_pbo_recreate) {
+               if (needs_update) {
+                       // Re-upload the texture.
                        // Copy the pixel data into the PBO.
                        glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, pbos[channel]);
                        check_error();
-
-                       if (needs_pbo_recreate) {
-                               // The pitch has changed; we need to reallocate this PBO.
-                               glBufferData(GL_PIXEL_UNPACK_BUFFER_ARB, pitch[channel] * heights[channel], NULL, GL_STREAM_DRAW);
-                               check_error();
-                       }
-
-                       void *mapped_pbo = glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_WRITE_ONLY);
-                       memcpy(mapped_pbo, pixel_data[channel], pitch[channel] * heights[channel]);
-
-                       glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB);
+                       glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
                        check_error();
-
-                       // Re-upload the texture from the PBO.
                        glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch[channel]);
                        check_error();
-                       glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, widths[channel], heights[channel], GL_LUMINANCE, GL_UNSIGNED_BYTE, BUFFER_OFFSET(0));
+                       glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, widths[channel], heights[channel], GL_LUMINANCE, GL_UNSIGNED_BYTE, pixel_data[channel]);
                        check_error();
                        glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
                        check_error();
@@ -169,11 +137,12 @@ void YCbCrInput::set_gl_state(GLuint glsl_program_num, const std::string& prefix
                        check_error();
                        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
                        check_error();
-                       glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0);
-                       check_error();
                }
        }
 
+       glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0);
+       check_error();
+
        // Bind samplers.
        set_uniform_int(glsl_program_num, prefix, "tex_y", *sampler_num + 0);
        set_uniform_int(glsl_program_num, prefix, "tex_cb", *sampler_num + 1);
@@ -181,7 +150,6 @@ void YCbCrInput::set_gl_state(GLuint glsl_program_num, const std::string& prefix
 
        *sampler_num += 3;
        needs_update = false;
-       needs_pbo_recreate = false;
 }
 
 std::string YCbCrInput::output_fragment_shader()
index 1225d6e..0f868a8 100644 (file)
@@ -63,10 +63,17 @@ public:
        // this data, you must either call set_pixel_data() again (using the same pointer
        // is fine), or invalidate_pixel_data(). Otherwise, the texture won't be re-uploaded
        // on subsequent frames.
-       void set_pixel_data(unsigned channel, const unsigned char *pixel_data)
+       //
+       // The data can either be a regular pointer (if pbo==0), or a byte offset
+       // into a PBO. The latter will allow you to start uploading the texture data
+       // asynchronously to the GPU, if you have any CPU-intensive work between the
+       // call to set_pixel_data() and the actual rendering. In either case,
+       // the pointer (and PBO, if set) has to be valid at the time of the render call.
+       void set_pixel_data(unsigned channel, const unsigned char *pixel_data, GLuint pbo = 0)
        {
                assert(channel >= 0 && channel < 3);
                this->pixel_data[channel] = pixel_data;
+               this->pbos[channel] = pbo;
                invalidate_pixel_data();
        }
 
@@ -77,17 +84,14 @@ public:
 
        void set_pitch(unsigned channel, unsigned pitch) {
                assert(channel >= 0 && channel < 3);
-               if (this->pitch[channel] != pitch) {
-                       this->pitch[channel] = pitch;
-                       needs_pbo_recreate = true;
-               }
+               this->pitch[channel] = pitch;
        }
 
 private:
        ImageFormat image_format;
        YCbCrFormat ycbcr_format;
        GLuint pbos[3], texture_num[3];
-       bool needs_update, needs_pbo_recreate, finalized;
+       bool needs_update, finalized;
 
        int needs_mipmaps;
 
index 4ff263a..f9be193 100644 (file)
@@ -7,6 +7,7 @@
 #include "gtest/gtest.h"
 #include "test_util.h"
 #include "ycbcr_input.h"
+#include "util.h"
 
 TEST(YCbCrInput, Simple444) {
        const int width = 1;
@@ -415,3 +416,60 @@ TEST(YCbCrInput, DifferentCbAndCrPositioning) {
        tester.run(out_data, GL_BLUE, COLORSPACE_sRGB, GAMMA_sRGB);
        expect_equal(expected_data_blue, out_data, width, height, 0.01, 0.001);
 }
+
+TEST(YCbCrInput, PBO) {
+       const int width = 1;
+       const int height = 5;
+
+       // Pure-color test inputs, calculated with the formulas in Rec. 601
+       // section 2.5.4.
+       unsigned char data[width * height * 3] = {
+               16, 235, 81, 145, 41,
+               128, 128, 90, 54, 240,
+               128, 128, 240, 34, 110,
+       };
+       float expected_data[4 * width * height] = {
+               0.0, 0.0, 0.0, 1.0,
+               1.0, 1.0, 1.0, 1.0,
+               1.0, 0.0, 0.0, 1.0,
+               0.0, 1.0, 0.0, 1.0,
+               0.0, 0.0, 1.0, 1.0,
+       };
+       float out_data[4 * width * height];
+
+       GLuint pbo;
+       glGenBuffers(1, &pbo);
+       glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, pbo);
+       glBufferData(GL_PIXEL_UNPACK_BUFFER_ARB, width * height * 3, data, GL_STREAM_DRAW);
+       glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0);
+
+       EffectChainTester tester(NULL, width, height);
+
+       ImageFormat format;
+       format.color_space = COLORSPACE_sRGB;
+       format.gamma_curve = GAMMA_sRGB;
+
+       YCbCrFormat ycbcr_format;
+       ycbcr_format.luma_coefficients = YCBCR_REC_601;
+       ycbcr_format.full_range = false;
+       ycbcr_format.chroma_subsampling_x = 1;
+       ycbcr_format.chroma_subsampling_y = 1;
+       ycbcr_format.cb_x_position = 0.5f;
+       ycbcr_format.cb_y_position = 0.5f;
+       ycbcr_format.cr_x_position = 0.5f;
+       ycbcr_format.cr_y_position = 0.5f;
+
+       YCbCrInput *input = new YCbCrInput(format, ycbcr_format, width, height);
+       input->set_pixel_data(0, (unsigned char *)BUFFER_OFFSET(0), pbo);
+       input->set_pixel_data(1, (unsigned char *)BUFFER_OFFSET(width * height), pbo);
+       input->set_pixel_data(2, (unsigned char *)BUFFER_OFFSET(width * height * 2), pbo);
+       tester.get_chain()->add_input(input);
+
+       tester.run(out_data, GL_RGBA, COLORSPACE_sRGB, GAMMA_sRGB);
+
+       // Y'CbCr isn't 100% accurate (the input values are rounded),
+       // so we need some leeway.
+       expect_equal(expected_data, out_data, 4 * width, height, 0.025, 0.002);
+
+       glDeleteBuffers(1, &pbo);
+}