From 9cf78e3b5a801b7841133011f74fc7962861705d Mon Sep 17 00:00:00 2001 From: "Steinar H. Gunderson" Date: Sun, 12 Jan 2014 00:48:41 +0100 Subject: [PATCH] Let the application manage PBOs. Creating a PBO to hold the texture data just before upload (like we did before this patch) is pointless; if that would help at all, the driver could just do it itself. Instead, we expose the PBOs to the application (in a way such that applications that don't care can continue to use the simple interface). This means that a client that needs to do e.g. a fade can optimize its texture upload by a process like this: 1. Decode frame from input A. 2. Upload frame from input A to GPU (by putting it into a PBO). Texture upload starts in the background. 3. Decode frame from input B. 4. Upload frame from input B to GPU. (This time, there will be no parallelism, though.) 5. Render. With correct use of ping-ponging PBOs, it is also possible to overlap step 4/5 with operations from the _next_ frame in the fade. More information can be found in this presentation: http://on-demand.gputechconf.com/gtc/2012/presentations/S0356-GTC2012-Texture-Transfers.pdf --- flat_input.cpp | 39 +++++------------------------ flat_input.h | 14 ++++++++--- flat_input_test.cpp | 38 +++++++++++++++++++++++++++++ ycbcr_input.cpp | 48 ++++++------------------------------ ycbcr_input.h | 16 +++++++----- ycbcr_input_test.cpp | 58 ++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 131 insertions(+), 82 deletions(-) diff --git a/flat_input.cpp b/flat_input.cpp index 40fb733..28380f4 100644 --- a/flat_input.cpp +++ b/flat_input.cpp @@ -28,10 +28,6 @@ FlatInput::FlatInput(ImageFormat image_format, MovitPixelFormat pixel_format, GL FlatInput::~FlatInput() { - if (pbo != 0) { - glDeleteBuffers(1, &pbo); - check_error(); - } if (texture_num != 0) { glDeleteTextures(1, &texture_num); check_error(); @@ -53,48 +49,27 @@ void FlatInput::finalize() } if (pixel_format == FORMAT_RGB) { format = GL_RGB; - bytes_per_pixel = 3; } else if (pixel_format == FORMAT_RGBA_PREMULTIPLIED_ALPHA || pixel_format == FORMAT_RGBA_POSTMULTIPLIED_ALPHA) { format = GL_RGBA; - bytes_per_pixel = 4; } else if (pixel_format == FORMAT_BGR) { format = GL_BGR; - bytes_per_pixel = 3; } else if (pixel_format == FORMAT_BGRA_PREMULTIPLIED_ALPHA || pixel_format == FORMAT_BGRA_POSTMULTIPLIED_ALPHA) { format = GL_BGRA; - bytes_per_pixel = 4; } else if (pixel_format == FORMAT_GRAYSCALE) { format = GL_LUMINANCE; - bytes_per_pixel = 1; } else { assert(false); } - if (type == GL_FLOAT) { - bytes_per_pixel *= sizeof(float); - } - // Create PBO to hold the texture holding the input image, and then the texture itself. - glGenBuffers(1, &pbo); - check_error(); - glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, pbo); - check_error(); - glBufferData(GL_PIXEL_UNPACK_BUFFER_ARB, pitch * height * bytes_per_pixel, NULL, GL_STREAM_DRAW); - check_error(); - glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0); - check_error(); - + // Create the texture itself. glGenTextures(1, &texture_num); check_error(); glBindTexture(GL_TEXTURE_2D, texture_num); check_error(); - glPixelStorei(GL_UNPACK_ALIGNMENT, 1); - check_error(); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, needs_mipmaps ? GL_LINEAR_MIPMAP_NEAREST : GL_LINEAR); check_error(); - glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch); - check_error(); // Intel/Mesa seems to have a broken glGenerateMipmap() for non-FBO textures, so do it here // instead of calling glGenerateMipmap(). glTexParameteri(GL_TEXTURE_2D, GL_GENERATE_MIPMAP, needs_mipmaps ? GL_TRUE : GL_FALSE); @@ -103,6 +78,8 @@ void FlatInput::finalize() check_error(); glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); check_error(); + glBindTexture(GL_TEXTURE_2D, 0); + check_error(); needs_update = true; finalized = true; @@ -116,18 +93,14 @@ void FlatInput::set_gl_state(GLuint glsl_program_num, const std::string& prefix, check_error(); if (needs_update) { - // Copy the pixel data into the PBO. + // Re-upload the texture. glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, pbo); check_error(); - void *mapped_pbo = glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_WRITE_ONLY); - memcpy(mapped_pbo, pixel_data, pitch * height * bytes_per_pixel); - glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB); + glPixelStorei(GL_UNPACK_ALIGNMENT, 1); check_error(); - - // Re-upload the texture from the PBO. glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch); check_error(); - glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, width, height, format, type, BUFFER_OFFSET(0)); + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, width, height, format, type, pixel_data); check_error(); glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); check_error(); diff --git a/flat_input.h b/flat_input.h index 4728fb0..9fcaf53 100644 --- a/flat_input.h +++ b/flat_input.h @@ -61,17 +61,25 @@ public: // this data, you must either call set_pixel_data() again (using the same pointer // is fine), or invalidate_pixel_data(). Otherwise, the texture won't be re-uploaded // on subsequent frames. - void set_pixel_data(const unsigned char *pixel_data) + // + // The data can either be a regular pointer (if pbo==0), or a byte offset + // into a PBO. The latter will allow you to start uploading the texture data + // asynchronously to the GPU, if you have any CPU-intensive work between the + // call to set_pixel_data() and the actual rendering. In either case, + // the pointer (and PBO, if set) has to be valid at the time of the render call. + void set_pixel_data(const unsigned char *pixel_data, GLuint pbo = 0) { assert(this->type == GL_UNSIGNED_BYTE); this->pixel_data = pixel_data; + this->pbo = pbo; invalidate_pixel_data(); } - void set_pixel_data(const float *pixel_data) + void set_pixel_data(const float *pixel_data, GLuint pbo = 0) { assert(this->type == GL_FLOAT); this->pixel_data = pixel_data; + this->pbo = pbo; invalidate_pixel_data(); } @@ -92,7 +100,7 @@ private: GLuint pbo, texture_num; bool needs_update, finalized; int output_linear_gamma, needs_mipmaps; - unsigned width, height, pitch, bytes_per_pixel; + unsigned width, height, pitch; const void *pixel_data; }; diff --git a/flat_input_test.cpp b/flat_input_test.cpp index c8df483..a297623 100644 --- a/flat_input_test.cpp +++ b/flat_input_test.cpp @@ -1,11 +1,13 @@ // Unit tests for FlatInput. +#include #include #include "effect_chain.h" #include "flat_input.h" #include "gtest/gtest.h" #include "test_util.h" +#include "util.h" TEST(FlatInput, SimpleGrayscale) { const int size = 4; @@ -226,3 +228,39 @@ TEST(FlatInput, UpdatedData) { tester.run(out_data, GL_RED, COLORSPACE_sRGB, GAMMA_LINEAR); expect_equal(data, out_data, width, height); } + +TEST(FlatInput, PBO) { + const int width = 3; + const int height = 2; + + float data[width * height] = { + 0.0, 1.0, 0.5, + 0.5, 0.5, 0.2, + }; + float expected_data[4 * width * height] = { + 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.5, 0.5, 0.5, 1.0, + 0.5, 0.5, 0.5, 1.0, 0.5, 0.5, 0.5, 1.0, 0.2, 0.2, 0.2, 1.0, + }; + float out_data[4 * width * height]; + + GLuint pbo; + glGenBuffers(1, &pbo); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, pbo); + glBufferData(GL_PIXEL_UNPACK_BUFFER_ARB, width * height * sizeof(float), data, GL_STREAM_DRAW); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0); + + EffectChainTester tester(NULL, width, height); + + ImageFormat format; + format.color_space = COLORSPACE_sRGB; + format.gamma_curve = GAMMA_LINEAR; + + FlatInput *input = new FlatInput(format, FORMAT_GRAYSCALE, GL_FLOAT, width, height); + input->set_pixel_data((float *)BUFFER_OFFSET(0), pbo); + tester.get_chain()->add_input(input); + + tester.run(out_data, GL_RGBA, COLORSPACE_sRGB, GAMMA_LINEAR); + expect_equal(expected_data, out_data, 4 * width, height); + + glDeleteBuffers(1, &pbo); +} diff --git a/ycbcr_input.cpp b/ycbcr_input.cpp index d1d849a..e089e51 100644 --- a/ycbcr_input.cpp +++ b/ycbcr_input.cpp @@ -62,7 +62,6 @@ YCbCrInput::YCbCrInput(const ImageFormat &image_format, : image_format(image_format), ycbcr_format(ycbcr_format), needs_update(false), - needs_pbo_recreate(false), finalized(false), needs_mipmaps(false), width(width), @@ -88,10 +87,6 @@ YCbCrInput::YCbCrInput(const ImageFormat &image_format, YCbCrInput::~YCbCrInput() { - if (pbos[0] != 0) { - glDeleteBuffers(3, pbos); - check_error(); - } if (texture_num[0] != 0) { glDeleteTextures(3, texture_num); check_error(); @@ -100,33 +95,17 @@ YCbCrInput::~YCbCrInput() void YCbCrInput::finalize() { - glPixelStorei(GL_UNPACK_ALIGNMENT, 1); - check_error(); - - // Create PBOs to hold the textures holding the input image, and then the texture itself. - glGenBuffers(3, pbos); - check_error(); + // Create the textures themselves. glGenTextures(3, texture_num); check_error(); for (unsigned channel = 0; channel < 3; ++channel) { - glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, pbos[channel]); - check_error(); - glBufferData(GL_PIXEL_UNPACK_BUFFER_ARB, pitch[channel] * heights[channel], NULL, GL_STREAM_DRAW); - check_error(); - glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0); - check_error(); - glBindTexture(GL_TEXTURE_2D, texture_num[channel]); check_error(); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); check_error(); - glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch[channel]); - check_error(); glTexImage2D(GL_TEXTURE_2D, 0, GL_LUMINANCE8, widths[channel], heights[channel], 0, GL_LUMINANCE, GL_UNSIGNED_BYTE, NULL); check_error(); - glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); - check_error(); } needs_update = true; @@ -141,27 +120,16 @@ void YCbCrInput::set_gl_state(GLuint glsl_program_num, const std::string& prefix glBindTexture(GL_TEXTURE_2D, texture_num[channel]); check_error(); - if (needs_update || needs_pbo_recreate) { + if (needs_update) { + // Re-upload the texture. // Copy the pixel data into the PBO. glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, pbos[channel]); check_error(); - - if (needs_pbo_recreate) { - // The pitch has changed; we need to reallocate this PBO. - glBufferData(GL_PIXEL_UNPACK_BUFFER_ARB, pitch[channel] * heights[channel], NULL, GL_STREAM_DRAW); - check_error(); - } - - void *mapped_pbo = glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_WRITE_ONLY); - memcpy(mapped_pbo, pixel_data[channel], pitch[channel] * heights[channel]); - - glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB); + glPixelStorei(GL_UNPACK_ALIGNMENT, 1); check_error(); - - // Re-upload the texture from the PBO. glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch[channel]); check_error(); - glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, widths[channel], heights[channel], GL_LUMINANCE, GL_UNSIGNED_BYTE, BUFFER_OFFSET(0)); + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, widths[channel], heights[channel], GL_LUMINANCE, GL_UNSIGNED_BYTE, pixel_data[channel]); check_error(); glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); check_error(); @@ -169,11 +137,12 @@ void YCbCrInput::set_gl_state(GLuint glsl_program_num, const std::string& prefix check_error(); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); check_error(); - glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0); - check_error(); } } + glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0); + check_error(); + // Bind samplers. set_uniform_int(glsl_program_num, prefix, "tex_y", *sampler_num + 0); set_uniform_int(glsl_program_num, prefix, "tex_cb", *sampler_num + 1); @@ -181,7 +150,6 @@ void YCbCrInput::set_gl_state(GLuint glsl_program_num, const std::string& prefix *sampler_num += 3; needs_update = false; - needs_pbo_recreate = false; } std::string YCbCrInput::output_fragment_shader() diff --git a/ycbcr_input.h b/ycbcr_input.h index 1225d6e..0f868a8 100644 --- a/ycbcr_input.h +++ b/ycbcr_input.h @@ -63,10 +63,17 @@ public: // this data, you must either call set_pixel_data() again (using the same pointer // is fine), or invalidate_pixel_data(). Otherwise, the texture won't be re-uploaded // on subsequent frames. - void set_pixel_data(unsigned channel, const unsigned char *pixel_data) + // + // The data can either be a regular pointer (if pbo==0), or a byte offset + // into a PBO. The latter will allow you to start uploading the texture data + // asynchronously to the GPU, if you have any CPU-intensive work between the + // call to set_pixel_data() and the actual rendering. In either case, + // the pointer (and PBO, if set) has to be valid at the time of the render call. + void set_pixel_data(unsigned channel, const unsigned char *pixel_data, GLuint pbo = 0) { assert(channel >= 0 && channel < 3); this->pixel_data[channel] = pixel_data; + this->pbos[channel] = pbo; invalidate_pixel_data(); } @@ -77,17 +84,14 @@ public: void set_pitch(unsigned channel, unsigned pitch) { assert(channel >= 0 && channel < 3); - if (this->pitch[channel] != pitch) { - this->pitch[channel] = pitch; - needs_pbo_recreate = true; - } + this->pitch[channel] = pitch; } private: ImageFormat image_format; YCbCrFormat ycbcr_format; GLuint pbos[3], texture_num[3]; - bool needs_update, needs_pbo_recreate, finalized; + bool needs_update, finalized; int needs_mipmaps; diff --git a/ycbcr_input_test.cpp b/ycbcr_input_test.cpp index 4ff263a..f9be193 100644 --- a/ycbcr_input_test.cpp +++ b/ycbcr_input_test.cpp @@ -7,6 +7,7 @@ #include "gtest/gtest.h" #include "test_util.h" #include "ycbcr_input.h" +#include "util.h" TEST(YCbCrInput, Simple444) { const int width = 1; @@ -415,3 +416,60 @@ TEST(YCbCrInput, DifferentCbAndCrPositioning) { tester.run(out_data, GL_BLUE, COLORSPACE_sRGB, GAMMA_sRGB); expect_equal(expected_data_blue, out_data, width, height, 0.01, 0.001); } + +TEST(YCbCrInput, PBO) { + const int width = 1; + const int height = 5; + + // Pure-color test inputs, calculated with the formulas in Rec. 601 + // section 2.5.4. + unsigned char data[width * height * 3] = { + 16, 235, 81, 145, 41, + 128, 128, 90, 54, 240, + 128, 128, 240, 34, 110, + }; + float expected_data[4 * width * height] = { + 0.0, 0.0, 0.0, 1.0, + 1.0, 1.0, 1.0, 1.0, + 1.0, 0.0, 0.0, 1.0, + 0.0, 1.0, 0.0, 1.0, + 0.0, 0.0, 1.0, 1.0, + }; + float out_data[4 * width * height]; + + GLuint pbo; + glGenBuffers(1, &pbo); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, pbo); + glBufferData(GL_PIXEL_UNPACK_BUFFER_ARB, width * height * 3, data, GL_STREAM_DRAW); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0); + + EffectChainTester tester(NULL, width, height); + + ImageFormat format; + format.color_space = COLORSPACE_sRGB; + format.gamma_curve = GAMMA_sRGB; + + YCbCrFormat ycbcr_format; + ycbcr_format.luma_coefficients = YCBCR_REC_601; + ycbcr_format.full_range = false; + ycbcr_format.chroma_subsampling_x = 1; + ycbcr_format.chroma_subsampling_y = 1; + ycbcr_format.cb_x_position = 0.5f; + ycbcr_format.cb_y_position = 0.5f; + ycbcr_format.cr_x_position = 0.5f; + ycbcr_format.cr_y_position = 0.5f; + + YCbCrInput *input = new YCbCrInput(format, ycbcr_format, width, height); + input->set_pixel_data(0, (unsigned char *)BUFFER_OFFSET(0), pbo); + input->set_pixel_data(1, (unsigned char *)BUFFER_OFFSET(width * height), pbo); + input->set_pixel_data(2, (unsigned char *)BUFFER_OFFSET(width * height * 2), pbo); + tester.get_chain()->add_input(input); + + tester.run(out_data, GL_RGBA, COLORSPACE_sRGB, GAMMA_sRGB); + + // Y'CbCr isn't 100% accurate (the input values are rounded), + // so we need some leeway. + expect_equal(expected_data, out_data, 4 * width, height, 0.025, 0.002); + + glDeleteBuffers(1, &pbo); +} -- 2.39.2