Upload the texture via PBOs. Slight speedup on nVidia, rather neutral on Intel.
author: Steinar H. Gunderson <sgunderson@bigfoot.com>
Tue, 2 Oct 2012 10:36:39 +0000 (12:36 +0200)
committer: Steinar H. Gunderson <sgunderson@bigfoot.com>
Tue, 2 Oct 2012 10:36:39 +0000 (12:36 +0200)
effect_chain.cpp
effect_chain.h
main.cpp
util.h

diff --git a/effect_chain.cpp b/effect_chain.cpp
index a90f5f3..9eee1f6 100644 (file)
--- a/effect_chain.cpp
+++ b/effect_chain.cpp
@@ -185,21 +185,8 @@ void EffectChain::finalize()
        glLinkProgram(glsl_program_num);
        check_error();
 
-       finalized = true;
-}
-
-void EffectChain::render_to_screen(unsigned char *src)
-{
-       assert(finalized);
-
-       check_error();
-       glUseProgram(glsl_program_num);
-       check_error();
-
-       glActiveTexture(GL_TEXTURE0);
-       glBindTexture(GL_TEXTURE_2D, SOURCE_IMAGE);
-
-       GLenum format, internal_format;
+       // Translate the format to OpenGL's enums.
+       GLenum internal_format;
        if (use_srgb_texture_format) {
                internal_format = GL_SRGB8;
        } else {
@@ -207,22 +194,65 @@ void EffectChain::render_to_screen(unsigned char *src)
        }
        if (input_format.pixel_format == FORMAT_RGB) {
                format = GL_RGB;
+               bytes_per_pixel = 3;
        } else if (input_format.pixel_format == FORMAT_RGBA) {
                format = GL_RGBA;
+               bytes_per_pixel = 4;
        } else if (input_format.pixel_format == FORMAT_BGR) {
                format = GL_BGR;
+               bytes_per_pixel = 3;
        } else if (input_format.pixel_format == FORMAT_BGRA) {
                format = GL_BGRA;
+               bytes_per_pixel = 4;
        } else {
                assert(false);
        }
 
-       static bool first = true;
-       if (first) {
-               glTexImage2D(GL_TEXTURE_2D, 0, internal_format, width, height, 0, format, GL_UNSIGNED_BYTE, src);
-       } else {
-               glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, width, height, format, GL_UNSIGNED_BYTE, src);
-       }
+       // Create PBO to hold the texture, and then the texture itself.
+       glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 2);
+       check_error();
+       glBufferData(GL_PIXEL_UNPACK_BUFFER_ARB, width * height * bytes_per_pixel, NULL, GL_STREAM_DRAW);
+       check_error();
+
+       void *mapped_pbo = glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_WRITE_ONLY);
+       memset(mapped_pbo, 0, width * height * bytes_per_pixel);
+       glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB);
+       
+       glBindTexture(GL_TEXTURE_2D, SOURCE_IMAGE);
+       check_error();
+       glTexImage2D(GL_TEXTURE_2D, 0, internal_format, width, height, 0, format, GL_UNSIGNED_BYTE, BUFFER_OFFSET(0));
+       check_error();
+       glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0);
+       check_error();
+
+       finalized = true;
+}
+
+void EffectChain::render_to_screen(unsigned char *src)
+{
+       assert(finalized);
+
+       // Copy the pixel data into the PBO.
+       glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 2);
+       check_error();
+       void *mapped_pbo = glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_WRITE_ONLY);
+       memcpy(mapped_pbo, src, width * height * bytes_per_pixel);
+       glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB);
+       check_error();
+
+       // Re-upload the texture from the PBO.
+       glActiveTexture(GL_TEXTURE0);
+       check_error();
+       glBindTexture(GL_TEXTURE_2D, SOURCE_IMAGE);
+       check_error();
+       glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, width, height, format, GL_UNSIGNED_BYTE, BUFFER_OFFSET(0));
+       check_error();
+       glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0);
+       check_error();
+
+       glUseProgram(glsl_program_num);
+       check_error();
+
        check_error();
        glUniform1i(glGetUniformLocation(glsl_program_num, "input_tex"), 0);
 
diff --git a/effect_chain.h b/effect_chain.h
index c480df1..3fa62ee 100644 (file)
--- a/effect_chain.h
+++ b/effect_chain.h
@@ -55,7 +55,8 @@ private:
 
        bool use_srgb_texture_format;
 
-       int glsl_program_num;
+       GLint glsl_program_num;
+       GLenum format, bytes_per_pixel;
        bool finalized;
 
        // Used during the building of the effect chain.
diff --git a/main.cpp b/main.cpp
index 5c01cdb..287e19a 100644 (file)
--- a/main.cpp
+++ b/main.cpp
@@ -3,7 +3,6 @@
 
 #define WIDTH 1280
 #define HEIGHT 720
-#define BUFFER_OFFSET(i) ((char *)NULL + (i))
 
 #include <string.h>
 #include <math.h>
@@ -261,8 +260,12 @@ int main(int argc, char **argv)
                vignette_effect->set_float("inner_radius", inner_radius);
                chain.render_to_screen(src_img);
                
+               glBindBuffer(GL_PIXEL_PACK_BUFFER_ARB, 1);
+               check_error();
                glReadPixels(0, 0, WIDTH, HEIGHT, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, BUFFER_OFFSET(0));
                check_error();
+               glBindBuffer(GL_PIXEL_PACK_BUFFER_ARB, 0);
+               check_error();
 
                draw_hsv_wheel(0.0f, lift_rad, lift_theta, lift_v);
                draw_hsv_wheel(0.2f, gamma_rad, gamma_theta, gamma_v);
@@ -274,6 +277,8 @@ int main(int argc, char **argv)
                SDL_GL_SwapBuffers();
                check_error();
 
+               glBindBuffer(GL_PIXEL_PACK_BUFFER_ARB, 1);
+               check_error();
                unsigned char *screenbuf = (unsigned char *)glMapBuffer(GL_PIXEL_PACK_BUFFER_ARB, GL_READ_ONLY);
                check_error();
                if (screenshot) {
@@ -285,6 +290,8 @@ int main(int argc, char **argv)
                }
                glUnmapBuffer(GL_PIXEL_PACK_BUFFER_ARB);
                check_error();
+               glBindBuffer(GL_PIXEL_PACK_BUFFER_ARB, 0);
+               check_error();
 
 #if 1
 #if _POSIX_C_SOURCE >= 199309L
diff --git a/util.h b/util.h
index 1c2f6a6..dcb84a0 100644 (file)
--- a/util.h
+++ b/util.h
@@ -8,6 +8,8 @@
 
 #include <GL/gl.h>
 
+#define BUFFER_OFFSET(i) ((char *)NULL + (i))
+
 // assumes h in [0, 2pi> or [-pi, pi>
 void hsv2rgb(float h, float s, float v, float *r, float *g, float *b);