]> git.sesse.net Git - movit/commitdiff
Make combine_two_pixels() handle the fact that the GPU has limited subpixel interpola...
authorSteinar H. Gunderson <sgunderson@bigfoot.com>
Sun, 28 Oct 2012 16:40:56 +0000 (17:40 +0100)
committerSteinar H. Gunderson <sgunderson@bigfoot.com>
Sun, 28 Oct 2012 16:40:56 +0000 (17:40 +0100)
Currently we only use it to somewhat improve the total_weight estimate
to reduce the error slightly. However, the function can now also return
the total error estimate, which will be useful for knowing when we can
and cannot combine weights with reasonable error.

Makefile
blur_effect.cpp
demo.cpp
init.cpp [new file with mode: 0644]
init.h [new file with mode: 0644]
test_util.cpp
util.cpp
util.h

index edbe09b91e589c063f17576b5cd61399f7d35e46..59a89e765a2424f9af045477054f68f6eb537df7 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -31,7 +31,7 @@ TESTS += flat_input_test
 TESTS += ycbcr_input_test
 
 # Core.
-LIB_OBJS=util.o widgets.o effect.o effect_chain.o
+LIB_OBJS=util.o widgets.o effect.o effect_chain.o init.o
 
 # Inputs.
 LIB_OBJS += flat_input.o
index 90d1b68653b9af51c009c1fae48a7e007998da8d..ec61aeab784f8a9f8b3574a29e48141162b6b2f7 100644 (file)
@@ -162,7 +162,7 @@ void SingleBlurPassEffect::set_gl_state(GLuint glsl_program_num, const std::stri
                float w2 = weight[base_pos + 1];
 
                float offset, total_weight;
-               combine_two_samples(w1, w2, &offset, &total_weight);
+               combine_two_samples(w1, w2, &offset, &total_weight, NULL);
 
                float x = 0.0f, y = 0.0f;
 
index 552380a9cf971ac984c43ebbf77708ea58bdefb2..2243f2dd702b90419ef491963636c6508c76b669 100644 (file)
--- a/demo.cpp
+++ b/demo.cpp
@@ -18,6 +18,7 @@
 #include <SDL/SDL_opengl.h>
 #include <SDL/SDL_image.h>
 
+#include "init.h"
 #include "effect.h"
 #include "effect_chain.h"
 #include "util.h"
@@ -152,14 +153,16 @@ int main(int argc, char **argv)
        SDL_GL_SetAttribute(SDL_GL_DOUBLEBUFFER, 1);
        SDL_SetVideoMode(WIDTH, HEIGHT, 0, SDL_OPENGL);
        SDL_WM_SetCaption("OpenGL window", NULL);
-       
-       // geez 
-       glPixelStorei(GL_PACK_ALIGNMENT, 1);
 
+       init_movit();
+       printf("GPU texture subpixel precision: about %.1f bits\n",
+               log2(1.0f / movit_texel_subpixel_precision));
+       
        unsigned img_w, img_h;
        unsigned char *src_img = load_image("blg_wheels_woman_1.jpg", &img_w, &img_h);
 
        EffectChain chain(WIDTH, HEIGHT);
+       glViewport(0, 0, WIDTH, HEIGHT);
 
        ImageFormat inout_format;
        inout_format.color_space = COLORSPACE_sRGB;
diff --git a/init.cpp b/init.cpp
new file mode 100644 (file)
index 0000000..129ee4c
--- /dev/null
+++ b/init.cpp
@@ -0,0 +1,137 @@
+#include "init.h"
+#include "opengl.h"
+#include "util.h"
+
+bool movit_initialized = false;
+float movit_texel_subpixel_precision;
+
+namespace {
+
+void measure_texel_subpixel_precision()
+{
+       static const unsigned width = 1024;
+
+       // Generate a destination texture to render to, and an FBO.
+       GLuint dst_texnum, fbo;
+
+       glGenTextures(1, &dst_texnum);
+       check_error();
+       glBindTexture(GL_TEXTURE_2D, dst_texnum);
+       check_error();
+       glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F_ARB, width, 1, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
+       check_error();
+
+       glGenFramebuffers(1, &fbo);
+       check_error();
+       glBindFramebuffer(GL_FRAMEBUFFER, fbo);
+       check_error();
+       glFramebufferTexture2D(
+               GL_FRAMEBUFFER,
+               GL_COLOR_ATTACHMENT0,
+               GL_TEXTURE_2D,
+               dst_texnum,
+               0);
+       check_error();
+
+       // Now generate a simple texture that's just [0,1].
+       GLuint src_texnum;
+       float texdata[] = { 0, 1 };
+       glGenTextures(1, &dst_texnum);
+       check_error();
+       glBindTexture(GL_TEXTURE_1D, dst_texnum);
+       check_error();
+       glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+       check_error();
+       glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+       check_error();
+       glTexImage1D(GL_TEXTURE_1D, 0, GL_LUMINANCE16F_ARB, 2, 0, GL_LUMINANCE, GL_FLOAT, texdata);
+       check_error();
+       glEnable(GL_TEXTURE_1D);
+       check_error();
+
+       // Basic state.
+       glDisable(GL_BLEND);
+       check_error();
+       glDisable(GL_DEPTH_TEST);
+       check_error();
+       glDepthMask(GL_FALSE);
+       check_error();
+
+       glViewport(0, 0, width, 1);
+
+       glMatrixMode(GL_PROJECTION);
+       glLoadIdentity();
+       glOrtho(0.0, 1.0, 0.0, 1.0, 0.0, 1.0);
+
+       glMatrixMode(GL_MODELVIEW);
+       glLoadIdentity();
+       check_error();
+
+       // Draw the texture stretched over a long quad, interpolating it out.
+       // Note that since the texel center is in (0.5), we need to adjust the
+       // texture coordinates in order not to get long stretches of (1,1,1,...)
+       // at the start and (...,0,0,0) at the end.
+       glBegin(GL_QUADS);
+
+       glTexCoord1f(0.25f);
+       glVertex2f(0.0f, 0.0f);
+
+       glTexCoord1f(0.75f);
+       glVertex2f(1.0f, 0.0f);
+
+       glTexCoord1f(0.75f);
+       glVertex2f(1.0f, 1.0f);
+
+       glTexCoord1f(0.25f);
+       glVertex2f(0.0f, 1.0f);
+
+       glEnd();
+       check_error();
+
+       glDisable(GL_TEXTURE_1D);
+       check_error();
+
+       // Now read the data back and see what the card did.
+       // (We only look at the red channel; the others will surely be the same.)
+       // We assume a linear ramp; anything else will give sort of odd results here.
+       float out_data[width];
+       glReadPixels(0, 0, width, 1, GL_RED, GL_FLOAT, out_data);
+       check_error();
+
+       float biggest_jump = 0.0f;
+       for (unsigned i = 1; i < width; ++i) {
+               assert(out_data[i] >= out_data[i - 1]);
+               biggest_jump = std::max(biggest_jump, out_data[i] - out_data[i - 1]);
+       }
+
+       movit_texel_subpixel_precision = biggest_jump;
+
+       // Clean up.
+       glBindTexture(GL_TEXTURE_1D, 0);
+       check_error();
+       glBindFramebuffer(GL_FRAMEBUFFER, 0);
+       check_error();
+       glDeleteFramebuffers(1, &fbo);
+       check_error();
+       glDeleteTextures(1, &dst_texnum);
+       check_error();
+       glDeleteTextures(1, &src_texnum);
+       check_error();
+}
+
+}  // namespace
+
+void init_movit()
+{
+       if (movit_initialized) {
+               return;
+       }
+
+       // geez 
+       glPixelStorei(GL_PACK_ALIGNMENT, 1);
+       glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
+
+       measure_texel_subpixel_precision();
+
+       movit_initialized = true;
+}
diff --git a/init.h b/init.h
new file mode 100644 (file)
index 0000000..e0dbe63
--- /dev/null
+++ b/init.h
@@ -0,0 +1,21 @@
+#ifndef _INIT_H
+#define _INIT_H
+
+// Initialize the library; in particular, will query the GPU for information
+// that is needed by various components. (In time, for instance, we will query
+// about extensions here.)
+void init_movit();
+
+// GPU features. These are not intended for end-user use.
+
+// Whether init_movit() has been called.
+extern bool movit_initialized;
+
+// An estimate on the number of different levels the linear texture interpolation
+// of the GPU can deliver. My Intel card seems to be limited to 2^6 levels here,
+// while a modern nVidia card (GTX 550 Ti) seem to use 2^8.
+//
+// We currently don't bother to test above 2^10.
+extern float movit_texel_subpixel_precision;
+
+#endif  // !defined(_INIT_H)
index 6beaba1423bda237d06d07d1c288850dc87b4f61..afeb8486041487878ac666b6916db9efc12d30fd 100644 (file)
@@ -1,3 +1,4 @@
+#include "init.h"
 #include "test_util.h"
 #include "flat_input.h"
 #include "gtest/gtest.h"
@@ -11,6 +12,8 @@ EffectChainTester::EffectChainTester(const float *data, unsigned width, unsigned
                                      MovitPixelFormat pixel_format, Colorspace color_space, GammaCurve gamma_curve)
        : chain(width, height), width(width), height(height), finalized(false)
 {
+       init_movit();
+
        if (data != NULL) {
                add_input(data, pixel_format, color_space, gamma_curve);
        }
index c6517c90fbe8af07733463fa9d41d0831a0a73f0..eeea198a4a074fa357311356d3157e7d00f15d36 100644 (file)
--- a/util.cpp
+++ b/util.cpp
@@ -5,6 +5,7 @@
 #include <math.h>
 #include "util.h"
 #include "opengl.h"
+#include "init.h"
 
 void hsv2rgb(float h, float s, float v, float *r, float *g, float *b)
 {
@@ -125,16 +126,44 @@ std::string output_glsl_mat3(const std::string &name, const Eigen::Matrix3d &m)
        return buf;
 }
 
-void combine_two_samples(float w1, float w2, float *offset, float *total_weight)
+void combine_two_samples(float w1, float w2, float *offset, float *total_weight, float *sum_sq_error)
 {
+       assert(movit_initialized);
        assert(w1 * w2 >= 0.0f);  // Should not have differing signs.
+       float z;  // Just a shorter name for offset.
        if (fabs(w1 + w2) < 1e-6) {
-               *offset = 0.5f;
-               *total_weight = 0.0f;
+               z = 0.5f;
        } else {
-               *offset = w2 / (w1 + w2);
-               *total_weight = w1 + w2;
+               z = w2 / (w1 + w2);
        }
+
+       // Round to the minimum number of bits we have measured earlier.
+       // The card will do this for us anyway, but if we know what the real z
+       // is, we can pick a better total_weight below.
+       z = lrintf(z / movit_texel_subpixel_precision) * movit_texel_subpixel_precision;
+       
+       // Choose total weight w so that we minimize total squared error
+       // for the effective weights:
+       //
+       //   e = (w(1-z) - a)² + (wz - b)²
+       //
+       // Differentiating by w and setting equal to zero:
+       //
+       //   2(w(1-z) - a)(1-z) + 2(wz - b)z = 0
+       //   w(1-z)² - a(1-z) + wz² - bz = 0
+       //   w((1-z)² + z²) = a(1-z) + bz
+       //   w = (a(1-z) + bz) / ((1-z)² + z²)
+       //
+       // If z had infinite precision, this would simply reduce to w = w1 + w2.
+       *total_weight = (w1 * (1 - z) + w2 * z) / (z * z + (1 - z) * (1 - z));
+       *offset = z;
+
+       if (sum_sq_error != NULL) {
+               float err1 = *total_weight * (1 - z) - w1;
+               float err2 = *total_weight * z - w2;
+               *sum_sq_error = err1 * err1 + err2 * err2;
+       }
+
        assert(*offset >= 0.0f);
        assert(*offset <= 1.0f);
 }
diff --git a/util.h b/util.h
index 414c20890027f8762e4a64a4fa2acd3d7428def1..082d1146a7f4c86c2d935ffeaa62307f6575b884 100644 (file)
--- a/util.h
+++ b/util.h
@@ -36,7 +36,12 @@ std::string output_glsl_mat3(const std::string &name, const Eigen::Matrix3d &m);
 
 // Calculate where to sample, and with what weight, if one wants to use
 // the GPU's bilinear hardware to sample w1 * x[0] + w2 * x[1].
-void combine_two_samples(float w1, float w2, float *offset, float *total_weight);
+//
+// Note that since the GPU might have limited precision in its linear
+// interpolation, the effective weights might be different from the ones you
+// asked for. sum_sq_error, if not NULL, will contain the sum of the
+// (estimated) squared errors of the two weights.
+void combine_two_samples(float w1, float w2, float *offset, float *total_weight, float *sum_sq_error);
 
 #ifdef NDEBUG
 #define check_error()