Make combine_two_pixels() handle the fact that the GPU has limited subpixel interpola...

author Steinar H. Gunderson <sgunderson@bigfoot.com>

Sun, 28 Oct 2012 16:40:56 +0000 (17:40 +0100)

committer Steinar H. Gunderson <sgunderson@bigfoot.com>

Sun, 28 Oct 2012 16:40:56 +0000 (17:40 +0100)
author Steinar H. Gunderson <sgunderson@bigfoot.com>
Sun, 28 Oct 2012 16:40:56 +0000 (17:40 +0100)
committer Steinar H. Gunderson <sgunderson@bigfoot.com>
Sun, 28 Oct 2012 16:40:56 +0000 (17:40 +0100)
diff --git a/Makefile b/Makefile

index edbe09b91e589c063f17576b5cd61399f7d35e46..59a89e765a2424f9af045477054f68f6eb537df7 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -31,7 +31,7 @@ TESTS += flat_input_test
  TESTS += ycbcr_input_test
  
  # Core.
-LIB_OBJS=util.o widgets.o effect.o effect_chain.o
+LIB_OBJS=util.o widgets.o effect.o effect_chain.o init.o
  
  # Inputs.
  LIB_OBJS += flat_input.o
diff --git a/blur_effect.cpp b/blur_effect.cpp

index 90d1b68653b9af51c009c1fae48a7e007998da8d..ec61aeab784f8a9f8b3574a29e48141162b6b2f7 100644 (file)
--- a/blur_effect.cpp
+++ b/blur_effect.cpp
@@ -162,7 +162,7 @@ void SingleBlurPassEffect::set_gl_state(GLuint glsl_program_num, const std::stri
                 float w2 = weight[base_pos + 1];
  
                 float offset, total_weight;
-               combine_two_samples(w1, w2, &offset, &total_weight);
+               combine_two_samples(w1, w2, &offset, &total_weight, NULL);
  
                 float x = 0.0f, y = 0.0f;
  
diff --git a/demo.cpp b/demo.cpp

index 552380a9cf971ac984c43ebbf77708ea58bdefb2..2243f2dd702b90419ef491963636c6508c76b669 100644 (file)
--- a/demo.cpp
+++ b/demo.cpp
@@ -18,6 +18,7 @@
  #include <SDL/SDL_opengl.h>
  #include <SDL/SDL_image.h>
  
+#include "init.h"
  #include "effect.h"
  #include "effect_chain.h"
  #include "util.h"
@@ -152,14 +153,16 @@ int main(int argc, char **argv)
         SDL_GL_SetAttribute(SDL_GL_DOUBLEBUFFER, 1);
         SDL_SetVideoMode(WIDTH, HEIGHT, 0, SDL_OPENGL);
         SDL_WM_SetCaption("OpenGL window", NULL);
-       
-       // geez 
-       glPixelStorei(GL_PACK_ALIGNMENT, 1);
  
+       init_movit();
+       printf("GPU texture subpixel precision: about %.1f bits\n",
+               log2(1.0f / movit_texel_subpixel_precision));
+       
         unsigned img_w, img_h;
         unsigned char *src_img = load_image("blg_wheels_woman_1.jpg", &img_w, &img_h);
  
         EffectChain chain(WIDTH, HEIGHT);
+       glViewport(0, 0, WIDTH, HEIGHT);
  
         ImageFormat inout_format;
         inout_format.color_space = COLORSPACE_sRGB;
diff --git a/init.cpp b/init.cpp

new file mode 100644 (file)

index 0000000..129ee4c
--- /dev/null
+++ b/init.cpp
@@ -0,0 +1,137 @@
+#include "init.h"
+#include "opengl.h"
+#include "util.h"
+
+bool movit_initialized = false;
+float movit_texel_subpixel_precision;
+
+namespace {
+
+void measure_texel_subpixel_precision()
+{
+       static const unsigned width = 1024;
+
+       // Generate a destination texture to render to, and an FBO.
+       GLuint dst_texnum, fbo;
+
+       glGenTextures(1, &dst_texnum);
+       check_error();
+       glBindTexture(GL_TEXTURE_2D, dst_texnum);
+       check_error();
+       glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F_ARB, width, 1, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
+       check_error();
+
+       glGenFramebuffers(1, &fbo);
+       check_error();
+       glBindFramebuffer(GL_FRAMEBUFFER, fbo);
+       check_error();
+       glFramebufferTexture2D(
+               GL_FRAMEBUFFER,
+               GL_COLOR_ATTACHMENT0,
+               GL_TEXTURE_2D,
+               dst_texnum,
+               0);
+       check_error();
+
+       // Now generate a simple texture that's just [0,1].
+       GLuint src_texnum;
+       float texdata[] = { 0, 1 };
+       glGenTextures(1, &dst_texnum);
+       check_error();
+       glBindTexture(GL_TEXTURE_1D, dst_texnum);
+       check_error();
+       glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+       check_error();
+       glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+       check_error();
+       glTexImage1D(GL_TEXTURE_1D, 0, GL_LUMINANCE16F_ARB, 2, 0, GL_LUMINANCE, GL_FLOAT, texdata);
+       check_error();
+       glEnable(GL_TEXTURE_1D);
+       check_error();
+
+       // Basic state.
+       glDisable(GL_BLEND);
+       check_error();
+       glDisable(GL_DEPTH_TEST);
+       check_error();
+       glDepthMask(GL_FALSE);
+       check_error();
+
+       glViewport(0, 0, width, 1);
+
+       glMatrixMode(GL_PROJECTION);
+       glLoadIdentity();
+       glOrtho(0.0, 1.0, 0.0, 1.0, 0.0, 1.0);
+
+       glMatrixMode(GL_MODELVIEW);
+       glLoadIdentity();
+       check_error();
+
+       // Draw the texture stretched over a long quad, interpolating it out.
+       // Note that since the texel center is in (0.5), we need to adjust the
+       // texture coordinates in order not to get long stretches of (1,1,1,...)
+       // at the start and (...,0,0,0) at the end.
+       glBegin(GL_QUADS);
+
+       glTexCoord1f(0.25f);
+       glVertex2f(0.0f, 0.0f);
+
+       glTexCoord1f(0.75f);
+       glVertex2f(1.0f, 0.0f);
+
+       glTexCoord1f(0.75f);
+       glVertex2f(1.0f, 1.0f);
+
+       glTexCoord1f(0.25f);
+       glVertex2f(0.0f, 1.0f);
+
+       glEnd();
+       check_error();
+
+       glDisable(GL_TEXTURE_1D);
+       check_error();
+
+       // Now read the data back and see what the card did.
+       // (We only look at the red channel; the others will surely be the same.)
+       // We assume a linear ramp; anything else will give sort of odd results here.
+       float out_data[width];
+       glReadPixels(0, 0, width, 1, GL_RED, GL_FLOAT, out_data);
+       check_error();
+
+       float biggest_jump = 0.0f;
+       for (unsigned i = 1; i < width; ++i) {
+               assert(out_data[i] >= out_data[i - 1]);
+               biggest_jump = std::max(biggest_jump, out_data[i] - out_data[i - 1]);
+       }
+
+       movit_texel_subpixel_precision = biggest_jump;
+
+       // Clean up.
+       glBindTexture(GL_TEXTURE_1D, 0);
+       check_error();
+       glBindFramebuffer(GL_FRAMEBUFFER, 0);
+       check_error();
+       glDeleteFramebuffers(1, &fbo);
+       check_error();
+       glDeleteTextures(1, &dst_texnum);
+       check_error();
+       glDeleteTextures(1, &src_texnum);
+       check_error();
+}
+
+}  // namespace
+
+void init_movit()
+{
+       if (movit_initialized) {
+               return;
+       }
+
+       // geez 
+       glPixelStorei(GL_PACK_ALIGNMENT, 1);
+       glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
+
+       measure_texel_subpixel_precision();
+
+       movit_initialized = true;
+}
diff --git a/init.h b/init.h

new file mode 100644 (file)

index 0000000..e0dbe63
--- /dev/null
+++ b/init.h
@@ -0,0 +1,21 @@
+#ifndef _INIT_H
+#define _INIT_H
+
+// Initialize the library; in particular, will query the GPU for information
+// that is needed by various components. (In time, for instance, we will query
+// about extensions here.)
+void init_movit();
+
+// GPU features. These are not intended for end-user use.
+
+// Whether init_movit() has been called.
+extern bool movit_initialized;
+
+// An estimate on the number of different levels the linear texture interpolation
+// of the GPU can deliver. My Intel card seems to be limited to 2^6 levels here,
+// while a modern nVidia card (GTX 550 Ti) seem to use 2^8.
+//
+// We currently don't bother to test above 2^10.
+extern float movit_texel_subpixel_precision;
+
+#endif  // !defined(_INIT_H)
diff --git a/test_util.cpp b/test_util.cpp

index 6beaba1423bda237d06d07d1c288850dc87b4f61..afeb8486041487878ac666b6916db9efc12d30fd 100644 (file)
--- a/test_util.cpp
+++ b/test_util.cpp
@@ -1,3 +1,4 @@
+#include "init.h"
  #include "test_util.h"
  #include "flat_input.h"
  #include "gtest/gtest.h"
@@ -11,6 +12,8 @@ EffectChainTester::EffectChainTester(const float *data, unsigned width, unsigned
                                       MovitPixelFormat pixel_format, Colorspace color_space, GammaCurve gamma_curve)
         : chain(width, height), width(width), height(height), finalized(false)
  {
+       init_movit();
+
         if (data != NULL) {
                 add_input(data, pixel_format, color_space, gamma_curve);
         }
diff --git a/util.cpp b/util.cpp

index c6517c90fbe8af07733463fa9d41d0831a0a73f0..eeea198a4a074fa357311356d3157e7d00f15d36 100644 (file)
--- a/util.cpp
+++ b/util.cpp
@@ -5,6 +5,7 @@
  #include <math.h>
  #include "util.h"
  #include "opengl.h"
+#include "init.h"
  
  void hsv2rgb(float h, float s, float v, float *r, float *g, float *b)
  {
@@ -125,16 +126,44 @@ std::string output_glsl_mat3(const std::string &name, const Eigen::Matrix3d &m)
         return buf;
  }
  
-void combine_two_samples(float w1, float w2, float *offset, float *total_weight)
+void combine_two_samples(float w1, float w2, float *offset, float *total_weight, float *sum_sq_error)
  {
+       assert(movit_initialized);
         assert(w1 * w2 >= 0.0f);  // Should not have differing signs.
+       float z;  // Just a shorter name for offset.
         if (fabs(w1 + w2) < 1e-6) {
-               *offset = 0.5f;
-               *total_weight = 0.0f;
+               z = 0.5f;
         } else {
-               *offset = w2 / (w1 + w2);
-               *total_weight = w1 + w2;
+               z = w2 / (w1 + w2);
         }
+
+       // Round to the minimum number of bits we have measured earlier.
+       // The card will do this for us anyway, but if we know what the real z
+       // is, we can pick a better total_weight below.
+       z = lrintf(z / movit_texel_subpixel_precision) * movit_texel_subpixel_precision;
+       
+       // Choose total weight w so that we minimize total squared error
+       // for the effective weights:
+       //
+       //   e = (w(1-z) - a)² + (wz - b)²
+       //
+       // Differentiating by w and setting equal to zero:
+       //
+       //   2(w(1-z) - a)(1-z) + 2(wz - b)z = 0
+       //   w(1-z)² - a(1-z) + wz² - bz = 0
+       //   w((1-z)² + z²) = a(1-z) + bz
+       //   w = (a(1-z) + bz) / ((1-z)² + z²)
+       //
+       // If z had infinite precision, this would simply reduce to w = w1 + w2.
+       *total_weight = (w1 * (1 - z) + w2 * z) / (z * z + (1 - z) * (1 - z));
+       *offset = z;
+
+       if (sum_sq_error != NULL) {
+               float err1 = *total_weight * (1 - z) - w1;
+               float err2 = *total_weight * z - w2;
+               *sum_sq_error = err1 * err1 + err2 * err2;
+       }
+
         assert(*offset >= 0.0f);
         assert(*offset <= 1.0f);
  }
diff --git a/util.h b/util.h

index 414c20890027f8762e4a64a4fa2acd3d7428def1..082d1146a7f4c86c2d935ffeaa62307f6575b884 100644 (file)
--- a/util.h
+++ b/util.h
@@ -36,7 +36,12 @@ std::string output_glsl_mat3(const std::string &name, const Eigen::Matrix3d &m);
  
  // Calculate where to sample, and with what weight, if one wants to use
  // the GPU's bilinear hardware to sample w1 * x[0] + w2 * x[1].
-void combine_two_samples(float w1, float w2, float *offset, float *total_weight);
+//
+// Note that since the GPU might have limited precision in its linear
+// interpolation, the effective weights might be different from the ones you
+// asked for. sum_sq_error, if not NULL, will contain the sum of the
+// (estimated) squared errors of the two weights.
+void combine_two_samples(float w1, float w2, float *offset, float *total_weight, float *sum_sq_error);
  
  #ifdef NDEBUG
  #define check_error()
author	Steinar H. Gunderson <sgunderson@bigfoot.com>
	Sun, 28 Oct 2012 16:40:56 +0000 (17:40 +0100)
committer	Steinar H. Gunderson <sgunderson@bigfoot.com>
	Sun, 28 Oct 2012 16:40:56 +0000 (17:40 +0100)
Makefile		patch \| blob \| history
blur_effect.cpp		patch \| blob \| history
demo.cpp		patch \| blob \| history
init.cpp	[new file with mode: 0644]	patch \| blob
init.h	[new file with mode: 0644]	patch \| blob
test_util.cpp		patch \| blob \| history
util.cpp		patch \| blob \| history
util.h		patch \| blob \| history