TESTS += ycbcr_input_test
# Core.
-LIB_OBJS=util.o widgets.o effect.o effect_chain.o
+LIB_OBJS=util.o widgets.o effect.o effect_chain.o init.o
# Inputs.
LIB_OBJS += flat_input.o
float w2 = weight[base_pos + 1];
float offset, total_weight;
- combine_two_samples(w1, w2, &offset, &total_weight);
+ combine_two_samples(w1, w2, &offset, &total_weight, NULL);
float x = 0.0f, y = 0.0f;
#include <SDL/SDL_opengl.h>
#include <SDL/SDL_image.h>
+#include "init.h"
#include "effect.h"
#include "effect_chain.h"
#include "util.h"
SDL_GL_SetAttribute(SDL_GL_DOUBLEBUFFER, 1);
SDL_SetVideoMode(WIDTH, HEIGHT, 0, SDL_OPENGL);
SDL_WM_SetCaption("OpenGL window", NULL);
-
- // geez
- glPixelStorei(GL_PACK_ALIGNMENT, 1);
+ init_movit();
+ printf("GPU texture subpixel precision: about %.1f bits\n",
+ log2(1.0f / movit_texel_subpixel_precision));
+
unsigned img_w, img_h;
unsigned char *src_img = load_image("blg_wheels_woman_1.jpg", &img_w, &img_h);
EffectChain chain(WIDTH, HEIGHT);
+ glViewport(0, 0, WIDTH, HEIGHT);
ImageFormat inout_format;
inout_format.color_space = COLORSPACE_sRGB;
--- /dev/null
+#include "init.h"
+#include "opengl.h"
+#include "util.h"
+
+bool movit_initialized = false;
+float movit_texel_subpixel_precision;
+
+namespace {
+
+// Estimates how finely the GPU's linear texture filtering interpolates
+// between two adjacent texels, and stores the result in
+// movit_texel_subpixel_precision.
+//
+// A two-texel 1D texture holding {0, 1} is stretched over a 1024x1 render
+// target; the largest difference between two neighboring output pixels is
+// taken as the interpolation step size.
+//
+// The viewport, the projection/modelview matrices and the blend/depth state
+// set here are not restored afterwards.
+void measure_texel_subpixel_precision()
+{
+	static const unsigned width = 1024;
+
+	// Generate a destination texture to render to, and an FBO.
+	GLuint dst_texnum, fbo;
+
+	glGenTextures(1, &dst_texnum);
+	check_error();
+	glBindTexture(GL_TEXTURE_2D, dst_texnum);
+	check_error();
+	glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F_ARB, width, 1, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
+	check_error();
+
+	glGenFramebuffers(1, &fbo);
+	check_error();
+	glBindFramebuffer(GL_FRAMEBUFFER, fbo);
+	check_error();
+	glFramebufferTexture2D(
+		GL_FRAMEBUFFER,
+		GL_COLOR_ATTACHMENT0,
+		GL_TEXTURE_2D,
+		dst_texnum,
+		0);
+	check_error();
+
+	// Now generate a simple texture that's just [0,1].
+	// It gets its own name (src_texnum), separate from the render target,
+	// so that both textures can be deleted during cleanup.
+	GLuint src_texnum;
+	float texdata[] = { 0, 1 };
+	glGenTextures(1, &src_texnum);
+	check_error();
+	glBindTexture(GL_TEXTURE_1D, src_texnum);
+	check_error();
+	glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+	check_error();
+	glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+	check_error();
+	glTexImage1D(GL_TEXTURE_1D, 0, GL_LUMINANCE16F_ARB, 2, 0, GL_LUMINANCE, GL_FLOAT, texdata);
+	check_error();
+	glEnable(GL_TEXTURE_1D);
+	check_error();
+
+	// Basic state.
+	glDisable(GL_BLEND);
+	check_error();
+	glDisable(GL_DEPTH_TEST);
+	check_error();
+	glDepthMask(GL_FALSE);
+	check_error();
+
+	glViewport(0, 0, width, 1);
+
+	glMatrixMode(GL_PROJECTION);
+	glLoadIdentity();
+	glOrtho(0.0, 1.0, 0.0, 1.0, 0.0, 1.0);
+
+	glMatrixMode(GL_MODELVIEW);
+	glLoadIdentity();
+	check_error();
+
+	// Draw the texture stretched over a long quad, interpolating it out.
+	// Note that since the texel centers of a two-texel texture are at
+	// 0.25 and 0.75, we need to adjust the texture coordinates in order
+	// not to get long stretches of (0,0,0,...) at the start and
+	// (...,1,1,1) at the end.
+	glBegin(GL_QUADS);
+
+	glTexCoord1f(0.25f);
+	glVertex2f(0.0f, 0.0f);
+
+	glTexCoord1f(0.75f);
+	glVertex2f(1.0f, 0.0f);
+
+	glTexCoord1f(0.75f);
+	glVertex2f(1.0f, 1.0f);
+
+	glTexCoord1f(0.25f);
+	glVertex2f(0.0f, 1.0f);
+
+	glEnd();
+	check_error();
+
+	glDisable(GL_TEXTURE_1D);
+	check_error();
+
+	// Now read the data back and see what the card did.
+	// (We only look at the red channel; the others will surely be the same.)
+	// We assume a linear ramp; anything else will give sort of odd results here.
+	float out_data[width];
+	glReadPixels(0, 0, width, 1, GL_RED, GL_FLOAT, out_data);
+	check_error();
+
+	// The ramp must never decrease; the largest step between two
+	// neighboring pixels is our estimate of the interpolation granularity.
+	float biggest_jump = 0.0f;
+	for (unsigned i = 1; i < width; ++i) {
+		assert(out_data[i] >= out_data[i - 1]);
+		biggest_jump = std::max(biggest_jump, out_data[i] - out_data[i - 1]);
+	}
+
+	movit_texel_subpixel_precision = biggest_jump;
+
+	// Clean up.
+	glBindTexture(GL_TEXTURE_1D, 0);
+	check_error();
+	glBindFramebuffer(GL_FRAMEBUFFER, 0);
+	check_error();
+	glDeleteFramebuffers(1, &fbo);
+	check_error();
+	glDeleteTextures(1, &dst_texnum);
+	check_error();
+	glDeleteTextures(1, &src_texnum);
+	check_error();
+}
+
+} // namespace
+
+// One-time library setup. The first call sets the GL pixel pack/unpack
+// alignment to 1 and measures the GPU's texture interpolation precision;
+// every later call is a no-op.
+void init_movit()
+{
+	if (!movit_initialized) {
+		// Transfer pixel rows to and from GL without any row padding.
+		glPixelStorei(GL_PACK_ALIGNMENT, 1);
+		glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
+
+		measure_texel_subpixel_precision();
+
+		movit_initialized = true;
+	}
+}
--- /dev/null
+#ifndef _INIT_H
+#define _INIT_H
+
+// Initialize the library; in particular, will query the GPU for information
+// that is needed by various components. (In time, for instance, we will query
+// about extensions here.)
+void init_movit();
+
+// GPU features. These are not intended for end-user use.
+
+// Whether init_movit() has been called.
+extern bool movit_initialized;
+
+// An estimate of the smallest step the GPU's linear texture interpolation
+// can resolve between two adjacent texels; 1.0 divided by this value is
+// roughly the number of different levels the interpolation can deliver.
+// My Intel card seems to be limited to 2^6 levels here, while a modern
+// nVidia card (GTX 550 Ti) seems to use 2^8.
+//
+// We currently don't bother to test above 2^10.
+extern float movit_texel_subpixel_precision;
+
+#endif // !defined(_INIT_H)
+#include "init.h"
#include "test_util.h"
#include "flat_input.h"
#include "gtest/gtest.h"
MovitPixelFormat pixel_format, Colorspace color_space, GammaCurve gamma_curve)
: chain(width, height), width(width), height(height), finalized(false)
{
+ init_movit();
+
if (data != NULL) {
add_input(data, pixel_format, color_space, gamma_curve);
}
#include <math.h>
#include "util.h"
#include "opengl.h"
+#include "init.h"
void hsv2rgb(float h, float s, float v, float *r, float *g, float *b)
{
return buf;
}
-void combine_two_samples(float w1, float w2, float *offset, float *total_weight)
+void combine_two_samples(float w1, float w2, float *offset, float *total_weight, float *sum_sq_error)
{
+ assert(movit_initialized);  // The rounding below reads movit_texel_subpixel_precision.
assert(w1 * w2 >= 0.0f); // Should not have differing signs.
+ float z; // Just a shorter name for offset.
if (fabs(w1 + w2) < 1e-6) {
- *offset = 0.5f;
- *total_weight = 0.0f;
+ z = 0.5f;  // The weights sum to (nearly) zero; avoid dividing by it and sample midway.
} else {
- *offset = w2 / (w1 + w2);
- *total_weight = w1 + w2;
+ z = w2 / (w1 + w2);
}
+
+ // Round to the minimum number of bits we have measured earlier.
+ // The card will do this for us anyway, but if we know what the real z
+ // is, we can pick a better total_weight below.
+ z = lrintf(z / movit_texel_subpixel_precision) * movit_texel_subpixel_precision;  // NOTE(review): divides by the measured precision; assumes it is nonzero.
+
+ // Choose total weight w so that we minimize total squared error
+ // for the effective weights (a = w1 and b = w2 are the requested weights):
+ //
+ // e = (w(1-z) - a)² + (wz - b)²
+ //
+ // Differentiating by w and setting equal to zero:
+ //
+ // 2(w(1-z) - a)(1-z) + 2(wz - b)z = 0
+ // w(1-z)² - a(1-z) + wz² - bz = 0
+ // w((1-z)² + z²) = a(1-z) + bz
+ // w = (a(1-z) + bz) / ((1-z)² + z²)
+ //
+ // If z had infinite precision, this would simply reduce to w = w1 + w2.
+ *total_weight = (w1 * (1 - z) + w2 * z) / (z * z + (1 - z) * (1 - z));
+ *offset = z;
+
+ if (sum_sq_error != NULL) {
+ float err1 = *total_weight * (1 - z) - w1;  // Effective first weight minus the requested w1.
+ float err2 = *total_weight * z - w2;  // Effective second weight minus the requested w2.
+ *sum_sq_error = err1 * err1 + err2 * err2;
+ }
+
assert(*offset >= 0.0f);
assert(*offset <= 1.0f);
}
// Calculate where to sample, and with what weight, if one wants to use
// the GPU's bilinear hardware to sample w1 * x[0] + w2 * x[1].
-void combine_two_samples(float w1, float w2, float *offset, float *total_weight);
+//
+// Note that since the GPU might have limited precision in its linear
+// interpolation, the effective weights might be different from the ones you
+// asked for. sum_sq_error, if not NULL, will contain the sum of the
+// (estimated) squared errors of the two weights.
+void combine_two_samples(float w1, float w2, float *offset, float *total_weight, float *sum_sq_error);
#ifdef NDEBUG
#define check_error()