X-Git-Url: https://git.sesse.net/?p=movit;a=blobdiff_plain;f=util.h;h=5e8cc6d721da56616728bbffdafd599baefc5554;hp=a89d3a2adf4be608e296b2357e0b79f6104b3151;hb=b4ec29a251e118f463ce940ffaf0945188bf6894;hpb=42f0fd5ccbb3560a76d55f3e725416a5e0f93523 diff --git a/util.h b/util.h index a89d3a2..5e8cc6d 100644 --- a/util.h +++ b/util.h @@ -8,8 +8,10 @@ #include #include #include +#include "defs.h" +#include "fp16.h" -#define BUFFER_OFFSET(i) ((char *)NULL + (i)) +#define BUFFER_OFFSET(i) ((char *)nullptr + (i)) namespace movit { @@ -38,6 +40,11 @@ void print_3x3_matrix(const Eigen::Matrix3d &m); // Output a GLSL 3x3 matrix declaration. std::string output_glsl_mat3(const std::string &name, const Eigen::Matrix3d &m); +// Output GLSL scalar, 2-length and 3-length vector declarations. +std::string output_glsl_float(const std::string &name, float x); +std::string output_glsl_vec2(const std::string &name, float x, float y); +std::string output_glsl_vec3(const std::string &name, float x, float y, float z); + // Calculate a / b, rounding up. Does not handle overflow correctly. unsigned div_round_up(unsigned a, unsigned b); @@ -49,12 +56,16 @@ enum CombineRoundingBehavior { // Calculate where to sample, and with what weight, if one wants to use // the GPU's bilinear hardware to sample w1 * x[pos1] + w2 * x[pos2], // where pos1 and pos2 must be normalized coordinates describing neighboring -// pixels in the mipmap level at which you sample, and the total number of -// pixels (in given mipmap level) is <size>. +// texels in the mipmap level at which you sample. <num_subtexels> is the +// number of distinct accessible subtexels in the given mipmap level, +// calculated by num_texels / movit_texel_subpixel_precision. It is a float +// for performance reasons, even though it is expected to be a whole number. +// <inv_num_subtexels> is simply its inverse (1/x). <pos1_pos2_diff> is +// (pos2-pos1) and <inv_pos1_pos2_diff> is 1/(pos2-pos1). 
// // Note that since the GPU might have limited precision in its linear // interpolation, the effective weights might be different from the ones you -// asked for. sum_sq_error, if not NULL, will contain the sum of the +// asked for. sum_sq_error, if not nullptr, will contain the sum of the // (estimated) squared errors of the two weights. // // The answer, in "offset", comes as a normalized coordinate, @@ -62,8 +73,51 @@ enum CombineRoundingBehavior { // is COMBINE_ROUND_TO_FP16, the coordinate is assumed to be stored as a // rounded fp16 value. This enables more precise calculation of total_weight // and sum_sq_error. -void combine_two_samples(float w1, float w2, float pos1, float pos2, unsigned size, CombineRoundingBehavior rounding_behavior, - float *offset, float *total_weight, float *sum_sq_error); +template<class DestFloat> +void combine_two_samples(float w1, float w2, float pos1, float pos1_pos2_diff, float inv_pos1_pos2_diff, float num_subtexels, float inv_num_subtexels, + DestFloat *offset, DestFloat *total_weight, float *sum_sq_error) +{ + assert(w1 * w2 >= 0.0f); // Should not have differing signs. + float z; // Normalized 0..1 between pos1 and pos2. + if (fabs(w1 + w2) < 1e-6) { + z = 0.5f; + } else { + z = w2 / (w1 + w2); + } + + // Round to the desired precision. Note that this might take z outside the 0..1 range. + *offset = from_fp32<DestFloat>(pos1 + z * pos1_pos2_diff); + z = (to_fp32(*offset) - pos1) * inv_pos1_pos2_diff; + + // Round to the minimum number of bits we have measured earlier. + // The card will do this for us anyway, but if we know what the real z + // is, we can pick a better total_weight below. 
+ z = lrintf(z * num_subtexels) * inv_num_subtexels; + + // Choose total weight w so that we minimize total squared error + // for the effective weights: + // + // e = (w(1-z) - a)² + (wz - b)² + // + // Differentiating by w and setting equal to zero: + // + // 2(w(1-z) - a)(1-z) + 2(wz - b)z = 0 + // w(1-z)² - a(1-z) + wz² - bz = 0 + // w((1-z)² + z²) = a(1-z) + bz + // w = (a(1-z) + bz) / ((1-z)² + z²) + // + // If z had infinite precision, this would simply reduce to w = w1 + w2. + *total_weight = from_fp32<DestFloat>((w1 + z * (w2 - w1)) / (z * z + (1 - z) * (1 - z))); + + if (sum_sq_error != nullptr) { + float err1 = to_fp32(*total_weight) * (1 - z) - w1; + float err2 = to_fp32(*total_weight) * z - w2; + *sum_sq_error = err1 * err1 + err2 * err2; + } +} + +// Create a VBO with the given data. Returns the VBO number. +GLuint generate_vbo(GLint size, GLenum type, GLsizeiptr data_size, const GLvoid *data); // Create a VBO with the given data, and bind it to the vertex attribute // with name . Returns the VBO number. @@ -81,12 +135,15 @@ unsigned next_power_of_two(unsigned v); // back into anything you intend to pass into OpenGL. void *get_gl_context_identifier(); +// Used in the check_error() macro, below. +void abort_gl_error(GLenum err, const char *filename, int line) DOES_NOT_RETURN; + } // namespace movit #ifdef NDEBUG #define check_error() #else -#define check_error() { int err = glGetError(); if (err != GL_NO_ERROR) { printf("GL error 0x%x at %s:%d\n", err, __FILE__, __LINE__); abort(); } } +#define check_error() { GLenum err = glGetError(); if (err != GL_NO_ERROR) { movit::abort_gl_error(err, __FILE__, __LINE__); } } #endif // CHECK() is like assert(), but retains any side effects no matter the compilation mode.