unsigned base_pos = i * 2 - 1;
float w1 = weight[base_pos];
float w2 = weight[base_pos + 1];
-
- float offset, total_weight;
- combine_two_samples(w1, w2, &offset, &total_weight, NULL);
-
+ int size;
if (direction == HORIZONTAL) {
- samples[2 * i + 0] = (base_pos + offset) / (float)width;
+ size = width;
} else if (direction == VERTICAL) {
- samples[2 * i + 0] = (base_pos + offset) / (float)height;
+ size = height;
} else {
assert(false);
}
+ float pos1 = base_pos / (float)size;
+ float pos2 = (base_pos + 1) / (float)size;
+ float pos, total_weight;
+ combine_two_samples(w1, w2, pos1, pos2, size, COMBINE_DO_NOT_ROUND, &pos, &total_weight, NULL);
+
+ samples[2 * i + 0] = pos;
samples[2 * i + 1] = total_weight;
}
return a;
}
-unsigned combine_samples(Tap<float> *src, Tap<float> *dst, unsigned num_src_samples, unsigned max_samples_saved)
+unsigned combine_samples(Tap<float> *src, Tap<float> *dst, unsigned src_size, unsigned num_src_samples, unsigned max_samples_saved)
{
unsigned num_samples_saved = 0;
for (unsigned i = 0, j = 0; i < num_src_samples; ++i, ++j) {
float pos2 = src[i + 1].pos;
assert(pos2 > pos1);
- float offset, total_weight, sum_sq_error;
- combine_two_samples(w1, w2, &offset, &total_weight, &sum_sq_error);
+ float pos, total_weight, sum_sq_error;
+ combine_two_samples(w1, w2, pos1, pos2, src_size, COMBINE_ROUND_TO_FP16, &pos, &total_weight, &sum_sq_error);
// If the interpolation error is larger than that of about sqrt(2) of
// a level at 8-bit precision, don't combine. (You'd think 1.0 was enough,
// OK, we can combine this and the next sample.
if (dst != NULL) {
dst[j].weight = total_weight;
- dst[j].pos = pos1 + offset * (pos2 - pos1);
+ dst[j].pos = pos;
}
++i; // Skip the next sample.
// The greedy strategy for combining samples is optimal.
src_bilinear_samples = 0;
for (unsigned y = 0; y < dst_samples; ++y) {
- unsigned num_samples_saved = combine_samples(weights + y * src_samples, NULL, src_samples, UINT_MAX);
+ unsigned num_samples_saved = combine_samples(weights + y * src_samples, NULL, src_size, src_samples, UINT_MAX);
src_bilinear_samples = max<int>(src_bilinear_samples, src_samples - num_samples_saved);
}
unsigned num_samples_saved = combine_samples(
weights + y * src_samples,
bilinear_weights_ptr,
+ src_size,
src_samples,
src_samples - src_bilinear_samples);
assert(int(src_samples) - int(num_samples_saved) == src_bilinear_samples);
// the texel center right (everything is nicely symmetric).
// The approximate magnitudes have been checked against ImageMagick.
float expected_data[size * size] = {
- 0.0045, -0.0067, -0.0598, -0.0067, 0.0045,
- -0.0067, 0.0099, 0.0886, 0.0099, -0.0067,
- -0.0598, 0.0886, 0.7930, 0.0886, -0.0598,
- -0.0067, 0.0099, 0.0886, 0.0099, -0.0067,
- 0.0045, -0.0067, -0.0598, -0.0067, 0.0045,
+ 0.0046, -0.0068, -0.0611, -0.0068, 0.0047,
+ -0.0068, 0.0100, 0.0895, 0.0100, -0.0068,
+ -0.0603, 0.0892, 0.7993, 0.0895, -0.0611,
+ -0.0067, 0.0100, 0.0892, 0.0100, -0.0068,
+ 0.0045, -0.0067, -0.0603, -0.0068, 0.0046,
};
float data[size * size * 4], out_data[size * size];
expect_equal(expected_data, out_data, width, height);
}
+TEST(ResampleEffectTest, Precision) {
+ const int size = 2048;
+ const int offset = 5;
+
+ // Deliberately put the data of interest very close to the right,
+ // where texture coordinates are farther from 0 and thus less precise.
+ float data[size] = {0};
+ data[size - offset] = 1.0f;
+ float expected_data[size * 2] = {0};
+ for (int x = 0; x < size * 2; ++x) {
+ expected_data[x] = lanczos((x - (size - 2 * offset + 1) + 0.5f) * 0.5f, 3.0f);
+ }
+ float out_data[size * 2];
+
+ EffectChainTester tester(data, size * 2, 1, FORMAT_GRAYSCALE, COLORSPACE_sRGB, GAMMA_LINEAR);
+ Effect *resample_effect = tester.get_chain()->add_effect(new ResampleEffect());
+ ASSERT_TRUE(resample_effect->set_int("width", size * 2));
+ ASSERT_TRUE(resample_effect->set_int("height", 1));
+ ASSERT_TRUE(resample_effect->set_float("zoom_x", 2.0f));
+ tester.run(out_data, GL_RED, COLORSPACE_sRGB, GAMMA_LINEAR);
+
+ expect_equal(expected_data, out_data, size, 1);
+}
+
} // namespace movit
#include <string.h>
#include <Eigen/Core>
+#include "fp16.h"
#include "init.h"
#include "util.h"
return buf;
}
-void combine_two_samples(float w1, float w2, float *offset, float *total_weight, float *sum_sq_error)
+void combine_two_samples(float w1, float w2, float pos1, float pos2, unsigned size, CombineRoundingBehavior rounding_behavior,
+ float *offset, float *total_weight, float *sum_sq_error)
{
assert(movit_initialized);
assert(w1 * w2 >= 0.0f); // Should not have differing signs.
- float z; // Just a shorter name for offset.
+ float z; // Normalized 0..1 between pos1 and pos2.
if (fabs(w1 + w2) < 1e-6) {
z = 0.5f;
} else {
z = w2 / (w1 + w2);
}
+ *offset = pos1 + z * (pos2 - pos1);
+ if (rounding_behavior == COMBINE_ROUND_TO_FP16) {
+ // Round to fp16. Note that this might take z outside the 0..1 range.
+ *offset = fp16_to_fp64(fp64_to_fp16(*offset));
+ z = (z - pos1) / (pos2 - pos1);
+ } else {
+ assert(rounding_behavior == COMBINE_DO_NOT_ROUND);
+ }
+
// Round to the minimum number of bits we have measured earlier.
// The card will do this for us anyway, but if we know what the real z
// is, we can pick a better total_weight below.
- z = lrintf(z / movit_texel_subpixel_precision) * movit_texel_subpixel_precision;
+ z *= size; // Move to pixel coordinates,
+ z = lrintf(z / movit_texel_subpixel_precision) * movit_texel_subpixel_precision; // Round.
+ z /= size; // Move back to normalized coordinates.
// Choose total weight w so that we minimize total squared error
// for the effective weights:
//
// If z had infinite precision, this would simply reduce to w = w1 + w2.
*total_weight = (w1 * (1 - z) + w2 * z) / (z * z + (1 - z) * (1 - z));
- *offset = z;
if (sum_sq_error != NULL) {
float err1 = *total_weight * (1 - z) - w1;
float err2 = *total_weight * z - w2;
*sum_sq_error = err1 * err1 + err2 * err2;
}
-
- assert(*offset >= 0.0f);
- assert(*offset <= 1.0f);
}
GLuint fill_vertex_attribute(GLuint glsl_program_num, const string &attribute_name, GLint size, GLenum type, GLsizeiptr data_size, const GLvoid *data)
// Calculate a / b, rounding up. Does not handle overflow correctly.
unsigned div_round_up(unsigned a, unsigned b);
+enum CombineRoundingBehavior {
+ COMBINE_DO_NOT_ROUND = 0,
+ COMBINE_ROUND_TO_FP16 = 1,
+};
+
// Calculate where to sample, and with what weight, if one wants to use
-// the GPU's bilinear hardware to sample w1 * x[0] + w2 * x[1].
+// the GPU's bilinear hardware to sample w1 * x[pos1] + w2 * x[pos2],
+// where pos1 and pos2 must be normalized coordinates describing neighboring
+// pixels in the mipmap level at which you sample, and the total number of
+// pixels (in given mipmap level) is <size>.
//
// Note that since the GPU might have limited precision in its linear
// interpolation, the effective weights might be different from the ones you
// asked for. sum_sq_error, if not NULL, will contain the sum of the
// (estimated) squared errors of the two weights.
-void combine_two_samples(float w1, float w2, float *offset, float *total_weight, float *sum_sq_error);
+//
+// The answer, in "offset", comes as a normalized coordinate,
+// so if e.g. w2 = 0, you have simply offset = pos1. If <rounding_behavior>
+// is COMBINE_ROUND_TO_FP16, the coordinate is assumed to be stored as a
+// rounded fp16 value. This enables more precise calculation of total_weight
+// and sum_sq_error.
+void combine_two_samples(float w1, float w2, float pos1, float pos2, unsigned size, CombineRoundingBehavior rounding_behavior,
+ float *offset, float *total_weight, float *sum_sq_error);
// Create a VBO with the given data, and bind it to the vertex attribute
// with name <attribute_name>. Returns the VBO number.