Converting the bilinear weights to fp16 ourselves, instead of leaving the
conversion to the GL driver, not only fixes issues with poor downconversion
on ATI, but also lets us normalize the weights while taking fp16 roundoff
into account. This seems to roughly halve the error in the
HeavyResampleGetsSumRight test, which as far as I can see would bring us
up to 10-bit accuracy.
#include "effect_chain.h"
#include "effect_util.h"
#include "effect_chain.h"
#include "effect_util.h"
#include "resample_effect.h"
#include "util.h"
#include "resample_effect.h"
#include "util.h"
input_height = height;
update_size();
}
input_height = height;
update_size();
}
void ResampleEffect::update_size()
{
bool ok = true;
void ResampleEffect::update_size()
{
bool ok = true;
// Now that we know the right width, actually combine the samples.
float *bilinear_weights = new float[dst_samples * src_bilinear_samples * 2];
// Now that we know the right width, actually combine the samples.
float *bilinear_weights = new float[dst_samples * src_bilinear_samples * 2];
+ fp16_int_t *bilinear_weights_fp16 = new fp16_int_t[dst_samples * src_bilinear_samples * 2];
for (unsigned y = 0; y < dst_samples; ++y) {
for (unsigned y = 0; y < dst_samples; ++y) {
+ float *bilinear_weights_ptr = bilinear_weights + (y * src_bilinear_samples) * 2;
+ fp16_int_t *bilinear_weights_fp16_ptr = bilinear_weights_fp16 + (y * src_bilinear_samples) * 2;
unsigned num_samples_saved = combine_samples(
weights + (y * src_samples) * 2,
unsigned num_samples_saved = combine_samples(
weights + (y * src_samples) * 2,
- bilinear_weights + (y * src_bilinear_samples) * 2,
src_samples,
src_samples - src_bilinear_samples);
assert(int(src_samples) - int(num_samples_saved) == src_bilinear_samples);
src_samples,
src_samples - src_bilinear_samples);
assert(int(src_samples) - int(num_samples_saved) == src_bilinear_samples);
+ // Convert to fp16.
+ for (int i = 0; i < src_bilinear_samples; ++i) {
+ bilinear_weights_fp16_ptr[i * 2 + 0] = fp64_to_fp16(bilinear_weights_ptr[i * 2 + 0]);
+ bilinear_weights_fp16_ptr[i * 2 + 1] = fp64_to_fp16(bilinear_weights_ptr[i * 2 + 1]);
+ }
+
// Normalize so that the sum becomes one. Note that we do it twice;
// this sometimes helps a tiny little bit when we have many samples.
for (int normalize_pass = 0; normalize_pass < 2; ++normalize_pass) {
// Normalize so that the sum becomes one. Note that we do it twice;
// this sometimes helps a tiny little bit when we have many samples.
for (int normalize_pass = 0; normalize_pass < 2; ++normalize_pass) {
for (int i = 0; i < src_bilinear_samples; ++i) {
for (int i = 0; i < src_bilinear_samples; ++i) {
- sum += bilinear_weights[(y * src_bilinear_samples + i) * 2 + 0];
+ sum += fp16_to_fp64(bilinear_weights_fp16_ptr[i * 2 + 0]);
}
for (int i = 0; i < src_bilinear_samples; ++i) {
}
for (int i = 0; i < src_bilinear_samples; ++i) {
- bilinear_weights[(y * src_bilinear_samples + i) * 2 + 0] /= sum;
+ bilinear_weights_fp16_ptr[i * 2 + 0] = fp64_to_fp16(
+ fp16_to_fp64(bilinear_weights_fp16_ptr[i * 2 + 0]) / sum);
// Encode as a two-component texture. Note the GL_REPEAT.
glActiveTexture(GL_TEXTURE0 + *sampler_num);
// Encode as a two-component texture. Note the GL_REPEAT.
glActiveTexture(GL_TEXTURE0 + *sampler_num);
check_error();
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT);
check_error();
check_error();
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT);
check_error();
- glTexImage2D(GL_TEXTURE_2D, 0, GL_RG16F, src_bilinear_samples, dst_samples, 0, GL_RG, GL_FLOAT, bilinear_weights);
+ glTexImage2D(GL_TEXTURE_2D, 0, GL_RG16F, src_bilinear_samples, dst_samples, 0, GL_RG, GL_HALF_FLOAT, bilinear_weights_fp16);
check_error();
delete[] weights;
delete[] bilinear_weights;
check_error();
delete[] weights;
delete[] bilinear_weights;
+ delete[] bilinear_weights_fp16;
}
void SingleResamplePassEffect::set_gl_state(GLuint glsl_program_num, const string &prefix, unsigned *sampler_num)
}
void SingleResamplePassEffect::set_gl_state(GLuint glsl_program_num, const string &prefix, unsigned *sampler_num)
ASSERT_TRUE(resample_effect->set_int("height", dheight));
tester.run(out_data, GL_RED, COLORSPACE_sRGB, GAMMA_LINEAR);
ASSERT_TRUE(resample_effect->set_int("height", dheight));
tester.run(out_data, GL_RED, COLORSPACE_sRGB, GAMMA_LINEAR);
- // Require that we are within 10-bit accuracy. Note that this is for
- // one pass only; some cards that don't have correct fp32 -> fp16
- // rounding in the intermediate framebuffers will go outside this after
- // a 2D resize. This limit is tight enough that it will be good enough
- // for 8-bit accuracy, though.
- expect_equal(expected_data, out_data, dwidth, dheight, 0.5 / 1023.0);
+ // Require that we are within 10-bit accuracy. Note that this limit is for
+ // one pass only, but the limit is tight enough that it should be good enough
+ // for 10-bit accuracy even after two passes.
+ expect_equal(expected_data, out_data, dwidth, dheight, 0.1 / 1023.0);