Do our own fp16 conversion in ResampleEffect.
author: Steinar H. Gunderson <sgunderson@bigfoot.com>
Sun, 9 Mar 2014 17:22:18 +0000 (18:22 +0100)
committer: Steinar H. Gunderson <sgunderson@bigfoot.com>
Sun, 9 Mar 2014 17:25:37 +0000 (18:25 +0100)
This not only fixes issues with poor downconversion on ATI, but also
allows us to normalize while being aware of fp16 roundoff issues.
This seems to cut the error roughly in half in the HeavyResampleGetsSumRight
test, which as far as I can see would take us up to 10-bit accuracy.

resample_effect.cpp
resample_effect_test.cpp

index dbc5788..4af60de 100644 (file)
@@ -10,6 +10,7 @@
 
 #include "effect_chain.h"
 #include "effect_util.h"
+#include "fp16.h"
 #include "resample_effect.h"
 #include "util.h"
 
@@ -142,7 +143,7 @@ void ResampleEffect::inform_input_size(unsigned input_num, unsigned width, unsig
        input_height = height;
        update_size();
 }
-               
+
 void ResampleEffect::update_size()
 {
        bool ok = true;
@@ -325,26 +326,36 @@ void SingleResamplePassEffect::update_texture(GLuint glsl_program_num, const str
 
        // Now that we know the right width, actually combine the samples.
        float *bilinear_weights = new float[dst_samples * src_bilinear_samples * 2];
+       fp16_int_t *bilinear_weights_fp16 = new fp16_int_t[dst_samples * src_bilinear_samples * 2];
        for (unsigned y = 0; y < dst_samples; ++y) {
+               float *bilinear_weights_ptr = bilinear_weights + (y * src_bilinear_samples) * 2;
+               fp16_int_t *bilinear_weights_fp16_ptr = bilinear_weights_fp16 + (y * src_bilinear_samples) * 2;
                unsigned num_samples_saved = combine_samples(
                        weights + (y * src_samples) * 2,
-                       bilinear_weights + (y * src_bilinear_samples) * 2,
+                       bilinear_weights_ptr,
                        src_samples,
                        src_samples - src_bilinear_samples);
                assert(int(src_samples) - int(num_samples_saved) == src_bilinear_samples);
 
+               // Convert to fp16.
+               for (int i = 0; i < src_bilinear_samples; ++i) {
+                       bilinear_weights_fp16_ptr[i * 2 + 0] = fp64_to_fp16(bilinear_weights_ptr[i * 2 + 0]);
+                       bilinear_weights_fp16_ptr[i * 2 + 1] = fp64_to_fp16(bilinear_weights_ptr[i * 2 + 1]);
+               }
+
                // Normalize so that the sum becomes one. Note that we do it twice;
                // this sometimes helps a tiny little bit when we have many samples.
                for (int normalize_pass = 0; normalize_pass < 2; ++normalize_pass) {
-                       float sum = 0.0;
+                       double sum = 0.0;
                        for (int i = 0; i < src_bilinear_samples; ++i) {
-                               sum += bilinear_weights[(y * src_bilinear_samples + i) * 2 + 0];
+                               sum += fp16_to_fp64(bilinear_weights_fp16_ptr[i * 2 + 0]);
                        }
                        for (int i = 0; i < src_bilinear_samples; ++i) {
-                               bilinear_weights[(y * src_bilinear_samples + i) * 2 + 0] /= sum;
+                               bilinear_weights_fp16_ptr[i * 2 + 0] = fp64_to_fp16(
+                                       fp16_to_fp64(bilinear_weights_fp16_ptr[i * 2 + 0]) / sum);
                        }
                }
-       }       
+       }
 
        // Encode as a two-component texture. Note the GL_REPEAT.
        glActiveTexture(GL_TEXTURE0 + *sampler_num);
@@ -357,11 +368,12 @@ void SingleResamplePassEffect::update_texture(GLuint glsl_program_num, const str
        check_error();
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT);
        check_error();
-       glTexImage2D(GL_TEXTURE_2D, 0, GL_RG16F, src_bilinear_samples, dst_samples, 0, GL_RG, GL_FLOAT, bilinear_weights);
+       glTexImage2D(GL_TEXTURE_2D, 0, GL_RG16F, src_bilinear_samples, dst_samples, 0, GL_RG, GL_HALF_FLOAT, bilinear_weights_fp16);
        check_error();
 
        delete[] weights;
        delete[] bilinear_weights;
+       delete[] bilinear_weights_fp16;
 }
 
 void SingleResamplePassEffect::set_gl_state(GLuint glsl_program_num, const string &prefix, unsigned *sampler_num)
index 971e694..211e6d1 100644 (file)
@@ -203,12 +203,10 @@ TEST(ResampleEffectTest, HeavyResampleGetsSumRight) {
        ASSERT_TRUE(resample_effect->set_int("height", dheight));
        tester.run(out_data, GL_RED, COLORSPACE_sRGB, GAMMA_LINEAR);
 
-       // Require that we are within 10-bit accuracy. Note that this is for
-       // one pass only; some cards that don't have correct fp32 -> fp16
-       // rounding in the intermediate framebuffers will go outside this after
-       // a 2D resize. This limit is tight enough that it will be good enough
-       // for 8-bit accuracy, though.
-       expect_equal(expected_data, out_data, dwidth, dheight, 0.5 / 1023.0);
+       // Require that we are within 10-bit accuracy. Note that this limit is for
+       // one pass only, but the limit is tight enough that it should be good enough
+       // for 10-bit accuracy even after two passes.
+       expect_equal(expected_data, out_data, dwidth, dheight, 0.1 / 1023.0);
 }
 
 }  // namespace movit