Do our own fp16 conversion in ResampleEffect.

author Steinar H. Gunderson <sgunderson@bigfoot.com>
Sun, 9 Mar 2014 17:22:18 +0000 (18:22 +0100)
committer Steinar H. Gunderson <sgunderson@bigfoot.com>
Sun, 9 Mar 2014 17:25:37 +0000 (18:25 +0100)
diff --git a/resample_effect.cpp b/resample_effect.cpp

index dbc57883897e6fc4ba3488661c6376c8d186d480..4af60de7d3c47c6089a560f0d23c2ce62594cd81 100644 (file)
--- a/resample_effect.cpp
+++ b/resample_effect.cpp
@@ -10,6 +10,7 @@
  
  #include "effect_chain.h"
  #include "effect_util.h"
+#include "fp16.h"
  #include "resample_effect.h"
  #include "util.h"
  
@@ -142,7 +143,7 @@ void ResampleEffect::inform_input_size(unsigned input_num, unsigned width, unsig
         input_height = height;
         update_size();
  }
-               
+
  void ResampleEffect::update_size()
  {
         bool ok = true;
@@ -325,26 +326,36 @@ void SingleResamplePassEffect::update_texture(GLuint glsl_program_num, const str
  
         // Now that we know the right width, actually combine the samples.
         float *bilinear_weights = new float[dst_samples * src_bilinear_samples * 2];
+       fp16_int_t *bilinear_weights_fp16 = new fp16_int_t[dst_samples * src_bilinear_samples * 2];
         for (unsigned y = 0; y < dst_samples; ++y) {
+               float *bilinear_weights_ptr = bilinear_weights + (y * src_bilinear_samples) * 2;
+               fp16_int_t *bilinear_weights_fp16_ptr = bilinear_weights_fp16 + (y * src_bilinear_samples) * 2;
                 unsigned num_samples_saved = combine_samples(
                         weights + (y * src_samples) * 2,
-                       bilinear_weights + (y * src_bilinear_samples) * 2,
+                       bilinear_weights_ptr,
                         src_samples,
                         src_samples - src_bilinear_samples);
                 assert(int(src_samples) - int(num_samples_saved) == src_bilinear_samples);
  
+               // Convert to fp16.
+               for (int i = 0; i < src_bilinear_samples; ++i) {
+                       bilinear_weights_fp16_ptr[i * 2 + 0] = fp64_to_fp16(bilinear_weights_ptr[i * 2 + 0]);
+                       bilinear_weights_fp16_ptr[i * 2 + 1] = fp64_to_fp16(bilinear_weights_ptr[i * 2 + 1]);
+               }
+
                 // Normalize so that the sum becomes one. Note that we do it twice;
                 // this sometimes helps a tiny little bit when we have many samples.
                 for (int normalize_pass = 0; normalize_pass < 2; ++normalize_pass) {
-                       float sum = 0.0;
+                       double sum = 0.0;
                         for (int i = 0; i < src_bilinear_samples; ++i) {
-                               sum += bilinear_weights[(y * src_bilinear_samples + i) * 2 + 0];
+                               sum += fp16_to_fp64(bilinear_weights_fp16_ptr[i * 2 + 0]);
                         }
                         for (int i = 0; i < src_bilinear_samples; ++i) {
-                               bilinear_weights[(y * src_bilinear_samples + i) * 2 + 0] /= sum;
+                               bilinear_weights_fp16_ptr[i * 2 + 0] = fp64_to_fp16(
+                                       fp16_to_fp64(bilinear_weights_fp16_ptr[i * 2 + 0]) / sum);
                         }
                 }
-       }       
+       }
  
         // Encode as a two-component texture. Note the GL_REPEAT.
         glActiveTexture(GL_TEXTURE0 + *sampler_num);
@@ -357,11 +368,12 @@ void SingleResamplePassEffect::update_texture(GLuint glsl_program_num, const str
         check_error();
         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT);
         check_error();
-       glTexImage2D(GL_TEXTURE_2D, 0, GL_RG16F, src_bilinear_samples, dst_samples, 0, GL_RG, GL_FLOAT, bilinear_weights);
+       glTexImage2D(GL_TEXTURE_2D, 0, GL_RG16F, src_bilinear_samples, dst_samples, 0, GL_RG, GL_HALF_FLOAT, bilinear_weights_fp16);
         check_error();
  
         delete[] weights;
         delete[] bilinear_weights;
+       delete[] bilinear_weights_fp16;
  }
  
  void SingleResamplePassEffect::set_gl_state(GLuint glsl_program_num, const string &prefix, unsigned *sampler_num)
diff --git a/resample_effect_test.cpp b/resample_effect_test.cpp

index 971e694347e28d8e61760b13abb5027341413a83..211e6d123e65d93dbcc075f60ea8371b1eaa5736 100644 (file)
--- a/resample_effect_test.cpp
+++ b/resample_effect_test.cpp
@@ -203,12 +203,10 @@ TEST(ResampleEffectTest, HeavyResampleGetsSumRight) {
         ASSERT_TRUE(resample_effect->set_int("height", dheight));
         tester.run(out_data, GL_RED, COLORSPACE_sRGB, GAMMA_LINEAR);
  
-       // Require that we are within 10-bit accuracy. Note that this is for
-       // one pass only; some cards that don't have correct fp32 -> fp16
-       // rounding in the intermediate framebuffers will go outside this after
-       // a 2D resize. This limit is tight enough that it will be good enough
-       // for 8-bit accuracy, though.
-       expect_equal(expected_data, out_data, dwidth, dheight, 0.5 / 1023.0);
+       // Require that we are within 10-bit accuracy. Note that this limit is for
+       // one pass only, but the limit is tight enough that it should be good enough
+       // for 10-bit accuracy even after two passes.
+       expect_equal(expected_data, out_data, dwidth, dheight, 0.1 / 1023.0);
  }
  
  }  // namespace movit
author	Steinar H. Gunderson <sgunderson@bigfoot.com>
	Sun, 9 Mar 2014 17:22:18 +0000 (18:22 +0100)
committer	Steinar H. Gunderson <sgunderson@bigfoot.com>
	Sun, 9 Mar 2014 17:25:37 +0000 (18:25 +0100)
resample_effect.cpp		patch | blob | history
resample_effect_test.cpp		patch | blob | history