Implement GammaCompressionEffect using ALU ops instead of texture lookups.

author Steinar H. Gunderson <sgunderson@bigfoot.com>

Thu, 9 Jan 2014 23:51:52 +0000 (00:51 +0100)

committer Steinar H. Gunderson <sgunderson@bigfoot.com>

Thu, 9 Jan 2014 23:51:52 +0000 (00:51 +0100)
author Steinar H. Gunderson <sgunderson@bigfoot.com>
Thu, 9 Jan 2014 23:51:52 +0000 (00:51 +0100)
committer Steinar H. Gunderson <sgunderson@bigfoot.com>
Thu, 9 Jan 2014 23:51:52 +0000 (00:51 +0100)
diff --git a/gamma_compression_effect.cpp b/gamma_compression_effect.cpp

index 5581fca32c38c14e15fa7905460761a68d271ddb..b1a16a6f33af1523dcbcd81eaccb77d31f583b0f 100644 (file)
--- a/gamma_compression_effect.cpp
+++ b/gamma_compression_effect.cpp
@@ -3,14 +3,13 @@
  #include <assert.h>
  
  #include "gamma_compression_effect.h"
+#include "effect_util.h"
  #include "util.h"
  
  GammaCompressionEffect::GammaCompressionEffect()
         : destination_curve(GAMMA_LINEAR)
  {
         register_int("destination_curve", (int *)&destination_curve);
-       memset(compression_curve, 0, sizeof(compression_curve));
-       register_1d_texture("compression_curve_tex", compression_curve, COMPRESSION_CURVE_SIZE);
  }
  
  std::string GammaCompressionEffect::output_fragment_shader()
@@ -18,39 +17,96 @@ std::string GammaCompressionEffect::output_fragment_shader()
         if (destination_curve == GAMMA_LINEAR) {
                 return read_file("identity.frag");
         }
-       if (destination_curve == GAMMA_sRGB) {
-               for (unsigned i = 0; i < COMPRESSION_CURVE_SIZE; ++i) {
-                       float x = i / (float)(COMPRESSION_CURVE_SIZE - 1);
-                       if (x < 0.0031308f) {
-                               compression_curve[i] = 12.92f * x;
-                       } else {
-                               compression_curve[i] = 1.055f * pow(x, 1.0f / 2.4f) - 0.055f;
-                       }
-               }
-               invalidate_1d_texture("compression_curve_tex");
-               return read_file("gamma_compression_effect.frag");
-       }
-       if (destination_curve == GAMMA_REC_709 ||  // And Rec. 601, and 10-bit Rec. 2020.
+       if (destination_curve == GAMMA_sRGB ||
+           destination_curve == GAMMA_REC_709 ||  // Also includes Rec. 601, and 10-bit Rec. 2020.
             destination_curve == GAMMA_REC_2020_12_BIT) {
-               // Rec. 2020, page 3.
-               float alpha, beta;
-               if (destination_curve == GAMMA_REC_2020_12_BIT) {
-                       alpha = 1.0993f;
-                       beta = 0.0181f;
-               } else {
-                       alpha = 1.099f;
-                       beta = 0.018f;
-               }
-               for (unsigned i = 0; i < COMPRESSION_CURVE_SIZE; ++i) {
-                       float x = i / (float)(COMPRESSION_CURVE_SIZE - 1);
-                       if (x < beta) {
-                               compression_curve[i] = 4.5f * x;
-                       } else {
-                               compression_curve[i] = alpha * pow(x, 0.45f) - (alpha - 1.0f);
-                       }
-               }
-               invalidate_1d_texture("compression_curve_tex");
                 return read_file("gamma_compression_effect.frag");
         }
         assert(false);
  }
+
+void GammaCompressionEffect::set_gl_state(GLuint glsl_program_num, const std::string &prefix, unsigned *sampler_num)
+{
+       Effect::set_gl_state(glsl_program_num, prefix, sampler_num);
+
+       // See GammaExpansionEffect for more details about the approximations in use;
+       // we will primarily deal with the differences here.
+       //
+       // Like in expansion, we have a piecewise curve that for very low values
+       // (up to some β) are linear. Above β, we have a power curve that looks
+       // like this:
+       //
+       //   y = ɑ x^ɣ - (ɑ - 1)
+       //
+       // Like in expansion, we want to approximate this by some minimax polynomial
+       // in the range β..1. However, in this case, ɣ is typically around 0.4, and
+       // x^0.4 is actually very hard to approximate accurately in this range.
+       // We do a little trick by instead asking for a polynomial of s=sqrt(x),
+       // which means we instead need something like s^0.8, which is much easier.
+       // This warps the input space a bit as seen by the minimax algorithm,
+       // but since we are optimizing for _maximum_ error and not _average_,
+       // we should not add any extra weighting factors.
+       //
+       // However, since we have problems reaching the desired accuracy (~25%
+       // of a pixel level), especially for sRGB, we modify w(x) from
+       // GammaExpansionEffect to remove the special handling of the area
+       // around β; it is not really as useful when the next step is just a
+       // dither and round anyway. We keep it around 1, though, since that
+       // seems to hurt less.
+       //
+       // The Maple commands this time around become (again using sRGB as an example):
+       //
+       // > alpha := 1.055;
+       // > beta := 0.0031308;
+       // > gamma_ := 1.0/2.4;
+       // > w := x -> piecewise(x > 0.999, 10, 1);
+       // > numapprox[minimax](alpha * (x^2)^gamma_ - (alpha - 1), x=sqrt(beta)..1, [4,0], w(x^2), 'maxerror');
+       //
+       // Since the error here is possible to interpret on a uniform scale,
+       // we also show it as a value relative to a 8-, 10- or 12-bit pixel level,
+       // as appropriate.
+
+       if (destination_curve == GAMMA_sRGB) {
+               // From the Wikipedia article on sRGB; ɑ (called a+1 there) = 1.055,
+               // β = 0.0031308, ɣ = 1/2.4.
+               // maxerror      = 0.000785 = 0.200 * 255
+               // error at 1.0  = 0.000078 = 0.020 * 255
+               set_uniform_float(glsl_program_num, prefix, "linear_scale", 12.92);
+               set_uniform_float(glsl_program_num, prefix, "c0", -0.03679675939);
+               set_uniform_float(glsl_program_num, prefix, "c1", 1.443803073);
+               set_uniform_float(glsl_program_num, prefix, "c2", -0.9239780987);
+               set_uniform_float(glsl_program_num, prefix, "c3", 0.8060491596);
+               set_uniform_float(glsl_program_num, prefix, "c4", -0.2891558568);
+               set_uniform_float(glsl_program_num, prefix, "beta", 0.0031308);
+       }
+       if (destination_curve == GAMMA_REC_709) {  // Also includes Rec. 601, and 10-bit Rec. 2020.
+               // Rec. 2020, page 3; ɑ = 1.099, β = 0.018, ɣ = 0.45.
+               // maxerror      = 0.000131 = 0.033 * 255 = 0.134 * 1023
+               // error at 1.0  = 0.000013 = 0.003 * 255 = 0.013 * 1023
+               set_uniform_float(glsl_program_num, prefix, "linear_scale", 4.5);
+               set_uniform_float(glsl_program_num, prefix, "c0", -0.08541688528);
+               set_uniform_float(glsl_program_num, prefix, "c1", 1.292793370);
+               set_uniform_float(glsl_program_num, prefix, "c2", -0.4070417645);
+               set_uniform_float(glsl_program_num, prefix, "c3", 0.2923891828);
+               set_uniform_float(glsl_program_num, prefix, "c4", -0.09273699351);
+               set_uniform_float(glsl_program_num, prefix, "beta", 0.018);
+       }
+       if (destination_curve == GAMMA_REC_2020_12_BIT) {
+               // Rec. 2020, page 3; ɑ = 1.0993, β = 0.0181, ɣ = 0.45.
+               // maxerror      = 0.000130 = 0.533 * 4095
+               // error at 1.0  = 0.000013 = 0.053 * 4095
+               //
+               // Note that this error is above one half of a pixel level,
+               // which means that a few values will actually be off in the lowest
+               // bit. (Removing the constraint for x=1 will only take this down
+               // from 0.553 to 0.501; adding a fifth order can get it down to
+               // 0.167, although this assumes working in fp64 and not fp32.)
+               set_uniform_float(glsl_program_num, prefix, "linear_scale", 4.5);
+               set_uniform_float(glsl_program_num, prefix, "c0", -0.08569685663);
+               set_uniform_float(glsl_program_num, prefix, "c1", 1.293000900);
+               set_uniform_float(glsl_program_num, prefix, "c2", -0.4067291321);
+               set_uniform_float(glsl_program_num, prefix, "c3", 0.2919741179);
+               set_uniform_float(glsl_program_num, prefix, "c4", -0.09256205770);
+               set_uniform_float(glsl_program_num, prefix, "beta", 0.0181);
+       }
+}
diff --git a/gamma_compression_effect.frag b/gamma_compression_effect.frag

index 27e014e1182bc30d1b6a0abde0a3055c75e28bf1..e1e786cdf91426174011568b1367b913f3ff9d80 100644 (file)
--- a/gamma_compression_effect.frag
+++ b/gamma_compression_effect.frag
@@ -1,11 +1,24 @@
-// Compress to sRGB gamma curve.
+// Compress gamma curve.
+
+uniform float PREFIX(linear_scale);
+uniform float PREFIX(c0), PREFIX(c1), PREFIX(c2), PREFIX(c3), PREFIX(c4);
+uniform float PREFIX(beta);
  
  vec4 FUNCNAME(vec2 tc) {
         vec4 x = INPUT(tc);
  
-       x.r = texture1D(PREFIX(compression_curve_tex), x.r).x;
-       x.g = texture1D(PREFIX(compression_curve_tex), x.g).x;
-       x.b = texture1D(PREFIX(compression_curve_tex), x.b).x;
+       // We could reasonably get values outside (0.0, 1.0), but the formulas below
+       // are not valid outside that range, so clamp before we do anything else.
+       x.rgb = clamp(x.rgb, 0.0, 1.0);
+
+       vec3 a = x.rgb * PREFIX(linear_scale);
+
+       // Fourth-order polynomial approximation to pow(). See the .cpp file for details.
+       vec3 s = sqrt(x.rgb);
+       vec3 b = PREFIX(c0) + (PREFIX(c1) + (PREFIX(c2) + (PREFIX(c3) + PREFIX(c4) * s) * s) * s) * s;
+
+       vec3 f = vec3(greaterThan(x.rgb, vec3(PREFIX(beta))));
+       x = vec4(mix(a, b, f), x.a);
  
         return x;
  }
diff --git a/gamma_compression_effect.h b/gamma_compression_effect.h

index 235850a9d75550d7adb5f144e977cc6ebfc8ebd4..342e02630b480e7a36358e0411f2f041d28c549d 100644 (file)
--- a/gamma_compression_effect.h
+++ b/gamma_compression_effect.h
@@ -14,8 +14,6 @@
  #include "effect.h"
  #include "image_format.h"
  
-#define COMPRESSION_CURVE_SIZE 4096
-
  class GammaCompressionEffect : public Effect {
  private:
         // Should not be instantiated by end users.
@@ -25,6 +23,7 @@ private:
  public:
         virtual std::string effect_type_id() const { return "GammaCompressionEffect"; }
         std::string output_fragment_shader();
+       virtual void set_gl_state(GLuint glsl_program_num, const std::string &prefix, unsigned *sampler_num);
  
         virtual bool needs_srgb_primaries() const { return false; }
  
@@ -34,7 +33,6 @@ public:
  
  private:
         GammaCurve destination_curve;
-       float compression_curve[COMPRESSION_CURVE_SIZE];
  };
  
  #endif // !defined(_MOVIT_GAMMA_COMPRESSION_EFFECT_H)
diff --git a/gamma_compression_effect_test.cpp b/gamma_compression_effect_test.cpp

index e65acee34e4974b6c8a2a500a15556b5255c982e..465d23948cd623808b20af119839959893fb7800 100644 (file)
--- a/gamma_compression_effect_test.cpp
+++ b/gamma_compression_effect_test.cpp
@@ -2,7 +2,10 @@
  //
  // Pretty much the inverse of the GammaExpansionEffect tests;
  // EffectChainTest tests that they are actually inverses.
+// However, the accuracy tests are somewhat simpler, since we
+// only need to care about absolute errors and not relative.
  
+#include <math.h>
  #include <GL/glew.h>
  #include "gtest/gtest.h"
  #include "image_format.h"
@@ -12,16 +15,18 @@ TEST(GammaCompressionEffectTest, sRGB_KeyValues) {
         float data[] = {
                 0.0f, 1.0f,
                 0.00309f, 0.00317f,   // On either side of the discontinuity.
+               -0.5f, 1.5f,          // To check clamping.
         };
         float expected_data[] = {
                 0.0f, 1.0f,
                 0.040f, 0.041f,
+               0.0f, 1.0f,
         };
         float out_data[4];
-       EffectChainTester tester(data, 2, 2, FORMAT_GRAYSCALE, COLORSPACE_sRGB, GAMMA_LINEAR);
+       EffectChainTester tester(data, 2, 3, FORMAT_GRAYSCALE, COLORSPACE_sRGB, GAMMA_LINEAR);
         tester.run(out_data, GL_RED, COLORSPACE_sRGB, GAMMA_sRGB);
  
-       expect_equal(expected_data, out_data, 2, 2);
+       expect_equal(expected_data, out_data, 2, 3);
  }
  
  TEST(GammaCompressionEffectTest, sRGB_RampAlwaysIncreases) {
@@ -38,6 +43,33 @@ TEST(GammaCompressionEffectTest, sRGB_RampAlwaysIncreases) {
         }
  }
  
+TEST(GammaCompressionEffectTest, sRGB_Accuracy) {
+       float data[256], expected_data[256], out_data[256];
+
+       for (int i = 0; i < 256; ++i) {
+               double x = i / 255.0;
+
+               expected_data[i] = x;
+
+               // From the Wikipedia article on sRGB.
+               if (x < 0.04045) {
+                       data[i] = x / 12.92;
+               } else {
+                       data[i] = pow((x + 0.055) / 1.055, 2.4);
+               }
+       }
+
+       EffectChainTester tester(data, 256, 1, FORMAT_GRAYSCALE, COLORSPACE_sRGB, GAMMA_LINEAR, GL_RGBA32F);
+       tester.run(out_data, GL_RED, COLORSPACE_sRGB, GAMMA_sRGB);
+
+       // Maximum absolute error is 25% of one pixel level. For comparison,
+       // a straightforward ALU solution (using a branch and pow()), used as a
+       // “high anchor” to indicate limitations of float arithmetic etc.,
+       // reaches maximum absolute error of 3.7% of one pixel level
+       // and rms of 3.2e-6.
+       expect_equal(expected_data, out_data, 256, 1, 0.25 / 255.0, 1e-4);
+}
+
  TEST(GammaCompressionEffectTest, Rec709_KeyValues) {
         float data[] = {
                 0.0f, 1.0f,
@@ -68,23 +100,113 @@ TEST(GammaCompressionEffectTest, Rec709_RampAlwaysIncreases) {
         }
  }
  
+TEST(GammaCompressionEffectTest, Rec709_Accuracy) {
+       float data[256], expected_data[256], out_data[256];
+
+       for (int i = 0; i < 256; ++i) {
+               double x = i / 255.0;
+
+               expected_data[i] = x;
+
+               // Rec. 2020, page 3.
+               if (x < 0.018 * 4.5) {
+                       data[i] = x / 4.5;
+               } else {
+                       data[i] = pow((x + 0.099) / 1.099, 1.0 / 0.45);
+               }
+       }
+
+       EffectChainTester tester(data, 256, 1, FORMAT_GRAYSCALE, COLORSPACE_sRGB, GAMMA_LINEAR, GL_RGBA32F);
+       tester.run(out_data, GL_RED, COLORSPACE_sRGB, GAMMA_REC_709);
+
+       // Maximum absolute error is 25% of one pixel level. For comparison,
+       // a straightforward ALU solution (using a branch and pow()), used as a
+       // “high anchor” to indicate limitations of float arithmetic etc.,
+       // reaches maximum absolute error of 3.7% of one pixel level
+       // and rms of 3.5e-6.
+       expect_equal(expected_data, out_data, 256, 1, 0.25 / 255.0, 1e-5);
+}
+
+// This test tests the same gamma ramp as Rec709_Accuracy, but with 10-bit
+// input range and somewhat looser error bounds. (One could claim that this is
+// already on the limit of what we can reasonably do with fp16 input, if you
+// look at the local relative error.)
+TEST(GammaCompressionEffectTest, Rec2020_10Bit_Accuracy) {
+       float data[1024], expected_data[1024], out_data[1024];
+
+       for (int i = 0; i < 1024; ++i) {
+               double x = i / 1023.0;
+
+               expected_data[i] = x;
+
+               // Rec. 2020, page 3.
+               if (x < 0.018 * 4.5) {
+                       data[i] = x / 4.5;
+               } else {
+                       data[i] = pow((x + 0.099) / 1.099, 1.0 / 0.45);
+               }
+       }
+
+       EffectChainTester tester(data, 1024, 1, FORMAT_GRAYSCALE, COLORSPACE_sRGB, GAMMA_LINEAR, GL_RGBA32F);
+       tester.run(out_data, GL_RED, COLORSPACE_sRGB, GAMMA_REC_2020_10_BIT);
+
+       // Maximum absolute error is 30% of one pixel level. For comparison,
+       // a straightforward ALU solution (using a branch and pow()), used as a
+       // “high anchor” to indicate limitations of float arithmetic etc.,
+       // reaches maximum absolute error of 25.2% of one pixel level
+       // and rms of 1.8e-6, so this is probably mostly related to input precision.
+       expect_equal(expected_data, out_data, 1024, 1, 0.30 / 1023.0, 1e-5);
+}
+
  TEST(GammaCompressionEffectTest, Rec2020_12BitIsVeryCloseToRec709) {
-       float data[256];
-       for (unsigned i = 0; i < 256; ++i) {
-               data[i] = i / 255.0f;
+       float data[4096];
+       for (unsigned i = 0; i < 4096; ++i) {
+               data[i] = i / 4095.0f;
         }
-       float out_data_709[256];
-       float out_data_2020[256];
+       float out_data_709[4096];
+       float out_data_2020[4096];
  
-       EffectChainTester tester(data, 256, 1, FORMAT_GRAYSCALE, COLORSPACE_sRGB, GAMMA_LINEAR);
+       EffectChainTester tester(data, 4096, 1, FORMAT_GRAYSCALE, COLORSPACE_sRGB, GAMMA_LINEAR);
         tester.run(out_data_709, GL_RED, COLORSPACE_sRGB, GAMMA_REC_709);
-       EffectChainTester tester2(data, 256, 1, FORMAT_GRAYSCALE, COLORSPACE_sRGB, GAMMA_LINEAR);
+       EffectChainTester tester2(data, 4096, 1, FORMAT_GRAYSCALE, COLORSPACE_sRGB, GAMMA_LINEAR);
         tester2.run(out_data_2020, GL_RED, COLORSPACE_sRGB, GAMMA_REC_2020_12_BIT);
  
         double sqdiff = 0.0;
-       for (unsigned i = 0; i < 256; ++i) {
-               EXPECT_NEAR(out_data_709[i], out_data_2020[i], 1e-3);
+       for (unsigned i = 0; i < 4096; ++i) {
+               EXPECT_NEAR(out_data_709[i], out_data_2020[i], 0.001);
                 sqdiff += (out_data_709[i] - out_data_2020[i]) * (out_data_709[i] - out_data_2020[i]);
         }
         EXPECT_GT(sqdiff, 1e-6);
  }
+
+// The fp16 _input_ provided by FlatInput is not enough to distinguish between
+// all of the possible 12-bit input values (every other level translates to the
+// same value). Thus, this test has extremely loose bounds; if we ever decide
+// to start supporting fp32, we should re-run this and tighten them a lot.
+TEST(GammaCompressionEffectTest, Rec2020_12Bit_Inaccuracy) {
+       float data[4096], expected_data[4096], out_data[4096];
+
+       for (int i = 0; i < 4096; ++i) {
+               double x = i / 4095.0;
+
+               expected_data[i] = x;
+
+               // Rec. 2020, page 3.
+               if (x < 0.0181 * 4.5) {
+                       data[i] = x / 4.5;
+               } else {
+                       data[i] = pow((x + 0.0993) / 1.0993, 1.0 / 0.45);
+               }
+       }
+
+       EffectChainTester tester(data, 4096, 1, FORMAT_GRAYSCALE, COLORSPACE_sRGB, GAMMA_LINEAR, GL_RGBA32F);
+       tester.run(out_data, GL_RED, COLORSPACE_sRGB, GAMMA_REC_2020_12_BIT);
+
+       // Maximum absolute error is 120% of one pixel level. For comparison,
+       // a straightforward ALU solution (using a branch and pow()), used as a
+       // “high anchor” to indicate limitations of float arithmetic etc.,
+       // reaches maximum absolute error of 71.1% of one pixel level
+       // and rms of 0.9e-6, so this is probably a combination of input
+       // precision and inaccuracies in the polynomial approximation.
+       expect_equal(expected_data, out_data, 4096, 1, 1.2 / 4095.0, 1e-5);
+}
author	Steinar H. Gunderson <sgunderson@bigfoot.com>
	Thu, 9 Jan 2014 23:51:52 +0000 (00:51 +0100)
committer	Steinar H. Gunderson <sgunderson@bigfoot.com>
	Thu, 9 Jan 2014 23:51:52 +0000 (00:51 +0100)
gamma_compression_effect.cpp		patch \| blob \| history
gamma_compression_effect.frag		patch \| blob \| history
gamma_compression_effect.h		patch \| blob \| history
gamma_compression_effect_test.cpp		patch \| blob \| history