From: Steinar H. Gunderson <sgunderson@bigfoot.com>
Date: Sun, 28 Feb 2016 00:53:55 +0000 (+0100)
Subject: Allow storing values in intermediate framebuffers as sqrt(x).
X-Git-Tag: 1.4.0~9
X-Git-Url: https://git.sesse.net/?p=movit;a=commitdiff_plain;h=2d3b64d51be3e8f38a8c224f558de4ceb222e799

Allow storing values in intermediate framebuffers as sqrt(x).

Together with GL_RGB10_A2, this would seem to be an even better tradeoff for
many chains than GL_SRGB8_ALPHA8 is, as long as you don't need intermediate
alpha. (We verify its accuracy with a unit test.)

This changes the API for specifying intermediate framebuffers, but that API
was never in a release, so it should be fine.

Also document a rather obscure problem where, if you can actually hold on to
non-linear values across a bounce buffer, you don't really want to store them
in sRGB encoding. (The square-root version actually avoids this problem.
I guess we could snoop on the type and do a similar thing if we see it's a
GL_SRGB* encoding, but it seems so obscure that we can ignore it for now.)
---

diff --git a/effect_chain.cpp b/effect_chain.cpp
index 1d9aea8..b0a1477 100644
--- a/effect_chain.cpp
+++ b/effect_chain.cpp
@@ -32,13 +32,14 @@ using namespace std;
 
 namespace movit {
 
-EffectChain::EffectChain(float aspect_nom, float aspect_denom, ResourcePool *resource_pool, GLenum intermediate_format)
+EffectChain::EffectChain(float aspect_nom, float aspect_denom, ResourcePool *resource_pool)
 	: aspect_nom(aspect_nom),
 	  aspect_denom(aspect_denom),
 	  output_color_rgba(false),
 	  output_color_ycbcr(false),
 	  dither_effect(NULL),
-	  intermediate_format(intermediate_format),
+	  intermediate_format(GL_RGBA16F),
+	  intermediate_transformation(NO_FRAMEBUFFER_TRANSFORMATION),
 	  num_dither_bits(0),
 	  output_origin(OUTPUT_ORIGIN_BOTTOM_LEFT),
 	  finalized(false),
@@ -332,7 +333,14 @@ void EffectChain::compile_glsl_program(Phase *phase)
 	
 		frag_shader += string("uniform sampler2D tex_") + effect_id + ";\n";
 		frag_shader += string("vec4 ") + effect_id + "(vec2 tc) {\n";
-		frag_shader += "\treturn tex2D(tex_" + string(effect_id) + ", tc);\n";
+		frag_shader += "\tvec4 tmp = tex2D(tex_" + string(effect_id) + ", tc);\n";
+
+		if (intermediate_transformation == SQUARE_ROOT_FRAMEBUFFER_TRANSFORMATION &&
+		    phase->inputs[i]->output_node->output_gamma_curve == GAMMA_LINEAR) {
+			frag_shader += "\ttmp.rgb *= tmp.rgb;\n";
+		}
+
+		frag_shader += "\treturn tmp;\n";
 		frag_shader += "}\n";
 		frag_shader += "\n";
 
@@ -415,6 +423,15 @@ void EffectChain::compile_glsl_program(Phase *phase)
 			frag_shader_outputs.push_back("RGBA");
 		}
 	}
+
+	// If we're bouncing to a temporary texture, signal transformation if desired.
+	if (!phase->output_node->outgoing_links.empty()) {
+		if (intermediate_transformation == SQUARE_ROOT_FRAMEBUFFER_TRANSFORMATION &&
+		    phase->output_node->output_gamma_curve == GAMMA_LINEAR) {
+			frag_shader += "#define SQUARE_ROOT_TRANSFORMATION 1\n";
+		}
+	}
+
 	frag_shader.append(read_file("footer.frag"));
 
 	// Collect uniforms from all effects and output them. Note that this needs
diff --git a/effect_chain.h b/effect_chain.h
index 718d5ff..071f9eb 100644
--- a/effect_chain.h
+++ b/effect_chain.h
@@ -99,6 +99,19 @@ enum OutputOrigin {
 	OUTPUT_ORIGIN_TOP_LEFT,
 };
 
+// Transformation to apply (if any) to pixel data in temporary buffers.
+// See set_intermediate_format() below for more information.
+enum FramebufferTransformation {
+	// The default; just store the value. This is what you usually want.
+	NO_FRAMEBUFFER_TRANSFORMATION,
+
+	// If the values are in linear light, store sqrt(x) to the framebuffer
+	// instead of x itself, of course undoing it with x² on read. Useful as
+	// a rough approximation to the sRGB curve. (If the values are not in
+	// linear light, just store them as-is.)
+	SQUARE_ROOT_FRAMEBUFFER_TRANSFORMATION,
+};
+
 // A node in the graph; basically an effect and some associated information.
 class Node {
 public:
@@ -194,7 +207,7 @@ public:
 	// will create its own that is not shared with anything else. Does not take
 	// ownership of the passed-in ResourcePool, but will naturally take ownership
 	// of its own internal one if created.
-	EffectChain(float aspect_nom, float aspect_denom, ResourcePool *resource_pool = NULL, GLenum intermediate_format = GL_RGBA16F);
+	EffectChain(float aspect_nom, float aspect_denom, ResourcePool *resource_pool = NULL);
 	~EffectChain();
 
 	// User API:
@@ -277,6 +290,43 @@ public:
 		this->output_origin = output_origin;
 	}
 
+	// Set intermediate format for framebuffers used when we need to bounce
+	// to a temporary texture. The default, GL_RGBA16F, is good for most uses;
+	// it is precise, has good range, and is relatively efficient. However,
+	// if you need even more speed and your chain can do with some loss of
+	// accuracy, you can change the format here (before calling finalize).
+	// Calculations between bounce buffers are still in 32-bit floating-point
+	// no matter what you specify.
+	//
+	// Of special interest is GL_SRGB8_ALPHA8, which stores sRGB-encoded RGB
+	// and linear alpha; this is half the memory bandwidth of GL_RGBA16F,
+	// while retaining reasonable precision for typical image data. It will,
+	// however, cause some gamut clipping if your colorspace is far from sRGB,
+	// as it cannot represent values outside [0,1]. NOTE: If you construct
+	// a chain where you end up bouncing pixels in non-linear light this
+	// will do the wrong thing. However, it's hard to see how this
+	// could happen in a non-contrived chain; few effects ever need texture
+	// bounce or resizing without also combining multiple pixels, which
+	// really needs linear light and thus triggers a conversion before the
+	// bounce.
+	//
+	// If you don't need alpha (or can do with very little of it), GL_RGB10_A2
+	// is even better, as it has two more bits for each color component. There
+	// is no GL_SRGB10, unfortunately, so on its own, it is somewhat worse than
+	// GL_SRGB8, but you can set <transformation> to SQUARE_ROOT_FRAMEBUFFER_TRANSFORMATION,
+	// and sqrt(x) will be stored instead of x. This is a rough approximation to
+	// the sRGB curve, and reduces maximum error (in sRGB distance) by almost an
+	// order of magnitude, well below what you can get from 8-bit true sRGB.
+	// (Note that this strategy avoids the problem with bounced non-linear data
+	// above, since the square root is turned off in that case.)
+	void set_intermediate_format(
+		GLenum intermediate_format,
+		FramebufferTransformation transformation = NO_FRAMEBUFFER_TRANSFORMATION)
+	{
+		this->intermediate_format = intermediate_format;
+		this->intermediate_transformation = transformation;
+	}
+
 	void finalize();
 
 	// Measure the GPU time used for each actual phase during rendering.
@@ -439,6 +489,7 @@ private:
 	std::vector<Phase *> phases;
 
 	GLenum intermediate_format;
+	FramebufferTransformation intermediate_transformation;
 	unsigned num_dither_bits;
 	OutputOrigin output_origin;
 	bool finalized;
diff --git a/effect_chain_test.cpp b/effect_chain_test.cpp
index adf832a..5ffb105 100644
--- a/effect_chain_test.cpp
+++ b/effect_chain_test.cpp
@@ -1308,7 +1308,8 @@ TEST(EffectChainTest, sRGBIntermediate) {
 		0.0f, 0.5f, 0.0f, 1.0f,
 	};
 	float out_data[4];
-	EffectChainTester tester(data, 1, 1, FORMAT_RGBA_PREMULTIPLIED_ALPHA, COLORSPACE_sRGB, GAMMA_LINEAR, GL_RGBA16F_ARB, GL_SRGB8);
+	EffectChainTester tester(data, 1, 1, FORMAT_RGBA_PREMULTIPLIED_ALPHA, COLORSPACE_sRGB, GAMMA_LINEAR);
+	tester.get_chain()->set_intermediate_format(GL_SRGB8);
 	tester.get_chain()->add_effect(new IdentityEffect());
 	tester.get_chain()->add_effect(new BouncingIdentityEffect());
 	tester.run(out_data, GL_RGBA, COLORSPACE_sRGB, GAMMA_LINEAR);
@@ -1319,4 +1320,100 @@ TEST(EffectChainTest, sRGBIntermediate) {
 	    << "Expected sRGB to be able to represent 0.5 approximately (got " << out_data[1] << ")";
 }
 
+// An effect that is like IdentityEffect, but also does not require linear light.
+class PassThroughEffect : public IdentityEffect {
+public:
+	PassThroughEffect() {}
+	virtual string effect_type_id() const { return "PassThroughEffect"; }
+	virtual bool needs_linear_light() const { return false; }
+	AlphaHandling alpha_handling() const { return DONT_CARE_ALPHA_TYPE; }
+};
+
+// Same, just also bouncing.
+class BouncingPassThroughEffect : public BouncingIdentityEffect {
+public:
+	BouncingPassThroughEffect() {}
+	virtual string effect_type_id() const { return "BouncingPassThroughEffect"; }
+	virtual bool needs_linear_light() const { return false; }
+	bool needs_texture_bounce() const { return true; }
+	AlphaHandling alpha_handling() const { return DONT_CARE_ALPHA_TYPE; }
+};
+
+TEST(EffectChainTest, Linear10bitIntermediateAccuracy) {
+	// Note that we do the comparison in sRGB space, which is what we
+	// typically would want; however, we do the sRGB conversion ourselves
+	// to avoid compounding errors from shader conversions into the
+	// analysis.
+	const int size = 4096;  // 12-bit.
+	float linear_data[size], data[size], out_data[size];
+
+	for (int i = 0; i < size; ++i) {
+		linear_data[i] = i / double(size - 1);
+		data[i] = srgb_to_linear(linear_data[i]);
+	}
+
+	EffectChainTester tester(data, size, 1, FORMAT_GRAYSCALE, COLORSPACE_sRGB, GAMMA_LINEAR, GL_RGBA32F);
+	tester.get_chain()->set_intermediate_format(GL_RGB10_A2);
+	tester.get_chain()->add_effect(new IdentityEffect());
+	tester.get_chain()->add_effect(new BouncingIdentityEffect());
+	tester.run(out_data, GL_RED, COLORSPACE_sRGB, GAMMA_LINEAR);
+
+	for (int i = 0; i < size; ++i) {
+		out_data[i] = linear_to_srgb(out_data[i]);
+	}
+
+	// This maximum error is pretty bad; about 6.5 levels of a 10-bit sRGB
+	// framebuffer.
+	expect_equal(linear_data, out_data, size, 1, 7e-3, 2e-5);
+}
+
+TEST(EffectChainTest, SquareRoot10bitIntermediateAccuracy) {
+	// Note that we do the comparison in sRGB space, which is what we
+	// typically would want; however, we do the sRGB conversion ourselves
+	// to avoid compounding errors from shader conversions into the
+	// analysis.
+	const int size = 4096;  // 12-bit.
+	float linear_data[size], data[size], out_data[size];
+
+	for (int i = 0; i < size; ++i) {
+		linear_data[i] = i / double(size - 1);
+		data[i] = srgb_to_linear(linear_data[i]);
+	}
+
+	EffectChainTester tester(data, size, 1, FORMAT_GRAYSCALE, COLORSPACE_sRGB, GAMMA_LINEAR, GL_RGBA32F);
+	tester.get_chain()->set_intermediate_format(GL_RGB10_A2, SQUARE_ROOT_FRAMEBUFFER_TRANSFORMATION);
+	tester.get_chain()->add_effect(new IdentityEffect());
+	tester.get_chain()->add_effect(new BouncingIdentityEffect());
+	tester.run(out_data, GL_RED, COLORSPACE_sRGB, GAMMA_LINEAR);
+
+	for (int i = 0; i < size; ++i) {
+		out_data[i] = linear_to_srgb(out_data[i]);
+	}
+
+	// This maximum error is much better; about 0.7 levels of a 10-bit sRGB
+	// framebuffer (ideal would be 0.5). That is an order of magnitude better
+	// than in the linear test above. The RMS error is much better, too.
+	expect_equal(linear_data, out_data, size, 1, 7e-4, 5e-6);
+}
+
+TEST(EffectChainTest, SquareRootIntermediateIsTurnedOffForNonLinearData) {
+	const int size = 256;  // 8-bit.
+	float data[size], out_data[size];
+
+	for (int i = 0; i < size; ++i) {
+		data[i] = i / double(size - 1);
+	}
+
+	EffectChainTester tester(data, size, 1, FORMAT_GRAYSCALE, COLORSPACE_sRGB, GAMMA_REC_601, GL_RGBA32F);
+	tester.get_chain()->set_intermediate_format(GL_RGB8, SQUARE_ROOT_FRAMEBUFFER_TRANSFORMATION);
+	tester.get_chain()->add_effect(new PassThroughEffect());
+	tester.get_chain()->add_effect(new BouncingPassThroughEffect());
+	tester.run(out_data, GL_RED, COLORSPACE_sRGB, GAMMA_REC_601);
+
+	// The data should be passed through nearly exactly, since there is no effect
+	// on the path that requires linear light. (Actually, it _is_ exact modulo
+	// fp32 errors, but the error bound is a strict _less than_, so it cannot be zero.)
+	expect_equal(data, out_data, size, 1, 1e-6, 1e-6);
+}
+
 }  // namespace movit
diff --git a/footer.frag b/footer.frag
index 4976136..339ef7b 100644
--- a/footer.frag
+++ b/footer.frag
@@ -13,6 +13,10 @@
 #define YCBCR_ALSO_OUTPUT_RGBA 0
 #endif
 
+#ifndef SQUARE_ROOT_TRANSFORMATION
+#define SQUARE_ROOT_TRANSFORMATION 0
+#endif
+
 #if YCBCR_OUTPUT_PLANAR
 out vec4 Y;
 out vec4 Cb;
@@ -38,6 +42,11 @@ void main()
 	vec4 color0 = INPUT(tc);
 #endif
 
+#if SQUARE_ROOT_TRANSFORMATION
+	// Make sure we don't give negative values to sqrt.
+	color0.rgb = sqrt(max(color0.rgb, 0.0));
+#endif
+
 #if YCBCR_OUTPUT_PLANAR
 	Y = color0.rrra;
 	Cb = color0.ggga;
diff --git a/test_util.cpp b/test_util.cpp
index 05eac31..9d12a12 100644
--- a/test_util.cpp
+++ b/test_util.cpp
@@ -46,9 +46,8 @@ void vertical_flip(T *data, unsigned width, unsigned height)
 
 EffectChainTester::EffectChainTester(const float *data, unsigned width, unsigned height,
                                      MovitPixelFormat pixel_format, Colorspace color_space, GammaCurve gamma_curve,
-                                     GLenum framebuffer_format,
-                                     GLenum intermediate_format)
-	: chain(width, height, get_static_pool(), intermediate_format),
+                                     GLenum framebuffer_format)
+	: chain(width, height, get_static_pool()),
 	  width(width),
 	  height(height),
 	  framebuffer_format(framebuffer_format),
diff --git a/test_util.h b/test_util.h
index 43d0216..728f84b 100644
--- a/test_util.h
+++ b/test_util.h
@@ -15,8 +15,7 @@ public:
 	                  MovitPixelFormat pixel_format = FORMAT_GRAYSCALE,
 	                  Colorspace color_space = COLORSPACE_sRGB,
 	                  GammaCurve gamma_curve = GAMMA_LINEAR,
-	                  GLenum framebuffer_format = GL_RGBA16F_ARB,
-	                  GLenum intermediate_format = GL_RGBA16F_ARB);
+	                  GLenum framebuffer_format = GL_RGBA16F_ARB);
 	~EffectChainTester();
 	
 	EffectChain *get_chain() { return &chain; }
diff --git a/version.h b/version.h
index 599f977..aad8b87 100644
--- a/version.h
+++ b/version.h
@@ -5,6 +5,6 @@
 // changes, even within git versions. There is no specific version
 // documentation outside the regular changelogs, though.
 
-#define MOVIT_VERSION 21
+#define MOVIT_VERSION 22
 
 #endif // !defined(_MOVIT_VERSION_H)