From 6bec5fc3abc5f57c6cddec2148626cca9a94bedf Mon Sep 17 00:00:00 2001
From: "Steinar H. Gunderson" <sgunderson@bigfoot.com>
Date: Sun, 4 Oct 2015 02:37:56 +0200
Subject: [PATCH] Allow dual Y'CbCr/RGBA outputs.

The intended use case is to have Y'CbCr for encoding output but keep
RGBA around for easier preview. This causes a few effects to need to
pass arrays around; it's a bit ugly to special-case them like this,
but I'm concerned about how well various shader compilers would
optimize the result if we went full multi-model everywhere (this is
untested, though).

ABI break due to changed EffectChain size.
---
 dither_effect.frag               | 26 +++++++++++-
 dither_effect_test.cpp           |  3 ++
 effect_chain.cpp                 | 21 +++++++---
 effect_chain.h                   | 14 ++++---
 footer.130.frag                  | 29 +++++++++----
 footer.300es.frag                | 29 +++++++++----
 version.h                        |  2 +-
 ycbcr_conversion_effect.frag     |  9 ++++
 ycbcr_conversion_effect_test.cpp | 72 ++++++++++++++++++++++++++++++++
 9 files changed, 179 insertions(+), 26 deletions(-)

diff --git a/dither_effect.frag b/dither_effect.frag
index b12ecd7..f271b63 100644
--- a/dither_effect.frag
+++ b/dither_effect.frag
@@ -3,14 +3,36 @@
 // uniform vec2 PREFIX(tc_scale);
 // uniform float PREFIX(round_fac), PREFIX(inv_round_fac);
 
+#if YCBCR_ALSO_OUTPUT_RGBA
+
+// There are two values to dither; otherwise, this is exactly the same as the
+// algorithm below (so the comments are not duplicated here).
+
+vec4[2] FUNCNAME(vec2 tc) {
+	vec4[2] result = INPUT(tc);
+	float d = tex2D(PREFIX(dither_tex), tc * PREFIX(tc_scale)).x;
+	result[0].rgb += vec3(d);
+	result[1].rgb += vec3(d);
+
+#if NEED_EXPLICIT_ROUND
+	result[0] = round(result[0] * vec4(PREFIX(round_fac))) * vec4(PREFIX(inv_round_fac));
+	result[1] = round(result[1] * vec4(PREFIX(round_fac))) * vec4(PREFIX(inv_round_fac));
+#endif
+
+	return result;
+}
+
+#else
+
 vec4 FUNCNAME(vec2 tc) {
 	vec4 result = INPUT(tc);
+	float d = tex2D(PREFIX(dither_tex), tc * PREFIX(tc_scale)).x;
 
 	// Don't dither alpha; the case of alpha=255 (1.0) is very important to us,
 	// and if there's any inaccuracy earlier in the chain so that it becomes e.g.
 	// 254.8, it's better to just get it rounded off than to dither and have it
 	// possibly get down to 254. This is not the case for the color components.
-	result.rgb += tex2D(PREFIX(dither_tex), tc * PREFIX(tc_scale)).xxx;
+	result.rgb += vec3(d);
 
 	// NEED_EXPLICIT_ROUND will be #defined to 1 if the GPU has inaccurate
 	// fp32 -> int8 framebuffer rounding, and 0 otherwise.
@@ -20,3 +42,5 @@ vec4 FUNCNAME(vec2 tc) {
 
 	return result;
 }
+
+#endif
diff --git a/dither_effect_test.cpp b/dither_effect_test.cpp
index 1b72ecb..a604fac 100644
--- a/dither_effect_test.cpp
+++ b/dither_effect_test.cpp
@@ -1,4 +1,7 @@
 // Unit tests for DitherEffect.
+//
+// Note: Dithering of multiple outputs is tested (somewhat weakly)
+// in YCbCrConversionEffectTest.
 
 #include <epoxy/gl.h>
 #include <math.h>
diff --git a/effect_chain.cpp b/effect_chain.cpp
index 5c4b522..751a6be 100644
--- a/effect_chain.cpp
+++ b/effect_chain.cpp
@@ -37,6 +37,8 @@ namespace movit {
 EffectChain::EffectChain(float aspect_nom, float aspect_denom, ResourcePool *resource_pool)
 	: aspect_nom(aspect_nom),
 	  aspect_denom(aspect_denom),
+	  output_color_rgba(false),
+	  output_color_ycbcr(false),
 	  dither_effect(NULL),
 	  num_dither_bits(0),
 	  output_origin(OUTPUT_ORIGIN_BOTTOM_LEFT),
@@ -77,18 +79,20 @@ Input *EffectChain::add_input(Input *input)
 void EffectChain::add_output(const ImageFormat &format, OutputAlphaFormat alpha_format)
 {
 	assert(!finalized);
+	assert(!output_color_rgba);
 	output_format = format;
 	output_alpha_format = alpha_format;
-	output_color_type = OUTPUT_COLOR_RGB;
+	output_color_rgba = true;
 }
 
 void EffectChain::add_ycbcr_output(const ImageFormat &format, OutputAlphaFormat alpha_format,
                                    const YCbCrFormat &ycbcr_format, YCbCrOutputSplitting output_splitting)
 {
 	assert(!finalized);
+	assert(!output_color_ycbcr);
 	output_format = format;
 	output_alpha_format = alpha_format;
-	output_color_type = OUTPUT_COLOR_YCBCR;
+	output_color_ycbcr = true;
 	output_ycbcr_format = ycbcr_format;
 	output_ycbcr_splitting = output_splitting;
 
@@ -365,7 +369,7 @@ void EffectChain::compile_glsl_program(Phase *phase)
 	frag_shader += string("#define INPUT ") + phase->effect_ids[phase->effects.back()] + "\n";
 
 	// If we're the last phase, add the right #defines for Y'CbCr multi-output as needed.
-	if (phase->output_node->outgoing_links.empty() && output_color_type == OUTPUT_COLOR_YCBCR) {
+	if (phase->output_node->outgoing_links.empty() && output_color_ycbcr) {
 		switch (output_ycbcr_splitting) {
 		case YCBCR_OUTPUT_INTERLEAVED:
 			// No #defines set.
@@ -379,6 +383,13 @@ void EffectChain::compile_glsl_program(Phase *phase)
 		default:
 			assert(false);
 		}
+
+		if (output_color_rgba) {
+			// Note: This needs to go in the header, since it is not only
+			// the output footer that needs to see it; YCbCrConversionEffect
+			// and DitherEffect do, too.
+			frag_shader_header += "#define YCBCR_ALSO_OUTPUT_RGBA 1\n";
+		}
 	}
 	frag_shader.append(read_version_dependent_file("footer", "frag"));
 
@@ -1519,8 +1530,8 @@ void EffectChain::fix_output_gamma()
 // gamma-encoded data.
 void EffectChain::add_ycbcr_conversion_if_needed()
 {
-	assert(output_color_type == OUTPUT_COLOR_RGB || output_color_type == OUTPUT_COLOR_YCBCR);
-	if (output_color_type != OUTPUT_COLOR_YCBCR) {
+	assert(output_color_rgba || output_color_ycbcr);
+	if (!output_color_ycbcr) {
 		return;
 	}
 	Node *output = find_output_node();
diff --git a/effect_chain.h b/effect_chain.h
index 2e89c3b..9fb1d4e 100644
--- a/effect_chain.h
+++ b/effect_chain.h
@@ -222,12 +222,17 @@ public:
 	}
 	Effect *add_effect(Effect *effect, const std::vector<Effect *> &inputs);
 
-	// Adds an RGB output. Note that you can only have one output.
+	// Adds an RGBA output. Note that you can have at most one RGBA output and one
+	// Y'CbCr output (see below for details).
 	void add_output(const ImageFormat &format, OutputAlphaFormat alpha_format);
 
 	// Adds an YCbCr output. Note that you can only have one output.
 	// Currently, only chunked packed output is supported, and only 4:4:4
 	// (so chroma_subsampling_x and chroma_subsampling_y must both be 1).
+	//
+	// If you have both RGBA and Y'CbCr output, the RGBA output will come
+	// in the last draw buffer. Also, <format> and <alpha_format> must be
+	// identical between the two.
 	void add_ycbcr_output(const ImageFormat &format, OutputAlphaFormat alpha_format,
 	                      const YCbCrFormat &ycbcr_format,
 			      YCbCrOutputSplitting output_splitting = YCBCR_OUTPUT_INTERLEAVED);
@@ -388,10 +393,9 @@ private:
 	ImageFormat output_format;
 	OutputAlphaFormat output_alpha_format;
 
-	enum OutputColorType { OUTPUT_COLOR_RGB, OUTPUT_COLOR_YCBCR };
-	OutputColorType output_color_type;
-	YCbCrFormat output_ycbcr_format;              // If output_color_type == OUTPUT_COLOR_YCBCR.
-	YCbCrOutputSplitting output_ycbcr_splitting;  // If output_color_type == OUTPUT_COLOR_YCBCR.
+	bool output_color_rgba, output_color_ycbcr;
+	YCbCrFormat output_ycbcr_format;              // If output_color_ycbcr is true.
+	YCbCrOutputSplitting output_ycbcr_splitting;  // If output_color_ycbcr is true.
 
 	std::vector<Node *> nodes;
 	std::map<Effect *, Node *> node_map;
diff --git a/footer.130.frag b/footer.130.frag
index 9921b34..04c8e7c 100644
--- a/footer.130.frag
+++ b/footer.130.frag
@@ -9,17 +9,32 @@ out vec4 Chroma;
 out vec4 FragColor;
 #endif
 
+#if YCBCR_ALSO_OUTPUT_RGBA
+out vec4 RGBA;
+#endif
+
 void main()
 {
-	vec4 color = INPUT(tc);
+#if YCBCR_ALSO_OUTPUT_RGBA
+	vec4 color[2] = INPUT(tc);
+	vec4 color0 = color[0];
+	vec4 color1 = color[1];
+#else
+	vec4 color0 = INPUT(tc);
+#endif
+
 #if YCBCR_OUTPUT_PLANAR
-	Y = color.rrra;
-	Cb = color.ggga;
-	Cr = color.bbba;
+	Y = color0.rrra;
+	Cb = color0.ggga;
+	Cr = color0.bbba;
 #elif YCBCR_OUTPUT_SPLIT_Y_AND_CBCR
-	Y = color.rrra;
-	Chroma = color.gbba;
+	Y = color0.rrra;
+	Chroma = color0.gbba;
 #else
-	FragColor = color;
+	FragColor = color0;
+#endif
+
+#if YCBCR_ALSO_OUTPUT_RGBA
+	RGBA = color1;
 #endif
 }
diff --git a/footer.300es.frag b/footer.300es.frag
index 9921b34..04c8e7c 100644
--- a/footer.300es.frag
+++ b/footer.300es.frag
@@ -9,17 +9,32 @@ out vec4 Chroma;
 out vec4 FragColor;
 #endif
 
+#if YCBCR_ALSO_OUTPUT_RGBA
+out vec4 RGBA;
+#endif
+
 void main()
 {
-	vec4 color = INPUT(tc);
+#if YCBCR_ALSO_OUTPUT_RGBA
+	vec4 color[2] = INPUT(tc);
+	vec4 color0 = color[0];
+	vec4 color1 = color[1];
+#else
+	vec4 color0 = INPUT(tc);
+#endif
+
 #if YCBCR_OUTPUT_PLANAR
-	Y = color.rrra;
-	Cb = color.ggga;
-	Cr = color.bbba;
+	Y = color0.rrra;
+	Cb = color0.ggga;
+	Cr = color0.bbba;
 #elif YCBCR_OUTPUT_SPLIT_Y_AND_CBCR
-	Y = color.rrra;
-	Chroma = color.gbba;
+	Y = color0.rrra;
+	Chroma = color0.gbba;
 #else
-	FragColor = color;
+	FragColor = color0;
+#endif
+
+#if YCBCR_ALSO_OUTPUT_RGBA
+	RGBA = color1;
 #endif
 }
diff --git a/version.h b/version.h
index 782bfee..f129e9a 100644
--- a/version.h
+++ b/version.h
@@ -5,6 +5,6 @@
 // changes, even within git versions. There is no specific version
 // documentation outside the regular changelogs, though.
 
-#define MOVIT_VERSION 7
+#define MOVIT_VERSION 8
 
 #endif // !defined(_MOVIT_VERSION_H)
diff --git a/ycbcr_conversion_effect.frag b/ycbcr_conversion_effect.frag
index 6bc29b1..ea5de45 100644
--- a/ycbcr_conversion_effect.frag
+++ b/ycbcr_conversion_effect.frag
@@ -2,7 +2,11 @@ uniform sampler2D PREFIX(tex_y);
 uniform sampler2D PREFIX(tex_cb);
 uniform sampler2D PREFIX(tex_cr);
 
+#if YCBCR_ALSO_OUTPUT_RGBA
+vec4[2] FUNCNAME(vec2 tc) {
+#else
 vec4 FUNCNAME(vec2 tc) {
+#endif
 	vec4 rgba = INPUT(tc);
 	vec4 ycbcr_a;
 
@@ -20,5 +24,10 @@ vec4 FUNCNAME(vec2 tc) {
 #endif
 
 	ycbcr_a.a = rgba.a;
+
+#if YCBCR_ALSO_OUTPUT_RGBA
+	return vec4[2](ycbcr_a, rgba);
+#else
 	return ycbcr_a;
+#endif
 }
diff --git a/ycbcr_conversion_effect_test.cpp b/ycbcr_conversion_effect_test.cpp
index e127b4d..bea04c5 100644
--- a/ycbcr_conversion_effect_test.cpp
+++ b/ycbcr_conversion_effect_test.cpp
@@ -300,4 +300,76 @@ TEST(YCbCrConversionEffectTest, SplitLumaAndChroma) {
 	expect_equal(expected_cbcr, out_cbcr, width * 4, height);
 }
 
+TEST(YCbCrConversionEffectTest, OutputChunkyAndRGBA) {
+	const int width = 1;
+	const int height = 5;
+
+	// Pure-color test inputs, calculated with the formulas in Rec. 601
+	// section 2.5.4.
+	unsigned char y[width * height] = {
+		16, 235, 81, 145, 41,
+	};
+	unsigned char cb[width * height] = {
+		128, 128, 90, 54, 240,
+	};
+	unsigned char cr[width * height] = {
+		128, 128, 240, 34, 110,
+	};
+	unsigned char expected_ycbcr[width * height * 4] = {
+		// The same data, just rearranged.
+		 16, 128, 128, 255,
+		235, 128, 128, 255,
+		 81,  90, 240, 255,
+		145,  54,  34, 255,
+		 41, 240, 110, 255
+	};
+	unsigned char expected_rgba[width * height * 4] = {
+		  0,   0,   0, 255,
+		255, 255, 255, 255,
+		255,   0,   0, 255,
+		  0, 255,   0, 255,
+		  0,   0, 255, 255,
+	};
+
+	unsigned char out_ycbcr[width * height * 4];
+	unsigned char out_rgba[width * height * 4];
+
+	EffectChainTester tester(NULL, width, height, FORMAT_GRAYSCALE, COLORSPACE_sRGB, GAMMA_LINEAR, GL_RGBA8);
+
+	ImageFormat format;
+	format.color_space = COLORSPACE_sRGB;
+	format.gamma_curve = GAMMA_sRGB;
+
+	YCbCrFormat ycbcr_format;
+	ycbcr_format.luma_coefficients = YCBCR_REC_601;
+	ycbcr_format.full_range = false;
+	ycbcr_format.num_levels = 256;
+	ycbcr_format.chroma_subsampling_x = 1;
+	ycbcr_format.chroma_subsampling_y = 1;
+	ycbcr_format.cb_x_position = 0.5f;
+	ycbcr_format.cb_y_position = 0.5f;
+	ycbcr_format.cr_x_position = 0.5f;
+	ycbcr_format.cr_y_position = 0.5f;
+
+	tester.add_output(format, OUTPUT_ALPHA_FORMAT_POSTMULTIPLIED);
+	tester.add_ycbcr_output(format, OUTPUT_ALPHA_FORMAT_POSTMULTIPLIED, ycbcr_format);
+
+	YCbCrInput *input = new YCbCrInput(format, ycbcr_format, width, height);
+	input->set_pixel_data(0, y);
+	input->set_pixel_data(1, cb);
+	input->set_pixel_data(2, cr);
+	tester.get_chain()->add_input(input);
+
+	// Note: We don't test that the values actually get dithered,
+	// just that the shader compiles and doesn't mess up badly.
+	tester.get_chain()->set_dither_bits(8);
+
+	tester.run(out_ycbcr, out_rgba, GL_RGBA, COLORSPACE_sRGB, GAMMA_sRGB);
+	expect_equal(expected_ycbcr, out_ycbcr, width * 4, height);
+
+	// Y'CbCr isn't 100% accurate (the input values are rounded),
+	// so we need some leeway.
+	expect_equal(expected_rgba, out_rgba, 4 * width, height, 7, 255 * 0.002);
+}
+
 }  // namespace movit
-- 
2.39.2