From e8499e3e9892a74c7882af4be14ccdc1e3d92c2b Mon Sep 17 00:00:00 2001
From: "Steinar H. Gunderson" <sgunderson@bigfoot.com>
Date: Sat, 11 Feb 2017 22:13:02 +0100
Subject: [PATCH] Allow adjusting the output Y'CbCr coefficients after
 finalize.

Primarily useful for Nageru, which may have to switch output modes at runtime.
Pretty much the same speed (just a single extra branch on a boolean uniform),
as constants and uniforms are typically the same speed and we're generally
ALU-bound.
---
 effect_chain.cpp                 | 23 +++++++++++++
 effect_chain.h                   |  6 ++++
 version.h                        |  2 +-
 ycbcr_conversion_effect.cpp      | 34 +++++++++++++------
 ycbcr_conversion_effect.frag     | 20 ++++++------
 ycbcr_conversion_effect.h        | 13 ++++++++
 ycbcr_conversion_effect_test.cpp | 56 ++++++++++++++++++++++++++++++++
 ycbcr_input_test.cpp             | 30 +++++++++++++++++
 8 files changed, 163 insertions(+), 21 deletions(-)

diff --git a/effect_chain.cpp b/effect_chain.cpp
index 2ba42e9..4d13d3f 100644
--- a/effect_chain.cpp
+++ b/effect_chain.cpp
@@ -110,6 +110,29 @@ void EffectChain::add_ycbcr_output(const ImageFormat &format, OutputAlphaFormat
 	assert(ycbcr_format.chroma_subsampling_y == 1);
 }
 
+void EffectChain::change_ycbcr_output_format(const YCbCrFormat &ycbcr_format)
+{
+	assert(output_color_ycbcr);
+	assert(output_ycbcr_format.chroma_subsampling_x == ycbcr_format.chroma_subsampling_x);
+	assert(output_ycbcr_format.chroma_subsampling_y == ycbcr_format.chroma_subsampling_y);
+	assert(fabs(output_ycbcr_format.cb_x_position - ycbcr_format.cb_x_position) < 1e-3);
+	assert(fabs(output_ycbcr_format.cb_y_position - ycbcr_format.cb_y_position) < 1e-3);
+	assert(fabs(output_ycbcr_format.cr_x_position - ycbcr_format.cr_x_position) < 1e-3);
+	assert(fabs(output_ycbcr_format.cr_y_position - ycbcr_format.cr_y_position) < 1e-3);
+
+	output_ycbcr_format = ycbcr_format;
+	if (finalized) {
+		// Find the YCbCrConversionEffect node. We don't store it to avoid
+		// an unneeded ABI break (this can be fixed on next break).
+		for (Node *node : nodes) {
+			if (node->effect->effect_type_id() == "YCbCrConversionEffect") {
+				YCbCrConversionEffect *effect = (YCbCrConversionEffect *)(node->effect);
+				effect->change_output_format(ycbcr_format);
+			}
+		}
+	}
+}
+
 Node *EffectChain::add_node(Effect *effect)
 {
 	for (unsigned i = 0; i < nodes.size(); ++i) {
diff --git a/effect_chain.h b/effect_chain.h
index ea4926b..7e0cd9f 100644
--- a/effect_chain.h
+++ b/effect_chain.h
@@ -274,6 +274,12 @@ public:
 	                      const YCbCrFormat &ycbcr_format,
 			      YCbCrOutputSplitting output_splitting = YCBCR_OUTPUT_INTERLEAVED);
 
+	// Change Y'CbCr output format. (This can also be done after finalize().)
+	// Note that you are not allowed to change subsampling parameters;
+	// however, you can change the color space parameters, i.e.,
+	// luma_coefficients, full_range and num_levels.
+	void change_ycbcr_output_format(const YCbCrFormat &ycbcr_format);
+
 	// Set number of output bits, to scale the dither.
 	// 8 is the right value for most outputs.
 	// The default, 0, is a special value that means no dither.
diff --git a/version.h b/version.h
index 3c3989a..9a79127 100644
--- a/version.h
+++ b/version.h
@@ -5,6 +5,6 @@
 // changes, even within git versions. There is no specific version
 // documentation outside the regular changelogs, though.
 
-#define MOVIT_VERSION 23
+#define MOVIT_VERSION 24
 
 #endif // !defined(_MOVIT_VERSION_H)
diff --git a/ycbcr_conversion_effect.cpp b/ycbcr_conversion_effect.cpp
index 74ca789..8d11acf 100644
--- a/ycbcr_conversion_effect.cpp
+++ b/ycbcr_conversion_effect.cpp
@@ -18,30 +18,44 @@ namespace movit {
 YCbCrConversionEffect::YCbCrConversionEffect(const YCbCrFormat &ycbcr_format)
 	: ycbcr_format(ycbcr_format)
 {
+	register_uniform_mat3("ycbcr_matrix", &uniform_ycbcr_matrix);
+	register_uniform_vec3("offset", uniform_offset);
+	register_uniform_bool("clamp_range", &uniform_clamp_range);
+
+	// Only used when clamp_range is true.
+	register_uniform_vec3("ycbcr_min", uniform_ycbcr_min);
+	register_uniform_vec3("ycbcr_max", uniform_ycbcr_max);
 }
 
 string YCbCrConversionEffect::output_fragment_shader()
 {
-	float offset[3];
+	return read_file("ycbcr_conversion_effect.frag");
+}
+
+void YCbCrConversionEffect::set_gl_state(GLuint glsl_program_num, const string &prefix, unsigned *sampler_num)
+{
+	Effect::set_gl_state(glsl_program_num, prefix, sampler_num);
+
 	Matrix3d ycbcr_to_rgb;
-	compute_ycbcr_matrix(ycbcr_format, offset, &ycbcr_to_rgb);
+	compute_ycbcr_matrix(ycbcr_format, uniform_offset, &ycbcr_to_rgb);
 
-        string frag_shader = output_glsl_mat3("PREFIX(ycbcr_matrix)", ycbcr_to_rgb.inverse());
-        frag_shader += output_glsl_vec3("PREFIX(offset)", offset[0], offset[1], offset[2]);
+	uniform_ycbcr_matrix = ycbcr_to_rgb.inverse();
 
 	if (ycbcr_format.full_range) {
 		// The card will clamp for us later.
-		frag_shader += "#define YCBCR_CLAMP_RANGE 0\n";
+		uniform_clamp_range = false;
 	} else {
-		frag_shader += "#define YCBCR_CLAMP_RANGE 1\n";
+		uniform_clamp_range = true;
 
 		// These limits come from BT.601 page 8, or BT.701, page 5.
 		// TODO: Use num_levels. Currently we support 8-bit levels only.
-		frag_shader += output_glsl_vec3("PREFIX(ycbcr_min)", 16.0 / 255.0, 16.0 / 255.0, 16.0 / 255.0);
-		frag_shader += output_glsl_vec3("PREFIX(ycbcr_max)", 235.0 / 255.0, 240.0 / 255.0, 240.0 / 255.0);
+		uniform_ycbcr_min[0] = 16.0 / 255.0;
+		uniform_ycbcr_min[1] = 16.0 / 255.0;
+		uniform_ycbcr_min[2] = 16.0 / 255.0;
+		uniform_ycbcr_max[0] = 235.0 / 255.0;
+		uniform_ycbcr_max[1] = 240.0 / 255.0;
+		uniform_ycbcr_max[2] = 240.0 / 255.0;
 	}
-
-	return frag_shader + read_file("ycbcr_conversion_effect.frag");
 }
 
 }  // namespace movit
diff --git a/ycbcr_conversion_effect.frag b/ycbcr_conversion_effect.frag
index 4ef3801..ef289df 100644
--- a/ycbcr_conversion_effect.frag
+++ b/ycbcr_conversion_effect.frag
@@ -17,16 +17,16 @@ vec4 FUNCNAME(vec2 tc) {
 
 	ycbcr_a.rgb = PREFIX(ycbcr_matrix) * rgba.rgb + PREFIX(offset);
 
-#if YCBCR_CLAMP_RANGE
-	// If we use limited-range Y'CbCr, the card's usual 0–255 clamping
-	// won't be enough, so we need to clamp ourselves here.
-	//
-	// We clamp before dither, which is a bit unfortunate, since
-	// it means dither can take us out of the clamped range again.
-	// However, since DitherEffect never adds enough dither to change
-	// the quantized levels, we will be fine in practice.
-	ycbcr_a.rgb = clamp(ycbcr_a.rgb, PREFIX(ycbcr_min), PREFIX(ycbcr_max));
-#endif
+	if (PREFIX(clamp_range)) {
+		// If we use limited-range Y'CbCr, the card's usual 0–255 clamping
+		// won't be enough, so we need to clamp ourselves here.
+		//
+		// We clamp before dither, which is a bit unfortunate, since
+		// it means dither can take us out of the clamped range again.
+		// However, since DitherEffect never adds enough dither to change
+		// the quantized levels, we will be fine in practice.
+		ycbcr_a.rgb = clamp(ycbcr_a.rgb, PREFIX(ycbcr_min), PREFIX(ycbcr_max));
+	}
 
 	ycbcr_a.a = rgba.a;
 
diff --git a/ycbcr_conversion_effect.h b/ycbcr_conversion_effect.h
index 46113bf..ab31fd6 100644
--- a/ycbcr_conversion_effect.h
+++ b/ycbcr_conversion_effect.h
@@ -6,6 +6,7 @@
 // and/or convert to planar somehow else.
 
 #include <epoxy/gl.h>
+#include <Eigen/Core>
 #include <string>
 
 #include "effect.h"
@@ -23,11 +24,23 @@ private:
 public:
 	virtual std::string effect_type_id() const { return "YCbCrConversionEffect"; }
 	std::string output_fragment_shader();
+	void set_gl_state(GLuint glsl_program_num, const std::string &prefix, unsigned *sampler_num);
 	virtual AlphaHandling alpha_handling() const { return DONT_CARE_ALPHA_TYPE; }
 	virtual bool one_to_one_sampling() const { return true; }
 
+	// Should not be called by end users; call
+	// EffectChain::change_ycbcr_output_format() instead.
+	void change_output_format(const YCbCrFormat &ycbcr_format) {
+		this->ycbcr_format = ycbcr_format;
+	}
+
 private:
 	YCbCrFormat ycbcr_format;
+
+	Eigen::Matrix3d uniform_ycbcr_matrix;
+	float uniform_offset[3];
+	bool uniform_clamp_range;
+	float uniform_ycbcr_min[3], uniform_ycbcr_max[3];
 };
 
 }  // namespace movit
diff --git a/ycbcr_conversion_effect_test.cpp b/ycbcr_conversion_effect_test.cpp
index 10085ab..c876267 100644
--- a/ycbcr_conversion_effect_test.cpp
+++ b/ycbcr_conversion_effect_test.cpp
@@ -381,4 +381,60 @@ TEST(YCbCrConversionEffectTest, OutputChunkyAndRGBA) {
 	expect_equal(expected_rgba, out_rgba, 4 * width, height, 7, 255 * 0.002);
 }
 
+// Very similar to PlanarOutput.
+TEST(YCbCrConversionEffectTest, ChangeOutputFormat) {
+	const int width = 1;
+	const int height = 5;
+
+	// Pure-color test inputs, calculated with the formulas in Rec. 601
+	// section 2.5.4.
+	unsigned char y[width * height] = {
+		16, 235, 81, 145, 41,
+	};
+	unsigned char cb[width * height] = {
+		128, 128, 90, 54, 240,
+	};
+	unsigned char cr[width * height] = {
+		128, 128, 240, 34, 110,
+	};
+
+	unsigned char out_y[width * height], out_cb[width * height], out_cr[width * height];
+
+	EffectChainTester tester(NULL, width, height, FORMAT_GRAYSCALE, COLORSPACE_sRGB, GAMMA_LINEAR, GL_RGBA8);
+
+	ImageFormat format;
+	format.color_space = COLORSPACE_sRGB;
+	format.gamma_curve = GAMMA_sRGB;
+
+	YCbCrFormat ycbcr_format;
+	ycbcr_format.luma_coefficients = YCBCR_REC_709;  // Deliberately wrong at first.
+	ycbcr_format.full_range = false;
+	ycbcr_format.num_levels = 256;
+	ycbcr_format.chroma_subsampling_x = 1;
+	ycbcr_format.chroma_subsampling_y = 1;
+	ycbcr_format.cb_x_position = 0.5f;
+	ycbcr_format.cb_y_position = 0.5f;
+	ycbcr_format.cr_x_position = 0.5f;
+	ycbcr_format.cr_y_position = 0.5f;
+
+	tester.add_ycbcr_output(format, OUTPUT_ALPHA_FORMAT_POSTMULTIPLIED, ycbcr_format, YCBCR_OUTPUT_PLANAR);
+
+	ycbcr_format.luma_coefficients = YCBCR_REC_601;
+	YCbCrInput *input = new YCbCrInput(format, ycbcr_format, width, height);
+	input->set_pixel_data(0, y);
+	input->set_pixel_data(1, cb);
+	input->set_pixel_data(2, cr);
+	tester.get_chain()->add_input(input);
+
+	tester.run(out_y, out_cb, out_cr, GL_RED, COLORSPACE_sRGB, GAMMA_sRGB);
+
+	// Now change the output format to match what we gave the input, and re-run.
+	tester.get_chain()->change_ycbcr_output_format(ycbcr_format);
+	tester.run(out_y, out_cb, out_cr, GL_RED, COLORSPACE_sRGB, GAMMA_sRGB);
+
+	expect_equal(y, out_y, width, height);
+	expect_equal(cb, out_cb, width, height);
+	expect_equal(cr, out_cr, width, height);
+}
+
 }  // namespace movit
diff --git a/ycbcr_input_test.cpp b/ycbcr_input_test.cpp
index a5032af..c932a53 100644
--- a/ycbcr_input_test.cpp
+++ b/ycbcr_input_test.cpp
@@ -688,6 +688,36 @@ TEST(YCbCrTest, WikipediaJPEGMatrices) {
 	EXPECT_NEAR(128.0, offset[2] * 255.0, 1e-3);
 }
 
+TEST(YCbCrTest, BlackmagicForwardMatrix) {
+	YCbCrFormat ycbcr_format;
+	ycbcr_format.luma_coefficients = YCBCR_REC_709;
+	ycbcr_format.full_range = false;
+	ycbcr_format.num_levels = 256;
+
+	float offset[3];
+	Eigen::Matrix3d ycbcr_to_rgb;
+	compute_ycbcr_matrix(ycbcr_format, offset, &ycbcr_to_rgb);
+
+	Eigen::Matrix3d rgb_to_ycbcr = ycbcr_to_rgb.inverse();
+
+	// Values from DeckLink SDK documentation.
+	EXPECT_NEAR( 0.183, rgb_to_ycbcr(0,0), 1e-3);
+	EXPECT_NEAR( 0.614, rgb_to_ycbcr(0,1), 1e-3);
+	EXPECT_NEAR( 0.062, rgb_to_ycbcr(0,2), 1e-3);
+
+	EXPECT_NEAR(-0.101, rgb_to_ycbcr(1,0), 1e-3);
+	EXPECT_NEAR(-0.338, rgb_to_ycbcr(1,1), 1e-3);
+	EXPECT_NEAR( 0.439, rgb_to_ycbcr(1,2), 1e-3);
+
+	EXPECT_NEAR( 0.439, rgb_to_ycbcr(2,0), 1e-3);
+	EXPECT_NEAR(-0.399, rgb_to_ycbcr(2,1), 1e-3);
+	EXPECT_NEAR(-0.040, rgb_to_ycbcr(2,2), 1e-3);
+
+	EXPECT_NEAR( 16.0, offset[0] * 255.0, 1e-3);
+	EXPECT_NEAR(128.0, offset[1] * 255.0, 1e-3);
+	EXPECT_NEAR(128.0, offset[2] * 255.0, 1e-3);
+}
+
 TEST(YCbCrInputTest, NoData) {
 	const int width = 1;
 	const int height = 5;
-- 
2.39.2