From cdbed3b89528ee2f640613219fa3c77a0ad8fef5 Mon Sep 17 00:00:00 2001
From: "Steinar H. Gunderson" <sgunderson@bigfoot.com>
Date: Tue, 14 Feb 2017 00:18:59 +0100
Subject: [PATCH] Support interleaved (chunky) 4:4:4 in YCbCrInput.

---
 ycbcr_input.cpp      | 19 ++++++++++++-----
 ycbcr_input.frag     | 14 ++++++++-----
 ycbcr_input.h        | 18 +++++++++-------
 ycbcr_input_test.cpp | 49 ++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 83 insertions(+), 17 deletions(-)

diff --git a/ycbcr_input.cpp b/ycbcr_input.cpp
index b0ca692..4a0ec0e 100644
--- a/ycbcr_input.cpp
+++ b/ycbcr_input.cpp
@@ -38,7 +38,11 @@ YCbCrInput::YCbCrInput(const ImageFormat &image_format,
 
 	register_uniform_sampler2d("tex_y", &uniform_tex_y);
 
-	if (ycbcr_input_splitting == YCBCR_INPUT_SPLIT_Y_AND_CBCR) {
+	if (ycbcr_input_splitting == YCBCR_INPUT_INTERLEAVED) {
+		num_channels = 1;
+		assert(ycbcr_format.chroma_subsampling_x == 1);
+		assert(ycbcr_format.chroma_subsampling_y == 1);
+	} else if (ycbcr_input_splitting == YCBCR_INPUT_SPLIT_Y_AND_CBCR) {
 		num_channels = 2;
 		register_uniform_sampler2d("tex_cbcr", &uniform_tex_cb);
 	} else {
@@ -64,7 +68,10 @@ void YCbCrInput::set_gl_state(GLuint glsl_program_num, const string& prefix, uns
 
 		if (texture_num[channel] == 0 && (pbos[channel] != 0 || pixel_data[channel] != NULL)) {
 			GLenum format, internal_format;
-			if (channel == 1 && ycbcr_input_splitting == YCBCR_INPUT_SPLIT_Y_AND_CBCR) {
+			if (channel == 0 && ycbcr_input_splitting == YCBCR_INPUT_INTERLEAVED) {
+				format = GL_RGB;
+				internal_format = GL_RGB8;
+			} else if (channel == 1 && ycbcr_input_splitting == YCBCR_INPUT_SPLIT_Y_AND_CBCR) {
 				format = GL_RG;
 				internal_format = GL_RG8;
 			} else {
@@ -135,13 +142,15 @@ string YCbCrInput::output_fragment_shader()
 		ycbcr_format.cr_y_position, ycbcr_format.chroma_subsampling_y, heights[2]);
 	frag_shader += output_glsl_vec2("PREFIX(cr_offset)", cr_offset_x, cr_offset_y);
 
-	if (ycbcr_input_splitting == YCBCR_INPUT_SPLIT_Y_AND_CBCR) {
+	if (ycbcr_input_splitting == YCBCR_INPUT_INTERLEAVED) {
+		frag_shader += "#define Y_CB_CR_SAME_TEXTURE 1\n";
+	} else if (ycbcr_input_splitting == YCBCR_INPUT_SPLIT_Y_AND_CBCR) {
 		char buf[256];
-		snprintf(buf, sizeof(buf), "#define CB_CR_SAME_TEXTURE 1\n#define CB_CR_OFFSETS_EQUAL %d\n",
+		snprintf(buf, sizeof(buf), "#define Y_CB_CR_SAME_TEXTURE 0\n#define CB_CR_SAME_TEXTURE 1\n#define CB_CR_OFFSETS_EQUAL %d\n",
 			(fabs(ycbcr_format.cb_x_position - ycbcr_format.cr_x_position) < 1e-6));
 		frag_shader += buf;
 	} else {
-		frag_shader += "#define CB_CR_SAME_TEXTURE 0\n";
+		frag_shader += "#define Y_CB_CR_SAME_TEXTURE 0\n#define CB_CR_SAME_TEXTURE 0\n";
 	}
 
 	frag_shader += read_file("ycbcr_input.frag");
diff --git a/ycbcr_input.frag b/ycbcr_input.frag
index c57c6d1..29f809d 100644
--- a/ycbcr_input.frag
+++ b/ycbcr_input.frag
@@ -11,17 +11,21 @@ vec4 FUNCNAME(vec2 tc) {
 	tc.y = 1.0 - tc.y;
 
 	vec3 ycbcr;
+#if Y_CB_CR_SAME_TEXTURE
+	ycbcr = tex2D(PREFIX(tex_y), tc).xyz;
+#else
 	ycbcr.x = tex2D(PREFIX(tex_y), tc).x;
-#if CB_CR_SAME_TEXTURE
-#if CB_CR_OFFSETS_EQUAL
+  #if CB_CR_SAME_TEXTURE
+    #if CB_CR_OFFSETS_EQUAL
 	ycbcr.yz = tex2D(PREFIX(tex_cbcr), tc + PREFIX(cb_offset)).xy;
-#else
+    #else
 	ycbcr.y = tex2D(PREFIX(tex_cbcr), tc + PREFIX(cb_offset)).x;
 	ycbcr.z = tex2D(PREFIX(tex_cbcr), tc + PREFIX(cr_offset)).x;
-#endif
-#else
+    #endif
+  #else
 	ycbcr.y = tex2D(PREFIX(tex_cb), tc + PREFIX(cb_offset)).x;
 	ycbcr.z = tex2D(PREFIX(tex_cr), tc + PREFIX(cr_offset)).x;
+  #endif
 #endif
 
 	ycbcr -= PREFIX(offset);
diff --git a/ycbcr_input.h b/ycbcr_input.h
index d0e71e2..22208e0 100644
--- a/ycbcr_input.h
+++ b/ycbcr_input.h
@@ -1,9 +1,10 @@
 #ifndef _MOVIT_YCBCR_INPUT_H
 #define _MOVIT_YCBCR_INPUT_H 1
 
-// YCbCrInput is for handling planar 8-bit Y'CbCr (also sometimes, usually rather
-// imprecisely, called “YUV”), which is typically what you get from a video decoder.
-// It upsamples planes as needed, using the default linear upsampling OpenGL gives you.
+// YCbCrInput is for handling planar or 4:4:4 interleaved 8-bit Y'CbCr
+// (also sometimes, usually rather imprecisely, called “YUV”), which is typically
+// what you get from a video decoder. It upsamples planes as needed, using the
+// default linear upsampling OpenGL gives you.
 
 #include <epoxy/gl.h>
 #include <assert.h>
@@ -19,10 +20,8 @@ namespace movit {
 
 class ResourcePool;
 
-// Whether the data is fully planar (Y', Cb and Cr in one texture each)
-// or not. Note that this input does currently not support fully interleaved
-// data (Y', Cb and Cr next to each other), as 4:4:4 interleaved Y'CbCr seems
-// to be rare; however, YCbCr422InterleavedInput supports the important special
+// How Y', Cb and Cr are split across textures (one texture each, Y' plus a
+// shared CbCr texture, or all three in one). Note that YCbCr422InterleavedInput supports the important special
 // case of 4:2:2 interleaved.
 enum YCbCrInputSplitting {
 	// The standard, default case; Y', Cb and Cr in one texture each.
@@ -33,6 +32,11 @@ enum YCbCrInputSplitting {
 	// If you specify this mode, the “Cr” pointer texture will be unused
 	// (the “Cb” texture contains both).
 	YCBCR_INPUT_SPLIT_Y_AND_CBCR,
+
+	// Y', Cb and Cr interleaved in the same texture (the “Y” texture;
+	// “Cb” and “Cr” are unused). This means you cannot have any subsampling;
+	// 4:4:4 only.
+	YCBCR_INPUT_INTERLEAVED,
 };
 
 class YCbCrInput : public Input {
diff --git a/ycbcr_input_test.cpp b/ycbcr_input_test.cpp
index c932a53..019cc07 100644
--- a/ycbcr_input_test.cpp
+++ b/ycbcr_input_test.cpp
@@ -69,6 +69,55 @@ TEST(YCbCrInputTest, Simple444) {
 	expect_equal(expected_data, out_data, 4 * width, height, 0.025, 0.002);
 }
 
+TEST(YCbCrInputTest, Interleaved444) {  // One interleaved 4:4:4 texture in, RGBA out.
+	const int width = 1;
+	const int height = 5;
+
+	// Same data as Simple444, just rearranged into one (Y', Cb, Cr) triplet per pixel.
+	unsigned char data[width * height * 3] = {
+		 16, 128, 128,
+		235, 128, 128,
+		 81,  90, 240,
+		145,  54,  34,
+		 41, 240, 110,
+	};
+	float expected_data[4 * width * height] = {  // Read back as GL_RGBA below.
+		0.0, 0.0, 0.0, 1.0,  // black
+		1.0, 1.0, 1.0, 1.0,  // white
+		1.0, 0.0, 0.0, 1.0,  // red
+		0.0, 1.0, 0.0, 1.0,  // green
+		0.0, 0.0, 1.0, 1.0,  // blue
+	};
+	float out_data[4 * width * height];
+
+	EffectChainTester tester(NULL, width, height);
+
+	ImageFormat format;
+	format.color_space = COLORSPACE_sRGB;
+	format.gamma_curve = GAMMA_sRGB;
+
+	YCbCrFormat ycbcr_format;
+	ycbcr_format.luma_coefficients = YCBCR_REC_601;
+	ycbcr_format.full_range = false;
+	ycbcr_format.num_levels = 256;
+	ycbcr_format.chroma_subsampling_x = 1;  // Interleaved mode asserts on 1 (no subsampling).
+	ycbcr_format.chroma_subsampling_y = 1;  // Likewise for the vertical direction.
+	ycbcr_format.cb_x_position = 0.5f;
+	ycbcr_format.cb_y_position = 0.5f;
+	ycbcr_format.cr_x_position = 0.5f;
+	ycbcr_format.cr_y_position = 0.5f;
+
+	YCbCrInput *input = new YCbCrInput(format, ycbcr_format, width, height, YCBCR_INPUT_INTERLEAVED);
+	input->set_pixel_data(0, data);  // Channel 0 carries all three components. NOTE(review): rows are 3 bytes; presumably relies on GL_UNPACK_ALIGNMENT being 1 -- confirm.
+	tester.get_chain()->add_input(input);
+
+	tester.run(out_data, GL_RGBA, COLORSPACE_sRGB, GAMMA_sRGB);
+
+	// Y'CbCr isn't 100% accurate (the input values are rounded),
+	// so we need some leeway when comparing against the ideal RGBA values.
+	expect_equal(expected_data, out_data, 4 * width, height, 0.025, 0.002);
+}
+
 TEST(YCbCrInputTest, FullRangeRec601) {
 	const int width = 1;
 	const int height = 5;
-- 
2.39.2