From 25162b5457057af3ebcc1649571eeeb923e90098 Mon Sep 17 00:00:00 2001
From: "Steinar H. Gunderson" <sgunderson@bigfoot.com>
Date: Mon, 13 Feb 2017 00:55:14 +0100
Subject: [PATCH] Support 10- and 12-bit Y'CbCr output.

We don't have any input support yet; the constants are put in place,
but we also need some work on supporting (semi-)adequate input formats.
---
 test_util.cpp                    | 30 ++++++++++++++++-
 test_util.h                      |  2 ++
 version.h                        |  2 +-
 ycbcr.cpp                        | 27 +++++++--------
 ycbcr.h                          | 11 ++++---
 ycbcr_conversion_effect.cpp      | 35 +++++++++++++++-----
 ycbcr_conversion_effect_test.cpp | 56 ++++++++++++++++++++++++++++++++
 7 files changed, 135 insertions(+), 28 deletions(-)
diff --git a/test_util.cpp b/test_util.cpp
index 9d12a12..fadb2d7 100644
--- a/test_util.cpp
+++ b/test_util.cpp
@@ -133,6 +133,11 @@ void EffectChainTester::run(unsigned char *out_data, unsigned char *out_data2, u
 	internal_run(out_data, out_data2, out_data3, GL_UNSIGNED_BYTE, format, color_space, gamma_curve, alpha_format);
 }
 
+void EffectChainTester::run_10_10_10_2(uint32_t *out_data, GLenum format, Colorspace color_space, GammaCurve gamma_curve, OutputAlphaFormat alpha_format)
+{  // Runs the chain into a single buffer of 32-bit words (uint32_t); every other argument is passed through untouched.
+	internal_run<uint32_t>(out_data, NULL, NULL, GL_UNSIGNED_INT_2_10_10_10_REV, format, color_space, gamma_curve, alpha_format);  // The second and third output pointers are NULL; the packed 2:10:10:10 type goes in internal_run()'s internal_format slot.
+}
+
 template<class T>
 void EffectChainTester::internal_run(T *out_data, T *out_data2, T *out_data3, GLenum internal_format, GLenum format, Colorspace color_space, GammaCurve gamma_curve, OutputAlphaFormat alpha_format)
 {
@@ -145,6 +150,8 @@ void EffectChainTester::internal_run(T *out_data, T *out_data2, T *out_data3, GL
 		type = GL_UNSIGNED_BYTE;
 	} else if (framebuffer_format == GL_RGBA16F || framebuffer_format == GL_RGBA32F) {
 		type = GL_FLOAT;
+	} else if (framebuffer_format == GL_RGB10_A2) {
+		type = GL_UNSIGNED_INT_2_10_10_10_REV;
 	} else {
 		// Add more here as needed.
 		assert(false);
@@ -220,7 +227,7 @@ void EffectChainTester::internal_run(T *out_data, T *out_data2, T *out_data3, GL
 			check_error();
 		}
 
-		if (format == GL_RGBA) {
+		if (format == GL_RGBA && sizeof(*ptr) == 1) {
 			vertical_flip(ptr, width * 4, height);
 		} else {
 			vertical_flip(ptr, width, height);
@@ -327,6 +334,27 @@ void expect_equal(const unsigned char *ref, const unsigned char *result, unsigne
 	delete[] result_float;
 }
 
+void expect_equal(const int *ref, const int *result, unsigned width, unsigned height, unsigned largest_difference_limit, float rms_limit)
+{  // Integer overload: copies both images into float buffers and defers to the float-pointer expect_equal().
+	assert(width > 0);  // Empty images are treated as a caller error.
+	assert(height > 0);
+
+	float *ref_float = new float[width * height];  // One float per input element.
+	float *result_float = new float[width * height];
+
+	for (unsigned y = 0; y < height; ++y) {
+		for (unsigned x = 0; x < width; ++x) {
+			ref_float[y * width + x] = ref[y * width + x];  // Implicit int -> float conversion; rows are width elements apart.
+			result_float[y * width + x] = result[y * width + x];
+		}
+	}
+
+	expect_equal(ref_float, result_float, width, height, largest_difference_limit, rms_limit);  // Both limits are forwarded as given.
+
+	delete[] ref_float;  // Free the temporary copies.
+	delete[] result_float;
+}
+
 void test_accuracy(const float *expected, const float *result, unsigned num_values, double absolute_error_limit, double relative_error_limit, double local_relative_error_limit, double rms_limit)
 {
 	double squared_difference = 0.0;
diff --git a/test_util.h b/test_util.h
index 728f84b..4b7c05b 100644
--- a/test_util.h
+++ b/test_util.h
@@ -27,6 +27,7 @@ public:
 	void run(unsigned char *out_data, GLenum format, Colorspace color_space, GammaCurve gamma_curve, OutputAlphaFormat alpha_format = OUTPUT_ALPHA_FORMAT_POSTMULTIPLIED);
 	void run(unsigned char *out_data, unsigned char *out_data2, GLenum format, Colorspace color_space, GammaCurve gamma_curve, OutputAlphaFormat alpha_format = OUTPUT_ALPHA_FORMAT_POSTMULTIPLIED);
 	void run(unsigned char *out_data, unsigned char *out_data2, unsigned char *out_data3, GLenum format, Colorspace color_space, GammaCurve gamma_curve, OutputAlphaFormat alpha_format = OUTPUT_ALPHA_FORMAT_POSTMULTIPLIED);
+	void run_10_10_10_2(uint32_t *out_data, GLenum format, Colorspace color_space, GammaCurve gamma_curve, OutputAlphaFormat alpha_format = OUTPUT_ALPHA_FORMAT_POSTMULTIPLIED);
 	void add_output(const ImageFormat &format, OutputAlphaFormat alpha_format);
 	void add_ycbcr_output(const ImageFormat &format, OutputAlphaFormat alpha_format, const YCbCrFormat &ycbcr_format, YCbCrOutputSplitting output_splitting = YCBCR_OUTPUT_INTERLEAVED);
 
@@ -45,6 +46,7 @@ private:
 
 void expect_equal(const float *ref, const float *result, unsigned width, unsigned height, float largest_difference_limit = 1.5 / 255.0, float rms_limit = 0.2 / 255.0);
 void expect_equal(const unsigned char *ref, const unsigned char *result, unsigned width, unsigned height, unsigned largest_difference_limit = 1, float rms_limit = 0.2);
+void expect_equal(const int *ref, const int *result, unsigned width, unsigned height, unsigned largest_difference_limit = 1, float rms_limit = 0.2);
 void test_accuracy(const float *expected, const float *result, unsigned num_values, double absolute_error_limit, double relative_error_limit, double local_relative_error_limit, double rms_limit);
 
 // Convert an sRGB encoded value (0.0 to 1.0, inclusive) to linear light.
diff --git a/version.h b/version.h
index 9a79127..291eb5d 100644
--- a/version.h
+++ b/version.h
@@ -5,6 +5,6 @@
 // changes, even within git versions. There is no specific version
 // documentation outside the regular changelogs, though.
 
-#define MOVIT_VERSION 24
+#define MOVIT_VERSION 25
 
 #endif // !defined(_MOVIT_VERSION_H)
diff --git a/ycbcr.cpp b/ycbcr.cpp
index 5005912..eaa2ee8 100644
--- a/ycbcr.cpp
+++ b/ycbcr.cpp
@@ -89,25 +89,26 @@ void compute_ycbcr_matrix(YCbCrFormat ycbcr_format, float* offset, Matrix3d* ycb
 		assert(false);
 	}
 
+	const int num_levels = ycbcr_format.num_levels;
 	if (ycbcr_format.full_range) {
-		// TODO: Use num_levels.
-		offset[0] = 0.0 / 255.0;
-		offset[1] = 128.0 / 255.0;
-		offset[2] = 128.0 / 255.0;
+		offset[0] = 0.0 / (num_levels - 1);
+		offset[1] = double(num_levels / 2) / (num_levels - 1);  // E.g. 128/255.
+		offset[2] = double(num_levels / 2) / (num_levels - 1);
 
 		scale[0] = 1.0;
 		scale[1] = 1.0;
 		scale[2] = 1.0;
 	} else {
-		// Rec. 601, page 4; Rec. 709, page 19; Rec. 2020, page 4.
-		// TODO: Use num_levels.
-		offset[0] = 16.0 / 255.0;
-		offset[1] = 128.0 / 255.0;
-		offset[2] = 128.0 / 255.0;
-
-		scale[0] = 255.0 / 219.0;
-		scale[1] = 255.0 / 224.0;
-		scale[2] = 255.0 / 224.0;
+		// Rec. 601, page 4; Rec. 709, page 19; Rec. 2020, page 5.
+		// Rec. 2020 contains the most generic formulas, which we use here.
+		const double s = num_levels / 256.0;  // 2^(n-8) in Rec. 2020 parlance.
+		offset[0] = (s * 16.0) / (num_levels - 1);
+		offset[1] = (s * 128.0) / (num_levels - 1);
+		offset[2] = (s * 128.0) / (num_levels - 1);
+
+		scale[0] = double(num_levels - 1) / (s * 219.0);
+		scale[1] = double(num_levels - 1) / (s * 224.0);
+		scale[2] = double(num_levels - 1) / (s * 224.0);
 	}
 
 	// Matrix to convert RGB to YCbCr. See e.g. Rec. 601.
diff --git a/ycbcr.h b/ycbcr.h
index 4eb9e73..e4ca43f 100644
--- a/ycbcr.h
+++ b/ycbcr.h
@@ -29,9 +29,10 @@
 // range, 10-bit goes out of range (white gets to 942), while if you select
 // 10-bit range, 8-bit gets only to 234, making true white impossible.
 //
-// We currently support the 8-bit ranges only, since all of our Y'CbCr
-// handling effects happen to support only 8-bit at the moment. We will need
-// to fix this eventually, though, with an added field to YCbCrFormat.
+// Thus, you will need to specify the actual precision of the Y'CbCr source
+// (or destination); the num_levels field is the right place. Most people
+// will want to simply set this to 256, as 8-bit Y'CbCr is the most common,
+// but the right value will naturally depend on your input.
 
 #include "image_format.h"
 
@@ -48,8 +49,8 @@ struct YCbCrFormat {
 	// JPEG uses the Rec. 601 luma coefficients, but full range.
 	bool full_range;
 
-	// Currently unused, but should be set to 256 for future expansion,
-	// indicating 8-bit interpretation (see file-level comment).
+	// Set to 2^n for n-bit Y'CbCr (e.g. 256 for 8-bit Y'CbCr).
+	// See file-level comment.
 	int num_levels;
 
 	// Sampling factors for chroma components. For no subsampling (4:4:4),
diff --git a/ycbcr_conversion_effect.cpp b/ycbcr_conversion_effect.cpp
index 8d11acf..fd2cd07 100644
--- a/ycbcr_conversion_effect.cpp
+++ b/ycbcr_conversion_effect.cpp
@@ -47,14 +47,33 @@ void YCbCrConversionEffect::set_gl_state(GLuint glsl_program_num, const string &
 	} else {
 		uniform_clamp_range = true;
 
-		// These limits come from BT.601 page 8, or BT.701, page 5.
-		// TODO: Use num_levels. Currently we support 8-bit levels only.
-		uniform_ycbcr_min[0] = 16.0 / 255.0;
-		uniform_ycbcr_min[1] = 16.0 / 255.0;
-		uniform_ycbcr_min[2] = 16.0 / 255.0;
-		uniform_ycbcr_max[0] = 235.0 / 255.0;
-		uniform_ycbcr_max[1] = 240.0 / 255.0;
-		uniform_ycbcr_max[2] = 240.0 / 255.0;
+		if (ycbcr_format.num_levels == 256) {  // 8-bit.
+			// These limits come from BT.601 page 8, or BT.709, page 5.
+			uniform_ycbcr_min[0] = 16.0 / 255.0;
+			uniform_ycbcr_min[1] = 16.0 / 255.0;
+			uniform_ycbcr_min[2] = 16.0 / 255.0;
+			uniform_ycbcr_max[0] = 235.0 / 255.0;
+			uniform_ycbcr_max[1] = 240.0 / 255.0;
+			uniform_ycbcr_max[2] = 240.0 / 255.0;
+		} else if (ycbcr_format.num_levels == 1024) {  // 10-bit.
+			// BT.709, page 5, or BT.2020, page 6.
+			uniform_ycbcr_min[0] = 64.0 / 1023.0;
+			uniform_ycbcr_min[1] = 64.0 / 1023.0;
+			uniform_ycbcr_min[2] = 64.0 / 1023.0;
+			uniform_ycbcr_max[0] = 940.0 / 1023.0;
+			uniform_ycbcr_max[1] = 960.0 / 1023.0;
+			uniform_ycbcr_max[2] = 960.0 / 1023.0;
+		} else if (ycbcr_format.num_levels == 4096) {  // 12-bit.
+			// BT.2020, page 6.
+			uniform_ycbcr_min[0] = 256.0 / 4095.0;
+			uniform_ycbcr_min[1] = 256.0 / 4095.0;
+			uniform_ycbcr_min[2] = 256.0 / 4095.0;
+			uniform_ycbcr_max[0] = 3760.0 / 4095.0;
+			uniform_ycbcr_max[1] = 3840.0 / 4095.0;
+			uniform_ycbcr_max[2] = 3840.0 / 4095.0;
+		} else {
+			assert(false);
+		}
 	}
 }
 
diff --git a/ycbcr_conversion_effect_test.cpp b/ycbcr_conversion_effect_test.cpp
index c876267..e532ad3 100644
--- a/ycbcr_conversion_effect_test.cpp
+++ b/ycbcr_conversion_effect_test.cpp
@@ -437,4 +437,60 @@ TEST(YCbCrConversionEffectTest, ChangeOutputFormat) {
 	expect_equal(cr, out_cr, width, height);
 }
 
+TEST(YCbCrConversionEffectTest, TenBitOutput) {  // Checks limited-range, 10-bit Rec. 709 Y'CbCr output read back as packed 10:10:10:2 words.
+	const int width = 1;
+	const int height = 5;  // One row per test color below.
+
+	// Pure-color test inputs.
+	float data[width * height * 4] = {
+		0.0f, 0.0f, 0.0f, 1.0f,  // Black.
+		1.0f, 1.0f, 1.0f, 1.0f,  // White.
+		1.0f, 0.0f, 0.0f, 1.0f,  // Red.
+		0.0f, 1.0f, 0.0f, 1.0f,  // Green.
+		0.0f, 0.0f, 1.0f, 1.0f,  // Blue.
+	};
+	uint32_t out_data[width * height];  // One packed 32-bit word per pixel.
+	int expanded_out_data[width * height * 4];  // Four unpacked components per pixel.
+	int expected_data[width * height * 4] = {
+		// Expected results, calculated using formulas 3.2, 3.3 and 3.4
+		// from Rec. 709. (Except the first two, which are obvious
+		// given the 64–940 range of luminance.)
+		 64, 512, 512, 3,
+		940, 512, 512, 3,
+		250, 409, 960, 3,
+		691, 167, 105, 3,
+		127, 960, 471, 3,
+	};
+
+	EffectChainTester tester(NULL, width, height, FORMAT_GRAYSCALE, COLORSPACE_sRGB, GAMMA_LINEAR, GL_RGB10_A2);
+	tester.add_input(data, FORMAT_RGBA_POSTMULTIPLIED_ALPHA, COLORSPACE_sRGB, GAMMA_sRGB);
+
+	ImageFormat format;
+	format.color_space = COLORSPACE_sRGB;
+	format.gamma_curve = GAMMA_sRGB;
+
+	YCbCrFormat ycbcr_format;
+	ycbcr_format.luma_coefficients = YCBCR_REC_709;
+	ycbcr_format.full_range = false;
+	ycbcr_format.num_levels = 1024;  // 2^10, i.e. 10-bit Y'CbCr.
+	ycbcr_format.chroma_subsampling_x = 1;
+	ycbcr_format.chroma_subsampling_y = 1;
+	ycbcr_format.cb_x_position = 0.5f;
+	ycbcr_format.cb_y_position = 0.5f;
+	ycbcr_format.cr_x_position = 0.5f;
+	ycbcr_format.cr_y_position = 0.5f;
+
+	tester.add_ycbcr_output(format, OUTPUT_ALPHA_FORMAT_POSTMULTIPLIED, ycbcr_format);
+	tester.run_10_10_10_2(out_data, GL_RGBA, COLORSPACE_sRGB, GAMMA_sRGB);
+
+	// Unpack 10:10:10:2 to 32:32:32:32.
+	for (unsigned i = 0; i < width * height; ++i) {
+		expanded_out_data[i * 4 + 0] = out_data[i] & 0x3ff;  // Bits 0-9.
+		expanded_out_data[i * 4 + 1] = (out_data[i] >> 10) & 0x3ff;  // Bits 10-19.
+		expanded_out_data[i * 4 + 2] = (out_data[i] >> 20) & 0x3ff;  // Bits 20-29.
+		expanded_out_data[i * 4 + 3] = (out_data[i] >> 30);  // Top two bits (30-31).
+	}
+	expect_equal(expected_data, expanded_out_data, 4 * width, height);  // Each pixel contributes four ints, hence the 4 * width row length.
+}
+
 }  // namespace movit
-- 
2.39.2