From f44c81569a268efea44f1f6df03a000711b18ffc Mon Sep 17 00:00:00 2001 From: "Steinar H. Gunderson" Date: Mon, 13 Mar 2017 20:12:50 +0100 Subject: [PATCH] Support 10-/12-bit Y'CbCr output packed in 16-bit. This mirrors our existing input support, and makes planar and semiplanar output possible for 10-/12-bit Y'CbCr. ABI break. API stays the same. --- effect_chain.cpp | 7 +++-- effect_chain.h | 11 ++++++-- test_util.cpp | 34 ++++++++++++++++++++--- test_util.h | 4 ++- version.h | 2 +- ycbcr.cpp | 20 +++++++++++++- ycbcr.h | 9 ++++++- ycbcr_conversion_effect.cpp | 10 ++++--- ycbcr_conversion_effect.h | 3 ++- ycbcr_conversion_effect_test.cpp | 46 ++++++++++++++++++++++++++++++++ ycbcr_input.cpp | 15 +---------- 11 files changed, 132 insertions(+), 29 deletions(-) diff --git a/effect_chain.cpp b/effect_chain.cpp index 90fadb7..efd4abc 100644 --- a/effect_chain.cpp +++ b/effect_chain.cpp @@ -97,7 +97,8 @@ void EffectChain::add_output(const ImageFormat &format, OutputAlphaFormat alpha_ } void EffectChain::add_ycbcr_output(const ImageFormat &format, OutputAlphaFormat alpha_format, - const YCbCrFormat &ycbcr_format, YCbCrOutputSplitting output_splitting) + const YCbCrFormat &ycbcr_format, YCbCrOutputSplitting output_splitting, + GLenum output_type) { assert(!finalized); assert(num_output_color_ycbcr < 2); @@ -111,8 +112,10 @@ void EffectChain::add_ycbcr_output(const ImageFormat &format, OutputAlphaFormat assert(output_ycbcr_format.num_levels == ycbcr_format.num_levels); assert(output_ycbcr_format.chroma_subsampling_x == 1); assert(output_ycbcr_format.chroma_subsampling_y == 1); + assert(output_ycbcr_type == output_type); } else { output_ycbcr_format = ycbcr_format; + output_ycbcr_type = output_type; } output_ycbcr_splitting[num_output_color_ycbcr++] = output_splitting; @@ -1644,7 +1647,7 @@ void EffectChain::add_ycbcr_conversion_if_needed() return; } Node *output = find_output_node(); - ycbcr_conversion_effect_node = add_node(new 
YCbCrConversionEffect(output_ycbcr_format)); + ycbcr_conversion_effect_node = add_node(new YCbCrConversionEffect(output_ycbcr_format, output_ycbcr_type)); connect_nodes(output, ycbcr_conversion_effect_node); } diff --git a/effect_chain.h b/effect_chain.h index ffa9389..b753ad4 100644 --- a/effect_chain.h +++ b/effect_chain.h @@ -264,7 +264,7 @@ public: void add_output(const ImageFormat &format, OutputAlphaFormat alpha_format); // Adds an YCbCr output. Note that you can only have at most two Y'CbCr - // outputs, and they must have the same <ycbcr_format>. + // outputs, and they must have the same <ycbcr_format> and <output_type>. // (This limitation may be lifted in the future, to allow e.g. simultaneous // 8- and 10-bit output. Currently, multiple Y'CbCr outputs are only // useful in some very limited circumstances, like if one texture goes @@ -272,13 +272,19 @@ public: // // Only 4:4:4 output is supported due to fragment shader limitations, // so chroma_subsampling_x and chroma_subsampling_y must both be 1. + // <output_type> should match the data type of the FBO you are rendering to, + // so that if you use 16-bit output (GL_UNSIGNED_SHORT), you will get + // 8-, 10- or 12-bit output correctly as determined by <ycbcr_format.num_levels>. + // Using e.g. ycbcr_format.num_levels == 1024 with GL_UNSIGNED_BYTE is + // nonsensical and invokes undefined behavior. // // If you have both RGBA and Y'CbCr output(s), the RGBA output will come // in the last draw buffer. Also, <format> and <alpha_format> must be // identical between the two. void add_ycbcr_output(const ImageFormat &format, OutputAlphaFormat alpha_format, const YCbCrFormat &ycbcr_format, - YCbCrOutputSplitting output_splitting = YCBCR_OUTPUT_INTERLEAVED); + YCbCrOutputSplitting output_splitting = YCBCR_OUTPUT_INTERLEAVED, + GLenum output_type = GL_UNSIGNED_BYTE); // Change Y'CbCr output format. (This can be done also after finalize()). // Note that you are not allowed to change subsampling parameters; @@ -494,6 +500,7 @@ private: bool output_color_rgba; int num_output_color_ycbcr; // Max 2.
YCbCrFormat output_ycbcr_format; // If num_output_color_ycbcr is > 0. + GLenum output_ycbcr_type; // If num_output_color_ycbcr is > 0. YCbCrOutputSplitting output_ycbcr_splitting[2]; // If num_output_color_ycbcr is > N. std::vector nodes; diff --git a/test_util.cpp b/test_util.cpp index ae95847..4543ee3 100644 --- a/test_util.cpp +++ b/test_util.cpp @@ -143,6 +143,11 @@ void EffectChainTester::run(unsigned char *out_data, unsigned char *out_data2, u internal_run(out_data, out_data2, out_data3, out_data4, GL_UNSIGNED_BYTE, format, color_space, gamma_curve, alpha_format); } +void EffectChainTester::run(uint16_t *out_data, GLenum format, Colorspace color_space, GammaCurve gamma_curve, OutputAlphaFormat alpha_format) +{ + internal_run(out_data, NULL, NULL, NULL, GL_UNSIGNED_SHORT, format, color_space, gamma_curve, alpha_format); +} + void EffectChainTester::run_10_10_10_2(uint32_t *out_data, GLenum format, Colorspace color_space, GammaCurve gamma_curve, OutputAlphaFormat alpha_format) { internal_run(out_data, NULL, NULL, NULL, GL_UNSIGNED_INT_2_10_10_10_REV, format, color_space, gamma_curve, alpha_format); @@ -158,6 +163,8 @@ void EffectChainTester::internal_run(T *out_data, T *out_data2, T *out_data3, T GLuint type; if (framebuffer_format == GL_RGBA8) { type = GL_UNSIGNED_BYTE; + } else if (framebuffer_format == GL_RGBA16) { + type = GL_UNSIGNED_SHORT; } else if (framebuffer_format == GL_RGBA16F || framebuffer_format == GL_RGBA32F) { type = GL_FLOAT; } else if (framebuffer_format == GL_RGB10_A2) { @@ -239,7 +246,7 @@ void EffectChainTester::internal_run(T *out_data, T *out_data2, T *out_data3, T check_error(); } - if (format == GL_RGBA && (type == GL_UNSIGNED_BYTE || type == GL_FLOAT)) { + if (format == GL_RGBA && (type == GL_UNSIGNED_BYTE || type == GL_UNSIGNED_SHORT || type == GL_FLOAT)) { vertical_flip(ptr, width * 4, height); } else { vertical_flip(ptr, width, height); @@ -258,9 +265,9 @@ void EffectChainTester::add_output(const ImageFormat &format, 
OutputAlphaFormat output_added = true; } -void EffectChainTester::add_ycbcr_output(const ImageFormat &format, OutputAlphaFormat alpha_format, const YCbCrFormat &ycbcr_format, YCbCrOutputSplitting output_splitting) +void EffectChainTester::add_ycbcr_output(const ImageFormat &format, OutputAlphaFormat alpha_format, const YCbCrFormat &ycbcr_format, YCbCrOutputSplitting output_splitting, GLenum type) { - chain.add_ycbcr_output(format, alpha_format, ycbcr_format, output_splitting); + chain.add_ycbcr_output(format, alpha_format, ycbcr_format, output_splitting, type); output_added = true; } @@ -346,6 +353,27 @@ void expect_equal(const unsigned char *ref, const unsigned char *result, unsigne delete[] result_float; } +void expect_equal(const uint16_t *ref, const uint16_t *result, unsigned width, unsigned height, unsigned largest_difference_limit, float rms_limit) +{ + assert(width > 0); + assert(height > 0); + + float *ref_float = new float[width * height]; + float *result_float = new float[width * height]; + + for (unsigned y = 0; y < height; ++y) { + for (unsigned x = 0; x < width; ++x) { + ref_float[y * width + x] = ref[y * width + x]; + result_float[y * width + x] = result[y * width + x]; + } + } + + expect_equal(ref_float, result_float, width, height, largest_difference_limit, rms_limit); + + delete[] ref_float; + delete[] result_float; +} + void expect_equal(const int *ref, const int *result, unsigned width, unsigned height, unsigned largest_difference_limit, float rms_limit) { assert(width > 0); diff --git a/test_util.h b/test_util.h index e0195be..2f76dea 100644 --- a/test_util.h +++ b/test_util.h @@ -29,9 +29,10 @@ public: void run(unsigned char *out_data, unsigned char *out_data2, GLenum format, Colorspace color_space, GammaCurve gamma_curve, OutputAlphaFormat alpha_format = OUTPUT_ALPHA_FORMAT_POSTMULTIPLIED); void run(unsigned char *out_data, unsigned char *out_data2, unsigned char *out_data3, GLenum format, Colorspace color_space, GammaCurve gamma_curve, 
OutputAlphaFormat alpha_format = OUTPUT_ALPHA_FORMAT_POSTMULTIPLIED); void run(unsigned char *out_data, unsigned char *out_data2, unsigned char *out_data3, unsigned char *out_data4, GLenum format, Colorspace color_space, GammaCurve gamma_curve, OutputAlphaFormat alpha_format = OUTPUT_ALPHA_FORMAT_POSTMULTIPLIED); + void run(uint16_t *out_data, GLenum format, Colorspace color_space, GammaCurve gamma_curve, OutputAlphaFormat alpha_format = OUTPUT_ALPHA_FORMAT_POSTMULTIPLIED); void run_10_10_10_2(uint32_t *out_data, GLenum format, Colorspace color_space, GammaCurve gamma_curve, OutputAlphaFormat alpha_format = OUTPUT_ALPHA_FORMAT_POSTMULTIPLIED); void add_output(const ImageFormat &format, OutputAlphaFormat alpha_format); - void add_ycbcr_output(const ImageFormat &format, OutputAlphaFormat alpha_format, const YCbCrFormat &ycbcr_format, YCbCrOutputSplitting output_splitting = YCBCR_OUTPUT_INTERLEAVED); + void add_ycbcr_output(const ImageFormat &format, OutputAlphaFormat alpha_format, const YCbCrFormat &ycbcr_format, YCbCrOutputSplitting output_splitting = YCBCR_OUTPUT_INTERLEAVED, GLenum output_type = GL_UNSIGNED_BYTE); private: void finalize_chain(Colorspace color_space, GammaCurve gamma_curve, OutputAlphaFormat alpha_format); @@ -48,6 +49,7 @@ private: void expect_equal(const float *ref, const float *result, unsigned width, unsigned height, float largest_difference_limit = 1.5 / 255.0, float rms_limit = 0.2 / 255.0); void expect_equal(const unsigned char *ref, const unsigned char *result, unsigned width, unsigned height, unsigned largest_difference_limit = 1, float rms_limit = 0.2); +void expect_equal(const uint16_t *ref, const uint16_t *result, unsigned width, unsigned height, unsigned largest_difference_limit = 1, float rms_limit = 0.2); void expect_equal(const int *ref, const int *result, unsigned width, unsigned height, unsigned largest_difference_limit = 1, float rms_limit = 0.2); void test_accuracy(const float *expected, const float *result, unsigned num_values, 
double absolute_error_limit, double relative_error_limit, double local_relative_error_limit, double rms_limit); diff --git a/version.h b/version.h index 165efc5..cc87124 100644 --- a/version.h +++ b/version.h @@ -5,6 +5,6 @@ // changes, even within git versions. There is no specific version // documentation outside the regular changelogs, though. -#define MOVIT_VERSION 27 +#define MOVIT_VERSION 28 #endif // !defined(_MOVIT_VERSION_H) diff --git a/ycbcr.cpp b/ycbcr.cpp index 8ae8d34..8c4f780 100644 --- a/ycbcr.cpp +++ b/ycbcr.cpp @@ -59,7 +59,7 @@ float compute_chroma_offset(float pos, unsigned subsampling_factor, unsigned res // Given , compute the values needed to turn Y'CbCr into R'G'B'; // first subtract the returned offset, then left-multiply the returned matrix // (the scaling is already folded into it). -void compute_ycbcr_matrix(YCbCrFormat ycbcr_format, float* offset, Matrix3d* ycbcr_to_rgb) +void compute_ycbcr_matrix(YCbCrFormat ycbcr_format, float* offset, Matrix3d* ycbcr_to_rgb, GLenum type, double *scale_factor) { double coeff[3], scale[3]; @@ -136,6 +136,24 @@ void compute_ycbcr_matrix(YCbCrFormat ycbcr_format, float* offset, Matrix3d* ycb // Fold in the scaling. *ycbcr_to_rgb *= Map(scale).asDiagonal(); + + if (type == GL_UNSIGNED_SHORT) { + // For 10-bit or 12-bit packed into 16-bit, we need to scale the values + // so that the max value goes from 1023 (or 4095) to 65535. We do this + // by folding the scaling into the conversion matrix, so it comes essentially + // for free. However, the offset is before the scaling (and thus assumes + // correctly scaled values), so we need to adjust that the other way. 
+ double scale = 65535.0 / (ycbcr_format.num_levels - 1); + offset[0] /= scale; + offset[1] /= scale; + offset[2] /= scale; + *ycbcr_to_rgb *= scale; + if (scale_factor != NULL) { + *scale_factor = scale; + } + } else if (scale_factor != NULL) { + *scale_factor = 1.0; + } } } // namespace movit diff --git a/ycbcr.h b/ycbcr.h index 6f9f4c9..1c55c39 100644 --- a/ycbcr.h +++ b/ycbcr.h @@ -39,6 +39,7 @@ #include "image_format.h" +#include #include namespace movit { @@ -75,7 +76,13 @@ float compute_chroma_offset(float pos, unsigned subsampling_factor, unsigned res // Given <ycbcr_format>, compute the values needed to turn Y'CbCr into R'G'B'; // first subtract the returned offset, then left-multiply the returned matrix // (the scaling is already folded into it). -void compute_ycbcr_matrix(YCbCrFormat ycbcr_format, float *offset, Eigen::Matrix3d *ycbcr_to_rgb); +// +// <type> is the data type you're rendering from; normally, it should match +// <ycbcr_format.num_levels>, but for the special case of 10- and 12-bit Y'CbCr, +// we support storing it in 16-bit formats, which incurs extra scaling factors. +// You can get that scaling factor in <scale_factor> if you want.
+void compute_ycbcr_matrix(YCbCrFormat ycbcr_format, float *offset, Eigen::Matrix3d *ycbcr_to_rgb, + GLenum type = GL_UNSIGNED_BYTE, double *scale_factor = NULL); } // namespace movit diff --git a/ycbcr_conversion_effect.cpp b/ycbcr_conversion_effect.cpp index 6d3e909..64d23cd 100644 --- a/ycbcr_conversion_effect.cpp +++ b/ycbcr_conversion_effect.cpp @@ -15,8 +15,8 @@ using namespace Eigen; namespace movit { -YCbCrConversionEffect::YCbCrConversionEffect(const YCbCrFormat &ycbcr_format) - : ycbcr_format(ycbcr_format) +YCbCrConversionEffect::YCbCrConversionEffect(const YCbCrFormat &ycbcr_format, GLenum type) + : ycbcr_format(ycbcr_format), type(type) { register_uniform_mat3("ycbcr_matrix", &uniform_ycbcr_matrix); register_uniform_vec3("offset", uniform_offset); @@ -37,7 +37,8 @@ void YCbCrConversionEffect::set_gl_state(GLuint glsl_program_num, const string & Effect::set_gl_state(glsl_program_num, prefix, sampler_num); Matrix3d ycbcr_to_rgb; - compute_ycbcr_matrix(ycbcr_format, uniform_offset, &ycbcr_to_rgb); + double scale_factor; + compute_ycbcr_matrix(ycbcr_format, uniform_offset, &ycbcr_to_rgb, type, &scale_factor); uniform_ycbcr_matrix = ycbcr_to_rgb.inverse(); @@ -74,6 +75,9 @@ void YCbCrConversionEffect::set_gl_state(GLuint glsl_program_num, const string & } else { assert(false); } + uniform_ycbcr_min[0] /= scale_factor; + uniform_ycbcr_min[1] /= scale_factor; + uniform_ycbcr_min[2] /= scale_factor; } } diff --git a/ycbcr_conversion_effect.h b/ycbcr_conversion_effect.h index ab31fd6..f57e5fa 100644 --- a/ycbcr_conversion_effect.h +++ b/ycbcr_conversion_effect.h @@ -18,7 +18,7 @@ class YCbCrConversionEffect : public Effect { private: // Should not be instantiated by end users; // call EffectChain::add_ycbcr_output() instead. 
- YCbCrConversionEffect(const YCbCrFormat &ycbcr_format); + YCbCrConversionEffect(const YCbCrFormat &ycbcr_format, GLenum type); friend class EffectChain; public: @@ -36,6 +36,7 @@ public: private: YCbCrFormat ycbcr_format; + GLenum type; Eigen::Matrix3d uniform_ycbcr_matrix; float uniform_offset[3]; diff --git a/ycbcr_conversion_effect_test.cpp b/ycbcr_conversion_effect_test.cpp index a35b9f1..27a10e3 100644 --- a/ycbcr_conversion_effect_test.cpp +++ b/ycbcr_conversion_effect_test.cpp @@ -576,4 +576,50 @@ TEST(YCbCrConversionEffectTest, TenBitOutput) { expect_equal(expected_data, expanded_out_data, 4 * width, height); } +TEST(YCbCrConversionEffectTest, TenBitOutputInSixteen) { + const int width = 1; + const int height = 5; + + // Same test inputs and outputs as TenBitOutput, except that alpha + // is 16 bits instead of two. + float data[width * height * 4] = { + 0.0f, 0.0f, 0.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 0.0f, 0.0f, 1.0f, + 0.0f, 1.0f, 0.0f, 1.0f, + 0.0f, 0.0f, 1.0f, 1.0f, + }; + uint16_t out_data[width * height * 4]; + uint16_t expected_data[width * height * 4] = { + 64, 512, 512, 65535, + 940, 512, 512, 65535, + 250, 409, 960, 65535, + 691, 167, 105, 65535, + 127, 960, 471, 65535, + }; + + EffectChainTester tester(NULL, width, height, FORMAT_GRAYSCALE, COLORSPACE_sRGB, GAMMA_LINEAR, GL_RGBA16); + tester.add_input(data, FORMAT_RGBA_POSTMULTIPLIED_ALPHA, COLORSPACE_sRGB, GAMMA_sRGB); + + ImageFormat format; + format.color_space = COLORSPACE_sRGB; + format.gamma_curve = GAMMA_sRGB; + + YCbCrFormat ycbcr_format; + ycbcr_format.luma_coefficients = YCBCR_REC_709; + ycbcr_format.full_range = false; + ycbcr_format.num_levels = 1024; + ycbcr_format.chroma_subsampling_x = 1; + ycbcr_format.chroma_subsampling_y = 1; + ycbcr_format.cb_x_position = 0.5f; + ycbcr_format.cb_y_position = 0.5f; + ycbcr_format.cr_x_position = 0.5f; + ycbcr_format.cr_y_position = 0.5f; + + tester.add_ycbcr_output(format, OUTPUT_ALPHA_FORMAT_POSTMULTIPLIED, ycbcr_format, 
YCBCR_OUTPUT_INTERLEAVED, GL_UNSIGNED_SHORT); + tester.run(out_data, GL_RGBA, COLORSPACE_sRGB, GAMMA_sRGB); + + expect_equal(expected_data, out_data, 4 * width, height); +} + } // namespace movit diff --git a/ycbcr_input.cpp b/ycbcr_input.cpp index 3e7cad9..e725750 100644 --- a/ycbcr_input.cpp +++ b/ycbcr_input.cpp @@ -153,20 +153,7 @@ string YCbCrInput::output_fragment_shader() { float offset[3]; Matrix3d ycbcr_to_rgb; - compute_ycbcr_matrix(ycbcr_format, offset, &ycbcr_to_rgb); - - if (type == GL_UNSIGNED_SHORT) { - // For 10-bit or 12-bit packed into 16-bit, we need to scale the values - // so that the max value goes from 1023 (or 4095) to 65535. We do this - // by folding the scaling into the conversion matrix, so it comes essentially - // for free. However, the offset is before the scaling (and thus assumes - // correctly scaled values), so we need to adjust that the other way. - double scale = 65535.0 / (ycbcr_format.num_levels - 1); - offset[0] /= scale; - offset[1] /= scale; - offset[2] /= scale; - ycbcr_to_rgb *= scale; - } + compute_ycbcr_matrix(ycbcr_format, offset, &ycbcr_to_rgb, type); string frag_shader; -- 2.39.2