From 76d3f4f3c75111cc8c59a08396c5ad60c712c9a5 Mon Sep 17 00:00:00 2001 From: "Steinar H. Gunderson" Date: Tue, 14 Feb 2017 22:26:28 +0100 Subject: [PATCH] Add support for 10- and 12-bit planar Y'CbCr inputs. This is mostly for completeness; at least for 10-bit, 10:10:10:2 should be a faster format. However, it's nice to allow direct subsampled inputs _somehow_. --- resource_pool.cpp | 22 +++++++++++++++ ycbcr_input.cpp | 32 +++++++++++++++++++--- ycbcr_input.h | 20 +++++++++++--- ycbcr_input_test.cpp | 65 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 131 insertions(+), 8 deletions(-) diff --git a/resource_pool.cpp b/resource_pool.cpp index b560f15..5b04999 100644 --- a/resource_pool.cpp +++ b/resource_pool.cpp @@ -307,6 +307,7 @@ GLuint ResourcePool::create_2d_texture(GLint internal_format, GLsizei width, GLs switch (internal_format) { case GL_RGBA32F_ARB: case GL_RGBA16F_ARB: + case GL_RGBA16: case GL_RGBA8: case GL_RGB10_A2: case GL_SRGB8_ALPHA8: @@ -314,6 +315,7 @@ GLuint ResourcePool::create_2d_texture(GLint internal_format, GLsizei width, GLs break; case GL_RGB32F: case GL_RGB16F: + case GL_RGB16: case GL_R11F_G11F_B10F: case GL_RGB8: case GL_RGB10: @@ -324,11 +326,13 @@ GLuint ResourcePool::create_2d_texture(GLint internal_format, GLsizei width, GLs break; case GL_RG32F: case GL_RG16F: + case GL_RG16: case GL_RG8: format = GL_RG; break; case GL_R32F: case GL_R16F: + case GL_R16: case GL_R8: format = GL_RED; break; @@ -352,6 +356,12 @@ GLuint ResourcePool::create_2d_texture(GLint internal_format, GLsizei width, GLs case GL_R16F: type = GL_FLOAT; break; + case GL_RGBA16: + case GL_RGB16: + case GL_RG16: + case GL_R16: + type = GL_UNSIGNED_SHORT; + break; case GL_SRGB8_ALPHA8: case GL_SRGB8: case GL_RGBA8: @@ -618,6 +628,18 @@ size_t ResourcePool::estimate_texture_size(const Texture2D &texture_format) case GL_RGB565: bytes_per_pixel = 2; break; + case GL_RGBA16: + bytes_per_pixel = 8; + break; + case GL_RGB16: + bytes_per_pixel = 6; + break; + 
case GL_RG16: + bytes_per_pixel = 4; + break; + case GL_R16: + bytes_per_pixel = 2; + break; default: // TODO: Add more here as needed. assert(false); diff --git a/ycbcr_input.cpp b/ycbcr_input.cpp index f8df0c1..f748f1c 100644 --- a/ycbcr_input.cpp +++ b/ycbcr_input.cpp @@ -74,19 +74,30 @@ void YCbCrInput::set_gl_state(GLuint glsl_program_num, const string& prefix, uns if (type == GL_UNSIGNED_INT_2_10_10_10_REV) { format = GL_RGBA; internal_format = GL_RGB10_A2; + } else if (type == GL_UNSIGNED_SHORT) { + format = GL_RGB; + internal_format = GL_RGB16; } else { assert(type == GL_UNSIGNED_BYTE); format = GL_RGB; internal_format = GL_RGB8; } } else if (channel == 1 && ycbcr_input_splitting == YCBCR_INPUT_SPLIT_Y_AND_CBCR) { - assert(type == GL_UNSIGNED_BYTE); format = GL_RG; - internal_format = GL_RG8; + if (type == GL_UNSIGNED_SHORT) { + internal_format = GL_RG16; + } else { + assert(type == GL_UNSIGNED_BYTE); + internal_format = GL_RG8; + } } else { - assert(type == GL_UNSIGNED_BYTE); format = GL_RED; - internal_format = GL_R8; + if (type == GL_UNSIGNED_SHORT) { + internal_format = GL_R16; + } else { + assert(type == GL_UNSIGNED_BYTE); + internal_format = GL_R8; + } } // (Re-)upload the texture. @@ -135,6 +146,19 @@ string YCbCrInput::output_fragment_shader() Matrix3d ycbcr_to_rgb; compute_ycbcr_matrix(ycbcr_format, offset, &ycbcr_to_rgb); + if (type == GL_UNSIGNED_SHORT) { + // For 10-bit or 12-bit packed into 16-bit, we need to scale the values + // so that the max value goes from 1023 (or 4095) to 65535. We do this + // by folding the scaling into the conversion matrix, so it comes essentially + // for free. However, the offset is before the scaling (and thus assumes + // correctly scaled values), so we need to adjust that the other way. 
+ double scale = 65535.0 / (ycbcr_format.num_levels - 1); + offset[0] /= scale; + offset[1] /= scale; + offset[2] /= scale; + ycbcr_to_rgb *= scale; + } + string frag_shader; frag_shader = output_glsl_mat3("PREFIX(inv_ycbcr_matrix)", ycbcr_to_rgb); diff --git a/ycbcr_input.h b/ycbcr_input.h index 31ba42b..4e8f194 100644 --- a/ycbcr_input.h +++ b/ycbcr_input.h @@ -9,12 +9,14 @@ // * 8-bit semiplanar Y'CbCr (Y' in one plane, CbCr in another), // possibly subsampled. // * 8-bit interleaved (chunked) Y'CbCr, no subsampling (4:4:4 only). +// * All of the above in 10- and 12-bit versions, where each sample is +// stored in a 16-bit int (so the 6 or 4 top bits are wasted). // * 10-bit interleaved (chunked) Y'CbCr packed into 32-bit words // (10:10:10:2), no subsampling (4:4:4 only). // -// For the former case, it upsamples planes as needed, using the default linear -// upsampling OpenGL gives you. Note that YCbCr422InterleavedInput supports the -// important special case of 8-bit 4:2:2 interleaved. +// For the planar and semiplanar cases, it upsamples planes as needed, using +// the default linear upsampling OpenGL gives you. Note that YCbCr422InterleavedInput +// supports the important special case of 8-bit 4:2:2 interleaved. #include #include @@ -49,7 +51,8 @@ enum YCbCrInputSplitting { class YCbCrInput : public Input { public: - // Type can be GL_UNSIGNED_BYTE for 8-bit, or GL_UNSIGNED_INT_2_10_10_10_REV + // Type can be GL_UNSIGNED_BYTE for 8-bit, GL_UNSIGNED_SHORT for 10- or 12-bit + // (or 8-bit, although that's a bit useless), or GL_UNSIGNED_INT_2_10_10_10_REV // for 10-bit (YCBCR_INPUT_INTERLEAVED only). 
YCbCrInput(const ImageFormat &image_format, const YCbCrFormat &ycbcr_format, @@ -93,6 +96,15 @@ public: invalidate_pixel_data(); } + void set_pixel_data(unsigned channel, const uint16_t *pixel_data, GLuint pbo = 0) + { + assert(type == GL_UNSIGNED_SHORT); + assert(channel >= 0 && channel < num_channels); + this->pixel_data[channel] = reinterpret_cast<const unsigned char *>(pixel_data); + this->pbos[channel] = pbo; + invalidate_pixel_data(); + } + void set_pixel_data(unsigned channel, const uint32_t *pixel_data, GLuint pbo = 0) { assert(type == GL_UNSIGNED_INT_2_10_10_10_REV); diff --git a/ycbcr_input_test.cpp b/ycbcr_input_test.cpp index 7792b28..1d1e23e 100644 --- a/ycbcr_input_test.cpp +++ b/ycbcr_input_test.cpp @@ -858,4 +858,69 @@ TEST(YCbCrInputTest, TenBitInterleaved) { expect_equal(expected_data, out_data, 4 * width, height, 0.002, 0.0003); } +TEST(YCbCrInputTest, TenBitPlanar) { + const int width = 1; + const int height = 5; + + // The same data as TenBitInterleaved, but split. + uint16_t y[width * height] = { + 64, + 940, + 250, + 691, + 127, + }; + uint16_t cb[width * height] = { + 512, + 512, + 409, + 167, + 960, + }; + uint16_t cr[width * height] = { + 512, + 512, + 960, + 105, + 471, + }; + float expected_data[4 * width * height] = { + 0.0, 0.0, 0.0, 1.0, + 1.0, 1.0, 1.0, 1.0, + 1.0, 0.0, 0.0, 1.0, + 0.0, 1.0, 0.0, 1.0, + 0.0, 0.0, 1.0, 1.0, + }; + float out_data[4 * width * height]; + + EffectChainTester tester(NULL, width, height); + + ImageFormat format; + format.color_space = COLORSPACE_sRGB; + format.gamma_curve = GAMMA_sRGB; + + YCbCrFormat ycbcr_format; + ycbcr_format.luma_coefficients = YCBCR_REC_709; + ycbcr_format.full_range = false; + ycbcr_format.num_levels = 1024; // 10-bit. 
+ ycbcr_format.chroma_subsampling_x = 1; + ycbcr_format.chroma_subsampling_y = 1; + ycbcr_format.cb_x_position = 0.5f; + ycbcr_format.cb_y_position = 0.5f; + ycbcr_format.cr_x_position = 0.5f; + ycbcr_format.cr_y_position = 0.5f; + + YCbCrInput *input = new YCbCrInput(format, ycbcr_format, width, height, YCBCR_INPUT_PLANAR, GL_UNSIGNED_SHORT); + input->set_pixel_data(0, y); + input->set_pixel_data(1, cb); + input->set_pixel_data(2, cr); + tester.get_chain()->add_input(input); + + tester.run(out_data, GL_RGBA, COLORSPACE_sRGB, GAMMA_sRGB); + + // We can set much tighter limits on this than 8-bit Y'CbCr; + // even tighter than the default limits. + expect_equal(expected_data, out_data, 4 * width, height, 0.002, 0.0003); +} + } // namespace movit -- 2.39.2