Add support for 10- and 12-bit planar Y'CbCr inputs.
author: Steinar H. Gunderson <sgunderson@bigfoot.com>
Tue, 14 Feb 2017 21:26:28 +0000 (22:26 +0100)
committer: Steinar H. Gunderson <sgunderson@bigfoot.com>
Tue, 14 Feb 2017 21:26:28 +0000 (22:26 +0100)
This is mostly for completeness; at least for 10-bit, 10:10:10:2
should be a faster format. However, it's nice to allow direct
subsampled inputs _somehow_.

resource_pool.cpp
ycbcr_input.cpp
ycbcr_input.h
ycbcr_input_test.cpp

index b560f15..5b04999 100644 (file)
@@ -307,6 +307,7 @@ GLuint ResourcePool::create_2d_texture(GLint internal_format, GLsizei width, GLs
        switch (internal_format) {
        case GL_RGBA32F_ARB:
        case GL_RGBA16F_ARB:
+       case GL_RGBA16:
        case GL_RGBA8:
        case GL_RGB10_A2:
        case GL_SRGB8_ALPHA8:
@@ -314,6 +315,7 @@ GLuint ResourcePool::create_2d_texture(GLint internal_format, GLsizei width, GLs
                break;
        case GL_RGB32F:
        case GL_RGB16F:
+       case GL_RGB16:
        case GL_R11F_G11F_B10F:
        case GL_RGB8:
        case GL_RGB10:
@@ -324,11 +326,13 @@ GLuint ResourcePool::create_2d_texture(GLint internal_format, GLsizei width, GLs
                break;
        case GL_RG32F:
        case GL_RG16F:
+       case GL_RG16:
        case GL_RG8:
                format = GL_RG;
                break;
        case GL_R32F:
        case GL_R16F:
+       case GL_R16:
        case GL_R8:
                format = GL_RED;
                break;
@@ -352,6 +356,12 @@ GLuint ResourcePool::create_2d_texture(GLint internal_format, GLsizei width, GLs
        case GL_R16F:
                type = GL_FLOAT;
                break;
+       case GL_RGBA16:
+       case GL_RGB16:
+       case GL_RG16:
+       case GL_R16:
+               type = GL_UNSIGNED_SHORT;
+               break;
        case GL_SRGB8_ALPHA8:
        case GL_SRGB8:
        case GL_RGBA8:
@@ -618,6 +628,18 @@ size_t ResourcePool::estimate_texture_size(const Texture2D &texture_format)
        case GL_RGB565:
                bytes_per_pixel = 2;
                break;
+       case GL_RGBA16:
+               bytes_per_pixel = 8;
+               break;
+       case GL_RGB16:
+               bytes_per_pixel = 6;
+               break;
+       case GL_RG16:
+               bytes_per_pixel = 4;
+               break;
+       case GL_R16:
+               bytes_per_pixel = 2;
+               break;
        default:
                // TODO: Add more here as needed.
                assert(false);
index f8df0c1..f748f1c 100644 (file)
@@ -74,19 +74,30 @@ void YCbCrInput::set_gl_state(GLuint glsl_program_num, const string& prefix, uns
                                if (type == GL_UNSIGNED_INT_2_10_10_10_REV) {
                                        format = GL_RGBA;
                                        internal_format = GL_RGB10_A2;
+                               } else if (type == GL_UNSIGNED_SHORT) {
+                                       format = GL_RGB;
+                                       internal_format = GL_RGB16;
                                } else {
                                        assert(type == GL_UNSIGNED_BYTE);
                                        format = GL_RGB;
                                        internal_format = GL_RGB8;
                                }
                        } else if (channel == 1 && ycbcr_input_splitting == YCBCR_INPUT_SPLIT_Y_AND_CBCR) {
-                               assert(type == GL_UNSIGNED_BYTE);
                                format = GL_RG;
-                               internal_format = GL_RG8;
+                               if (type == GL_UNSIGNED_SHORT) {
+                                       internal_format = GL_RG16;
+                               } else {
+                                       assert(type == GL_UNSIGNED_BYTE);
+                                       internal_format = GL_RG8;
+                               }
                        } else {
-                               assert(type == GL_UNSIGNED_BYTE);
                                format = GL_RED;
-                               internal_format = GL_R8;
+                               if (type == GL_UNSIGNED_SHORT) {
+                                       internal_format = GL_R16;
+                               } else {
+                                       assert(type == GL_UNSIGNED_BYTE);
+                                       internal_format = GL_R8;
+                               }
                        }
 
                        // (Re-)upload the texture.
@@ -135,6 +146,19 @@ string YCbCrInput::output_fragment_shader()
        Matrix3d ycbcr_to_rgb;
        compute_ycbcr_matrix(ycbcr_format, offset, &ycbcr_to_rgb);
 
+       if (type == GL_UNSIGNED_SHORT) {
+               // For 10-bit or 12-bit packed into 16-bit, we need to scale the values
+               // so that the max value goes from 1023 (or 4095) to 65535. We do this
+               // by folding the scaling into the conversion matrix, so it comes essentially
+               // for free. However, the offset is before the scaling (and thus assumes
+               // correctly scaled values), so we need to adjust that the other way.
+               double scale = 65535.0 / (ycbcr_format.num_levels - 1);
+               offset[0] /= scale;
+               offset[1] /= scale;
+               offset[2] /= scale;
+               ycbcr_to_rgb *= scale;
+       }
+
        string frag_shader;
 
        frag_shader = output_glsl_mat3("PREFIX(inv_ycbcr_matrix)", ycbcr_to_rgb);
index 31ba42b..4e8f194 100644 (file)
@@ -9,12 +9,14 @@
 //   * 8-bit semiplanar Y'CbCr (Y' in one plane, CbCr in another),
 //     possibly subsampled.
 //   * 8-bit interleaved (chunked) Y'CbCr, no subsampling (4:4:4 only).
+//   * All of the above in 10- and 12-bit versions, where each sample is
+//     stored in a 16-bit int (so the 6 or 4 top bits are wasted).
 //   * 10-bit interleaved (chunked) Y'CbCr packed into 32-bit words
 //     (10:10:10:2), no subsampling (4:4:4 only).
 //
-// For the former case, it upsamples planes as needed, using the default linear
-// upsampling OpenGL gives you. Note that YCbCr422InterleavedInput supports the
-// important special case of 8-bit 4:2:2 interleaved.
+// For the planar and semiplanar cases, it upsamples planes as needed, using
+// the default linear upsampling OpenGL gives you. Note that YCbCr422InterleavedInput
+// supports the important special case of 8-bit 4:2:2 interleaved.
 
 #include <epoxy/gl.h>
 #include <assert.h>
@@ -49,7 +51,8 @@ enum YCbCrInputSplitting {
 
 class YCbCrInput : public Input {
 public:
-       // Type can be GL_UNSIGNED_BYTE for 8-bit, or GL_UNSIGNED_INT_2_10_10_10_REV
+       // Type can be GL_UNSIGNED_BYTE for 8-bit, GL_UNSIGNED_SHORT for 10- or 12-bit
+       // (or 8-bit, although that's a bit useless), or GL_UNSIGNED_INT_2_10_10_10_REV
        // for 10-bit (YCBCR_INPUT_INTERLEAVED only).
        YCbCrInput(const ImageFormat &image_format,
                   const YCbCrFormat &ycbcr_format,
@@ -93,6 +96,15 @@ public:
                invalidate_pixel_data();
        }
 
+       void set_pixel_data(unsigned channel, const uint16_t *pixel_data, GLuint pbo = 0)  // 16-bit-sample overload: records the data pointer and PBO for one channel.
+       {
+               assert(type == GL_UNSIGNED_SHORT);  // Only valid when this input's type is GL_UNSIGNED_SHORT.
+               assert(channel >= 0 && channel < num_channels);  // NOTE(review): channel is unsigned, so ">= 0" is always true; only the upper bound checks anything.
+               this->pixel_data[channel] = reinterpret_cast<const unsigned char *>(pixel_data);  // Kept as a byte pointer; the pointer is stored, the samples are not copied here.
+               this->pbos[channel] = pbo;
+               invalidate_pixel_data();  // Presumably forces a re-upload on next use -- confirm against its definition.
+       }
+
        void set_pixel_data(unsigned channel, const uint32_t *pixel_data, GLuint pbo = 0)
        {
                assert(type == GL_UNSIGNED_INT_2_10_10_10_REV);
index 7792b28..1d1e23e 100644 (file)
@@ -858,4 +858,69 @@ TEST(YCbCrInputTest, TenBitInterleaved) {
        expect_equal(expected_data, out_data, 4 * width, height, 0.002, 0.0003);
 }
 
+TEST(YCbCrInputTest, TenBitPlanar) {  // Feeds three separate uint16_t planes holding 10-bit samples and checks the RGBA output.
+       const int width = 1;
+       const int height = 5;  // Five pixels in a single column.
+
+       // The same data as TenBitInterleaved, but split.
+       uint16_t y[width * height] = {  // Passed as channel 0 below.
+                 64,
+                940,
+                250,
+                691,
+                127,
+       };
+       uint16_t cb[width * height] = {  // Passed as channel 1 below.
+                512,
+                512,
+                409,
+                167,
+                960,
+       };
+       uint16_t cr[width * height] = {  // Passed as channel 2 below.
+                512,
+                512,
+                960,
+                105,
+                471,
+       };
+       float expected_data[4 * width * height] = {  // One RGBA row per pixel: black, white, red, green, blue.
+               0.0, 0.0, 0.0, 1.0,
+               1.0, 1.0, 1.0, 1.0,
+               1.0, 0.0, 0.0, 1.0,
+               0.0, 1.0, 0.0, 1.0,
+               0.0, 0.0, 1.0, 1.0,
+       };
+       float out_data[4 * width * height];
+
+       EffectChainTester tester(NULL, width, height);  // The input is added by hand below.
+
+       ImageFormat format;
+       format.color_space = COLORSPACE_sRGB;
+       format.gamma_curve = GAMMA_sRGB;
+
+       YCbCrFormat ycbcr_format;
+       ycbcr_format.luma_coefficients = YCBCR_REC_709;
+       ycbcr_format.full_range = false;  // Limited range: the expected output maps Y'=64 to black and Y'=940 to white.
+       ycbcr_format.num_levels = 1024;  // 10-bit.
+       ycbcr_format.chroma_subsampling_x = 1;  // 1 in both directions; cb/cr above are the same size as y.
+       ycbcr_format.chroma_subsampling_y = 1;
+       ycbcr_format.cb_x_position = 0.5f;
+       ycbcr_format.cb_y_position = 0.5f;
+       ycbcr_format.cr_x_position = 0.5f;
+       ycbcr_format.cr_y_position = 0.5f;
+
+       YCbCrInput *input = new YCbCrInput(format, ycbcr_format, width, height, YCBCR_INPUT_PLANAR, GL_UNSIGNED_SHORT);  // NOTE(review): raw new; presumably the chain takes ownership in add_input() -- confirm.
+       input->set_pixel_data(0, y);  // uint16_t overload, which asserts type == GL_UNSIGNED_SHORT.
+       input->set_pixel_data(1, cb);
+       input->set_pixel_data(2, cr);
+       tester.get_chain()->add_input(input);
+
+       tester.run(out_data, GL_RGBA, COLORSPACE_sRGB, GAMMA_sRGB);
+
+       // We can set much tighter limits on this than 8-bit Y'CbCr;
+       // even tighter than the default limits.
+       expect_equal(expected_data, out_data, 4 * width, height, 0.002, 0.0003);
+}
+
 }  // namespace movit