From 9dcbd93164611ea111cc29519c18193d4f571ac1 Mon Sep 17 00:00:00 2001 From: "Steinar H. Gunderson" Date: Sun, 7 Oct 2012 12:32:30 +0200 Subject: [PATCH] Add YCbCr input. Required a bit of reworking of the sRGB extension stuff, but seems to work fine. --- effect_chain.cpp | 74 ++++++++++------ effect_chain.h | 4 +- flat_input.cpp | 6 +- flat_input.h | 9 +- image_format.h | 5 ++ input.h | 9 +- ycbcr_input.cpp | 220 +++++++++++++++++++++++++++++++++++++++++++++++ ycbcr_input.frag | 23 +++++ ycbcr_input.h | 95 ++++++++++++++++++++ 9 files changed, 410 insertions(+), 35 deletions(-) create mode 100644 ycbcr_input.cpp create mode 100644 ycbcr_input.frag create mode 100644 ycbcr_input.h diff --git a/effect_chain.cpp b/effect_chain.cpp index 82e9c5d..13f4634 100644 --- a/effect_chain.cpp +++ b/effect_chain.cpp @@ -60,48 +60,70 @@ void EffectChain::add_effect_raw(Effect *effect, const std::vector &in output_color_space[effect] = output_color_space[last_added_effect()]; } -// Set the "use_srgb_texture_format" option on all inputs that feed into this node, -// and update the output_gamma_curve[] map as we go. -// -// NOTE: We assume that the only way we could actually get GAMMA_sRGB from an -// effect (except from GammaCompressionCurve, which should never be inserted -// into a chain when this is called) is by pass-through from a texture. -// Thus, we can simply feed the flag up towards all inputs. 
-void EffectChain::set_use_srgb_texture_format(Effect *effect) +void EffectChain::find_all_nonlinear_inputs(Effect *effect, + std::vector *nonlinear_inputs, + std::vector *intermediates) { assert(output_gamma_curve.count(effect) != 0); - assert(output_gamma_curve[effect] == GAMMA_sRGB); + if (output_gamma_curve[effect] == GAMMA_LINEAR) { + return; + } if (effect->num_inputs() == 0) { - effect->set_int("use_srgb_texture_format", 1); + nonlinear_inputs->push_back(static_cast(effect)); } else { + intermediates->push_back(effect); + assert(incoming_links.count(effect) == 1); std::vector deps = incoming_links[effect]; assert(effect->num_inputs() == deps.size()); for (unsigned i = 0; i < deps.size(); ++i) { - set_use_srgb_texture_format(deps[i]); - assert(output_gamma_curve[deps[i]] == GAMMA_LINEAR); + find_all_nonlinear_inputs(deps[i], nonlinear_inputs, intermediates); } } - output_gamma_curve[effect] = GAMMA_LINEAR; } Effect *EffectChain::normalize_to_linear_gamma(Effect *input) { - assert(output_gamma_curve.count(input) != 0); - if (output_gamma_curve[input] == GAMMA_sRGB) { - // TODO: check if the extension exists - set_use_srgb_texture_format(input); - output_gamma_curve[input] = GAMMA_LINEAR; + // Find out if all the inputs can be set to deliver linear gamma. + // If so, we can just ask them to do that instead of inserting a + // (possibly expensive) conversion operation. + // + // NOTE: We assume that effects generally don't mess with the gamma + // curve (except GammaCompressionEffect, which should never be + // inserted into a chain when this is called), so that we can just + // update the output gamma as we go. + // + // TODO: Setting this flag for one source might confuse a different + // part of the pipeline using the same source. 
+ std::vector nonlinear_inputs; + std::vector intermediates; + find_all_nonlinear_inputs(input, &nonlinear_inputs, &intermediates); + + bool all_ok = true; + for (unsigned i = 0; i < nonlinear_inputs.size(); ++i) { + all_ok &= nonlinear_inputs[i]->can_output_linear_gamma(); + } + + if (all_ok) { + for (unsigned i = 0; i < nonlinear_inputs.size(); ++i) { + bool ok = nonlinear_inputs[i]->set_int("output_linear_gamma", 1); + assert(ok); + output_gamma_curve[nonlinear_inputs[i]] = GAMMA_LINEAR; + } + for (unsigned i = 0; i < intermediates.size(); ++i) { + output_gamma_curve[intermediates[i]] = GAMMA_LINEAR; + } return input; - } else { - GammaExpansionEffect *gamma_conversion = new GammaExpansionEffect(); - gamma_conversion->set_int("source_curve", output_gamma_curve[input]); - std::vector inputs; - inputs.push_back(input); - gamma_conversion->add_self_to_effect_chain(this, inputs); - output_gamma_curve[gamma_conversion] = GAMMA_LINEAR; - return gamma_conversion; } + + // OK, that didn't work. Insert a conversion effect. + GammaExpansionEffect *gamma_conversion = new GammaExpansionEffect(); + gamma_conversion->set_int("source_curve", output_gamma_curve[input]); + std::vector inputs; + inputs.push_back(input); + gamma_conversion->add_self_to_effect_chain(this, inputs); + output_gamma_curve[gamma_conversion] = GAMMA_LINEAR; + return gamma_conversion; } Effect *EffectChain::normalize_to_srgb(Effect *input) diff --git a/effect_chain.h b/effect_chain.h index 0b07b85..6795578 100644 --- a/effect_chain.h +++ b/effect_chain.h @@ -69,7 +69,9 @@ private: std::vector effects; // In order. 
}; - void set_use_srgb_texture_format(Effect *effect); + void find_all_nonlinear_inputs(Effect *effect, + std::vector *nonlinear_inputs, + std::vector *intermediates); Effect *normalize_to_linear_gamma(Effect *input); Effect *normalize_to_srgb(Effect *input); diff --git a/flat_input.cpp b/flat_input.cpp index 2541dc4..c7f417b 100644 --- a/flat_input.cpp +++ b/flat_input.cpp @@ -12,13 +12,13 @@ FlatInput::FlatInput(ImageFormat image_format, unsigned width, unsigned height) : image_format(image_format), needs_update(false), finalized(false), - use_srgb_texture_format(false), + output_linear_gamma(false), needs_mipmaps(false), width(width), height(height), pitch(width) { - register_int("use_srgb_texture_format", &use_srgb_texture_format); + register_int("output_linear_gamma", &output_linear_gamma); register_int("needs_mipmaps", &needs_mipmaps); } @@ -26,7 +26,7 @@ void FlatInput::finalize() { // Translate the input format to OpenGL's enums. GLenum internal_format; - if (use_srgb_texture_format) { + if (output_linear_gamma) { internal_format = GL_SRGB8; } else { internal_format = GL_RGBA8; diff --git a/flat_input.h b/flat_input.h index a906a5d..69c1a6f 100644 --- a/flat_input.h +++ b/flat_input.h @@ -14,13 +14,16 @@ public: // mipmap generation) at that point. void finalize(); + // TODO: Check that we actually have the required extension. + virtual bool can_output_linear_gamma() const { return true; } + std::string output_fragment_shader(); // Uploads the texture if it has changed since last time. void set_gl_state(GLuint glsl_program_num, const std::string& prefix, unsigned *sampler_num); - ColorSpace get_color_space() { return image_format.color_space; } - GammaCurve get_gamma_curve() { return image_format.gamma_curve; } + ColorSpace get_color_space() const { return image_format.color_space; } + GammaCurve get_gamma_curve() const { return image_format.gamma_curve; } // Tells the input where to fetch the actual pixel data. 
Note that if you change // this data, you must either call set_pixel_data() again (using the same pointer @@ -56,7 +59,7 @@ private: GLenum format; GLuint pbo, texture_num; bool needs_update, finalized; - int use_srgb_texture_format, needs_mipmaps; + int output_linear_gamma, needs_mipmaps; unsigned width, height, pitch, bytes_per_pixel; const unsigned char *pixel_data; }; diff --git a/image_format.h b/image_format.h index 346cdef..2c65724 100644 --- a/image_format.h +++ b/image_format.h @@ -17,6 +17,11 @@ enum GammaCurve { GAMMA_REC_709 = 2, // Same as Rec. 601. }; +enum YCbCrLumaCoefficients { + YCBCR_REC_601 = 0, + YCBCR_REC_709 = 1, +}; + struct ImageFormat { MovitPixelFormat pixel_format; ColorSpace color_space; diff --git a/input.h b/input.h index ac03e84..8687690 100644 --- a/input.h +++ b/input.h @@ -24,8 +24,13 @@ public: // mipmap generation) at that point. virtual void finalize() = 0; - virtual ColorSpace get_color_space() = 0; - virtual GammaCurve get_gamma_curve() = 0; + // Whether this input can deliver linear gamma directly if it's + // asked to. (If so, set the parameter “output_linear_gamma” + // to activate it.) 
+ virtual bool can_output_linear_gamma() const = 0; + + virtual ColorSpace get_color_space() const = 0; + virtual GammaCurve get_gamma_curve() const = 0; }; #endif // !defined(_INPUT_H) diff --git a/ycbcr_input.cpp b/ycbcr_input.cpp new file mode 100644 index 0000000..a0b9abf --- /dev/null +++ b/ycbcr_input.cpp @@ -0,0 +1,220 @@ +#define GL_GLEXT_PROTOTYPES 1 + +#include +#include +#include +#include + +#include "ycbcr_input.h" +#include "util.h" + +YCbCrInput::YCbCrInput(const ImageFormat &image_format, + const YCbCrFormat &ycbcr_format, + unsigned width, unsigned height) + : image_format(image_format), + ycbcr_format(ycbcr_format), + needs_update(false), + finalized(false), + needs_mipmaps(false), + width(width), + height(height) +{ + pitch[0] = pitch[1] = pitch[2] = width; + + assert(width % ycbcr_format.chroma_subsampling_x == 0); + widths[0] = width; + widths[1] = width / ycbcr_format.chroma_subsampling_x; + widths[2] = width / ycbcr_format.chroma_subsampling_x; + + assert(height % ycbcr_format.chroma_subsampling_y == 0); + heights[0] = height; + heights[1] = height / ycbcr_format.chroma_subsampling_y; + heights[2] = height / ycbcr_format.chroma_subsampling_y; + + register_int("needs_mipmaps", &needs_mipmaps); +} + +void YCbCrInput::finalize() +{ + // Create one PBO per plane to hold the input image data, and then the textures themselves. 
+ glGenBuffers(3, pbos); + check_error(); + glGenTextures(3, texture_num); + check_error(); + + for (unsigned channel = 0; channel < 3; ++channel) { + glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, pbos[channel]); + check_error(); + glBufferData(GL_PIXEL_UNPACK_BUFFER_ARB, pitch[channel] * heights[channel], NULL, GL_STREAM_DRAW); + check_error(); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0); + check_error(); + + glBindTexture(GL_TEXTURE_2D, texture_num[channel]); + check_error(); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + check_error(); + glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch[channel]); + check_error(); + glTexImage2D(GL_TEXTURE_2D, 0, GL_LUMINANCE8, widths[channel], heights[channel], 0, GL_LUMINANCE, GL_UNSIGNED_BYTE, NULL); + check_error(); + glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); + check_error(); + } + + needs_update = false; + finalized = true; +} + +void YCbCrInput::set_gl_state(GLuint glsl_program_num, const std::string& prefix, unsigned *sampler_num) +{ + for (unsigned channel = 0; channel < 3; ++channel) { + glActiveTexture(GL_TEXTURE0 + *sampler_num + channel); + check_error(); + glBindTexture(GL_TEXTURE_2D, texture_num[channel]); + check_error(); + + if (needs_update) { + // Copy the pixel data into the PBO. + glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, pbos[channel]); + check_error(); + void *mapped_pbo = glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_WRITE_ONLY); + memcpy(mapped_pbo, pixel_data[channel], pitch[channel] * heights[channel]); + glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB); + check_error(); + + // Re-upload the texture from the PBO. 
+ glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch[channel]); + check_error(); + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, widths[channel], heights[channel], GL_LUMINANCE, GL_UNSIGNED_BYTE, BUFFER_OFFSET(0)); + check_error(); + glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); + check_error(); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + check_error(); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + check_error(); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0); + check_error(); + } + } + + // Bind samplers. + set_uniform_int(glsl_program_num, prefix, "tex_y", *sampler_num + 0); + set_uniform_int(glsl_program_num, prefix, "tex_cb", *sampler_num + 1); + set_uniform_int(glsl_program_num, prefix, "tex_cr", *sampler_num + 2); + + *sampler_num += 3; + needs_update = false; +} + +std::string YCbCrInput::output_fragment_shader() +{ + float coeff[3], offset[3], scale[3]; + + switch (ycbcr_format.luma_coefficients) { + case YCBCR_REC_601: + // Rec. 601, page 2. + coeff[0] = 0.299; + coeff[1] = 0.587; + coeff[2] = 0.114; + break; + + case YCBCR_REC_709: + // Rec. 709, page 19. + coeff[0] = 0.2126; + coeff[1] = 0.7152; + coeff[2] = 0.0722; + break; + default: + assert(false); + } + + if (ycbcr_format.full_range) { + offset[0] = 0.0 / 255.0; + offset[1] = 128.0 / 255.0; + offset[2] = 128.0 / 255.0; + + scale[0] = 1.0; + scale[1] = 1.0; + scale[2] = 1.0; + } else { + // Rec. 601, page 4; Rec. 709, page 19. + offset[0] = 16.0 / 255.0; + offset[1] = 128.0 / 255.0; + offset[2] = 128.0 / 255.0; + + scale[0] = 255.0 / 219.0; + scale[1] = 255.0 / 224.0; + scale[2] = 255.0 / 224.0; + } + + // Matrix to convert RGB to YCbCr. See e.g. Rec. 601. 
+ Matrix3x3 rgb_to_ycbcr; + rgb_to_ycbcr[0] = coeff[0]; + rgb_to_ycbcr[3] = coeff[1]; + rgb_to_ycbcr[6] = coeff[2]; + + float cb_fac = (224.0 / 219.0) / (coeff[0] + coeff[1] + 1.0f - coeff[2]); + rgb_to_ycbcr[1] = -coeff[0] * cb_fac; + rgb_to_ycbcr[4] = -coeff[1] * cb_fac; + rgb_to_ycbcr[7] = (1.0f - coeff[2]) * cb_fac; + + float cr_fac = (224.0 / 219.0) / (1.0f - coeff[0] + coeff[1] + coeff[2]); + rgb_to_ycbcr[2] = (1.0f - coeff[0]) * cr_fac; + rgb_to_ycbcr[5] = -coeff[1] * cr_fac; + rgb_to_ycbcr[8] = -coeff[2] * cr_fac; + + // Inverting the matrix gives us what we need to go from YCbCr back to RGB. + Matrix3x3 ycbcr_to_rgb; + invert_3x3_matrix(rgb_to_ycbcr, ycbcr_to_rgb); + + std::string frag_shader; + + char buf[1024]; + sprintf(buf, + "const mat3 PREFIX(inv_ycbcr_matrix) = mat3(\n" + " %.8f, %.8f, %.8f,\n" + " %.8f, %.8f, %.8f,\n" + " %.8f, %.8f, %.8f);\n", + ycbcr_to_rgb[0], ycbcr_to_rgb[1], ycbcr_to_rgb[2], + ycbcr_to_rgb[3], ycbcr_to_rgb[4], ycbcr_to_rgb[5], + ycbcr_to_rgb[6], ycbcr_to_rgb[7], ycbcr_to_rgb[8]); + frag_shader = buf; + + sprintf(buf, "const vec3 PREFIX(offset) = vec3(%.8f, %.8f, %.8f);\n", + offset[0], offset[1], offset[2]); + frag_shader += buf; + + sprintf(buf, "const vec3 PREFIX(scale) = vec3(%.8f, %.8f, %.8f);\n", + scale[0], scale[1], scale[2]); + frag_shader += buf; + + // OpenGL has texel center in (0.5, 0.5), but different formats have + // chroma in various other places. If luma samples are X, the chroma + // sample is *, and subsampling is 3x3, the situation with chroma + // center in (0.5, 0.5) looks approximately like this: + // + // X X + // * + // X X + // + // If, on the other hand, chroma center is in (0.0, 0.5) (common + // for e.g. MPEG-4), the figure changes to: + // + // X X + // * + // X X + // + // Obviously, the chroma plane here needs to be moved to the left, + // which means _adding_ 0.5 to the texture coordinates when sampling + // chroma. 
+ float chroma_offset_x = (0.5f - ycbcr_format.chroma_x_position) / widths[1]; + float chroma_offset_y = (0.5f - ycbcr_format.chroma_y_position) / heights[1]; + sprintf(buf, "const vec2 PREFIX(chroma_offset) = vec2(%.8f, %.8f);\n", + chroma_offset_x, chroma_offset_y); + frag_shader += buf; + + frag_shader += read_file("ycbcr_input.frag"); + return frag_shader; +} diff --git a/ycbcr_input.frag b/ycbcr_input.frag new file mode 100644 index 0000000..13e44b0 --- /dev/null +++ b/ycbcr_input.frag @@ -0,0 +1,23 @@ +uniform sampler2D PREFIX(tex_y); +uniform sampler2D PREFIX(tex_cb); +uniform sampler2D PREFIX(tex_cr); + +vec4 FUNCNAME(vec2 tc) { + // OpenGL's origin is bottom-left, but most graphics software assumes + // a top-left origin. Thus, for inputs that come from the user, + // we flip the y coordinate. + tc.y = 1.0 - tc.y; + + vec3 ycbcr; + ycbcr.x = texture2D(PREFIX(tex_y), tc).x; + ycbcr.y = texture2D(PREFIX(tex_cb), tc + PREFIX(chroma_offset)).x; + ycbcr.z = texture2D(PREFIX(tex_cr), tc + PREFIX(chroma_offset)).x; + + ycbcr -= PREFIX(offset); + ycbcr *= PREFIX(scale); + + vec4 rgba; + rgba.rgb = PREFIX(inv_ycbcr_matrix) * ycbcr; + rgba.a = 1.0; + return rgba; +} diff --git a/ycbcr_input.h b/ycbcr_input.h new file mode 100644 index 0000000..fd9db27 --- /dev/null +++ b/ycbcr_input.h @@ -0,0 +1,95 @@ +#ifndef _YCBCR_INPUT_H +#define _YCBCR_INPUT_H 1 + +// YCbCrInput is for handling planar 8-bit Y'CbCr (also sometimes, usually rather +// imprecisely, called “YUV”), which is typically what you get from a video decoder. +// It upsamples planes as needed, using the default linear upsampling OpenGL gives you. + +#include "input.h" + +struct YCbCrFormat { + // Which formula for Y' to use. + YCbCrLumaCoefficients luma_coefficients; + + // If true, assume Y'CbCr coefficients are full-range, ie. go from 0 to 255 + // instead of the limited 220/225 steps in classic MPEG. For instance, + // JPEG uses the Rec. 601 luma coefficients, but full range. 
+ bool full_range; + + // Sampling factors for chroma components. For no subsampling (4:4:4), + // set both to 1. + unsigned chroma_subsampling_x, chroma_subsampling_y; + + // Positioning of the chroma samples. MPEG-1 and JPEG are (0.5, 0.5); + // MPEG-2 and newer typically are (0.0, 0.5). + float chroma_x_position, chroma_y_position; +}; + +class YCbCrInput : public Input { +public: + YCbCrInput(const ImageFormat &image_format, + const YCbCrFormat &ycbcr_format, + unsigned width, unsigned height); + + // Create the textures themselves. We cannot do this in the constructor, + // because we don't necessarily know all the settings (sRGB texture, + // mipmap generation) at that point. + void finalize(); + + virtual bool can_output_linear_gamma() const { return false; } + + std::string output_fragment_shader(); + + // Uploads the texture if it has changed since last time. + void set_gl_state(GLuint glsl_program_num, const std::string& prefix, unsigned *sampler_num); + + ColorSpace get_color_space() const { return image_format.color_space; } + GammaCurve get_gamma_curve() const { return image_format.gamma_curve; } + + // Tells the input where to fetch the actual pixel data. Note that if you change + // this data, you must either call set_pixel_data() again (using the same pointer + // is fine), or invalidate_pixel_data(). Otherwise, the texture won't be re-uploaded + // on subsequent frames. 
+ void set_pixel_data(unsigned channel, const unsigned char *pixel_data) + { + assert(channel >= 0 && channel < 3); + this->pixel_data[channel] = pixel_data; + invalidate_pixel_data(); + } + + void invalidate_pixel_data() + { + needs_update = true; + } + + const unsigned char *get_pixel_data(unsigned channel) const + { + assert(channel >= 0 && channel < 3); + return pixel_data[channel]; + } + + void set_pitch(unsigned channel, unsigned pitch) { + assert(!finalized); + assert(channel >= 0 && channel < 3); + this->pitch[channel] = pitch; + } + + unsigned get_pitch(unsigned channel) { + assert(channel >= 0 && channel < 3); + return pitch[channel]; + } + +private: + ImageFormat image_format; + YCbCrFormat ycbcr_format; + GLuint pbos[3], texture_num[3]; + bool needs_update, finalized; + + int needs_mipmaps; + + unsigned width, height, widths[3], heights[3]; + const unsigned char *pixel_data[3]; + unsigned pitch[3]; +}; + +#endif // !defined(_YCBCR_INPUT_H) -- 2.39.2