From ba60914d4e5eda7b28af700bf43e9699b7aa720d Mon Sep 17 00:00:00 2001 From: "Steinar H. Gunderson" Date: Thu, 30 Jul 2015 01:38:38 +0200 Subject: [PATCH] Add an effect for 4:2:2 interleaved YCbCr input (UYVY). This is primarily motivated by the fact that DeckLink uses this format natively. --- .gitignore | 1 + Makefile.in | 3 +- resource_pool.cpp | 3 + util.cpp | 10 ++ util.h | 3 +- ycbcr.cpp | 126 +++++++++++++++++++ ycbcr.h | 44 +++++++ ycbcr_422interleaved_input.cpp | 149 +++++++++++++++++++++++ ycbcr_422interleaved_input.frag | 32 +++++ ycbcr_422interleaved_input.h | 130 ++++++++++++++++++++ ycbcr_422interleaved_input_test.cpp | 180 ++++++++++++++++++++++++++++ ycbcr_input.cpp | 121 +------------------ ycbcr_input.h | 20 +--- 13 files changed, 681 insertions(+), 141 deletions(-) create mode 100644 ycbcr.cpp create mode 100644 ycbcr.h create mode 100644 ycbcr_422interleaved_input.cpp create mode 100644 ycbcr_422interleaved_input.frag create mode 100644 ycbcr_422interleaved_input.h create mode 100644 ycbcr_422interleaved_input_test.cpp diff --git a/.gitignore b/.gitignore index 547ccdb..b73db80 100644 --- a/.gitignore +++ b/.gitignore @@ -32,6 +32,7 @@ glow_effect_test padding_effect_test flat_input_test ycbcr_input_test +ycbcr_422interleaved_input_test complex_modulate_effect_test fft_pass_effect_test fp16_test diff --git a/Makefile.in b/Makefile.in index 5cb7fa9..fe46ec3 100644 --- a/Makefile.in +++ b/Makefile.in @@ -47,6 +47,7 @@ DEMO_OBJS=demo.o # Inputs. TESTED_INPUTS = flat_input TESTED_INPUTS += ycbcr_input +TESTED_INPUTS += ycbcr_422interleaved_input INPUTS = $(TESTED_INPUTS) $(UNTESTED_INPUTS) @@ -87,7 +88,7 @@ EFFECTS = $(TESTED_EFFECTS) $(UNTESTED_EFFECTS) # Unit tests. 
TESTS=effect_chain_test fp16_test $(TESTED_INPUTS:=_test) $(TESTED_EFFECTS:=_test) -LIB_OBJS=effect_util.o util.o widgets.o effect.o effect_chain.o init.o resource_pool.o fp16.o $(INPUTS:=.o) $(EFFECTS:=.o) +LIB_OBJS=effect_util.o util.o widgets.o effect.o effect_chain.o init.o resource_pool.o fp16.o ycbcr.o $(INPUTS:=.o) $(EFFECTS:=.o) # Default target: all: libmovit.la $(TESTS) diff --git a/resource_pool.cpp b/resource_pool.cpp index eba7923..7f81376 100644 --- a/resource_pool.cpp +++ b/resource_pool.cpp @@ -461,6 +461,9 @@ size_t ResourcePool::estimate_texture_size(const Texture2D &texture_format) case GL_R16F: bytes_per_pixel = 2; break; + case GL_RG8: + bytes_per_pixel = 2; + break; case GL_R8: bytes_per_pixel = 1; break; diff --git a/util.cpp b/util.cpp index 3ebf162..a6175b4 100644 --- a/util.cpp +++ b/util.cpp @@ -183,6 +183,16 @@ string output_glsl_mat3(const string &name, const Eigen::Matrix3d &m) return ss.str(); } +string output_glsl_float(const string &name, float x) +{ + // Use stringstream to be independent of the current locale in a thread-safe manner. + stringstream ss; + ss.imbue(locale("C")); + ss.precision(8); + ss << "const float " << name << " = " << x << ";\n"; + return ss.str(); +} + string output_glsl_vec2(const string &name, float x, float y) { // Use stringstream to be independent of the current locale in a thread-safe manner. diff --git a/util.h b/util.h index f57af77..e102f21 100644 --- a/util.h +++ b/util.h @@ -38,7 +38,8 @@ void print_3x3_matrix(const Eigen::Matrix3d &m); // Output a GLSL 3x3 matrix declaration. std::string output_glsl_mat3(const std::string &name, const Eigen::Matrix3d &m); -// Output GLSL 2-length and 3-length vector declarations. +// Output GLSL scalar, 2-length and 3-length vector declarations. 
+std::string output_glsl_float(const std::string &name, float x); std::string output_glsl_vec2(const std::string &name, float x, float y); std::string output_glsl_vec3(const std::string &name, float x, float y, float z); diff --git a/ycbcr.cpp b/ycbcr.cpp new file mode 100644 index 0000000..f0124ea --- /dev/null +++ b/ycbcr.cpp @@ -0,0 +1,126 @@ +#include +#include + +#include "ycbcr.h" + +using namespace Eigen; + +namespace movit { + +// OpenGL has texel center in (0.5, 0.5), but different formats have +// chroma in various other places. If luma samples are X, the chroma +// sample is *, and subsampling is 3x3, the situation with chroma +// center in (0.5, 0.5) looks approximately like this: +// +// X X +// * +// X X +// +// If, on the other hand, chroma center is in (0.0, 0.5) (common +// for e.g. MPEG-4), the figure changes to: +// +// X X +// * +// X X +// +// In other words, (0.0, 0.0) means that the chroma sample is exactly +// co-sited on top of the top-left luma sample. Note, however, that +// this is _not_ 0.5 texels to the left, since the OpenGL's texel center +// is in (0.5, 0.5); it is in (0.25, 0.25). In a sense, the four luma samples +// define a square where chroma position (0.0, 0.0) is in texel position +// (0.25, 0.25) and chroma position (1.0, 1.0) is in texel position (0.75, 0.75) +// (the outer border shows the borders of the texel itself, ie. from +// (0, 0) to (1, 1)): +// +// --------- +// | | +// | X---X | +// | | * | | +// | X---X | +// | | +// --------- +// +// Also note that if we have no subsampling, the square will have zero +// area and the chroma position does not matter at all. 
+float compute_chroma_offset(float pos, unsigned subsampling_factor, unsigned resolution) +{ + float local_chroma_pos = (0.5 + pos * (subsampling_factor - 1)) / subsampling_factor; + return (0.5 - local_chroma_pos) / resolution; +} + +// Given <ycbcr_format>, compute the values needed to turn Y'CbCr into R'G'B'; +// first subtract the returned offset, then left-multiply the returned matrix +// (the scaling is already folded into it). +void compute_ycbcr_matrix(YCbCrFormat ycbcr_format, float* offset, Matrix3d* ycbcr_to_rgb) +{ + double coeff[3], scale[3]; + + switch (ycbcr_format.luma_coefficients) { + case YCBCR_REC_601: + // Rec. 601, page 2. + coeff[0] = 0.299; + coeff[1] = 0.587; + coeff[2] = 0.114; + break; + + case YCBCR_REC_709: + // Rec. 709, page 19. + coeff[0] = 0.2126; + coeff[1] = 0.7152; + coeff[2] = 0.0722; + break; + + case YCBCR_REC_2020: + // Rec. 2020, page 4. + coeff[0] = 0.2627; + coeff[1] = 0.6780; + coeff[2] = 0.0593; + break; + + default: + assert(false); + } + + if (ycbcr_format.full_range) { + offset[0] = 0.0 / 255.0; + offset[1] = 128.0 / 255.0; + offset[2] = 128.0 / 255.0; + + scale[0] = 1.0; + scale[1] = 1.0; + scale[2] = 1.0; + } else { + // Rec. 601, page 4; Rec. 709, page 19; Rec. 2020, page 4. + offset[0] = 16.0 / 255.0; + offset[1] = 128.0 / 255.0; + offset[2] = 128.0 / 255.0; + + scale[0] = 255.0 / 219.0; + scale[1] = 255.0 / 224.0; + scale[2] = 255.0 / 224.0; + } + + // Matrix to convert RGB to YCbCr. See e.g. Rec. 601. 
+ Matrix3d rgb_to_ycbcr; + rgb_to_ycbcr(0,0) = coeff[0]; + rgb_to_ycbcr(0,1) = coeff[1]; + rgb_to_ycbcr(0,2) = coeff[2]; + + float cb_fac = (224.0 / 219.0) / (coeff[0] + coeff[1] + 1.0f - coeff[2]); + rgb_to_ycbcr(1,0) = -coeff[0] * cb_fac; + rgb_to_ycbcr(1,1) = -coeff[1] * cb_fac; + rgb_to_ycbcr(1,2) = (1.0f - coeff[2]) * cb_fac; + + float cr_fac = (224.0 / 219.0) / (1.0f - coeff[0] + coeff[1] + coeff[2]); + rgb_to_ycbcr(2,0) = (1.0f - coeff[0]) * cr_fac; + rgb_to_ycbcr(2,1) = -coeff[1] * cr_fac; + rgb_to_ycbcr(2,2) = -coeff[2] * cr_fac; + + // Inverting the matrix gives us what we need to go from YCbCr back to RGB. + *ycbcr_to_rgb = rgb_to_ycbcr.inverse(); + + // Fold in the scaling. + *ycbcr_to_rgb *= Map(scale).asDiagonal(); +} + +} // namespace movit diff --git a/ycbcr.h b/ycbcr.h new file mode 100644 index 0000000..7e5891f --- /dev/null +++ b/ycbcr.h @@ -0,0 +1,44 @@ +#ifndef _MOVIT_YCBCR_H +#define _MOVIT_YCBCR_H 1 + +// Shared utility functions between YCbCrInput and YCbCr422InterleavedInput. + +#include "image_format.h" + +#include + +namespace movit { + +struct YCbCrFormat { + // Which formula for Y' to use. + YCbCrLumaCoefficients luma_coefficients; + + // If true, assume Y'CbCr coefficients are full-range, ie. go from 0 to 255 + // instead of the limited 220/225 steps in classic MPEG. For instance, + // JPEG uses the Rec. 601 luma coefficients, but full range. + bool full_range; + + // Sampling factors for chroma components. For no subsampling (4:4:4), + // set both to 1. + unsigned chroma_subsampling_x, chroma_subsampling_y; + + // Positioning of the chroma samples. MPEG-1 and JPEG is (0.5, 0.5); + // MPEG-2 and newer typically are (0.0, 0.5). + float cb_x_position, cb_y_position; + float cr_x_position, cr_y_position; +}; + +// Convert texel sampling offset for the given chroma channel, given that +// chroma position is <pos> (0..1), we are downsampling this chroma channel +// by a factor of <subsampling_factor> and the texture we are sampling from +// is <resolution> pixels wide/high. 
+float compute_chroma_offset(float pos, unsigned subsampling_factor, unsigned resolution); + +// Given <ycbcr_format>, compute the values needed to turn Y'CbCr into R'G'B'; +// first subtract the returned offset, then left-multiply the returned matrix +// (the scaling is already folded into it). +void compute_ycbcr_matrix(YCbCrFormat ycbcr_format, float *offset, Eigen::Matrix3d *ycbcr_to_rgb); + +} // namespace movit + +#endif // !defined(_MOVIT_YCBCR_H) diff --git a/ycbcr_422interleaved_input.cpp b/ycbcr_422interleaved_input.cpp new file mode 100644 index 0000000..b634289 --- /dev/null +++ b/ycbcr_422interleaved_input.cpp @@ -0,0 +1,149 @@ +#include +#include +#include +#include + +#include "effect_util.h" +#include "resource_pool.h" +#include "util.h" +#include "ycbcr.h" +#include "ycbcr_422interleaved_input.h" + +using namespace Eigen; +using namespace std; + +namespace movit { + +YCbCr422InterleavedInput::YCbCr422InterleavedInput(const ImageFormat &image_format, + const YCbCrFormat &ycbcr_format, + unsigned width, unsigned height) + : image_format(image_format), + ycbcr_format(ycbcr_format), + width(width), + height(height), + resource_pool(NULL) +{ + pbo = 0; + texture_num[0] = texture_num[1] = 0; + + assert(ycbcr_format.chroma_subsampling_x == 2); + assert(ycbcr_format.chroma_subsampling_y == 1); + assert(width % ycbcr_format.chroma_subsampling_x == 0); + + widths[CHANNEL_LUMA] = width; + widths[CHANNEL_CHROMA] = width / ycbcr_format.chroma_subsampling_x; + pitches[CHANNEL_LUMA] = width; + pitches[CHANNEL_CHROMA] = width / ycbcr_format.chroma_subsampling_x; + + pixel_data = NULL; +} + +YCbCr422InterleavedInput::~YCbCr422InterleavedInput() +{ + for (unsigned channel = 0; channel < 2; ++channel) { + if (texture_num[channel] != 0) { + resource_pool->release_2d_texture(texture_num[channel]); + } + } +} + +void YCbCr422InterleavedInput::set_gl_state(GLuint glsl_program_num, const string& prefix, unsigned *sampler_num) +{ + for (unsigned channel = 0; channel < 2; 
++channel) { + glActiveTexture(GL_TEXTURE0 + *sampler_num + channel); + check_error(); + + if (texture_num[channel] == 0) { + // (Re-)upload the texture. + GLuint format, internal_format; + if (channel == CHANNEL_LUMA) { + format = GL_RG; + internal_format = GL_RG8; + } else { + assert(channel == CHANNEL_CHROMA); + format = GL_RGBA; + internal_format = GL_RGBA8; + } + + texture_num[channel] = resource_pool->create_2d_texture(internal_format, widths[channel], height); + glBindTexture(GL_TEXTURE_2D, texture_num[channel]); + check_error(); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + check_error(); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, pbo); + check_error(); + glPixelStorei(GL_UNPACK_ALIGNMENT, 1); + check_error(); + glPixelStorei(GL_UNPACK_ROW_LENGTH, pitches[channel]); + check_error(); + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, widths[channel], height, format, GL_UNSIGNED_BYTE, pixel_data); + check_error(); + glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); + check_error(); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + check_error(); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + check_error(); + } else { + glBindTexture(GL_TEXTURE_2D, texture_num[channel]); + check_error(); + } + } + + glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0); + check_error(); + + // Bind samplers. 
+ set_uniform_int(glsl_program_num, prefix, "tex_y", *sampler_num + 0); + set_uniform_int(glsl_program_num, prefix, "tex_cbcr", *sampler_num + 1); + + *sampler_num += 2; +} + +string YCbCr422InterleavedInput::output_fragment_shader() +{ + float offset[3]; + Matrix3d ycbcr_to_rgb; + compute_ycbcr_matrix(ycbcr_format, offset, &ycbcr_to_rgb); + + string frag_shader; + + frag_shader = output_glsl_mat3("PREFIX(inv_ycbcr_matrix)", ycbcr_to_rgb); + frag_shader += output_glsl_vec3("PREFIX(offset)", offset[0], offset[1], offset[2]); + + float cb_offset_x = compute_chroma_offset( + ycbcr_format.cb_x_position, ycbcr_format.chroma_subsampling_x, widths[CHANNEL_CHROMA]); + float cr_offset_x = compute_chroma_offset( + ycbcr_format.cr_x_position, ycbcr_format.chroma_subsampling_x, widths[CHANNEL_CHROMA]); + frag_shader += output_glsl_float("PREFIX(cb_offset_x)", cb_offset_x); + frag_shader += output_glsl_float("PREFIX(cr_offset_x)", cr_offset_x); + + char buf[256]; + sprintf(buf, "#define CB_CR_OFFSETS_EQUAL %d\n", + (fabs(ycbcr_format.cb_x_position - ycbcr_format.cr_x_position) < 1e-6)); + frag_shader += buf; + + frag_shader += read_file("ycbcr_422interleaved_input.frag"); + return frag_shader; +} + +void YCbCr422InterleavedInput::invalidate_pixel_data() +{ + for (unsigned channel = 0; channel < 2; ++channel) { + if (texture_num[channel] != 0) { + resource_pool->release_2d_texture(texture_num[channel]); + texture_num[channel] = 0; + } + } +} + +bool YCbCr422InterleavedInput::set_int(const std::string& key, int value) +{ + if (key == "needs_mipmaps") { + // We currently do not support this. 
+ return (value == 0); + } + return Effect::set_int(key, value); +} + +} // namespace movit diff --git a/ycbcr_422interleaved_input.frag b/ycbcr_422interleaved_input.frag new file mode 100644 index 0000000..66762a8 --- /dev/null +++ b/ycbcr_422interleaved_input.frag @@ -0,0 +1,32 @@ +uniform sampler2D PREFIX(tex_y); +uniform sampler2D PREFIX(tex_cbcr); + +vec4 FUNCNAME(vec2 tc) { + // OpenGL's origin is bottom-left, but most graphics software assumes + // a top-left origin. Thus, for inputs that come from the user, + // we flip the y coordinate. + tc.y = 1.0 - tc.y; + + vec3 ycbcr; + ycbcr.x = tex2D(PREFIX(tex_y), tc).y; +#if CB_CR_OFFSETS_EQUAL + vec2 tc_cbcr = tc; + tc_cbcr.x += PREFIX(cb_offset_x); + ycbcr.yz = tex2D(PREFIX(tex_cbcr), tc_cbcr).xz; +#else + vec2 tc_cb = tc; + tc_cb.x += PREFIX(cb_offset_x); + ycbcr.y = tex2D(PREFIX(tex_cbcr), tc_cb).x; + + vec2 tc_cr = tc; + tc_cr.x += PREFIX(cr_offset_x); + ycbcr.z = tex2D(PREFIX(tex_cbcr), tc_cr).z; +#endif + + ycbcr -= PREFIX(offset); + + vec4 rgba; + rgba.rgb = PREFIX(inv_ycbcr_matrix) * ycbcr; + rgba.a = 1.0; + return rgba; +} diff --git a/ycbcr_422interleaved_input.h b/ycbcr_422interleaved_input.h new file mode 100644 index 0000000..b346986 --- /dev/null +++ b/ycbcr_422interleaved_input.h @@ -0,0 +1,130 @@ +#ifndef _MOVIT_YCBCR_422INTERLEAVED_INPUT_H +#define _MOVIT_YCBCR_422INTERLEAVED_INPUT_H 1 + +// YCbCr422InterleavedInput is for handling 4:2:2 interleaved 8-bit Y'CbCr, +// which you can get from e.g. certain capture cards. (Most other Y'CbCr +// encodings are planar, which is handled by YCbCrInput.) Currently we only +// handle the UYVY variant, although YUY2 should be easy to support if needed. 
+// +// Horizontal chroma placement is freely choosable as with YCbCrInput, +// but BT.601 (which at least DeckLink claims to conform to, under the +// name CCIR 601) seems to specify chroma positioning to the far left +// (that is 0.0); BT.601 Annex 1 (page 7) says “C R and C B samples co-sited +// with odd (1st, 3rd, 5th, etc.) Y samples in each line”, and I assume they do +// not start counting from 0 when they use the “1st” moniker. +// +// Interpolation is bilinear as in YCbCrInput (done by the GPU's normal +// scaling, except for the Y channel which of course needs some fiddling), +// and is done in non-linear light (since that's what everything specifies, +// except Rec. 2020 lets you choose between the two). A higher-quality +// choice would be to use a single pass of ResampleEffect to scale the +// chroma, but for now we are consistent between the two. +// +// There is a disparity between the interleaving and the way OpenGL typically +// expects to sample. In lieu of accessible hardware support (a lot of hardware +// supports native interleaved 4:2:2 sampling, but OpenGL drivers seem to +// rarely support it), we simply upload the same data twice; once as a +// full-width RG texture (from which we sample luma) and once as a half-width +// RGBA texture (from which we sample chroma). We throw away half of the color +// channels each time, so bandwidth is wasted, but it makes for a very +// uncomplicated shader. + +#include +#include + +#include "effect.h" +#include "effect_chain.h" +#include "image_format.h" +#include "input.h" +#include "ycbcr.h" + +namespace movit { + +class ResourcePool; + +class YCbCr422InterleavedInput : public Input { +public: + // <ycbcr_format> must be consistent with 4:2:2 sampling; specifically: + // + // * chroma_subsampling_x must be 2. + // * chroma_subsampling_y must be 1. + // + // <width> must obviously be an even number. It is the true width of the image + // in pixels, ie., the number of horizontal luma samples. 
+ YCbCr422InterleavedInput(const ImageFormat &image_format, + const YCbCrFormat &ycbcr_format, + unsigned width, unsigned height); + ~YCbCr422InterleavedInput(); + + virtual std::string effect_type_id() const { return "YCbCr422InterleavedInput"; } + + virtual bool can_output_linear_gamma() const { return false; } + virtual AlphaHandling alpha_handling() const { return OUTPUT_BLANK_ALPHA; } + + std::string output_fragment_shader(); + + // Uploads the texture if it has changed since last time. + void set_gl_state(GLuint glsl_program_num, const std::string& prefix, unsigned *sampler_num); + + unsigned get_width() const { return width; } + unsigned get_height() const { return height; } + Colorspace get_color_space() const { return image_format.color_space; } + GammaCurve get_gamma_curve() const { return image_format.gamma_curve; } + virtual bool can_supply_mipmaps() const { return false; } + + // Tells the input where to fetch the actual pixel data. Note that if you change + // this data, you must either call set_pixel_data() again (using the same pointer + // is fine), or invalidate_pixel_data(). Otherwise, the texture won't be re-uploaded + // on subsequent frames. + // + // The data can either be a regular pointer (if pbo==0), or a byte offset + // into a PBO. The latter will allow you to start uploading the texture data + // asynchronously to the GPU, if you have any CPU-intensive work between the + // call to set_pixel_data() and the actual rendering. Also, since we upload + // the data twice, using a PBO can save texture upload bandwidth. In either case, + // the pointer (and PBO, if set) has to be valid at the time of the render call. 
+ void set_pixel_data(const unsigned char *pixel_data, GLuint pbo = 0) + { + this->pixel_data = pixel_data; + this->pbo = pbo; + invalidate_pixel_data(); + } + + void invalidate_pixel_data(); + + void set_pitch(unsigned pitch) { + assert(pitch % ycbcr_format.chroma_subsampling_x == 0); + pitches[CHANNEL_LUMA] = pitch; + pitches[CHANNEL_CHROMA] = pitch / ycbcr_format.chroma_subsampling_x; + invalidate_pixel_data(); + } + + virtual void inform_added(EffectChain *chain) + { + resource_pool = chain->get_resource_pool(); + } + + bool set_int(const std::string& key, int value); + +private: + ImageFormat image_format; + YCbCrFormat ycbcr_format; + GLuint pbo; + + // Luma texture is 0, chroma texture is 1. + enum Channel { + CHANNEL_LUMA, + CHANNEL_CHROMA + }; + GLuint texture_num[2]; + GLuint widths[2]; + unsigned pitches[2]; + + unsigned width, height; + const unsigned char *pixel_data; + ResourcePool *resource_pool; +}; + +} // namespace movit + +#endif // !defined(_MOVIT_YCBCR_422INTERLEAVED_INPUT_H) diff --git a/ycbcr_422interleaved_input_test.cpp b/ycbcr_422interleaved_input_test.cpp new file mode 100644 index 0000000..d47bb90 --- /dev/null +++ b/ycbcr_422interleaved_input_test.cpp @@ -0,0 +1,180 @@ +// Unit tests for YCbCr422InterleavedInput. + +#include +#include + +#include "effect_chain.h" +#include "gtest/gtest.h" +#include "test_util.h" +#include "util.h" +#include "ycbcr_422interleaved_input.h" + +namespace movit { + +// Adapted from the Simple444 test from YCbCrInputTest. +TEST(YCbCr422InterleavedInputTest, Simple422) { + const int width = 2; + const int height = 5; + + // Pure-color test inputs, calculated with the formulas in Rec. 601 + // section 2.5.4. 
+ unsigned char uyvy[width * height * 2] = { + /*U=*/128, /*Y=*/ 16, /*V=*/128, /*Y=*/ 16, + /*U=*/128, /*Y=*/235, /*V=*/128, /*Y=*/235, + /*U=*/ 90, /*Y=*/ 81, /*V=*/240, /*Y=*/ 81, + /*U=*/ 54, /*Y=*/145, /*V=*/ 34, /*Y=*/145, + /*U=*/240, /*Y=*/ 41, /*V=*/110, /*Y=*/ 41, + }; + + float expected_data[4 * width * height] = { + 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, + 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, + 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, + 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, + 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, + }; + float out_data[4 * width * height]; + + EffectChainTester tester(NULL, width, height); + + ImageFormat format; + format.color_space = COLORSPACE_sRGB; + format.gamma_curve = GAMMA_sRGB; + + YCbCrFormat ycbcr_format; + ycbcr_format.luma_coefficients = YCBCR_REC_601; + ycbcr_format.full_range = false; + ycbcr_format.chroma_subsampling_x = 2; + ycbcr_format.chroma_subsampling_y = 1; + ycbcr_format.cb_x_position = 0.0f; // Doesn't really matter here, since Y is constant. + ycbcr_format.cb_y_position = 0.5f; + ycbcr_format.cr_x_position = 0.0f; + ycbcr_format.cr_y_position = 0.5f; + + YCbCr422InterleavedInput *input = new YCbCr422InterleavedInput(format, ycbcr_format, width, height); + input->set_pixel_data(uyvy); + tester.get_chain()->add_input(input); + + tester.run(out_data, GL_RGBA, COLORSPACE_sRGB, GAMMA_sRGB); + + // Y'CbCr isn't 100% accurate (the input values are rounded), + // so we need some leeway. + expect_equal(expected_data, out_data, 4 * width, height, 0.025, 0.002); +} + +// Adapted from the YCbCrInput test of the same name. 
+TEST(YCbCr422InterleavedInputTest, DifferentCbAndCrPositioning) { + const int width = 4; + const int height = 4; + + unsigned char uyvy[width * height * 2] = { + /*U=*/ 64, /*Y=*/126, /*V=*/ 48, /*Y=*/126, /*U=*/128, /*Y=*/126, /*V=*/128, /*Y=*/126, + /*U=*/128, /*Y=*/126, /*V=*/128, /*Y=*/126, /*U=*/192, /*Y=*/126, /*V=*/208, /*Y=*/126, + /*U=*/128, /*Y=*/126, /*V=*/128, /*Y=*/126, /*U=*/128, /*Y=*/126, /*V=*/128, /*Y=*/126, + /*U=*/128, /*Y=*/126, /*V=*/128, /*Y=*/126, /*U=*/128, /*Y=*/126, /*V=*/128, /*Y=*/126, + }; + + // Chroma samples in this case are always co-sited with a luma sample; + // their associated color values and position are marked off in comments. + float expected_data_blue[width * height] = { + 0.000 /* 0.0 */, 0.250, 0.500 /* 0.5 */, 0.500, + 0.500 /* 0.5 */, 0.750, 1.000 /* 1.0 */, 1.000, + 0.500 /* 0.5 */, 0.500, 0.500 /* 0.5 */, 0.500, + 0.500 /* 0.5 */, 0.500, 0.500 /* 0.5 */, 0.500, + }; + float expected_data_red[width * height] = { + 0.000, 0.000 /* 0.0 */, 0.250, 0.500 /* 0.5 */, + 0.500, 0.500 /* 0.5 */, 0.750, 1.000 /* 1.0 */, + 0.500, 0.500 /* 0.5 */, 0.500, 0.500 /* 0.5 */, + 0.500, 0.500 /* 0.5 */, 0.500, 0.500 /* 0.5 */, + }; + float out_data[width * height]; + + EffectChainTester tester(NULL, width, height); + + ImageFormat format; + format.color_space = COLORSPACE_sRGB; + format.gamma_curve = GAMMA_sRGB; + + YCbCrFormat ycbcr_format; + ycbcr_format.luma_coefficients = YCBCR_REC_601; + ycbcr_format.full_range = false; + ycbcr_format.chroma_subsampling_x = 2; + ycbcr_format.chroma_subsampling_y = 1; + ycbcr_format.cb_x_position = 0.0f; + ycbcr_format.cb_y_position = 0.5f; + ycbcr_format.cr_x_position = 1.0f; + ycbcr_format.cr_y_position = 0.5f; + + YCbCr422InterleavedInput *input = new YCbCr422InterleavedInput(format, ycbcr_format, width, height); + input->set_pixel_data(uyvy); + tester.get_chain()->add_input(input); + + // Y'CbCr isn't 100% accurate (the input values are rounded), + // so we need some leeway. 
+ tester.run(out_data, GL_RED, COLORSPACE_sRGB, GAMMA_sRGB); + expect_equal(expected_data_red, out_data, width, height, 0.02, 0.002); + + tester.run(out_data, GL_BLUE, COLORSPACE_sRGB, GAMMA_sRGB); + expect_equal(expected_data_blue, out_data, width, height, 0.01, 0.001); +} + +TEST(YCbCr422InterleavedInputTest, PBO) { + const int width = 2; + const int height = 5; + + // Pure-color test inputs, calculated with the formulas in Rec. 601 + // section 2.5.4. + unsigned char uyvy[width * height * 2] = { + /*U=*/128, /*Y=*/ 16, /*V=*/128, /*Y=*/ 16, + /*U=*/128, /*Y=*/235, /*V=*/128, /*Y=*/235, + /*U=*/ 90, /*Y=*/ 81, /*V=*/240, /*Y=*/ 81, + /*U=*/ 54, /*Y=*/145, /*V=*/ 34, /*Y=*/145, + /*U=*/240, /*Y=*/ 41, /*V=*/110, /*Y=*/ 41, + }; + + float expected_data[4 * width * height] = { + 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, + 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, + 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, + 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, + 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, + }; + float out_data[4 * width * height]; + + GLuint pbo; + glGenBuffers(1, &pbo); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, pbo); + glBufferData(GL_PIXEL_UNPACK_BUFFER_ARB, width * height * 2, uyvy, GL_STREAM_DRAW); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0); + + EffectChainTester tester(NULL, width, height); + + ImageFormat format; + format.color_space = COLORSPACE_sRGB; + format.gamma_curve = GAMMA_sRGB; + + YCbCrFormat ycbcr_format; + ycbcr_format.luma_coefficients = YCBCR_REC_601; + ycbcr_format.full_range = false; + ycbcr_format.chroma_subsampling_x = 2; + ycbcr_format.chroma_subsampling_y = 1; + ycbcr_format.cb_x_position = 0.0f; // Doesn't really matter here, since Y is constant. 
+ ycbcr_format.cb_y_position = 0.5f; + ycbcr_format.cr_x_position = 0.0f; + ycbcr_format.cr_y_position = 0.5f; + + YCbCr422InterleavedInput *input = new YCbCr422InterleavedInput(format, ycbcr_format, width, height); + input->set_pixel_data((unsigned char *)BUFFER_OFFSET(0), pbo); + tester.get_chain()->add_input(input); + + tester.run(out_data, GL_RGBA, COLORSPACE_sRGB, GAMMA_sRGB); + + // Y'CbCr isn't 100% accurate (the input values are rounded), + // so we need some leeway. + expect_equal(expected_data, out_data, 4 * width, height, 0.025, 0.002); + + glDeleteBuffers(1, &pbo); +} + +} // namespace movit diff --git a/ycbcr_input.cpp b/ycbcr_input.cpp index ee341f0..091880f 100644 --- a/ycbcr_input.cpp +++ b/ycbcr_input.cpp @@ -8,6 +8,7 @@ #include "effect_util.h" #include "resource_pool.h" #include "util.h" +#include "ycbcr.h" #include "ycbcr_input.h" using namespace Eigen; @@ -15,126 +16,6 @@ using namespace std; namespace movit { -namespace { - -// OpenGL has texel center in (0.5, 0.5), but different formats have -// chroma in various other places. If luma samples are X, the chroma -// sample is *, and subsampling is 3x3, the situation with chroma -// center in (0.5, 0.5) looks approximately like this: -// -// X X -// * -// X X -// -// If, on the other hand, chroma center is in (0.0, 0.5) (common -// for e.g. MPEG-4), the figure changes to: -// -// X X -// * -// X X -// -// In other words, (0.0, 0.0) means that the chroma sample is exactly -// co-sited on top of the top-left luma sample. Note, however, that -// this is _not_ 0.5 texels to the left, since the OpenGL's texel center -// is in (0.5, 0.5); it is in (0.25, 0.25). In a sense, the four luma samples -// define a square where chroma position (0.0, 0.0) is in texel position -// (0.25, 0.25) and chroma position (1.0, 1.0) is in texel position (0.75, 0.75) -// (the outer border shows the borders of the texel itself, ie. 
from -// (0, 0) to (1, 1)): -// -// --------- -// | | -// | X---X | -// | | * | | -// | X---X | -// | | -// --------- -// -// Also note that if we have no subsampling, the square will have zero -// area and the chroma position does not matter at all. -float compute_chroma_offset(float pos, unsigned subsampling_factor, unsigned resolution) -{ - float local_chroma_pos = (0.5 + pos * (subsampling_factor - 1)) / subsampling_factor; - return (0.5 - local_chroma_pos) / resolution; -} - -// Given , compute the values needed to turn Y'CbCr into R'G'B'; -// first subtract the returned offset, then left-multiply the returned matrix -// (the scaling is already folded into it). -void compute_ycbcr_matrix(YCbCrFormat ycbcr_format, float* offset, Matrix3d* ycbcr_to_rgb) -{ - double coeff[3], scale[3]; - - switch (ycbcr_format.luma_coefficients) { - case YCBCR_REC_601: - // Rec. 601, page 2. - coeff[0] = 0.299; - coeff[1] = 0.587; - coeff[2] = 0.114; - break; - - case YCBCR_REC_709: - // Rec. 709, page 19. - coeff[0] = 0.2126; - coeff[1] = 0.7152; - coeff[2] = 0.0722; - break; - - case YCBCR_REC_2020: - // Rec. 2020, page 4. - coeff[0] = 0.2627; - coeff[1] = 0.6780; - coeff[2] = 0.0593; - break; - - default: - assert(false); - } - - if (ycbcr_format.full_range) { - offset[0] = 0.0 / 255.0; - offset[1] = 128.0 / 255.0; - offset[2] = 128.0 / 255.0; - - scale[0] = 1.0; - scale[1] = 1.0; - scale[2] = 1.0; - } else { - // Rec. 601, page 4; Rec. 709, page 19; Rec. 2020, page 4. - offset[0] = 16.0 / 255.0; - offset[1] = 128.0 / 255.0; - offset[2] = 128.0 / 255.0; - - scale[0] = 255.0 / 219.0; - scale[1] = 255.0 / 224.0; - scale[2] = 255.0 / 224.0; - } - - // Matrix to convert RGB to YCbCr. See e.g. Rec. 601. 
- Matrix3d rgb_to_ycbcr; - rgb_to_ycbcr(0,0) = coeff[0]; - rgb_to_ycbcr(0,1) = coeff[1]; - rgb_to_ycbcr(0,2) = coeff[2]; - - float cb_fac = (224.0 / 219.0) / (coeff[0] + coeff[1] + 1.0f - coeff[2]); - rgb_to_ycbcr(1,0) = -coeff[0] * cb_fac; - rgb_to_ycbcr(1,1) = -coeff[1] * cb_fac; - rgb_to_ycbcr(1,2) = (1.0f - coeff[2]) * cb_fac; - - float cr_fac = (224.0 / 219.0) / (1.0f - coeff[0] + coeff[1] + coeff[2]); - rgb_to_ycbcr(2,0) = (1.0f - coeff[0]) * cr_fac; - rgb_to_ycbcr(2,1) = -coeff[1] * cr_fac; - rgb_to_ycbcr(2,2) = -coeff[2] * cr_fac; - - // Inverting the matrix gives us what we need to go from YCbCr back to RGB. - *ycbcr_to_rgb = rgb_to_ycbcr.inverse(); - - // Fold in the scaling. - *ycbcr_to_rgb *= Map(scale).asDiagonal(); -} - -} // namespace - YCbCrInput::YCbCrInput(const ImageFormat &image_format, const YCbCrFormat &ycbcr_format, unsigned width, unsigned height) diff --git a/ycbcr_input.h b/ycbcr_input.h index 4c7b3e9..97ad526 100644 --- a/ycbcr_input.h +++ b/ycbcr_input.h @@ -13,30 +13,12 @@ #include "effect_chain.h" #include "image_format.h" #include "input.h" +#include "ycbcr.h" namespace movit { class ResourcePool; -struct YCbCrFormat { - // Which formula for Y' to use. - YCbCrLumaCoefficients luma_coefficients; - - // If true, assume Y'CbCr coefficients are full-range, ie. go from 0 to 255 - // instead of the limited 220/225 steps in classic MPEG. For instance, - // JPEG uses the Rec. 601 luma coefficients, but full range. - bool full_range; - - // Sampling factors for chroma components. For no subsampling (4:4:4), - // set both to 1. - unsigned chroma_subsampling_x, chroma_subsampling_y; - - // Positioning of the chroma samples. MPEG-1 and JPEG is (0.5, 0.5); - // MPEG-2 and newer typically are (0.0, 0.5). - float cb_x_position, cb_y_position; - float cr_x_position, cr_y_position; -}; - class YCbCrInput : public Input { public: YCbCrInput(const ImageFormat &image_format, -- 2.39.2