]> git.sesse.net Git - movit/commitdiff
Add an effect for 4:2:2 interleaved YCbCr input (UYVY).
author Steinar H. Gunderson <sgunderson@bigfoot.com>
Wed, 29 Jul 2015 23:38:38 +0000 (01:38 +0200)
committer Steinar H. Gunderson <sgunderson@bigfoot.com>
Wed, 29 Jul 2015 23:38:38 +0000 (01:38 +0200)
This is primarily motivated by the fact that DeckLink uses this format
natively.

13 files changed:
.gitignore
Makefile.in
resource_pool.cpp
util.cpp
util.h
ycbcr.cpp [new file with mode: 0644]
ycbcr.h [new file with mode: 0644]
ycbcr_422interleaved_input.cpp [new file with mode: 0644]
ycbcr_422interleaved_input.frag [new file with mode: 0644]
ycbcr_422interleaved_input.h [new file with mode: 0644]
ycbcr_422interleaved_input_test.cpp [new file with mode: 0644]
ycbcr_input.cpp
ycbcr_input.h

index 547ccdb86de3885dba6e2432762dcd893b605b8d..b73db80471df51846c9dd2ae2d45f47ac1dca15b 100644 (file)
@@ -32,6 +32,7 @@ glow_effect_test
 padding_effect_test
 flat_input_test
 ycbcr_input_test
 padding_effect_test
 flat_input_test
 ycbcr_input_test
+ycbcr_422interleaved_input_test
 complex_modulate_effect_test
 fft_pass_effect_test
 fp16_test
 complex_modulate_effect_test
 fft_pass_effect_test
 fp16_test
index 5cb7fa9b127b53fbc84af13efd0ec5c3bb1a604d..fe46ec30a424c2e8d6e68a0a0d35f508da627ad4 100644 (file)
@@ -47,6 +47,7 @@ DEMO_OBJS=demo.o
 # Inputs.
 TESTED_INPUTS = flat_input
 TESTED_INPUTS += ycbcr_input
 # Inputs.
 TESTED_INPUTS = flat_input
 TESTED_INPUTS += ycbcr_input
+TESTED_INPUTS += ycbcr_422interleaved_input
 
 INPUTS = $(TESTED_INPUTS) $(UNTESTED_INPUTS)
 
 
 INPUTS = $(TESTED_INPUTS) $(UNTESTED_INPUTS)
 
@@ -87,7 +88,7 @@ EFFECTS = $(TESTED_EFFECTS) $(UNTESTED_EFFECTS)
 # Unit tests.
 TESTS=effect_chain_test fp16_test $(TESTED_INPUTS:=_test) $(TESTED_EFFECTS:=_test)
 
 # Unit tests.
 TESTS=effect_chain_test fp16_test $(TESTED_INPUTS:=_test) $(TESTED_EFFECTS:=_test)
 
-LIB_OBJS=effect_util.o util.o widgets.o effect.o effect_chain.o init.o resource_pool.o fp16.o $(INPUTS:=.o) $(EFFECTS:=.o)
+LIB_OBJS=effect_util.o util.o widgets.o effect.o effect_chain.o init.o resource_pool.o fp16.o ycbcr.o $(INPUTS:=.o) $(EFFECTS:=.o)
 
 # Default target:
 all: libmovit.la $(TESTS)
 
 # Default target:
 all: libmovit.la $(TESTS)
index eba79237678611bf474c065f81111e49531cfb94..7f813765c53ac1dd47c21d69dfd9bc552eb1b89c 100644 (file)
@@ -461,6 +461,9 @@ size_t ResourcePool::estimate_texture_size(const Texture2D &texture_format)
        case GL_R16F:
                bytes_per_pixel = 2;
                break;
        case GL_R16F:
                bytes_per_pixel = 2;
                break;
+       case GL_RG8:
+               bytes_per_pixel = 2;
+               break;
        case GL_R8:
                bytes_per_pixel = 1;
                break;
        case GL_R8:
                bytes_per_pixel = 1;
                break;
index 3ebf162d31a0558ddfb54d3a8520706035ee45e6..a6175b4dbaf81bcad6fe7d9b6b585a4b2a3b76b6 100644 (file)
--- a/util.cpp
+++ b/util.cpp
@@ -183,6 +183,16 @@ string output_glsl_mat3(const string &name, const Eigen::Matrix3d &m)
        return ss.str();
 }
 
        return ss.str();
 }
 
+string output_glsl_float(const string &name, float x)
+{
+       // Use stringstream to be independent of the current locale in a thread-safe manner.
+       stringstream ss;
+       ss.imbue(locale("C"));
+       ss.precision(8);
+       ss << "const float " << name << " = " << x << ";\n";
+       return ss.str();
+}
+
 string output_glsl_vec2(const string &name, float x, float y)
 {
        // Use stringstream to be independent of the current locale in a thread-safe manner.
 string output_glsl_vec2(const string &name, float x, float y)
 {
        // Use stringstream to be independent of the current locale in a thread-safe manner.
diff --git a/util.h b/util.h
index f57af77691befa0afa80bb943f7d11a6e54aeadd..e102f2115612ede741798f86e58fec3a718622a4 100644 (file)
--- a/util.h
+++ b/util.h
@@ -38,7 +38,8 @@ void print_3x3_matrix(const Eigen::Matrix3d &m);
 // Output a GLSL 3x3 matrix declaration.
 std::string output_glsl_mat3(const std::string &name, const Eigen::Matrix3d &m);
 
 // Output a GLSL 3x3 matrix declaration.
 std::string output_glsl_mat3(const std::string &name, const Eigen::Matrix3d &m);
 
-// Output GLSL 2-length and 3-length vector declarations.
+// Output GLSL scalar, 2-length and 3-length vector declarations.
+std::string output_glsl_float(const std::string &name, float x);
 std::string output_glsl_vec2(const std::string &name, float x, float y);
 std::string output_glsl_vec3(const std::string &name, float x, float y, float z);
 
 std::string output_glsl_vec2(const std::string &name, float x, float y);
 std::string output_glsl_vec3(const std::string &name, float x, float y, float z);
 
diff --git a/ycbcr.cpp b/ycbcr.cpp
new file mode 100644 (file)
index 0000000..f0124ea
--- /dev/null
+++ b/ycbcr.cpp
@@ -0,0 +1,126 @@
+#include <Eigen/Core>
+#include <Eigen/LU>
+
+#include "ycbcr.h"
+
+using namespace Eigen;
+
+namespace movit {
+
+// OpenGL has texel center in (0.5, 0.5), but different formats have
+// chroma in various other places. If luma samples are X, the chroma
+// sample is *, and subsampling is 2x2, the situation with chroma
+// center in (0.5, 0.5) looks approximately like this:
+//
+//   X   X
+//     *   
+//   X   X
+//
+// If, on the other hand, chroma center is in (0.0, 0.5) (common
+// for e.g. MPEG-4), the figure changes to:
+//
+//   X   X
+//   *      
+//   X   X
+//
+// In other words, (0.0, 0.0) means that the chroma sample is exactly
+// co-sited on top of the top-left luma sample. Note, however, that
+// this is _not_ 0.5 texels to the left, since the OpenGL's texel center
+// is in (0.5, 0.5); it is in (0.25, 0.25). In a sense, the four luma samples
+// define a square where chroma position (0.0, 0.0) is in texel position
+// (0.25, 0.25) and chroma position (1.0, 1.0) is in texel position (0.75, 0.75)
+// (the outer border shows the borders of the texel itself, ie. from
+// (0, 0) to (1, 1)):
+//
+//  ---------
+// |         |
+// |  X---X  |
+// |  | * |  |
+// |  X---X  |
+// |         |
+//  ---------
+//
+// Also note that if we have no subsampling, the square will have zero
+// area and the chroma position does not matter at all.
+float compute_chroma_offset(float pos, unsigned subsampling_factor, unsigned resolution)
+{
+       float local_chroma_pos = (0.5 + pos * (subsampling_factor - 1)) / subsampling_factor;
+       return (0.5 - local_chroma_pos) / resolution;
+}
+
+// Given <ycbcr_format>, compute the values needed to turn Y'CbCr into R'G'B';
+// first subtract the returned offset, then left-multiply the returned matrix
+// (the scaling is already folded into it).
+void compute_ycbcr_matrix(YCbCrFormat ycbcr_format, float* offset, Matrix3d* ycbcr_to_rgb)
+{
+       double coeff[3], scale[3];
+
+       switch (ycbcr_format.luma_coefficients) {
+       case YCBCR_REC_601:
+               // Rec. 601, page 2.
+               coeff[0] = 0.299;
+               coeff[1] = 0.587;
+               coeff[2] = 0.114;
+               break;
+
+       case YCBCR_REC_709:
+               // Rec. 709, page 19.
+               coeff[0] = 0.2126;
+               coeff[1] = 0.7152;
+               coeff[2] = 0.0722;
+               break;
+
+       case YCBCR_REC_2020:
+               // Rec. 2020, page 4.
+               coeff[0] = 0.2627;
+               coeff[1] = 0.6780;
+               coeff[2] = 0.0593;
+               break;
+
+       default:
+               assert(false);
+       }
+
+       if (ycbcr_format.full_range) {
+               offset[0] = 0.0 / 255.0;
+               offset[1] = 128.0 / 255.0;
+               offset[2] = 128.0 / 255.0;
+
+               scale[0] = 1.0;
+               scale[1] = 1.0;
+               scale[2] = 1.0;
+       } else {
+               // Rec. 601, page 4; Rec. 709, page 19; Rec. 2020, page 4.
+               offset[0] = 16.0 / 255.0;
+               offset[1] = 128.0 / 255.0;
+               offset[2] = 128.0 / 255.0;
+
+               scale[0] = 255.0 / 219.0;
+               scale[1] = 255.0 / 224.0;
+               scale[2] = 255.0 / 224.0;
+       }
+
+       // Matrix to convert RGB to YCbCr. See e.g. Rec. 601.
+       Matrix3d rgb_to_ycbcr;
+       rgb_to_ycbcr(0,0) = coeff[0];
+       rgb_to_ycbcr(0,1) = coeff[1];
+       rgb_to_ycbcr(0,2) = coeff[2];
+
+       float cb_fac = (224.0 / 219.0) / (coeff[0] + coeff[1] + 1.0f - coeff[2]);
+       rgb_to_ycbcr(1,0) = -coeff[0] * cb_fac;
+       rgb_to_ycbcr(1,1) = -coeff[1] * cb_fac;
+       rgb_to_ycbcr(1,2) = (1.0f - coeff[2]) * cb_fac;
+
+       float cr_fac = (224.0 / 219.0) / (1.0f - coeff[0] + coeff[1] + coeff[2]);
+       rgb_to_ycbcr(2,0) = (1.0f - coeff[0]) * cr_fac;
+       rgb_to_ycbcr(2,1) = -coeff[1] * cr_fac;
+       rgb_to_ycbcr(2,2) = -coeff[2] * cr_fac;
+
+       // Inverting the matrix gives us what we need to go from YCbCr back to RGB.
+       *ycbcr_to_rgb = rgb_to_ycbcr.inverse();
+
+       // Fold in the scaling.
+       *ycbcr_to_rgb *= Map<const Vector3d>(scale).asDiagonal();
+}
+
+}  // namespace movit
diff --git a/ycbcr.h b/ycbcr.h
new file mode 100644 (file)
index 0000000..7e5891f
--- /dev/null
+++ b/ycbcr.h
@@ -0,0 +1,44 @@
+#ifndef _MOVIT_YCBCR_H
+#define _MOVIT_YCBCR_H 1
+
+// Shared utility functions between YCbCrInput and YCbCr422InterleavedInput.
+
+#include "image_format.h"
+
+#include <Eigen/Core>
+
+namespace movit {
+
+struct YCbCrFormat {
+       // Which formula for Y' to use.
+       YCbCrLumaCoefficients luma_coefficients;
+
+       // If true, assume Y'CbCr coefficients are full-range, ie. go from 0 to 255
+       // instead of the limited 220/225 steps in classic MPEG. For instance,
+       // JPEG uses the Rec. 601 luma coefficients, but full range.
+       bool full_range;
+
+       // Sampling factors for chroma components. For no subsampling (4:4:4),
+       // set both to 1.
+       unsigned chroma_subsampling_x, chroma_subsampling_y;
+
+       // Positioning of the chroma samples. MPEG-1 and JPEG is (0.5, 0.5);
+       // MPEG-2 and newer typically are (0.0, 0.5).
+       float cb_x_position, cb_y_position;
+       float cr_x_position, cr_y_position;
+};
+
+// Convert texel sampling offset for the given chroma channel, given that
+// chroma position is <pos> (0..1), we are downsampling this chroma channel
+// by a factor of <subsampling_factor> and the texture we are sampling from
+// is <resolution> pixels wide/high.
+float compute_chroma_offset(float pos, unsigned subsampling_factor, unsigned resolution);
+
+// Given <ycbcr_format>, compute the values needed to turn Y'CbCr into R'G'B';
+// first subtract the returned offset, then left-multiply the returned matrix
+// (the scaling is already folded into it).
+void compute_ycbcr_matrix(YCbCrFormat ycbcr_format, float *offset, Eigen::Matrix3d *ycbcr_to_rgb);
+
+}  // namespace movit
+
+#endif // !defined(_MOVIT_YCBCR_H)
diff --git a/ycbcr_422interleaved_input.cpp b/ycbcr_422interleaved_input.cpp
new file mode 100644 (file)
index 0000000..b634289
--- /dev/null
@@ -0,0 +1,149 @@
+#include <epoxy/gl.h>
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "effect_util.h"
+#include "resource_pool.h"
+#include "util.h"
+#include "ycbcr.h"
+#include "ycbcr_422interleaved_input.h"
+
+using namespace Eigen;
+using namespace std;
+
+namespace movit {
+
+YCbCr422InterleavedInput::YCbCr422InterleavedInput(const ImageFormat &image_format,
+                                                   const YCbCrFormat &ycbcr_format,
+                                                  unsigned width, unsigned height)
+       : image_format(image_format),
+         ycbcr_format(ycbcr_format),
+         width(width),
+         height(height),
+         resource_pool(NULL)
+{
+       pbo = 0;
+       texture_num[0] = texture_num[1] = 0;
+
+       assert(ycbcr_format.chroma_subsampling_x == 2);
+       assert(ycbcr_format.chroma_subsampling_y == 1);
+       assert(width % ycbcr_format.chroma_subsampling_x == 0);
+
+       widths[CHANNEL_LUMA] = width;
+       widths[CHANNEL_CHROMA] = width / ycbcr_format.chroma_subsampling_x;
+       pitches[CHANNEL_LUMA] = width;
+       pitches[CHANNEL_CHROMA] = width / ycbcr_format.chroma_subsampling_x;
+
+       pixel_data = NULL;
+}
+
+YCbCr422InterleavedInput::~YCbCr422InterleavedInput()
+{
+       for (unsigned channel = 0; channel < 2; ++channel) {
+               if (texture_num[channel] != 0) {
+                       resource_pool->release_2d_texture(texture_num[channel]);
+               }
+       }
+}
+
+void YCbCr422InterleavedInput::set_gl_state(GLuint glsl_program_num, const string& prefix, unsigned *sampler_num)
+{
+       for (unsigned channel = 0; channel < 2; ++channel) {
+               glActiveTexture(GL_TEXTURE0 + *sampler_num + channel);
+               check_error();
+
+               if (texture_num[channel] == 0) {
+                       // (Re-)upload the texture.
+                       GLuint format, internal_format;
+                       if (channel == CHANNEL_LUMA) {
+                               format = GL_RG;
+                               internal_format = GL_RG8;
+                       } else {        
+                               assert(channel == CHANNEL_CHROMA);
+                               format = GL_RGBA;
+                               internal_format = GL_RGBA8;
+                       }
+
+                       texture_num[channel] = resource_pool->create_2d_texture(internal_format, widths[channel], height);
+                       glBindTexture(GL_TEXTURE_2D, texture_num[channel]);
+                       check_error();
+                       glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+                       check_error();
+                       glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, pbo);
+                       check_error();
+                       glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
+                       check_error();
+                       glPixelStorei(GL_UNPACK_ROW_LENGTH, pitches[channel]);
+                       check_error();
+                       glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, widths[channel], height, format, GL_UNSIGNED_BYTE, pixel_data);
+                       check_error();
+                       glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
+                       check_error();
+                       glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+                       check_error();
+                       glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+                       check_error();
+               } else {
+                       glBindTexture(GL_TEXTURE_2D, texture_num[channel]);
+                       check_error();
+               }
+       }
+
+       glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0);
+       check_error();
+
+       // Bind samplers.
+       set_uniform_int(glsl_program_num, prefix, "tex_y", *sampler_num + 0);
+       set_uniform_int(glsl_program_num, prefix, "tex_cbcr", *sampler_num + 1);
+
+       *sampler_num += 2;
+}
+
+string YCbCr422InterleavedInput::output_fragment_shader()
+{
+       float offset[3];
+       Matrix3d ycbcr_to_rgb;
+       compute_ycbcr_matrix(ycbcr_format, offset, &ycbcr_to_rgb);
+
+       string frag_shader;
+
+       frag_shader = output_glsl_mat3("PREFIX(inv_ycbcr_matrix)", ycbcr_to_rgb);
+       frag_shader += output_glsl_vec3("PREFIX(offset)", offset[0], offset[1], offset[2]);
+
+       float cb_offset_x = compute_chroma_offset(
+               ycbcr_format.cb_x_position, ycbcr_format.chroma_subsampling_x, widths[CHANNEL_CHROMA]);
+       float cr_offset_x = compute_chroma_offset(
+               ycbcr_format.cr_x_position, ycbcr_format.chroma_subsampling_x, widths[CHANNEL_CHROMA]);
+       frag_shader += output_glsl_float("PREFIX(cb_offset_x)", cb_offset_x);
+       frag_shader += output_glsl_float("PREFIX(cr_offset_x)", cr_offset_x);
+
+       char buf[256];
+       sprintf(buf, "#define CB_CR_OFFSETS_EQUAL %d\n",
+               (fabs(ycbcr_format.cb_x_position - ycbcr_format.cr_x_position) < 1e-6));
+       frag_shader += buf;
+
+       frag_shader += read_file("ycbcr_422interleaved_input.frag");
+       return frag_shader;
+}
+
+void YCbCr422InterleavedInput::invalidate_pixel_data()
+{
+       for (unsigned channel = 0; channel < 2; ++channel) {
+               if (texture_num[channel] != 0) {
+                       resource_pool->release_2d_texture(texture_num[channel]);
+                       texture_num[channel] = 0;
+               }
+       }
+}
+
+bool YCbCr422InterleavedInput::set_int(const std::string& key, int value)
+{
+       if (key == "needs_mipmaps") {
+               // We currently do not support this.
+               return (value == 0);
+       }
+       return Effect::set_int(key, value);
+}
+
+}  // namespace movit
diff --git a/ycbcr_422interleaved_input.frag b/ycbcr_422interleaved_input.frag
new file mode 100644 (file)
index 0000000..66762a8
--- /dev/null
@@ -0,0 +1,32 @@
+uniform sampler2D PREFIX(tex_y);
+uniform sampler2D PREFIX(tex_cbcr);
+
+vec4 FUNCNAME(vec2 tc) {
+       // OpenGL's origin is bottom-left, but most graphics software assumes
+       // a top-left origin. Thus, for inputs that come from the user,
+       // we flip the y coordinate.
+       tc.y = 1.0 - tc.y;
+
+       vec3 ycbcr;
+       ycbcr.x = tex2D(PREFIX(tex_y), tc).y;
+#if CB_CR_OFFSETS_EQUAL
+       vec2 tc_cbcr = tc;
+       tc_cbcr.x += PREFIX(cb_offset_x);
+       ycbcr.yz = tex2D(PREFIX(tex_cbcr), tc_cbcr).xz;
+#else
+       vec2 tc_cb = tc;
+       tc_cb.x += PREFIX(cb_offset_x);
+       ycbcr.y = tex2D(PREFIX(tex_cbcr), tc_cb).x;
+
+       vec2 tc_cr = tc;
+       tc_cr.x += PREFIX(cr_offset_x);
+       ycbcr.z = tex2D(PREFIX(tex_cbcr), tc_cr).z;
+#endif
+
+       ycbcr -= PREFIX(offset);
+
+       vec4 rgba;
+       rgba.rgb = PREFIX(inv_ycbcr_matrix) * ycbcr;
+       rgba.a = 1.0;
+       return rgba;
+}
diff --git a/ycbcr_422interleaved_input.h b/ycbcr_422interleaved_input.h
new file mode 100644 (file)
index 0000000..b346986
--- /dev/null
@@ -0,0 +1,130 @@
+#ifndef _MOVIT_YCBCR_422INTERLEAVED_INPUT_H
+#define _MOVIT_YCBCR_422INTERLEAVED_INPUT_H 1
+
+// YCbCr422InterleavedInput is for handling 4:2:2 interleaved 8-bit Y'CbCr,
+// which you can get from e.g. certain capture cards. (Most other Y'CbCr
+// encodings are planar, which is handled by YCbCrInput.) Currently we only
+// handle the UYVY variant, although YUY2 should be easy to support if needed.
+//
+// Horizontal chroma placement is freely choosable as with YCbCrInput,
+// but BT.601 (which at least DeckLink claims to conform to, under the
+// name CCIR 601) seems to specify chroma positioning to the far left
+// (that is 0.0); BT.601 Annex 1 (page 7) says “C R and C B samples co-sited
+// with odd (1st, 3rd, 5th, etc.) Y samples in each line”, and I assume they do
+// not start counting from 0 when they use the “1st” moniker.
+//
+// Interpolation is bilinear as in YCbCrInput (done by the GPU's normal
+// scaling, except for the Y channel which of course needs some fiddling),
+// and is done in non-linear light (since that's what everything specifies,
+// except Rec. 2020 lets you choose between the two). A higher-quality
+// choice would be to use a single pass of ResampleEffect to scale the
+// chroma, but for now we are consistent between the two.
+//
+// There is a disparity between the interleaving and the way OpenGL typically
+// expects to sample. In lieu of accessible hardware support (a lot of hardware
+// supports native interleaved 4:2:2 sampling, but OpenGL drivers seem to
+// rarely support it), we simply upload the same data twice; once as a
+// full-width RG texture (from which we sample luma) and once as a half-width
+// RGBA texture (from which we sample chroma). We throw away half of the color
+// channels each time, so bandwidth is wasted, but it makes for a very
+// uncomplicated shader.
+
+#include <epoxy/gl.h>
+#include <string>
+
+#include "effect.h"
+#include "effect_chain.h"
+#include "image_format.h"
+#include "input.h"
+#include "ycbcr.h"
+
+namespace movit {
+
+class ResourcePool;
+
+class YCbCr422InterleavedInput : public Input {
+public:
+       // <ycbcr_format> must be consistent with 4:2:2 sampling; specifically:
+       //
+       //  * chroma_subsampling_x must be 2.
+       //  * chroma_subsampling_y must be 1.
+       //
+       // <width> must obviously be an even number. It is the true width of the image
+       // in pixels, ie., the number of horizontal luma samples.
+       YCbCr422InterleavedInput(const ImageFormat &image_format,
+                                const YCbCrFormat &ycbcr_format,
+                                unsigned width, unsigned height);
+       ~YCbCr422InterleavedInput();
+
+       virtual std::string effect_type_id() const { return "YCbCr422InterleavedInput"; }
+
+       virtual bool can_output_linear_gamma() const { return false; }
+       virtual AlphaHandling alpha_handling() const { return OUTPUT_BLANK_ALPHA; }
+
+       std::string output_fragment_shader();
+
+       // Uploads the texture if it has changed since last time.
+       void set_gl_state(GLuint glsl_program_num, const std::string& prefix, unsigned *sampler_num);
+
+       unsigned get_width() const { return width; }
+       unsigned get_height() const { return height; }
+       Colorspace get_color_space() const { return image_format.color_space; }
+       GammaCurve get_gamma_curve() const { return image_format.gamma_curve; }
+       virtual bool can_supply_mipmaps() const { return false; }
+
+       // Tells the input where to fetch the actual pixel data. Note that if you change
+       // this data, you must either call set_pixel_data() again (using the same pointer
+       // is fine), or invalidate_pixel_data(). Otherwise, the texture won't be re-uploaded
+       // on subsequent frames.
+       //
+       // The data can either be a regular pointer (if pbo==0), or a byte offset
+       // into a PBO. The latter will allow you to start uploading the texture data
+       // asynchronously to the GPU, if you have any CPU-intensive work between the
+       // call to set_pixel_data() and the actual rendering. Also, since we upload
+       // the data twice, using a PBO can save texture upload bandwidth. In either case,
+       // the pointer (and PBO, if set) has to be valid at the time of the render call.
+       void set_pixel_data(const unsigned char *pixel_data, GLuint pbo = 0)
+       {
+               this->pixel_data = pixel_data;
+               this->pbo = pbo;
+               invalidate_pixel_data();
+       }
+
+       void invalidate_pixel_data();
+
+       void set_pitch(unsigned pitch) {
+               assert(pitch % ycbcr_format.chroma_subsampling_x == 0);
+               pitches[CHANNEL_LUMA] = pitch;
+               pitches[CHANNEL_CHROMA] = pitch / ycbcr_format.chroma_subsampling_x;
+               invalidate_pixel_data();
+       }
+
+       virtual void inform_added(EffectChain *chain)
+       {
+               resource_pool = chain->get_resource_pool();
+       }
+
+       bool set_int(const std::string& key, int value);
+
+private:
+       ImageFormat image_format;
+       YCbCrFormat ycbcr_format;
+       GLuint pbo;
+
+       // Luma texture is 0, chroma texture is 1.
+       enum Channel {
+               CHANNEL_LUMA,
+               CHANNEL_CHROMA
+       };
+       GLuint texture_num[2];
+       GLuint widths[2];
+       unsigned pitches[2];
+
+       unsigned width, height;
+       const unsigned char *pixel_data;
+       ResourcePool *resource_pool;
+};
+
+}  // namespace movit
+
+#endif  // !defined(_MOVIT_YCBCR_422INTERLEAVED_INPUT_H)
diff --git a/ycbcr_422interleaved_input_test.cpp b/ycbcr_422interleaved_input_test.cpp
new file mode 100644 (file)
index 0000000..d47bb90
--- /dev/null
@@ -0,0 +1,180 @@
+// Unit tests for YCbCr422InterleavedInput.
+
+#include <epoxy/gl.h>
+#include <stddef.h>
+
+#include "effect_chain.h"
+#include "gtest/gtest.h"
+#include "test_util.h"
+#include "util.h"
+#include "ycbcr_422interleaved_input.h"
+
+namespace movit {
+
+// Adapted from the Simple444 test from YCbCrInputTest.
+TEST(YCbCr422InterleavedInputTest, Simple422) {
+       const int width = 2;
+       const int height = 5;
+
+       // Pure-color test inputs, calculated with the formulas in Rec. 601
+        // section 2.5.4.
+       unsigned char uyvy[width * height * 2] = {
+               /*U=*/128, /*Y=*/ 16, /*V=*/128, /*Y=*/ 16,
+               /*U=*/128, /*Y=*/235, /*V=*/128, /*Y=*/235,
+               /*U=*/ 90, /*Y=*/ 81, /*V=*/240, /*Y=*/ 81,
+               /*U=*/ 54, /*Y=*/145, /*V=*/ 34, /*Y=*/145,
+               /*U=*/240, /*Y=*/ 41, /*V=*/110, /*Y=*/ 41,
+       };
+
+       float expected_data[4 * width * height] = {
+               0.0, 0.0, 0.0, 1.0,   0.0, 0.0, 0.0, 1.0,
+               1.0, 1.0, 1.0, 1.0,   1.0, 1.0, 1.0, 1.0,
+               1.0, 0.0, 0.0, 1.0,   1.0, 0.0, 0.0, 1.0,
+               0.0, 1.0, 0.0, 1.0,   0.0, 1.0, 0.0, 1.0,
+               0.0, 0.0, 1.0, 1.0,   0.0, 0.0, 1.0, 1.0,
+       };
+       float out_data[4 * width * height];
+
+       EffectChainTester tester(NULL, width, height);
+
+       ImageFormat format;
+       format.color_space = COLORSPACE_sRGB;
+       format.gamma_curve = GAMMA_sRGB;
+
+       YCbCrFormat ycbcr_format;
+       ycbcr_format.luma_coefficients = YCBCR_REC_601;
+       ycbcr_format.full_range = false;
+       ycbcr_format.chroma_subsampling_x = 2;
+       ycbcr_format.chroma_subsampling_y = 1;
+       ycbcr_format.cb_x_position = 0.0f;  // Doesn't really matter here, since chroma is constant within each row.
+       ycbcr_format.cb_y_position = 0.5f;
+       ycbcr_format.cr_x_position = 0.0f;
+       ycbcr_format.cr_y_position = 0.5f;
+
+       YCbCr422InterleavedInput *input = new YCbCr422InterleavedInput(format, ycbcr_format, width, height);
+       input->set_pixel_data(uyvy);
+       tester.get_chain()->add_input(input);
+
+       tester.run(out_data, GL_RGBA, COLORSPACE_sRGB, GAMMA_sRGB);
+
+        // Y'CbCr isn't 100% accurate (the input values are rounded),
+        // so we need some leeway.
+        expect_equal(expected_data, out_data, 4 * width, height, 0.025, 0.002);
+}
+
+// Adapted from the YCbCrInput test of the same name.
+TEST(YCbCr422InterleavedInputTest, DifferentCbAndCrPositioning) {
+       const int width = 4;
+       const int height = 4;
+
+       unsigned char uyvy[width * height * 2] = {
+               /*U=*/ 64, /*Y=*/126, /*V=*/ 48, /*Y=*/126,  /*U=*/128, /*Y=*/126, /*V=*/128, /*Y=*/126,
+               /*U=*/128, /*Y=*/126, /*V=*/128, /*Y=*/126,  /*U=*/192, /*Y=*/126, /*V=*/208, /*Y=*/126,
+               /*U=*/128, /*Y=*/126, /*V=*/128, /*Y=*/126,  /*U=*/128, /*Y=*/126, /*V=*/128, /*Y=*/126,
+               /*U=*/128, /*Y=*/126, /*V=*/128, /*Y=*/126,  /*U=*/128, /*Y=*/126, /*V=*/128, /*Y=*/126,
+       };
+
+       // Chroma samples in this case are always co-sited with a luma sample;
+       // their associated color values and position are marked off in comments.
+       float expected_data_blue[width * height] = {
+                  0.000 /* 0.0 */, 0.250,           0.500 /* 0.5 */, 0.500, 
+                  0.500 /* 0.5 */, 0.750,           1.000 /* 1.0 */, 1.000, 
+                  0.500 /* 0.5 */, 0.500,           0.500 /* 0.5 */, 0.500, 
+                  0.500 /* 0.5 */, 0.500,           0.500 /* 0.5 */, 0.500, 
+       };
+       float expected_data_red[width * height] = {
+                  0.000,           0.000 /* 0.0 */, 0.250,           0.500 /* 0.5 */, 
+                  0.500,           0.500 /* 0.5 */, 0.750,           1.000 /* 1.0 */, 
+                  0.500,           0.500 /* 0.5 */, 0.500,           0.500 /* 0.5 */, 
+                  0.500,           0.500 /* 0.5 */, 0.500,           0.500 /* 0.5 */, 
+       };
+       float out_data[width * height];
+
+       EffectChainTester tester(NULL, width, height);
+
+       ImageFormat format;
+       format.color_space = COLORSPACE_sRGB;
+       format.gamma_curve = GAMMA_sRGB;
+
+       YCbCrFormat ycbcr_format;
+       ycbcr_format.luma_coefficients = YCBCR_REC_601;
+       ycbcr_format.full_range = false;
+       ycbcr_format.chroma_subsampling_x = 2;
+       ycbcr_format.chroma_subsampling_y = 1;
+       ycbcr_format.cb_x_position = 0.0f;
+       ycbcr_format.cb_y_position = 0.5f;
+       ycbcr_format.cr_x_position = 1.0f;
+       ycbcr_format.cr_y_position = 0.5f;
+
+       YCbCr422InterleavedInput *input = new YCbCr422InterleavedInput(format, ycbcr_format, width, height);
+       input->set_pixel_data(uyvy);
+       tester.get_chain()->add_input(input);
+
+       // Y'CbCr isn't 100% accurate (the input values are rounded),
+       // so we need some leeway.
+       tester.run(out_data, GL_RED, COLORSPACE_sRGB, GAMMA_sRGB);
+       expect_equal(expected_data_red, out_data, width, height, 0.02, 0.002);
+
+       tester.run(out_data, GL_BLUE, COLORSPACE_sRGB, GAMMA_sRGB);
+       expect_equal(expected_data_blue, out_data, width, height, 0.01, 0.001);
+}
+
+TEST(YCbCr422InterleavedInputTest, PBO) {
+       const int width = 2;
+       const int height = 5;
+
+       // Pure-color test inputs, calculated with the formulas in Rec. 601
+        // section 2.5.4.
+       unsigned char uyvy[width * height * 2] = {
+               /*U=*/128, /*Y=*/ 16, /*V=*/128, /*Y=*/ 16,
+               /*U=*/128, /*Y=*/235, /*V=*/128, /*Y=*/235,
+               /*U=*/ 90, /*Y=*/ 81, /*V=*/240, /*Y=*/ 81,
+               /*U=*/ 54, /*Y=*/145, /*V=*/ 34, /*Y=*/145,
+               /*U=*/240, /*Y=*/ 41, /*V=*/110, /*Y=*/ 41,
+       };
+
+       float expected_data[4 * width * height] = {
+               0.0, 0.0, 0.0, 1.0,   0.0, 0.0, 0.0, 1.0,
+               1.0, 1.0, 1.0, 1.0,   1.0, 1.0, 1.0, 1.0,
+               1.0, 0.0, 0.0, 1.0,   1.0, 0.0, 0.0, 1.0,
+               0.0, 1.0, 0.0, 1.0,   0.0, 1.0, 0.0, 1.0,
+               0.0, 0.0, 1.0, 1.0,   0.0, 0.0, 1.0, 1.0,
+       };
+       float out_data[4 * width * height];
+
+       GLuint pbo;
+       glGenBuffers(1, &pbo);
+       glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, pbo);
+       glBufferData(GL_PIXEL_UNPACK_BUFFER_ARB, width * height * 2, uyvy, GL_STREAM_DRAW);
+       glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0);
+
+       EffectChainTester tester(NULL, width, height);
+
+       ImageFormat format;
+       format.color_space = COLORSPACE_sRGB;
+       format.gamma_curve = GAMMA_sRGB;
+
+       YCbCrFormat ycbcr_format;
+       ycbcr_format.luma_coefficients = YCBCR_REC_601;
+       ycbcr_format.full_range = false;
+       ycbcr_format.chroma_subsampling_x = 2;
+       ycbcr_format.chroma_subsampling_y = 1;
+       ycbcr_format.cb_x_position = 0.0f;  // Doesn't really matter here, since chroma is constant within each row.
+       ycbcr_format.cb_y_position = 0.5f;
+       ycbcr_format.cr_x_position = 0.0f;
+       ycbcr_format.cr_y_position = 0.5f;
+
+       YCbCr422InterleavedInput *input = new YCbCr422InterleavedInput(format, ycbcr_format, width, height);
+       input->set_pixel_data((unsigned char *)BUFFER_OFFSET(0), pbo);
+       tester.get_chain()->add_input(input);
+
+       tester.run(out_data, GL_RGBA, COLORSPACE_sRGB, GAMMA_sRGB);
+
+        // Y'CbCr isn't 100% accurate (the input values are rounded),
+        // so we need some leeway.
+        expect_equal(expected_data, out_data, 4 * width, height, 0.025, 0.002);
+
+       glDeleteBuffers(1, &pbo);
+}
+
+}  // namespace movit
index ee341f0124de5985da6c2f0d5eb7dcb37ea18a64..091880fdade7f0dd2ee778b9bc74366dc668a637 100644 (file)
@@ -8,6 +8,7 @@
 #include "effect_util.h"
 #include "resource_pool.h"
 #include "util.h"
 #include "effect_util.h"
 #include "resource_pool.h"
 #include "util.h"
+#include "ycbcr.h"
 #include "ycbcr_input.h"
 
 using namespace Eigen;
 #include "ycbcr_input.h"
 
 using namespace Eigen;
@@ -15,126 +16,6 @@ using namespace std;
 
 namespace movit {
 
 
 namespace movit {
 
-namespace {
-
-// OpenGL has texel center in (0.5, 0.5), but different formats have
-// chroma in various other places. If luma samples are X, the chroma
-// sample is *, and subsampling is 2x2, the situation with chroma
-// center in (0.5, 0.5) looks approximately like this:
-//
-//   X   X
-//     *   
-//   X   X
-//
-// If, on the other hand, chroma center is in (0.0, 0.5) (common
-// for e.g. MPEG-4), the figure changes to:
-//
-//   X   X
-//   *      
-//   X   X
-//
-// In other words, (0.0, 0.0) means that the chroma sample is exactly
-// co-sited on top of the top-left luma sample. Note, however, that
-// this is _not_ 0.5 texels to the left, since OpenGL's texel center
-// is at (0.5, 0.5); the chroma sample is at (0.25, 0.25). In a sense, the
-// four luma samples define a square where chroma position (0.0, 0.0) is at
-// texel position (0.25, 0.25) and chroma position (1.0, 1.0) is at texel
-// position (0.75, 0.75) (the outer border shows the borders of the texel
-// itself, i.e. from (0, 0) to (1, 1)):
-//
-//  ---------
-// |         |
-// |  X---X  |
-// |  | * |  |
-// |  X---X  |
-// |         |
-//  ---------
-//
-// Also note that if we have no subsampling, the square will have zero
-// area and the chroma position does not matter at all.
-float compute_chroma_offset(float pos, unsigned subsampling_factor, unsigned resolution)
-{
-       float local_chroma_pos = (0.5 + pos * (subsampling_factor - 1)) / subsampling_factor;
-       return (0.5 - local_chroma_pos) / resolution;
-}
-
-// Given <ycbcr_format>, compute the values needed to turn Y'CbCr into R'G'B';
-// first subtract the returned offset, then left-multiply the returned matrix
-// (the scaling is already folded into it).
-void compute_ycbcr_matrix(YCbCrFormat ycbcr_format, float* offset, Matrix3d* ycbcr_to_rgb)
-{
-       double coeff[3], scale[3];
-
-       switch (ycbcr_format.luma_coefficients) {
-       case YCBCR_REC_601:
-               // Rec. 601, page 2.
-               coeff[0] = 0.299;
-               coeff[1] = 0.587;
-               coeff[2] = 0.114;
-               break;
-
-       case YCBCR_REC_709:
-               // Rec. 709, page 19.
-               coeff[0] = 0.2126;
-               coeff[1] = 0.7152;
-               coeff[2] = 0.0722;
-               break;
-
-       case YCBCR_REC_2020:
-               // Rec. 2020, page 4.
-               coeff[0] = 0.2627;
-               coeff[1] = 0.6780;
-               coeff[2] = 0.0593;
-               break;
-
-       default:
-               assert(false);
-       }
-
-       if (ycbcr_format.full_range) {
-               offset[0] = 0.0 / 255.0;
-               offset[1] = 128.0 / 255.0;
-               offset[2] = 128.0 / 255.0;
-
-               scale[0] = 1.0;
-               scale[1] = 1.0;
-               scale[2] = 1.0;
-       } else {
-               // Rec. 601, page 4; Rec. 709, page 19; Rec. 2020, page 4.
-               offset[0] = 16.0 / 255.0;
-               offset[1] = 128.0 / 255.0;
-               offset[2] = 128.0 / 255.0;
-
-               scale[0] = 255.0 / 219.0;
-               scale[1] = 255.0 / 224.0;
-               scale[2] = 255.0 / 224.0;
-       }
-
-       // Matrix to convert RGB to YCbCr. See e.g. Rec. 601.
-       Matrix3d rgb_to_ycbcr;
-       rgb_to_ycbcr(0,0) = coeff[0];
-       rgb_to_ycbcr(0,1) = coeff[1];
-       rgb_to_ycbcr(0,2) = coeff[2];
-
-       float cb_fac = (224.0 / 219.0) / (coeff[0] + coeff[1] + 1.0f - coeff[2]);
-       rgb_to_ycbcr(1,0) = -coeff[0] * cb_fac;
-       rgb_to_ycbcr(1,1) = -coeff[1] * cb_fac;
-       rgb_to_ycbcr(1,2) = (1.0f - coeff[2]) * cb_fac;
-
-       float cr_fac = (224.0 / 219.0) / (1.0f - coeff[0] + coeff[1] + coeff[2]);
-       rgb_to_ycbcr(2,0) = (1.0f - coeff[0]) * cr_fac;
-       rgb_to_ycbcr(2,1) = -coeff[1] * cr_fac;
-       rgb_to_ycbcr(2,2) = -coeff[2] * cr_fac;
-
-       // Inverting the matrix gives us what we need to go from YCbCr back to RGB.
-       *ycbcr_to_rgb = rgb_to_ycbcr.inverse();
-
-       // Fold in the scaling.
-       *ycbcr_to_rgb *= Map<const Vector3d>(scale).asDiagonal();
-}
-
-}  // namespace
-
 YCbCrInput::YCbCrInput(const ImageFormat &image_format,
                        const YCbCrFormat &ycbcr_format,
                        unsigned width, unsigned height)
 YCbCrInput::YCbCrInput(const ImageFormat &image_format,
                        const YCbCrFormat &ycbcr_format,
                        unsigned width, unsigned height)
index 4c7b3e9a2b6bd8282976f8a759c0061a16c2fb7b..97ad5266bb3b0c9942f39dd3a2964aceedf848c1 100644 (file)
 #include "effect_chain.h"
 #include "image_format.h"
 #include "input.h"
 #include "effect_chain.h"
 #include "image_format.h"
 #include "input.h"
+#include "ycbcr.h"
 
 namespace movit {
 
 class ResourcePool;
 
 
 namespace movit {
 
 class ResourcePool;
 
-struct YCbCrFormat {
-       // Which formula for Y' to use.
-       YCbCrLumaCoefficients luma_coefficients;
-
-       // If true, assume Y'CbCr coefficients are full-range, i.e. go from 0 to 255
-       // instead of the limited 220/225 steps in classic MPEG. For instance,
-       // JPEG uses the Rec. 601 luma coefficients, but full range.
-       bool full_range;
-
-       // Sampling factors for chroma components. For no subsampling (4:4:4),
-       // set both to 1.
-       unsigned chroma_subsampling_x, chroma_subsampling_y;
-
-       // Positioning of the chroma samples. MPEG-1 and JPEG are (0.5, 0.5);
-       // MPEG-2 and newer typically are (0.0, 0.5).
-       float cb_x_position, cb_y_position;
-       float cr_x_position, cr_y_position;
-};
-
 class YCbCrInput : public Input {
 public:
        YCbCrInput(const ImageFormat &image_format,
 class YCbCrInput : public Input {
 public:
        YCbCrInput(const ImageFormat &image_format,