From ba60914d4e5eda7b28af700bf43e9699b7aa720d Mon Sep 17 00:00:00 2001
From: "Steinar H. Gunderson" <sgunderson@bigfoot.com>
Date: Thu, 30 Jul 2015 01:38:38 +0200
Subject: [PATCH] Add an effect for 4:2:2 interleaved YCbCr input (UYVY).

This is primarily motivated by the fact that DeckLink uses this format
natively.
---
 .gitignore                          |   1 +
 Makefile.in                         |   3 +-
 resource_pool.cpp                   |   3 +
 util.cpp                            |  10 ++
 util.h                              |   3 +-
 ycbcr.cpp                           | 126 +++++++++++++++++++
 ycbcr.h                             |  44 +++++++
 ycbcr_422interleaved_input.cpp      | 149 +++++++++++++++++++++++
 ycbcr_422interleaved_input.frag     |  32 +++++
 ycbcr_422interleaved_input.h        | 130 ++++++++++++++++++++
 ycbcr_422interleaved_input_test.cpp | 180 ++++++++++++++++++++++++++++
 ycbcr_input.cpp                     | 121 +------------------
 ycbcr_input.h                       |  20 +---
 13 files changed, 681 insertions(+), 141 deletions(-)
 create mode 100644 ycbcr.cpp
 create mode 100644 ycbcr.h
 create mode 100644 ycbcr_422interleaved_input.cpp
 create mode 100644 ycbcr_422interleaved_input.frag
 create mode 100644 ycbcr_422interleaved_input.h
 create mode 100644 ycbcr_422interleaved_input_test.cpp

diff --git a/.gitignore b/.gitignore
index 547ccdb..b73db80 100644
--- a/.gitignore
+++ b/.gitignore
@@ -32,6 +32,7 @@ glow_effect_test
 padding_effect_test
 flat_input_test
 ycbcr_input_test
+ycbcr_422interleaved_input_test
 complex_modulate_effect_test
 fft_pass_effect_test
 fp16_test
diff --git a/Makefile.in b/Makefile.in
index 5cb7fa9..fe46ec3 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -47,6 +47,7 @@ DEMO_OBJS=demo.o
 # Inputs.
 TESTED_INPUTS = flat_input
 TESTED_INPUTS += ycbcr_input
+TESTED_INPUTS += ycbcr_422interleaved_input
 
 INPUTS = $(TESTED_INPUTS) $(UNTESTED_INPUTS)
 
@@ -87,7 +88,7 @@ EFFECTS = $(TESTED_EFFECTS) $(UNTESTED_EFFECTS)
 # Unit tests.
 TESTS=effect_chain_test fp16_test $(TESTED_INPUTS:=_test) $(TESTED_EFFECTS:=_test)
 
-LIB_OBJS=effect_util.o util.o widgets.o effect.o effect_chain.o init.o resource_pool.o fp16.o $(INPUTS:=.o) $(EFFECTS:=.o)
+LIB_OBJS=effect_util.o util.o widgets.o effect.o effect_chain.o init.o resource_pool.o fp16.o ycbcr.o $(INPUTS:=.o) $(EFFECTS:=.o)
 
 # Default target:
 all: libmovit.la $(TESTS)
diff --git a/resource_pool.cpp b/resource_pool.cpp
index eba7923..7f81376 100644
--- a/resource_pool.cpp
+++ b/resource_pool.cpp
@@ -461,6 +461,9 @@ size_t ResourcePool::estimate_texture_size(const Texture2D &texture_format)
 	case GL_R16F:
 		bytes_per_pixel = 2;
 		break;
+	case GL_RG8:
+		bytes_per_pixel = 2;
+		break;
 	case GL_R8:
 		bytes_per_pixel = 1;
 		break;
diff --git a/util.cpp b/util.cpp
index 3ebf162..a6175b4 100644
--- a/util.cpp
+++ b/util.cpp
@@ -183,6 +183,16 @@ string output_glsl_mat3(const string &name, const Eigen::Matrix3d &m)
 	return ss.str();
 }
 
+string output_glsl_float(const string &name, float x)
+{
+	// A private stream pinned to the "C" locale keeps this locale-independent and thread-safe.
+	stringstream decl;
+	decl.imbue(locale("C"));
+	decl.precision(8);
+	decl << "const float " << name << " = " << x << ";\n";
+	return decl.str();
+}
+
 string output_glsl_vec2(const string &name, float x, float y)
 {
 	// Use stringstream to be independent of the current locale in a thread-safe manner.
diff --git a/util.h b/util.h
index f57af77..e102f21 100644
--- a/util.h
+++ b/util.h
@@ -38,7 +38,8 @@ void print_3x3_matrix(const Eigen::Matrix3d &m);
 // Output a GLSL 3x3 matrix declaration.
 std::string output_glsl_mat3(const std::string &name, const Eigen::Matrix3d &m);
 
-// Output GLSL 2-length and 3-length vector declarations.
+// Output GLSL scalar, 2-length and 3-length vector declarations.
+std::string output_glsl_float(const std::string &name, float x);
 std::string output_glsl_vec2(const std::string &name, float x, float y);
 std::string output_glsl_vec3(const std::string &name, float x, float y, float z);
 
diff --git a/ycbcr.cpp b/ycbcr.cpp
new file mode 100644
index 0000000..f0124ea
--- /dev/null
+++ b/ycbcr.cpp
@@ -0,0 +1,126 @@
+#include <Eigen/Core>
+#include <Eigen/LU>
+
+#include "ycbcr.h"
+
+using namespace Eigen;
+
+namespace movit {
+
+// OpenGL has texel center in (0.5, 0.5), but different formats have
+// chroma in various other places. If luma samples are X, the chroma
+// sample is *, and subsampling is 2x2, the situation with chroma
+// center in (0.5, 0.5) looks approximately like this:
+//
+//   X   X
+//     *   
+//   X   X
+//
+// If, on the other hand, chroma center is in (0.0, 0.5) (common
+// for e.g. MPEG-4), the figure changes to:
+//
+//   X   X
+//   *      
+//   X   X
+//
+// In other words, (0.0, 0.0) means that the chroma sample is exactly
+// co-sited on top of the top-left luma sample. Note, however, that
+// this is _not_ 0.5 texels to the left, since the OpenGL's texel center
+// is in (0.5, 0.5); it is in (0.25, 0.25). In a sense, the four luma samples
+// define a square where chroma position (0.0, 0.0) is in texel position
+// (0.25, 0.25) and chroma position (1.0, 1.0) is in texel position (0.75, 0.75)
+// (the outer border shows the borders of the texel itself, ie. from
+// (0, 0) to (1, 1)):
+//
+//  ---------
+// |         |
+// |  X---X  |
+// |  | * |  |
+// |  X---X  |
+// |         |
+//  ---------
+//
+// Also note that if we have no subsampling, the square will have zero
+// area and the chroma position does not matter at all.
+float compute_chroma_offset(float pos, unsigned subsampling_factor, unsigned resolution)
+{
+	const float texel_pos = (0.5 + pos * (subsampling_factor - 1)) / subsampling_factor;  // 0.5 = texel center
+	return (0.5 - texel_pos) / resolution;  // shift away from the center, as a fraction of the texture size
+}
+
+// Given <ycbcr_format>, compute the values needed to turn Y'CbCr into R'G'B':
+// first subtract the returned <offset> (three floats, in 0..1 texture units),
+// then left-multiply the returned matrix (the range scaling is already folded
+// into it). Only luma_coefficients and full_range of <ycbcr_format> are read.
+void compute_ycbcr_matrix(YCbCrFormat ycbcr_format, float* offset, Matrix3d* ycbcr_to_rgb)
+{
+	double coeff[3], scale[3];
+
+	switch (ycbcr_format.luma_coefficients) {
+	case YCBCR_REC_601:
+		// Rec. 601, page 2.
+		coeff[0] = 0.299;
+		coeff[1] = 0.587;
+		coeff[2] = 0.114;
+		break;
+
+	case YCBCR_REC_709:
+		// Rec. 709, page 19.
+		coeff[0] = 0.2126;
+		coeff[1] = 0.7152;
+		coeff[2] = 0.0722;
+		break;
+
+	case YCBCR_REC_2020:
+		// Rec. 2020, page 4.
+		coeff[0] = 0.2627;
+		coeff[1] = 0.6780;
+		coeff[2] = 0.0593;
+		break;
+
+	default:
+		assert(false);
+	}
+
+	if (ycbcr_format.full_range) {
+		offset[0] = 0.0 / 255.0;
+		offset[1] = 128.0 / 255.0;
+		offset[2] = 128.0 / 255.0;
+		scale[0] = scale[1] = scale[2] = 1.0;
+	} else {
+		// Rec. 601, page 4; Rec. 709, page 19; Rec. 2020, page 4.
+		offset[0] = 16.0 / 255.0;
+		offset[1] = 128.0 / 255.0;
+		offset[2] = 128.0 / 255.0;
+		scale[0] = 255.0 / 219.0;
+		scale[1] = 255.0 / 224.0;
+		scale[2] = 255.0 / 224.0;
+	}
+
+	// Matrix to convert R'G'B' to analog (E'Y, E'Cb, E'Cr). See e.g. Rec. 601.
+	Matrix3d rgb_to_ycbcr;
+	rgb_to_ycbcr(0,0) = coeff[0];
+	rgb_to_ycbcr(0,1) = coeff[1];
+	rgb_to_ycbcr(0,2) = coeff[2];
+
+	// The offset/scale pair above already maps the quantized chroma to E'Cb/E'Cr
+	// in -0.5..0.5, so the rows below must be the plain analog equations; an
+	// extra 224/219 factor here would shrink the decoded chroma by about 2%.
+	const double cb_fac = 1.0 / (coeff[0] + coeff[1] + 1.0 - coeff[2]);
+	rgb_to_ycbcr(1,0) = -coeff[0] * cb_fac;
+	rgb_to_ycbcr(1,1) = -coeff[1] * cb_fac;
+	rgb_to_ycbcr(1,2) = (1.0 - coeff[2]) * cb_fac;
+
+	const double cr_fac = 1.0 / (1.0 - coeff[0] + coeff[1] + coeff[2]);
+	rgb_to_ycbcr(2,0) = (1.0 - coeff[0]) * cr_fac;
+	rgb_to_ycbcr(2,1) = -coeff[1] * cr_fac;
+	rgb_to_ycbcr(2,2) = -coeff[2] * cr_fac;
+
+	// Inverting the matrix gives us what we need to go from YCbCr back to RGB.
+	*ycbcr_to_rgb = rgb_to_ycbcr.inverse();
+
+	// Fold in the scaling (applied to the input vector before the inverse matrix).
+	*ycbcr_to_rgb *= Map<const Vector3d>(scale).asDiagonal();
+}
+
+}  // namespace movit
diff --git a/ycbcr.h b/ycbcr.h
new file mode 100644
index 0000000..7e5891f
--- /dev/null
+++ b/ycbcr.h
@@ -0,0 +1,44 @@
+#ifndef _MOVIT_YCBCR_H
+#define _MOVIT_YCBCR_H 1
+
+// Shared utility functions between YCbCrInput and YCbCr422InterleavedInput.
+
+#include "image_format.h"
+
+#include <Eigen/Core>
+
+namespace movit {
+
+struct YCbCrFormat {
+	// Which set of luma coefficients (Rec. 601, Rec. 709 or Rec. 2020) defines Y'.
+	YCbCrLumaCoefficients luma_coefficients;
+
+	// If true, assume Y'CbCr coefficients are full-range, ie. go from 0 to 255
+	// instead of the limited 220/225 steps (16..235 for Y', 16..240 for Cb/Cr)
+	// in classic MPEG. For instance, JPEG uses Rec. 601 luma, but full range.
+	bool full_range;
+
+	// Sampling factors for chroma components, e.g. (2, 1) for 4:2:2.
+	// For no subsampling (4:4:4), set both to 1.
+	unsigned chroma_subsampling_x, chroma_subsampling_y;
+
+	// Positioning of the chroma samples (0.0 = co-sited with the first luma
+	// sample). MPEG-1 and JPEG is (0.5, 0.5); MPEG-2 and newer typically (0.0, 0.5).
+	float cb_x_position, cb_y_position;
+	float cr_x_position, cr_y_position;
+};
+
+// Convert texel sampling offset for the given chroma channel, given that
+// chroma position is <pos> (0..1), we are downsampling this chroma channel
+// by a factor of <subsampling_factor> and the texture we are sampling from
+// is <resolution> pixels wide/high.
+float compute_chroma_offset(float pos, unsigned subsampling_factor, unsigned resolution);
+
+// Given <ycbcr_format>, compute the values needed to turn Y'CbCr into R'G'B';
+// first subtract the returned offset, then left-multiply the returned matrix
+// (the scaling is already folded into it).
+void compute_ycbcr_matrix(YCbCrFormat ycbcr_format, float *offset, Eigen::Matrix3d *ycbcr_to_rgb);
+
+}  // namespace movit
+
+#endif // !defined(_MOVIT_YCBCR_H)
diff --git a/ycbcr_422interleaved_input.cpp b/ycbcr_422interleaved_input.cpp
new file mode 100644
index 0000000..b634289
--- /dev/null
+++ b/ycbcr_422interleaved_input.cpp
@@ -0,0 +1,149 @@
+#include <epoxy/gl.h>
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "effect_util.h"
+#include "resource_pool.h"
+#include "util.h"
+#include "ycbcr.h"
+#include "ycbcr_422interleaved_input.h"
+
+using namespace Eigen;
+using namespace std;
+
+namespace movit {
+
+YCbCr422InterleavedInput::YCbCr422InterleavedInput(const ImageFormat &image_format,
+                                                   const YCbCrFormat &ycbcr_format,
+                                                   unsigned width, unsigned height)
+	: image_format(image_format),
+	  ycbcr_format(ycbcr_format),
+	  pbo(0),
+	  width(width),
+	  height(height),
+	  pixel_data(NULL),
+	  resource_pool(NULL)
+{
+	// Only 4:2:2 is supported: chroma halved horizontally, full vertical resolution.
+	assert(ycbcr_format.chroma_subsampling_x == 2);
+	assert(ycbcr_format.chroma_subsampling_y == 1);
+	assert(width % ycbcr_format.chroma_subsampling_x == 0);
+
+	// No textures exist until the first set_gl_state() call uploads them.
+	texture_num[CHANNEL_LUMA] = texture_num[CHANNEL_CHROMA] = 0;
+	// Until set_pitch() says otherwise, each row is assumed to be exactly <width> samples long.
+	const unsigned chroma_width = width / ycbcr_format.chroma_subsampling_x;
+	widths[CHANNEL_LUMA] = pitches[CHANNEL_LUMA] = width;
+	widths[CHANNEL_CHROMA] = pitches[CHANNEL_CHROMA] = chroma_width;
+}
+
+YCbCr422InterleavedInput::~YCbCr422InterleavedInput()
+{
+	// Hand every texture that was actually created back to the resource pool.
+	for (unsigned i = 0; i < 2; ++i) {
+		if (texture_num[i] == 0) continue;
+		resource_pool->release_2d_texture(texture_num[i]);
+	}
+}
+
+void YCbCr422InterleavedInput::set_gl_state(GLuint glsl_program_num, const string& prefix, unsigned *sampler_num)
+{
+	for (unsigned channel = 0; channel < 2; ++channel) {
+		glActiveTexture(GL_TEXTURE0 + *sampler_num + channel);  // Luma on the first unit, chroma on the next.
+		check_error();
+
+		if (texture_num[channel] == 0) {
+			// No texture yet (first use, or invalidated): create one and upload the pixel data.
+			GLuint format, internal_format;
+			if (channel == CHANNEL_LUMA) {
+				format = GL_RG;  // Two bytes per texel; the shader takes luma from .y.
+				internal_format = GL_RG8;
+			} else {	
+				assert(channel == CHANNEL_CHROMA);
+				format = GL_RGBA;  // Four bytes per texel; the shader takes Cb from .x and Cr from .z.
+				internal_format = GL_RGBA8;
+			}
+
+			texture_num[channel] = resource_pool->create_2d_texture(internal_format, widths[channel], height);
+			glBindTexture(GL_TEXTURE_2D, texture_num[channel]);
+			check_error();
+			glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+			check_error();
+			glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, pbo);  // With pbo != 0, pixel_data is a byte offset into it.
+			check_error();
+			glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
+			check_error();
+			glPixelStorei(GL_UNPACK_ROW_LENGTH, pitches[channel]);  // Row pitch, counted in texels of this texture.
+			check_error();
+			glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, widths[channel], height, format, GL_UNSIGNED_BYTE, pixel_data);
+			check_error();
+			glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);  // Back to the default (row length = upload width).
+			check_error();
+			glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+			check_error();
+			glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+			check_error();
+		} else {
+			glBindTexture(GL_TEXTURE_2D, texture_num[channel]);  // Already uploaded; just bind it.
+			check_error();
+		}
+	}
+
+	glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0);
+	check_error();
+
+	// Point the shader's two samplers at the texture units bound above.
+	set_uniform_int(glsl_program_num, prefix, "tex_y", *sampler_num + 0);
+	set_uniform_int(glsl_program_num, prefix, "tex_cbcr", *sampler_num + 1);
+
+	*sampler_num += 2;  // Two texture units were consumed.
+}
+
+string YCbCr422InterleavedInput::output_fragment_shader()
+{
+	// Constants for the Y'CbCr -> R'G'B' conversion.
+	float offset[3];
+	Matrix3d ycbcr_to_rgb;
+	compute_ycbcr_matrix(ycbcr_format, offset, &ycbcr_to_rgb);
+	string shader = output_glsl_mat3("PREFIX(inv_ycbcr_matrix)", ycbcr_to_rgb);
+	shader += output_glsl_vec3("PREFIX(offset)", offset[0], offset[1], offset[2]);
+
+	// Horizontal sampling offsets that compensate for the chroma siting.
+	const unsigned subsampling_x = ycbcr_format.chroma_subsampling_x;
+	const unsigned chroma_width = widths[CHANNEL_CHROMA];
+	shader += output_glsl_float("PREFIX(cb_offset_x)",
+		compute_chroma_offset(ycbcr_format.cb_x_position, subsampling_x, chroma_width));
+	shader += output_glsl_float("PREFIX(cr_offset_x)",
+		compute_chroma_offset(ycbcr_format.cr_x_position, subsampling_x, chroma_width));
+
+	// Identically sited Cb and Cr let the shader fetch both with a single lookup.
+	char define_line[256];
+	const bool offsets_equal = fabs(ycbcr_format.cb_x_position - ycbcr_format.cr_x_position) < 1e-6;
+	sprintf(define_line, "#define CB_CR_OFFSETS_EQUAL %d\n", offsets_equal);
+	shader += define_line;
+
+	shader += read_file("ycbcr_422interleaved_input.frag");
+	return shader;
+}
+
+void YCbCr422InterleavedInput::invalidate_pixel_data()
+{
+	// Dropping the textures makes the next set_gl_state() call upload the pixel data again.
+	for (unsigned i = 0; i < 2; ++i) {
+		if (texture_num[i] == 0) continue;
+		resource_pool->release_2d_texture(texture_num[i]);
+		texture_num[i] = 0;
+	}
+}
+
+bool YCbCr422InterleavedInput::set_int(const std::string& key, int value)
+{
+	if (key != "needs_mipmaps") {
+		return Effect::set_int(key, value);
+	}
+	// Mipmaps are currently not supported, so only a request to turn them off can succeed.
+	return value == 0;
+}
+
+}  // namespace movit
diff --git a/ycbcr_422interleaved_input.frag b/ycbcr_422interleaved_input.frag
new file mode 100644
index 0000000..66762a8
--- /dev/null
+++ b/ycbcr_422interleaved_input.frag
@@ -0,0 +1,32 @@
+uniform sampler2D PREFIX(tex_y);
+uniform sampler2D PREFIX(tex_cbcr);
+
+vec4 FUNCNAME(vec2 tc) {
+	// OpenGL's origin is bottom-left, but most graphics software assumes
+	// a top-left origin. Thus, for inputs that come from the user,
+	// we flip the y coordinate.
+	tc.y = 1.0 - tc.y;
+
+	vec3 ycbcr;
+	ycbcr.x = tex2D(PREFIX(tex_y), tc).y;  // Full-width RG texture: each texel is (chroma, Y'), so luma is .y.
+#if CB_CR_OFFSETS_EQUAL
+	vec2 tc_cbcr = tc;  // Cb and Cr are sited identically, so one lookup yields both.
+	tc_cbcr.x += PREFIX(cb_offset_x);
+	ycbcr.yz = tex2D(PREFIX(tex_cbcr), tc_cbcr).xz;  // Half-width RGBA texture: each texel is (Cb, Y', Cr, Y').
+#else
+	vec2 tc_cb = tc;  // Different siting: sample Cb and Cr at their own horizontal offsets.
+	tc_cb.x += PREFIX(cb_offset_x);
+	ycbcr.y = tex2D(PREFIX(tex_cbcr), tc_cb).x;
+
+	vec2 tc_cr = tc;
+	tc_cr.x += PREFIX(cr_offset_x);
+	ycbcr.z = tex2D(PREFIX(tex_cbcr), tc_cr).z;
+#endif
+
+	ycbcr -= PREFIX(offset);  // Subtract the per-channel offset before applying the matrix.
+
+	vec4 rgba;
+	rgba.rgb = PREFIX(inv_ycbcr_matrix) * ycbcr;
+	rgba.a = 1.0;  // The input carries no alpha; output is opaque.
+	return rgba;
+}
diff --git a/ycbcr_422interleaved_input.h b/ycbcr_422interleaved_input.h
new file mode 100644
index 0000000..b346986
--- /dev/null
+++ b/ycbcr_422interleaved_input.h
@@ -0,0 +1,130 @@
+#ifndef _MOVIT_YCBCR_422INTERLEAVED_INPUT_H
+#define _MOVIT_YCBCR_422INTERLEAVED_INPUT_H 1
+
+// YCbCr422InterleavedInput is for handling 4:2:2 interleaved 8-bit Y'CbCr,
+// which you can get from e.g. certain capture cards. (Most other Y'CbCr
+// encodings are planar, which is handled by YCbCrInput.) Currently we only
+// handle the UYVY variant, although YUY2 should be easy to support if needed.
+//
+// Horizontal chroma placement is freely choosable as with YCbCrInput,
+// but BT.601 (which at least DeckLink claims to conform to, under the
+// name CCIR 601) seems to specify chroma positioning to the far left
+// (that is 0.0); BT.601 Annex 1 (page 7) says "C_R and C_B samples co-sited
+// with odd (1st, 3rd, 5th, etc.) Y samples in each line", and I assume they do
+// not start counting from 0 when they use the "1st" moniker.
+//
+// Interpolation is bilinear as in YCbCrInput (done by the GPU's normal
+// scaling, except for the Y channel which of course needs some fiddling),
+// and is done in non-linear light (since that's what everything specifies,
+// except Rec. 2020 lets you choose between the two). A higher-quality
+// choice would be to use a single pass of ResampleEffect to scale the
+// chroma, but for now we are consistent between the two.
+//
+// There is a disparity between the interleaving and the way OpenGL typically
+// expects to sample. In lieu of accessible hardware support (a lot of hardware
+// supports native interleaved 4:2:2 sampling, but OpenGL drivers seem to
+// rarely support it), we simply upload the same data twice; once as a
+// full-width RG texture (from which we sample luma) and once as a half-width
+// RGBA texture (from which we sample chroma). We throw away half of the color
+// channels each time, so bandwidth is wasted, but it makes for a very
+// uncomplicated shader.
+
+#include <epoxy/gl.h>
+#include <string>
+
+#include "effect.h"
+#include "effect_chain.h"
+#include "image_format.h"
+#include "input.h"
+#include "ycbcr.h"
+
+namespace movit {
+
+class ResourcePool;
+
+class YCbCr422InterleavedInput : public Input {
+public:
+	// <ycbcr_format> must be consistent with 4:2:2 sampling; specifically:
+	//
+	//  * chroma_subsampling_x must be 2.
+	//  * chroma_subsampling_y must be 1.
+	//
+	// <width> must obviously be an even number. It is the true width of the image
+	// in pixels, ie., the number of horizontal luma samples.
+	YCbCr422InterleavedInput(const ImageFormat &image_format,
+	                         const YCbCrFormat &ycbcr_format,
+				 unsigned width, unsigned height);
+	~YCbCr422InterleavedInput();  // Releases any textures still held back to the resource pool.
+
+	virtual std::string effect_type_id() const { return "YCbCr422InterleavedInput"; }
+
+	virtual bool can_output_linear_gamma() const { return false; }
+	virtual AlphaHandling alpha_handling() const { return OUTPUT_BLANK_ALPHA; }
+
+	std::string output_fragment_shader();  // Conversion constants followed by ycbcr_422interleaved_input.frag.
+
+	// Binds both textures (uploading first if none exist) and advances *sampler_num by two.
+	void set_gl_state(GLuint glsl_program_num, const std::string& prefix, unsigned *sampler_num);
+
+	unsigned get_width() const { return width; }
+	unsigned get_height() const { return height; }
+	Colorspace get_color_space() const { return image_format.color_space; }
+	GammaCurve get_gamma_curve() const { return image_format.gamma_curve; }
+	virtual bool can_supply_mipmaps() const { return false; }
+
+	// Tells the input where to fetch the actual pixel data. Note that if you change
+	// this data, you must either call set_pixel_data() again (using the same pointer
+	// is fine), or invalidate_pixel_data(). Otherwise, the texture won't be re-uploaded
+	// on subsequent frames.
+	//
+	// The data can either be a regular pointer (if pbo==0), or a byte offset
+	// into a PBO. The latter will allow you to start uploading the texture data
+	// asynchronously to the GPU, if you have any CPU-intensive work between the
+	// call to set_pixel_data() and the actual rendering. Also, since we upload
+	// the data twice, using a PBO can save texture upload bandwidth. In either case,
+	// the pointer (and PBO, if set) has to be valid at the time of the render call.
+	void set_pixel_data(const unsigned char *pixel_data, GLuint pbo = 0)
+	{
+		this->pixel_data = pixel_data;
+		this->pbo = pbo;
+		invalidate_pixel_data();
+	}
+
+	void invalidate_pixel_data();  // Releases the textures, forcing a re-upload on the next set_gl_state().
+
+	void set_pitch(unsigned pitch) {  // <pitch> is the row length in luma samples; it must be even.
+		assert(pitch % ycbcr_format.chroma_subsampling_x == 0);
+		pitches[CHANNEL_LUMA] = pitch;
+		pitches[CHANNEL_CHROMA] = pitch / ycbcr_format.chroma_subsampling_x;
+		invalidate_pixel_data();
+	}
+
+	virtual void inform_added(EffectChain *chain)
+	{
+		resource_pool = chain->get_resource_pool();  // The pool our textures are created in and released to.
+	}
+
+	bool set_int(const std::string& key, int value);  // Rejects needs_mipmaps != 0; the rest goes to Effect::set_int().
+
+private:
+	ImageFormat image_format;
+	YCbCrFormat ycbcr_format;
+	GLuint pbo;  // 0 if pixel_data is a regular pointer.
+
+	// Luma texture is 0, chroma texture is 1.
+	enum Channel {
+		CHANNEL_LUMA,
+		CHANNEL_CHROMA
+	};
+	GLuint texture_num[2];  // 0 means "not created/uploaded yet".
+	GLuint widths[2];  // Texture widths in texels: <width> for luma, half of that for chroma.
+	unsigned pitches[2];  // Row lengths passed as GL_UNPACK_ROW_LENGTH for each upload.
+
+	unsigned width, height;
+	const unsigned char *pixel_data;  // Not owned; a pointer, or a byte offset into <pbo>.
+	ResourcePool *resource_pool;  // NULL until inform_added() is called.
+};
+
+}  // namespace movit
+
+#endif  // !defined(_MOVIT_YCBCR_422INTERLEAVED_INPUT_H)
diff --git a/ycbcr_422interleaved_input_test.cpp b/ycbcr_422interleaved_input_test.cpp
new file mode 100644
index 0000000..d47bb90
--- /dev/null
+++ b/ycbcr_422interleaved_input_test.cpp
@@ -0,0 +1,180 @@
+// Unit tests for YCbCr422InterleavedInput.
+
+#include <epoxy/gl.h>
+#include <stddef.h>
+
+#include "effect_chain.h"
+#include "gtest/gtest.h"
+#include "test_util.h"
+#include "util.h"
+#include "ycbcr_422interleaved_input.h"
+
+namespace movit {
+
+// Adapted from the Simple444 test from YCbCrInputTest.
+TEST(YCbCr422InterleavedInputTest, Simple422) {  // Five rows of pure colors: black, white, red, green, blue.
+	const int width = 2;
+	const int height = 5;
+
+	// Pure-color test inputs, calculated with the formulas in Rec. 601
+	// section 2.5.4. Each row is one U, Y0, V, Y1 group, i.e. two pixels.
+	unsigned char uyvy[width * height * 2] = {
+		/*U=*/128, /*Y=*/ 16, /*V=*/128, /*Y=*/ 16,
+		/*U=*/128, /*Y=*/235, /*V=*/128, /*Y=*/235,
+		/*U=*/ 90, /*Y=*/ 81, /*V=*/240, /*Y=*/ 81,
+		/*U=*/ 54, /*Y=*/145, /*V=*/ 34, /*Y=*/145,
+		/*U=*/240, /*Y=*/ 41, /*V=*/110, /*Y=*/ 41,
+	};
+
+	float expected_data[4 * width * height] = {
+		0.0, 0.0, 0.0, 1.0,   0.0, 0.0, 0.0, 1.0,
+		1.0, 1.0, 1.0, 1.0,   1.0, 1.0, 1.0, 1.0,
+		1.0, 0.0, 0.0, 1.0,   1.0, 0.0, 0.0, 1.0,
+		0.0, 1.0, 0.0, 1.0,   0.0, 1.0, 0.0, 1.0,
+		0.0, 0.0, 1.0, 1.0,   0.0, 0.0, 1.0, 1.0,
+	};
+	float out_data[4 * width * height];
+
+	EffectChainTester tester(NULL, width, height);
+
+	ImageFormat format;
+	format.color_space = COLORSPACE_sRGB;
+	format.gamma_curve = GAMMA_sRGB;
+
+	YCbCrFormat ycbcr_format;
+	ycbcr_format.luma_coefficients = YCBCR_REC_601;
+	ycbcr_format.full_range = false;
+	ycbcr_format.chroma_subsampling_x = 2;
+	ycbcr_format.chroma_subsampling_y = 1;
+	ycbcr_format.cb_x_position = 0.0f;  // Doesn't really matter here, since chroma is constant within each row.
+	ycbcr_format.cb_y_position = 0.5f;
+	ycbcr_format.cr_x_position = 0.0f;
+	ycbcr_format.cr_y_position = 0.5f;
+
+	YCbCr422InterleavedInput *input = new YCbCr422InterleavedInput(format, ycbcr_format, width, height);
+	input->set_pixel_data(uyvy);
+	tester.get_chain()->add_input(input);
+
+	tester.run(out_data, GL_RGBA, COLORSPACE_sRGB, GAMMA_sRGB);
+
+	// Y'CbCr isn't 100% accurate (the input values are rounded),
+	// so we need some leeway.
+        expect_equal(expected_data, out_data, 4 * width, height, 0.025, 0.002);
+}
+
+// Adapted from the YCbCrInput test of the same name.
+TEST(YCbCr422InterleavedInputTest, DifferentCbAndCrPositioning) {  // Cb sited at 0.0, Cr at 1.0.
+	const int width = 4;
+	const int height = 4;
+
+	unsigned char uyvy[width * height * 2] = {
+		/*U=*/ 64, /*Y=*/126, /*V=*/ 48, /*Y=*/126,  /*U=*/128, /*Y=*/126, /*V=*/128, /*Y=*/126,
+		/*U=*/128, /*Y=*/126, /*V=*/128, /*Y=*/126,  /*U=*/192, /*Y=*/126, /*V=*/208, /*Y=*/126,
+		/*U=*/128, /*Y=*/126, /*V=*/128, /*Y=*/126,  /*U=*/128, /*Y=*/126, /*V=*/128, /*Y=*/126,
+		/*U=*/128, /*Y=*/126, /*V=*/128, /*Y=*/126,  /*U=*/128, /*Y=*/126, /*V=*/128, /*Y=*/126,
+	};
+
+	// Chroma samples in this case are always co-sited with a luma sample;
+	// their associated color values and position are marked off in comments.
+	float expected_data_blue[width * height] = {
+		   0.000 /* 0.0 */, 0.250,           0.500 /* 0.5 */, 0.500, 
+		   0.500 /* 0.5 */, 0.750,           1.000 /* 1.0 */, 1.000, 
+		   0.500 /* 0.5 */, 0.500,           0.500 /* 0.5 */, 0.500, 
+		   0.500 /* 0.5 */, 0.500,           0.500 /* 0.5 */, 0.500, 
+	};
+	float expected_data_red[width * height] = {
+		   0.000,           0.000 /* 0.0 */, 0.250,           0.500 /* 0.5 */, 
+		   0.500,           0.500 /* 0.5 */, 0.750,           1.000 /* 1.0 */, 
+		   0.500,           0.500 /* 0.5 */, 0.500,           0.500 /* 0.5 */, 
+		   0.500,           0.500 /* 0.5 */, 0.500,           0.500 /* 0.5 */, 
+	};
+	float out_data[width * height];
+
+	EffectChainTester tester(NULL, width, height);
+
+	ImageFormat format;
+	format.color_space = COLORSPACE_sRGB;
+	format.gamma_curve = GAMMA_sRGB;
+
+	YCbCrFormat ycbcr_format;
+	ycbcr_format.luma_coefficients = YCBCR_REC_601;
+	ycbcr_format.full_range = false;
+	ycbcr_format.chroma_subsampling_x = 2;
+	ycbcr_format.chroma_subsampling_y = 1;
+	ycbcr_format.cb_x_position = 0.0f;  // Cb co-sited with the first luma sample of each pair...
+	ycbcr_format.cb_y_position = 0.5f;
+	ycbcr_format.cr_x_position = 1.0f;  // ...and Cr with the second, which exercises the two-lookup shader path.
+	ycbcr_format.cr_y_position = 0.5f;
+
+	YCbCr422InterleavedInput *input = new YCbCr422InterleavedInput(format, ycbcr_format, width, height);
+	input->set_pixel_data(uyvy);
+	tester.get_chain()->add_input(input);
+
+	// Y'CbCr isn't 100% accurate (the input values are rounded),
+	// so we need some leeway. Red and blue are read back and checked separately.
+	tester.run(out_data, GL_RED, COLORSPACE_sRGB, GAMMA_sRGB);
+	expect_equal(expected_data_red, out_data, width, height, 0.02, 0.002);
+
+	tester.run(out_data, GL_BLUE, COLORSPACE_sRGB, GAMMA_sRGB);
+	expect_equal(expected_data_blue, out_data, width, height, 0.01, 0.001);
+}
+
+TEST(YCbCr422InterleavedInputTest, PBO) {  // Same data as Simple422, but uploaded from a pixel buffer object.
+	const int width = 2;
+	const int height = 5;
+
+	// Pure-color test inputs, calculated with the formulas in Rec. 601
+	// section 2.5.4. Each row is one U, Y0, V, Y1 group, i.e. two pixels.
+	unsigned char uyvy[width * height * 2] = {
+		/*U=*/128, /*Y=*/ 16, /*V=*/128, /*Y=*/ 16,
+		/*U=*/128, /*Y=*/235, /*V=*/128, /*Y=*/235,
+		/*U=*/ 90, /*Y=*/ 81, /*V=*/240, /*Y=*/ 81,
+		/*U=*/ 54, /*Y=*/145, /*V=*/ 34, /*Y=*/145,
+		/*U=*/240, /*Y=*/ 41, /*V=*/110, /*Y=*/ 41,
+	};
+
+	float expected_data[4 * width * height] = {
+		0.0, 0.0, 0.0, 1.0,   0.0, 0.0, 0.0, 1.0,
+		1.0, 1.0, 1.0, 1.0,   1.0, 1.0, 1.0, 1.0,
+		1.0, 0.0, 0.0, 1.0,   1.0, 0.0, 0.0, 1.0,
+		0.0, 1.0, 0.0, 1.0,   0.0, 1.0, 0.0, 1.0,
+		0.0, 0.0, 1.0, 1.0,   0.0, 0.0, 1.0, 1.0,
+	};
+	float out_data[4 * width * height];
+
+	GLuint pbo;
+	glGenBuffers(1, &pbo);
+	glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, pbo);
+	glBufferData(GL_PIXEL_UNPACK_BUFFER_ARB, width * height * 2, uyvy, GL_STREAM_DRAW);  // Copies uyvy into the PBO.
+	glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0);
+
+	EffectChainTester tester(NULL, width, height);
+
+	ImageFormat format;
+	format.color_space = COLORSPACE_sRGB;
+	format.gamma_curve = GAMMA_sRGB;
+
+	YCbCrFormat ycbcr_format;
+	ycbcr_format.luma_coefficients = YCBCR_REC_601;
+	ycbcr_format.full_range = false;
+	ycbcr_format.chroma_subsampling_x = 2;
+	ycbcr_format.chroma_subsampling_y = 1;
+	ycbcr_format.cb_x_position = 0.0f;  // Doesn't really matter here, since chroma is constant within each row.
+	ycbcr_format.cb_y_position = 0.5f;
+	ycbcr_format.cr_x_position = 0.0f;
+	ycbcr_format.cr_y_position = 0.5f;
+
+	YCbCr422InterleavedInput *input = new YCbCr422InterleavedInput(format, ycbcr_format, width, height);
+	input->set_pixel_data((unsigned char *)BUFFER_OFFSET(0), pbo);  // Offset 0 into the PBO, not a CPU pointer.
+	tester.get_chain()->add_input(input);
+
+	tester.run(out_data, GL_RGBA, COLORSPACE_sRGB, GAMMA_sRGB);
+
+	// Y'CbCr isn't 100% accurate (the input values are rounded),
+	// so we need some leeway.
+        expect_equal(expected_data, out_data, 4 * width, height, 0.025, 0.002);
+
+	glDeleteBuffers(1, &pbo);
+}
+
+}  // namespace movit
diff --git a/ycbcr_input.cpp b/ycbcr_input.cpp
index ee341f0..091880f 100644
--- a/ycbcr_input.cpp
+++ b/ycbcr_input.cpp
@@ -8,6 +8,7 @@
 #include "effect_util.h"
 #include "resource_pool.h"
 #include "util.h"
+#include "ycbcr.h"
 #include "ycbcr_input.h"
 
 using namespace Eigen;
@@ -15,126 +16,6 @@ using namespace std;
 
 namespace movit {
 
-namespace {
-
-// OpenGL has texel center in (0.5, 0.5), but different formats have
-// chroma in various other places. If luma samples are X, the chroma
-// sample is *, and subsampling is 3x3, the situation with chroma
-// center in (0.5, 0.5) looks approximately like this:
-//
-//   X   X
-//     *   
-//   X   X
-//
-// If, on the other hand, chroma center is in (0.0, 0.5) (common
-// for e.g. MPEG-4), the figure changes to:
-//
-//   X   X
-//   *      
-//   X   X
-//
-// In other words, (0.0, 0.0) means that the chroma sample is exactly
-// co-sited on top of the top-left luma sample. Note, however, that
-// this is _not_ 0.5 texels to the left, since the OpenGL's texel center
-// is in (0.5, 0.5); it is in (0.25, 0.25). In a sense, the four luma samples
-// define a square where chroma position (0.0, 0.0) is in texel position
-// (0.25, 0.25) and chroma position (1.0, 1.0) is in texel position (0.75, 0.75)
-// (the outer border shows the borders of the texel itself, ie. from
-// (0, 0) to (1, 1)):
-//
-//  ---------
-// |         |
-// |  X---X  |
-// |  | * |  |
-// |  X---X  |
-// |         |
-//  ---------
-//
-// Also note that if we have no subsampling, the square will have zero
-// area and the chroma position does not matter at all.
-float compute_chroma_offset(float pos, unsigned subsampling_factor, unsigned resolution)
-{
-	float local_chroma_pos = (0.5 + pos * (subsampling_factor - 1)) / subsampling_factor;
-	return (0.5 - local_chroma_pos) / resolution;
-}
-
-// Given <ycbcr_format>, compute the values needed to turn Y'CbCr into R'G'B';
-// first subtract the returned offset, then left-multiply the returned matrix
-// (the scaling is already folded into it).
-void compute_ycbcr_matrix(YCbCrFormat ycbcr_format, float* offset, Matrix3d* ycbcr_to_rgb)
-{
-	double coeff[3], scale[3];
-
-	switch (ycbcr_format.luma_coefficients) {
-	case YCBCR_REC_601:
-		// Rec. 601, page 2.
-		coeff[0] = 0.299;
-		coeff[1] = 0.587;
-		coeff[2] = 0.114;
-		break;
-
-	case YCBCR_REC_709:
-		// Rec. 709, page 19.
-		coeff[0] = 0.2126;
-		coeff[1] = 0.7152;
-		coeff[2] = 0.0722;
-		break;
-
-	case YCBCR_REC_2020:
-		// Rec. 2020, page 4.
-		coeff[0] = 0.2627;
-		coeff[1] = 0.6780;
-		coeff[2] = 0.0593;
-		break;
-
-	default:
-		assert(false);
-	}
-
-	if (ycbcr_format.full_range) {
-		offset[0] = 0.0 / 255.0;
-		offset[1] = 128.0 / 255.0;
-		offset[2] = 128.0 / 255.0;
-
-		scale[0] = 1.0;
-		scale[1] = 1.0;
-		scale[2] = 1.0;
-	} else {
-		// Rec. 601, page 4; Rec. 709, page 19; Rec. 2020, page 4.
-		offset[0] = 16.0 / 255.0;
-		offset[1] = 128.0 / 255.0;
-		offset[2] = 128.0 / 255.0;
-
-		scale[0] = 255.0 / 219.0;
-		scale[1] = 255.0 / 224.0;
-		scale[2] = 255.0 / 224.0;
-	}
-
-	// Matrix to convert RGB to YCbCr. See e.g. Rec. 601.
-	Matrix3d rgb_to_ycbcr;
-	rgb_to_ycbcr(0,0) = coeff[0];
-	rgb_to_ycbcr(0,1) = coeff[1];
-	rgb_to_ycbcr(0,2) = coeff[2];
-
-	float cb_fac = (224.0 / 219.0) / (coeff[0] + coeff[1] + 1.0f - coeff[2]);
-	rgb_to_ycbcr(1,0) = -coeff[0] * cb_fac;
-	rgb_to_ycbcr(1,1) = -coeff[1] * cb_fac;
-	rgb_to_ycbcr(1,2) = (1.0f - coeff[2]) * cb_fac;
-
-	float cr_fac = (224.0 / 219.0) / (1.0f - coeff[0] + coeff[1] + coeff[2]);
-	rgb_to_ycbcr(2,0) = (1.0f - coeff[0]) * cr_fac;
-	rgb_to_ycbcr(2,1) = -coeff[1] * cr_fac;
-	rgb_to_ycbcr(2,2) = -coeff[2] * cr_fac;
-
-	// Inverting the matrix gives us what we need to go from YCbCr back to RGB.
-	*ycbcr_to_rgb = rgb_to_ycbcr.inverse();
-
-	// Fold in the scaling.
-	*ycbcr_to_rgb *= Map<const Vector3d>(scale).asDiagonal();
-}
-
-}  // namespace
-
 YCbCrInput::YCbCrInput(const ImageFormat &image_format,
                        const YCbCrFormat &ycbcr_format,
                        unsigned width, unsigned height)
diff --git a/ycbcr_input.h b/ycbcr_input.h
index 4c7b3e9..97ad526 100644
--- a/ycbcr_input.h
+++ b/ycbcr_input.h
@@ -13,30 +13,12 @@
 #include "effect_chain.h"
 #include "image_format.h"
 #include "input.h"
+#include "ycbcr.h"
 
 namespace movit {
 
 class ResourcePool;
 
-struct YCbCrFormat {
-	// Which formula for Y' to use.
-	YCbCrLumaCoefficients luma_coefficients;
-
-	// If true, assume Y'CbCr coefficients are full-range, ie. go from 0 to 255
-	// instead of the limited 220/225 steps in classic MPEG. For instance,
-	// JPEG uses the Rec. 601 luma coefficients, but full range.
-	bool full_range;
-
-	// Sampling factors for chroma components. For no subsampling (4:4:4),
-	// set both to 1.
-	unsigned chroma_subsampling_x, chroma_subsampling_y;
-
-	// Positioning of the chroma samples. MPEG-1 and JPEG is (0.5, 0.5);
-	// MPEG-2 and newer typically are (0.0, 0.5).
-	float cb_x_position, cb_y_position;
-	float cr_x_position, cr_y_position;
-};
-
 class YCbCrInput : public Input {
 public:
 	YCbCrInput(const ImageFormat &image_format,
-- 
2.39.2