From 8cefe0ef1926be7931d4a9bbfed93ee6e85f3540 Mon Sep 17 00:00:00 2001
From: "Steinar H. Gunderson"
Date: Sun, 21 May 2017 12:36:10 +0200
Subject: [PATCH] Add support for decoding video as Y'CbCr. Not activated yet.

---
 ffmpeg_capture.cpp      | 188 ++++++++++++++++++++++++++++++++++++++--
 ffmpeg_capture.h        |  18 +++-
 mixer.cpp               |  74 +++++++++++++---
 mixer.h                 |   1 +
 pbo_frame_allocator.cpp |  91 +++++++++++++------
 pbo_frame_allocator.h   |   7 ++
 theme.cpp               |  27 +++++-
 7 files changed, 353 insertions(+), 53 deletions(-)

diff --git a/ffmpeg_capture.cpp b/ffmpeg_capture.cpp
index 407f359..0fe74a7 100644
--- a/ffmpeg_capture.cpp
+++ b/ffmpeg_capture.cpp
@@ -37,6 +37,7 @@ extern "C" {
 using namespace std;
 using namespace std::chrono;
 using namespace bmusb;
+using namespace movit;
 
 namespace {
 
@@ -59,6 +60,143 @@ bool changed_since(const std::string &pathname, const timespec &ts)
 	return (buf.st_mtim.tv_sec != ts.tv_sec || buf.st_mtim.tv_nsec != ts.tv_nsec);
 }
 
+bool is_full_range(const AVPixFmtDescriptor *desc)
+{
+	// This is horrible, but there's no better way that I know of.
+	return (strchr(desc->name, 'j') != nullptr);
+}
+
+AVPixelFormat decide_dst_format(AVPixelFormat src_format, bmusb::PixelFormat dst_format_type)
+{
+	if (dst_format_type == bmusb::PixelFormat_8BitBGRA) {
+		return AV_PIX_FMT_BGRA;
+	}
+
+	assert(dst_format_type == bmusb::PixelFormat_8BitYCbCrPlanar);
+
+	// If this is a non-Y'CbCr format, just convert to 4:4:4 Y'CbCr
+	// and be done with it. It's too strange to spend a lot of time on.
+	// (Let's hope there's no alpha.)
+	const AVPixFmtDescriptor *src_desc = av_pix_fmt_desc_get(src_format);
+	if (src_desc == nullptr ||
+	    src_desc->nb_components != 3 ||
+	    (src_desc->flags & AV_PIX_FMT_FLAG_RGB)) {
+		return AV_PIX_FMT_YUV444P;
+	}
+
+	// The best for us would be Cb and Cr together if possible,
+	// but FFmpeg doesn't support that except in the special case of
+	// NV12, so we need to go to planar even for the case of NV12.
+	// Thus, look for the closest (but no worse) 8-bit planar Y'CbCr format
+	// that matches in color range. (This will also include the case of
+	// the source format already being acceptable.)
+	bool src_full_range = is_full_range(src_desc);
+	const char *best_format = "yuv444p";
+	unsigned best_score = numeric_limits<unsigned>::max();
+	for (const AVPixFmtDescriptor *desc = av_pix_fmt_desc_next(nullptr);
+	     desc;
+	     desc = av_pix_fmt_desc_next(desc)) {
+		// Find planar Y'CbCr formats only.
+		if (desc->nb_components != 3) continue;
+		if (desc->flags & AV_PIX_FMT_FLAG_RGB) continue;
+		if (!(desc->flags & AV_PIX_FMT_FLAG_PLANAR)) continue;
+		if (desc->comp[0].plane != 0 ||
+		    desc->comp[1].plane != 1 ||
+		    desc->comp[2].plane != 2) continue;
+
+		// 8-bit formats only.
+		if (desc->flags & AV_PIX_FMT_FLAG_BE) continue;
+		if (desc->comp[0].depth != 8) continue;
+
+		// Same or better chroma resolution only.
+		int chroma_w_diff = desc->log2_chroma_w - src_desc->log2_chroma_w;
+		int chroma_h_diff = desc->log2_chroma_h - src_desc->log2_chroma_h;
+		if (chroma_w_diff < 0 || chroma_h_diff < 0)
+			continue;
+
+		// Matching full/limited range only.
+		if (is_full_range(desc) != src_full_range)
+			continue;
+
+		// Pick something with as little excess chroma resolution as possible.
+		unsigned score = (1 << (chroma_w_diff)) << chroma_h_diff;
+		if (score < best_score) {
+			best_score = score;
+			best_format = desc->name;
+		}
+	}
+	return av_get_pix_fmt(best_format);
+}
+
+YCbCrFormat decode_ycbcr_format(const AVPixFmtDescriptor *desc, const AVFrame *frame)
+{
+	YCbCrFormat format;
+	AVColorSpace colorspace = av_frame_get_colorspace(frame);
+	switch (colorspace) {
+	case AVCOL_SPC_BT709:
+		format.luma_coefficients = YCBCR_REC_709;
+		break;
+	case AVCOL_SPC_BT470BG:
+	case AVCOL_SPC_SMPTE170M:
+	case AVCOL_SPC_SMPTE240M:
+		format.luma_coefficients = YCBCR_REC_601;
+		break;
+	case AVCOL_SPC_BT2020_NCL:
+		format.luma_coefficients = YCBCR_REC_2020;
+		break;
+	case AVCOL_SPC_UNSPECIFIED:
+		format.luma_coefficients = (frame->height >= 720 ? YCBCR_REC_709 : YCBCR_REC_601);
+		break;
+	default:
+		fprintf(stderr, "Unknown Y'CbCr coefficient enum %d from FFmpeg; choosing Rec. 709.\n",
+			colorspace);
+		format.luma_coefficients = YCBCR_REC_709;
+		break;
+	}
+
+	format.full_range = is_full_range(desc);
+	format.num_levels = 1 << desc->comp[0].depth;
+	format.chroma_subsampling_x = 1 << desc->log2_chroma_w;
+	format.chroma_subsampling_y = 1 << desc->log2_chroma_h;
+
+	switch (frame->chroma_location) {
+	case AVCHROMA_LOC_LEFT:
+		format.cb_x_position = 0.0;
+		format.cb_y_position = 0.5;
+		break;
+	case AVCHROMA_LOC_CENTER:
+		format.cb_x_position = 0.5;
+		format.cb_y_position = 0.5;
+		break;
+	case AVCHROMA_LOC_TOPLEFT:
+		format.cb_x_position = 0.0;
+		format.cb_y_position = 0.0;
+		break;
+	case AVCHROMA_LOC_TOP:
+		format.cb_x_position = 0.5;
+		format.cb_y_position = 0.0;
+		break;
+	case AVCHROMA_LOC_BOTTOMLEFT:
+		format.cb_x_position = 0.0;
+		format.cb_y_position = 1.0;
+		break;
+	case AVCHROMA_LOC_BOTTOM:
+		format.cb_x_position = 0.5;
+		format.cb_y_position = 1.0;
+		break;
+	default:
+		fprintf(stderr, "Unknown chroma location enum %d from FFmpeg; choosing center.\n",
+			frame->chroma_location);
+		format.cb_x_position = 0.5;
+		format.cb_y_position = 0.5;
+		break;
+	}
+
+	format.cr_x_position = format.cb_x_position;
+	format.cr_y_position = format.cb_y_position;
+	return format;
+}
+
 }  // namespace
 
 FFmpegCapture::FFmpegCapture(const string &filename, unsigned width, unsigned height)
@@ -235,7 +373,8 @@ bool FFmpegCapture::play_video(const string &pathname)
 	double rate = 1.0;
 
 	unique_ptr<SwsContext, decltype(sws_freeContext) *> sws_ctx(nullptr, sws_freeContext);
-	int sws_last_width = -1, sws_last_height = -1;
+	int sws_last_width = -1, sws_last_height = -1, sws_last_src_format = -1;
+	AVPixelFormat sws_dst_format = AVPixelFormat(-1);  // In practice, always initialized.
 
 	// Main loop.
 	while (!producer_thread_should_quit.should_quit()) {
@@ -320,13 +459,18 @@ bool FFmpegCapture::play_video(const string &pathname)
 			continue;
 		}
 
-		if (sws_ctx == nullptr || sws_last_width != frame->width || sws_last_height != frame->height) {
+		if (sws_ctx == nullptr ||
+		    sws_last_width != frame->width ||
+		    sws_last_height != frame->height ||
+		    sws_last_src_format != frame->format) {
+			sws_dst_format = decide_dst_format(AVPixelFormat(frame->format), pixel_format);
 			sws_ctx.reset(
-				sws_getContext(frame->width, frame->height, (AVPixelFormat)frame->format,
-					width, height, AV_PIX_FMT_BGRA,
+				sws_getContext(frame->width, frame->height, AVPixelFormat(frame->format),
+					width, height, sws_dst_format,
 					SWS_BICUBIC, nullptr, nullptr, nullptr));
 			sws_last_width = frame->width;
 			sws_last_height = frame->height;
+			sws_last_src_format = frame->format;
 		}
 		if (sws_ctx == nullptr) {
 			fprintf(stderr, "%s: Could not create scaler context\n", pathname.c_str());
@@ -336,7 +480,12 @@ bool FFmpegCapture::play_video(const string &pathname)
 		VideoFormat video_format;
 		video_format.width = width;
 		video_format.height = height;
-		video_format.stride = width * 4;
+		if (pixel_format == bmusb::PixelFormat_8BitBGRA) {
+			video_format.stride = width * 4;
+		} else {
+			assert(pixel_format == bmusb::PixelFormat_8BitYCbCrPlanar);
+			video_format.stride = width;
+		}
 		video_format.frame_rate_nom = video_timebase.den;
 		video_format.frame_rate_den = av_frame_get_pkt_duration(frame.get()) * video_timebase.num;
 		if (video_format.frame_rate_nom == 0 || video_format.frame_rate_den == 0) {
@@ -352,10 +501,33 @@ bool FFmpegCapture::play_video(const string &pathname)
 
 		FrameAllocator::Frame video_frame = video_frame_allocator->alloc_frame();
 		if (video_frame.data != nullptr) {
-			uint8_t *pic_data[4] = { video_frame.data, nullptr, nullptr, nullptr };
-			int linesizes[4] = { int(video_format.stride), 0, 0, 0 };
+			uint8_t *pic_data[4] = { nullptr, nullptr, nullptr, nullptr };
+			int linesizes[4] = { 0, 0, 0, 0 };
+			if (pixel_format == bmusb::PixelFormat_8BitBGRA) {
+				pic_data[0] = video_frame.data;
+				linesizes[0] = video_format.stride;
+				video_frame.len = video_format.stride * height;
+			} else {
+				assert(pixel_format == bmusb::PixelFormat_8BitYCbCrPlanar);
+				const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(sws_dst_format);
+
+				int chroma_width = AV_CEIL_RSHIFT(int(width), desc->log2_chroma_w);
+				int chroma_height = AV_CEIL_RSHIFT(int(height), desc->log2_chroma_h);
+
+				pic_data[0] = video_frame.data;
+				linesizes[0] = width;
+
+				pic_data[1] = pic_data[0] + width * height;
+				linesizes[1] = chroma_width;
+
+				pic_data[2] = pic_data[1] + chroma_width * chroma_height;
+				linesizes[2] = chroma_width;
+
+				video_frame.len = width * height + 2 * chroma_width * chroma_height;
+
+				current_frame_ycbcr_format = decode_ycbcr_format(desc, frame.get());
+			}
 			sws_scale(sws_ctx.get(), frame->data, frame->linesize, 0, frame->height, pic_data, linesizes);
-			video_frame.len = video_format.stride * height;
 			video_frame.received_timestamp = next_frame_start;
 		}
 
diff --git a/ffmpeg_capture.h b/ffmpeg_capture.h
index 82ccfaf..e20f24c 100644
--- a/ffmpeg_capture.h
+++ b/ffmpeg_capture.h
@@ -29,6 +29,8 @@
 #include
 #include
 
+#include <movit/ycbcr.h>
+
 #include "bmusb/bmusb.h"
 #include "quittable_sleeper.h"
 
@@ -93,6 +95,14 @@ public:
 		frame_callback = callback;
 	}
 
+	// Used to get precise information about the Y'CbCr format used
+	// for a given frame. Only valid to call during the frame callback,
+	// and only when receiving a frame with pixel format PixelFormat_8BitYCbCrPlanar.
+	movit::YCbCrFormat get_current_frame_ycbcr_format() const
+	{
+		return current_frame_ycbcr_format;
+	}
+
 	void set_dequeue_thread_callbacks(std::function<void()> init, std::function<void()> cleanup) override
 	{
 		dequeue_init_callback = init;
@@ -115,13 +125,13 @@ public:
 	uint32_t get_current_video_mode() const override { return 0; }
 
 	std::set<bmusb::PixelFormat> get_available_pixel_formats() const override {
-		return std::set<bmusb::PixelFormat>{ bmusb::PixelFormat_8BitBGRA };
+		return std::set<bmusb::PixelFormat>{ bmusb::PixelFormat_8BitBGRA, bmusb::PixelFormat_8BitYCbCrPlanar };
 	}
 	void set_pixel_format(bmusb::PixelFormat pixel_format) override {
-		assert(pixel_format == bmusb::PixelFormat_8BitBGRA);
+		this->pixel_format = pixel_format;
 	}
 	bmusb::PixelFormat get_current_pixel_format() const override {
-		return bmusb::PixelFormat_8BitBGRA;
+		return pixel_format;
 	}
 
 	std::map<uint32_t, std::string> get_available_video_inputs() const override {
@@ -144,6 +154,8 @@ private:
 	std::string description, filename;
 	uint16_t timecode = 0;
 	unsigned width, height;
+	bmusb::PixelFormat pixel_format = bmusb::PixelFormat_8BitBGRA;
+	movit::YCbCrFormat current_frame_ycbcr_format;
 	bool running = false;
 	int card_index = -1;
 
diff --git a/mixer.cpp b/mixer.cpp
index c238d16..857d62b 100644
--- a/mixer.cpp
+++ b/mixer.cpp
@@ -79,7 +79,7 @@ void insert_new_frame(RefCountedFrame frame, unsigned field_num, bool interlaced
 	}
 }
 
-void ensure_texture_resolution(PBOFrameAllocator::Userdata *userdata, unsigned field, unsigned width, unsigned height, unsigned v210_width)
+void ensure_texture_resolution(PBOFrameAllocator::Userdata *userdata, unsigned field, unsigned width, unsigned height, unsigned cbcr_width, unsigned cbcr_height, unsigned v210_width)
 {
 	bool first;
 	switch (userdata->pixel_format) {
@@ -92,13 +92,18 @@ void ensure_texture_resolution(PBOFrameAllocator::Userdata *userdata, unsigned f
 	case PixelFormat_8BitBGRA:
 		first = userdata->tex_rgba[field] == 0;
 		break;
+	case PixelFormat_8BitYCbCrPlanar:
+		first = userdata->tex_y[field] == 0 || userdata->tex_cb[field] == 0 || userdata->tex_cr[field] == 0;
+		break;
 	default:
 		assert(false);
 	}
 
 	if (first || width != userdata->last_width[field] ||
-	    height != userdata->last_height[field]) {
+	    height != userdata->last_height[field] ||
+	    cbcr_width != userdata->last_cbcr_width[field] ||
+	    cbcr_height != userdata->last_cbcr_height[field]) {
 		// We changed resolution since last use of this texture, so we need to create
 		// a new object. Note that since each card has its own PBOFrameAllocator,
 		// we don't need to worry about these flip-flopping between resolutions.
@@ -110,8 +115,6 @@ void ensure_texture_resolution(PBOFrameAllocator::Userdata *userdata, unsigned f
 		check_error();
 		break;
 	case PixelFormat_8BitYCbCr: {
-		size_t cbcr_width = width / 2;
-
 		glBindTexture(GL_TEXTURE_2D, userdata->tex_cbcr[field]);
 		check_error();
 		glTexImage2D(GL_TEXTURE_2D, 0, GL_RG8, cbcr_width, height, 0, GL_RG, GL_UNSIGNED_BYTE, nullptr);
 		check_error();
@@ -122,6 +125,21 @@ void ensure_texture_resolution(PBOFrameAllocator::Userdata *userdata, unsigned f
 		check_error();
 		break;
 	}
+	case PixelFormat_8BitYCbCrPlanar: {
+		glBindTexture(GL_TEXTURE_2D, userdata->tex_y[field]);
+		check_error();
+		glTexImage2D(GL_TEXTURE_2D, 0, GL_R8, width, height, 0, GL_RED, GL_UNSIGNED_BYTE, nullptr);
+		check_error();
+		glBindTexture(GL_TEXTURE_2D, userdata->tex_cb[field]);
+		check_error();
+		glTexImage2D(GL_TEXTURE_2D, 0, GL_R8, cbcr_width, cbcr_height, 0, GL_RED, GL_UNSIGNED_BYTE, nullptr);
+		check_error();
+		glBindTexture(GL_TEXTURE_2D, userdata->tex_cr[field]);
+		check_error();
+		glTexImage2D(GL_TEXTURE_2D, 0, GL_R8, cbcr_width, cbcr_height, 0, GL_RED, GL_UNSIGNED_BYTE, nullptr);
+		check_error();
+		break;
+	}
 	case PixelFormat_8BitBGRA:
 		glBindTexture(GL_TEXTURE_2D, userdata->tex_rgba[field]);
 		check_error();
@@ -135,6 +153,8 @@ void ensure_texture_resolution(PBOFrameAllocator::Userdata *userdata, unsigned f
 		}
 		userdata->last_width[field] = width;
 		userdata->last_height[field] = height;
+		userdata->last_cbcr_width[field] = cbcr_width;
+		userdata->last_cbcr_height[field] = cbcr_height;
 	}
 
 	if (global_flags.ten_bit_input && (first || v210_width != userdata->last_v210_width[field])) {
@@ -400,6 +420,7 @@ void Mixer::configure_card(unsigned card_index, CaptureInterface *capture, CardT
 	}
 	card->capture.reset(capture);
 	card->is_fake_capture = (card_type == CardType::FAKE_CAPTURE);
+	card->type = card_type;
 	if (card->output.get() != output) {
 		card->output.reset(output);
 	}
@@ -566,7 +587,28 @@ void Mixer::bm_frame(unsigned card_index, uint16_t timecode,
 
 	card->last_timecode = timecode;
 
+	PBOFrameAllocator::Userdata *userdata = (PBOFrameAllocator::Userdata *)video_frame.userdata;
+
+	size_t cbcr_width, cbcr_height, cbcr_offset, y_offset;
 	size_t expected_length = video_format.stride * (video_format.height + video_format.extra_lines_top + video_format.extra_lines_bottom);
+	if (userdata != nullptr && userdata->pixel_format == PixelFormat_8BitYCbCrPlanar) {
+		// The calculation above is wrong for planar Y'CbCr, so just override it.
+		assert(card->type == CardType::FFMPEG_INPUT);
+		assert(video_offset == 0);
+		expected_length = video_frame.len;
+
+		userdata->ycbcr_format = (static_cast<FFmpegCapture *>(card->capture.get()))->get_current_frame_ycbcr_format();
+		cbcr_width = video_format.width / userdata->ycbcr_format.chroma_subsampling_x;
+		cbcr_height = video_format.height / userdata->ycbcr_format.chroma_subsampling_y;
+		cbcr_offset = video_format.width * video_format.height;
+		y_offset = 0;
+	} else {
+		// All the other Y'CbCr formats are 4:2:2.
+		cbcr_width = video_format.width / 2;
+		cbcr_height = video_format.height;
+		cbcr_offset = video_offset / 2;
+		y_offset = video_frame.size / 2 + video_offset / 2;
+	}
+
 	if (video_frame.len - video_offset == 0 ||
 	    video_frame.len - video_offset != expected_length) {
 		if (video_frame.len != 0) {
@@ -593,8 +635,6 @@ void Mixer::bm_frame(unsigned card_index, uint16_t timecode,
 		return;
 	}
 
-	PBOFrameAllocator::Userdata *userdata = (PBOFrameAllocator::Userdata *)video_frame.userdata;
-
 	unsigned num_fields = video_format.interlaced ? 2 : 1;
 	steady_clock::time_point frame_upload_start;
 	bool interlaced_stride = false;
@@ -619,10 +659,6 @@ void Mixer::bm_frame(unsigned card_index, uint16_t timecode,
 	RefCountedFrame frame(video_frame);
 
 	// Upload the textures.
-	const size_t cbcr_width = video_format.width / 2;
-	const size_t cbcr_offset = video_offset / 2;
-	const size_t y_offset = video_frame.size / 2 + video_offset / 2;
-
 	for (unsigned field = 0; field < num_fields; ++field) {
 		// Put the actual texture upload in a lambda that is executed in the main thread.
 		// It is entirely possible to do this in the same thread (and it might even be
 		// Note that this means we must hold on to the actual frame data
 		// until the upload command is run, but we hold on to the frame much longer than that
 		// (in fact, all the way until we no longer use the texture in rendering).
-		auto upload_func = [this, field, video_format, y_offset, video_offset, cbcr_offset, cbcr_width, interlaced_stride, userdata]() {
+		auto upload_func = [this, field, video_format, y_offset, video_offset, cbcr_offset, cbcr_width, cbcr_height, interlaced_stride, userdata]() {
 			unsigned field_start_line;
 			if (field == 1) {
 				field_start_line = video_format.second_field_start;
@@ -642,7 +678,7 @@ void Mixer::bm_frame(unsigned card_index, uint16_t timecode,
 
 			// For anything not FRAME_FORMAT_YCBCR_10BIT, v210_width will be nonsensical but not used.
 			size_t v210_width = video_format.stride / sizeof(uint32_t);
-			ensure_texture_resolution(userdata, field, video_format.width, video_format.height, v210_width);
+			ensure_texture_resolution(userdata, field, video_format.width, video_format.height, cbcr_width, cbcr_height, v210_width);
 
 			glBindBuffer(GL_PIXEL_UNPACK_BUFFER, userdata->pbo);
 			check_error();
@@ -660,7 +696,19 @@ void Mixer::bm_frame(unsigned card_index, uint16_t timecode,
 				// Make up our own strides, since we are interleaving.
 				upload_texture(userdata->tex_y[field], video_format.width, video_format.height, video_format.width, interlaced_stride, GL_RED, GL_UNSIGNED_BYTE, field_y_start);
-				upload_texture(userdata->tex_cbcr[field], cbcr_width, video_format.height, cbcr_width * sizeof(uint16_t), interlaced_stride, GL_RG, GL_UNSIGNED_BYTE, field_cbcr_start);
+				upload_texture(userdata->tex_cbcr[field], cbcr_width, cbcr_height, cbcr_width * sizeof(uint16_t), interlaced_stride, GL_RG, GL_UNSIGNED_BYTE, field_cbcr_start);
+				break;
+			}
+			case PixelFormat_8BitYCbCrPlanar: {
+				assert(field_start_line == 0);  // We don't really support interlaced here.
+				size_t field_y_start = y_offset;
+				size_t field_cb_start = cbcr_offset;
+				size_t field_cr_start = cbcr_offset + cbcr_width * cbcr_height;
+
+				// Make up our own strides, since we are interleaving.
+				upload_texture(userdata->tex_y[field], video_format.width, video_format.height, video_format.width, interlaced_stride, GL_RED, GL_UNSIGNED_BYTE, field_y_start);
+				upload_texture(userdata->tex_cb[field], cbcr_width, cbcr_height, cbcr_width, interlaced_stride, GL_RED, GL_UNSIGNED_BYTE, field_cb_start);
+				upload_texture(userdata->tex_cr[field], cbcr_width, cbcr_height, cbcr_width, interlaced_stride, GL_RED, GL_UNSIGNED_BYTE, field_cr_start);
 				break;
 			}
 			case PixelFormat_8BitBGRA: {
diff --git a/mixer.h b/mixer.h
index c63db64..73dfa3b 100644
--- a/mixer.h
+++ b/mixer.h
@@ -415,6 +415,7 @@ private:
 	struct CaptureCard {
 		std::unique_ptr<bmusb::CaptureInterface> capture;
 		bool is_fake_capture;
+		CardType type;
 		std::unique_ptr<DeckLinkOutput> output;
 
 		// If this card is used for output (ie., output_card_index points to it),
diff --git a/pbo_frame_allocator.cpp b/pbo_frame_allocator.cpp
index 35e4ed5..84a75e4 100644
--- a/pbo_frame_allocator.cpp
+++ b/pbo_frame_allocator.cpp
@@ -12,6 +12,20 @@
 
 using namespace std;
 
+namespace {
+
+void set_clamp_to_edge()
+{
+	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+	check_error();
+	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+	check_error();
+	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+	check_error();
+}
+
+}  // namespace
+
 PBOFrameAllocator::PBOFrameAllocator(bmusb::PixelFormat pixel_format, size_t frame_size, GLuint width, GLuint height, size_t num_queued_frames, GLenum buffer, GLenum permissions, GLenum map_bits)
 	: pixel_format(pixel_format), buffer(buffer)
 {
@@ -35,7 +49,7 @@ PBOFrameAllocator::PBOFrameAllocator(bmusb::PixelFormat pixel_format, size_t fra
 		userdata[i].pixel_format = pixel_format;
 		frame.owner = this;
 
-		// For 8-bit Y'CbCr, we ask the driver to split Y' and Cb/Cr
+		// For 8-bit non-planar Y'CbCr, we ask the driver to split Y' and Cb/Cr
 		// into separate textures. For 10-bit, the input format (v210)
 		// is complicated enough that we need to interpolate up to 4:4:4,
 		// which we do in a compute shader ourselves.  For BGRA, the data
@@ -62,14 +76,28 @@ PBOFrameAllocator::PBOFrameAllocator(bmusb::PixelFormat pixel_format, size_t fra
 			glGenTextures(2, userdata[i].tex_rgba);
 			check_error();
 			break;
+		case bmusb::PixelFormat_8BitYCbCrPlanar:
+			glGenTextures(2, userdata[i].tex_y);
+			check_error();
+			glGenTextures(2, userdata[i].tex_cb);
+			check_error();
+			glGenTextures(2, userdata[i].tex_cr);
+			check_error();
+			break;
+		default:
+			assert(false);
 		}
 
 		userdata[i].last_width[0] = width;
 		userdata[i].last_height[0] = height;
+		userdata[i].last_cbcr_width[0] = width / 2;
+		userdata[i].last_cbcr_height[0] = height;
 		userdata[i].last_v210_width[0] = 0;
 
 		userdata[i].last_width[1] = 0;
 		userdata[i].last_height[1] = 0;
+		userdata[i].last_cbcr_width[1] = 0;
+		userdata[i].last_cbcr_height[1] = 0;
 		userdata[i].last_v210_width[1] = 0;
 
 		userdata[i].last_interlaced = false;
@@ -95,12 +123,7 @@ PBOFrameAllocator::PBOFrameAllocator(bmusb::PixelFormat pixel_format, size_t fra
 				glBindTexture(GL_TEXTURE_2D, userdata[i].tex_444[field]);
 				check_error();
-				glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
-				check_error();
-				glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
-				check_error();
-				glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
-				check_error();
+				set_clamp_to_edge();
 				if (field == 0) {
 					glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB10_A2, width, height, 0, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, NULL);
 					check_error();
@@ -110,12 +133,7 @@ PBOFrameAllocator::PBOFrameAllocator(bmusb::PixelFormat pixel_format, size_t fra
 			case bmusb::PixelFormat_8BitYCbCr:
 				glBindTexture(GL_TEXTURE_2D, userdata[i].tex_y[field]);
 				check_error();
-				glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
-				check_error();
-				glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
-				check_error();
-				glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
-				check_error();
+				set_clamp_to_edge();
 				if (field == 0) {
 					glTexImage2D(GL_TEXTURE_2D, 0, GL_R8, width, height, 0, GL_RED, GL_UNSIGNED_BYTE, NULL);
 					check_error();
@@ -123,12 +141,7 @@ PBOFrameAllocator::PBOFrameAllocator(bmusb::PixelFormat pixel_format, size_t fra
 
 				glBindTexture(GL_TEXTURE_2D, userdata[i].tex_cbcr[field]);
 				check_error();
-				glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
-				check_error();
-				glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
-				check_error();
-				glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
-				check_error();
+				set_clamp_to_edge();
 				if (field == 0) {
 					glTexImage2D(GL_TEXTURE_2D, 0, GL_RG8, width / 2, height, 0, GL_RG, GL_UNSIGNED_BYTE, NULL);
 					check_error();
@@ -137,12 +150,7 @@ PBOFrameAllocator::PBOFrameAllocator(bmusb::PixelFormat pixel_format, size_t fra
 			case bmusb::PixelFormat_8BitBGRA:
 				glBindTexture(GL_TEXTURE_2D, userdata[i].tex_rgba[field]);
 				check_error();
-				glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
-				check_error();
-				glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
-				check_error();
-				glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
-				check_error();
+				set_clamp_to_edge();
 				if (field == 0) {
 					if (global_flags.can_disable_srgb_decoder) {
 						// See the comments in tweaked_inputs.h.
 						glTexImage2D(GL_TEXTURE_2D, 0, GL_SRGB8_ALPHA8, width, height, 0, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, NULL);
@@ -152,6 +160,31 @@ PBOFrameAllocator::PBOFrameAllocator(bmusb::PixelFormat pixel_format, size_t fra
 					check_error();
 				}
 				break;
+			case bmusb::PixelFormat_8BitYCbCrPlanar:
+				glBindTexture(GL_TEXTURE_2D, userdata[i].tex_y[field]);
+				check_error();
+				set_clamp_to_edge();
+				if (field == 0) {
+					glTexImage2D(GL_TEXTURE_2D, 0, GL_R8, width, height, 0, GL_RED, GL_UNSIGNED_BYTE, NULL);
+					check_error();
+				}
+
+				glBindTexture(GL_TEXTURE_2D, userdata[i].tex_cb[field]);
+				check_error();
+				set_clamp_to_edge();
+				if (field == 0) {
+					glTexImage2D(GL_TEXTURE_2D, 0, GL_R8, width / 2, height, 0, GL_RED, GL_UNSIGNED_BYTE, NULL);
+					check_error();
+				}
+
+				glBindTexture(GL_TEXTURE_2D, userdata[i].tex_cr[field]);
+				check_error();
+				set_clamp_to_edge();
+				if (field == 0) {
+					glTexImage2D(GL_TEXTURE_2D, 0, GL_R8, width / 2, height, 0, GL_RED, GL_UNSIGNED_BYTE, NULL);
+					check_error();
+				}
+				break;
 			default:
 				assert(false);
 			}
@@ -196,6 +229,14 @@ PBOFrameAllocator::~PBOFrameAllocator()
 			glDeleteTextures(2, ((Userdata *)frame.userdata)->tex_rgba);
 			check_error();
 			break;
+		case bmusb::PixelFormat_8BitYCbCrPlanar:
+			glDeleteTextures(2, ((Userdata *)frame.userdata)->tex_y);
+			check_error();
+			glDeleteTextures(2, ((Userdata *)frame.userdata)->tex_cb);
+			check_error();
+			glDeleteTextures(2, ((Userdata *)frame.userdata)->tex_cr);
+			check_error();
+			break;
 		default:
 			assert(false);
 		}
diff --git a/pbo_frame_allocator.h b/pbo_frame_allocator.h
index fc4b522..37fd383 100644
--- a/pbo_frame_allocator.h
+++ b/pbo_frame_allocator.h
@@ -8,6 +8,8 @@
 #include
 #include
 
+#include <movit/ycbcr.h>
+
 #include "bmusb/bmusb.h"
 
 // An allocator that allocates straight into OpenGL pinned memory.
@@ -37,11 +39,16 @@ public:
 		// freely at runtime.
 		bmusb::PixelFormat pixel_format;
 
+		// Used only for PixelFormat_8BitYCbCrPlanar.
+		movit::YCbCrFormat ycbcr_format;
+
 		// The second set is only used for the second field of interlaced inputs.
 		GLuint tex_y[2], tex_cbcr[2];  // For PixelFormat_8BitYCbCr.
+		GLuint tex_cb[2], tex_cr[2];  // For PixelFormat_8BitYCbCrPlanar (which also uses tex_y).
 		GLuint tex_v210[2], tex_444[2];  // For PixelFormat_10BitYCbCr.
 		GLuint tex_rgba[2];  // For PixelFormat_8BitBGRA.
 		GLuint last_width[2], last_height[2];
+		GLuint last_cbcr_width[2], last_cbcr_height[2];
 		GLuint last_v210_width[2];  // PixelFormat_10BitYCbCr.
 		bool last_interlaced, last_has_signal, last_is_connected;
 		unsigned last_frame_rate_nom, last_frame_rate_den;
diff --git a/theme.cpp b/theme.cpp
index dd56419..ce1389f 100644
--- a/theme.cpp
+++ b/theme.cpp
@@ -718,7 +718,11 @@ LiveInputWrapper::LiveInputWrapper(Theme *theme, EffectChain *chain, bmusb::Pixe
 			chain->add_effect(deinterlace_effect, reverse_inputs);
 		}
 	} else {
-		assert(pixel_format == bmusb::PixelFormat_8BitYCbCr || pixel_format == bmusb::PixelFormat_10BitYCbCr);
+		assert(pixel_format == bmusb::PixelFormat_8BitYCbCr ||
+		       pixel_format == bmusb::PixelFormat_10BitYCbCr ||
+		       pixel_format == bmusb::PixelFormat_8BitYCbCrPlanar);
+
+		// Most of these settings will be overridden later if using PixelFormat_8BitYCbCrPlanar.
 		input_ycbcr_format.chroma_subsampling_x = (pixel_format == bmusb::PixelFormat_10BitYCbCr) ? 1 : 2;
 		input_ycbcr_format.chroma_subsampling_y = 1;
 		input_ycbcr_format.num_levels = (pixel_format == bmusb::PixelFormat_10BitYCbCr) ? 1024 : 256;
@@ -726,12 +730,19 @@ LiveInputWrapper::LiveInputWrapper(Theme *theme, EffectChain *chain, bmusb::Pixe
 		input_ycbcr_format.cr_x_position = 0.0;
 		input_ycbcr_format.cb_y_position = 0.5;
 		input_ycbcr_format.cr_y_position = 0.5;
-		input_ycbcr_format.luma_coefficients = YCBCR_REC_709;  // Will be overridden later.
-		input_ycbcr_format.full_range = false;  // Will be overridden later.
+		input_ycbcr_format.luma_coefficients = YCBCR_REC_709;  // Will be overridden later even if not planar.
+		input_ycbcr_format.full_range = false;  // Will be overridden later even if not planar.
 
 		for (unsigned i = 0; i < num_inputs; ++i) {
 			// When using 10-bit input, we're converting to interleaved through v210Converter.
-			YCbCrInputSplitting splitting = (pixel_format == bmusb::PixelFormat_10BitYCbCr) ? YCBCR_INPUT_INTERLEAVED : YCBCR_INPUT_SPLIT_Y_AND_CBCR;
+			YCbCrInputSplitting splitting;
+			if (pixel_format == bmusb::PixelFormat_10BitYCbCr) {
+				splitting = YCBCR_INPUT_INTERLEAVED;
+			} else if (pixel_format == bmusb::PixelFormat_8BitYCbCr) {
+				splitting = YCBCR_INPUT_SPLIT_Y_AND_CBCR;
+			} else {
+				splitting = YCBCR_INPUT_PLANAR;
+			}
 			if (override_bounce) {
 				ycbcr_inputs.push_back(new NonBouncingYCbCrInput(inout_format, input_ycbcr_format, global_flags.width, global_flags.height, splitting));
 			} else {
@@ -824,6 +835,14 @@ void LiveInputWrapper::connect_signal_raw(int signal_num)
 			ycbcr_inputs[i]->set_width(width);
 			ycbcr_inputs[i]->set_height(height);
 			break;
+		case bmusb::PixelFormat_8BitYCbCrPlanar:
+			ycbcr_inputs[i]->set_texture_num(0, userdata->tex_y[frame.field_number]);
+			ycbcr_inputs[i]->set_texture_num(1, userdata->tex_cb[frame.field_number]);
+			ycbcr_inputs[i]->set_texture_num(2, userdata->tex_cr[frame.field_number]);
+			ycbcr_inputs[i]->change_ycbcr_format(userdata->ycbcr_format);
+			ycbcr_inputs[i]->set_width(width);
+			ycbcr_inputs[i]->set_height(height);
+			break;
 		case bmusb::PixelFormat_10BitYCbCr:
 			ycbcr_inputs[i]->set_texture_num(0, userdata->tex_444[frame.field_number]);
 			ycbcr_inputs[i]->change_ycbcr_format(input_ycbcr_format);
-- 
2.39.2
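
Illustrative note (not part of the patch): the planar path in play_video() above packs Y, Cb, and Cr tightly into one frame allocation (Y first, then Cb, then Cr), with chroma plane dimensions rounded up the same way AV_CEIL_RSHIFT rounds. The standalone sketch below mirrors only that arithmetic; the struct and function names are invented for illustration and are not part of Nageru.

#include <cstdio>

struct PlanarLayout {
	size_t y_offset, cb_offset, cr_offset;  // byte offsets into the frame buffer
	size_t luma_stride, chroma_stride;      // equal to the plane widths (tightly packed)
	size_t total_len;                       // what video_frame.len is set to
};

// log2_chroma_w/h come from the chosen swscale destination format:
// 1/1 for yuv420p, 1/0 for yuv422p, 0/0 for yuv444p.
static PlanarLayout compute_layout(unsigned width, unsigned height,
                                   unsigned log2_chroma_w, unsigned log2_chroma_h)
{
	// Ceiling right shift, same result as FFmpeg's AV_CEIL_RSHIFT.
	unsigned chroma_width = (width + (1u << log2_chroma_w) - 1) >> log2_chroma_w;
	unsigned chroma_height = (height + (1u << log2_chroma_h) - 1) >> log2_chroma_h;

	PlanarLayout layout;
	layout.y_offset = 0;
	layout.luma_stride = width;
	layout.cb_offset = size_t(width) * height;
	layout.chroma_stride = chroma_width;
	layout.cr_offset = layout.cb_offset + size_t(chroma_width) * chroma_height;
	layout.total_len = layout.cr_offset + size_t(chroma_width) * chroma_height;
	return layout;
}

int main()
{
	// 1280x720 decoded to yuv420p: 921600 luma bytes plus 2 x 230400 chroma bytes.
	PlanarLayout l = compute_layout(1280, 720, 1, 1);
	printf("len=%zu cb_offset=%zu cr_offset=%zu\n", l.total_len, l.cb_offset, l.cr_offset);
	return 0;
}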