X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=quicksync_encoder.cpp;h=bd6b4c2edbb7097360ee02d6915abea7c599bddd;hb=bd5b2de9a277b87c75d71d94bd8c5095ab14ecf7;hp=2e8633d3cf72bc55d7fdd84f057bb50a0c7c1683;hpb=e066f18188fde1e6bd0b698c89427119cbffaaa3;p=nageru

diff --git a/quicksync_encoder.cpp b/quicksync_encoder.cpp
index 2e8633d..bd6b4c2 100644
--- a/quicksync_encoder.cpp
+++ b/quicksync_encoder.cpp
@@ -1,5 +1,6 @@
 #include "quicksync_encoder.h"
 
+#include <movit/image_format.h>
 #include <movit/resource_pool.h>  // Must be above the Xlib includes.
 #include <movit/util.h>
 
@@ -55,6 +56,7 @@ extern "C" {
 #include "timebase.h"
 #include "x264_encoder.h"
 
+using namespace movit;
 using namespace std;
 using namespace std::chrono;
 using namespace std::placeholders;
@@ -259,7 +261,7 @@ static void nal_header(bitstream *bs, int nal_ref_idc, int nal_unit_type)
     bitstream_put_ui(bs, nal_unit_type, 5);
 }
 
-void QuickSyncEncoderImpl::sps_rbsp(bitstream *bs)
+void QuickSyncEncoderImpl::sps_rbsp(YCbCrLumaCoefficients ycbcr_coefficients, bitstream *bs)
 {
     int profile_idc = PROFILE_IDC_BASELINE;
 
@@ -331,9 +333,10 @@ void QuickSyncEncoderImpl::sps_rbsp(bitstream *bs)
             {
                 bitstream_put_ui(bs, 1, 8);  /* colour_primaries (1 = BT.709) */
                 bitstream_put_ui(bs, 2, 8);  /* transfer_characteristics (2 = unspecified, since we use sRGB) */
-                if (global_flags.ycbcr_rec709_coefficients) {
+                if (ycbcr_coefficients == YCBCR_REC_709) {
                     bitstream_put_ui(bs, 1, 8);  /* matrix_coefficients (1 = BT.709) */
                 } else {
+                    assert(ycbcr_coefficients == YCBCR_REC_601);
                     bitstream_put_ui(bs, 6, 8);  /* matrix_coefficients (6 = BT.601/SMPTE 170M) */
                 }
             }
@@ -515,14 +518,14 @@ int QuickSyncEncoderImpl::build_packed_pic_buffer(unsigned char **header_buffer)
 }
 
 int
-QuickSyncEncoderImpl::build_packed_seq_buffer(unsigned char **header_buffer)
+QuickSyncEncoderImpl::build_packed_seq_buffer(YCbCrLumaCoefficients ycbcr_coefficients, unsigned char **header_buffer)
 {
     bitstream bs;
 
     bitstream_start(&bs);
     nal_start_code_prefix(&bs);
     nal_header(&bs, NAL_REF_IDC_HIGH, NAL_SPS);
-    sps_rbsp(&bs);
+    sps_rbsp(ycbcr_coefficients, &bs);
     bitstream_end(&bs);
 
     *header_buffer = (unsigned char *)bs.buffer;
@@ -721,7 +724,10 @@ static const char *rc_to_string(int rc_mode)
 
 void QuickSyncEncoderImpl::enable_zerocopy_if_possible()
 {
-	if (global_flags.uncompressed_video_to_http) {
+	if (global_flags.x264_video_to_disk) {
+		// Quick Sync is entirely disabled.
+		use_zerocopy = false;
+	} else if (global_flags.uncompressed_video_to_http) {
 		fprintf(stderr, "Disabling zerocopy H.264 encoding due to --http-uncompressed-video.\n");
 		use_zerocopy = false;
 	} else if (global_flags.x264_video_to_http) {
@@ -740,7 +746,6 @@ VADisplay QuickSyncEncoderImpl::va_open_display(const string &va_display)
 			fprintf(stderr, "error: can't connect to X server!\n");
 			return NULL;
 		}
-		enable_zerocopy_if_possible();
 		return vaGetDisplay(x11_display);
 	} else if (va_display[0] != '/') {
 		x11_display = XOpenDisplay(va_display.c_str());
@@ -748,7 +753,6 @@ VADisplay QuickSyncEncoderImpl::va_open_display(const string &va_display)
 			fprintf(stderr, "error: can't connect to X server!\n");
 			return NULL;
 		}
-		enable_zerocopy_if_possible();
 		return vaGetDisplay(x11_display);
 	} else {
 		drm_fd = open(va_display.c_str(), O_RDWR);
@@ -926,88 +930,88 @@ int QuickSyncEncoderImpl::init_va(const string &va_display)
 
 int QuickSyncEncoderImpl::setup_encode()
 {
-    VAStatus va_status;
-    VASurfaceID *tmp_surfaceid;
-    int codedbuf_size, i;
-    VASurfaceID src_surface[SURFACE_NUM];
-    VASurfaceID ref_surface[SURFACE_NUM];
-    
-    va_status = vaCreateConfig(va_dpy, h264_profile, VAEntrypointEncSlice,
-            &config_attrib[0], config_attrib_num, &config_id);
-    CHECK_VASTATUS(va_status, "vaCreateConfig");
-
-    /* create source surfaces */
-    va_status = vaCreateSurfaces(va_dpy,
-                                 VA_RT_FORMAT_YUV420, frame_width_mbaligned, frame_height_mbaligned,
-                                 &src_surface[0], SURFACE_NUM,
-                                 NULL, 0);
-    CHECK_VASTATUS(va_status, "vaCreateSurfaces");
-
-    /* create reference surfaces */
-    va_status = vaCreateSurfaces(va_dpy,
-                                 VA_RT_FORMAT_YUV420, frame_width_mbaligned, frame_height_mbaligned,
-				 &ref_surface[0], SURFACE_NUM,
-				 NULL, 0);
-    CHECK_VASTATUS(va_status, "vaCreateSurfaces");
-
-    tmp_surfaceid = (VASurfaceID *)calloc(2 * SURFACE_NUM, sizeof(VASurfaceID));
-    memcpy(tmp_surfaceid, src_surface, SURFACE_NUM * sizeof(VASurfaceID));
-    memcpy(tmp_surfaceid + SURFACE_NUM, ref_surface, SURFACE_NUM * sizeof(VASurfaceID));
-    
-    /* Create a context for this encode pipe */
-    va_status = vaCreateContext(va_dpy, config_id,
-                                frame_width_mbaligned, frame_height_mbaligned,
-                                VA_PROGRESSIVE,
-                                tmp_surfaceid, 2 * SURFACE_NUM,
-                                &context_id);
-    CHECK_VASTATUS(va_status, "vaCreateContext");
-    free(tmp_surfaceid);
-
-    codedbuf_size = (frame_width_mbaligned * frame_height_mbaligned * 400) / (16*16);
-
-    for (i = 0; i < SURFACE_NUM; i++) {
-        /* create coded buffer once for all
-         * other VA buffers which won't be used again after vaRenderPicture.
-         * so APP can always vaCreateBuffer for every frame
-         * but coded buffer need to be mapped and accessed after vaRenderPicture/vaEndPicture
-         * so VA won't maintain the coded buffer
-         */
-        va_status = vaCreateBuffer(va_dpy, context_id, VAEncCodedBufferType,
-                codedbuf_size, 1, NULL, &gl_surfaces[i].coded_buf);
-        CHECK_VASTATUS(va_status, "vaCreateBuffer");
-    }
+	if (!global_flags.x264_video_to_disk) {
+		VAStatus va_status;
+		VASurfaceID *tmp_surfaceid;
+		int codedbuf_size;
+		VASurfaceID src_surface[SURFACE_NUM];
+		VASurfaceID ref_surface[SURFACE_NUM];
+
+		va_status = vaCreateConfig(va_dpy, h264_profile, VAEntrypointEncSlice,
+				&config_attrib[0], config_attrib_num, &config_id);
+		CHECK_VASTATUS(va_status, "vaCreateConfig");
+
+		/* create source surfaces */
+		va_status = vaCreateSurfaces(va_dpy,
+				VA_RT_FORMAT_YUV420, frame_width_mbaligned, frame_height_mbaligned,
+				&src_surface[0], SURFACE_NUM,
+				NULL, 0);
+		CHECK_VASTATUS(va_status, "vaCreateSurfaces");
+
+		/* create reference surfaces */
+		va_status = vaCreateSurfaces(va_dpy,
+				VA_RT_FORMAT_YUV420, frame_width_mbaligned, frame_height_mbaligned,
+				&ref_surface[0], SURFACE_NUM,
+				NULL, 0);
+		CHECK_VASTATUS(va_status, "vaCreateSurfaces");
+
+		tmp_surfaceid = (VASurfaceID *)calloc(2 * SURFACE_NUM, sizeof(VASurfaceID));
+		memcpy(tmp_surfaceid, src_surface, SURFACE_NUM * sizeof(VASurfaceID));
+		memcpy(tmp_surfaceid + SURFACE_NUM, ref_surface, SURFACE_NUM * sizeof(VASurfaceID));
+
+		for (int i = 0; i < SURFACE_NUM; i++) {
+			gl_surfaces[i].src_surface = src_surface[i];
+			gl_surfaces[i].ref_surface = ref_surface[i];
+		}
 
-    /* create OpenGL objects */
-    //glGenFramebuffers(SURFACE_NUM, fbos);
-    
-    for (i = 0; i < SURFACE_NUM; i++) {
-        if (use_zerocopy) {
-            gl_surfaces[i].y_tex = resource_pool->create_2d_texture(GL_R8, 1, 1);
-            gl_surfaces[i].cbcr_tex = resource_pool->create_2d_texture(GL_RG8, 1, 1);
-        } else {
-            gl_surfaces[i].y_tex = resource_pool->create_2d_texture(GL_R8, frame_width, frame_height);
-            gl_surfaces[i].cbcr_tex = resource_pool->create_2d_texture(GL_RG8, frame_width / 2, frame_height / 2);
-
-            // Generate a PBO to read into. It doesn't necessarily fit 1:1 with the VA-API
-            // buffers, due to potentially differing pitch.
-            glGenBuffers(1, &gl_surfaces[i].pbo);
-            glBindBuffer(GL_PIXEL_PACK_BUFFER, gl_surfaces[i].pbo);
-            glBufferStorage(GL_PIXEL_PACK_BUFFER, frame_width * frame_height * 2, nullptr, GL_MAP_READ_BIT | GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT);
-            uint8_t *ptr = (uint8_t *)glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, frame_width * frame_height * 2, GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT);
-            gl_surfaces[i].y_offset = 0;
-            gl_surfaces[i].cbcr_offset = frame_width * frame_height;
-            gl_surfaces[i].y_ptr = ptr + gl_surfaces[i].y_offset;
-            gl_surfaces[i].cbcr_ptr = ptr + gl_surfaces[i].cbcr_offset;
-            glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
-        }
-    }
+		/* Create a context for this encode pipe */
+		va_status = vaCreateContext(va_dpy, config_id,
+				frame_width_mbaligned, frame_height_mbaligned,
+				VA_PROGRESSIVE,
+				tmp_surfaceid, 2 * SURFACE_NUM,
+				&context_id);
+		CHECK_VASTATUS(va_status, "vaCreateContext");
+		free(tmp_surfaceid);
+
+		codedbuf_size = (frame_width_mbaligned * frame_height_mbaligned * 400) / (16*16);
+
+		for (int i = 0; i < SURFACE_NUM; i++) {
+			/* create coded buffer once for all
+			 * other VA buffers which won't be used again after vaRenderPicture.
+			 * so APP can always vaCreateBuffer for every frame
+			 * but coded buffer need to be mapped and accessed after vaRenderPicture/vaEndPicture
+			 * so VA won't maintain the coded buffer
+			 */
+			va_status = vaCreateBuffer(va_dpy, context_id, VAEncCodedBufferType,
+					codedbuf_size, 1, NULL, &gl_surfaces[i].coded_buf);
+			CHECK_VASTATUS(va_status, "vaCreateBuffer");
+		}
+	}
 
-    for (i = 0; i < SURFACE_NUM; i++) {
-        gl_surfaces[i].src_surface = src_surface[i];
-        gl_surfaces[i].ref_surface = ref_surface[i];
-    }
-    
-    return 0;
+	/* create OpenGL objects */
+	for (int i = 0; i < SURFACE_NUM; i++) {
+		if (use_zerocopy) {
+			gl_surfaces[i].y_tex = resource_pool->create_2d_texture(GL_R8, 1, 1);
+			gl_surfaces[i].cbcr_tex = resource_pool->create_2d_texture(GL_RG8, 1, 1);
+		} else {
+			gl_surfaces[i].y_tex = resource_pool->create_2d_texture(GL_R8, frame_width, frame_height);
+			gl_surfaces[i].cbcr_tex = resource_pool->create_2d_texture(GL_RG8, frame_width / 2, frame_height / 2);
+
+			// Generate a PBO to read into. It doesn't necessarily fit 1:1 with the VA-API
+			// buffers, due to potentially differing pitch.
+			glGenBuffers(1, &gl_surfaces[i].pbo);
+			glBindBuffer(GL_PIXEL_PACK_BUFFER, gl_surfaces[i].pbo);
+			glBufferStorage(GL_PIXEL_PACK_BUFFER, frame_width * frame_height * 2, nullptr, GL_MAP_READ_BIT | GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT);
+			uint8_t *ptr = (uint8_t *)glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, frame_width * frame_height * 2, GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT);
+			gl_surfaces[i].y_offset = 0;
+			gl_surfaces[i].cbcr_offset = frame_width * frame_height;
+			gl_surfaces[i].y_ptr = ptr + gl_surfaces[i].y_offset;
+			gl_surfaces[i].cbcr_ptr = ptr + gl_surfaces[i].cbcr_offset;
+			glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
+		}
+	}
+
+	return 0;
 }
 
 // Given a list like 1 9 3 0 2 8 4 and a pivot element 3, will produce
@@ -1220,7 +1224,7 @@ int QuickSyncEncoderImpl::render_picture(GLSurface *surf, int frame_type, int di
     return 0;
 }
 
-int QuickSyncEncoderImpl::render_packedsequence()
+int QuickSyncEncoderImpl::render_packedsequence(YCbCrLumaCoefficients ycbcr_coefficients)
 {
     VAEncPackedHeaderParameterBuffer packedheader_param_buffer;
     VABufferID packedseq_para_bufid, packedseq_data_bufid, render_id[2];
@@ -1228,7 +1232,7 @@ int QuickSyncEncoderImpl::render_packedsequence()
     unsigned char *packedseq_buffer = NULL;
     VAStatus va_status;
 
-    length_in_bits = build_packed_seq_buffer(&packedseq_buffer); 
+    length_in_bits = build_packed_seq_buffer(ycbcr_coefficients, &packedseq_buffer); 
     
     packedheader_param_buffer.type = VAEncPackedHeaderSequence;
     
@@ -1526,7 +1530,7 @@ int QuickSyncEncoderImpl::deinit_va()
     return 0;
 }
 
-QuickSyncEncoderImpl::QuickSyncEncoderImpl(const std::string &filename, movit::ResourcePool *resource_pool, QSurface *surface, const string &va_display, int width, int height, AVOutputFormat *oformat, X264Encoder *x264_encoder, DiskSpaceEstimator *disk_space_estimator)
+QuickSyncEncoderImpl::QuickSyncEncoderImpl(const std::string &filename, ResourcePool *resource_pool, QSurface *surface, const string &va_display, int width, int height, AVOutputFormat *oformat, X264Encoder *x264_encoder, DiskSpaceEstimator *disk_space_estimator)
 	: current_storage_frame(0), resource_pool(resource_pool), surface(surface), x264_encoder(x264_encoder), frame_width(width), frame_height(height), disk_space_estimator(disk_space_estimator)
 {
 	file_audio_encoder.reset(new AudioEncoder(AUDIO_OUTPUT_CODEC_NAME, DEFAULT_AUDIO_OUTPUT_BIT_RATE, oformat));
@@ -1538,23 +1542,27 @@ QuickSyncEncoderImpl::QuickSyncEncoderImpl(const std::string &filename, movit::R
 
 	//print_input();
 
-	if (global_flags.x264_video_to_http) {
+	if (global_flags.x264_video_to_http || global_flags.x264_video_to_disk) {
 		assert(x264_encoder != nullptr);
 	} else {
 		assert(x264_encoder == nullptr);
 	}
 
-	init_va(va_display);
+	enable_zerocopy_if_possible();
+	if (!global_flags.x264_video_to_disk) {
+		init_va(va_display);
+	}
 	setup_encode();
 
-	memset(&seq_param, 0, sizeof(seq_param));
-	memset(&pic_param, 0, sizeof(pic_param));
-	memset(&slice_param, 0, sizeof(slice_param));
+	if (!global_flags.x264_video_to_disk) {
+		memset(&seq_param, 0, sizeof(seq_param));
+		memset(&pic_param, 0, sizeof(pic_param));
+		memset(&slice_param, 0, sizeof(slice_param));
+	}
 
 	storage_thread = thread(&QuickSyncEncoderImpl::storage_task_thread, this);
 
 	encode_thread = thread([this]{
-		//SDL_GL_MakeCurrent(window, context);
 		QOpenGLContext *context = create_context(this->surface);
 		eglBindAPI(EGL_OPENGL_API);
 		if (!make_current(context, this->surface)) {
@@ -1595,7 +1603,7 @@ void QuickSyncEncoderImpl::release_gl_surface(size_t display_frame_num)
 	}
 }
 
-bool QuickSyncEncoderImpl::begin_frame(int64_t pts, int64_t duration, const vector<RefCountedFrame> &input_frames, GLuint *y_tex, GLuint *cbcr_tex)
+bool QuickSyncEncoderImpl::begin_frame(int64_t pts, int64_t duration, YCbCrLumaCoefficients ycbcr_coefficients, const vector<RefCountedFrame> &input_frames, GLuint *y_tex, GLuint *cbcr_tex)
 {
 	assert(!is_shutdown);
 	GLSurface *surf = nullptr;
@@ -1621,55 +1629,57 @@ bool QuickSyncEncoderImpl::begin_frame(int64_t pts, int64_t duration, const vect
 	*y_tex = surf->y_tex;
 	*cbcr_tex = surf->cbcr_tex;
 
-	VAStatus va_status = vaDeriveImage(va_dpy, surf->src_surface, &surf->surface_image);
-	CHECK_VASTATUS(va_status, "vaDeriveImage");
-
-	if (use_zerocopy) {
-		VABufferInfo buf_info;
-		buf_info.mem_type = VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME;  // or VA_SURFACE_ATTRIB_MEM_TYPE_KERNEL_DRM?
-		va_status = vaAcquireBufferHandle(va_dpy, surf->surface_image.buf, &buf_info);
-		CHECK_VASTATUS(va_status, "vaAcquireBufferHandle");
-
-		// Create Y image.
-		surf->y_egl_image = EGL_NO_IMAGE_KHR;
-		EGLint y_attribs[] = {
-			EGL_WIDTH, frame_width,
-			EGL_HEIGHT, frame_height,
-			EGL_LINUX_DRM_FOURCC_EXT, fourcc_code('R', '8', ' ', ' '),
-			EGL_DMA_BUF_PLANE0_FD_EXT, EGLint(buf_info.handle),
-			EGL_DMA_BUF_PLANE0_OFFSET_EXT, EGLint(surf->surface_image.offsets[0]),
-			EGL_DMA_BUF_PLANE0_PITCH_EXT, EGLint(surf->surface_image.pitches[0]),
-			EGL_NONE
-		};
-
-		surf->y_egl_image = eglCreateImageKHR(eglGetCurrentDisplay(), EGL_NO_CONTEXT, EGL_LINUX_DMA_BUF_EXT, NULL, y_attribs);
-		assert(surf->y_egl_image != EGL_NO_IMAGE_KHR);
-
-		// Associate Y image to a texture.
-		glBindTexture(GL_TEXTURE_2D, *y_tex);
-		glEGLImageTargetTexture2DOES(GL_TEXTURE_2D, surf->y_egl_image);
-
-		// Create CbCr image.
-		surf->cbcr_egl_image = EGL_NO_IMAGE_KHR;
-		EGLint cbcr_attribs[] = {
-			EGL_WIDTH, frame_width,
-			EGL_HEIGHT, frame_height,
-			EGL_LINUX_DRM_FOURCC_EXT, fourcc_code('G', 'R', '8', '8'),
-			EGL_DMA_BUF_PLANE0_FD_EXT, EGLint(buf_info.handle),
-			EGL_DMA_BUF_PLANE0_OFFSET_EXT, EGLint(surf->surface_image.offsets[1]),
-			EGL_DMA_BUF_PLANE0_PITCH_EXT, EGLint(surf->surface_image.pitches[1]),
-			EGL_NONE
-		};
-
-		surf->cbcr_egl_image = eglCreateImageKHR(eglGetCurrentDisplay(), EGL_NO_CONTEXT, EGL_LINUX_DMA_BUF_EXT, NULL, cbcr_attribs);
-		assert(surf->cbcr_egl_image != EGL_NO_IMAGE_KHR);
-
-		// Associate CbCr image to a texture.
-		glBindTexture(GL_TEXTURE_2D, *cbcr_tex);
-		glEGLImageTargetTexture2DOES(GL_TEXTURE_2D, surf->cbcr_egl_image);
+	if (!global_flags.x264_video_to_disk) {
+		VAStatus va_status = vaDeriveImage(va_dpy, surf->src_surface, &surf->surface_image);
+		CHECK_VASTATUS(va_status, "vaDeriveImage");
+
+		if (use_zerocopy) {
+			VABufferInfo buf_info;
+			buf_info.mem_type = VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME;  // or VA_SURFACE_ATTRIB_MEM_TYPE_KERNEL_DRM?
+			va_status = vaAcquireBufferHandle(va_dpy, surf->surface_image.buf, &buf_info);
+			CHECK_VASTATUS(va_status, "vaAcquireBufferHandle");
+
+			// Create Y image.
+			surf->y_egl_image = EGL_NO_IMAGE_KHR;
+			EGLint y_attribs[] = {
+				EGL_WIDTH, frame_width,
+				EGL_HEIGHT, frame_height,
+				EGL_LINUX_DRM_FOURCC_EXT, fourcc_code('R', '8', ' ', ' '),
+				EGL_DMA_BUF_PLANE0_FD_EXT, EGLint(buf_info.handle),
+				EGL_DMA_BUF_PLANE0_OFFSET_EXT, EGLint(surf->surface_image.offsets[0]),
+				EGL_DMA_BUF_PLANE0_PITCH_EXT, EGLint(surf->surface_image.pitches[0]),
+				EGL_NONE
+			};
+
+			surf->y_egl_image = eglCreateImageKHR(eglGetCurrentDisplay(), EGL_NO_CONTEXT, EGL_LINUX_DMA_BUF_EXT, NULL, y_attribs);
+			assert(surf->y_egl_image != EGL_NO_IMAGE_KHR);
+
+			// Associate Y image to a texture.
+			glBindTexture(GL_TEXTURE_2D, *y_tex);
+			glEGLImageTargetTexture2DOES(GL_TEXTURE_2D, surf->y_egl_image);
+
+			// Create CbCr image.
+			surf->cbcr_egl_image = EGL_NO_IMAGE_KHR;
+			EGLint cbcr_attribs[] = {
+				EGL_WIDTH, frame_width,
+				EGL_HEIGHT, frame_height,
+				EGL_LINUX_DRM_FOURCC_EXT, fourcc_code('G', 'R', '8', '8'),
+				EGL_DMA_BUF_PLANE0_FD_EXT, EGLint(buf_info.handle),
+				EGL_DMA_BUF_PLANE0_OFFSET_EXT, EGLint(surf->surface_image.offsets[1]),
+				EGL_DMA_BUF_PLANE0_PITCH_EXT, EGLint(surf->surface_image.pitches[1]),
+				EGL_NONE
+			};
+
+			surf->cbcr_egl_image = eglCreateImageKHR(eglGetCurrentDisplay(), EGL_NO_CONTEXT, EGL_LINUX_DMA_BUF_EXT, NULL, cbcr_attribs);
+			assert(surf->cbcr_egl_image != EGL_NO_IMAGE_KHR);
+
+			// Associate CbCr image to a texture.
+			glBindTexture(GL_TEXTURE_2D, *cbcr_tex);
+			glEGLImageTargetTexture2DOES(GL_TEXTURE_2D, surf->cbcr_egl_image);
+		}
 	}
 
-	current_video_frame = PendingFrame{ {}, input_frames, pts, duration };
+	current_video_frame = PendingFrame{ {}, input_frames, pts, duration, ycbcr_coefficients };
 
 	return true;
 }
@@ -1755,12 +1765,18 @@ void QuickSyncEncoderImpl::shutdown()
 	// Encode any leftover audio in the queues, and also any delayed frames.
 	file_audio_encoder->encode_last_audio();
 
-	release_encode();
-	deinit_va();
-	file_mux.reset();
+	if (!global_flags.x264_video_to_disk) {
+		release_encode();
+		deinit_va();
+	}
 	is_shutdown = true;
 }
 
+void QuickSyncEncoderImpl::close_file()
+{
+	file_mux.reset();  // Drop whatever Mux file_mux currently holds (installed by open_output_file()).
+}
+
 void QuickSyncEncoderImpl::open_output_file(const std::string &filename)
 {
 	AVFormatContext *avctx = avformat_alloc_context();
@@ -1776,10 +1792,18 @@ void QuickSyncEncoderImpl::open_output_file(const std::string &filename)
 		exit(1);
 	}
 
-	string video_extradata = "";  // FIXME: See other comment about global headers.
+	string video_extradata;  // FIXME: See other comment about global headers.
+	if (global_flags.x264_video_to_disk) {
+		video_extradata = x264_encoder->get_global_headers();
+	}
+
 	AVCodecParametersWithDeleter audio_codecpar = file_audio_encoder->get_codec_parameters();
 	file_mux.reset(new Mux(avctx, frame_width, frame_height, Mux::CODEC_H264, video_extradata, audio_codecpar.get(), TIMEBASE,
 		std::bind(&DiskSpaceEstimator::report_write, disk_space_estimator, filename, _1)));
+
+	if (global_flags.x264_video_to_disk) {
+		x264_encoder->add_mux(file_mux.get());
+	}
 }
 
 void QuickSyncEncoderImpl::encode_thread_func()
@@ -1815,6 +1839,13 @@ void QuickSyncEncoderImpl::encode_thread_func()
 		// Pass the frame on to x264 (or uncompressed to HTTP) as needed.
 		// Note that this implicitly waits for the frame to be done rendering.
 		pass_frame(frame, display_frame_num, frame.pts, frame.duration);
+
+		if (global_flags.x264_video_to_disk) {
+			unique_lock<mutex> lock(storage_task_queue_mutex);
+			release_gl_surface(display_frame_num);
+			continue;
+		}
+
 		reorder_buffer[display_frame_num] = move(frame);
 
 		// Now encode as many QuickSync frames as we can using the frames we have available.
@@ -1850,7 +1881,7 @@ void QuickSyncEncoderImpl::encode_thread_func()
 			}
 			last_dts = dts;
 
-			encode_frame(frame, quicksync_encoding_frame_num, quicksync_display_frame_num, gop_start_display_frame_num, frame_type, frame.pts, dts, frame.duration);
+			encode_frame(frame, quicksync_encoding_frame_num, quicksync_display_frame_num, gop_start_display_frame_num, frame_type, frame.pts, dts, frame.duration, frame.ycbcr_coefficients);
 			++quicksync_encoding_frame_num;
 		}
 	}
@@ -1868,7 +1899,7 @@ void QuickSyncEncoderImpl::encode_remaining_frames_as_p(int encoding_frame_num,
 		PendingFrame frame = move(pending_frame.second);
 		int64_t dts = last_dts + (TIMEBASE / MAX_FPS);
 		printf("Finalizing encode: Encoding leftover frame %d as P-frame instead of B-frame.\n", display_frame_num);
-		encode_frame(frame, encoding_frame_num++, display_frame_num, gop_start_display_frame_num, FRAME_P, frame.pts, dts, frame.duration);
+		encode_frame(frame, encoding_frame_num++, display_frame_num, gop_start_display_frame_num, FRAME_P, frame.pts, dts, frame.duration, frame.ycbcr_coefficients);
 		last_dts = dts;
 	}
 }
@@ -1930,13 +1961,13 @@ void QuickSyncEncoderImpl::pass_frame(QuickSyncEncoderImpl::PendingFrame frame,
 	uint8_t *data = reinterpret_cast<uint8_t *>(surf->y_ptr);
 	if (global_flags.uncompressed_video_to_http) {
 		add_packet_for_uncompressed_frame(pts, duration, data);
-	} else if (global_flags.x264_video_to_http) {
-		x264_encoder->add_frame(pts, duration, data, received_ts);
+	} else if (global_flags.x264_video_to_http || global_flags.x264_video_to_disk) {
+		x264_encoder->add_frame(pts, duration, frame.ycbcr_coefficients, data, received_ts);
 	}
 }
 
 void QuickSyncEncoderImpl::encode_frame(QuickSyncEncoderImpl::PendingFrame frame, int encoding_frame_num, int display_frame_num, int gop_start_display_frame_num,
-                                        int frame_type, int64_t pts, int64_t dts, int64_t duration)
+                                        int frame_type, int64_t pts, int64_t dts, int64_t duration, YCbCrLumaCoefficients ycbcr_coefficients)
 {
 	const ReceivedTimestamps received_ts = find_received_timestamp(frame.input_frames);
 
@@ -1980,10 +2011,14 @@ void QuickSyncEncoderImpl::encode_frame(QuickSyncEncoderImpl::PendingFrame frame
 		// FIXME: If the mux wants global headers, we should not put the
 		// SPS/PPS before each IDR frame, but rather put it into the
 		// codec extradata (formatted differently?).
+		//
+		// NOTE: If we change ycbcr_coefficients, it will not take effect
+		// before the next IDR frame. This is acceptable, as it should only
+		// happen on a mode change, which is rare.
 		render_sequence();
 		render_picture(surf, frame_type, display_frame_num, gop_start_display_frame_num);
 		if (h264_packedheader) {
-			render_packedsequence();
+			render_packedsequence(ycbcr_coefficients);
 			render_packedpicture();
 		}
 	} else {
@@ -2018,13 +2053,14 @@ void QuickSyncEncoderImpl::encode_frame(QuickSyncEncoderImpl::PendingFrame frame
 	tmp.pts = pts;
 	tmp.dts = dts;
 	tmp.duration = duration;
+	tmp.ycbcr_coefficients = ycbcr_coefficients;
 	tmp.received_ts = received_ts;
 	tmp.ref_display_frame_numbers = move(ref_display_frame_numbers);
 	storage_task_enqueue(move(tmp));
 }
 
 // Proxy object.
-QuickSyncEncoder::QuickSyncEncoder(const std::string &filename, movit::ResourcePool *resource_pool, QSurface *surface, const string &va_display, int width, int height, AVOutputFormat *oformat, X264Encoder *x264_encoder, DiskSpaceEstimator *disk_space_estimator)
+QuickSyncEncoder::QuickSyncEncoder(const std::string &filename, ResourcePool *resource_pool, QSurface *surface, const string &va_display, int width, int height, AVOutputFormat *oformat, X264Encoder *x264_encoder, DiskSpaceEstimator *disk_space_estimator)
 	: impl(new QuickSyncEncoderImpl(filename, resource_pool, surface, va_display, width, height, oformat, x264_encoder, disk_space_estimator)) {}
 
 // Must be defined here because unique_ptr<> destructor needs to know the impl.
@@ -2035,9 +2071,9 @@ void QuickSyncEncoder::add_audio(int64_t pts, vector<float> audio)
 	impl->add_audio(pts, audio);
 }
 
-bool QuickSyncEncoder::begin_frame(int64_t pts, int64_t duration, const vector<RefCountedFrame> &input_frames, GLuint *y_tex, GLuint *cbcr_tex)
+bool QuickSyncEncoder::begin_frame(int64_t pts, int64_t duration, YCbCrLumaCoefficients ycbcr_coefficients, const vector<RefCountedFrame> &input_frames, GLuint *y_tex, GLuint *cbcr_tex)
 {
-	return impl->begin_frame(pts, duration, input_frames, y_tex, cbcr_tex);
+	return impl->begin_frame(pts, duration, ycbcr_coefficients, input_frames, y_tex, cbcr_tex);
 }
 
 RefCountedGLsync QuickSyncEncoder::end_frame()
@@ -2050,6 +2086,11 @@ void QuickSyncEncoder::shutdown()
 	impl->shutdown();
 }
 
+void QuickSyncEncoder::close_file()
+{
+	impl->close_file();  // Proxy to the implementation's close_file(); shutdown() has its own separate forwarder.
+}
+
 void QuickSyncEncoder::set_stream_mux(Mux *mux)
 {
 	impl->set_stream_mux(mux);