From bd5b2de9a277b87c75d71d94bd8c5095ab14ecf7 Mon Sep 17 00:00:00 2001
From: "Steinar H. Gunderson"
Date: Mon, 13 Mar 2017 19:05:37 +0100
Subject: [PATCH] Add support for recording the x264 video to disk.

This makes recording entirely independent of Quick Sync Video (or VA-API,
if you wish). There's no way of running two separate x264 encodes, though;
you get the same encode as for the stream.
---
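A note on the flag semantics before the diff: --record-x264-video does not add a
second encoder; it reuses the stream's x264 encode and also writes it to the
recording file, so the option handler simply sets both flags. A minimal sketch of
the invariant this establishes (illustrative only, not part of the patch; the
struct mirrors the two booleans from flags.h):

	#include <cassert>

	struct Flags {
		bool x264_video_to_http = false;
		bool x264_video_to_disk = false;  // Implies x264_video_to_http == true.
	};
	Flags global_flags;

	// What OPTION_RECORD_X264_VIDEO does, and the invariant the rest of the
	// code may assume once parse_flags() has run:
	void set_record_x264_video()
	{
		global_flags.x264_video_to_disk = true;
		global_flags.x264_video_to_http = true;
		assert(!global_flags.x264_video_to_disk || global_flags.x264_video_to_http);
	}
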
 flags.cpp                |   8 +
 flags.h                  |   1 +
 quicksync_encoder.cpp    | 313 ++++++++++++++++++++++-----------------
 quicksync_encoder.h      |  10 +-
 quicksync_encoder_impl.h |   8 +-
 video_encoder.cpp        |  28 +++-
 video_encoder.h          |   1 +
 x264_encoder.cpp         |   6 +-
 x264_encoder.h           |   5 +-
 9 files changed, 228 insertions(+), 152 deletions(-)

diff --git a/flags.cpp b/flags.cpp
index 88a2c45..62028df 100644
--- a/flags.cpp
+++ b/flags.cpp
@@ -19,6 +19,7 @@ enum LongOption {
 	OPTION_FAKE_CARDS_AUDIO,
 	OPTION_HTTP_UNCOMPRESSED_VIDEO,
 	OPTION_HTTP_X264_VIDEO,
+	OPTION_RECORD_X264_VIDEO,
 	OPTION_X264_PRESET,
 	OPTION_X264_TUNE,
 	OPTION_X264_SPEEDCONTROL,
@@ -76,6 +77,8 @@ void usage()
 	fprintf(stderr, "      --fake-cards-audio        make fake (disconnected) cards output a simple tone\n");
 	fprintf(stderr, "      --http-uncompressed-video send uncompressed NV12 video to HTTP clients\n");
 	fprintf(stderr, "      --http-x264-video         send x264-compressed video to HTTP clients\n");
+	fprintf(stderr, "      --record-x264-video       store x264-compressed video to disk (implies --http-x264-video,\n");
+	fprintf(stderr, "                                  removes the need for working VA-API encoding)\n");
 	fprintf(stderr, "      --x264-preset             x264 quality preset (default " X264_DEFAULT_PRESET ")\n");
 	fprintf(stderr, "      --x264-tune               x264 tuning (default " X264_DEFAULT_TUNE ", can be blank)\n");
 	fprintf(stderr, "      --x264-speedcontrol       try to match x264 preset to available CPU speed\n");
@@ -145,6 +148,7 @@ void parse_flags(int argc, char * const argv[])
 		{ "fake-cards-audio", no_argument, 0, OPTION_FAKE_CARDS_AUDIO },
 		{ "http-uncompressed-video", no_argument, 0, OPTION_HTTP_UNCOMPRESSED_VIDEO },
 		{ "http-x264-video", no_argument, 0, OPTION_HTTP_X264_VIDEO },
+		{ "record-x264-video", no_argument, 0, OPTION_RECORD_X264_VIDEO },
 		{ "x264-preset", required_argument, 0, OPTION_X264_PRESET },
 		{ "x264-tune", required_argument, 0, OPTION_X264_TUNE },
 		{ "x264-speedcontrol", no_argument, 0, OPTION_X264_SPEEDCONTROL },
@@ -261,6 +265,10 @@ void parse_flags(int argc, char * const argv[])
 		case OPTION_HTTP_X264_VIDEO:
 			global_flags.x264_video_to_http = true;
 			break;
+		case OPTION_RECORD_X264_VIDEO:
+			global_flags.x264_video_to_disk = true;
+			global_flags.x264_video_to_http = true;
+			break;
 		case OPTION_X264_PRESET:
 			global_flags.x264_preset = optarg;
 			break;
diff --git a/flags.h b/flags.h
index 78b1f1f..97d2fe6 100644
--- a/flags.h
+++ b/flags.h
@@ -14,6 +14,7 @@ struct Flags {
 	bool fake_cards_audio = false;
 	bool uncompressed_video_to_http = false;
 	bool x264_video_to_http = false;
+	bool x264_video_to_disk = false;  // Disables Quick Sync entirely. Implies x264_video_to_http == true.
 	std::vector<std::string> theme_dirs { ".", "/usr/local/share/nageru" };
 	std::string theme_filename = "theme.lua";
 	bool locut_enabled = true;
diff --git a/quicksync_encoder.cpp b/quicksync_encoder.cpp
index d49a483..bd6b4c2 100644
--- a/quicksync_encoder.cpp
+++ b/quicksync_encoder.cpp
@@ -724,7 +724,10 @@ static const char *rc_to_string(int rc_mode)
 
 void QuickSyncEncoderImpl::enable_zerocopy_if_possible()
 {
-	if (global_flags.uncompressed_video_to_http) {
+	if (global_flags.x264_video_to_disk) {
+		// Quick Sync is entirely disabled.
+		use_zerocopy = false;
+	} else if (global_flags.uncompressed_video_to_http) {
 		fprintf(stderr, "Disabling zerocopy H.264 encoding due to --http-uncompressed-video.\n");
 		use_zerocopy = false;
 	} else if (global_flags.x264_video_to_http) {
@@ -743,7 +746,6 @@ VADisplay QuickSyncEncoderImpl::va_open_display(const string &va_display)
 			fprintf(stderr, "error: can't connect to X server!\n");
 			return NULL;
 		}
-		enable_zerocopy_if_possible();
 		return vaGetDisplay(x11_display);
 	} else if (va_display[0] != '/') {
 		x11_display = XOpenDisplay(va_display.c_str());
@@ -751,7 +753,6 @@ VADisplay QuickSyncEncoderImpl::va_open_display(const string &va_display)
 			fprintf(stderr, "error: can't connect to X server!\n");
 			return NULL;
 		}
-		enable_zerocopy_if_possible();
 		return vaGetDisplay(x11_display);
 	} else {
 		drm_fd = open(va_display.c_str(), O_RDWR);
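The two deletions above move the zerocopy decision out of va_open_display(): with
--record-x264-video, va_open_display() is never called, so the decision has to
happen unconditionally in the constructor (see the constructor hunk further down).
Condensed into a free function for clarity (a sketch, assuming the Flags struct
from flags.h; in the patch this is a member function that also prints a diagnostic
per disabled case):

	// Zerocopy means the rendered frame goes straight into a VA-API surface.
	// Any consumer that needs the pixels in CPU memory rules it out.
	bool should_use_zerocopy(const Flags &flags)
	{
		if (flags.x264_video_to_disk) return false;          // Quick Sync entirely disabled.
		if (flags.uncompressed_video_to_http) return false;  // HTTP clients get raw NV12 from CPU memory.
		if (flags.x264_video_to_http) return false;          // x264 also reads frames from CPU memory.
		return true;                                         // Frames can stay on the GPU.
	}
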
@@ -929,88 +930,88 @@ int QuickSyncEncoderImpl::init_va(const string &va_display)
 
 int QuickSyncEncoderImpl::setup_encode()
 {
-	VAStatus va_status;
-	VASurfaceID *tmp_surfaceid;
-	int codedbuf_size, i;
-	VASurfaceID src_surface[SURFACE_NUM];
-	VASurfaceID ref_surface[SURFACE_NUM];
-
-	va_status = vaCreateConfig(va_dpy, h264_profile, VAEntrypointEncSlice,
-			&config_attrib[0], config_attrib_num, &config_id);
-	CHECK_VASTATUS(va_status, "vaCreateConfig");
-
-	/* create source surfaces */
-	va_status = vaCreateSurfaces(va_dpy,
-			VA_RT_FORMAT_YUV420, frame_width_mbaligned, frame_height_mbaligned,
-			&src_surface[0], SURFACE_NUM,
-			NULL, 0);
-	CHECK_VASTATUS(va_status, "vaCreateSurfaces");
-
-	/* create reference surfaces */
-	va_status = vaCreateSurfaces(va_dpy,
-			VA_RT_FORMAT_YUV420, frame_width_mbaligned, frame_height_mbaligned,
-			&ref_surface[0], SURFACE_NUM,
-			NULL, 0);
-	CHECK_VASTATUS(va_status, "vaCreateSurfaces");
-
-	tmp_surfaceid = (VASurfaceID *)calloc(2 * SURFACE_NUM, sizeof(VASurfaceID));
-	memcpy(tmp_surfaceid, src_surface, SURFACE_NUM * sizeof(VASurfaceID));
-	memcpy(tmp_surfaceid + SURFACE_NUM, ref_surface, SURFACE_NUM * sizeof(VASurfaceID));
-
-	/* Create a context for this encode pipe */
-	va_status = vaCreateContext(va_dpy, config_id,
-			frame_width_mbaligned, frame_height_mbaligned,
-			VA_PROGRESSIVE,
-			tmp_surfaceid, 2 * SURFACE_NUM,
-			&context_id);
-	CHECK_VASTATUS(va_status, "vaCreateContext");
-	free(tmp_surfaceid);
-
-	codedbuf_size = (frame_width_mbaligned * frame_height_mbaligned * 400) / (16*16);
-
-	for (i = 0; i < SURFACE_NUM; i++) {
-		/* create coded buffer once for all
-		 * other VA buffers which won't be used again after vaRenderPicture.
-		 * so APP can always vaCreateBuffer for every frame
-		 * but coded buffer need to be mapped and accessed after vaRenderPicture/vaEndPicture
-		 * so VA won't maintain the coded buffer
-		 */
-		va_status = vaCreateBuffer(va_dpy, context_id, VAEncCodedBufferType,
-				codedbuf_size, 1, NULL, &gl_surfaces[i].coded_buf);
-		CHECK_VASTATUS(va_status, "vaCreateBuffer");
-	}
+	if (!global_flags.x264_video_to_disk) {
+		VAStatus va_status;
+		VASurfaceID *tmp_surfaceid;
+		int codedbuf_size;
+		VASurfaceID src_surface[SURFACE_NUM];
+		VASurfaceID ref_surface[SURFACE_NUM];
+
+		va_status = vaCreateConfig(va_dpy, h264_profile, VAEntrypointEncSlice,
+				&config_attrib[0], config_attrib_num, &config_id);
+		CHECK_VASTATUS(va_status, "vaCreateConfig");
+
+		/* create source surfaces */
+		va_status = vaCreateSurfaces(va_dpy,
+				VA_RT_FORMAT_YUV420, frame_width_mbaligned, frame_height_mbaligned,
+				&src_surface[0], SURFACE_NUM,
+				NULL, 0);
+		CHECK_VASTATUS(va_status, "vaCreateSurfaces");
+
+		/* create reference surfaces */
+		va_status = vaCreateSurfaces(va_dpy,
+				VA_RT_FORMAT_YUV420, frame_width_mbaligned, frame_height_mbaligned,
+				&ref_surface[0], SURFACE_NUM,
+				NULL, 0);
+		CHECK_VASTATUS(va_status, "vaCreateSurfaces");
+
+		tmp_surfaceid = (VASurfaceID *)calloc(2 * SURFACE_NUM, sizeof(VASurfaceID));
+		memcpy(tmp_surfaceid, src_surface, SURFACE_NUM * sizeof(VASurfaceID));
+		memcpy(tmp_surfaceid + SURFACE_NUM, ref_surface, SURFACE_NUM * sizeof(VASurfaceID));
+
+		for (int i = 0; i < SURFACE_NUM; i++) {
+			gl_surfaces[i].src_surface = src_surface[i];
+			gl_surfaces[i].ref_surface = ref_surface[i];
+		}
 
-	/* create OpenGL objects */
-	//glGenFramebuffers(SURFACE_NUM, fbos);
-
-	for (i = 0; i < SURFACE_NUM; i++) {
-		if (use_zerocopy) {
-			gl_surfaces[i].y_tex = resource_pool->create_2d_texture(GL_R8, 1, 1);
-			gl_surfaces[i].cbcr_tex = resource_pool->create_2d_texture(GL_RG8, 1, 1);
-		} else {
-			gl_surfaces[i].y_tex = resource_pool->create_2d_texture(GL_R8, frame_width, frame_height);
-			gl_surfaces[i].cbcr_tex = resource_pool->create_2d_texture(GL_RG8, frame_width / 2, frame_height / 2);
-
-			// Generate a PBO to read into. It doesn't necessarily fit 1:1 with the VA-API
-			// buffers, due to potentially differing pitch.
-			glGenBuffers(1, &gl_surfaces[i].pbo);
-			glBindBuffer(GL_PIXEL_PACK_BUFFER, gl_surfaces[i].pbo);
-			glBufferStorage(GL_PIXEL_PACK_BUFFER, frame_width * frame_height * 2, nullptr, GL_MAP_READ_BIT | GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT);
-			uint8_t *ptr = (uint8_t *)glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, frame_width * frame_height * 2, GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT);
-			gl_surfaces[i].y_offset = 0;
-			gl_surfaces[i].cbcr_offset = frame_width * frame_height;
-			gl_surfaces[i].y_ptr = ptr + gl_surfaces[i].y_offset;
-			gl_surfaces[i].cbcr_ptr = ptr + gl_surfaces[i].cbcr_offset;
-			glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
-		}
-	}
+		/* Create a context for this encode pipe */
+		va_status = vaCreateContext(va_dpy, config_id,
+				frame_width_mbaligned, frame_height_mbaligned,
+				VA_PROGRESSIVE,
+				tmp_surfaceid, 2 * SURFACE_NUM,
+				&context_id);
+		CHECK_VASTATUS(va_status, "vaCreateContext");
+		free(tmp_surfaceid);
+
+		codedbuf_size = (frame_width_mbaligned * frame_height_mbaligned * 400) / (16*16);
+
+		for (int i = 0; i < SURFACE_NUM; i++) {
+			/* create coded buffer once for all
+			 * other VA buffers which won't be used again after vaRenderPicture.
+			 * so APP can always vaCreateBuffer for every frame
+			 * but coded buffer need to be mapped and accessed after vaRenderPicture/vaEndPicture
+			 * so VA won't maintain the coded buffer
+			 */
+			va_status = vaCreateBuffer(va_dpy, context_id, VAEncCodedBufferType,
+					codedbuf_size, 1, NULL, &gl_surfaces[i].coded_buf);
+			CHECK_VASTATUS(va_status, "vaCreateBuffer");
+		}
+	}
 
-	for (i = 0; i < SURFACE_NUM; i++) {
-		gl_surfaces[i].src_surface = src_surface[i];
-		gl_surfaces[i].ref_surface = ref_surface[i];
-	}
-
-	return 0;
+	/* create OpenGL objects */
+	for (int i = 0; i < SURFACE_NUM; i++) {
+		if (use_zerocopy) {
+			gl_surfaces[i].y_tex = resource_pool->create_2d_texture(GL_R8, 1, 1);
+			gl_surfaces[i].cbcr_tex = resource_pool->create_2d_texture(GL_RG8, 1, 1);
+		} else {
+			gl_surfaces[i].y_tex = resource_pool->create_2d_texture(GL_R8, frame_width, frame_height);
+			gl_surfaces[i].cbcr_tex = resource_pool->create_2d_texture(GL_RG8, frame_width / 2, frame_height / 2);
+
+			// Generate a PBO to read into. It doesn't necessarily fit 1:1 with the VA-API
+			// buffers, due to potentially differing pitch.
+			glGenBuffers(1, &gl_surfaces[i].pbo);
+			glBindBuffer(GL_PIXEL_PACK_BUFFER, gl_surfaces[i].pbo);
+			glBufferStorage(GL_PIXEL_PACK_BUFFER, frame_width * frame_height * 2, nullptr, GL_MAP_READ_BIT | GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT);
+			uint8_t *ptr = (uint8_t *)glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, frame_width * frame_height * 2, GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT);
+			gl_surfaces[i].y_offset = 0;
+			gl_surfaces[i].cbcr_offset = frame_width * frame_height;
+			gl_surfaces[i].y_ptr = ptr + gl_surfaces[i].y_offset;
+			gl_surfaces[i].cbcr_ptr = ptr + gl_surfaces[i].cbcr_offset;
+			glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
+		}
+	}
+
+	return 0;
 }
 
 // Given a list like 1 9 3 0 2 8 4 and a pivot element 3, will produce
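For scale: the coded-buffer sizing kept in the hunk above allocates 400 bytes per
16x16 macroblock of the mb-aligned frame. A quick check of what that means for a
typical 720p configuration (plain arithmetic, not patch code):

	#include <cstdio>

	int main()
	{
		// 1280 and 720 are already multiples of 16, so mb-alignment is a no-op here.
		const int frame_width_mbaligned = 1280, frame_height_mbaligned = 720;
		const int codedbuf_size = (frame_width_mbaligned * frame_height_mbaligned * 400) / (16 * 16);
		// 1280 * 720 * 400 / 256 = 1,440,000 bytes, i.e. roughly 1.4 MB per surface,
		// comfortably larger than any plausible compressed 720p frame.
		printf("codedbuf_size = %d bytes\n", codedbuf_size);
	}
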
@@ -1541,23 +1542,27 @@ QuickSyncEncoderImpl::QuickSyncEncoderImpl(const std::string &filename, Resource
 
 	//print_input();
 
-	if (global_flags.x264_video_to_http) {
+	if (global_flags.x264_video_to_http || global_flags.x264_video_to_disk) {
 		assert(x264_encoder != nullptr);
 	} else {
 		assert(x264_encoder == nullptr);
 	}
 
-	init_va(va_display);
+	enable_zerocopy_if_possible();
+	if (!global_flags.x264_video_to_disk) {
+		init_va(va_display);
+	}
 	setup_encode();
 
-	memset(&seq_param, 0, sizeof(seq_param));
-	memset(&pic_param, 0, sizeof(pic_param));
-	memset(&slice_param, 0, sizeof(slice_param));
+	if (!global_flags.x264_video_to_disk) {
+		memset(&seq_param, 0, sizeof(seq_param));
+		memset(&pic_param, 0, sizeof(pic_param));
+		memset(&slice_param, 0, sizeof(slice_param));
+	}
 
 	storage_thread = thread(&QuickSyncEncoderImpl::storage_task_thread, this);
 
 	encode_thread = thread([this]{
-		//SDL_GL_MakeCurrent(window, context);
 		QOpenGLContext *context = create_context(this->surface);
 		eglBindAPI(EGL_OPENGL_API);
 		if (!make_current(context, this->surface)) {
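This constructor hunk is the heart of the change: the zerocopy decision now runs
before any VA-API work, and VA-API initialization is skipped entirely when
recording through x264. In outline (condensed from the hunk above; member and
flag names as in the patch):

	// Construction order with --record-x264-video:
	enable_zerocopy_if_possible();            // Always runs now, before any VA-API work.
	if (!global_flags.x264_video_to_disk) {
		init_va(va_display);              // VA-API is touched only when Quick Sync is used.
	}
	setup_encode();                           // GL textures/PBOs always; VA surfaces only for Quick Sync.
	if (!global_flags.x264_video_to_disk) {
		memset(&seq_param, 0, sizeof(seq_param));      // The VA parameter blocks are
		memset(&pic_param, 0, sizeof(pic_param));      // meaningless without VA-API,
		memset(&slice_param, 0, sizeof(slice_param));  // so they are skipped too.
	}
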
@@ -1624,52 +1629,54 @@ bool QuickSyncEncoderImpl::begin_frame(int64_t pts, int64_t duration, YCbCrLumaC
 	*y_tex = surf->y_tex;
 	*cbcr_tex = surf->cbcr_tex;
 
-	VAStatus va_status = vaDeriveImage(va_dpy, surf->src_surface, &surf->surface_image);
-	CHECK_VASTATUS(va_status, "vaDeriveImage");
-
-	if (use_zerocopy) {
-		VABufferInfo buf_info;
-		buf_info.mem_type = VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME;  // or VA_SURFACE_ATTRIB_MEM_TYPE_KERNEL_DRM?
-		va_status = vaAcquireBufferHandle(va_dpy, surf->surface_image.buf, &buf_info);
-		CHECK_VASTATUS(va_status, "vaAcquireBufferHandle");
-
-		// Create Y image.
-		surf->y_egl_image = EGL_NO_IMAGE_KHR;
-		EGLint y_attribs[] = {
-			EGL_WIDTH, frame_width,
-			EGL_HEIGHT, frame_height,
-			EGL_LINUX_DRM_FOURCC_EXT, fourcc_code('R', '8', ' ', ' '),
-			EGL_DMA_BUF_PLANE0_FD_EXT, EGLint(buf_info.handle),
-			EGL_DMA_BUF_PLANE0_OFFSET_EXT, EGLint(surf->surface_image.offsets[0]),
-			EGL_DMA_BUF_PLANE0_PITCH_EXT, EGLint(surf->surface_image.pitches[0]),
-			EGL_NONE
-		};
-
-		surf->y_egl_image = eglCreateImageKHR(eglGetCurrentDisplay(), EGL_NO_CONTEXT, EGL_LINUX_DMA_BUF_EXT, NULL, y_attribs);
-		assert(surf->y_egl_image != EGL_NO_IMAGE_KHR);
-
-		// Associate Y image to a texture.
-		glBindTexture(GL_TEXTURE_2D, *y_tex);
-		glEGLImageTargetTexture2DOES(GL_TEXTURE_2D, surf->y_egl_image);
-
-		// Create CbCr image.
-		surf->cbcr_egl_image = EGL_NO_IMAGE_KHR;
-		EGLint cbcr_attribs[] = {
-			EGL_WIDTH, frame_width,
-			EGL_HEIGHT, frame_height,
-			EGL_LINUX_DRM_FOURCC_EXT, fourcc_code('G', 'R', '8', '8'),
-			EGL_DMA_BUF_PLANE0_FD_EXT, EGLint(buf_info.handle),
-			EGL_DMA_BUF_PLANE0_OFFSET_EXT, EGLint(surf->surface_image.offsets[1]),
-			EGL_DMA_BUF_PLANE0_PITCH_EXT, EGLint(surf->surface_image.pitches[1]),
-			EGL_NONE
-		};
-
-		surf->cbcr_egl_image = eglCreateImageKHR(eglGetCurrentDisplay(), EGL_NO_CONTEXT, EGL_LINUX_DMA_BUF_EXT, NULL, cbcr_attribs);
-		assert(surf->cbcr_egl_image != EGL_NO_IMAGE_KHR);
-
-		// Associate CbCr image to a texture.
-		glBindTexture(GL_TEXTURE_2D, *cbcr_tex);
-		glEGLImageTargetTexture2DOES(GL_TEXTURE_2D, surf->cbcr_egl_image);
+	if (!global_flags.x264_video_to_disk) {
+		VAStatus va_status = vaDeriveImage(va_dpy, surf->src_surface, &surf->surface_image);
+		CHECK_VASTATUS(va_status, "vaDeriveImage");
+
+		if (use_zerocopy) {
+			VABufferInfo buf_info;
+			buf_info.mem_type = VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME;  // or VA_SURFACE_ATTRIB_MEM_TYPE_KERNEL_DRM?
+			va_status = vaAcquireBufferHandle(va_dpy, surf->surface_image.buf, &buf_info);
+			CHECK_VASTATUS(va_status, "vaAcquireBufferHandle");
+
+			// Create Y image.
+			surf->y_egl_image = EGL_NO_IMAGE_KHR;
+			EGLint y_attribs[] = {
+				EGL_WIDTH, frame_width,
+				EGL_HEIGHT, frame_height,
+				EGL_LINUX_DRM_FOURCC_EXT, fourcc_code('R', '8', ' ', ' '),
+				EGL_DMA_BUF_PLANE0_FD_EXT, EGLint(buf_info.handle),
+				EGL_DMA_BUF_PLANE0_OFFSET_EXT, EGLint(surf->surface_image.offsets[0]),
+				EGL_DMA_BUF_PLANE0_PITCH_EXT, EGLint(surf->surface_image.pitches[0]),
+				EGL_NONE
+			};
+
+			surf->y_egl_image = eglCreateImageKHR(eglGetCurrentDisplay(), EGL_NO_CONTEXT, EGL_LINUX_DMA_BUF_EXT, NULL, y_attribs);
+			assert(surf->y_egl_image != EGL_NO_IMAGE_KHR);
+
+			// Associate Y image to a texture.
+			glBindTexture(GL_TEXTURE_2D, *y_tex);
+			glEGLImageTargetTexture2DOES(GL_TEXTURE_2D, surf->y_egl_image);
+
+			// Create CbCr image.
+			surf->cbcr_egl_image = EGL_NO_IMAGE_KHR;
+			EGLint cbcr_attribs[] = {
+				EGL_WIDTH, frame_width,
+				EGL_HEIGHT, frame_height,
+				EGL_LINUX_DRM_FOURCC_EXT, fourcc_code('G', 'R', '8', '8'),
+				EGL_DMA_BUF_PLANE0_FD_EXT, EGLint(buf_info.handle),
+				EGL_DMA_BUF_PLANE0_OFFSET_EXT, EGLint(surf->surface_image.offsets[1]),
+				EGL_DMA_BUF_PLANE0_PITCH_EXT, EGLint(surf->surface_image.pitches[1]),
+				EGL_NONE
+			};
+
+			surf->cbcr_egl_image = eglCreateImageKHR(eglGetCurrentDisplay(), EGL_NO_CONTEXT, EGL_LINUX_DMA_BUF_EXT, NULL, cbcr_attribs);
+			assert(surf->cbcr_egl_image != EGL_NO_IMAGE_KHR);
+
+			// Associate CbCr image to a texture.
+			glBindTexture(GL_TEXTURE_2D, *cbcr_tex);
+			glEGLImageTargetTexture2DOES(GL_TEXTURE_2D, surf->cbcr_egl_image);
+		}
 	}
 
 	current_video_frame = PendingFrame{ {}, input_frames, pts, duration, ycbcr_coefficients };
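The zerocopy path above is the standard dma-buf import dance: derive a VAImage from
the VA surface, acquire its buffer handle, wrap each plane in an EGLImage, and bind
that to a GL texture so rendering lands directly in VA-API memory. Reduced to one
plane, the pattern is (a fragment, not a full program; it assumes a current EGL
context plus the same headers the patch uses, EGL/eglext.h and libdrm's
drm_fourcc.h for fourcc_code, and error handling is elided):

	// Y plane only: an 8-bit single-channel dma-buf plane becomes a GL_TEXTURE_2D.
	VABufferInfo buf_info;
	buf_info.mem_type = VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME;
	vaAcquireBufferHandle(va_dpy, surf->surface_image.buf, &buf_info);

	const EGLint y_attribs[] = {
		EGL_WIDTH, frame_width,
		EGL_HEIGHT, frame_height,
		EGL_LINUX_DRM_FOURCC_EXT, fourcc_code('R', '8', ' ', ' '),  // one 8-bit channel
		EGL_DMA_BUF_PLANE0_FD_EXT, EGLint(buf_info.handle),
		EGL_DMA_BUF_PLANE0_OFFSET_EXT, EGLint(surf->surface_image.offsets[0]),
		EGL_DMA_BUF_PLANE0_PITCH_EXT, EGLint(surf->surface_image.pitches[0]),
		EGL_NONE
	};
	EGLImageKHR image = eglCreateImageKHR(eglGetCurrentDisplay(), EGL_NO_CONTEXT,
	                                      EGL_LINUX_DMA_BUF_EXT, NULL, y_attribs);
	glBindTexture(GL_TEXTURE_2D, *y_tex);
	glEGLImageTargetTexture2DOES(GL_TEXTURE_2D, image);  // The texture now aliases the VA surface.
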
@@ -1758,12 +1765,18 @@ void QuickSyncEncoderImpl::shutdown()
 	// Encode any leftover audio in the queues, and also any delayed frames.
 	file_audio_encoder->encode_last_audio();
 
-	release_encode();
-	deinit_va();
-	file_mux.reset();
+	if (!global_flags.x264_video_to_disk) {
+		release_encode();
+		deinit_va();
+	}
 	is_shutdown = true;
 }
 
+void QuickSyncEncoderImpl::close_file()
+{
+	file_mux.reset();
+}
+
 void QuickSyncEncoderImpl::open_output_file(const std::string &filename)
 {
 	AVFormatContext *avctx = avformat_alloc_context();
@@ -1779,10 +1792,18 @@ void QuickSyncEncoderImpl::open_output_file(const std::string &filename)
 		exit(1);
 	}
 
-	string video_extradata = "";  // FIXME: See other comment about global headers.
+	string video_extradata;  // FIXME: See other comment about global headers.
+	if (global_flags.x264_video_to_disk) {
+		video_extradata = x264_encoder->get_global_headers();
+	}
+
 	AVCodecParametersWithDeleter audio_codecpar = file_audio_encoder->get_codec_parameters();
 	file_mux.reset(new Mux(avctx, frame_width, frame_height, Mux::CODEC_H264, video_extradata, audio_codecpar.get(), TIMEBASE,
 		std::bind(&DiskSpaceEstimator::report_write, disk_space_estimator, filename, _1)));
+
+	if (global_flags.x264_video_to_disk) {
+		x264_encoder->add_mux(file_mux.get());
+	}
 }
 
 void QuickSyncEncoderImpl::encode_thread_func()
@@ -1818,6 +1839,13 @@ void QuickSyncEncoderImpl::encode_thread_func()
 			// Pass the frame on to x264 (or uncompressed to HTTP) as needed.
 			// Note that this implicitly waits for the frame to be done rendering.
 			pass_frame(frame, display_frame_num, frame.pts, frame.duration);
+
+			if (global_flags.x264_video_to_disk) {
+				unique_lock<mutex> lock(storage_task_queue_mutex);
+				release_gl_surface(display_frame_num);
+				continue;
+			}
+
 			reorder_buffer[display_frame_num] = move(frame);
 
 			// Now encode as many QuickSync frames as we can using the frames we have available.
@@ -1933,7 +1961,7 @@ void QuickSyncEncoderImpl::pass_frame(QuickSyncEncoderImpl::PendingFrame frame,
 	uint8_t *data = reinterpret_cast<uint8_t *>(surf->y_ptr);
 	if (global_flags.uncompressed_video_to_http) {
 		add_packet_for_uncompressed_frame(pts, duration, data);
-	} else if (global_flags.x264_video_to_http) {
+	} else if (global_flags.x264_video_to_http || global_flags.x264_video_to_disk) {
 		x264_encoder->add_frame(pts, duration, frame.ycbcr_coefficients, data, received_ts);
 	}
 }
@@ -2058,6 +2086,11 @@ void QuickSyncEncoder::shutdown()
 	impl->shutdown();
 }
 
+void QuickSyncEncoder::close_file()
+{
+	impl->close_file();
+}
+
 void QuickSyncEncoder::set_stream_mux(Mux *mux)
 {
 	impl->set_stream_mux(mux);
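The shutdown()/close_file() split above exists because, with --record-x264-video,
the x264 encoder still holds delayed (reordered) frames when the QuickSync encoder
stops accepting input; the file mux has to stay open until x264 has flushed into
it. The intended teardown order, as VideoEncoder uses it later in this patch:

	// Teardown with --record-x264-video (see ~VideoEncoder and do_cut below):
	quicksync_encoder->shutdown();    // Stop feeding frames; flush audio. File mux still open.
	x264_encoder.reset(nullptr);      // Destructor drains x264's delayed frames into the file mux.
	quicksync_encoder->close_file();  // Only now is it safe to write the trailer and close.
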
diff --git a/quicksync_encoder.h b/quicksync_encoder.h
index a247ee8..eebabbd 100644
--- a/quicksync_encoder.h
+++ b/quicksync_encoder.h
@@ -1,5 +1,10 @@
-// Hardware H.264 encoding via VAAPI. Heavily modified based on example
-// code by Intel. Intel's original copyright and license is reproduced below:
+// Hardware H.264 encoding via VAAPI. Also orchestrates the H.264 encoding
+// in general; this is unfortunate, and probably needs a cleanup. In particular,
+// even if you don't actually use Quick Sync for anything, this class
+// (or actually, QuickSyncEncoderImpl) still takes on a pretty central role.
+//
+// Heavily modified based on example code by Intel. Intel's original copyright
+// and license is reproduced below:
 //
 // Copyright (c) 2007-2013 Intel Corporation. All Rights Reserved.
 //
@@ -64,6 +69,7 @@ public:
 	bool begin_frame(int64_t pts, int64_t duration, movit::YCbCrLumaCoefficients ycbcr_coefficients, const std::vector<RefCountedFrame> &input_frames, GLuint *y_tex, GLuint *cbcr_tex);
 	RefCountedGLsync end_frame();
 	void shutdown();  // Blocking. Does not require an OpenGL context.
+	void close_file();  // Does not require an OpenGL context. Must be run after shutdown.
 	void release_gl_resources();  // Requires an OpenGL context. Must be run after shutdown.
 	int64_t global_delay() const;  // So we never get negative dts.
diff --git a/quicksync_encoder_impl.h b/quicksync_encoder_impl.h
index 679f2a2..1846132 100644
--- a/quicksync_encoder_impl.h
+++ b/quicksync_encoder_impl.h
@@ -39,6 +39,7 @@ public:
 	bool begin_frame(int64_t pts, int64_t duration, movit::YCbCrLumaCoefficients ycbcr_coefficients, const std::vector<RefCountedFrame> &input_frames, GLuint *y_tex, GLuint *cbcr_tex);
 	RefCountedGLsync end_frame();
 	void shutdown();
+	void close_file();
 	void release_gl_resources();
 	void set_stream_mux(Mux *mux)
 	{
@@ -67,13 +68,14 @@ private:
 		movit::YCbCrLumaCoefficients ycbcr_coefficients;
 	};
 	struct GLSurface {
+		GLuint y_tex, cbcr_tex;
+
+		// Only if x264_video_to_disk == false.
 		VASurfaceID src_surface, ref_surface;
 		VABufferID coded_buf;
-
 		VAImage surface_image;
-		GLuint y_tex, cbcr_tex;
 
-		// Only if use_zerocopy == true.
+		// Only if use_zerocopy == true (which implies x264_video_to_disk == false).
 		EGLImage y_egl_image, cbcr_egl_image;
 
 		// Only if use_zerocopy == false.
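The regrouped GLSurface above encodes a small lattice of invariants: the GL
textures always exist; the VA-API members are only valid when Quick Sync is in
use; and the EGL images additionally require zerocopy, which itself requires
Quick Sync. Spelled out (a sketch, not patch code; flag and member names as in
the patch):

	// Which GLSurface members are valid in which mode:
	//
	//                        y_tex/cbcr_tex   VA members   EGL images   PBO members
	//   zerocopy (QSV)            yes            yes          yes           no
	//   readback (QSV)            yes            yes          no            yes
	//   --record-x264-video       yes            no           no            yes
	bool va_members_valid  = !global_flags.x264_video_to_disk;
	bool egl_images_valid  = use_zerocopy;   // implies va_members_valid
	bool pbo_members_valid = !use_zerocopy;
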
diff --git a/video_encoder.cpp b/video_encoder.cpp
index fe0ecb1..b7e36bd 100644
--- a/video_encoder.cpp
+++ b/video_encoder.cpp
@@ -57,7 +57,7 @@ VideoEncoder::VideoEncoder(ResourcePool *resource_pool, QSurface *surface, const
 	} else {
 		stream_audio_encoder.reset(new AudioEncoder(global_flags.stream_audio_codec_name, global_flags.stream_audio_codec_bitrate, oformat));
 	}
-	if (global_flags.x264_video_to_http) {
+	if (global_flags.x264_video_to_http || global_flags.x264_video_to_disk) {
 		x264_encoder.reset(new X264Encoder(oformat));
 	}
 
@@ -68,12 +68,15 @@ VideoEncoder::VideoEncoder(ResourcePool *resource_pool, QSurface *surface, const
 	stream_audio_encoder->add_mux(stream_mux.get());
 	quicksync_encoder->set_stream_mux(stream_mux.get());
 	if (global_flags.x264_video_to_http) {
-		x264_encoder->set_mux(stream_mux.get());
+		x264_encoder->add_mux(stream_mux.get());
 	}
 }
 
 VideoEncoder::~VideoEncoder()
 {
+	quicksync_encoder->shutdown();
+	x264_encoder.reset(nullptr);
+	quicksync_encoder->close_file();
 	quicksync_encoder.reset(nullptr);
 	while (quicksync_encoders_in_shutdown.load() > 0) {
 		usleep(10000);
@@ -94,8 +97,14 @@ void VideoEncoder::do_cut(int frame)
 	stream_mux->plug();
 	lock_guard<mutex> lock(qs_mu);
 	QuickSyncEncoder *old_encoder = quicksync_encoder.release();  // When we go C++14, we can use move capture instead.
-	thread([old_encoder, this]{
+	X264Encoder *old_x264_encoder = nullptr;
+	if (global_flags.x264_video_to_disk) {
+		old_x264_encoder = x264_encoder.release();
+	}
+	thread([old_encoder, old_x264_encoder, this]{
 		old_encoder->shutdown();
+		delete old_x264_encoder;
+		old_encoder->close_file();
 		stream_mux->unplug();
 
 		// We cannot delete the encoder here, as this thread has no OpenGL context.
@@ -104,12 +113,23 @@ void VideoEncoder::do_cut(int frame)
 		qs_needing_cleanup.emplace_back(old_encoder);
 	}).detach();
 
+	if (global_flags.x264_video_to_disk) {
+		x264_encoder.reset(new X264Encoder(oformat));
+		if (global_flags.x264_video_to_http) {
+			x264_encoder->add_mux(stream_mux.get());
+		}
+		if (overriding_bitrate != 0) {
+			x264_encoder->change_bitrate(overriding_bitrate);
+		}
+	}
+
 	quicksync_encoder.reset(new QuickSyncEncoder(filename, resource_pool, surface, va_display, width, height, oformat, x264_encoder.get(), disk_space_estimator));
 	quicksync_encoder->set_stream_mux(stream_mux.get());
 }
 
 void VideoEncoder::change_x264_bitrate(unsigned rate_kbit)
 {
+	overriding_bitrate = rate_kbit;
 	x264_encoder->change_bitrate(rate_kbit);
 }
 
@@ -153,7 +173,7 @@ void VideoEncoder::open_output_stream()
 	avctx->flags = AVFMT_FLAG_CUSTOM_IO;
 
 	string video_extradata;
-	if (global_flags.x264_video_to_http) {
+	if (global_flags.x264_video_to_http || global_flags.x264_video_to_disk) {
 		video_extradata = x264_encoder->get_global_headers();
 	}
 
diff --git a/video_encoder.h b/video_encoder.h
index 8578462..e1518ae 100644
--- a/video_encoder.h
+++ b/video_encoder.h
@@ -83,6 +83,7 @@ private:
 	std::string stream_mux_header;
 
 	std::atomic<int> quicksync_encoders_in_shutdown{0};
+	std::atomic<int> overriding_bitrate{0};
 
 	// Encoders that are shutdown, but need to call release_gl_resources()
 	// (or be deleted) from some thread with an OpenGL context.
diff --git a/x264_encoder.cpp b/x264_encoder.cpp
index 7d81d55..8e3b567 100644
--- a/x264_encoder.cpp
+++ b/x264_encoder.cpp
@@ -61,6 +61,8 @@ X264Encoder::~X264Encoder()
 
 void X264Encoder::add_frame(int64_t pts, int64_t duration, YCbCrLumaCoefficients ycbcr_coefficients, const uint8_t *data, const ReceivedTimestamps &received_ts)
 {
+	assert(!should_quit);
+
 	QueuedFrame qf;
 	qf.pts = pts;
 	qf.duration = duration;
@@ -354,5 +356,7 @@ void X264Encoder::encode_frame(X264Encoder::QueuedFrame qf)
 	}
 	pkt.duration = reinterpret_cast<intptr_t>(pic.opaque);
 
-	mux->add_packet(pkt, pic.i_pts, pic.i_dts);
+	for (Mux *mux : muxes) {
+		mux->add_packet(pkt, pic.i_pts, pic.i_dts);
+	}
 }
diff --git a/x264_encoder.h b/x264_encoder.h
index 9b6f74d..34cf702 100644
--- a/x264_encoder.h
+++ b/x264_encoder.h
@@ -24,6 +24,7 @@
 #include
 #include
 #include
+#include <vector>
 
 extern "C" {
 #include
@@ -45,7 +46,7 @@ public:
 	~X264Encoder();
 
 	// Must be called before first frame. Does not take ownership.
-	void set_mux(Mux *mux) { this->mux = mux; }
+	void add_mux(Mux *mux) { muxes.push_back(mux); }
 
 	// <data> is taken to be raw NV12 data of WIDTHxHEIGHT resolution.
 	// Does not block.
@@ -78,7 +79,7 @@ private:
 	// pool.
 	std::unique_ptr<uint8_t[]> frame_pool;
 
-	Mux *mux = nullptr;
+	std::vector<Mux *> muxes;
 
 	bool wants_global_headers;
 	std::string global_headers;
-- 
2.39.2
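
Finally, the X264Encoder change from set_mux() to add_mux() is a plain fan-out:
one encode, N muxes (the HTTP stream mux, the file mux, or both). A standalone
sketch of the pattern, with a hypothetical Mux stand-in for Nageru's class (only
add_packet() is assumed; PacketFanout is an illustrative name, not patch code):

	#include <cstdint>
	#include <cstdio>
	#include <vector>

	extern "C" {
	#include <libavcodec/avcodec.h>
	}

	// Hypothetical stand-in for Nageru's Mux; only add_packet() matters here.
	struct Mux {
		void add_packet(const AVPacket &pkt, int64_t pts, int64_t dts)
		{
			printf("packet: size=%d pts=%ld dts=%ld\n", pkt.size, long(pts), long(dts));
		}
	};

	class PacketFanout {
	public:
		// Like X264Encoder::add_mux(): must be called before the first frame,
		// since the vector is not mutex-guarded against the encoder thread.
		void add_mux(Mux *mux) { muxes.push_back(mux); }

		void deliver(const AVPacket &pkt, int64_t pts, int64_t dts)
		{
			for (Mux *mux : muxes) {          // Same packet to every registered mux,
				mux->add_packet(pkt, pts, dts);  // exactly as in encode_frame() above.
			}
		}

	private:
		std::vector<Mux *> muxes;
	};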