git.sesse.net Git - nageru/blobdiff - quicksync_encoder.cpp
Rewrite the ALSA sequencer input loop.
[nageru] / quicksync_encoder.cpp
index acf6123c2ca60cc7c4a5bea16be2b06d067cb892..4e974223d15def72a9d348a2a4eb94039077d0e2 100644 (file)
@@ -1,27 +1,17 @@
-//#include "sysdeps.h"
 #include "quicksync_encoder.h"
 
+#include <movit/resource_pool.h>  // Must be above the Xlib includes.
 #include <movit/util.h>
+
 #include <EGL/eglplatform.h>
-#include <X11/X.h>
 #include <X11/Xlib.h>
 #include <assert.h>
 #include <epoxy/egl.h>
-extern "C" {
-#include <libavcodec/avcodec.h>
-#include <libavformat/avformat.h>
-#include <libavresample/avresample.h>
-#include <libavutil/channel_layout.h>
-#include <libavutil/frame.h>
-#include <libavutil/rational.h>
-#include <libavutil/samplefmt.h>
-#include <libavutil/opt.h>
-}
-#include <libdrm/drm_fourcc.h>
+#include <fcntl.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <fcntl.h>
+#include <unistd.h>
 #include <va/va.h>
 #include <va/va_drm.h>
 #include <va/va_drmcommon.h>
@@ -29,23 +19,40 @@ extern "C" {
 #include <va/va_x11.h>
 #include <algorithm>
 #include <condition_variable>
+#include <cstddef>
 #include <cstdint>
+#include <functional>
 #include <map>
 #include <memory>
 #include <mutex>
 #include <queue>
+#include <stack>
 #include <string>
 #include <thread>
 #include <utility>
 
+extern "C" {
+
+#include <libavcodec/avcodec.h>
+#include <libavformat/avio.h>
+#include <libavutil/error.h>
+#include <libdrm/drm_fourcc.h>
+
+}  // extern "C"
+
+#include "audio_encoder.h"
 #include "context.h"
 #include "defs.h"
+#include "disk_space_estimator.h"
+#include "ffmpeg_raii.h"
 #include "flags.h"
 #include "mux.h"
+#include "ref_counted_frame.h"
 #include "timebase.h"
 #include "x264_encoder.h"
 
 using namespace std;
+using namespace std::placeholders;
 
 class QOpenGLContext;
 class QSurface;
@@ -202,14 +209,22 @@ FrameReorderer::Frame FrameReorderer::get_first_frame()
 
 class QuickSyncEncoderImpl {
 public:
-       QuickSyncEncoderImpl(QSurface *surface, const string &va_display, int width, int height, Mux *stream_mux);
+       QuickSyncEncoderImpl(const std::string &filename, movit::ResourcePool *resource_pool, QSurface *surface, const string &va_display, int width, int height, AVOutputFormat *oformat, X264Encoder *x264_encoder, DiskSpaceEstimator *disk_space_estimator);
        ~QuickSyncEncoderImpl();
        void add_audio(int64_t pts, vector<float> audio);
        bool begin_frame(GLuint *y_tex, GLuint *cbcr_tex);
        RefCountedGLsync end_frame(int64_t pts, int64_t duration, const vector<RefCountedFrame> &input_frames);
        void shutdown();
-       void open_output_file(const std::string &filename);
-       void close_output_file();
+       void release_gl_resources();
+       void set_stream_mux(Mux *mux)
+       {
+               stream_mux = mux;
+       }
+
+       // So we never get negative dts.
+       int64_t global_delay() const {
+               return int64_t(ip_period - 1) * (TIMEBASE / MAX_FPS);
+       }
 
 private:
        struct storage_task {
@@ -224,35 +239,13 @@ private:
                int64_t pts, duration;
        };
 
-       // So we never get negative dts.
-       int64_t global_delay() const {
-               return int64_t(ip_period - 1) * (TIMEBASE / MAX_FPS);
-       }
-
+       void open_output_file(const std::string &filename);
        void encode_thread_func();
        void encode_remaining_frames_as_p(int encoding_frame_num, int gop_start_display_frame_num, int64_t last_dts);
        void add_packet_for_uncompressed_frame(int64_t pts, int64_t duration, const uint8_t *data);
        void encode_frame(PendingFrame frame, int encoding_frame_num, int display_frame_num, int gop_start_display_frame_num,
                          int frame_type, int64_t pts, int64_t dts, int64_t duration);
        void storage_task_thread();
-       void encode_audio(const vector<float> &audio,
-                         vector<float> *audio_queue,
-                         int64_t audio_pts,
-                         AVCodecContext *ctx,
-                         AVAudioResampleContext *resampler,
-                         const vector<Mux *> &muxes);
-       void encode_audio_one_frame(const float *audio,
-                                   size_t num_samples,  // In each channel.
-                                   int64_t audio_pts,
-                                   AVCodecContext *ctx,
-                                   AVAudioResampleContext *resampler,
-                                   const vector<Mux *> &muxes);
-       void encode_last_audio(vector<float> *audio_queue,
-                              int64_t audio_pts,
-                              AVCodecContext *ctx,
-                              AVAudioResampleContext *resampler,
-                              const vector<Mux *> &muxes);
-       void encode_remaining_audio();
        void storage_task_enqueue(storage_task task);
        void save_codeddata(storage_task task);
        int render_packedsequence();
@@ -273,11 +266,12 @@ private:
        VADisplay va_open_display(const string &va_display);
        void va_close_display(VADisplay va_dpy);
        int setup_encode();
-       int release_encode();
+       void release_encode();
        void update_ReferenceFrames(int frame_type);
        int update_RefPicList(int frame_type);
 
        bool is_shutdown = false;
+       bool has_released_gl_resources = false;
        bool use_zerocopy;
        int drm_fd = -1;
 
@@ -296,26 +290,17 @@ private:
        int current_storage_frame;
 
        map<int, PendingFrame> pending_video_frames;  // under frame_queue_mutex
-       map<int64_t, vector<float>> pending_audio_frames;  // under frame_queue_mutex
-       int64_t last_audio_pts = 0;  // The first pts after all audio we've encoded.
+       movit::ResourcePool *resource_pool;
        QSurface *surface;
 
-       AVCodecContext *context_audio_file;
-       AVCodecContext *context_audio_stream = nullptr;  // nullptr = don't code separate audio for stream.
-
-       AVAudioResampleContext *resampler_audio_file = nullptr;
-       AVAudioResampleContext *resampler_audio_stream = nullptr;
+       unique_ptr<AudioEncoder> file_audio_encoder;
 
-       vector<float> audio_queue_file;
-       vector<float> audio_queue_stream;
+       unique_ptr<FrameReorderer> reorderer;
+       X264Encoder *x264_encoder;  // nullptr if not using x264.
 
-       Mux* stream_mux;  // To HTTP.
+       Mux* stream_mux = nullptr;  // To HTTP.
        unique_ptr<Mux> file_mux;  // To local disk.
 
-       AVFrame *audio_frame = nullptr;
-       unique_ptr<FrameReorderer> reorderer;
-       unique_ptr<X264Encoder> x264_encoder;  // nullptr if not using x264.
-
        Display *x11_display = nullptr;
 
        // Encoder parameters
@@ -373,6 +358,8 @@ private:
        int frame_height;
        int frame_width_mbaligned;
        int frame_height_mbaligned;
+
+       DiskSpaceEstimator *disk_space_estimator;
 };
 
 // Supposedly vaRenderPicture() is supposed to destroy the buffer implicitly,
@@ -1184,8 +1171,8 @@ int QuickSyncEncoderImpl::setup_encode()
     VAStatus va_status;
     VASurfaceID *tmp_surfaceid;
     int codedbuf_size, i;
-    static VASurfaceID src_surface[SURFACE_NUM];
-    static VASurfaceID ref_surface[SURFACE_NUM];
+    VASurfaceID src_surface[SURFACE_NUM];
+    VASurfaceID ref_surface[SURFACE_NUM];
     
     va_status = vaCreateConfig(va_dpy, h264_profile, VAEntrypointEncSlice,
             &config_attrib[0], config_attrib_num, &config_id);
@@ -1236,17 +1223,12 @@ int QuickSyncEncoderImpl::setup_encode()
     //glGenFramebuffers(SURFACE_NUM, fbos);
     
     for (i = 0; i < SURFACE_NUM; i++) {
-        glGenTextures(1, &gl_surfaces[i].y_tex);
-        glGenTextures(1, &gl_surfaces[i].cbcr_tex);
-
-        if (!use_zerocopy) {
-            // Create Y image.
-            glBindTexture(GL_TEXTURE_2D, gl_surfaces[i].y_tex);
-            glTexStorage2D(GL_TEXTURE_2D, 1, GL_R8, frame_width, frame_height);
-
-            // Create CbCr image.
-            glBindTexture(GL_TEXTURE_2D, gl_surfaces[i].cbcr_tex);
-            glTexStorage2D(GL_TEXTURE_2D, 1, GL_RG8, frame_width / 2, frame_height / 2);
+        if (use_zerocopy) {
+            gl_surfaces[i].y_tex = resource_pool->create_2d_texture(GL_R8, 1, 1);
+            gl_surfaces[i].cbcr_tex = resource_pool->create_2d_texture(GL_RG8, 1, 1);
+        } else {
+            gl_surfaces[i].y_tex = resource_pool->create_2d_texture(GL_R8, frame_width, frame_height);
+            gl_surfaces[i].cbcr_tex = resource_pool->create_2d_texture(GL_RG8, frame_width / 2, frame_height / 2);
 
             // Generate a PBO to read into. It doesn't necessarily fit 1:1 with the VA-API
             // buffers, due to potentially differing pitch.
@@ -1670,146 +1652,8 @@ void QuickSyncEncoderImpl::save_codeddata(storage_task task)
                        stream_mux->add_packet(pkt, task.pts + global_delay(), task.dts + global_delay());
                }
        }
-       // Encode and add all audio frames up to and including the pts of this video frame.
-       for ( ;; ) {
-               int64_t audio_pts;
-               vector<float> audio;
-               {
-                       unique_lock<mutex> lock(frame_queue_mutex);
-                       frame_queue_nonempty.wait(lock, [this]{ return storage_thread_should_quit || !pending_audio_frames.empty(); });
-                       if (storage_thread_should_quit && pending_audio_frames.empty()) return;
-                       auto it = pending_audio_frames.begin();
-                       if (it->first > task.pts) break;
-                       audio_pts = it->first;
-                       audio = move(it->second);
-                       pending_audio_frames.erase(it); 
-               }
-
-               if (context_audio_stream) {
-                       encode_audio(audio, &audio_queue_file, audio_pts, context_audio_file, resampler_audio_file, { file_mux.get() });
-                       encode_audio(audio, &audio_queue_stream, audio_pts, context_audio_stream, resampler_audio_stream, { stream_mux });
-               } else {
-                       encode_audio(audio, &audio_queue_file, audio_pts, context_audio_file, resampler_audio_file, { stream_mux, file_mux.get() });
-               }
-               last_audio_pts = audio_pts + audio.size() * TIMEBASE / (OUTPUT_FREQUENCY * 2);
-
-               if (audio_pts == task.pts) break;
-       }
-}
-
-void QuickSyncEncoderImpl::encode_audio(
-       const vector<float> &audio,
-       vector<float> *audio_queue,
-       int64_t audio_pts,
-       AVCodecContext *ctx,
-       AVAudioResampleContext *resampler,
-       const vector<Mux *> &muxes)
-{
-       if (ctx->frame_size == 0) {
-               // No queueing needed.
-               assert(audio_queue->empty());
-               assert(audio.size() % 2 == 0);
-               encode_audio_one_frame(&audio[0], audio.size() / 2, audio_pts, ctx, resampler, muxes);
-               return;
-       }
-
-       int64_t sample_offset = audio_queue->size();
-
-       audio_queue->insert(audio_queue->end(), audio.begin(), audio.end());
-       size_t sample_num;
-       for (sample_num = 0;
-            sample_num + ctx->frame_size * 2 <= audio_queue->size();
-            sample_num += ctx->frame_size * 2) {
-               int64_t adjusted_audio_pts = audio_pts + (int64_t(sample_num) - sample_offset) * TIMEBASE / (OUTPUT_FREQUENCY * 2);
-               encode_audio_one_frame(&(*audio_queue)[sample_num],
-                                      ctx->frame_size,
-                                      adjusted_audio_pts,
-                                      ctx,
-                                      resampler,
-                                      muxes);
-       }
-       audio_queue->erase(audio_queue->begin(), audio_queue->begin() + sample_num);
-}
-
-void QuickSyncEncoderImpl::encode_audio_one_frame(
-       const float *audio,
-       size_t num_samples,
-       int64_t audio_pts,
-       AVCodecContext *ctx,
-       AVAudioResampleContext *resampler,
-       const vector<Mux *> &muxes)
-{
-       audio_frame->pts = audio_pts + global_delay();
-       audio_frame->nb_samples = num_samples;
-       audio_frame->channel_layout = AV_CH_LAYOUT_STEREO;
-       audio_frame->format = ctx->sample_fmt;
-       audio_frame->sample_rate = OUTPUT_FREQUENCY;
-
-       if (av_samples_alloc(audio_frame->data, nullptr, 2, num_samples, ctx->sample_fmt, 0) < 0) {
-               fprintf(stderr, "Could not allocate %ld samples.\n", num_samples);
-               exit(1);
-       }
-
-       if (avresample_convert(resampler, audio_frame->data, 0, num_samples,
-                              (uint8_t **)&audio, 0, num_samples) < 0) {
-               fprintf(stderr, "Audio conversion failed.\n");
-               exit(1);
-       }
-
-       AVPacket pkt;
-       av_init_packet(&pkt);
-       pkt.data = nullptr;
-       pkt.size = 0;
-       int got_output = 0;
-       avcodec_encode_audio2(ctx, &pkt, audio_frame, &got_output);
-       if (got_output) {
-               pkt.stream_index = 1;
-               pkt.flags = 0;
-               for (Mux *mux : muxes) {
-                       mux->add_packet(pkt, pkt.pts, pkt.dts);
-               }
-       }
-
-       av_freep(&audio_frame->data[0]);
-
-       av_frame_unref(audio_frame);
-       av_free_packet(&pkt);
 }
 
-void QuickSyncEncoderImpl::encode_last_audio(
-       vector<float> *audio_queue,
-       int64_t audio_pts,
-       AVCodecContext *ctx,
-       AVAudioResampleContext *resampler,
-       const vector<Mux *> &muxes)
-{
-       if (!audio_queue->empty()) {
-               // Last frame can be whatever size we want.
-               assert(audio_queue->size() % 2 == 0);
-               encode_audio_one_frame(&(*audio_queue)[0], audio_queue->size() / 2, audio_pts, ctx, resampler, muxes);
-               audio_queue->clear();
-       }
-
-       if (ctx->codec->capabilities & AV_CODEC_CAP_DELAY) {
-               // Collect any delayed frames.
-               for ( ;; ) {
-                       int got_output = 0;
-                       AVPacket pkt;
-                       av_init_packet(&pkt);
-                       pkt.data = nullptr;
-                       pkt.size = 0;
-                       avcodec_encode_audio2(ctx, &pkt, nullptr, &got_output);
-                       if (!got_output) break;
-
-                       pkt.stream_index = 1;
-                       pkt.flags = 0;
-                       for (Mux *mux : muxes) {
-                               mux->add_packet(pkt, pkt.pts, pkt.dts);
-                       }
-                       av_free_packet(&pkt);
-               }
-       }
-}
 
 // this is weird. but it seems to put a new frame onto the queue
 void QuickSyncEncoderImpl::storage_task_enqueue(storage_task task)
@@ -1847,27 +1691,37 @@ void QuickSyncEncoderImpl::storage_task_thread()
        }
 }
 
-int QuickSyncEncoderImpl::release_encode()
+void QuickSyncEncoderImpl::release_encode()
 {
        for (unsigned i = 0; i < SURFACE_NUM; i++) {
                vaDestroyBuffer(va_dpy, gl_surfaces[i].coded_buf);
                vaDestroySurfaces(va_dpy, &gl_surfaces[i].src_surface, 1);
                vaDestroySurfaces(va_dpy, &gl_surfaces[i].ref_surface, 1);
+       }
+
+       vaDestroyContext(va_dpy, context_id);
+       vaDestroyConfig(va_dpy, config_id);
+}
 
+void QuickSyncEncoderImpl::release_gl_resources()
+{
+       assert(is_shutdown);
+       if (has_released_gl_resources) {
+               return;
+       }
+
+       for (unsigned i = 0; i < SURFACE_NUM; i++) {
                if (!use_zerocopy) {
                        glBindBuffer(GL_PIXEL_PACK_BUFFER, gl_surfaces[i].pbo);
                        glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
                        glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
                        glDeleteBuffers(1, &gl_surfaces[i].pbo);
                }
-               glDeleteTextures(1, &gl_surfaces[i].y_tex);
-               glDeleteTextures(1, &gl_surfaces[i].cbcr_tex);
+               resource_pool->release_2d_texture(gl_surfaces[i].y_tex);
+               resource_pool->release_2d_texture(gl_surfaces[i].cbcr_tex);
        }
 
-       vaDestroyContext(va_dpy, context_id);
-       vaDestroyConfig(va_dpy, config_id);
-
-       return 0;
+       has_released_gl_resources = true;
 }
 
 int QuickSyncEncoderImpl::deinit_va()
@@ -1879,67 +1733,16 @@ int QuickSyncEncoderImpl::deinit_va()
     return 0;
 }
 
-namespace {
-
-void init_audio_encoder(const string &codec_name, int bit_rate, AVCodecContext **ctx, AVAudioResampleContext **resampler)
-{
-       AVCodec *codec_audio = avcodec_find_encoder_by_name(codec_name.c_str());
-       if (codec_audio == nullptr) {
-               fprintf(stderr, "ERROR: Could not find codec '%s'\n", codec_name.c_str());
-               exit(1);
-       }
-
-       AVCodecContext *context_audio = avcodec_alloc_context3(codec_audio);
-       context_audio->bit_rate = bit_rate;
-       context_audio->sample_rate = OUTPUT_FREQUENCY;
-       context_audio->sample_fmt = codec_audio->sample_fmts[0];
-       context_audio->channels = 2;
-       context_audio->channel_layout = AV_CH_LAYOUT_STEREO;
-       context_audio->time_base = AVRational{1, TIMEBASE};
-       context_audio->flags |= CODEC_FLAG_GLOBAL_HEADER;
-       if (avcodec_open2(context_audio, codec_audio, NULL) < 0) {
-               fprintf(stderr, "Could not open codec '%s'\n", codec_name.c_str());
-               exit(1);
-       }
-
-       *ctx = context_audio;
-
-       *resampler = avresample_alloc_context();
-       if (*resampler == nullptr) {
-               fprintf(stderr, "Allocating resampler failed.\n");
-               exit(1);
-       }
-
-       av_opt_set_int(*resampler, "in_channel_layout",  AV_CH_LAYOUT_STEREO,       0);
-       av_opt_set_int(*resampler, "out_channel_layout", AV_CH_LAYOUT_STEREO,       0);
-       av_opt_set_int(*resampler, "in_sample_rate",     OUTPUT_FREQUENCY,          0);
-       av_opt_set_int(*resampler, "out_sample_rate",    OUTPUT_FREQUENCY,          0);
-       av_opt_set_int(*resampler, "in_sample_fmt",      AV_SAMPLE_FMT_FLT,         0);
-       av_opt_set_int(*resampler, "out_sample_fmt",     context_audio->sample_fmt, 0);
-
-       if (avresample_open(*resampler) < 0) {
-               fprintf(stderr, "Could not open resample context.\n");
-               exit(1);
-       }
-}
-
-}  // namespace
-
-QuickSyncEncoderImpl::QuickSyncEncoderImpl(QSurface *surface, const string &va_display, int width, int height, Mux *stream_mux)
-       : current_storage_frame(0), surface(surface), stream_mux(stream_mux), frame_width(width), frame_height(height)
+QuickSyncEncoderImpl::QuickSyncEncoderImpl(const std::string &filename, movit::ResourcePool *resource_pool, QSurface *surface, const string &va_display, int width, int height, AVOutputFormat *oformat, X264Encoder *x264_encoder, DiskSpaceEstimator *disk_space_estimator)
+       : current_storage_frame(0), resource_pool(resource_pool), surface(surface), x264_encoder(x264_encoder), frame_width(width), frame_height(height), disk_space_estimator(disk_space_estimator)
 {
-       init_audio_encoder(AUDIO_OUTPUT_CODEC_NAME, DEFAULT_AUDIO_OUTPUT_BIT_RATE, &context_audio_file, &resampler_audio_file);
-
-       if (!global_flags.stream_audio_codec_name.empty()) {
-               init_audio_encoder(global_flags.stream_audio_codec_name,
-                       global_flags.stream_audio_codec_bitrate, &context_audio_stream, &resampler_audio_stream);
-       }
+       file_audio_encoder.reset(new AudioEncoder(AUDIO_OUTPUT_CODEC_NAME, DEFAULT_AUDIO_OUTPUT_BIT_RATE, oformat));
+       open_output_file(filename);
+       file_audio_encoder->add_mux(file_mux.get());
 
        frame_width_mbaligned = (frame_width + 15) & (~15);
        frame_height_mbaligned = (frame_height + 15) & (~15);
 
-       audio_frame = av_frame_alloc();
-
        //print_input();
 
        if (global_flags.uncompressed_video_to_http ||
@@ -1947,7 +1750,9 @@ QuickSyncEncoderImpl::QuickSyncEncoderImpl(QSurface *surface, const string &va_d
                reorderer.reset(new FrameReorderer(ip_period - 1, frame_width, frame_height));
        }
        if (global_flags.x264_video_to_http) {
-               x264_encoder.reset(new X264Encoder(stream_mux));
+               assert(x264_encoder != nullptr);
+       } else {
+               assert(x264_encoder == nullptr);
        }
 
        init_va(va_display);
@@ -1972,17 +1777,14 @@ QuickSyncEncoderImpl::QuickSyncEncoderImpl(QSurface *surface, const string &va_d
                        exit(1);
                }
                encode_thread_func();
+               delete_context(context);
        });
 }
 
 QuickSyncEncoderImpl::~QuickSyncEncoderImpl()
 {
        shutdown();
-       av_frame_free(&audio_frame);
-       avresample_free(&resampler_audio_file);
-       avresample_free(&resampler_audio_stream);
-       avcodec_free_context(&context_audio_file);
-       avcodec_free_context(&context_audio_stream);
+       release_gl_resources();
 }
 
 bool QuickSyncEncoderImpl::begin_frame(GLuint *y_tex, GLuint *cbcr_tex)
@@ -2059,11 +1861,7 @@ bool QuickSyncEncoderImpl::begin_frame(GLuint *y_tex, GLuint *cbcr_tex)
 void QuickSyncEncoderImpl::add_audio(int64_t pts, vector<float> audio)
 {
        assert(!is_shutdown);
-       {
-               unique_lock<mutex> lock(frame_queue_mutex);
-               pending_audio_frames[pts] = move(audio);
-       }
-       frame_queue_nonempty.notify_all();
+       file_audio_encoder->encode_audio(audio, pts + global_delay());
 }
 
 RefCountedGLsync QuickSyncEncoderImpl::end_frame(int64_t pts, int64_t duration, const vector<RefCountedFrame> &input_frames)
@@ -2124,7 +1922,6 @@ void QuickSyncEncoderImpl::shutdown()
                frame_queue_nonempty.notify_all();
        }
        encode_thread.join();
-       x264_encoder.reset();
        {
                unique_lock<mutex> lock(storage_task_queue_mutex);
                storage_thread_should_quit = true;
@@ -2132,10 +1929,13 @@ void QuickSyncEncoderImpl::shutdown()
                storage_task_queue_changed.notify_all();
        }
        storage_thread.join();
-       encode_remaining_audio();
+
+       // Encode any leftover audio in the queues, and also any delayed frames.
+       file_audio_encoder->encode_last_audio();
 
        release_encode();
        deinit_va();
+       file_mux.reset();
        is_shutdown = true;
 }
 
@@ -2154,12 +1954,10 @@ void QuickSyncEncoderImpl::open_output_file(const std::string &filename)
                exit(1);
        }
 
-       file_mux.reset(new Mux(avctx, frame_width, frame_height, Mux::CODEC_H264, context_audio_file->codec, TIMEBASE, DEFAULT_AUDIO_OUTPUT_BIT_RATE, nullptr));
-}
-
-void QuickSyncEncoderImpl::close_output_file()
-{
-        file_mux.reset();
+       string video_extradata = "";  // FIXME: See other comment about global headers.
+       AVCodecParametersWithDeleter audio_codecpar = file_audio_encoder->get_codec_parameters();
+       file_mux.reset(new Mux(avctx, frame_width, frame_height, Mux::CODEC_H264, video_extradata, audio_codecpar.get(), TIMEBASE,
+               std::bind(&DiskSpaceEstimator::report_write, disk_space_estimator, filename, _1)));
 }
 
 void QuickSyncEncoderImpl::encode_thread_func()
@@ -2242,32 +2040,6 @@ void QuickSyncEncoderImpl::encode_remaining_frames_as_p(int encoding_frame_num,
        }
 }
 
-void QuickSyncEncoderImpl::encode_remaining_audio()
-{
-       // This really ought to be empty by now, but just to be sure...
-       for (auto &pending_frame : pending_audio_frames) {
-               int64_t audio_pts = pending_frame.first;
-               vector<float> audio = move(pending_frame.second);
-
-               if (context_audio_stream) {
-                       encode_audio(audio, &audio_queue_file, audio_pts, context_audio_file, resampler_audio_file, { file_mux.get() });
-                       encode_audio(audio, &audio_queue_stream, audio_pts, context_audio_stream, resampler_audio_stream, { stream_mux });
-               } else {
-                       encode_audio(audio, &audio_queue_file, audio_pts, context_audio_file, resampler_audio_file, { stream_mux, file_mux.get() });
-               }
-               last_audio_pts = audio_pts + audio.size() * TIMEBASE / (OUTPUT_FREQUENCY * 2);
-       }
-       pending_audio_frames.clear();
-
-       // Encode any leftover audio in the queues, and also any delayed frames.
-       if (context_audio_stream) {
-               encode_last_audio(&audio_queue_file, last_audio_pts, context_audio_file, resampler_audio_file, { file_mux.get() });
-               encode_last_audio(&audio_queue_stream, last_audio_pts, context_audio_stream, resampler_audio_stream, { stream_mux });
-       } else {
-               encode_last_audio(&audio_queue_file, last_audio_pts, context_audio_file, resampler_audio_file, { stream_mux, file_mux.get() });
-       }
-}
-
 void QuickSyncEncoderImpl::add_packet_for_uncompressed_frame(int64_t pts, int64_t duration, const uint8_t *data)
 {
        AVPacket pkt;
@@ -2358,6 +2130,9 @@ void QuickSyncEncoderImpl::encode_frame(QuickSyncEncoderImpl::PendingFrame frame
        CHECK_VASTATUS(va_status, "vaBeginPicture");
 
        if (frame_type == FRAME_IDR) {
+               // FIXME: If the mux wants global headers, we should not put the
+               // SPS/PPS before each IDR frame, but rather put it into the
+               // codec extradata (formatted differently?).
                render_sequence();
                render_picture(frame_type, display_frame_num, gop_start_display_frame_num);
                if (h264_packedheader) {
@@ -2387,8 +2162,8 @@ void QuickSyncEncoderImpl::encode_frame(QuickSyncEncoderImpl::PendingFrame frame
 }
 
 // Proxy object.
-QuickSyncEncoder::QuickSyncEncoder(QSurface *surface, const string &va_display, int width, int height, Mux *stream_mux)
-       : impl(new QuickSyncEncoderImpl(surface, va_display, width, height, stream_mux)) {}
+QuickSyncEncoder::QuickSyncEncoder(const std::string &filename, movit::ResourcePool *resource_pool, QSurface *surface, const string &va_display, int width, int height, AVOutputFormat *oformat, X264Encoder *x264_encoder, DiskSpaceEstimator *disk_space_estimator)
+       : impl(new QuickSyncEncoderImpl(filename, resource_pool, surface, va_display, width, height, oformat, x264_encoder, disk_space_estimator)) {}
 
 // Must be defined here because unique_ptr<> destructor needs to know the impl.
 QuickSyncEncoder::~QuickSyncEncoder() {}
@@ -2413,12 +2188,11 @@ void QuickSyncEncoder::shutdown()
        impl->shutdown();
 }
 
-void QuickSyncEncoder::open_output_file(const std::string &filename)
+void QuickSyncEncoder::set_stream_mux(Mux *mux)
 {
-       impl->open_output_file(filename);
+       impl->set_stream_mux(mux);
 }
 
-void QuickSyncEncoder::close_output_file()
-{
-       impl->close_output_file();
+int64_t QuickSyncEncoder::global_delay() const {
+       return impl->global_delay();
 }