IWYU-fix nageru/*.cpp.
diff --git a/nageru/quicksync_encoder.cpp b/nageru/quicksync_encoder.cpp
index b5d7c2d9dccf428665c954b161a57e6f76c3570c..36bf15883a3db975f40e90118102950f0f083390 100644
--- a/nageru/quicksync_encoder.cpp
+++ b/nageru/quicksync_encoder.cpp
@@ -1,25 +1,24 @@
 #include "quicksync_encoder.h"
 
+#include <atomic>
+#include <errno.h>
+#include <epoxy/egl.h>
+#include <epoxy/gl.h>
 #include <movit/image_format.h>
 #include <movit/resource_pool.h>  // Must be above the Xlib includes.
 #include <movit/util.h>
 
 #include <EGL/eglplatform.h>
-#include <X11/Xlib.h>
 #include <assert.h>
 #include <epoxy/egl.h>
-#include <fcntl.h>
 #include <glob.h>
 #include <pthread.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <unistd.h>
 #include <va/va.h>
-#include <va/va_drm.h>
 #include <va/va_drmcommon.h>
 #include <va/va_enc_h264.h>
-#include <va/va_x11.h>
 #include <algorithm>
 #include <chrono>
 #include <condition_variable>
 #include <memory>
 #include <mutex>
 #include <queue>
-#include <stack>
 #include <string>
 #include <thread>
 #include <utility>
+#include <vector>
 
 extern "C" {
 
-#include <libavcodec/avcodec.h>
+#include <drm_fourcc.h>
+#include <libavcodec/packet.h>
+#include <libavformat/avformat.h>
 #include <libavformat/avio.h>
 #include <libavutil/error.h>
-#include <libdrm/drm_fourcc.h>
 
 }  // namespace
 
 #include "audio_encoder.h"
-#include "shared/context.h"
 #include "defs.h"
-#include "shared/disk_space_estimator.h"
-#include "shared/ffmpeg_raii.h"
 #include "flags.h"
-#include "shared/mux.h"
 #include "print_latency.h"
 #include "quicksync_encoder_impl.h"
 #include "ref_counted_frame.h"
+#include "shared/context.h"
+#include "shared/disk_space_estimator.h"
+#include "shared/ffmpeg_raii.h"
+#include "shared/metrics.h"
+#include "shared/mux.h"
+#include "shared/ref_counted_gl_sync.h"
 #include "shared/timebase.h"
+#include "shared/va_display.h"
+#include "v4l_output.h"
 #include "x264_encoder.h"
 
 using namespace movit;
@@ -80,7 +84,7 @@ std::atomic<int64_t> metric_quick_sync_stalled_frames{0};
 #define CHECK_VASTATUS(va_status, func)                                 \
     if (va_status != VA_STATUS_SUCCESS) {                               \
         fprintf(stderr, "%s:%d (%s) failed with %d\n", __func__, __LINE__, func, va_status); \
-        exit(1);                                                        \
+        abort();                                                        \
     }
 
 #undef BUFFER_OFFSET
@@ -592,51 +596,6 @@ int QuickSyncEncoderImpl::build_packed_slice_buffer(unsigned char **header_buffe
                                            {IDR(PBB)(PBB)}.
 */
 
-// General pts/dts strategy:
-//
-// Getting pts and dts right with variable frame rate (VFR) and B-frames can be a
-// bit tricky. We assume first of all that the frame rate never goes _above_
-// MAX_FPS, which gives us a frame period N. The decoder can always decode
-// in at least this speed, as long at dts <= pts (the frame is not attempted
-// presented before it is decoded). Furthermore, we never have longer chains of
-// B-frames than a fixed constant C. (In a B-frame chain, we say that the base
-// I/P-frame has order O=0, the B-frame depending on it directly has order O=1,
-// etc. The last frame in the chain, which no B-frames depend on, is the “tip”
-// frame, with an order O <= C.)
-//
-// Many strategies are possible, but we establish these rules:
-//
-//  - Tip frames have dts = pts - (C-O)*N.
-//  - Non-tip frames have dts = dts_last + N.
-//
-// An example, with C=2 and N=10 and the data flow showed with arrows:
-//
-//        I  B  P  B  B  P
-//   pts: 30 40 50 60 70 80
-//        ↓  ↓     ↓
-//   dts: 10 30 20 60 50←40
-//         |  |  ↑        ↑
-//         `--|--'        |
-//             `----------'
-//
-// To show that this works fine also with irregular spacings, let's say that
-// the third frame is delayed a bit (something earlier was dropped). Now the
-// situation looks like this:
-//
-//        I  B  P  B  B   P
-//   pts: 30 40 80 90 100 110
-//        ↓  ↓     ↓
-//   dts: 10 30 20 90 50←40
-//         |  |  ↑        ↑
-//         `--|--'        |
-//             `----------'
-//
-// The resetting on every tip frame makes sure dts never ends up lagging a lot
-// behind pts, and the subtraction of (C-O)*N makes sure pts <= dts.
-//
-// In the output of this function, if <dts_lag> is >= 0, it means to reset the
-// dts from the current pts minus <dts_lag>, while if it's -1, the frame is not
-// a tip frame and should be given a dts based on the previous one.
 #define FRAME_P 0
 #define FRAME_B 1
 #define FRAME_I 2
@@ -645,12 +604,10 @@ void encoding2display_order(
     int encoding_order, int intra_period,
     int intra_idr_period, int ip_period,
     int *displaying_order,
-    int *frame_type, int *pts_lag)
+    int *frame_type)
 {
     int encoding_order_gop = 0;
 
-    *pts_lag = 0;
-
     if (intra_period == 1) { /* all are I/IDR frames */
         *displaying_order = encoding_order;
         if (intra_idr_period == 0)
@@ -682,20 +639,13 @@ void encoding2display_order(
 
     // We have B-frames. Sequence is like IDR (PBB)(PBB)(IBB)(PBB).
     encoding_order_gop = (intra_idr_period == 0) ? encoding_order : (encoding_order % (intra_idr_period + 1));
-    *pts_lag = -1;  // Most frames are not tip frames.
          
     if (encoding_order_gop == 0) { /* the first frame */
         *frame_type = FRAME_IDR;
         *displaying_order = encoding_order;
-        // IDR frames are a special case; I honestly can't find the logic behind
-        // why this is the right thing, but it seems to line up nicely in practice :-)
-        *pts_lag = TIMEBASE / MAX_FPS;
     } else if (((encoding_order_gop - 1) % ip_period) != 0) { /* B frames */
         *frame_type = FRAME_B;
         *displaying_order = encoding_order - 1;
-        if ((encoding_order_gop % ip_period) == 0) {
-            *pts_lag = 0;  // Last B-frame.
-        }
     } else if (intra_period != 0 && /* have I frames */
                encoding_order_gop >= 2 &&
                ((encoding_order_gop - 1) / ip_period % (intra_period / ip_period)) == 0) {
@@ -707,17 +657,83 @@ void encoding2display_order(
     }
 }
 
+// General pts/dts strategy:
+//
+// Getting pts and dts right with variable frame rate (VFR) and B-frames can be a
+// bit tricky. This strategy roughly matches what x264 seems to do: We take in
+// the pts as the frames are encoded, and reuse that as dts in the same order,
+// slightly offset.
+//
+// If we don't have B-frames (only I and P), this means pts == dts always.
+// This is the simple case. Now consider the case with a single B-frame:
+//
+//        I  B  P  B  P
+//   pts: 30 40 50 60 70
+//
+// Since we always inherently encode P-frames before B-frames, this means that
+// we see them in this order, which we can _almost_ use for dts:
+//
+//   dts: 30 50 40 70 60
+//
+// the only problem here is that for the B-frames, pts < dts. We solve this by
+// priming the queue at the very start with some made-up dts:
+//
+//        I  B  P  B  P
+//   pts: 30 40 50 60 70
+//   dts: xx 30 50 40 70 60
+//
+// Now we have all the desirable properties: pts >= dts, successive dts delta
+// is never larger than the decoder can figure out (assuming, of course,
+// the pts has that property), and there's minimal lag between pts and dts.
+// For the made-up dts, we assume 1/60 sec per frame, which should generally
+// be reasonable. dts can go negative, but this is corrected using global_delay()
+// by delaying both pts and dts (although we probably don't need to).
+//
+// If there's more than one B-frame possible, we simply insert more of them
+// (here shown with some irregular spacing, assuming B-frames don't depend
+// on each other and simply go back-to-front):
+//
+//        I  B  B  B  P  B  B  B  P
+//   pts: 30 40 55 60 65 66 67 68 80
+//   dts: xx yy zz 30 65 60 55 40 80 68 67 66
+class DTSReorderer {
+public:
+       DTSReorderer(int num_b_frames) : num_b_frames(num_b_frames) {}
+
+       void push_pts(int64_t pts)
+       {
+               if (buf.empty() && num_b_frames > 0) {  // First frame.
+                       int64_t base_dts = pts - num_b_frames * (TIMEBASE / TYPICAL_FPS);
+                       for (int i = 0; i < num_b_frames; ++i) {
+                               buf.push(base_dts + i * (TIMEBASE / TYPICAL_FPS));
+                       }
+               }
+               buf.push(pts);
+       }
+
+       int64_t pop_dts()
+       {
+               assert(!buf.empty());
+               int64_t dts = buf.front();
+               buf.pop();
+               return dts;
+       }
+
+private:
+       const int num_b_frames;
+       queue<int64_t> buf;
+};
 
 void QuickSyncEncoderImpl::enable_zerocopy_if_possible()
 {
        if (global_flags.x264_video_to_disk) {
                // Quick Sync is entirely disabled.
                use_zerocopy = false;
-       } else if (global_flags.uncompressed_video_to_http) {
-               fprintf(stderr, "Disabling zerocopy H.264 encoding due to --http-uncompressed-video.\n");
-               use_zerocopy = false;
        } else if (global_flags.x264_video_to_http) {
-               fprintf(stderr, "Disabling zerocopy H.264 encoding due to --http-x264-video.\n");
+               use_zerocopy = false;
+       } else if (global_flags.av1_video_to_http) {
+               use_zerocopy = false;
+       } else if (!global_flags.v4l_output_device.empty()) {
                use_zerocopy = false;
        } else {
                use_zerocopy = true;
@@ -725,104 +741,19 @@ void QuickSyncEncoderImpl::enable_zerocopy_if_possible()
        global_flags.use_zerocopy = use_zerocopy;
 }
 
-VADisplayWithCleanup::~VADisplayWithCleanup()
+static unique_ptr<VADisplayWithCleanup> try_open_va_h264(const string &va_display, VAProfile *h264_profile, string *error)
 {
-       if (va_dpy != nullptr) {
-               vaTerminate(va_dpy);
-       }
-       if (x11_display != nullptr) {
-               XCloseDisplay(x11_display);
-       }
-       if (drm_fd != -1) {
-               close(drm_fd);
-       }
-}
-
-unique_ptr<VADisplayWithCleanup> va_open_display(const string &va_display)
-{
-       if (va_display.empty() || va_display[0] != '/') {  // An X display.
-               Display *x11_display = XOpenDisplay(va_display.empty() ? nullptr : va_display.c_str());
-               if (x11_display == nullptr) {
-                       fprintf(stderr, "error: can't connect to X server!\n");
-                       return nullptr;
-               }
-
-               unique_ptr<VADisplayWithCleanup> ret(new VADisplayWithCleanup);
-               ret->x11_display = x11_display;
-               ret->can_use_zerocopy = true;
-               ret->va_dpy = vaGetDisplay(x11_display);
-               if (ret->va_dpy == nullptr) {
-                       return nullptr;
-               }
-               return ret;
-       } else {  // A DRM node on the filesystem (e.g. /dev/dri/renderD128).
-               int drm_fd = open(va_display.c_str(), O_RDWR);
-               if (drm_fd == -1) {
-                       perror(va_display.c_str());
-                       return NULL;
-               }
-               unique_ptr<VADisplayWithCleanup> ret(new VADisplayWithCleanup);
-               ret->drm_fd = drm_fd;
-               ret->can_use_zerocopy = false;
-               ret->va_dpy = vaGetDisplayDRM(drm_fd);
-               if (ret->va_dpy == nullptr) {
-                       return nullptr;
-               }
-               return ret;
-       }
-}
-
-unique_ptr<VADisplayWithCleanup> try_open_va(const string &va_display, VAProfile *h264_profile, string *error)
-{
-       unique_ptr<VADisplayWithCleanup> va_dpy = va_open_display(va_display);
-       if (va_dpy == nullptr) {
-               if (error) *error = "Opening VA display failed";
-               return nullptr;
-       }
-       int major_ver, minor_ver;
-       VAStatus va_status = vaInitialize(va_dpy->va_dpy, &major_ver, &minor_ver);
-       if (va_status != VA_STATUS_SUCCESS) {
-               char buf[256];
-               snprintf(buf, sizeof(buf), "vaInitialize() failed with status %d\n", va_status);
-               if (error != nullptr) *error = buf;
-               return nullptr;
-       }
-
-       int num_entrypoints = vaMaxNumEntrypoints(va_dpy->va_dpy);
-       unique_ptr<VAEntrypoint[]> entrypoints(new VAEntrypoint[num_entrypoints]);
-       if (entrypoints == nullptr) {
-               if (error != nullptr) *error = "Failed to allocate memory for VA entry points";
-               return nullptr;
-       }
-
-       // Try the profiles from highest to lowest until we find one that can be encoded.
-       constexpr VAProfile profile_list[] = { VAProfileH264High, VAProfileH264Main, VAProfileH264ConstrainedBaseline };
-       for (unsigned i = 0; i < sizeof(profile_list) / sizeof(profile_list[0]); ++i) {
-               vaQueryConfigEntrypoints(va_dpy->va_dpy, profile_list[i], entrypoints.get(), &num_entrypoints);
-               for (int slice_entrypoint = 0; slice_entrypoint < num_entrypoints; slice_entrypoint++) {
-                       if (entrypoints[slice_entrypoint] != VAEntrypointEncSlice) {
-                               continue;
-                       }
-
-                       // We found a usable encoder, so return it.
-                       if (h264_profile != nullptr) {
-                               *h264_profile = profile_list[i];
-                       }
-                       return va_dpy;
-               }
-       }
-
-       if (error != nullptr) *error = "Can't find VAEntrypointEncSlice for H264 profiles";
-       return nullptr;
+       return try_open_va(va_display, { VAProfileH264High, VAProfileH264Main, VAProfileH264ConstrainedBaseline },
+               VAEntrypointEncSlice, /*desired_configs=*/{}, h264_profile, error);
 }
 
 int QuickSyncEncoderImpl::init_va(const string &va_display)
 {
     string error;
-    va_dpy = try_open_va(va_display, &h264_profile, &error);
+    va_dpy = try_open_va_h264(va_display, &h264_profile, &error);
     if (va_dpy == nullptr) {
        fprintf(stderr, "error: %s\n", error.c_str());
-        exit(1);
+        abort();
     }
     if (!va_dpy->can_use_zerocopy) {
         use_zerocopy = false;
@@ -860,7 +791,7 @@ int QuickSyncEncoderImpl::init_va(const string &va_display)
     /* check the interested configattrib */
     if ((attrib[VAConfigAttribRTFormat].value & VA_RT_FORMAT_YUV420) == 0) {
         printf("Not find desired YUV420 RT format\n");
-        exit(1);
+        abort();
     } else {
         config_attrib[config_attrib_num].type = VAConfigAttribRTFormat;
         config_attrib[config_attrib_num].value = VA_RT_FORMAT_YUV420;
@@ -870,7 +801,7 @@ int QuickSyncEncoderImpl::init_va(const string &va_display)
     if (attrib[VAConfigAttribRateControl].value != VA_ATTRIB_NOT_SUPPORTED) {
         if (!(attrib[VAConfigAttribRateControl].value & VA_RC_CQP)) {
             fprintf(stderr, "ERROR: VA-API encoder does not support CQP mode.\n");
-            exit(1);
+            abort();
         }
 
         config_attrib[config_attrib_num].type = VAConfigAttribRateControl;
@@ -985,7 +916,7 @@ int QuickSyncEncoderImpl::setup_encode()
                        gl_surfaces[i].y_tex = resource_pool->create_2d_texture(GL_R8, 1, 1);
                        gl_surfaces[i].cbcr_tex = resource_pool->create_2d_texture(GL_RG8, 1, 1);
                } else {
-                       size_t bytes_per_pixel = (global_flags.x264_bit_depth > 8) ? 2 : 1;
+                       size_t bytes_per_pixel = (global_flags.bit_depth > 8) ? 2 : 1;
 
                        // Generate a PBO to read into. It doesn't necessarily fit 1:1 with the VA-API
                        // buffers, due to potentially differing pitch.
@@ -1023,7 +954,7 @@ void QuickSyncEncoderImpl::update_ReferenceFrames(int current_display_frame, int
     pic_param.CurrPic.frame_idx = current_ref_frame_num;
 
     CurrentCurrPic.flags = VA_PICTURE_H264_SHORT_TERM_REFERENCE;
-    unique_lock<mutex> lock(storage_task_queue_mutex);
+    lock_guard<mutex> lock(storage_task_queue_mutex);
 
     // Insert the new frame at the start of the reference queue.
     reference_frames.push_front(ReferenceFrame{ CurrentCurrPic, current_display_frame });
@@ -1423,9 +1354,12 @@ void QuickSyncEncoderImpl::save_codeddata(GLSurface *surf, storage_task task)
                if (file_mux) {
                        file_mux->add_packet(pkt, task.pts + global_delay(), task.dts + global_delay());
                }
-               if (!global_flags.uncompressed_video_to_http &&
-                   !global_flags.x264_video_to_http) {
-                       stream_mux->add_packet(pkt, task.pts + global_delay(), task.dts + global_delay());
+               if (!global_flags.x264_video_to_http &&
+                   !global_flags.av1_video_to_http) {
+                       http_mux->add_packet(pkt, task.pts + global_delay(), task.dts + global_delay());
+                       if (srt_mux != nullptr) {
+                               srt_mux->add_packet(pkt, task.pts + global_delay(), task.dts + global_delay());
+                       }
                }
        }
 }
@@ -1434,7 +1368,8 @@ void QuickSyncEncoderImpl::save_codeddata(GLSurface *surf, storage_task task)
 // this is weird. but it seems to put a new frame onto the queue
 void QuickSyncEncoderImpl::storage_task_enqueue(storage_task task)
 {
-       unique_lock<mutex> lock(storage_task_queue_mutex);
+       assert(task.pts >= task.dts);
+       lock_guard<mutex> lock(storage_task_queue_mutex);
        storage_task_queue.push(move(task));
        storage_task_queue_changed.notify_all();
 }
@@ -1468,7 +1403,7 @@ void QuickSyncEncoderImpl::storage_task_thread()
 
                // Unlock the frame, and all its references.
                {
-                       unique_lock<mutex> lock(storage_task_queue_mutex);
+                       lock_guard<mutex> lock(storage_task_queue_mutex);
                        release_gl_surface(display_order);
 
                        for (size_t frame_num : ref_display_frame_numbers) {
@@ -1481,13 +1416,18 @@ void QuickSyncEncoderImpl::storage_task_thread()
 void QuickSyncEncoderImpl::release_encode()
 {
        for (unsigned i = 0; i < SURFACE_NUM; i++) {
-               vaDestroyBuffer(va_dpy->va_dpy, gl_surfaces[i].coded_buf);
-               vaDestroySurfaces(va_dpy->va_dpy, &gl_surfaces[i].src_surface, 1);
-               vaDestroySurfaces(va_dpy->va_dpy, &gl_surfaces[i].ref_surface, 1);
+               VAStatus va_status = vaDestroyBuffer(va_dpy->va_dpy, gl_surfaces[i].coded_buf);
+               CHECK_VASTATUS(va_status, "vaDestroyBuffer");
+               va_status = vaDestroySurfaces(va_dpy->va_dpy, &gl_surfaces[i].src_surface, 1);
+               CHECK_VASTATUS(va_status, "vaDestroySurfaces");
+               va_status = vaDestroySurfaces(va_dpy->va_dpy, &gl_surfaces[i].ref_surface, 1);
+               CHECK_VASTATUS(va_status, "vaDestroySurfaces");
        }
 
-       vaDestroyContext(va_dpy->va_dpy, context_id);
-       vaDestroyConfig(va_dpy->va_dpy, config_id);
+       VAStatus va_status = vaDestroyContext(va_dpy->va_dpy, context_id);
+       CHECK_VASTATUS(va_status, "vaDestroyContext");
+       va_status = vaDestroyConfig(va_dpy->va_dpy, config_id);
+       CHECK_VASTATUS(va_status, "vaDestroyConfig");
 }
 
 void QuickSyncEncoderImpl::release_gl_resources()
@@ -1512,8 +1452,8 @@ void QuickSyncEncoderImpl::release_gl_resources()
        has_released_gl_resources = true;
 }
 
-QuickSyncEncoderImpl::QuickSyncEncoderImpl(const std::string &filename, ResourcePool *resource_pool, QSurface *surface, const string &va_display, int width, int height, AVOutputFormat *oformat, X264Encoder *x264_encoder, DiskSpaceEstimator *disk_space_estimator)
-       : current_storage_frame(0), resource_pool(resource_pool), surface(surface), x264_encoder(x264_encoder), frame_width(width), frame_height(height), disk_space_estimator(disk_space_estimator)
+QuickSyncEncoderImpl::QuickSyncEncoderImpl(const std::string &filename, ResourcePool *resource_pool, QSurface *surface, const string &va_display, int width, int height, const AVOutputFormat *oformat, VideoCodecInterface *http_encoder, VideoCodecInterface *disk_encoder, DiskSpaceEstimator *disk_space_estimator)
+       : current_storage_frame(0), resource_pool(resource_pool), surface(surface), http_encoder(http_encoder), disk_encoder(disk_encoder), frame_width(width), frame_height(height), disk_space_estimator(disk_space_estimator)
 {
        file_audio_encoder.reset(new AudioEncoder(AUDIO_OUTPUT_CODEC_NAME, DEFAULT_AUDIO_OUTPUT_BIT_RATE, oformat));
        open_output_file(filename);
@@ -1525,9 +1465,13 @@ QuickSyncEncoderImpl::QuickSyncEncoderImpl(const std::string &filename, Resource
        //print_input();
 
        if (global_flags.x264_video_to_http || global_flags.x264_video_to_disk) {
-               assert(x264_encoder != nullptr);
+               assert(http_encoder != nullptr);
+               assert(disk_encoder != nullptr);
+       } else if (global_flags.av1_video_to_http) {
+               assert(http_encoder != nullptr);
        } else {
-               assert(x264_encoder == nullptr);
+               assert(http_encoder == nullptr);
+               assert(disk_encoder == nullptr);
        }
 
        enable_zerocopy_if_possible();
@@ -1542,6 +1486,10 @@ QuickSyncEncoderImpl::QuickSyncEncoderImpl(const std::string &filename, Resource
                memset(&slice_param, 0, sizeof(slice_param));
        }
 
+       if (!global_flags.v4l_output_device.empty()) {
+               v4l_output.reset(new V4LOutput(global_flags.v4l_output_device.c_str(), width, height));
+       }
+
        call_once(quick_sync_metrics_inited, [](){
                mixer_latency_histogram.init("mixer");
                qs_latency_histogram.init("quick_sync");
@@ -1559,7 +1507,7 @@ QuickSyncEncoderImpl::QuickSyncEncoderImpl(const std::string &filename, Resource
                if (!make_current(context, this->surface)) {
                        printf("display=%p surface=%p context=%p curr=%p err=%d\n", eglGetCurrentDisplay(), this->surface, context, eglGetCurrentContext(),
                                eglGetError());
-                       exit(1);
+                       abort();
                }
                encode_thread_func();
                delete_context(context);
@@ -1698,10 +1646,10 @@ RefCountedGLsync QuickSyncEncoderImpl::end_frame()
        assert(!is_shutdown);
 
        if (!use_zerocopy) {
-               GLenum type = global_flags.x264_bit_depth > 8 ? GL_UNSIGNED_SHORT : GL_UNSIGNED_BYTE;
+               GLenum type = global_flags.bit_depth > 8 ? GL_UNSIGNED_SHORT : GL_UNSIGNED_BYTE;
                GLSurface *surf;
                {
-                       unique_lock<mutex> lock(storage_task_queue_mutex);
+                       lock_guard<mutex> lock(storage_task_queue_mutex);
                        surf = surface_for_frame[current_storage_frame];
                        assert(surf != nullptr);
                }
@@ -1740,7 +1688,7 @@ RefCountedGLsync QuickSyncEncoderImpl::end_frame()
        check_error();
 
        {
-               unique_lock<mutex> lock(frame_queue_mutex);
+               lock_guard<mutex> lock(frame_queue_mutex);
                current_video_frame.fence = fence;
                pending_video_frames.push(move(current_video_frame));
                ++current_storage_frame;
@@ -1756,13 +1704,13 @@ void QuickSyncEncoderImpl::shutdown()
        }
 
        {
-               unique_lock<mutex> lock(frame_queue_mutex);
+               lock_guard<mutex> lock(frame_queue_mutex);
                encode_thread_should_quit = true;
                frame_queue_nonempty.notify_all();
        }
        encode_thread.join();
        {
-               unique_lock<mutex> lock(storage_task_queue_mutex);
+               lock_guard<mutex> lock(storage_task_queue_mutex);
                storage_thread_should_quit = true;
                frame_queue_nonempty.notify_all();
                storage_task_queue_changed.notify_all();
@@ -1792,20 +1740,19 @@ void QuickSyncEncoderImpl::open_output_file(const std::string &filename)
 {
        AVFormatContext *avctx = avformat_alloc_context();
        avctx->oformat = av_guess_format(NULL, filename.c_str(), NULL);
-       assert(filename.size() < sizeof(avctx->filename) - 1);
-       strcpy(avctx->filename, filename.c_str());
+       avctx->url = strdup(filename.c_str());
 
        string url = "file:" + filename;
        int ret = avio_open2(&avctx->pb, url.c_str(), AVIO_FLAG_WRITE, &avctx->interrupt_callback, NULL);
        if (ret < 0) {
                char tmp[AV_ERROR_MAX_STRING_SIZE];
                fprintf(stderr, "%s: avio_open2() failed: %s\n", filename.c_str(), av_make_error_string(tmp, sizeof(tmp), ret));
-               exit(1);
+               abort();
        }
 
        string video_extradata;  // FIXME: See other comment about global headers.
        if (global_flags.x264_video_to_disk) {
-               video_extradata = x264_encoder->get_global_headers();
+               video_extradata = disk_encoder->get_global_headers();
        }
 
        current_file_mux_metrics.reset();
@@ -1813,7 +1760,7 @@ void QuickSyncEncoderImpl::open_output_file(const std::string &filename)
        {
                lock_guard<mutex> lock(file_audio_encoder_mutex);
                AVCodecParametersWithDeleter audio_codecpar = file_audio_encoder->get_codec_parameters();
-               file_mux.reset(new Mux(avctx, frame_width, frame_height, Mux::CODEC_H264, video_extradata, audio_codecpar.get(), get_color_space(global_flags.ycbcr_rec709_coefficients), Mux::WITH_AUDIO, TIMEBASE,
+               file_mux.reset(new Mux(avctx, frame_width, frame_height, Mux::CODEC_H264, video_extradata, audio_codecpar.get(), get_color_space(global_flags.ycbcr_rec709_coefficients), TIMEBASE,
                        std::bind(&DiskSpaceEstimator::report_append, disk_space_estimator, filename, _1),
                        Mux::WRITE_BACKGROUND,
                        { &current_file_mux_metrics, &total_mux_metrics }));
@@ -1821,7 +1768,7 @@ void QuickSyncEncoderImpl::open_output_file(const std::string &filename)
        metric_current_file_start_time_seconds = get_timestamp_for_metrics();
 
        if (global_flags.x264_video_to_disk) {
-               x264_encoder->add_mux(file_mux.get());
+               disk_encoder->add_mux(file_mux.get());
        }
 }
 
@@ -1829,6 +1776,8 @@ void QuickSyncEncoderImpl::encode_thread_func()
 {
        pthread_setname_np(pthread_self(), "QS_Encode");
 
+       DTSReorderer dts_reorder_buf(ip_period - 1);
+
        int64_t last_dts = -1;
        int gop_start_display_frame_num = 0;
        for (int display_frame_num = 0; ; ++display_frame_num) {
@@ -1855,12 +1804,14 @@ void QuickSyncEncoderImpl::encode_thread_func()
                        }
                }
 
+               dts_reorder_buf.push_pts(frame.pts);
+
                // Pass the frame on to x264 (or uncompressed to HTTP) as needed.
                // Note that this implicitly waits for the frame to be done rendering.
                pass_frame(frame, display_frame_num, frame.pts, frame.duration);
 
                if (global_flags.x264_video_to_disk) {
-                       unique_lock<mutex> lock(storage_task_queue_mutex);
+                       lock_guard<mutex> lock(storage_task_queue_mutex);
                        release_gl_surface(display_frame_num);
                        continue;
                }
@@ -1870,10 +1821,9 @@ void QuickSyncEncoderImpl::encode_thread_func()
                // Now encode as many QuickSync frames as we can using the frames we have available.
                // (It could be zero, or it could be multiple.) FIXME: make a function.
                for ( ;; ) {
-                       int pts_lag;
                        int frame_type, quicksync_display_frame_num;
                        encoding2display_order(quicksync_encoding_frame_num, intra_period, intra_idr_period, ip_period,
-                                              &quicksync_display_frame_num, &frame_type, &pts_lag);
+                                              &quicksync_display_frame_num, &frame_type);
                        if (!reorder_buffer.count(quicksync_display_frame_num)) {
                                break;
                        }
@@ -1883,7 +1833,7 @@ void QuickSyncEncoderImpl::encode_thread_func()
                        if (frame_type == FRAME_IDR) {
                                // Release any reference frames from the previous GOP.
                                {
-                                       unique_lock<mutex> lock(storage_task_queue_mutex);
+                                       lock_guard<mutex> lock(storage_task_queue_mutex);
                                        for (const ReferenceFrame &frame : reference_frames) {
                                                release_gl_surface(frame.display_number);
                                        }
@@ -1893,14 +1843,7 @@ void QuickSyncEncoderImpl::encode_thread_func()
                                gop_start_display_frame_num = quicksync_display_frame_num;
                        }
 
-                       // Determine the dts of this frame.
-                       int64_t dts;
-                       if (pts_lag == -1) {
-                               assert(last_dts != -1);
-                               dts = last_dts + (TIMEBASE / MAX_FPS);
-                       } else {
-                               dts = frame.pts - pts_lag;
-                       }
+                       const int64_t dts = dts_reorder_buf.pop_dts();
                        last_dts = dts;
 
                        encode_frame(frame, quicksync_encoding_frame_num, quicksync_display_frame_num, gop_start_display_frame_num, frame_type, frame.pts, dts, frame.duration, frame.ycbcr_coefficients);
@@ -1919,26 +1862,12 @@ void QuickSyncEncoderImpl::encode_remaining_frames_as_p(int encoding_frame_num,
                int display_frame_num = pending_frame.first;
                assert(display_frame_num > 0);
                PendingFrame frame = move(pending_frame.second);
-               int64_t dts = last_dts + (TIMEBASE / MAX_FPS);
-               printf("Finalizing encode: Encoding leftover frame %d as P-frame instead of B-frame.\n", display_frame_num);
+               int64_t dts = last_dts + (TIMEBASE / TYPICAL_FPS);
                encode_frame(frame, encoding_frame_num++, display_frame_num, gop_start_display_frame_num, FRAME_P, frame.pts, dts, frame.duration, frame.ycbcr_coefficients);
                last_dts = dts;
        }
 }
 
-void QuickSyncEncoderImpl::add_packet_for_uncompressed_frame(int64_t pts, int64_t duration, const uint8_t *data)
-{
-       AVPacket pkt;
-       memset(&pkt, 0, sizeof(pkt));
-       pkt.buf = nullptr;
-       pkt.data = const_cast<uint8_t *>(data);
-       pkt.size = frame_width * frame_height * 2;
-       pkt.stream_index = 0;
-       pkt.flags = AV_PKT_FLAG_KEY;
-       pkt.duration = duration;
-       stream_mux->add_packet(pkt, pts, pts);
-}
-
 void memcpy_with_pitch(uint8_t *dst, const uint8_t *src, size_t src_width, size_t dst_pitch, size_t height)
 {
        if (src_width == dst_pitch) {
@@ -1976,15 +1905,19 @@ void QuickSyncEncoderImpl::pass_frame(QuickSyncEncoderImpl::PendingFrame frame,
 
        GLSurface *surf;
        {
-               unique_lock<mutex> lock(storage_task_queue_mutex);
+               lock_guard<mutex> lock(storage_task_queue_mutex);
                surf = surface_for_frame[display_frame_num];
                assert(surf != nullptr);
        }
        uint8_t *data = reinterpret_cast<uint8_t *>(surf->y_ptr);
-       if (global_flags.uncompressed_video_to_http) {
-               add_packet_for_uncompressed_frame(pts, duration, data);
-       } else if (global_flags.x264_video_to_http || global_flags.x264_video_to_disk) {
-               x264_encoder->add_frame(pts, duration, frame.ycbcr_coefficients, data, received_ts);
+       if (http_encoder != nullptr) {
+               http_encoder->add_frame(pts, duration, frame.ycbcr_coefficients, data, received_ts);
+       } if (disk_encoder != nullptr && disk_encoder != http_encoder) {
+               disk_encoder->add_frame(pts, duration, frame.ycbcr_coefficients, data, received_ts);
+       }
+
+       if (v4l_output != nullptr) {
+               v4l_output->send_frame(data);
        }
 }
 
@@ -1995,7 +1928,7 @@ void QuickSyncEncoderImpl::encode_frame(QuickSyncEncoderImpl::PendingFrame frame
 
        GLSurface *surf;
        {
-               unique_lock<mutex> lock(storage_task_queue_mutex);
+               lock_guard<mutex> lock(storage_task_queue_mutex);
                surf = surface_for_frame[display_frame_num];
                assert(surf != nullptr);
        }
@@ -2059,7 +1992,7 @@ void QuickSyncEncoderImpl::encode_frame(QuickSyncEncoderImpl::PendingFrame frame
        // Lock the references for this frame; otherwise, they could be
        // rendered to before this frame is done encoding.
        {
-               unique_lock<mutex> lock(storage_task_queue_mutex);
+               lock_guard<mutex> lock(storage_task_queue_mutex);
                for (const ReferenceFrame &frame : reference_frames) {
                        assert(surface_for_frame.count(frame.display_number));
                        ++surface_for_frame[frame.display_number]->refcount;
@@ -2082,8 +2015,8 @@ void QuickSyncEncoderImpl::encode_frame(QuickSyncEncoderImpl::PendingFrame frame
 }
 
 // Proxy object.
-QuickSyncEncoder::QuickSyncEncoder(const std::string &filename, ResourcePool *resource_pool, QSurface *surface, const string &va_display, int width, int height, AVOutputFormat *oformat, X264Encoder *x264_encoder, DiskSpaceEstimator *disk_space_estimator)
-       : impl(new QuickSyncEncoderImpl(filename, resource_pool, surface, va_display, width, height, oformat, x264_encoder, disk_space_estimator)) {}
+QuickSyncEncoder::QuickSyncEncoder(const std::string &filename, ResourcePool *resource_pool, QSurface *surface, const string &va_display, int width, int height, const AVOutputFormat *oformat, VideoCodecInterface *http_encoder, VideoCodecInterface *disk_encoder, DiskSpaceEstimator *disk_space_estimator)
+       : impl(new QuickSyncEncoderImpl(filename, resource_pool, surface, va_display, width, height, oformat, http_encoder, disk_encoder, disk_space_estimator)) {}
 
 // Must be defined here because unique_ptr<> destructor needs to know the impl.
 QuickSyncEncoder::~QuickSyncEncoder() {}
@@ -2118,9 +2051,14 @@ void QuickSyncEncoder::close_file()
        impl->shutdown();
 }
 
-void QuickSyncEncoder::set_stream_mux(Mux *mux)
+void QuickSyncEncoder::set_http_mux(Mux *mux)
+{
+       impl->set_http_mux(mux);
+}
+
+void QuickSyncEncoder::set_srt_mux(Mux *mux)
 {
-       impl->set_stream_mux(mux);
+       impl->set_srt_mux(mux);
 }
 
 int64_t QuickSyncEncoder::global_delay() const {
@@ -2138,7 +2076,7 @@ string QuickSyncEncoder::get_usable_va_display()
        }
 
        // First try the default (ie., whatever $DISPLAY is set to).
-       unique_ptr<VADisplayWithCleanup> va_dpy = try_open_va("", nullptr, nullptr);
+       unique_ptr<VADisplayWithCleanup> va_dpy = try_open_va_h264("", nullptr, nullptr);
        if (va_dpy != nullptr) {
                if (need_env_reset) {
                        unsetenv("LIBVA_MESSAGING_LEVEL");
@@ -2156,7 +2094,7 @@ string QuickSyncEncoder::get_usable_va_display()
        } else {
                for (size_t i = 0; i < g.gl_pathc; ++i) {
                        string path = g.gl_pathv[i];
-                       va_dpy = try_open_va(path, nullptr, nullptr);
+                       va_dpy = try_open_va_h264(path, nullptr, nullptr);
                        if (va_dpy != nullptr) {
                                fprintf(stderr, "Autodetected %s as a suitable replacement; using it.\n",
                                        path.c_str());
@@ -2175,5 +2113,5 @@ string QuickSyncEncoder::get_usable_va_display()
        fprintf(stderr, "to expose Quick Sync. Alternatively, you can use --record-x264-video\n");
        fprintf(stderr, "to use software instead of hardware H.264 encoding, at the expense\n");
        fprintf(stderr, "of increased CPU usage and possibly bit rate.\n");
-       exit(1);
+       abort();
 }