#include "quicksync_encoder.h"
+#include <atomic>
+#include <errno.h>
+#include <epoxy/egl.h>
+#include <epoxy/gl.h>
#include <movit/image_format.h>
#include <movit/resource_pool.h> // Must be above the Xlib includes.
#include <movit/util.h>
#include <EGL/eglplatform.h>
-#include <X11/Xlib.h>
#include <assert.h>
#include <epoxy/egl.h>
-#include <fcntl.h>
#include <glob.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#include <unistd.h>
#include <va/va.h>
-#include <va/va_drm.h>
#include <va/va_drmcommon.h>
#include <va/va_enc_h264.h>
-#include <va/va_x11.h>
#include <algorithm>
#include <chrono>
#include <condition_variable>
#include <memory>
#include <mutex>
#include <queue>
-#include <stack>
#include <string>
#include <thread>
#include <utility>
+#include <vector>
extern "C" {
-#include <libavcodec/avcodec.h>
+#include <drm_fourcc.h>
+#include <libavcodec/packet.h>
+#include <libavformat/avformat.h>
#include <libavformat/avio.h>
#include <libavutil/error.h>
-#include <drm_fourcc.h>
} // extern "C"
#include "audio_encoder.h"
-#include "shared/context.h"
#include "defs.h"
-#include "shared/disk_space_estimator.h"
-#include "shared/ffmpeg_raii.h"
#include "flags.h"
-#include "shared/mux.h"
#include "print_latency.h"
#include "quicksync_encoder_impl.h"
#include "ref_counted_frame.h"
+#include "shared/context.h"
+#include "shared/disk_space_estimator.h"
+#include "shared/ffmpeg_raii.h"
+#include "shared/metrics.h"
+#include "shared/mux.h"
+#include "shared/ref_counted_gl_sync.h"
#include "shared/timebase.h"
+#include "shared/va_display.h"
+#include "v4l_output.h"
#include "x264_encoder.h"
using namespace movit;
{IDR(PBB)(PBB)}.
*/
-// General pts/dts strategy:
-//
-// Getting pts and dts right with variable frame rate (VFR) and B-frames can be a
-// bit tricky. We assume first of all that the frame rate never goes _above_
-// MAX_FPS, which gives us a frame period N. The decoder can always decode
-// in at least this speed, as long at dts <= pts (the frame is not attempted
-// presented before it is decoded). Furthermore, we never have longer chains of
-// B-frames than a fixed constant C. (In a B-frame chain, we say that the base
-// I/P-frame has order O=0, the B-frame depending on it directly has order O=1,
-// etc. The last frame in the chain, which no B-frames depend on, is the “tip”
-// frame, with an order O <= C.)
-//
-// Many strategies are possible, but we establish these rules:
-//
-// - Tip frames have dts = pts - (C-O)*N.
-// - Non-tip frames have dts = dts_last + N.
-//
-// An example, with C=2 and N=10 and the data flow showed with arrows:
-//
-// I B P B B P
-// pts: 30 40 50 60 70 80
-// ↓ ↓ ↓
-// dts: 10 30 20 60 50←40
-// | | ↑ ↑
-// `--|--' |
-// `----------'
-//
-// To show that this works fine also with irregular spacings, let's say that
-// the third frame is delayed a bit (something earlier was dropped). Now the
-// situation looks like this:
-//
-// I B P B B P
-// pts: 30 40 80 90 100 110
-// ↓ ↓ ↓
-// dts: 10 30 20 90 50←40
-// | | ↑ ↑
-// `--|--' |
-// `----------'
-//
-// The resetting on every tip frame makes sure dts never ends up lagging a lot
-// behind pts, and the subtraction of (C-O)*N makes sure pts <= dts.
-//
-// In the output of this function, if <dts_lag> is >= 0, it means to reset the
-// dts from the current pts minus <dts_lag>, while if it's -1, the frame is not
-// a tip frame and should be given a dts based on the previous one.
#define FRAME_P 0
#define FRAME_B 1
#define FRAME_I 2
int encoding_order, int intra_period,
int intra_idr_period, int ip_period,
int *displaying_order,
- int *frame_type, int *pts_lag)
+ int *frame_type)
{
int encoding_order_gop = 0;
- *pts_lag = 0;
-
if (intra_period == 1) { /* all are I/IDR frames */
*displaying_order = encoding_order;
if (intra_idr_period == 0)
// We have B-frames. Sequence is like IDR (PBB)(PBB)(IBB)(PBB).
encoding_order_gop = (intra_idr_period == 0) ? encoding_order : (encoding_order % (intra_idr_period + 1));
- *pts_lag = -1; // Most frames are not tip frames.
if (encoding_order_gop == 0) { /* the first frame */
*frame_type = FRAME_IDR;
*displaying_order = encoding_order;
- // IDR frames are a special case; I honestly can't find the logic behind
- // why this is the right thing, but it seems to line up nicely in practice :-)
- *pts_lag = TIMEBASE / MAX_FPS;
} else if (((encoding_order_gop - 1) % ip_period) != 0) { /* B frames */
*frame_type = FRAME_B;
*displaying_order = encoding_order - 1;
- if ((encoding_order_gop % ip_period) == 0) {
- *pts_lag = 0; // Last B-frame.
- }
} else if (intra_period != 0 && /* have I frames */
encoding_order_gop >= 2 &&
((encoding_order_gop - 1) / ip_period % (intra_period / ip_period)) == 0) {
}
}
+// General pts/dts strategy:
+//
+// Getting pts and dts right with variable frame rate (VFR) and B-frames can be a
+// bit tricky. This strategy roughly matches what x264 seems to do: We take in
+// the pts as the frames are encoded, and reuse that as dts in the same order,
+// slightly offset.
+//
+// If we don't have B-frames (only I and P), this means pts == dts always.
+// This is the simple case. Now consider the case with a single B-frame:
+//
+// I B P B P
+// pts: 30 40 50 60 70
+//
+// Since we always inherently encode P-frames before B-frames, this means that
+// we see them in this order, which we can _almost_ use for dts:
+//
+// dts: 30 50 40 70 60
+//
+// the only problem here is that for the B-frames, pts < dts. We solve this by
+// priming the queue at the very start with some made-up dts:
+//
+// I B P B P
+// pts: 30 40 50 60 70
+// dts: xx 30 50 40 70 60
+//
+// Now we have all the desirable properties: pts >= dts, successive dts delta
+// is never larger than the decoder can figure out (assuming, of course,
+// the pts has that property), and there's minimal lag between pts and dts.
+// For the made-up dts, we assume 1/60 sec per frame, which should generally
+// be reasonable. dts can go negative, but this is corrected using global_delay()
+// by delaying both pts and dts (although we probably don't need to).
+//
+// If there's more than one B-frame possible, we simply insert more of them
+// (here shown with some irregular spacing, assuming B-frames don't depend
+// on each other and simply go back-to-front):
+//
+// I B B B P B B B P
+// pts: 30 40 55 60 65 66 67 68 80
+// dts: xx yy zz 30 65 60 55 40 80 68 67 66
+// Reorders pts values into dts values for a stream containing B-frames.
+// See the “General pts/dts strategy” comment above for the full rationale:
+// pts values are pushed in display order and popped back out as dts in the
+// same order, offset by the number of possible consecutive B-frames, so that
+// pts >= dts holds for every frame.
+class DTSReorderer {
+public:
+	// <num_b_frames> is the maximum number of consecutive B-frames
+	// (the caller passes ip_period - 1). With zero B-frames, the queue
+	// is never primed and dts == pts for every frame.
+	DTSReorderer(int num_b_frames) : num_b_frames(num_b_frames) {}
+
+	// Feed in the pts of the next frame, in display order.
+	void push_pts(int64_t pts)
+	{
+		if (buf.empty() && num_b_frames > 0) { // First frame.
+			// Prime the queue with <num_b_frames> made-up dts values
+			// spaced 1/TYPICAL_FPS apart before the first pts, so that
+			// no later frame can be handed a dts larger than its pts.
+			// (These can go negative; global_delay() compensates.)
+			int64_t base_dts = pts - num_b_frames * (TIMEBASE / TYPICAL_FPS);
+			for (int i = 0; i < num_b_frames; ++i) {
+				buf.push(base_dts + i * (TIMEBASE / TYPICAL_FPS));
+			}
+		}
+		buf.push(pts);
+	}
+
+	// Return the dts for the next frame in encoding order. push_pts()
+	// must have been called at least as many times as pop_dts().
+	int64_t pop_dts()
+	{
+		assert(!buf.empty());
+		int64_t dts = buf.front();
+		buf.pop();
+		return dts;
+	}
+
+private:
+	const int num_b_frames;  // Fixed at construction time.
+	queue<int64_t> buf;  // Queued dts values, oldest (next to hand out) first.
+};
void QuickSyncEncoderImpl::enable_zerocopy_if_possible()
{
if (global_flags.x264_video_to_disk) {
// Quick Sync is entirely disabled.
use_zerocopy = false;
- } else if (global_flags.uncompressed_video_to_http) {
- fprintf(stderr, "Disabling zerocopy H.264 encoding due to --http-uncompressed-video.\n");
- use_zerocopy = false;
} else if (global_flags.x264_video_to_http) {
- fprintf(stderr, "Disabling zerocopy H.264 encoding due to --http-x264-video.\n");
+ use_zerocopy = false;
+ } else if (global_flags.av1_video_to_http) {
use_zerocopy = false;
} else if (!global_flags.v4l_output_device.empty()) {
- fprintf(stderr, "Disabling zerocopy H.264 encoding due to --v4l-output.\n");
use_zerocopy = false;
} else {
use_zerocopy = true;
gl_surfaces[i].y_tex = resource_pool->create_2d_texture(GL_R8, 1, 1);
gl_surfaces[i].cbcr_tex = resource_pool->create_2d_texture(GL_RG8, 1, 1);
} else {
- size_t bytes_per_pixel = (global_flags.x264_bit_depth > 8) ? 2 : 1;
+ size_t bytes_per_pixel = (global_flags.bit_depth > 8) ? 2 : 1;
// Generate a PBO to read into. It doesn't necessarily fit 1:1 with the VA-API
// buffers, due to potentially differing pitch.
if (file_mux) {
file_mux->add_packet(pkt, task.pts + global_delay(), task.dts + global_delay());
}
- if (!global_flags.uncompressed_video_to_http &&
- !global_flags.x264_video_to_http) {
- stream_mux->add_packet(pkt, task.pts + global_delay(), task.dts + global_delay());
+ if (!global_flags.x264_video_to_http &&
+ !global_flags.av1_video_to_http) {
+ http_mux->add_packet(pkt, task.pts + global_delay(), task.dts + global_delay());
+ if (srt_mux != nullptr) {
+ srt_mux->add_packet(pkt, task.pts + global_delay(), task.dts + global_delay());
+ }
}
}
}
// Queues a finished storage task for the storage thread and wakes it up.
void QuickSyncEncoderImpl::storage_task_enqueue(storage_task task)
{
+ assert(task.pts >= task.dts);
lock_guard<mutex> lock(storage_task_queue_mutex);
storage_task_queue.push(move(task));
storage_task_queue_changed.notify_all();
has_released_gl_resources = true;
}
-QuickSyncEncoderImpl::QuickSyncEncoderImpl(const std::string &filename, ResourcePool *resource_pool, QSurface *surface, const string &va_display, int width, int height, AVOutputFormat *oformat, X264Encoder *http_encoder, X264Encoder *disk_encoder, DiskSpaceEstimator *disk_space_estimator)
- : current_storage_frame(0), resource_pool(resource_pool), surface(surface), x264_http_encoder(http_encoder), x264_disk_encoder(disk_encoder), frame_width(width), frame_height(height), disk_space_estimator(disk_space_estimator)
+QuickSyncEncoderImpl::QuickSyncEncoderImpl(const std::string &filename, ResourcePool *resource_pool, QSurface *surface, const string &va_display, int width, int height, const AVOutputFormat *oformat, VideoCodecInterface *http_encoder, VideoCodecInterface *disk_encoder, DiskSpaceEstimator *disk_space_estimator)
+ : current_storage_frame(0), resource_pool(resource_pool), surface(surface), http_encoder(http_encoder), disk_encoder(disk_encoder), frame_width(width), frame_height(height), disk_space_estimator(disk_space_estimator)
{
file_audio_encoder.reset(new AudioEncoder(AUDIO_OUTPUT_CODEC_NAME, DEFAULT_AUDIO_OUTPUT_BIT_RATE, oformat));
open_output_file(filename);
//print_input();
if (global_flags.x264_video_to_http || global_flags.x264_video_to_disk) {
- assert(x264_http_encoder != nullptr);
- } else {
- assert(x264_http_encoder == nullptr);
- }
- if (global_flags.x264_separate_disk_encode) {
- assert(x264_disk_encoder != nullptr);
+ assert(http_encoder != nullptr);
+ assert(disk_encoder != nullptr);
+ } else if (global_flags.av1_video_to_http) {
+ assert(http_encoder != nullptr);
} else {
- assert(x264_disk_encoder == nullptr);
+ assert(http_encoder == nullptr);
+ assert(disk_encoder == nullptr);
}
enable_zerocopy_if_possible();
assert(!is_shutdown);
if (!use_zerocopy) {
- GLenum type = global_flags.x264_bit_depth > 8 ? GL_UNSIGNED_SHORT : GL_UNSIGNED_BYTE;
+ GLenum type = global_flags.bit_depth > 8 ? GL_UNSIGNED_SHORT : GL_UNSIGNED_BYTE;
GLSurface *surf;
{
lock_guard<mutex> lock(storage_task_queue_mutex);
string video_extradata; // FIXME: See other comment about global headers.
if (global_flags.x264_video_to_disk) {
- video_extradata = x264_disk_encoder->get_global_headers();
+ video_extradata = disk_encoder->get_global_headers();
}
current_file_mux_metrics.reset();
metric_current_file_start_time_seconds = get_timestamp_for_metrics();
if (global_flags.x264_video_to_disk) {
- x264_disk_encoder->add_mux(file_mux.get());
+ disk_encoder->add_mux(file_mux.get());
}
}
{
pthread_setname_np(pthread_self(), "QS_Encode");
+ DTSReorderer dts_reorder_buf(ip_period - 1);
+
int64_t last_dts = -1;
int gop_start_display_frame_num = 0;
for (int display_frame_num = 0; ; ++display_frame_num) {
}
}
+ dts_reorder_buf.push_pts(frame.pts);
+
// Pass the frame on to x264 (or uncompressed to HTTP) as needed.
// Note that this implicitly waits for the frame to be done rendering.
pass_frame(frame, display_frame_num, frame.pts, frame.duration);
// Now encode as many QuickSync frames as we can using the frames we have available.
// (It could be zero, or it could be multiple.) FIXME: make a function.
for ( ;; ) {
- int pts_lag;
int frame_type, quicksync_display_frame_num;
encoding2display_order(quicksync_encoding_frame_num, intra_period, intra_idr_period, ip_period,
- &quicksync_display_frame_num, &frame_type, &pts_lag);
+ &quicksync_display_frame_num, &frame_type);
if (!reorder_buffer.count(quicksync_display_frame_num)) {
break;
}
gop_start_display_frame_num = quicksync_display_frame_num;
}
- // Determine the dts of this frame.
- int64_t dts;
- if (pts_lag == -1) {
- assert(last_dts != -1);
- dts = last_dts + (TIMEBASE / MAX_FPS);
- } else {
- dts = frame.pts - pts_lag;
- }
+ const int64_t dts = dts_reorder_buf.pop_dts();
last_dts = dts;
encode_frame(frame, quicksync_encoding_frame_num, quicksync_display_frame_num, gop_start_display_frame_num, frame_type, frame.pts, dts, frame.duration, frame.ycbcr_coefficients);
int display_frame_num = pending_frame.first;
assert(display_frame_num > 0);
PendingFrame frame = move(pending_frame.second);
- int64_t dts = last_dts + (TIMEBASE / MAX_FPS);
- printf("Finalizing encode: Encoding leftover frame %d as P-frame instead of B-frame.\n", display_frame_num);
+ int64_t dts = last_dts + (TIMEBASE / TYPICAL_FPS);
encode_frame(frame, encoding_frame_num++, display_frame_num, gop_start_display_frame_num, FRAME_P, frame.pts, dts, frame.duration, frame.ycbcr_coefficients);
last_dts = dts;
}
}
-void QuickSyncEncoderImpl::add_packet_for_uncompressed_frame(int64_t pts, int64_t duration, const uint8_t *data)
-{
- AVPacket pkt;
- memset(&pkt, 0, sizeof(pkt));
- pkt.buf = nullptr;
- pkt.data = const_cast<uint8_t *>(data);
- pkt.size = frame_width * frame_height * 2;
- pkt.stream_index = 0;
- pkt.flags = AV_PKT_FLAG_KEY;
- pkt.duration = duration;
- stream_mux->add_packet(pkt, pts, pts);
-}
-
void memcpy_with_pitch(uint8_t *dst, const uint8_t *src, size_t src_width, size_t dst_pitch, size_t height)
{
if (src_width == dst_pitch) {
assert(surf != nullptr);
}
uint8_t *data = reinterpret_cast<uint8_t *>(surf->y_ptr);
- if (global_flags.uncompressed_video_to_http) {
- add_packet_for_uncompressed_frame(pts, duration, data);
- } else if (global_flags.x264_video_to_http || global_flags.x264_video_to_disk) {
- x264_http_encoder->add_frame(pts, duration, frame.ycbcr_coefficients, data, received_ts);
- }
- if (global_flags.x264_separate_disk_encode) {
- x264_disk_encoder->add_frame(pts, duration, frame.ycbcr_coefficients, data, received_ts);
+ if (http_encoder != nullptr) {
+ http_encoder->add_frame(pts, duration, frame.ycbcr_coefficients, data, received_ts);
+ } if (disk_encoder != nullptr && disk_encoder != http_encoder) {
+ disk_encoder->add_frame(pts, duration, frame.ycbcr_coefficients, data, received_ts);
}
if (v4l_output != nullptr) {
}
// Proxy object.
-QuickSyncEncoder::QuickSyncEncoder(const std::string &filename, ResourcePool *resource_pool, QSurface *surface, const string &va_display, int width, int height, AVOutputFormat *oformat, X264Encoder *http_encoder, X264Encoder *disk_encoder, DiskSpaceEstimator *disk_space_estimator)
+QuickSyncEncoder::QuickSyncEncoder(const std::string &filename, ResourcePool *resource_pool, QSurface *surface, const string &va_display, int width, int height, const AVOutputFormat *oformat, VideoCodecInterface *http_encoder, VideoCodecInterface *disk_encoder, DiskSpaceEstimator *disk_space_estimator)
: impl(new QuickSyncEncoderImpl(filename, resource_pool, surface, va_display, width, height, oformat, http_encoder, disk_encoder, disk_space_estimator)) {}
// Must be defined here because unique_ptr<> destructor needs to know the impl.
impl->shutdown();
}
-void QuickSyncEncoder::set_stream_mux(Mux *mux)
+// Proxy: set the Mux that receives encoded packets for the HTTP output.
+// (Replaces the old set_stream_mux(); the single stream mux has been
+// split into separate HTTP and SRT muxes.)
+void QuickSyncEncoder::set_http_mux(Mux *mux)
+{
+	impl->set_http_mux(mux);
+}
+
+// Proxy: set the Mux that receives encoded packets for the SRT output.
+void QuickSyncEncoder::set_srt_mux(Mux *mux)
{
-	impl->set_stream_mux(mux);
+	impl->set_srt_mux(mux);
}
int64_t QuickSyncEncoder::global_delay() const {