X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=quicksync_encoder.cpp;h=875b264fdfe90342a0efcdfba78c38d33243ca66;hb=498987ac24daccf573b385107540f12929eb89b2;hp=41ace54263d1d235e8a02c4a905ac9fc34e8f3c0;hpb=3f34da3ebb9a6fd1ed267f9186d17433321a9214;p=nageru diff --git a/quicksync_encoder.cpp b/quicksync_encoder.cpp index 41ace54..875b264 100644 --- a/quicksync_encoder.cpp +++ b/quicksync_encoder.cpp @@ -1,42 +1,62 @@ -//#include "sysdeps.h" #include "quicksync_encoder.h" +#include // Must be above the Xlib includes. #include + #include -#include #include #include #include -#include +#include #include #include #include -#include +#include #include #include #include #include #include #include +#include #include +#include #include +#include #include #include #include #include +#include #include #include #include +extern "C" { + +#include +#include +#include +#include + +} // namespace + #include "audio_encoder.h" #include "context.h" #include "defs.h" +#include "disk_space_estimator.h" +#include "ffmpeg_raii.h" #include "flags.h" #include "mux.h" +#include "print_latency.h" +#include "quicksync_encoder_impl.h" +#include "ref_counted_frame.h" #include "timebase.h" #include "x264_encoder.h" using namespace std; +using namespace std::chrono; +using namespace std::placeholders; class QOpenGLContext; class QSurface; @@ -78,9 +98,6 @@ class QSurface; #define PROFILE_IDC_HIGH 100 #define BITSTREAM_ALLOCATE_STEPPING 4096 -#define SURFACE_NUM 16 /* 16 surfaces for source YUV */ -#define MAX_NUM_REF1 16 // Seemingly a hardware-fixed value, not related to SURFACE_NUM -#define MAX_NUM_REF2 32 // Seemingly a hardware-fixed value, not related to SURFACE_NUM static constexpr unsigned int MaxFrameNum = (2<<16); static constexpr unsigned int MaxPicOrderCntLsb = (2<<8); @@ -99,247 +116,8 @@ static constexpr int rc_default_modes[] = { // Priority list of modes. #define SRC_SURFACE_FREE 0 #define SRC_SURFACE_IN_ENCODING 1 -struct __bitstream { - unsigned int *buffer; - int bit_offset; - int max_size_in_dword; -}; -typedef struct __bitstream bitstream; - using namespace std; -// H.264 video comes out in encoding order (e.g. with two B-frames: -// 0, 3, 1, 2, 6, 4, 5, etc.), but uncompressed video needs to -// come in the right order. Since we do everything, including waiting -// for the frames to come out of OpenGL, in encoding order, we need -// a reordering buffer for uncompressed frames so that they come out -// correctly. We go the super-lazy way of not making it understand -// anything about the true order (which introduces some extra latency, -// though); we know that for N B-frames we need at most (N-1) frames -// in the reorder buffer, and can just sort on that. -// -// The class also deals with keeping a freelist as needed. -class FrameReorderer { -public: - FrameReorderer(unsigned queue_length, int width, int height); - - struct Frame { - int64_t pts, duration; - uint8_t *data; - - // Invert to get the smallest pts first. - bool operator< (const Frame &other) const { return pts > other.pts; } - }; - - // Returns the next frame to insert with its pts, if any. Otherwise -1 and nullptr. - // Does _not_ take ownership of data; a copy is taken if needed. - // The returned pointer is valid until the next call to reorder_frame, or destruction. - // As a special case, if queue_length == 0, will just return pts and data (no reordering needed). - Frame reorder_frame(int64_t pts, int64_t duration, uint8_t *data); - - // The same as reorder_frame, but without inserting anything. Used to empty the queue. - Frame get_first_frame(); - - bool empty() const { return frames.empty(); } - -private: - unsigned queue_length; - int width, height; - - priority_queue frames; - stack freelist; // Includes the last value returned from reorder_frame. - - // Owns all the pointers. Normally, freelist and frames could do this themselves, - // except priority_queue doesn't work well with movable-only types. - vector> owner; -}; - -FrameReorderer::FrameReorderer(unsigned queue_length, int width, int height) - : queue_length(queue_length), width(width), height(height) -{ - for (unsigned i = 0; i < queue_length; ++i) { - owner.emplace_back(new uint8_t[width * height * 2]); - freelist.push(owner.back().get()); - } -} - -FrameReorderer::Frame FrameReorderer::reorder_frame(int64_t pts, int64_t duration, uint8_t *data) -{ - if (queue_length == 0) { - return Frame{pts, duration, data}; - } - - assert(!freelist.empty()); - uint8_t *storage = freelist.top(); - freelist.pop(); - memcpy(storage, data, width * height * 2); - frames.push(Frame{pts, duration, storage}); - - if (frames.size() >= queue_length) { - return get_first_frame(); - } else { - return Frame{-1, -1, nullptr}; - } -} - -FrameReorderer::Frame FrameReorderer::get_first_frame() -{ - assert(!frames.empty()); - Frame storage = frames.top(); - frames.pop(); - freelist.push(storage.data); - return storage; -} - -class QuickSyncEncoderImpl { -public: - QuickSyncEncoderImpl(const std::string &filename, QSurface *surface, const string &va_display, int width, int height, Mux *stream_mux, AudioEncoder *stream_audio_encoder, X264Encoder *x264_encoder); - ~QuickSyncEncoderImpl(); - void add_audio(int64_t pts, vector audio); - bool begin_frame(GLuint *y_tex, GLuint *cbcr_tex); - RefCountedGLsync end_frame(int64_t pts, int64_t duration, const vector &input_frames); - void shutdown(); - -private: - struct storage_task { - unsigned long long display_order; - int frame_type; - vector audio; - int64_t pts, dts, duration; - }; - struct PendingFrame { - RefCountedGLsync fence; - vector input_frames; - int64_t pts, duration; - }; - - // So we never get negative dts. - int64_t global_delay() const { - return int64_t(ip_period - 1) * (TIMEBASE / MAX_FPS); - } - - void open_output_file(const std::string &filename); - void encode_thread_func(); - void encode_remaining_frames_as_p(int encoding_frame_num, int gop_start_display_frame_num, int64_t last_dts); - void add_packet_for_uncompressed_frame(int64_t pts, int64_t duration, const uint8_t *data); - void encode_frame(PendingFrame frame, int encoding_frame_num, int display_frame_num, int gop_start_display_frame_num, - int frame_type, int64_t pts, int64_t dts, int64_t duration); - void storage_task_thread(); - void encode_remaining_audio(); - void storage_task_enqueue(storage_task task); - void save_codeddata(storage_task task); - int render_packedsequence(); - int render_packedpicture(); - void render_packedslice(); - int render_sequence(); - int render_picture(int frame_type, int display_frame_num, int gop_start_display_frame_num); - void sps_rbsp(bitstream *bs); - void pps_rbsp(bitstream *bs); - int build_packed_pic_buffer(unsigned char **header_buffer); - int render_slice(int encoding_frame_num, int display_frame_num, int gop_start_display_frame_num, int frame_type); - void slice_header(bitstream *bs); - int build_packed_seq_buffer(unsigned char **header_buffer); - int build_packed_slice_buffer(unsigned char **header_buffer); - int init_va(const string &va_display); - int deinit_va(); - void enable_zerocopy_if_possible(); - VADisplay va_open_display(const string &va_display); - void va_close_display(VADisplay va_dpy); - int setup_encode(); - int release_encode(); - void update_ReferenceFrames(int frame_type); - int update_RefPicList(int frame_type); - - bool is_shutdown = false; - bool use_zerocopy; - int drm_fd = -1; - - thread encode_thread, storage_thread; - - mutex storage_task_queue_mutex; - condition_variable storage_task_queue_changed; - int srcsurface_status[SURFACE_NUM]; // protected by storage_task_queue_mutex - queue storage_task_queue; // protected by storage_task_queue_mutex - bool storage_thread_should_quit = false; // protected by storage_task_queue_mutex - - mutex frame_queue_mutex; - condition_variable frame_queue_nonempty; - bool encode_thread_should_quit = false; // under frame_queue_mutex - - int current_storage_frame; - - map pending_video_frames; // under frame_queue_mutex - map> pending_audio_frames; // under frame_queue_mutex - QSurface *surface; - - unique_ptr file_audio_encoder; - AudioEncoder *stream_audio_encoder; - - unique_ptr reorderer; - X264Encoder *x264_encoder; // nullptr if not using x264. - - Mux* stream_mux; // To HTTP. - unique_ptr file_mux; // To local disk. - - Display *x11_display = nullptr; - - // Encoder parameters - VADisplay va_dpy; - VAProfile h264_profile = (VAProfile)~0; - VAConfigAttrib config_attrib[VAConfigAttribTypeMax]; - int config_attrib_num = 0, enc_packed_header_idx; - - struct GLSurface { - VASurfaceID src_surface, ref_surface; - VABufferID coded_buf; - - VAImage surface_image; - GLuint y_tex, cbcr_tex; - - // Only if use_zerocopy == true. - EGLImage y_egl_image, cbcr_egl_image; - - // Only if use_zerocopy == false. - GLuint pbo; - uint8_t *y_ptr, *cbcr_ptr; - size_t y_offset, cbcr_offset; - }; - GLSurface gl_surfaces[SURFACE_NUM]; - - VAConfigID config_id; - VAContextID context_id; - VAEncSequenceParameterBufferH264 seq_param; - VAEncPictureParameterBufferH264 pic_param; - VAEncSliceParameterBufferH264 slice_param; - VAPictureH264 CurrentCurrPic; - VAPictureH264 ReferenceFrames[MAX_NUM_REF1], RefPicList0_P[MAX_NUM_REF2], RefPicList0_B[MAX_NUM_REF2], RefPicList1_B[MAX_NUM_REF2]; - - // Static quality settings. - static constexpr unsigned int frame_bitrate = 15000000 / 60; // Doesn't really matter; only initial_qp does. - static constexpr unsigned int num_ref_frames = 2; - static constexpr int initial_qp = 15; - static constexpr int minimal_qp = 0; - static constexpr int intra_period = 30; - static constexpr int intra_idr_period = MAX_FPS; // About a second; more at lower frame rates. Not ideal. - - // Quality settings that are meant to be static, but might be overridden - // by the profile. - int constraint_set_flag = 0; - int h264_packedheader = 0; /* support pack header? */ - int h264_maxref = (1<<16|1); - int h264_entropy_mode = 1; /* cabac */ - int ip_period = 3; - - int rc_mode = -1; - unsigned int current_frame_num = 0; - unsigned int numShortTerm = 0; - - int frame_width; - int frame_height; - int frame_width_mbaligned; - int frame_height_mbaligned; -}; - // Supposedly vaRenderPicture() is supposed to destroy the buffer implicitly, // but if we don't delete it here, we get leaks. The GStreamer implementation // does the same. @@ -544,6 +322,7 @@ void QuickSyncEncoderImpl::sps_rbsp(bitstream *bs) if ( false ) { bitstream_put_ui(bs, 0, 1); /* vui_parameters_present_flag */ } else { + // See H.264 annex E for the definition of this header. bitstream_put_ui(bs, 1, 1); /* vui_parameters_present_flag */ bitstream_put_ui(bs, 0, 1); /* aspect_ratio_info_present_flag */ bitstream_put_ui(bs, 0, 1); /* overscan_info_present_flag */ @@ -555,7 +334,11 @@ void QuickSyncEncoderImpl::sps_rbsp(bitstream *bs) { bitstream_put_ui(bs, 1, 8); /* colour_primaries (1 = BT.709) */ bitstream_put_ui(bs, 2, 8); /* transfer_characteristics (2 = unspecified, since we use sRGB) */ - bitstream_put_ui(bs, 6, 8); /* matrix_coefficients (6 = BT.601/SMPTE 170M) */ + if (global_flags.ycbcr_rec709_coefficients) { + bitstream_put_ui(bs, 1, 8); /* matrix_coefficients (1 = BT.709) */ + } else { + bitstream_put_ui(bs, 6, 8); /* matrix_coefficients (6 = BT.601/SMPTE 170M) */ + } } } bitstream_put_ui(bs, 0, 1); /* chroma_loc_info_present_flag */ @@ -1149,8 +932,8 @@ int QuickSyncEncoderImpl::setup_encode() VAStatus va_status; VASurfaceID *tmp_surfaceid; int codedbuf_size, i; - static VASurfaceID src_surface[SURFACE_NUM]; - static VASurfaceID ref_surface[SURFACE_NUM]; + VASurfaceID src_surface[SURFACE_NUM]; + VASurfaceID ref_surface[SURFACE_NUM]; va_status = vaCreateConfig(va_dpy, h264_profile, VAEntrypointEncSlice, &config_attrib[0], config_attrib_num, &config_id); @@ -1201,17 +984,12 @@ int QuickSyncEncoderImpl::setup_encode() //glGenFramebuffers(SURFACE_NUM, fbos); for (i = 0; i < SURFACE_NUM; i++) { - glGenTextures(1, &gl_surfaces[i].y_tex); - glGenTextures(1, &gl_surfaces[i].cbcr_tex); - - if (!use_zerocopy) { - // Create Y image. - glBindTexture(GL_TEXTURE_2D, gl_surfaces[i].y_tex); - glTexStorage2D(GL_TEXTURE_2D, 1, GL_R8, frame_width, frame_height); - - // Create CbCr image. - glBindTexture(GL_TEXTURE_2D, gl_surfaces[i].cbcr_tex); - glTexStorage2D(GL_TEXTURE_2D, 1, GL_RG8, frame_width / 2, frame_height / 2); + if (use_zerocopy) { + gl_surfaces[i].y_tex = resource_pool->create_2d_texture(GL_R8, 1, 1); + gl_surfaces[i].cbcr_tex = resource_pool->create_2d_texture(GL_RG8, 1, 1); + } else { + gl_surfaces[i].y_tex = resource_pool->create_2d_texture(GL_R8, frame_width, frame_height); + gl_surfaces[i].cbcr_tex = resource_pool->create_2d_texture(GL_RG8, frame_width / 2, frame_height / 2); // Generate a PBO to read into. It doesn't necessarily fit 1:1 with the VA-API // buffers, due to potentially differing pitch. @@ -1613,6 +1391,10 @@ void QuickSyncEncoderImpl::save_codeddata(storage_task task) } vaUnmapBuffer(va_dpy, gl_surfaces[task.display_order % SURFACE_NUM].coded_buf); + static int frameno = 0; + print_latency("Current QuickSync latency (video inputs → disk mux):", + task.received_ts, (task.frame_type == FRAME_B), &frameno); + { // Add video. AVPacket pkt; @@ -1635,26 +1417,6 @@ void QuickSyncEncoderImpl::save_codeddata(storage_task task) stream_mux->add_packet(pkt, task.pts + global_delay(), task.dts + global_delay()); } } - // Encode and add all audio frames up to and including the pts of this video frame. - for ( ;; ) { - int64_t audio_pts; - vector audio; - { - unique_lock lock(frame_queue_mutex); - frame_queue_nonempty.wait(lock, [this]{ return storage_thread_should_quit || !pending_audio_frames.empty(); }); - if (storage_thread_should_quit && pending_audio_frames.empty()) return; - auto it = pending_audio_frames.begin(); - if (it->first > task.pts) break; - audio_pts = it->first; - audio = move(it->second); - pending_audio_frames.erase(it); - } - - file_audio_encoder->encode_audio(audio, audio_pts + global_delay()); - stream_audio_encoder->encode_audio(audio, audio_pts + global_delay()); - - if (audio_pts == task.pts) break; - } } @@ -1694,27 +1456,37 @@ void QuickSyncEncoderImpl::storage_task_thread() } } -int QuickSyncEncoderImpl::release_encode() +void QuickSyncEncoderImpl::release_encode() { for (unsigned i = 0; i < SURFACE_NUM; i++) { vaDestroyBuffer(va_dpy, gl_surfaces[i].coded_buf); vaDestroySurfaces(va_dpy, &gl_surfaces[i].src_surface, 1); vaDestroySurfaces(va_dpy, &gl_surfaces[i].ref_surface, 1); + } + + vaDestroyContext(va_dpy, context_id); + vaDestroyConfig(va_dpy, config_id); +} + +void QuickSyncEncoderImpl::release_gl_resources() +{ + assert(is_shutdown); + if (has_released_gl_resources) { + return; + } + for (unsigned i = 0; i < SURFACE_NUM; i++) { if (!use_zerocopy) { glBindBuffer(GL_PIXEL_PACK_BUFFER, gl_surfaces[i].pbo); glUnmapBuffer(GL_PIXEL_PACK_BUFFER); glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); glDeleteBuffers(1, &gl_surfaces[i].pbo); } - glDeleteTextures(1, &gl_surfaces[i].y_tex); - glDeleteTextures(1, &gl_surfaces[i].cbcr_tex); + resource_pool->release_2d_texture(gl_surfaces[i].y_tex); + resource_pool->release_2d_texture(gl_surfaces[i].cbcr_tex); } - vaDestroyContext(va_dpy, context_id); - vaDestroyConfig(va_dpy, config_id); - - return 0; + has_released_gl_resources = true; } int QuickSyncEncoderImpl::deinit_va() @@ -1726,14 +1498,10 @@ int QuickSyncEncoderImpl::deinit_va() return 0; } -namespace { - -} // namespace - -QuickSyncEncoderImpl::QuickSyncEncoderImpl(const std::string &filename, QSurface *surface, const string &va_display, int width, int height, Mux *stream_mux, AudioEncoder *stream_audio_encoder, X264Encoder *x264_encoder) - : current_storage_frame(0), surface(surface), stream_audio_encoder(stream_audio_encoder), x264_encoder(x264_encoder), stream_mux(stream_mux), frame_width(width), frame_height(height) +QuickSyncEncoderImpl::QuickSyncEncoderImpl(const std::string &filename, movit::ResourcePool *resource_pool, QSurface *surface, const string &va_display, int width, int height, AVOutputFormat *oformat, X264Encoder *x264_encoder, DiskSpaceEstimator *disk_space_estimator) + : current_storage_frame(0), resource_pool(resource_pool), surface(surface), x264_encoder(x264_encoder), frame_width(width), frame_height(height), disk_space_estimator(disk_space_estimator) { - file_audio_encoder.reset(new AudioEncoder(AUDIO_OUTPUT_CODEC_NAME, DEFAULT_AUDIO_OUTPUT_BIT_RATE)); + file_audio_encoder.reset(new AudioEncoder(AUDIO_OUTPUT_CODEC_NAME, DEFAULT_AUDIO_OUTPUT_BIT_RATE, oformat)); open_output_file(filename); file_audio_encoder->add_mux(file_mux.get()); @@ -1742,10 +1510,6 @@ QuickSyncEncoderImpl::QuickSyncEncoderImpl(const std::string &filename, QSurface //print_input(); - if (global_flags.uncompressed_video_to_http || - global_flags.x264_video_to_http) { - reorderer.reset(new FrameReorderer(ip_period - 1, frame_width, frame_height)); - } if (global_flags.x264_video_to_http) { assert(x264_encoder != nullptr); } else { @@ -1774,12 +1538,14 @@ QuickSyncEncoderImpl::QuickSyncEncoderImpl(const std::string &filename, QSurface exit(1); } encode_thread_func(); + delete_context(context); }); } QuickSyncEncoderImpl::~QuickSyncEncoderImpl() { shutdown(); + release_gl_resources(); } bool QuickSyncEncoderImpl::begin_frame(GLuint *y_tex, GLuint *cbcr_tex) @@ -1856,11 +1622,7 @@ bool QuickSyncEncoderImpl::begin_frame(GLuint *y_tex, GLuint *cbcr_tex) void QuickSyncEncoderImpl::add_audio(int64_t pts, vector audio) { assert(!is_shutdown); - { - unique_lock lock(frame_queue_mutex); - pending_audio_frames[pts] = move(audio); - } - frame_queue_nonempty.notify_all(); + file_audio_encoder->encode_audio(audio, pts + global_delay()); } RefCountedGLsync QuickSyncEncoderImpl::end_frame(int64_t pts, int64_t duration, const vector &input_frames) @@ -1902,7 +1664,7 @@ RefCountedGLsync QuickSyncEncoderImpl::end_frame(int64_t pts, int64_t duration, { unique_lock lock(frame_queue_mutex); - pending_video_frames[current_storage_frame] = PendingFrame{ fence, input_frames, pts, duration }; + pending_video_frames.push(PendingFrame{ fence, input_frames, pts, duration }); ++current_storage_frame; } frame_queue_nonempty.notify_all(); @@ -1928,7 +1690,9 @@ void QuickSyncEncoderImpl::shutdown() storage_task_queue_changed.notify_all(); } storage_thread.join(); - encode_remaining_audio(); + + // Encode any leftover audio in the queues, and also any delayed frames. + file_audio_encoder->encode_last_audio(); release_encode(); deinit_va(); @@ -1951,65 +1715,87 @@ void QuickSyncEncoderImpl::open_output_file(const std::string &filename) exit(1); } - file_mux.reset(new Mux(avctx, frame_width, frame_height, Mux::CODEC_H264, file_audio_encoder->get_codec(), TIMEBASE, DEFAULT_AUDIO_OUTPUT_BIT_RATE, nullptr)); + string video_extradata = ""; // FIXME: See other comment about global headers. + AVCodecParametersWithDeleter audio_codecpar = file_audio_encoder->get_codec_parameters(); + file_mux.reset(new Mux(avctx, frame_width, frame_height, Mux::CODEC_H264, video_extradata, audio_codecpar.get(), TIMEBASE, + std::bind(&DiskSpaceEstimator::report_write, disk_space_estimator, filename, _1))); } void QuickSyncEncoderImpl::encode_thread_func() { int64_t last_dts = -1; int gop_start_display_frame_num = 0; - for (int encoding_frame_num = 0; ; ++encoding_frame_num) { + for (int display_frame_num = 0; ; ++display_frame_num) { + // Wait for the frame to be in the queue. Note that this only means + // we started rendering it. PendingFrame frame; - int pts_lag; - int frame_type, display_frame_num; - encoding2display_order(encoding_frame_num, intra_period, intra_idr_period, ip_period, - &display_frame_num, &frame_type, &pts_lag); - if (frame_type == FRAME_IDR) { - numShortTerm = 0; - current_frame_num = 0; - gop_start_display_frame_num = display_frame_num; - } - { unique_lock lock(frame_queue_mutex); - frame_queue_nonempty.wait(lock, [this, display_frame_num]{ - return encode_thread_should_quit || pending_video_frames.count(display_frame_num) != 0; + frame_queue_nonempty.wait(lock, [this]{ + return encode_thread_should_quit || !pending_video_frames.empty(); }); - if (encode_thread_should_quit && pending_video_frames.count(display_frame_num) == 0) { - // We have queued frames that were supposed to be B-frames, - // but will be no P-frame to encode them against. Encode them all - // as P-frames instead. Note that this happens under the mutex, + if (encode_thread_should_quit && pending_video_frames.empty()) { + // We may have queued frames left in the reorder buffer + // that were supposed to be B-frames, but have no P-frame + // to be encoded against. If so, encode them all as + // P-frames instead. Note that this happens under the mutex, // but nobody else uses it at this point, since we're shutting down, // so there's no contention. - encode_remaining_frames_as_p(encoding_frame_num, gop_start_display_frame_num, last_dts); + encode_remaining_frames_as_p(quicksync_encoding_frame_num, gop_start_display_frame_num, last_dts); return; } else { - frame = move(pending_video_frames[display_frame_num]); - pending_video_frames.erase(display_frame_num); + frame = move(pending_video_frames.front()); + pending_video_frames.pop(); } } - // Determine the dts of this frame. - int64_t dts; - if (pts_lag == -1) { - assert(last_dts != -1); - dts = last_dts + (TIMEBASE / MAX_FPS); - } else { - dts = frame.pts - pts_lag; - } - last_dts = dts; + // Pass the frame on to x264 (or uncompressed to HTTP) as needed. + // Note that this implicitly waits for the frame to be done rendering. + pass_frame(frame, display_frame_num, frame.pts, frame.duration); + reorder_buffer[display_frame_num] = move(frame); + + // Now encode as many QuickSync frames as we can using the frames we have available. + // (It could be zero, or it could be multiple.) FIXME: make a function. + for ( ;; ) { + int pts_lag; + int frame_type, quicksync_display_frame_num; + encoding2display_order(quicksync_encoding_frame_num, intra_period, intra_idr_period, ip_period, + &quicksync_display_frame_num, &frame_type, &pts_lag); + if (!reorder_buffer.count(quicksync_display_frame_num)) { + break; + } + frame = move(reorder_buffer[quicksync_display_frame_num]); + reorder_buffer.erase(quicksync_display_frame_num); - encode_frame(frame, encoding_frame_num, display_frame_num, gop_start_display_frame_num, frame_type, frame.pts, dts, frame.duration); + if (frame_type == FRAME_IDR) { + numShortTerm = 0; + current_frame_num = 0; + gop_start_display_frame_num = quicksync_display_frame_num; + } + + // Determine the dts of this frame. + int64_t dts; + if (pts_lag == -1) { + assert(last_dts != -1); + dts = last_dts + (TIMEBASE / MAX_FPS); + } else { + dts = frame.pts - pts_lag; + } + last_dts = dts; + + encode_frame(frame, quicksync_encoding_frame_num, quicksync_display_frame_num, gop_start_display_frame_num, frame_type, frame.pts, dts, frame.duration); + ++quicksync_encoding_frame_num; + } } } void QuickSyncEncoderImpl::encode_remaining_frames_as_p(int encoding_frame_num, int gop_start_display_frame_num, int64_t last_dts) { - if (pending_video_frames.empty()) { + if (reorder_buffer.empty()) { return; } - for (auto &pending_frame : pending_video_frames) { + for (auto &pending_frame : reorder_buffer) { int display_frame_num = pending_frame.first; assert(display_frame_num > 0); PendingFrame frame = move(pending_frame.second); @@ -2018,39 +1804,6 @@ void QuickSyncEncoderImpl::encode_remaining_frames_as_p(int encoding_frame_num, encode_frame(frame, encoding_frame_num++, display_frame_num, gop_start_display_frame_num, FRAME_P, frame.pts, dts, frame.duration); last_dts = dts; } - - if (global_flags.uncompressed_video_to_http || - global_flags.x264_video_to_http) { - // Add frames left in reorderer. - while (!reorderer->empty()) { - FrameReorderer::Frame output_frame = reorderer->get_first_frame(); - if (global_flags.uncompressed_video_to_http) { - add_packet_for_uncompressed_frame(output_frame.pts, output_frame.duration, output_frame.data); - } else { - assert(global_flags.x264_video_to_http); - x264_encoder->add_frame(output_frame.pts, output_frame.duration, output_frame.data); - } - } - } -} - -void QuickSyncEncoderImpl::encode_remaining_audio() -{ - // This really ought to be empty by now, but just to be sure... - for (auto &pending_frame : pending_audio_frames) { - int64_t audio_pts = pending_frame.first; - vector audio = move(pending_frame.second); - - file_audio_encoder->encode_audio(audio, audio_pts + global_delay()); - if (stream_audio_encoder) { - stream_audio_encoder->encode_audio(audio, audio_pts + global_delay()); - } - } - pending_audio_frames.clear(); - - // Encode any leftover audio in the queues, and also any delayed frames. - // Note: stream_audio_encoder is not owned by us, so don't call encode_last_audio(). - file_audio_encoder->encode_last_audio(); } void QuickSyncEncoderImpl::add_packet_for_uncompressed_frame(int64_t pts, int64_t duration, const uint8_t *data) @@ -2083,8 +1836,7 @@ void memcpy_with_pitch(uint8_t *dst, const uint8_t *src, size_t src_width, size_ } // namespace -void QuickSyncEncoderImpl::encode_frame(QuickSyncEncoderImpl::PendingFrame frame, int encoding_frame_num, int display_frame_num, int gop_start_display_frame_num, - int frame_type, int64_t pts, int64_t dts, int64_t duration) +void QuickSyncEncoderImpl::pass_frame(QuickSyncEncoderImpl::PendingFrame frame, int display_frame_num, int64_t pts, int64_t duration) { // Wait for the GPU to be done with the frame. GLenum sync_status; @@ -2094,9 +1846,28 @@ void QuickSyncEncoderImpl::encode_frame(QuickSyncEncoderImpl::PendingFrame frame } while (sync_status == GL_TIMEOUT_EXPIRED); assert(sync_status != GL_WAIT_FAILED); + ReceivedTimestamps received_ts = find_received_timestamp(frame.input_frames); + static int frameno = 0; + print_latency("Current mixer latency (video inputs → ready for encode):", + received_ts, false, &frameno); + // Release back any input frames we needed to render this frame. frame.input_frames.clear(); + GLSurface *surf = &gl_surfaces[display_frame_num % SURFACE_NUM]; + uint8_t *data = reinterpret_cast(surf->y_ptr); + if (global_flags.uncompressed_video_to_http) { + add_packet_for_uncompressed_frame(pts, duration, data); + } else if (global_flags.x264_video_to_http) { + x264_encoder->add_frame(pts, duration, data, received_ts); + } +} + +void QuickSyncEncoderImpl::encode_frame(QuickSyncEncoderImpl::PendingFrame frame, int encoding_frame_num, int display_frame_num, int gop_start_display_frame_num, + int frame_type, int64_t pts, int64_t dts, int64_t duration) +{ + const ReceivedTimestamps received_ts = find_received_timestamp(frame.input_frames); + GLSurface *surf = &gl_surfaces[display_frame_num % SURFACE_NUM]; VAStatus va_status; @@ -2106,6 +1877,7 @@ void QuickSyncEncoderImpl::encode_frame(QuickSyncEncoderImpl::PendingFrame frame va_status = vaReleaseBufferHandle(va_dpy, surf->surface_image.buf); CHECK_VASTATUS(va_status, "vaReleaseBufferHandle"); } else { + // Upload the frame to VA-API. unsigned char *surface_p = nullptr; vaMapBuffer(va_dpy, surf->surface_image.buf, (void **)&surface_p); @@ -2117,21 +1889,6 @@ void QuickSyncEncoderImpl::encode_frame(QuickSyncEncoderImpl::PendingFrame frame va_status = vaUnmapBuffer(va_dpy, surf->surface_image.buf); CHECK_VASTATUS(va_status, "vaUnmapBuffer"); - - if (global_flags.uncompressed_video_to_http || - global_flags.x264_video_to_http) { - // Add uncompressed video. (Note that pts == dts here.) - // Delay needs to match audio. - FrameReorderer::Frame output_frame = reorderer->reorder_frame(pts + global_delay(), duration, reinterpret_cast(surf->y_ptr)); - if (output_frame.data != nullptr) { - if (global_flags.uncompressed_video_to_http) { - add_packet_for_uncompressed_frame(output_frame.pts, output_frame.duration, output_frame.data); - } else { - assert(global_flags.x264_video_to_http); - x264_encoder->add_frame(output_frame.pts, output_frame.duration, output_frame.data); - } - } - } } va_status = vaDestroyImage(va_dpy, surf->surface_image.image_id); @@ -2143,6 +1900,9 @@ void QuickSyncEncoderImpl::encode_frame(QuickSyncEncoderImpl::PendingFrame frame CHECK_VASTATUS(va_status, "vaBeginPicture"); if (frame_type == FRAME_IDR) { + // FIXME: If the mux wants global headers, we should not put the + // SPS/PPS before each IDR frame, but rather put it into the + // codec extradata (formatted differently?). render_sequence(); render_picture(frame_type, display_frame_num, gop_start_display_frame_num); if (h264_packedheader) { @@ -2166,14 +1926,15 @@ void QuickSyncEncoderImpl::encode_frame(QuickSyncEncoderImpl::PendingFrame frame tmp.pts = pts; tmp.dts = dts; tmp.duration = duration; + tmp.received_ts = received_ts; storage_task_enqueue(move(tmp)); update_ReferenceFrames(frame_type); } // Proxy object. -QuickSyncEncoder::QuickSyncEncoder(const std::string &filename, QSurface *surface, const string &va_display, int width, int height, Mux *stream_mux, AudioEncoder *stream_audio_encoder, X264Encoder *x264_encoder) - : impl(new QuickSyncEncoderImpl(filename, surface, va_display, width, height, stream_mux, stream_audio_encoder, x264_encoder)) {} +QuickSyncEncoder::QuickSyncEncoder(const std::string &filename, movit::ResourcePool *resource_pool, QSurface *surface, const string &va_display, int width, int height, AVOutputFormat *oformat, X264Encoder *x264_encoder, DiskSpaceEstimator *disk_space_estimator) + : impl(new QuickSyncEncoderImpl(filename, resource_pool, surface, va_display, width, height, oformat, x264_encoder, disk_space_estimator)) {} // Must be defined here because unique_ptr<> destructor needs to know the impl. QuickSyncEncoder::~QuickSyncEncoder() {} @@ -2197,3 +1958,12 @@ void QuickSyncEncoder::shutdown() { impl->shutdown(); } + +void QuickSyncEncoder::set_stream_mux(Mux *mux) +{ + impl->set_stream_mux(mux); +} + +int64_t QuickSyncEncoder::global_delay() const { + return impl->global_delay(); +}