X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=quicksync_encoder.cpp;h=875b264fdfe90342a0efcdfba78c38d33243ca66;hb=498987ac24daccf573b385107540f12929eb89b2;hp=7edb3cc03fccc67258f5ecc4066f7f4ab14a2d33;hpb=dc7db20e764306a33dbf80cb50da7822a79cfda2;p=nageru diff --git a/quicksync_encoder.cpp b/quicksync_encoder.cpp index 7edb3cc..875b264 100644 --- a/quicksync_encoder.cpp +++ b/quicksync_encoder.cpp @@ -118,44 +118,6 @@ static constexpr int rc_default_modes[] = { // Priority list of modes. using namespace std; -FrameReorderer::FrameReorderer(unsigned queue_length, int width, int height) - : queue_length(queue_length), width(width), height(height) -{ - for (unsigned i = 0; i < queue_length; ++i) { - owner.emplace_back(new uint8_t[width * height * 2]); - freelist.push(owner.back().get()); - } -} - -FrameReorderer::Frame FrameReorderer::reorder_frame(int64_t pts, int64_t duration, uint8_t *data, const ReceivedTimestamps &received_ts) -{ - if (queue_length == 0) { - return Frame{pts, duration, data, received_ts}; - } - - assert(!freelist.empty()); - uint8_t *storage = freelist.top(); - freelist.pop(); - memcpy(storage, data, width * height * 2); - frames.push(Frame{pts, duration, storage, received_ts}); - - if (frames.size() >= queue_length) { - return get_first_frame(); - } else { - return Frame{-1, -1, nullptr, steady_clock::time_point::min(), steady_clock::time_point::min()}; - } -} - -FrameReorderer::Frame FrameReorderer::get_first_frame() -{ - assert(!frames.empty()); - Frame storage = frames.top(); - frames.pop(); - freelist.push(storage.data); - return storage; -} - - // Supposedly vaRenderPicture() is supposed to destroy the buffer implicitly, // but if we don't delete it here, we get leaks. The GStreamer implementation // does the same. @@ -360,6 +322,7 @@ void QuickSyncEncoderImpl::sps_rbsp(bitstream *bs) if ( false ) { bitstream_put_ui(bs, 0, 1); /* vui_parameters_present_flag */ } else { + // See H.264 annex E for the definition of this header. bitstream_put_ui(bs, 1, 1); /* vui_parameters_present_flag */ bitstream_put_ui(bs, 0, 1); /* aspect_ratio_info_present_flag */ bitstream_put_ui(bs, 0, 1); /* overscan_info_present_flag */ @@ -371,7 +334,11 @@ void QuickSyncEncoderImpl::sps_rbsp(bitstream *bs) { bitstream_put_ui(bs, 1, 8); /* colour_primaries (1 = BT.709) */ bitstream_put_ui(bs, 2, 8); /* transfer_characteristics (2 = unspecified, since we use sRGB) */ - bitstream_put_ui(bs, 6, 8); /* matrix_coefficients (6 = BT.601/SMPTE 170M) */ + if (global_flags.ycbcr_rec709_coefficients) { + bitstream_put_ui(bs, 1, 8); /* matrix_coefficients (1 = BT.709) */ + } else { + bitstream_put_ui(bs, 6, 8); /* matrix_coefficients (6 = BT.601/SMPTE 170M) */ + } } } bitstream_put_ui(bs, 0, 1); /* chroma_loc_info_present_flag */ @@ -1543,10 +1510,6 @@ QuickSyncEncoderImpl::QuickSyncEncoderImpl(const std::string &filename, movit::R //print_input(); - if (global_flags.uncompressed_video_to_http || - global_flags.x264_video_to_http) { - reorderer.reset(new FrameReorderer(ip_period - 1, frame_width, frame_height)); - } if (global_flags.x264_video_to_http) { assert(x264_encoder != nullptr); } else { @@ -1701,7 +1664,7 @@ RefCountedGLsync QuickSyncEncoderImpl::end_frame(int64_t pts, int64_t duration, { unique_lock lock(frame_queue_mutex); - pending_video_frames[current_storage_frame] = PendingFrame{ fence, input_frames, pts, duration }; + pending_video_frames.push(PendingFrame{ fence, input_frames, pts, duration }); ++current_storage_frame; } frame_queue_nonempty.notify_all(); @@ -1762,58 +1725,77 @@ void QuickSyncEncoderImpl::encode_thread_func() { int64_t last_dts = -1; int gop_start_display_frame_num = 0; - for (int encoding_frame_num = 0; ; ++encoding_frame_num) { + for (int display_frame_num = 0; ; ++display_frame_num) { + // Wait for the frame to be in the queue. Note that this only means + // we started rendering it. PendingFrame frame; - int pts_lag; - int frame_type, display_frame_num; - encoding2display_order(encoding_frame_num, intra_period, intra_idr_period, ip_period, - &display_frame_num, &frame_type, &pts_lag); - if (frame_type == FRAME_IDR) { - numShortTerm = 0; - current_frame_num = 0; - gop_start_display_frame_num = display_frame_num; - } - { unique_lock lock(frame_queue_mutex); - frame_queue_nonempty.wait(lock, [this, display_frame_num]{ - return encode_thread_should_quit || pending_video_frames.count(display_frame_num) != 0; + frame_queue_nonempty.wait(lock, [this]{ + return encode_thread_should_quit || !pending_video_frames.empty(); }); - if (encode_thread_should_quit && pending_video_frames.count(display_frame_num) == 0) { - // We have queued frames that were supposed to be B-frames, - // but will be no P-frame to encode them against. Encode them all - // as P-frames instead. Note that this happens under the mutex, + if (encode_thread_should_quit && pending_video_frames.empty()) { + // We may have queued frames left in the reorder buffer + // that were supposed to be B-frames, but have no P-frame + // to be encoded against. If so, encode them all as + // P-frames instead. Note that this happens under the mutex, // but nobody else uses it at this point, since we're shutting down, // so there's no contention. - encode_remaining_frames_as_p(encoding_frame_num, gop_start_display_frame_num, last_dts); + encode_remaining_frames_as_p(quicksync_encoding_frame_num, gop_start_display_frame_num, last_dts); return; } else { - frame = move(pending_video_frames[display_frame_num]); - pending_video_frames.erase(display_frame_num); + frame = move(pending_video_frames.front()); + pending_video_frames.pop(); } } - // Determine the dts of this frame. - int64_t dts; - if (pts_lag == -1) { - assert(last_dts != -1); - dts = last_dts + (TIMEBASE / MAX_FPS); - } else { - dts = frame.pts - pts_lag; - } - last_dts = dts; + // Pass the frame on to x264 (or uncompressed to HTTP) as needed. + // Note that this implicitly waits for the frame to be done rendering. + pass_frame(frame, display_frame_num, frame.pts, frame.duration); + reorder_buffer[display_frame_num] = move(frame); + + // Now encode as many QuickSync frames as we can using the frames we have available. + // (It could be zero, or it could be multiple.) FIXME: make a function. + for ( ;; ) { + int pts_lag; + int frame_type, quicksync_display_frame_num; + encoding2display_order(quicksync_encoding_frame_num, intra_period, intra_idr_period, ip_period, + &quicksync_display_frame_num, &frame_type, &pts_lag); + if (!reorder_buffer.count(quicksync_display_frame_num)) { + break; + } + frame = move(reorder_buffer[quicksync_display_frame_num]); + reorder_buffer.erase(quicksync_display_frame_num); - encode_frame(frame, encoding_frame_num, display_frame_num, gop_start_display_frame_num, frame_type, frame.pts, dts, frame.duration); + if (frame_type == FRAME_IDR) { + numShortTerm = 0; + current_frame_num = 0; + gop_start_display_frame_num = quicksync_display_frame_num; + } + + // Determine the dts of this frame. + int64_t dts; + if (pts_lag == -1) { + assert(last_dts != -1); + dts = last_dts + (TIMEBASE / MAX_FPS); + } else { + dts = frame.pts - pts_lag; + } + last_dts = dts; + + encode_frame(frame, quicksync_encoding_frame_num, quicksync_display_frame_num, gop_start_display_frame_num, frame_type, frame.pts, dts, frame.duration); + ++quicksync_encoding_frame_num; + } } } void QuickSyncEncoderImpl::encode_remaining_frames_as_p(int encoding_frame_num, int gop_start_display_frame_num, int64_t last_dts) { - if (pending_video_frames.empty()) { + if (reorder_buffer.empty()) { return; } - for (auto &pending_frame : pending_video_frames) { + for (auto &pending_frame : reorder_buffer) { int display_frame_num = pending_frame.first; assert(display_frame_num > 0); PendingFrame frame = move(pending_frame.second); @@ -1822,20 +1804,6 @@ void QuickSyncEncoderImpl::encode_remaining_frames_as_p(int encoding_frame_num, encode_frame(frame, encoding_frame_num++, display_frame_num, gop_start_display_frame_num, FRAME_P, frame.pts, dts, frame.duration); last_dts = dts; } - - if (global_flags.uncompressed_video_to_http || - global_flags.x264_video_to_http) { - // Add frames left in reorderer. - while (!reorderer->empty()) { - FrameReorderer::Frame output_frame = reorderer->get_first_frame(); - if (global_flags.uncompressed_video_to_http) { - add_packet_for_uncompressed_frame(output_frame.pts, output_frame.duration, output_frame.data); - } else { - assert(global_flags.x264_video_to_http); - x264_encoder->add_frame(output_frame.pts, output_frame.duration, output_frame.data, output_frame.received_ts); - } - } - } } void QuickSyncEncoderImpl::add_packet_for_uncompressed_frame(int64_t pts, int64_t duration, const uint8_t *data) @@ -1868,8 +1836,7 @@ void memcpy_with_pitch(uint8_t *dst, const uint8_t *src, size_t src_width, size_ } // namespace -void QuickSyncEncoderImpl::encode_frame(QuickSyncEncoderImpl::PendingFrame frame, int encoding_frame_num, int display_frame_num, int gop_start_display_frame_num, - int frame_type, int64_t pts, int64_t dts, int64_t duration) +void QuickSyncEncoderImpl::pass_frame(QuickSyncEncoderImpl::PendingFrame frame, int display_frame_num, int64_t pts, int64_t duration) { // Wait for the GPU to be done with the frame. GLenum sync_status; @@ -1879,19 +1846,28 @@ void QuickSyncEncoderImpl::encode_frame(QuickSyncEncoderImpl::PendingFrame frame } while (sync_status == GL_TIMEOUT_EXPIRED); assert(sync_status != GL_WAIT_FAILED); - // Find min and max timestamp of all input frames that have a timestamp. - steady_clock::time_point min_ts = steady_clock::time_point::max(), max_ts = steady_clock::time_point::min(); - for (const RefCountedFrame &input_frame : frame.input_frames) { - if (input_frame && input_frame->received_timestamp > steady_clock::time_point::min()) { - min_ts = min(min_ts, input_frame->received_timestamp); - max_ts = max(max_ts, input_frame->received_timestamp); - } - } - const ReceivedTimestamps received_ts{ min_ts, max_ts }; + ReceivedTimestamps received_ts = find_received_timestamp(frame.input_frames); + static int frameno = 0; + print_latency("Current mixer latency (video inputs → ready for encode):", + received_ts, false, &frameno); // Release back any input frames we needed to render this frame. frame.input_frames.clear(); + GLSurface *surf = &gl_surfaces[display_frame_num % SURFACE_NUM]; + uint8_t *data = reinterpret_cast(surf->y_ptr); + if (global_flags.uncompressed_video_to_http) { + add_packet_for_uncompressed_frame(pts, duration, data); + } else if (global_flags.x264_video_to_http) { + x264_encoder->add_frame(pts, duration, data, received_ts); + } +} + +void QuickSyncEncoderImpl::encode_frame(QuickSyncEncoderImpl::PendingFrame frame, int encoding_frame_num, int display_frame_num, int gop_start_display_frame_num, + int frame_type, int64_t pts, int64_t dts, int64_t duration) +{ + const ReceivedTimestamps received_ts = find_received_timestamp(frame.input_frames); + GLSurface *surf = &gl_surfaces[display_frame_num % SURFACE_NUM]; VAStatus va_status; @@ -1901,6 +1877,7 @@ void QuickSyncEncoderImpl::encode_frame(QuickSyncEncoderImpl::PendingFrame frame va_status = vaReleaseBufferHandle(va_dpy, surf->surface_image.buf); CHECK_VASTATUS(va_status, "vaReleaseBufferHandle"); } else { + // Upload the frame to VA-API. unsigned char *surface_p = nullptr; vaMapBuffer(va_dpy, surf->surface_image.buf, (void **)&surface_p); @@ -1912,27 +1889,8 @@ void QuickSyncEncoderImpl::encode_frame(QuickSyncEncoderImpl::PendingFrame frame va_status = vaUnmapBuffer(va_dpy, surf->surface_image.buf); CHECK_VASTATUS(va_status, "vaUnmapBuffer"); - - if (global_flags.uncompressed_video_to_http || - global_flags.x264_video_to_http) { - // Add uncompressed video. (Note that pts == dts here.) - // Delay needs to match audio. - FrameReorderer::Frame output_frame = reorderer->reorder_frame(pts + global_delay(), duration, reinterpret_cast(surf->y_ptr), received_ts); - if (output_frame.data != nullptr) { - if (global_flags.uncompressed_video_to_http) { - add_packet_for_uncompressed_frame(output_frame.pts, output_frame.duration, output_frame.data); - } else { - assert(global_flags.x264_video_to_http); - x264_encoder->add_frame(output_frame.pts, output_frame.duration, output_frame.data, output_frame.received_ts); - } - } - } } - static int frameno = 0; - print_latency("Current mixer latency (video inputs → ready for encode):", - received_ts, (frame_type == FRAME_B), &frameno); - va_status = vaDestroyImage(va_dpy, surf->surface_image.image_id); CHECK_VASTATUS(va_status, "vaDestroyImage");