git.sesse.net Git - nageru/blobdiff - quicksync_encoder.cpp
Move QuickSyncEncoderImpl's definition into its own header.
[nageru] / quicksync_encoder.cpp
index cc4020f69508ffe9d5706be7a88be8b7cc240da9..7edb3cc03fccc67258f5ecc4066f7f4ab14a2d33 100644 (file)
 #include <va/va_enc_h264.h>
 #include <va/va_x11.h>
 #include <algorithm>
+#include <chrono>
 #include <condition_variable>
 #include <cstddef>
 #include <cstdint>
+#include <functional>
 #include <map>
 #include <memory>
 #include <mutex>
@@ -42,14 +44,19 @@ extern "C" {
 #include "audio_encoder.h"
 #include "context.h"
 #include "defs.h"
+#include "disk_space_estimator.h"
 #include "ffmpeg_raii.h"
 #include "flags.h"
 #include "mux.h"
+#include "print_latency.h"
+#include "quicksync_encoder_impl.h"
 #include "ref_counted_frame.h"
 #include "timebase.h"
 #include "x264_encoder.h"
 
 using namespace std;
+using namespace std::chrono;
+using namespace std::placeholders;
 
 class QOpenGLContext;
 class QSurface;
@@ -91,9 +98,6 @@ class QSurface;
 #define PROFILE_IDC_HIGH        100
    
 #define BITSTREAM_ALLOCATE_STEPPING     4096
-#define SURFACE_NUM 16 /* 16 surfaces for source YUV */
-#define MAX_NUM_REF1 16 // Seemingly a hardware-fixed value, not related to SURFACE_NUM
-#define MAX_NUM_REF2 32 // Seemingly a hardware-fixed value, not related to SURFACE_NUM
 
 static constexpr unsigned int MaxFrameNum = (2<<16);
 static constexpr unsigned int MaxPicOrderCntLsb = (2<<8);
@@ -112,61 +116,8 @@ static constexpr int rc_default_modes[] = {  // Priority list of modes.
 #define SRC_SURFACE_FREE        0
 #define SRC_SURFACE_IN_ENCODING 1
     
-struct __bitstream {
-    unsigned int *buffer;
-    int bit_offset;
-    int max_size_in_dword;
-};
-typedef struct __bitstream bitstream;
-
 using namespace std;
 
-// H.264 video comes out in encoding order (e.g. with two B-frames:
-// 0, 3, 1, 2, 6, 4, 5, etc.), but uncompressed video needs to
-// come in the right order. Since we do everything, including waiting
-// for the frames to come out of OpenGL, in encoding order, we need
-// a reordering buffer for uncompressed frames so that they come out
-// correctly. We go the super-lazy way of not making it understand
-// anything about the true order (which introduces some extra latency,
-// though); we know that for N B-frames we need at most (N-1) frames
-// in the reorder buffer, and can just sort on that.
-//
-// The class also deals with keeping a freelist as needed.
-class FrameReorderer {
-public:
-       FrameReorderer(unsigned queue_length, int width, int height);
-
-       struct Frame {
-               int64_t pts, duration;
-               uint8_t *data;
-
-               // Invert to get the smallest pts first.
-               bool operator< (const Frame &other) const { return pts > other.pts; }
-       };
-
-       // Returns the next frame to insert with its pts, if any. Otherwise -1 and nullptr.
-       // Does _not_ take ownership of data; a copy is taken if needed.
-       // The returned pointer is valid until the next call to reorder_frame, or destruction.
-       // As a special case, if queue_length == 0, will just return pts and data (no reordering needed).
-       Frame reorder_frame(int64_t pts, int64_t duration, uint8_t *data);
-
-       // The same as reorder_frame, but without inserting anything. Used to empty the queue.
-       Frame get_first_frame();
-
-       bool empty() const { return frames.empty(); }
-
-private:
-       unsigned queue_length;
-       int width, height;
-
-       priority_queue<Frame> frames;
-       stack<uint8_t *> freelist;  // Includes the last value returned from reorder_frame.
-
-       // Owns all the pointers. Normally, freelist and frames could do this themselves,
-       // except priority_queue doesn't work well with movable-only types.
-       vector<unique_ptr<uint8_t[]>> owner;
-};
-
 FrameReorderer::FrameReorderer(unsigned queue_length, int width, int height)
     : queue_length(queue_length), width(width), height(height)
 {
@@ -176,22 +127,22 @@ FrameReorderer::FrameReorderer(unsigned queue_length, int width, int height)
        }
 }
 
-FrameReorderer::Frame FrameReorderer::reorder_frame(int64_t pts, int64_t duration, uint8_t *data)
+FrameReorderer::Frame FrameReorderer::reorder_frame(int64_t pts, int64_t duration, uint8_t *data, const ReceivedTimestamps &received_ts)
 {
        // Queues one uncompressed frame and, once at least queue_length frames
        // are buffered, hands back whatever get_first_frame() returns.
        // received_ts is stored in the Frame alongside pts/duration/data.
        //
        // With no reordering configured, the caller's frame is passed straight
        // through: no copy is made and the returned Frame points at `data` itself.
        if (queue_length == 0) {
-               return Frame{pts, duration, data};
+               return Frame{pts, duration, data, received_ts};
        }
 
        // Copy the pixels into a buffer taken from the freelist, so the queued
        // Frame points at the copy rather than at the caller's buffer.
        assert(!freelist.empty());
        uint8_t *storage = freelist.top();
        freelist.pop();
        // NOTE(review): width * height * 2 presumably means two bytes per pixel — confirm.
        memcpy(storage, data, width * height * 2);
-       frames.push(Frame{pts, duration, storage});
+       frames.push(Frame{pts, duration, storage, received_ts});
 
        if (frames.size() >= queue_length) {
                return get_first_frame();
        } else {
                // Not enough frames queued yet: return a sentinel (pts and duration -1,
                // null data, minimum timestamps). Callers in this file detect it by
                // checking data against nullptr.
-               return Frame{-1, -1, nullptr};
+               return Frame{-1, -1, nullptr, steady_clock::time_point::min(), steady_clock::time_point::min()};
        }
 }
 
@@ -204,158 +155,6 @@ FrameReorderer::Frame FrameReorderer::get_first_frame()
        return storage;
 }
 
-class QuickSyncEncoderImpl {
-public:
-       QuickSyncEncoderImpl(const std::string &filename, movit::ResourcePool *resource_pool, QSurface *surface, const string &va_display, int width, int height, AVOutputFormat *oformat, X264Encoder *x264_encoder);
-       ~QuickSyncEncoderImpl();
-       void add_audio(int64_t pts, vector<float> audio);
-       bool begin_frame(GLuint *y_tex, GLuint *cbcr_tex);
-       RefCountedGLsync end_frame(int64_t pts, int64_t duration, const vector<RefCountedFrame> &input_frames);
-       void shutdown();
-       void release_gl_resources();
-       void set_stream_mux(Mux *mux)
-       {
-               stream_mux = mux;
-       }
-
-       // So we never get negative dts.
-       int64_t global_delay() const {
-               return int64_t(ip_period - 1) * (TIMEBASE / MAX_FPS);
-       }
-
-private:
-       struct storage_task {
-               unsigned long long display_order;
-               int frame_type;
-               vector<float> audio;
-               int64_t pts, dts, duration;
-       };
-       struct PendingFrame {
-               RefCountedGLsync fence;
-               vector<RefCountedFrame> input_frames;
-               int64_t pts, duration;
-       };
-
-       void open_output_file(const std::string &filename);
-       void encode_thread_func();
-       void encode_remaining_frames_as_p(int encoding_frame_num, int gop_start_display_frame_num, int64_t last_dts);
-       void add_packet_for_uncompressed_frame(int64_t pts, int64_t duration, const uint8_t *data);
-       void encode_frame(PendingFrame frame, int encoding_frame_num, int display_frame_num, int gop_start_display_frame_num,
-                         int frame_type, int64_t pts, int64_t dts, int64_t duration);
-       void storage_task_thread();
-       void storage_task_enqueue(storage_task task);
-       void save_codeddata(storage_task task);
-       int render_packedsequence();
-       int render_packedpicture();
-       void render_packedslice();
-       int render_sequence();
-       int render_picture(int frame_type, int display_frame_num, int gop_start_display_frame_num);
-       void sps_rbsp(bitstream *bs);
-       void pps_rbsp(bitstream *bs);
-       int build_packed_pic_buffer(unsigned char **header_buffer);
-       int render_slice(int encoding_frame_num, int display_frame_num, int gop_start_display_frame_num, int frame_type);
-       void slice_header(bitstream *bs);
-       int build_packed_seq_buffer(unsigned char **header_buffer);
-       int build_packed_slice_buffer(unsigned char **header_buffer);
-       int init_va(const string &va_display);
-       int deinit_va();
-       void enable_zerocopy_if_possible();
-       VADisplay va_open_display(const string &va_display);
-       void va_close_display(VADisplay va_dpy);
-       int setup_encode();
-       void release_encode();
-       void update_ReferenceFrames(int frame_type);
-       int update_RefPicList(int frame_type);
-
-       bool is_shutdown = false;
-       bool has_released_gl_resources = false;
-       bool use_zerocopy;
-       int drm_fd = -1;
-
-       thread encode_thread, storage_thread;
-
-       mutex storage_task_queue_mutex;
-       condition_variable storage_task_queue_changed;
-       int srcsurface_status[SURFACE_NUM];  // protected by storage_task_queue_mutex
-       queue<storage_task> storage_task_queue;  // protected by storage_task_queue_mutex
-       bool storage_thread_should_quit = false;  // protected by storage_task_queue_mutex
-
-       mutex frame_queue_mutex;
-       condition_variable frame_queue_nonempty;
-       bool encode_thread_should_quit = false;  // under frame_queue_mutex
-
-       int current_storage_frame;
-
-       map<int, PendingFrame> pending_video_frames;  // under frame_queue_mutex
-       movit::ResourcePool *resource_pool;
-       QSurface *surface;
-
-       unique_ptr<AudioEncoder> file_audio_encoder;
-
-       unique_ptr<FrameReorderer> reorderer;
-       X264Encoder *x264_encoder;  // nullptr if not using x264.
-
-       Mux* stream_mux = nullptr;  // To HTTP.
-       unique_ptr<Mux> file_mux;  // To local disk.
-
-       Display *x11_display = nullptr;
-
-       // Encoder parameters
-       VADisplay va_dpy;
-       VAProfile h264_profile = (VAProfile)~0;
-       VAConfigAttrib config_attrib[VAConfigAttribTypeMax];
-       int config_attrib_num = 0, enc_packed_header_idx;
-
-       struct GLSurface {
-               VASurfaceID src_surface, ref_surface;
-               VABufferID coded_buf;
-
-               VAImage surface_image;
-               GLuint y_tex, cbcr_tex;
-
-               // Only if use_zerocopy == true.
-               EGLImage y_egl_image, cbcr_egl_image;
-
-               // Only if use_zerocopy == false.
-               GLuint pbo;
-               uint8_t *y_ptr, *cbcr_ptr;
-               size_t y_offset, cbcr_offset;
-       };
-       GLSurface gl_surfaces[SURFACE_NUM];
-
-       VAConfigID config_id;
-       VAContextID context_id;
-       VAEncSequenceParameterBufferH264 seq_param;
-       VAEncPictureParameterBufferH264 pic_param;
-       VAEncSliceParameterBufferH264 slice_param;
-       VAPictureH264 CurrentCurrPic;
-       VAPictureH264 ReferenceFrames[MAX_NUM_REF1], RefPicList0_P[MAX_NUM_REF2], RefPicList0_B[MAX_NUM_REF2], RefPicList1_B[MAX_NUM_REF2];
-
-       // Static quality settings.
-       static constexpr unsigned int frame_bitrate = 15000000 / 60;  // Doesn't really matter; only initial_qp does.
-       static constexpr unsigned int num_ref_frames = 2;
-       static constexpr int initial_qp = 15;
-       static constexpr int minimal_qp = 0;
-       static constexpr int intra_period = 30;
-       static constexpr int intra_idr_period = MAX_FPS;  // About a second; more at lower frame rates. Not ideal.
-
-       // Quality settings that are meant to be static, but might be overridden
-       // by the profile.
-       int constraint_set_flag = 0;
-       int h264_packedheader = 0; /* support pack header? */
-       int h264_maxref = (1<<16|1);
-       int h264_entropy_mode = 1; /* cabac */
-       int ip_period = 3;
-
-       int rc_mode = -1;
-       unsigned int current_frame_num = 0;
-       unsigned int numShortTerm = 0;
-
-       int frame_width;
-       int frame_height;
-       int frame_width_mbaligned;
-       int frame_height_mbaligned;
-};
 
 // Supposedly vaRenderPicture() is supposed to destroy the buffer implicitly,
 // but if we don't delete it here, we get leaks. The GStreamer implementation
@@ -1625,6 +1424,10 @@ void QuickSyncEncoderImpl::save_codeddata(storage_task task)
        }
        vaUnmapBuffer(va_dpy, gl_surfaces[task.display_order % SURFACE_NUM].coded_buf);
 
+       static int frameno = 0;
+       print_latency("Current QuickSync latency (video inputs → disk mux):",
+               task.received_ts, (task.frame_type == FRAME_B), &frameno);
+
        {
                // Add video.
                AVPacket pkt;
@@ -1728,8 +1531,8 @@ int QuickSyncEncoderImpl::deinit_va()
     return 0;
 }
 
-QuickSyncEncoderImpl::QuickSyncEncoderImpl(const std::string &filename, movit::ResourcePool *resource_pool, QSurface *surface, const string &va_display, int width, int height, AVOutputFormat *oformat, X264Encoder *x264_encoder)
-       : current_storage_frame(0), resource_pool(resource_pool), surface(surface), x264_encoder(x264_encoder), frame_width(width), frame_height(height)
+QuickSyncEncoderImpl::QuickSyncEncoderImpl(const std::string &filename, movit::ResourcePool *resource_pool, QSurface *surface, const string &va_display, int width, int height, AVOutputFormat *oformat, X264Encoder *x264_encoder, DiskSpaceEstimator *disk_space_estimator)
+       : current_storage_frame(0), resource_pool(resource_pool), surface(surface), x264_encoder(x264_encoder), frame_width(width), frame_height(height), disk_space_estimator(disk_space_estimator)
 {
        file_audio_encoder.reset(new AudioEncoder(AUDIO_OUTPUT_CODEC_NAME, DEFAULT_AUDIO_OUTPUT_BIT_RATE, oformat));
        open_output_file(filename);
@@ -1951,7 +1754,8 @@ void QuickSyncEncoderImpl::open_output_file(const std::string &filename)
 
        string video_extradata = "";  // FIXME: See other comment about global headers.
        AVCodecParametersWithDeleter audio_codecpar = file_audio_encoder->get_codec_parameters();
-       file_mux.reset(new Mux(avctx, frame_width, frame_height, Mux::CODEC_H264, video_extradata, audio_codecpar.get(), TIMEBASE));
+       file_mux.reset(new Mux(avctx, frame_width, frame_height, Mux::CODEC_H264, video_extradata, audio_codecpar.get(), TIMEBASE,
+               std::bind(&DiskSpaceEstimator::report_write, disk_space_estimator, filename, _1)));
 }
 
 void QuickSyncEncoderImpl::encode_thread_func()
@@ -2028,7 +1832,7 @@ void QuickSyncEncoderImpl::encode_remaining_frames_as_p(int encoding_frame_num,
                                add_packet_for_uncompressed_frame(output_frame.pts, output_frame.duration, output_frame.data);
                        } else {
                                assert(global_flags.x264_video_to_http);
-                               x264_encoder->add_frame(output_frame.pts, output_frame.duration, output_frame.data);
+                               x264_encoder->add_frame(output_frame.pts, output_frame.duration, output_frame.data, output_frame.received_ts);
                        }
                }
        }
@@ -2075,6 +1879,16 @@ void QuickSyncEncoderImpl::encode_frame(QuickSyncEncoderImpl::PendingFrame frame
        } while (sync_status == GL_TIMEOUT_EXPIRED);
        assert(sync_status != GL_WAIT_FAILED);
 
+       // Find min and max timestamp of all input frames that have a timestamp.
+       steady_clock::time_point min_ts = steady_clock::time_point::max(), max_ts = steady_clock::time_point::min();
+       for (const RefCountedFrame &input_frame : frame.input_frames) {
+               if (input_frame && input_frame->received_timestamp > steady_clock::time_point::min()) {
+                       min_ts = min(min_ts, input_frame->received_timestamp);
+                       max_ts = max(max_ts, input_frame->received_timestamp);
+               }
+       }
+       const ReceivedTimestamps received_ts{ min_ts, max_ts };
+
        // Release back any input frames we needed to render this frame.
        frame.input_frames.clear();
 
@@ -2103,18 +1917,22 @@ void QuickSyncEncoderImpl::encode_frame(QuickSyncEncoderImpl::PendingFrame frame
                    global_flags.x264_video_to_http) {
                        // Add uncompressed video. (Note that pts == dts here.)
                        // Delay needs to match audio.
-                       FrameReorderer::Frame output_frame = reorderer->reorder_frame(pts + global_delay(), duration, reinterpret_cast<uint8_t *>(surf->y_ptr));
+                       FrameReorderer::Frame output_frame = reorderer->reorder_frame(pts + global_delay(), duration, reinterpret_cast<uint8_t *>(surf->y_ptr), received_ts);
                        if (output_frame.data != nullptr) {
                                if (global_flags.uncompressed_video_to_http) {
                                        add_packet_for_uncompressed_frame(output_frame.pts, output_frame.duration, output_frame.data);
                                } else {
                                        assert(global_flags.x264_video_to_http);
-                                       x264_encoder->add_frame(output_frame.pts, output_frame.duration, output_frame.data);
+                                       x264_encoder->add_frame(output_frame.pts, output_frame.duration, output_frame.data, output_frame.received_ts);
                                }
                        }
                }
        }
 
+       static int frameno = 0;
+       print_latency("Current mixer latency (video inputs → ready for encode):",
+               received_ts, (frame_type == FRAME_B), &frameno);
+
        va_status = vaDestroyImage(va_dpy, surf->surface_image.image_id);
        CHECK_VASTATUS(va_status, "vaDestroyImage");
 
@@ -2150,14 +1968,15 @@ void QuickSyncEncoderImpl::encode_frame(QuickSyncEncoderImpl::PendingFrame frame
        tmp.pts = pts;
        tmp.dts = dts;
        tmp.duration = duration;
+       tmp.received_ts = received_ts;
        storage_task_enqueue(move(tmp));
 
        update_ReferenceFrames(frame_type);
 }
 
 // Proxy object.
 // The constructor passes every argument through unchanged to a newly
 // allocated QuickSyncEncoderImpl, which is stored in impl.
-QuickSyncEncoder::QuickSyncEncoder(const std::string &filename, movit::ResourcePool *resource_pool, QSurface *surface, const string &va_display, int width, int height, AVOutputFormat *oformat, X264Encoder *x264_encoder)
-       : impl(new QuickSyncEncoderImpl(filename, resource_pool, surface, va_display, width, height, oformat, x264_encoder)) {}
+QuickSyncEncoder::QuickSyncEncoder(const std::string &filename, movit::ResourcePool *resource_pool, QSurface *surface, const string &va_display, int width, int height, AVOutputFormat *oformat, X264Encoder *x264_encoder, DiskSpaceEstimator *disk_space_estimator)
+       : impl(new QuickSyncEncoderImpl(filename, resource_pool, surface, va_display, width, height, oformat, x264_encoder, disk_space_estimator)) {}
 
 // Must be defined here, in this file, because the unique_ptr<> destructor
 // needs to know the impl (i.e. see the QuickSyncEncoderImpl definition).
 // The body is intentionally empty.
 QuickSyncEncoder::~QuickSyncEncoder() {}