X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=quicksync_encoder_impl.h;h=0dcb9cb0e0a3a374af7ca2a7eb5ecd90b26a0b41;hb=ffd68fbfb90242069af957f2a28908f0559f8348;hp=648169276cccb725e5cd641b4372dc443f44a477;hpb=dc7db20e764306a33dbf80cb50da7822a79cfda2;p=nageru diff --git a/quicksync_encoder_impl.h b/quicksync_encoder_impl.h index 6481692..0dcb9cb 100644 --- a/quicksync_encoder_impl.h +++ b/quicksync_encoder_impl.h @@ -2,6 +2,7 @@ #define _QUICKSYNC_ENCODER_IMPL_H 1 #include +#include #include #include @@ -12,6 +13,7 @@ #include #include #include +#include #include "audio_encoder.h" #include "defs.h" @@ -29,62 +31,16 @@ struct __bitstream { }; typedef struct __bitstream bitstream; -// H.264 video comes out in encoding order (e.g. with two B-frames: -// 0, 3, 1, 2, 6, 4, 5, etc.), but uncompressed video needs to -// come in the right order. Since we do everything, including waiting -// for the frames to come out of OpenGL, in encoding order, we need -// a reordering buffer for uncompressed frames so that they come out -// correctly. We go the super-lazy way of not making it understand -// anything about the true order (which introduces some extra latency, -// though); we know that for N B-frames we need at most (N-1) frames -// in the reorder buffer, and can just sort on that. -// -// The class also deals with keeping a freelist as needed. -class FrameReorderer { -public: - FrameReorderer(unsigned queue_length, int width, int height); - - struct Frame { - int64_t pts, duration; - uint8_t *data; - ReceivedTimestamps received_ts; - - // Invert to get the smallest pts first. - bool operator< (const Frame &other) const { return pts > other.pts; } - }; - - // Returns the next frame to insert with its pts, if any. Otherwise -1 and nullptr. - // Does _not_ take ownership of data; a copy is taken if needed. - // The returned pointer is valid until the next call to reorder_frame, or destruction. - // As a special case, if queue_length == 0, will just return pts and data (no reordering needed). - Frame reorder_frame(int64_t pts, int64_t duration, uint8_t *data, const ReceivedTimestamps &received_ts); - - // The same as reorder_frame, but without inserting anything. Used to empty the queue. - Frame get_first_frame(); - - bool empty() const { return frames.empty(); } - -private: - unsigned queue_length; - int width, height; - - std::priority_queue frames; - std::stack freelist; // Includes the last value returned from reorder_frame. - - // Owns all the pointers. Normally, freelist and frames could do this themselves, - // except priority_queue doesn't work well with movable-only types. - std::vector> owner; -}; - - class QuickSyncEncoderImpl { public: QuickSyncEncoderImpl(const std::string &filename, movit::ResourcePool *resource_pool, QSurface *surface, const std::string &va_display, int width, int height, AVOutputFormat *oformat, X264Encoder *x264_encoder, DiskSpaceEstimator *disk_space_estimator); ~QuickSyncEncoderImpl(); void add_audio(int64_t pts, std::vector audio); - bool begin_frame(GLuint *y_tex, GLuint *cbcr_tex); - RefCountedGLsync end_frame(int64_t pts, int64_t duration, const std::vector &input_frames); + bool is_zerocopy() const; + bool begin_frame(int64_t pts, int64_t duration, movit::YCbCrLumaCoefficients ycbcr_coefficients, const std::vector &input_frames, GLuint *y_tex, GLuint *cbcr_tex); + RefCountedGLsync end_frame(); void shutdown(); + void close_file(); void release_gl_resources(); void set_stream_mux(Mux *mux) { @@ -102,34 +58,67 @@ private: int frame_type; std::vector audio; int64_t pts, dts, duration; + movit::YCbCrLumaCoefficients ycbcr_coefficients; ReceivedTimestamps received_ts; + std::vector ref_display_frame_numbers; }; struct PendingFrame { RefCountedGLsync fence; std::vector input_frames; int64_t pts, duration; + movit::YCbCrLumaCoefficients ycbcr_coefficients; + }; + struct GLSurface { + // Only if x264_video_to_disk == false. + VASurfaceID src_surface, ref_surface; + VABufferID coded_buf; + VAImage surface_image; + + // Only if use_zerocopy == true (which implies x264_video_to_disk == false). + GLuint y_tex, cbcr_tex; + EGLImage y_egl_image, cbcr_egl_image; + + // Only if use_zerocopy == false. + GLuint pbo; + uint8_t *y_ptr, *cbcr_ptr; + size_t y_offset, cbcr_offset; + + // Surfaces can be busy (have refcount > 0) for a variety of + // reasons: First of all because they belong to a frame that's + // under encoding. But also reference frames take refcounts; + // while a frame is being encoded, all its reference frames + // also have increased refcounts so that they are not dropped. + // Similarly, just being in increases the + // refcount. Until it is back to zero, the surface cannot be given + // out for encoding another frame. Use release_gl_surface() + // to reduce the refcount, which will free the surface if + // the refcount reaches zero. + // + // Protected by storage_task_queue_mutex. + int refcount = 0; }; void open_output_file(const std::string &filename); void encode_thread_func(); void encode_remaining_frames_as_p(int encoding_frame_num, int gop_start_display_frame_num, int64_t last_dts); void add_packet_for_uncompressed_frame(int64_t pts, int64_t duration, const uint8_t *data); + void pass_frame(PendingFrame frame, int display_frame_num, int64_t pts, int64_t duration); void encode_frame(PendingFrame frame, int encoding_frame_num, int display_frame_num, int gop_start_display_frame_num, - int frame_type, int64_t pts, int64_t dts, int64_t duration); + int frame_type, int64_t pts, int64_t dts, int64_t duration, movit::YCbCrLumaCoefficients ycbcr_coefficients); void storage_task_thread(); void storage_task_enqueue(storage_task task); - void save_codeddata(storage_task task); - int render_packedsequence(); + void save_codeddata(GLSurface *surf, storage_task task); + int render_packedsequence(movit::YCbCrLumaCoefficients ycbcr_coefficients); int render_packedpicture(); void render_packedslice(); int render_sequence(); - int render_picture(int frame_type, int display_frame_num, int gop_start_display_frame_num); - void sps_rbsp(bitstream *bs); + int render_picture(GLSurface *surf, int frame_type, int display_frame_num, int gop_start_display_frame_num); + void sps_rbsp(movit::YCbCrLumaCoefficients ycbcr_coefficients, bitstream *bs); void pps_rbsp(bitstream *bs); int build_packed_pic_buffer(unsigned char **header_buffer); int render_slice(int encoding_frame_num, int display_frame_num, int gop_start_display_frame_num, int frame_type); void slice_header(bitstream *bs); - int build_packed_seq_buffer(unsigned char **header_buffer); + int build_packed_seq_buffer(movit::YCbCrLumaCoefficients ycbcr_coefficients, unsigned char **header_buffer); int build_packed_slice_buffer(unsigned char **header_buffer); int init_va(const std::string &va_display); int deinit_va(); @@ -138,19 +127,21 @@ private: void va_close_display(VADisplay va_dpy); int setup_encode(); void release_encode(); - void update_ReferenceFrames(int frame_type); - int update_RefPicList(int frame_type); + void update_ReferenceFrames(int current_display_frame, int frame_type); + void update_RefPicList_P(VAPictureH264 RefPicList0_P[MAX_NUM_REF2]); + void update_RefPicList_B(VAPictureH264 RefPicList0_B[MAX_NUM_REF2], VAPictureH264 RefPicList1_B[MAX_NUM_REF2]); + GLSurface *allocate_gl_surface(); + void release_gl_surface(size_t display_frame_num); bool is_shutdown = false; bool has_released_gl_resources = false; - bool use_zerocopy; + std::atomic use_zerocopy; int drm_fd = -1; std::thread encode_thread, storage_thread; std::mutex storage_task_queue_mutex; std::condition_variable storage_task_queue_changed; - int srcsurface_status[SURFACE_NUM]; // protected by storage_task_queue_mutex std::queue storage_task_queue; // protected by storage_task_queue_mutex bool storage_thread_should_quit = false; // protected by storage_task_queue_mutex @@ -160,13 +151,20 @@ private: int current_storage_frame; - std::map pending_video_frames; // under frame_queue_mutex + PendingFrame current_video_frame; // Used only between begin_frame() and end_frame(). + std::queue pending_video_frames; // under frame_queue_mutex movit::ResourcePool *resource_pool; QSurface *surface; + // Frames that are done rendering and passed on to x264 (if enabled), + // but have not been encoded by Quick Sync yet, and thus also not freed. + // The key is the display frame number. + std::map reorder_buffer; + int quicksync_encoding_frame_num = 0; + + std::mutex file_audio_encoder_mutex; std::unique_ptr file_audio_encoder; - std::unique_ptr reorderer; X264Encoder *x264_encoder; // nullptr if not using x264. Mux* stream_mux = nullptr; // To HTTP. @@ -180,30 +178,25 @@ private: VAConfigAttrib config_attrib[VAConfigAttribTypeMax]; int config_attrib_num = 0, enc_packed_header_idx; - struct GLSurface { - VASurfaceID src_surface, ref_surface; - VABufferID coded_buf; - - VAImage surface_image; - GLuint y_tex, cbcr_tex; - - // Only if use_zerocopy == true. - EGLImage y_egl_image, cbcr_egl_image; - - // Only if use_zerocopy == false. - GLuint pbo; - uint8_t *y_ptr, *cbcr_ptr; - size_t y_offset, cbcr_offset; - }; GLSurface gl_surfaces[SURFACE_NUM]; + // For all frames in encoding (refcount > 0), a pointer into gl_surfaces + // for the surface used for that frame. Protected by storage_task_queue_mutex. + // The key is display frame number. + std::unordered_map surface_for_frame; + VAConfigID config_id; VAContextID context_id; VAEncSequenceParameterBufferH264 seq_param; VAEncPictureParameterBufferH264 pic_param; VAEncSliceParameterBufferH264 slice_param; VAPictureH264 CurrentCurrPic; - VAPictureH264 ReferenceFrames[MAX_NUM_REF1], RefPicList0_P[MAX_NUM_REF2], RefPicList0_B[MAX_NUM_REF2], RefPicList1_B[MAX_NUM_REF2]; + + struct ReferenceFrame { + VAPictureH264 pic; + int display_number; // To track reference counts. + }; + std::deque reference_frames; // Static quality settings. static constexpr unsigned int frame_bitrate = 15000000 / 60; // Doesn't really matter; only initial_qp does. @@ -221,9 +214,7 @@ private: int h264_entropy_mode = 1; /* cabac */ int ip_period = 3; - int rc_mode = -1; - unsigned int current_frame_num = 0; - unsigned int numShortTerm = 0; + unsigned int current_ref_frame_num = 0; // Encoding frame order within this GOP, sans B-frames. int frame_width; int frame_height;