From: Steinar H. Gunderson Date: Sun, 11 Dec 2016 12:37:50 +0000 (+0100) Subject: Move QuickSyncEncoderImpl's definition into its own header. X-Git-Tag: 1.5.0~94 X-Git-Url: https://git.sesse.net/?a=commitdiff_plain;h=dc7db20e764306a33dbf80cb50da7822a79cfda2;p=nageru Move QuickSyncEncoderImpl's definition into its own header. It is only used from that single file (naturally enough), but it ended up cluttering the .cpp file a lot. --- diff --git a/quicksync_encoder.cpp b/quicksync_encoder.cpp index 7dbdb57..7edb3cc 100644 --- a/quicksync_encoder.cpp +++ b/quicksync_encoder.cpp @@ -49,6 +49,7 @@ extern "C" { #include "flags.h" #include "mux.h" #include "print_latency.h" +#include "quicksync_encoder_impl.h" #include "ref_counted_frame.h" #include "timebase.h" #include "x264_encoder.h" @@ -97,9 +98,6 @@ class QSurface; #define PROFILE_IDC_HIGH 100 #define BITSTREAM_ALLOCATE_STEPPING 4096 -#define SURFACE_NUM 16 /* 16 surfaces for source YUV */ -#define MAX_NUM_REF1 16 // Seemingly a hardware-fixed value, not related to SURFACE_NUM -#define MAX_NUM_REF2 32 // Seemingly a hardware-fixed value, not related to SURFACE_NUM static constexpr unsigned int MaxFrameNum = (2<<16); static constexpr unsigned int MaxPicOrderCntLsb = (2<<8); @@ -118,62 +116,8 @@ static constexpr int rc_default_modes[] = { // Priority list of modes. #define SRC_SURFACE_FREE 0 #define SRC_SURFACE_IN_ENCODING 1 -struct __bitstream { - unsigned int *buffer; - int bit_offset; - int max_size_in_dword; -}; -typedef struct __bitstream bitstream; - using namespace std; -// H.264 video comes out in encoding order (e.g. with two B-frames: -// 0, 3, 1, 2, 6, 4, 5, etc.), but uncompressed video needs to -// come in the right order. Since we do everything, including waiting -// for the frames to come out of OpenGL, in encoding order, we need -// a reordering buffer for uncompressed frames so that they come out -// correctly. We go the super-lazy way of not making it understand -// anything about the true order (which introduces some extra latency, -// though); we know that for N B-frames we need at most (N-1) frames -// in the reorder buffer, and can just sort on that. -// -// The class also deals with keeping a freelist as needed. -class FrameReorderer { -public: - FrameReorderer(unsigned queue_length, int width, int height); - - struct Frame { - int64_t pts, duration; - uint8_t *data; - ReceivedTimestamps received_ts; - - // Invert to get the smallest pts first. - bool operator< (const Frame &other) const { return pts > other.pts; } - }; - - // Returns the next frame to insert with its pts, if any. Otherwise -1 and nullptr. - // Does _not_ take ownership of data; a copy is taken if needed. - // The returned pointer is valid until the next call to reorder_frame, or destruction. - // As a special case, if queue_length == 0, will just return pts and data (no reordering needed). - Frame reorder_frame(int64_t pts, int64_t duration, uint8_t *data, const ReceivedTimestamps &received_ts); - - // The same as reorder_frame, but without inserting anything. Used to empty the queue. - Frame get_first_frame(); - - bool empty() const { return frames.empty(); } - -private: - unsigned queue_length; - int width, height; - - priority_queue frames; - stack freelist; // Includes the last value returned from reorder_frame. - - // Owns all the pointers. Normally, freelist and frames could do this themselves, - // except priority_queue doesn't work well with movable-only types. - vector> owner; -}; - FrameReorderer::FrameReorderer(unsigned queue_length, int width, int height) : queue_length(queue_length), width(width), height(height) { @@ -211,161 +155,6 @@ FrameReorderer::Frame FrameReorderer::get_first_frame() return storage; } -class QuickSyncEncoderImpl { -public: - QuickSyncEncoderImpl(const std::string &filename, movit::ResourcePool *resource_pool, QSurface *surface, const string &va_display, int width, int height, AVOutputFormat *oformat, X264Encoder *x264_encoder, DiskSpaceEstimator *disk_space_estimator); - ~QuickSyncEncoderImpl(); - void add_audio(int64_t pts, vector audio); - bool begin_frame(GLuint *y_tex, GLuint *cbcr_tex); - RefCountedGLsync end_frame(int64_t pts, int64_t duration, const vector &input_frames); - void shutdown(); - void release_gl_resources(); - void set_stream_mux(Mux *mux) - { - stream_mux = mux; - } - - // So we never get negative dts. - int64_t global_delay() const { - return int64_t(ip_period - 1) * (TIMEBASE / MAX_FPS); - } - -private: - struct storage_task { - unsigned long long display_order; - int frame_type; - vector audio; - int64_t pts, dts, duration; - ReceivedTimestamps received_ts; - }; - struct PendingFrame { - RefCountedGLsync fence; - vector input_frames; - int64_t pts, duration; - }; - - void open_output_file(const std::string &filename); - void encode_thread_func(); - void encode_remaining_frames_as_p(int encoding_frame_num, int gop_start_display_frame_num, int64_t last_dts); - void add_packet_for_uncompressed_frame(int64_t pts, int64_t duration, const uint8_t *data); - void encode_frame(PendingFrame frame, int encoding_frame_num, int display_frame_num, int gop_start_display_frame_num, - int frame_type, int64_t pts, int64_t dts, int64_t duration); - void storage_task_thread(); - void storage_task_enqueue(storage_task task); - void save_codeddata(storage_task task); - int render_packedsequence(); - int render_packedpicture(); - void render_packedslice(); - int render_sequence(); - int render_picture(int frame_type, int display_frame_num, int gop_start_display_frame_num); - void sps_rbsp(bitstream *bs); - void pps_rbsp(bitstream *bs); - int build_packed_pic_buffer(unsigned char **header_buffer); - int render_slice(int encoding_frame_num, int display_frame_num, int gop_start_display_frame_num, int frame_type); - void slice_header(bitstream *bs); - int build_packed_seq_buffer(unsigned char **header_buffer); - int build_packed_slice_buffer(unsigned char **header_buffer); - int init_va(const string &va_display); - int deinit_va(); - void enable_zerocopy_if_possible(); - VADisplay va_open_display(const string &va_display); - void va_close_display(VADisplay va_dpy); - int setup_encode(); - void release_encode(); - void update_ReferenceFrames(int frame_type); - int update_RefPicList(int frame_type); - - bool is_shutdown = false; - bool has_released_gl_resources = false; - bool use_zerocopy; - int drm_fd = -1; - - thread encode_thread, storage_thread; - - mutex storage_task_queue_mutex; - condition_variable storage_task_queue_changed; - int srcsurface_status[SURFACE_NUM]; // protected by storage_task_queue_mutex - queue storage_task_queue; // protected by storage_task_queue_mutex - bool storage_thread_should_quit = false; // protected by storage_task_queue_mutex - - mutex frame_queue_mutex; - condition_variable frame_queue_nonempty; - bool encode_thread_should_quit = false; // under frame_queue_mutex - - int current_storage_frame; - - map pending_video_frames; // under frame_queue_mutex - movit::ResourcePool *resource_pool; - QSurface *surface; - - unique_ptr file_audio_encoder; - - unique_ptr reorderer; - X264Encoder *x264_encoder; // nullptr if not using x264. - - Mux* stream_mux = nullptr; // To HTTP. - unique_ptr file_mux; // To local disk. - - Display *x11_display = nullptr; - - // Encoder parameters - VADisplay va_dpy; - VAProfile h264_profile = (VAProfile)~0; - VAConfigAttrib config_attrib[VAConfigAttribTypeMax]; - int config_attrib_num = 0, enc_packed_header_idx; - - struct GLSurface { - VASurfaceID src_surface, ref_surface; - VABufferID coded_buf; - - VAImage surface_image; - GLuint y_tex, cbcr_tex; - - // Only if use_zerocopy == true. - EGLImage y_egl_image, cbcr_egl_image; - - // Only if use_zerocopy == false. - GLuint pbo; - uint8_t *y_ptr, *cbcr_ptr; - size_t y_offset, cbcr_offset; - }; - GLSurface gl_surfaces[SURFACE_NUM]; - - VAConfigID config_id; - VAContextID context_id; - VAEncSequenceParameterBufferH264 seq_param; - VAEncPictureParameterBufferH264 pic_param; - VAEncSliceParameterBufferH264 slice_param; - VAPictureH264 CurrentCurrPic; - VAPictureH264 ReferenceFrames[MAX_NUM_REF1], RefPicList0_P[MAX_NUM_REF2], RefPicList0_B[MAX_NUM_REF2], RefPicList1_B[MAX_NUM_REF2]; - - // Static quality settings. - static constexpr unsigned int frame_bitrate = 15000000 / 60; // Doesn't really matter; only initial_qp does. - static constexpr unsigned int num_ref_frames = 2; - static constexpr int initial_qp = 15; - static constexpr int minimal_qp = 0; - static constexpr int intra_period = 30; - static constexpr int intra_idr_period = MAX_FPS; // About a second; more at lower frame rates. Not ideal. - - // Quality settings that are meant to be static, but might be overridden - // by the profile. - int constraint_set_flag = 0; - int h264_packedheader = 0; /* support pack header? */ - int h264_maxref = (1<<16|1); - int h264_entropy_mode = 1; /* cabac */ - int ip_period = 3; - - int rc_mode = -1; - unsigned int current_frame_num = 0; - unsigned int numShortTerm = 0; - - int frame_width; - int frame_height; - int frame_width_mbaligned; - int frame_height_mbaligned; - - DiskSpaceEstimator *disk_space_estimator; -}; // Supposedly vaRenderPicture() is supposed to destroy the buffer implicitly, // but if we don't delete it here, we get leaks. The GStreamer implementation diff --git a/quicksync_encoder_impl.h b/quicksync_encoder_impl.h new file mode 100644 index 0000000..6481692 --- /dev/null +++ b/quicksync_encoder_impl.h @@ -0,0 +1,236 @@ +#ifndef _QUICKSYNC_ENCODER_IMPL_H +#define _QUICKSYNC_ENCODER_IMPL_H 1 + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "audio_encoder.h" +#include "defs.h" +#include "timebase.h" +#include "print_latency.h" + +#define SURFACE_NUM 16 /* 16 surfaces for source YUV */ +#define MAX_NUM_REF1 16 // Seemingly a hardware-fixed value, not related to SURFACE_NUM +#define MAX_NUM_REF2 32 // Seemingly a hardware-fixed value, not related to SURFACE_NUM + +struct __bitstream { + unsigned int *buffer; + int bit_offset; + int max_size_in_dword; +}; +typedef struct __bitstream bitstream; + +// H.264 video comes out in encoding order (e.g. with two B-frames: +// 0, 3, 1, 2, 6, 4, 5, etc.), but uncompressed video needs to +// come in the right order. Since we do everything, including waiting +// for the frames to come out of OpenGL, in encoding order, we need +// a reordering buffer for uncompressed frames so that they come out +// correctly. We go the super-lazy way of not making it understand +// anything about the true order (which introduces some extra latency, +// though); we know that for N B-frames we need at most (N-1) frames +// in the reorder buffer, and can just sort on that. +// +// The class also deals with keeping a freelist as needed. +class FrameReorderer { +public: + FrameReorderer(unsigned queue_length, int width, int height); + + struct Frame { + int64_t pts, duration; + uint8_t *data; + ReceivedTimestamps received_ts; + + // Invert to get the smallest pts first. + bool operator< (const Frame &other) const { return pts > other.pts; } + }; + + // Returns the next frame to insert with its pts, if any. Otherwise -1 and nullptr. + // Does _not_ take ownership of data; a copy is taken if needed. + // The returned pointer is valid until the next call to reorder_frame, or destruction. + // As a special case, if queue_length == 0, will just return pts and data (no reordering needed). + Frame reorder_frame(int64_t pts, int64_t duration, uint8_t *data, const ReceivedTimestamps &received_ts); + + // The same as reorder_frame, but without inserting anything. Used to empty the queue. + Frame get_first_frame(); + + bool empty() const { return frames.empty(); } + +private: + unsigned queue_length; + int width, height; + + std::priority_queue frames; + std::stack freelist; // Includes the last value returned from reorder_frame. + + // Owns all the pointers. Normally, freelist and frames could do this themselves, + // except priority_queue doesn't work well with movable-only types. + std::vector> owner; +}; + + +class QuickSyncEncoderImpl { +public: + QuickSyncEncoderImpl(const std::string &filename, movit::ResourcePool *resource_pool, QSurface *surface, const std::string &va_display, int width, int height, AVOutputFormat *oformat, X264Encoder *x264_encoder, DiskSpaceEstimator *disk_space_estimator); + ~QuickSyncEncoderImpl(); + void add_audio(int64_t pts, std::vector audio); + bool begin_frame(GLuint *y_tex, GLuint *cbcr_tex); + RefCountedGLsync end_frame(int64_t pts, int64_t duration, const std::vector &input_frames); + void shutdown(); + void release_gl_resources(); + void set_stream_mux(Mux *mux) + { + stream_mux = mux; + } + + // So we never get negative dts. + int64_t global_delay() const { + return int64_t(ip_period - 1) * (TIMEBASE / MAX_FPS); + } + +private: + struct storage_task { + unsigned long long display_order; + int frame_type; + std::vector audio; + int64_t pts, dts, duration; + ReceivedTimestamps received_ts; + }; + struct PendingFrame { + RefCountedGLsync fence; + std::vector input_frames; + int64_t pts, duration; + }; + + void open_output_file(const std::string &filename); + void encode_thread_func(); + void encode_remaining_frames_as_p(int encoding_frame_num, int gop_start_display_frame_num, int64_t last_dts); + void add_packet_for_uncompressed_frame(int64_t pts, int64_t duration, const uint8_t *data); + void encode_frame(PendingFrame frame, int encoding_frame_num, int display_frame_num, int gop_start_display_frame_num, + int frame_type, int64_t pts, int64_t dts, int64_t duration); + void storage_task_thread(); + void storage_task_enqueue(storage_task task); + void save_codeddata(storage_task task); + int render_packedsequence(); + int render_packedpicture(); + void render_packedslice(); + int render_sequence(); + int render_picture(int frame_type, int display_frame_num, int gop_start_display_frame_num); + void sps_rbsp(bitstream *bs); + void pps_rbsp(bitstream *bs); + int build_packed_pic_buffer(unsigned char **header_buffer); + int render_slice(int encoding_frame_num, int display_frame_num, int gop_start_display_frame_num, int frame_type); + void slice_header(bitstream *bs); + int build_packed_seq_buffer(unsigned char **header_buffer); + int build_packed_slice_buffer(unsigned char **header_buffer); + int init_va(const std::string &va_display); + int deinit_va(); + void enable_zerocopy_if_possible(); + VADisplay va_open_display(const std::string &va_display); + void va_close_display(VADisplay va_dpy); + int setup_encode(); + void release_encode(); + void update_ReferenceFrames(int frame_type); + int update_RefPicList(int frame_type); + + bool is_shutdown = false; + bool has_released_gl_resources = false; + bool use_zerocopy; + int drm_fd = -1; + + std::thread encode_thread, storage_thread; + + std::mutex storage_task_queue_mutex; + std::condition_variable storage_task_queue_changed; + int srcsurface_status[SURFACE_NUM]; // protected by storage_task_queue_mutex + std::queue storage_task_queue; // protected by storage_task_queue_mutex + bool storage_thread_should_quit = false; // protected by storage_task_queue_mutex + + std::mutex frame_queue_mutex; + std::condition_variable frame_queue_nonempty; + bool encode_thread_should_quit = false; // under frame_queue_mutex + + int current_storage_frame; + + std::map pending_video_frames; // under frame_queue_mutex + movit::ResourcePool *resource_pool; + QSurface *surface; + + std::unique_ptr file_audio_encoder; + + std::unique_ptr reorderer; + X264Encoder *x264_encoder; // nullptr if not using x264. + + Mux* stream_mux = nullptr; // To HTTP. + std::unique_ptr file_mux; // To local disk. + + Display *x11_display = nullptr; + + // Encoder parameters + VADisplay va_dpy; + VAProfile h264_profile = (VAProfile)~0; + VAConfigAttrib config_attrib[VAConfigAttribTypeMax]; + int config_attrib_num = 0, enc_packed_header_idx; + + struct GLSurface { + VASurfaceID src_surface, ref_surface; + VABufferID coded_buf; + + VAImage surface_image; + GLuint y_tex, cbcr_tex; + + // Only if use_zerocopy == true. + EGLImage y_egl_image, cbcr_egl_image; + + // Only if use_zerocopy == false. + GLuint pbo; + uint8_t *y_ptr, *cbcr_ptr; + size_t y_offset, cbcr_offset; + }; + GLSurface gl_surfaces[SURFACE_NUM]; + + VAConfigID config_id; + VAContextID context_id; + VAEncSequenceParameterBufferH264 seq_param; + VAEncPictureParameterBufferH264 pic_param; + VAEncSliceParameterBufferH264 slice_param; + VAPictureH264 CurrentCurrPic; + VAPictureH264 ReferenceFrames[MAX_NUM_REF1], RefPicList0_P[MAX_NUM_REF2], RefPicList0_B[MAX_NUM_REF2], RefPicList1_B[MAX_NUM_REF2]; + + // Static quality settings. + static constexpr unsigned int frame_bitrate = 15000000 / 60; // Doesn't really matter; only initial_qp does. + static constexpr unsigned int num_ref_frames = 2; + static constexpr int initial_qp = 15; + static constexpr int minimal_qp = 0; + static constexpr int intra_period = 30; + static constexpr int intra_idr_period = MAX_FPS; // About a second; more at lower frame rates. Not ideal. + + // Quality settings that are meant to be static, but might be overridden + // by the profile. + int constraint_set_flag = 0; + int h264_packedheader = 0; /* support pack header? */ + int h264_maxref = (1<<16|1); + int h264_entropy_mode = 1; /* cabac */ + int ip_period = 3; + + int rc_mode = -1; + unsigned int current_frame_num = 0; + unsigned int numShortTerm = 0; + + int frame_width; + int frame_height; + int frame_width_mbaligned; + int frame_height_mbaligned; + + DiskSpaceEstimator *disk_space_estimator; +}; + +#endif // !defined(_QUICKSYNC_ENCODER_IMPL_H)