]> git.sesse.net Git - nageru/blob - quicksync_encoder_impl.h
Move QuickSyncEncoderImpl's definition into its own header.
[nageru] / quicksync_encoder_impl.h
1 #ifndef _QUICKSYNC_ENCODER_IMPL_H
2 #define _QUICKSYNC_ENCODER_IMPL_H 1
3
4 #include <epoxy/egl.h>
5 #include <va/va.h>
6
7 #include <condition_variable>
8 #include <map>
9 #include <memory>
10 #include <mutex>
11 #include <queue>
12 #include <string>
13 #include <stack>
14 #include <thread>
15
16 #include "audio_encoder.h"
17 #include "defs.h"
18 #include "timebase.h"
19 #include "print_latency.h"
20
21 #define SURFACE_NUM 16 /* 16 surfaces for source YUV; sizes srcsurface_status[] and gl_surfaces[] in QuickSyncEncoderImpl */
22 #define MAX_NUM_REF1 16 // Seemingly a hardware-fixed value, not related to SURFACE_NUM. Sizes ReferenceFrames[].
23 #define MAX_NUM_REF2 32 // Seemingly a hardware-fixed value, not related to SURFACE_NUM. Sizes RefPicList0_P[], RefPicList0_B[] and RefPicList1_B[].
24
// State of a bitstream under construction; this is what sps_rbsp(),
// pps_rbsp() and slice_header() receive. Every member has a default value,
// so a freshly declared bitstream never holds indeterminate data.
//
// NOTE(review): the tag __bitstream contains a double underscore, which is
// a reserved identifier in C++. It is kept only so that existing references
// keep compiling; use the alias "bitstream" in new code.
struct __bitstream {
    unsigned int *buffer = nullptr;  // Storage for the bits; presumably allocated by whoever starts the stream -- TODO confirm.
    int bit_offset = 0;              // Presumably the current position in bits -- TODO confirm.
    int max_size_in_dword = 0;       // Presumably the capacity of buffer in 32-bit words -- TODO confirm.
};
using bitstream = __bitstream;
31
32 // H.264 video comes out in encoding order (e.g. with two B-frames:
33 // 0, 3, 1, 2, 6, 4, 5, etc.), but uncompressed video needs to
34 // come in the right order. Since we do everything, including waiting
35 // for the frames to come out of OpenGL, in encoding order, we need
36 // a reordering buffer for uncompressed frames so that they come out
37 // correctly. We go the super-lazy way of not making it understand
38 // anything about the true order (which introduces some extra latency,
39 // though); we know that for N B-frames we need at most (N-1) frames
40 // in the reorder buffer, and can just sort on that.
41 //
42 // The class also deals with keeping a freelist as needed.
43 class FrameReorderer {
44 public:
45         FrameReorderer(unsigned queue_length, int width, int height);
46
47         struct Frame {
48                 int64_t pts, duration;
49                 uint8_t *data;
50                 ReceivedTimestamps received_ts;
51
52                 // Invert to get the smallest pts first.
53                 bool operator< (const Frame &other) const { return pts > other.pts; }
54         };
55
56         // Returns the next frame to insert with its pts, if any. Otherwise -1 and nullptr.
57         // Does _not_ take ownership of data; a copy is taken if needed.
58         // The returned pointer is valid until the next call to reorder_frame, or destruction.
59         // As a special case, if queue_length == 0, will just return pts and data (no reordering needed).
60         Frame reorder_frame(int64_t pts, int64_t duration, uint8_t *data, const ReceivedTimestamps &received_ts);
61
62         // The same as reorder_frame, but without inserting anything. Used to empty the queue.
63         Frame get_first_frame();
64
65         bool empty() const { return frames.empty(); }
66
67 private:
68         unsigned queue_length;
69         int width, height;
70
71         std::priority_queue<Frame> frames;
72         std::stack<uint8_t *> freelist;  // Includes the last value returned from reorder_frame.
73
74         // Owns all the pointers. Normally, freelist and frames could do this themselves,
75         // except priority_queue doesn't work well with movable-only types.
76         std::vector<std::unique_ptr<uint8_t[]>> owner;
77 };
78
79
80 class QuickSyncEncoderImpl {
81 public:
82         QuickSyncEncoderImpl(const std::string &filename, movit::ResourcePool *resource_pool, QSurface *surface, const std::string &va_display, int width, int height, AVOutputFormat *oformat, X264Encoder *x264_encoder, DiskSpaceEstimator *disk_space_estimator);
83         ~QuickSyncEncoderImpl();
84         void add_audio(int64_t pts, std::vector<float> audio);
85         bool begin_frame(GLuint *y_tex, GLuint *cbcr_tex);
86         RefCountedGLsync end_frame(int64_t pts, int64_t duration, const std::vector<RefCountedFrame> &input_frames);
87         void shutdown();
88         void release_gl_resources();
89         void set_stream_mux(Mux *mux)
90         {
91                 stream_mux = mux;
92         }
93
94         // So we never get negative dts.
95         int64_t global_delay() const {
96                 return int64_t(ip_period - 1) * (TIMEBASE / MAX_FPS);
97         }
98
99 private:
100         struct storage_task {
101                 unsigned long long display_order;
102                 int frame_type;
103                 std::vector<float> audio;
104                 int64_t pts, dts, duration;
105                 ReceivedTimestamps received_ts;
106         };
107         struct PendingFrame {
108                 RefCountedGLsync fence;
109                 std::vector<RefCountedFrame> input_frames;
110                 int64_t pts, duration;
111         };
112
113         void open_output_file(const std::string &filename);
114         void encode_thread_func();
115         void encode_remaining_frames_as_p(int encoding_frame_num, int gop_start_display_frame_num, int64_t last_dts);
116         void add_packet_for_uncompressed_frame(int64_t pts, int64_t duration, const uint8_t *data);
117         void encode_frame(PendingFrame frame, int encoding_frame_num, int display_frame_num, int gop_start_display_frame_num,
118                           int frame_type, int64_t pts, int64_t dts, int64_t duration);
119         void storage_task_thread();
120         void storage_task_enqueue(storage_task task);
121         void save_codeddata(storage_task task);
122         int render_packedsequence();
123         int render_packedpicture();
124         void render_packedslice();
125         int render_sequence();
126         int render_picture(int frame_type, int display_frame_num, int gop_start_display_frame_num);
127         void sps_rbsp(bitstream *bs);
128         void pps_rbsp(bitstream *bs);
129         int build_packed_pic_buffer(unsigned char **header_buffer);
130         int render_slice(int encoding_frame_num, int display_frame_num, int gop_start_display_frame_num, int frame_type);
131         void slice_header(bitstream *bs);
132         int build_packed_seq_buffer(unsigned char **header_buffer);
133         int build_packed_slice_buffer(unsigned char **header_buffer);
134         int init_va(const std::string &va_display);
135         int deinit_va();
136         void enable_zerocopy_if_possible();
137         VADisplay va_open_display(const std::string &va_display);
138         void va_close_display(VADisplay va_dpy);
139         int setup_encode();
140         void release_encode();
141         void update_ReferenceFrames(int frame_type);
142         int update_RefPicList(int frame_type);
143
144         bool is_shutdown = false;
145         bool has_released_gl_resources = false;
146         bool use_zerocopy;
147         int drm_fd = -1;
148
149         std::thread encode_thread, storage_thread;
150
151         std::mutex storage_task_queue_mutex;
152         std::condition_variable storage_task_queue_changed;
153         int srcsurface_status[SURFACE_NUM];  // protected by storage_task_queue_mutex
154         std::queue<storage_task> storage_task_queue;  // protected by storage_task_queue_mutex
155         bool storage_thread_should_quit = false;  // protected by storage_task_queue_mutex
156
157         std::mutex frame_queue_mutex;
158         std::condition_variable frame_queue_nonempty;
159         bool encode_thread_should_quit = false;  // under frame_queue_mutex
160
161         int current_storage_frame;
162
163         std::map<int, PendingFrame> pending_video_frames;  // under frame_queue_mutex
164         movit::ResourcePool *resource_pool;
165         QSurface *surface;
166
167         std::unique_ptr<AudioEncoder> file_audio_encoder;
168
169         std::unique_ptr<FrameReorderer> reorderer;
170         X264Encoder *x264_encoder;  // nullptr if not using x264.
171
172         Mux* stream_mux = nullptr;  // To HTTP.
173         std::unique_ptr<Mux> file_mux;  // To local disk.
174
175         Display *x11_display = nullptr;
176
177         // Encoder parameters
178         VADisplay va_dpy;
179         VAProfile h264_profile = (VAProfile)~0;
180         VAConfigAttrib config_attrib[VAConfigAttribTypeMax];
181         int config_attrib_num = 0, enc_packed_header_idx;
182
183         struct GLSurface {
184                 VASurfaceID src_surface, ref_surface;
185                 VABufferID coded_buf;
186
187                 VAImage surface_image;
188                 GLuint y_tex, cbcr_tex;
189
190                 // Only if use_zerocopy == true.
191                 EGLImage y_egl_image, cbcr_egl_image;
192
193                 // Only if use_zerocopy == false.
194                 GLuint pbo;
195                 uint8_t *y_ptr, *cbcr_ptr;
196                 size_t y_offset, cbcr_offset;
197         };
198         GLSurface gl_surfaces[SURFACE_NUM];
199
200         VAConfigID config_id;
201         VAContextID context_id;
202         VAEncSequenceParameterBufferH264 seq_param;
203         VAEncPictureParameterBufferH264 pic_param;
204         VAEncSliceParameterBufferH264 slice_param;
205         VAPictureH264 CurrentCurrPic;
206         VAPictureH264 ReferenceFrames[MAX_NUM_REF1], RefPicList0_P[MAX_NUM_REF2], RefPicList0_B[MAX_NUM_REF2], RefPicList1_B[MAX_NUM_REF2];
207
208         // Static quality settings.
209         static constexpr unsigned int frame_bitrate = 15000000 / 60;  // Doesn't really matter; only initial_qp does.
210         static constexpr unsigned int num_ref_frames = 2;
211         static constexpr int initial_qp = 15;
212         static constexpr int minimal_qp = 0;
213         static constexpr int intra_period = 30;
214         static constexpr int intra_idr_period = MAX_FPS;  // About a second; more at lower frame rates. Not ideal.
215
216         // Quality settings that are meant to be static, but might be overridden
217         // by the profile.
218         int constraint_set_flag = 0;
219         int h264_packedheader = 0; /* support pack header? */
220         int h264_maxref = (1<<16|1);
221         int h264_entropy_mode = 1; /* cabac */
222         int ip_period = 3;
223
224         int rc_mode = -1;
225         unsigned int current_frame_num = 0;
226         unsigned int numShortTerm = 0;
227
228         int frame_width;
229         int frame_height;
230         int frame_width_mbaligned;
231         int frame_height_mbaligned;
232
233         DiskSpaceEstimator *disk_space_estimator;
234 };
235
236 #endif  // !defined(_QUICKSYNC_ENCODER_IMPL_H)