git.sesse.net Git - nageru/blob - quicksync_encoder_impl.h
Make QuickSync surfaces be allocated dynamically.
[nageru] / quicksync_encoder_impl.h
1 #ifndef _QUICKSYNC_ENCODER_IMPL_H
2 #define _QUICKSYNC_ENCODER_IMPL_H 1
3
4 #include <epoxy/egl.h>
5 #include <va/va.h>
6
7 #include <condition_variable>
8 #include <map>
9 #include <memory>
10 #include <mutex>
11 #include <queue>
12 #include <string>
13 #include <stack>
14 #include <thread>
15 #include <unordered_map>
16
17 #include "audio_encoder.h"
18 #include "defs.h"
19 #include "timebase.h"
20 #include "print_latency.h"
21
// Compile-time sizes used by QuickSyncEncoderImpl further down in this header.
//
// SURFACE_NUM is the length of the QuickSyncEncoderImpl::gl_surfaces array.
22 #define SURFACE_NUM 16 /* 16 surfaces for source YUV */
// MAX_NUM_REF1 is not referenced anywhere else in this header.
23 #define MAX_NUM_REF1 16 // Seemingly a hardware-fixed value, not related to SURFACE_NUM
// MAX_NUM_REF2 is the array bound written in the RefPicList parameters of
// update_RefPicList_P() and update_RefPicList_B().
24 #define MAX_NUM_REF2 32 // Seemingly a hardware-fixed value, not related to SURFACE_NUM
25
// State block handed by pointer to sps_rbsp(), pps_rbsp() and slice_header()
// in QuickSyncEncoderImpl. The code that reads and writes these fields is
// not in this header.
//
// NOTE(review): judging by the field names this is a bit-level writer
// (word buffer, write position in bits, capacity in 32-bit words) --
// confirm against the implementation file.
// NOTE(review): identifiers containing a double underscore are reserved
// for the implementation in C++; "__bitstream" falls in that set.
26 struct __bitstream {
       // Backing storage, addressed as unsigned int words. This struct
       // declares no constructor or destructor, so it does not manage the
       // allocation itself.
27     unsigned int *buffer;
       // NOTE(review): presumably the current position in bits -- confirm.
28     int bit_offset;
       // NOTE(review): presumably the capacity of buffer in dwords -- confirm.
29     int max_size_in_dword;
30 };
// Short alias; this is the name the method declarations below use.
31 typedef struct __bitstream bitstream;
32
// Implementation class behind the Quick Sync encoder. Everything in here is
// declaration-only except set_stream_mux() and global_delay(); all other
// member functions are defined outside this header.
//
// The state visible below consists of VA-API handles and H.264 parameter
// buffers (va_dpy, config_id, context_id, seq_param, pic_param, slice_param),
// a fixed array of SURFACE_NUM GLSurface slots, two std::thread members
// (encode_thread, storage_thread), two mutex/condition-variable pairs
// (storage_task_queue_* and frame_queue_*), and two muxes (stream_mux,
// file_mux).
//
// NOTE(review): std::vector and std::deque are used below, but <vector> and
// <deque> are not in this header's own include list; it presumably relies on
// transitive includes -- confirm.
33 class QuickSyncEncoderImpl {
34 public:
           // All pointer arguments are raw pointers; the members they correspond
           // to (resource_pool, surface, x264_encoder, disk_space_estimator) are
           // also raw pointers, so no ownership transfer is expressed here.
35         QuickSyncEncoderImpl(const std::string &filename, movit::ResourcePool *resource_pool, QSurface *surface, const std::string &va_display, int width, int height, AVOutputFormat *oformat, X264Encoder *x264_encoder, DiskSpaceEstimator *disk_space_estimator);
36         ~QuickSyncEncoderImpl();
           // Takes the sample vector by value.
37         void add_audio(int64_t pts, std::vector<float> audio);
           // begin_frame() reports through two GLuint out-pointers plus a bool
           // result; end_frame() takes the frame's pts/duration and its input
           // frames and returns a RefCountedGLsync. The exact contract is defined
           // in the implementation file.
38         bool begin_frame(GLuint *y_tex, GLuint *cbcr_tex);
39         RefCountedGLsync end_frame(int64_t pts, int64_t duration, const std::vector<RefCountedFrame> &input_frames);
40         void shutdown();
41         void release_gl_resources();
           // Stores the pointer in stream_mux. Only the raw pointer is kept.
42         void set_stream_mux(Mux *mux)
43         {
44                 stream_mux = mux;
45         }
46
47         // So we never get negative dts.
           // Returns (ip_period - 1) * (TIMEBASE / MAX_FPS). The first factor is
           // converted to int64_t before the multiplication; TIMEBASE and MAX_FPS
           // are defined outside this header.
48         int64_t global_delay() const {
49                 return int64_t(ip_period - 1) * (TIMEBASE / MAX_FPS);
50         }
51
52 private:
           // Element type of storage_task_queue; also passed by value to
           // storage_task_enqueue() and save_codeddata().
53         struct storage_task {
54                 unsigned long long display_order;
55                 int frame_type;
56                 std::vector<float> audio;
57                 int64_t pts, dts, duration;
58                 ReceivedTimestamps received_ts;
59                 std::vector<size_t> ref_display_frame_numbers;
60         };
           // Element type of pending_video_frames and mapped type of
           // reorder_buffer; also passed by value to pass_frame() and
           // encode_frame().
61         struct PendingFrame {
62                 RefCountedGLsync fence;
63                 std::vector<RefCountedFrame> input_frames;
64                 int64_t pts, duration;
65         };
           // One slot of the gl_surfaces array: VA surface/buffer IDs plus the
           // GL-side objects that go with them. Only refcount has an in-class
           // initializer.
66         struct GLSurface {
67                 VASurfaceID src_surface, ref_surface;
68                 VABufferID coded_buf;
69
70                 VAImage surface_image;
71                 GLuint y_tex, cbcr_tex;
72
73                 // Only if use_zerocopy == true.
74                 EGLImage y_egl_image, cbcr_egl_image;
75
76                 // Only if use_zerocopy == false.
77                 GLuint pbo;
78                 uint8_t *y_ptr, *cbcr_ptr;
79                 size_t y_offset, cbcr_offset;
80
81                 // Surfaces can be busy (have refcount > 0) for a variety of
82                 // reasons: First of all because they belong to a frame that's
83                 // under encoding. But also reference frames take refcounts;
84                 // while a frame is being encoded, all its reference frames
85                 // also have increased refcounts so that they are not dropped.
86                 // Similarly, just being in <reference_frames> increases the
87                 // refcount. Until it is back to zero, the surface cannot be given
88                 // out for encoding another frame. Use release_gl_surface()
89                 // to reduce the refcount, which will free the surface if
90                 // the refcount reaches zero.
91                 //
92                 // Protected by storage_task_queue_mutex.
93                 int refcount = 0;
94         };
95
           // Private helpers, all defined outside this header. Several return
           // int; what the value means is not documented here.
96         void open_output_file(const std::string &filename);
97         void encode_thread_func();
98         void encode_remaining_frames_as_p(int encoding_frame_num, int gop_start_display_frame_num, int64_t last_dts);
99         void add_packet_for_uncompressed_frame(int64_t pts, int64_t duration, const uint8_t *data);
100         void pass_frame(PendingFrame frame, int display_frame_num, int64_t pts, int64_t duration);
101         void encode_frame(PendingFrame frame, int encoding_frame_num, int display_frame_num, int gop_start_display_frame_num,
102                           int frame_type, int64_t pts, int64_t dts, int64_t duration);
103         void storage_task_thread();
104         void storage_task_enqueue(storage_task task);
105         void save_codeddata(GLSurface *surf, storage_task task);
106         int render_packedsequence();
107         int render_packedpicture();
108         void render_packedslice();
109         int render_sequence();
110         int render_picture(GLSurface *surf, int frame_type, int display_frame_num, int gop_start_display_frame_num);
111         void sps_rbsp(bitstream *bs);
112         void pps_rbsp(bitstream *bs);
113         int build_packed_pic_buffer(unsigned char **header_buffer);
114         int render_slice(int encoding_frame_num, int display_frame_num, int gop_start_display_frame_num, int frame_type);
115         void slice_header(bitstream *bs);
116         int build_packed_seq_buffer(unsigned char **header_buffer);
117         int build_packed_slice_buffer(unsigned char **header_buffer);
118         int init_va(const std::string &va_display);
119         int deinit_va();
120         void enable_zerocopy_if_possible();
121         VADisplay va_open_display(const std::string &va_display);
122         void va_close_display(VADisplay va_dpy);
123         int setup_encode();
124         void release_encode();
125         void update_ReferenceFrames(int current_display_frame, int frame_type);
            // The array bounds in these parameter lists are documentation only;
            // as function parameters the arrays are adjusted to pointers.
126         void update_RefPicList_P(VAPictureH264 RefPicList0_P[MAX_NUM_REF2]);
127         void update_RefPicList_B(VAPictureH264 RefPicList0_B[MAX_NUM_REF2], VAPictureH264 RefPicList1_B[MAX_NUM_REF2]);
128         GLSurface *allocate_gl_surface();
            // Takes the display frame number as size_t, matching the key type of
            // surface_for_frame.
129         void release_gl_surface(size_t display_frame_num);
130
131         bool is_shutdown = false;
132         bool has_released_gl_resources = false;
            // NOTE(review): no in-class initializer; presumably assigned by code
            // outside this header before it is read -- confirm.
133         bool use_zerocopy;
134         int drm_fd = -1;
135
136         std::thread encode_thread, storage_thread;
137
138         std::mutex storage_task_queue_mutex;
139         std::condition_variable storage_task_queue_changed;
140         std::queue<storage_task> storage_task_queue;  // protected by storage_task_queue_mutex
141         bool storage_thread_should_quit = false;  // protected by storage_task_queue_mutex
142
143         std::mutex frame_queue_mutex;
144         std::condition_variable frame_queue_nonempty;
145         bool encode_thread_should_quit = false;  // under frame_queue_mutex
146
            // NOTE(review): no in-class initializer and no locking comment; confirm
            // where it is first assigned and which thread(s) touch it.
147         int current_storage_frame;
148
149         std::queue<PendingFrame> pending_video_frames;  // under frame_queue_mutex
150         movit::ResourcePool *resource_pool;
151         QSurface *surface;
152
153         // Frames that are done rendering and passed on to x264 (if enabled),
154         // but have not been encoded by Quick Sync yet, and thus also not freed.
155         // The key is the display frame number.
            // NOTE(review): keyed by int here, whereas surface_for_frame and
            // release_gl_surface() use size_t for the display frame number.
156         std::map<int, PendingFrame> reorder_buffer;
157         int quicksync_encoding_frame_num = 0;
158
159         std::unique_ptr<AudioEncoder> file_audio_encoder;
160
161         X264Encoder *x264_encoder;  // nullptr if not using x264.
162
163         Mux* stream_mux = nullptr;  // To HTTP.
164         std::unique_ptr<Mux> file_mux;  // To local disk.
165
166         Display *x11_display = nullptr;
167
168         // Encoder parameters
            // NOTE(review): va_dpy and enc_packed_header_idx have no in-class
            // initializer; presumably set by init_va() -- confirm.
169         VADisplay va_dpy;
170         VAProfile h264_profile = (VAProfile)~0;
171         VAConfigAttrib config_attrib[VAConfigAttribTypeMax];
172         int config_attrib_num = 0, enc_packed_header_idx;
173
174         GLSurface gl_surfaces[SURFACE_NUM];
175
176         // For all frames in encoding (refcount > 0), a pointer into gl_surfaces
177         // for the surface used for that frame. Protected by storage_task_queue_mutex.
178         // The key is display frame number.
179         std::unordered_map<size_t, GLSurface *> surface_for_frame;
180
            // VA-API handles and parameter buffers; none has an in-class
            // initializer.
181         VAConfigID config_id;
182         VAContextID context_id;
183         VAEncSequenceParameterBufferH264 seq_param;
184         VAEncPictureParameterBufferH264 pic_param;
185         VAEncSliceParameterBufferH264 slice_param;
186         VAPictureH264 CurrentCurrPic;
187
            // Element type of reference_frames. display_number is an int, while
            // surface_for_frame is keyed by size_t.
188         struct ReferenceFrame {
189                 VAPictureH264 pic;
190                 int display_number;  // To track reference counts.
191         };
192         std::deque<ReferenceFrame> reference_frames;
193
194         // Static quality settings.
            // frame_bitrate evaluates to 250000 (integer division).
195         static constexpr unsigned int frame_bitrate = 15000000 / 60;  // Doesn't really matter; only initial_qp does.
196         static constexpr unsigned int num_ref_frames = 2;
197         static constexpr int initial_qp = 15;
198         static constexpr int minimal_qp = 0;
199         static constexpr int intra_period = 30;
200         static constexpr int intra_idr_period = MAX_FPS;  // About a second; more at lower frame rates. Not ideal.
201
202         // Quality settings that are meant to be static, but might be overridden
203         // by the profile.
204         int constraint_set_flag = 0;
205         int h264_packedheader = 0; /* support pack header? */
            // (1<<16|1) parses as ((1 << 16) | 1), i.e. 0x10001.
            // NOTE(review): presumably two packed 16-bit reference-count fields in
            // the VA-API attribute layout -- confirm.
206         int h264_maxref = (1<<16|1);
207         int h264_entropy_mode = 1; /* cabac */
            // Read by global_delay(); with the default of 3 that function returns
            // 2 * (TIMEBASE / MAX_FPS).
208         int ip_period = 3;
209
210         int rc_mode = -1;
211         unsigned int current_ref_frame_num = 0;  // Encoding frame order within this GOP, sans B-frames.
212
            // NOTE(review): the four frame-size fields have no in-class
            // initializer; presumably derived from the constructor's width/height
            // arguments -- confirm.
213         int frame_width;
214         int frame_height;
215         int frame_width_mbaligned;
216         int frame_height_mbaligned;
217
218         DiskSpaceEstimator *disk_space_estimator;
219 };
220
221 #endif  // !defined(_QUICKSYNC_ENCODER_IMPL_H)