git.sesse.net Git - nageru/blob - nageru/quicksync_encoder_impl.h
IWYU-fix nageru/*.h.
[nageru] / nageru / quicksync_encoder_impl.h
1 #ifndef _QUICKSYNC_ENCODER_IMPL_H
2 #define _QUICKSYNC_ENCODER_IMPL_H 1
3
4 #include <stddef.h>
5 #include <stdint.h>
6 #include <epoxy/egl.h>
7 #include <epoxy/gl.h>
8 #include <movit/image_format.h>
9 #include <va/va.h>
10 #include <va/va_enc_h264.h>
11
12 #include <atomic>
13 #include <condition_variable>
14 #include <deque>
15 #include <map>
16 #include <memory>
17 #include <mutex>
18 #include <queue>
19 #include <string>
20 #include <thread>
21 #include <unordered_map>
22 #include <vector>
23
24 #include "audio_encoder.h"
25 #include "defs.h"
26 #include "shared/timebase.h"
27 #include "print_latency.h"
28 #include "ref_counted_frame.h"
29 #include "shared/ref_counted_gl_sync.h"
30 #include "shared/va_display.h"
31 #include "v4l_output.h"
32
33 extern "C" {
34 #include <libavformat/avformat.h>
35 }
36
// Compile-time sizing constants for the encoder declared below.
// SURFACE_NUM is the length of QuickSyncEncoderImpl::gl_surfaces[];
// MAX_NUM_REF2 is the array bound used in the update_RefPicList_P() /
// update_RefPicList_B() parameter declarations. MAX_NUM_REF1 is not
// referenced anywhere in this header.
37 #define SURFACE_NUM 16 /* 16 surfaces for source YUV */
38 #define MAX_NUM_REF1 16 // Seemingly a hardware-fixed value, not related to SURFACE_NUM
39 #define MAX_NUM_REF2 32 // Seemingly a hardware-fixed value, not related to SURFACE_NUM
40
// Bit-level buffer descriptor; pointers to it are taken by sps_rbsp(),
// pps_rbsp() and slice_header() in QuickSyncEncoderImpl.
// NOTE(review): the code that reads and writes these fields is not in this
// header, so the per-field notes below are inferred from the names only.
// NOTE(review): identifiers containing a double underscore are reserved for
// the implementation in C++; the tag name is left as-is here so that any
// existing user of it keeps compiling.
41 struct __bitstream {
42     unsigned int *buffer;  // Backing storage; presumably one 32-bit word per element -- TODO confirm.
43     int bit_offset;  // Presumably the current position in <buffer>, counted in bits -- TODO confirm.
44     int max_size_in_dword;  // Presumably the capacity of <buffer>, counted in words -- TODO confirm.
45 };
// Short alias; this is the name used by the declarations further down.
46 typedef struct __bitstream bitstream;
47
// Forward declarations. This header only uses these types through pointers
// (constructor parameters and the resource_pool, surface, http_encoder,
// disk_encoder and disk_space_estimator members), so their full definitions
// are not needed here.
48 namespace movit {
49 class ResourcePool;
50 }
51 class DiskSpaceEstimator;
52 class QSurface;
53 class VideoCodecInterface;
54
// Implementation class for the Quick Sync H.264 encoder, declared in terms of
// VA-API types (<va/va.h>, <va/va_enc_h264.h>).
//
// NOTE(review): only declarations are visible in this header. Notes marked
// "presumably" are inferred from names and signatures and should be checked
// against the implementation file.
55 class QuickSyncEncoderImpl {
56 public:
        // NOTE(review): the pointer arguments presumably end up in the
        // same-named raw-pointer members below. Per the member comment,
        // <http_encoder> is nullptr when x264/SVT-AV1 is not in use.
57         QuickSyncEncoderImpl(const std::string &filename, movit::ResourcePool *resource_pool, QSurface *surface, const std::string &va_display, int width, int height, const AVOutputFormat *oformat, VideoCodecInterface *http_encoder, VideoCodecInterface *disk_encoder, DiskSpaceEstimator *disk_space_estimator);
58         ~QuickSyncEncoderImpl();
        // Takes the samples by value (the caller's vector is copied or moved in).
59         void add_audio(int64_t pts, std::vector<float> audio);
        // Presumably reports the use_zerocopy flag -- TODO confirm.
60         bool is_zerocopy() const;
        // begin_frame() / end_frame() bracket one video frame;
        // current_video_frame is only valid between the two calls.
        // <y_tex> and <cbcr_tex> are presumably output parameters naming the
        // luma and chroma textures for the frame -- TODO confirm. The meaning
        // of the bool result is not visible from this header.
61         bool begin_frame(int64_t pts, int64_t duration, movit::YCbCrLumaCoefficients ycbcr_coefficients, const std::vector<RefCountedFrame> &input_frames, GLuint *y_tex, GLuint *cbcr_tex);
62         RefCountedGLsync end_frame();
63         void shutdown();
64         void close_file();
65         void release_gl_resources();
        // Stores <mux> as a plain pointer (no owning wrapper, unlike file_mux).
66         void set_http_mux(Mux *mux)
67         {
68                 http_mux = mux;
69         }
        // Stores <mux> as a plain pointer (no owning wrapper, unlike file_mux).
70         void set_srt_mux(Mux *mux)
71         {
72                 srt_mux = mux;
73         }
74
75         // So we never get negative dts.
        // Evaluates to (ip_period - 1) * (TIMEBASE / TYPICAL_FPS); the cast to
        // int64_t happens before the multiplication, so the product is
        // computed in 64 bits.
76         int64_t global_delay() const {
77                 return int64_t(ip_period - 1) * (TIMEBASE / TYPICAL_FPS);
78         }
79
80 private:
        // Element type of storage_task_queue; handed over via
        // storage_task_enqueue() and passed on to save_codeddata().
81         struct storage_task {
82                 unsigned long long display_order;
83                 int frame_type;
84                 std::vector<float> audio;
85                 int64_t pts, dts, duration;
86                 movit::YCbCrLumaCoefficients ycbcr_coefficients;
87                 ReceivedTimestamps received_ts;
88                 std::vector<size_t> ref_display_frame_numbers;
89         };
        // A video frame together with its input frames and fence. Used for
        // current_video_frame, pending_video_frames and reorder_buffer, and
        // passed by value to pass_frame() / encode_frame().
90         struct PendingFrame {
91                 RefCountedGLsync fence;
92                 std::vector<RefCountedFrame> input_frames;
93                 int64_t pts, duration;
94                 movit::YCbCrLumaCoefficients ycbcr_coefficients;
95         };
        // One slot of the fixed gl_surfaces[SURFACE_NUM] pool. Which fields
        // are meaningful depends on the mode, as noted per group below.
96         struct GLSurface {
97                 // Only if x264_video_to_disk == false.
98                 VASurfaceID src_surface, ref_surface;
99                 VABufferID coded_buf;
100                 VAImage surface_image;
101
102                 // Only if use_zerocopy == true (which implies x264_video_to_disk == false).
103                 GLuint y_tex, cbcr_tex;
104                 EGLImage y_egl_image, cbcr_egl_image;
105
106                 // Only if use_zerocopy == false.
107                 GLuint pbo;
108                 uint8_t *y_ptr, *cbcr_ptr;
109                 size_t y_offset, cbcr_offset;
110
111                 // Surfaces can be busy (have refcount > 0) for a variety of
112                 // reasons: First of all because they belong to a frame that's
113                 // under encoding. But also reference frames take refcounts;
114                 // while a frame is being encoded, all its reference frames
115                 // also have increased refcounts so that they are not dropped.
116                 // Similarly, just being in <reference_frames> increases the
117                 // refcount. Until it is back to zero, the surface cannot be given
118                 // out for encoding another frame. Use release_gl_surface()
119                 // to reduce the refcount, which will free the surface if
120                 // the refcount reaches zero.
121                 //
122                 // Protected by storage_task_queue_mutex.
123                 int refcount = 0;
124         };
125
        // Internal helpers; all are defined outside this header.
        // encode_thread_func() and storage_task_thread() are presumably the
        // entry points of encode_thread and storage_thread -- TODO confirm.
126         void open_output_file(const std::string &filename);
127         void encode_thread_func();
128         void encode_remaining_frames_as_p(int encoding_frame_num, int gop_start_display_frame_num, int64_t last_dts);
129         void pass_frame(PendingFrame frame, int display_frame_num, int64_t pts, int64_t duration);
130         void encode_frame(PendingFrame frame, int encoding_frame_num, int display_frame_num, int gop_start_display_frame_num,
131                           int frame_type, int64_t pts, int64_t dts, int64_t duration, movit::YCbCrLumaCoefficients ycbcr_coefficients);
132         void storage_task_thread();
133         void storage_task_enqueue(storage_task task);
134         void save_codeddata(GLSurface *surf, storage_task task);
135         int render_packedsequence(movit::YCbCrLumaCoefficients ycbcr_coefficients);
136         int render_packedpicture();
137         void render_packedslice();
138         int render_sequence();
139         int render_picture(GLSurface *surf, int frame_type, int display_frame_num, int gop_start_display_frame_num);
140         void sps_rbsp(movit::YCbCrLumaCoefficients ycbcr_coefficients, bitstream *bs);
141         void pps_rbsp(bitstream *bs);
        // The build_packed_*_buffer() helpers hand a buffer back through
        // <header_buffer>. NOTE(review): the meaning of the int result and
        // who frees the buffer are not visible here -- confirm in the .cpp.
142         int build_packed_pic_buffer(unsigned char **header_buffer);
143         int render_slice(int encoding_frame_num, int display_frame_num, int gop_start_display_frame_num, int frame_type);
144         void slice_header(bitstream *bs);
145         int build_packed_seq_buffer(movit::YCbCrLumaCoefficients ycbcr_coefficients, unsigned char **header_buffer);
146         int build_packed_slice_buffer(unsigned char **header_buffer);
147         int init_va(const std::string &va_display);
148         void enable_zerocopy_if_possible();
149         int setup_encode();
150         void release_encode();
151         void update_ReferenceFrames(int current_display_frame, int frame_type);
        // The array bounds in these parameters are documentation only; as
        // function parameters they are adjusted to plain pointers.
152         void update_RefPicList_P(VAPictureH264 RefPicList0_P[MAX_NUM_REF2]);
153         void update_RefPicList_B(VAPictureH264 RefPicList0_B[MAX_NUM_REF2], VAPictureH264 RefPicList1_B[MAX_NUM_REF2]);
        // See the refcount comment in GLSurface for the release rules.
154         GLSurface *allocate_gl_surface();
155         void release_gl_surface(size_t display_frame_num);
156
157         bool is_shutdown = false;
158         bool has_released_gl_resources = false;
159         std::atomic<bool> use_zerocopy{false};
160
161         std::thread encode_thread, storage_thread;
162
163         std::mutex storage_task_queue_mutex;
164         std::condition_variable storage_task_queue_changed;
165         std::queue<storage_task> storage_task_queue;  // protected by storage_task_queue_mutex
166         bool storage_thread_should_quit = false;  // protected by storage_task_queue_mutex
167
168         std::mutex frame_queue_mutex;
169         std::condition_variable frame_queue_nonempty;
170         bool encode_thread_should_quit = false;  // under frame_queue_mutex
171
        // NOTE(review): no in-class initializer; must be set (presumably by
        // the constructor) before it is read.
172         int current_storage_frame;
173
174         PendingFrame current_video_frame;  // Used only between begin_frame() and end_frame().
175         std::queue<PendingFrame> pending_video_frames;  // under frame_queue_mutex
        // Raw pointers with no in-class initializer.
176         movit::ResourcePool *resource_pool;
177         QSurface *surface;
178
179         // Frames that are done rendering and passed on to x264 (if enabled),
180         // but have not been encoded by Quick Sync yet, and thus also not freed.
181         // The key is the display frame number.
182         std::map<int, PendingFrame> reorder_buffer;
183         int quicksync_encoding_frame_num = 0;
184
        // Presumably guards file_audio_encoder -- TODO confirm.
185         std::mutex file_audio_encoder_mutex;
186         std::unique_ptr<AudioEncoder> file_audio_encoder;
187
188         VideoCodecInterface *http_encoder;  // nullptr if not using x264/SVT-AV1.
189         VideoCodecInterface *disk_encoder;
190         std::unique_ptr<V4LOutput> v4l_output;  // nullptr if not using V4L2 output.
191
        // http_mux and srt_mux are set through set_http_mux() / set_srt_mux();
        // only file_mux is held through an owning pointer.
192         Mux* http_mux = nullptr;  // To the HTTP server.
193         Mux* srt_mux = nullptr;  // To the remote SRT endpoint, if any.
194         std::unique_ptr<Mux> file_mux;  // To local disk.
195
196         // Encoder parameters
197         std::unique_ptr<VADisplayWithCleanup> va_dpy;
        // All-ones bit pattern as the initial value; presumably a "not chosen
        // yet" sentinel -- TODO confirm.
198         VAProfile h264_profile = (VAProfile)~0;
199         VAConfigAttrib config_attrib[VAConfigAttribTypeMax];
        // NOTE(review): enc_packed_header_idx has no initializer here (only
        // config_attrib_num does); it must be assigned before use.
200         int config_attrib_num = 0, enc_packed_header_idx;
201
202         GLSurface gl_surfaces[SURFACE_NUM];
203
204         // For all frames in encoding (refcount > 0), a pointer into gl_surfaces
205         // for the surface used for that frame. Protected by storage_task_queue_mutex.
206         // The key is display frame number.
207         std::unordered_map<size_t, GLSurface *> surface_for_frame;
208
        // VA-API handles and parameter buffers. NOTE(review): none of these
        // have in-class initializers; presumably filled in by setup_encode()
        // and the render_*() helpers -- TODO confirm.
209         VAConfigID config_id;
210         VAContextID context_id;
211         VAEncSequenceParameterBufferH264 seq_param;
212         VAEncPictureParameterBufferH264 pic_param;
213         VAEncSliceParameterBufferH264 slice_param;
214         VAPictureH264 CurrentCurrPic;
215
216         struct ReferenceFrame {
217                 VAPictureH264 pic;
218                 int display_number;  // To track reference counts.
219         };
        // Being listed here holds a refcount on the frame's surface (see the
        // comment on GLSurface::refcount).
220         std::deque<ReferenceFrame> reference_frames;
221
222         // Static quality settings.
223         static constexpr unsigned int frame_bitrate = 15000000 / 60;  // Doesn't really matter; only initial_qp does.
224         static constexpr unsigned int num_ref_frames = 2;
225         static constexpr int initial_qp = 15;
226         static constexpr int minimal_qp = 0;
227         static constexpr int intra_period = 30;
228         static constexpr int intra_idr_period = TYPICAL_FPS;  // About a second; more at lower frame rates. Not ideal.
229
230         // Quality settings that are meant to be static, but might be overridden
231         // by the profile.
232         int constraint_set_flag = 0;
233         int h264_packedheader = 0; /* support pack header? */
        // NOTE(review): (1<<16|1) presumably packs two 16-bit reference-frame
        // counts (one per reference list) into one int -- confirm against the
        // VA-API attribute this value is compared with.
234         int h264_maxref = (1<<16|1);
235         int h264_entropy_mode = 1; /* cabac */
        // Also determines global_delay(): (ip_period - 1) frame intervals.
236         int ip_period = 3;
237
238         unsigned int current_ref_frame_num = 0;  // Encoding frame order within this GOP, sans B-frames.
239
        // Frame dimensions. NOTE(review): no in-class initializers; the
        // *_mbaligned values are presumably the dimensions rounded up to a
        // macroblock multiple -- TODO confirm.
240         int frame_width;
241         int frame_height;
242         int frame_width_mbaligned;
243         int frame_height_mbaligned;
244
245         DiskSpaceEstimator *disk_space_estimator;
246 };
247
248 #endif  // !defined(_QUICKSYNC_ENCODER_IMPL_H)