]> git.sesse.net Git - nageru/blob - nageru/pbo_frame_allocator.cpp
Various fixes for non-VA-API MJPEG encoding.
[nageru] / nageru / pbo_frame_allocator.cpp
1 #include "pbo_frame_allocator.h"
2
3 #include <bmusb/bmusb.h>
4 #include <movit/util.h>
5 #include <stdbool.h>
6 #include <stdint.h>
7 #include <stdio.h>
8 #include <cstddef>
9
10 #include "flags.h"
11 #include "mjpeg_encoder.h"
12 #include "v210_converter.h"
13 #include "va_display_with_cleanup.h"
14
15 using namespace std;
16
17 namespace {
18
19 void set_clamp_to_edge()
20 {
21         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
22         check_error();
23         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
24         check_error();
25         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
26         check_error();
27 }
28
29 }  // namespace
30
31 PBOFrameAllocator::PBOFrameAllocator(bmusb::PixelFormat pixel_format, size_t frame_size, GLuint width, GLuint height, unsigned card_index, MJPEGEncoder *mjpeg_encoder, size_t num_queued_frames, GLenum buffer, GLenum permissions, GLenum map_bits)
32         : card_index(card_index), mjpeg_encoder(mjpeg_encoder), pixel_format(pixel_format), buffer(buffer)
33 {
34         userdata.reset(new Userdata[num_queued_frames]);
35         for (size_t i = 0; i < num_queued_frames; ++i) {
36                 init_frame(i, frame_size, width, height, permissions, map_bits);
37         }
38         glBindBuffer(buffer, 0);
39         check_error();
40         glBindTexture(GL_TEXTURE_2D, 0);
41         check_error();
42 }
43
44 void PBOFrameAllocator::init_frame(size_t frame_idx, size_t frame_size, GLuint width, GLuint height, GLenum permissions, GLenum map_bits)
45 {
46         GLuint pbo;
47         glGenBuffers(1, &pbo);
48         check_error();
49         glBindBuffer(buffer, pbo);
50         check_error();
51         glBufferStorage(buffer, frame_size, nullptr, permissions | GL_MAP_PERSISTENT_BIT);
52         check_error();
53
54         Frame frame;
55         frame.data = (uint8_t *)glMapBufferRange(buffer, 0, frame_size, permissions | map_bits | GL_MAP_PERSISTENT_BIT);
56         frame.data2 = frame.data + frame_size / 2;
57         check_error();
58         frame.size = frame_size;
59         Userdata *ud = &userdata[frame_idx];
60         frame.userdata = ud;
61         ud->pbo = pbo;
62         ud->pixel_format = pixel_format;
63         ud->data_copy_malloc = new uint8_t[frame_size];
64         frame.owner = this;
65
66         // For 8-bit non-planar Y'CbCr, we ask the driver to split Y' and Cb/Cr
67         // into separate textures. For 10-bit, the input format (v210)
68         // is complicated enough that we need to interpolate up to 4:4:4,
69         // which we do in a compute shader ourselves. For BGRA, the data
70         // is already 4:4:4:4.
71         frame.interleaved = (pixel_format == bmusb::PixelFormat_8BitYCbCr);
72
73         // Create textures. We don't allocate any data for the second field at this point
74         // (just create the texture state with the samplers), since our default assumed
75         // resolution is progressive.
76         switch (pixel_format) {
77         case bmusb::PixelFormat_8BitYCbCr:
78                 glGenTextures(2, ud->tex_y);
79                 check_error();
80                 glGenTextures(2, ud->tex_cbcr);
81                 check_error();
82                 break;
83         case bmusb::PixelFormat_10BitYCbCr:
84                 glGenTextures(2, ud->tex_v210);
85                 check_error();
86                 glGenTextures(2, ud->tex_444);
87                 check_error();
88                 break;
89         case bmusb::PixelFormat_8BitBGRA:
90                 glGenTextures(2, ud->tex_rgba);
91                 check_error();
92                 break;
93         case bmusb::PixelFormat_8BitYCbCrPlanar:
94                 glGenTextures(2, ud->tex_y);
95                 check_error();
96                 glGenTextures(2, ud->tex_cb);
97                 check_error();
98                 glGenTextures(2, ud->tex_cr);
99                 check_error();
100                 break;
101         default:
102                 assert(false);
103         }
104
105         ud->last_width[0] = width;
106         ud->last_height[0] = height;
107         ud->last_cbcr_width[0] = width / 2;
108         ud->last_cbcr_height[0] = height;
109         ud->last_v210_width[0] = 0;
110
111         ud->last_width[1] = 0;
112         ud->last_height[1] = 0;
113         ud->last_cbcr_width[1] = 0;
114         ud->last_cbcr_height[1] = 0;
115         ud->last_v210_width[1] = 0;
116
117         ud->last_interlaced = false;
118         ud->last_has_signal = false;
119         ud->last_is_connected = false;
120         for (unsigned field = 0; field < 2; ++field) {
121                 switch (pixel_format) {
122                 case bmusb::PixelFormat_10BitYCbCr: {
123                         const size_t v210_width = v210Converter::get_minimum_v210_texture_width(width);
124
125                         // Seemingly we need to set the minification filter even though
126                         // shader image loads don't use them, or NVIDIA will just give us
127                         // zero back.
128                         glBindTexture(GL_TEXTURE_2D, ud->tex_v210[field]);
129                         check_error();
130                         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
131                         check_error();
132                         if (field == 0) {
133                                 ud->last_v210_width[0] = v210_width;
134                                 glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB10_A2, v210_width, height, 0, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, nullptr);
135                                 check_error();
136                         }
137
138                         glBindTexture(GL_TEXTURE_2D, ud->tex_444[field]);
139                         check_error();
140                         set_clamp_to_edge();
141                         if (field == 0) {
142                                 glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB10_A2, width, height, 0, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, nullptr);
143                                 check_error();
144                         }
145                         break;
146                 }
147                 case bmusb::PixelFormat_8BitYCbCr:
148                         glBindTexture(GL_TEXTURE_2D, ud->tex_y[field]);
149                         check_error();
150                         set_clamp_to_edge();
151                         if (field == 0) {
152                                 glTexImage2D(GL_TEXTURE_2D, 0, GL_R8, width, height, 0, GL_RED, GL_UNSIGNED_BYTE, nullptr);
153                                 check_error();
154                         }
155
156                         glBindTexture(GL_TEXTURE_2D, ud->tex_cbcr[field]);
157                         check_error();
158                         set_clamp_to_edge();
159                         if (field == 0) {
160                                 glTexImage2D(GL_TEXTURE_2D, 0, GL_RG8, width / 2, height, 0, GL_RG, GL_UNSIGNED_BYTE, nullptr);
161                                 check_error();
162                         }
163                         break;
164                 case bmusb::PixelFormat_8BitBGRA:
165                         glBindTexture(GL_TEXTURE_2D, ud->tex_rgba[field]);
166                         check_error();
167                         set_clamp_to_edge();
168                         if (field == 0) {
169                                 glTexImage2D(GL_TEXTURE_2D, 0, GL_SRGB8_ALPHA8, width, height, 0, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, nullptr);
170                                 check_error();
171                         }
172                         break;
173                 case bmusb::PixelFormat_8BitYCbCrPlanar:
174                         glBindTexture(GL_TEXTURE_2D, ud->tex_y[field]);
175                         check_error();
176                         set_clamp_to_edge();
177                         if (field == 0) {
178                                 glTexImage2D(GL_TEXTURE_2D, 0, GL_R8, width, height, 0, GL_RED, GL_UNSIGNED_BYTE, nullptr);
179                                 check_error();
180                         }
181
182                         glBindTexture(GL_TEXTURE_2D, ud->tex_cb[field]);
183                         check_error();
184                         set_clamp_to_edge();
185                         if (field == 0) {
186                                 glTexImage2D(GL_TEXTURE_2D, 0, GL_R8, width / 2, height, 0, GL_RED, GL_UNSIGNED_BYTE, nullptr);
187                                 check_error();
188                         }
189
190                         glBindTexture(GL_TEXTURE_2D, ud->tex_cr[field]);
191                         check_error();
192                         set_clamp_to_edge();
193                         if (field == 0) {
194                                 glTexImage2D(GL_TEXTURE_2D, 0, GL_R8, width / 2, height, 0, GL_RED, GL_UNSIGNED_BYTE, nullptr);
195                                 check_error();
196                         }
197                         break;
198                 default:
199                         assert(false);
200                 }
201         }
202
203         freelist.push(frame);
204 }
205
206 PBOFrameAllocator::~PBOFrameAllocator()
207 {
208         while (!freelist.empty()) {
209                 Frame frame = freelist.front();
210                 freelist.pop();
211                 destroy_frame(&frame);
212         }
213 }
214
215 void PBOFrameAllocator::destroy_frame(Frame *frame)
216 {
217         Userdata *ud = (Userdata *)frame->userdata;
218         delete[] ud->data_copy_malloc;
219
220         GLuint pbo = ud->pbo;
221         glBindBuffer(buffer, pbo);
222         check_error();
223         glUnmapBuffer(buffer);
224         check_error();
225         glBindBuffer(buffer, 0);
226         check_error();
227         glDeleteBuffers(1, &pbo);
228         check_error();
229         switch (pixel_format) {
230         case bmusb::PixelFormat_10BitYCbCr:
231                 glDeleteTextures(2, ud->tex_v210);
232                 check_error();
233                 glDeleteTextures(2, ud->tex_444);
234                 check_error();
235                 break;
236         case bmusb::PixelFormat_8BitYCbCr:
237                 glDeleteTextures(2, ud->tex_y);
238                 check_error();
239                 glDeleteTextures(2, ud->tex_cbcr);
240                 check_error();
241                 break;
242         case bmusb::PixelFormat_8BitBGRA:
243                 glDeleteTextures(2, ud->tex_rgba);
244                 check_error();
245                 break;
246         case bmusb::PixelFormat_8BitYCbCrPlanar:
247                 glDeleteTextures(2, ud->tex_y);
248                 check_error();
249                 glDeleteTextures(2, ud->tex_cb);
250                 check_error();
251                 glDeleteTextures(2, ud->tex_cr);
252                 check_error();
253                 break;
254         default:
255                 assert(false);
256         }
257 }
258 //static int sumsum = 0;
259
260 bmusb::FrameAllocator::Frame PBOFrameAllocator::alloc_frame()
261 {
262         Frame vf;
263
264         lock_guard<mutex> lock(freelist_mutex);  // Meh.
265         if (freelist.empty()) {
266                 printf("Frame overrun (no more spare PBO frames), dropping frame!\n");
267         } else {
268                 //fprintf(stderr, "freelist has %d allocated\n", ++sumsum);
269                 vf = freelist.front();
270                 freelist.pop();  // Meh.
271         }
272         vf.len = 0;
273         vf.overflow = 0;
274
275         if (mjpeg_encoder != nullptr &&
276             mjpeg_encoder->get_mjpeg_stream_for_card(card_index) != -1 &&
277             vf.userdata != nullptr) {
278                 Userdata *ud = (Userdata *)vf.userdata;
279                 vf.data_copy = ud->data_copy_malloc;
280                 ud->data_copy_current_src = Userdata::FROM_MALLOC;
281         } else {
282                 vf.data_copy = nullptr;
283         }
284
285         return vf;
286 }
287
288 bmusb::FrameAllocator::Frame PBOFrameAllocator::create_frame(size_t width, size_t height, size_t stride)
289 {
290         Frame vf;
291
292         {
293                 lock_guard<mutex> lock(freelist_mutex);
294                 if (freelist.empty()) {
295                         printf("Frame overrun (no more spare PBO frames), dropping frame!\n");
296                         vf.len = 0;
297                         vf.overflow = 0;
298                         return vf;
299                 } else {
300                         vf = freelist.front();
301                         freelist.pop();
302                 }
303         }
304         vf.len = 0;
305         vf.overflow = 0;
306
307         Userdata *userdata = (Userdata *)vf.userdata;
308
309         if (mjpeg_encoder != nullptr &&
310             mjpeg_encoder->get_mjpeg_stream_for_card(card_index) != -1) {
311                 if (mjpeg_encoder->using_vaapi()) {
312                         VADisplay va_dpy = mjpeg_encoder->va_dpy->va_dpy;
313                         MJPEGEncoder::VAResources resources = mjpeg_encoder->get_va_resources(width, height);
314                         MJPEGEncoder::ReleaseVAResources release(mjpeg_encoder, resources);
315
316                         if (resources.image.pitches[0] == stride) {
317                                 userdata->va_resources = move(resources);
318                                 userdata->va_resources_release = move(release);
319
320                                 VAStatus va_status = vaMapBuffer(va_dpy, resources.image.buf, (void **)&vf.data_copy);
321                                 CHECK_VASTATUS(va_status, "vaMapBuffer");
322                                 vf.data_copy += resources.image.offsets[0];
323                                 userdata->data_copy_current_src = Userdata::FROM_VA_API;
324                         } else {
325                                 printf("WARNING: Could not copy directly into VA-API MJPEG buffer for %zu x %zu, since producer and consumer disagreed on stride (%zu != %d).\n", width, height, stride, resources.image.pitches[0]);
326                                 vf.data_copy = userdata->data_copy_malloc;
327                                 userdata->data_copy_current_src = Userdata::FROM_MALLOC;
328                         }
329                 } else {
330                         vf.data_copy = userdata->data_copy_malloc;
331                         userdata->data_copy_current_src = Userdata::FROM_MALLOC;
332                 }
333         } else {
334                 vf.data_copy = nullptr;
335         }
336
337         return vf;
338 }
339
340 void PBOFrameAllocator::release_frame(Frame frame)
341 {
342         if (frame.overflow > 0) {
343                 printf("%d bytes overflow after last (PBO) frame\n", int(frame.overflow));
344         }
345
346 #if 0
347         // Poison the page. (Note that this might be bogus if you don't have an OpenGL context.)
348         memset(frame.data, 0, frame.size);
349         Userdata *userdata = (Userdata *)frame.userdata;
350         for (unsigned field = 0; field < 2; ++field) {
351                 glBindTexture(GL_TEXTURE_2D, userdata->tex_y[field]);
352                 check_error();
353                 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
354                 check_error();
355                 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
356                 check_error();
357                 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
358                 check_error();
359                 glTexImage2D(GL_TEXTURE_2D, 0, GL_R8, userdata->last_width[field], userdata->last_height[field], 0, GL_RED, GL_UNSIGNED_BYTE, nullptr);
360                 check_error();
361
362                 glBindTexture(GL_TEXTURE_2D, userdata->tex_cbcr[field]);
363                 check_error();
364                 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
365                 check_error();
366                 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
367                 check_error();
368                 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
369                 check_error();
370                 glTexImage2D(GL_TEXTURE_2D, 0, GL_RG8, userdata->last_width[field] / 2, userdata->last_height[field], 0, GL_RED, GL_UNSIGNED_BYTE, nullptr);
371                 check_error();
372         }
373 #endif
374
375         {
376                 // In case we never got to upload the frame to MJPEGEncoder.
377                 Userdata *userdata = (Userdata *)frame.userdata;
378                 MJPEGEncoder::VAResources resources __attribute__((unused)) = move(userdata->va_resources);
379                 MJPEGEncoder::ReleaseVAResources release = move(userdata->va_resources_release);
380
381                 if (frame.data_copy != nullptr && userdata->data_copy_current_src == Userdata::FROM_VA_API) {
382                         VADisplay va_dpy = mjpeg_encoder->va_dpy->va_dpy;
383                         VAStatus va_status = vaUnmapBuffer(va_dpy, resources.image.buf);
384                         CHECK_VASTATUS(va_status, "vaUnmapBuffer");
385
386                         frame.data_copy = nullptr;
387                 }
388         }
389
390         lock_guard<mutex> lock(freelist_mutex);
391         freelist.push(frame);
392         //--sumsum;
393 }