]> git.sesse.net Git - nageru/blob - futatabi/vaapi_jpeg_decoder.cpp
Set CEF autoplay policy to be more lenient.
[nageru] / futatabi / vaapi_jpeg_decoder.cpp
1 #include "vaapi_jpeg_decoder.h"
2
3 #include "jpeg_destroyer.h"
4 #include "jpeg_frame.h"
5 #include "jpeglib_error_wrapper.h"
6 #include "pbo_pool.h"
7 #include "shared/memcpy_interleaved.h"
8 #include "shared/va_display.h"
9 #include "shared/va_resource_pool.h"
10
11 #include <X11/Xlib.h>
12 #include <assert.h>
13 #include <errno.h>
14 #include <fcntl.h>
15 #include <glob.h>
16 #include <jpeglib.h>
17 #include <list>
18 #include <mutex>
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <string.h>
22 #include <string>
23 #include <unistd.h>
24 #include <va/va.h>
25 #include <va/va_drm.h>
26 #include <va/va_x11.h>
27
// Turns a byte offset into a pointer-typed value. Used below as the "pixel data"
// argument to create_texture_2d() while a pixel unpack buffer is bound, so that
// the value denotes an offset into that buffer rather than a client address.
#define BUFFER_OFFSET(i) ((char *)nullptr + (i))
29
30 using namespace std;
31
// TODO: Deduplicate between Nageru and this.
//
// Copies <height> rows of <src_width> bytes each. The source rows are tightly
// packed (consecutive rows are <src_width> bytes apart), while the destination
// rows start <dst_pitch> bytes apart. Bytes in the destination between the end
// of one row and the start of the next are left untouched.
static void memcpy_with_pitch(uint8_t *dst, const uint8_t *src, size_t src_width, size_t dst_pitch, size_t height)
{
        // Both sides are contiguous, so everything can go in one go.
        if (src_width == dst_pitch) {
                memcpy(dst, src, src_width * height);
                return;
        }

        // Otherwise, go row by row, skipping the destination padding.
        for (size_t row = 0; row != height; ++row) {
                memcpy(&dst[row * dst_pitch], &src[row * src_width], src_width);
        }
}
45
// The VA display used for decoding. Assigned by init_jpeg_vaapi(); stays null
// if no usable display was found.
static unique_ptr<VADisplayWithCleanup> va_dpy;
// Pool that decode_jpeg_vaapi() gets its VA resources from. Replaced by
// try_open_va_mjpeg() every time it manages to open a display.
static unique_ptr<VAResourcePool> va_pool;

// Set to true by init_jpeg_vaapi() once a display has been opened successfully.
// Has external linkage, so presumably consulted by other translation units
// before they call decode_jpeg_vaapi().
bool vaapi_jpeg_decoding_usable = false;
50
// From libjpeg (although it's of course identical between implementations).
// Entry i is the natural-order (row-major) index of the i'th coefficient in
// zigzag order. decode_jpeg_vaapi() uses it to pick values out of libjpeg's
// quantization tables when filling in the tables that are sent to VA-API.
static const int jpeg_natural_order[DCTSIZE2] = {
         0,  1,  8, 16,  9,  2,  3, 10,
        17, 24, 32, 25, 18, 11,  4,  5,
        12, 19, 26, 33, 40, 48, 41, 34,
        27, 20, 13,  6,  7, 14, 21, 28,
        35, 42, 49, 56, 57, 50, 43, 36,
        29, 22, 15, 23, 30, 37, 44, 51,
        58, 59, 52, 45, 38, 31, 39, 46,
        53, 60, 61, 54, 47, 55, 62, 63,
};
62
63 static unique_ptr<VADisplayWithCleanup> try_open_va_mjpeg(const string &va_display)
64 {
65         VAConfigID config_id_422, config_id_420;
66         VAImageFormat uyvy_format, nv12_format;
67
68         // Seemingly VA_FOURCC_422H is no good for vaGetImage(). :-/
69         unique_ptr<VADisplayWithCleanup> va_dpy =
70                 try_open_va(va_display, { VAProfileJPEGBaseline }, VAEntrypointVLD,
71                         { { "4:2:2", VA_RT_FORMAT_YUV422, VA_FOURCC_UYVY, &config_id_422, &uyvy_format },
72                           { "4:2:0", VA_RT_FORMAT_YUV420, VA_FOURCC_NV12, &config_id_420, &nv12_format } },
73                         /*chosen_profile=*/nullptr, /*error=*/nullptr);
74         if (va_dpy == nullptr) {
75                 return va_dpy;
76         }
77
78         va_pool.reset(new VAResourcePool(va_dpy->va_dpy, uyvy_format, nv12_format, config_id_422, config_id_420, /*with_data_buffer=*/false));
79
80         return va_dpy;
81 }
82
83 string get_usable_va_display()
84 {
85         // Reduce the amount of chatter while probing,
86         // unless the user has specified otherwise.
87         bool need_env_reset = false;
88         if (getenv("LIBVA_MESSAGING_LEVEL") == nullptr) {
89                 setenv("LIBVA_MESSAGING_LEVEL", "0", true);
90                 need_env_reset = true;
91         }
92
93         // First try the default (ie., whatever $DISPLAY is set to).
94         unique_ptr<VADisplayWithCleanup> va_dpy = try_open_va_mjpeg("");
95         if (va_dpy != nullptr) {
96                 if (need_env_reset) {
97                         unsetenv("LIBVA_MESSAGING_LEVEL");
98                 }
99                 return "";
100         }
101
102         fprintf(stderr, "The X11 display did not expose a VA-API JPEG decoder.\n");
103
104         // Try all /dev/dri/render* in turn. TODO: Accept /dev/dri/card*, too?
105         glob_t g;
106         int err = glob("/dev/dri/renderD*", 0, nullptr, &g);
107         if (err != 0) {
108                 fprintf(stderr, "Couldn't list render nodes (%s) when trying to autodetect a replacement.\n", strerror(errno));
109         } else {
110                 for (size_t i = 0; i < g.gl_pathc; ++i) {
111                         string path = g.gl_pathv[i];
112                         va_dpy = try_open_va_mjpeg(path);
113                         if (va_dpy != nullptr) {
114                                 fprintf(stderr, "Autodetected %s as a suitable replacement; using it.\n",
115                                         path.c_str());
116                                 globfree(&g);
117                                 if (need_env_reset) {
118                                         unsetenv("LIBVA_MESSAGING_LEVEL");
119                                 }
120                                 return path;
121                         }
122                 }
123         }
124
125         fprintf(stderr, "No suitable VA-API JPEG decoders were found in /dev/dri; giving up.\n");
126         fprintf(stderr, "Note that if you are using an Intel CPU with an external GPU,\n");
127         fprintf(stderr, "you may need to enable the integrated Intel GPU in your BIOS\n");
128         fprintf(stderr, "to expose Quick Sync.\n");
129         return "none";
130 }
131
132 void init_jpeg_vaapi()
133 {
134         string dpy = get_usable_va_display();
135         if (dpy == "none") {
136                 return;
137         }
138
139         va_dpy = try_open_va_mjpeg(dpy);
140         if (va_dpy == nullptr) {
141                 return;
142         }
143
144         fprintf(stderr, "VA-API JPEG decoding initialized.\n");
145         vaapi_jpeg_decoding_usable = true;
146 }
147
148 class VABufferDestroyer {
149 public:
150         VABufferDestroyer(VADisplay dpy, VABufferID buf)
151                 : dpy(dpy), buf(buf) {}
152
153         ~VABufferDestroyer()
154         {
155                 VAStatus va_status = vaDestroyBuffer(dpy, buf);
156                 CHECK_VASTATUS(va_status, "vaDestroyBuffer");
157         }
158
159 private:
160         VADisplay dpy;
161         VABufferID buf;
162 };
163
164 shared_ptr<Frame> decode_jpeg_vaapi(const string &jpeg)
165 {
166         jpeg_decompress_struct dinfo;
167         JPEGWrapErrorManager error_mgr(&dinfo);
168         if (!error_mgr.run([&dinfo] { jpeg_create_decompress(&dinfo); })) {
169                 return nullptr;
170         }
171         JPEGDestroyer destroy_dinfo(&dinfo);
172
173         jpeg_save_markers(&dinfo, JPEG_APP0 + 1, 0xFFFF);
174
175         jpeg_mem_src(&dinfo, reinterpret_cast<const unsigned char *>(jpeg.data()), jpeg.size());
176         if (!error_mgr.run([&dinfo] { jpeg_read_header(&dinfo, true); })) {
177                 return nullptr;
178         }
179
180         if (dinfo.num_components != 3) {
181                 fprintf(stderr, "Not a color JPEG. (%d components, Y=%dx%d, Cb=%dx%d, Cr=%dx%d)\n",
182                         dinfo.num_components,
183                         dinfo.comp_info[0].h_samp_factor, dinfo.comp_info[0].v_samp_factor,
184                         dinfo.comp_info[1].h_samp_factor, dinfo.comp_info[1].v_samp_factor,
185                         dinfo.comp_info[2].h_samp_factor, dinfo.comp_info[2].v_samp_factor);
186                 return nullptr;
187         }
188
189         const bool is_422 =
190                 dinfo.comp_info[0].h_samp_factor == 2 &&
191                 dinfo.comp_info[1].h_samp_factor == 1 &&
192                 dinfo.comp_info[1].v_samp_factor == dinfo.comp_info[0].v_samp_factor &&
193                 dinfo.comp_info[2].h_samp_factor == 1 &&
194                 dinfo.comp_info[2].v_samp_factor == dinfo.comp_info[0].v_samp_factor;
195         const bool is_420 =
196                 dinfo.comp_info[0].h_samp_factor == 2 &&
197                 dinfo.comp_info[0].v_samp_factor == 2 &&
198                 dinfo.comp_info[1].h_samp_factor == 1 &&
199                 dinfo.comp_info[1].v_samp_factor == 1 &&
200                 dinfo.comp_info[2].h_samp_factor == 1 &&
201                 dinfo.comp_info[2].v_samp_factor == 1;
202         if (!is_422 && !is_420) {
203                 fprintf(stderr, "Not 4:2:2 or 4:2:0. (Y=%dx%d, Cb=%dx%d, Cr=%dx%d)\n",
204                         dinfo.comp_info[0].h_samp_factor, dinfo.comp_info[0].v_samp_factor,
205                         dinfo.comp_info[1].h_samp_factor, dinfo.comp_info[1].v_samp_factor,
206                         dinfo.comp_info[2].h_samp_factor, dinfo.comp_info[2].v_samp_factor);
207                 return nullptr;
208         }
209
210         // Picture parameters.
211         VAPictureParameterBufferJPEGBaseline pic_param;
212         memset(&pic_param, 0, sizeof(pic_param));
213         pic_param.picture_width = dinfo.image_width;
214         pic_param.picture_height = dinfo.image_height;
215         for (int component_idx = 0; component_idx < dinfo.num_components; ++component_idx) {
216                 const jpeg_component_info *comp = &dinfo.comp_info[component_idx];
217                 pic_param.components[component_idx].component_id = comp->component_id;
218                 pic_param.components[component_idx].h_sampling_factor = comp->h_samp_factor;
219                 pic_param.components[component_idx].v_sampling_factor = comp->v_samp_factor;
220                 pic_param.components[component_idx].quantiser_table_selector = comp->quant_tbl_no;
221         }
222         pic_param.num_components = dinfo.num_components;
223         pic_param.color_space = 0;  // YUV.
224         pic_param.rotation = VA_ROTATION_NONE;
225
226         VAResourcePool::VAResources resources = va_pool->get_va_resources(dinfo.image_width, dinfo.image_height, is_422 ? VA_FOURCC_UYVY : VA_FOURCC_NV12);
227         ReleaseVAResources release(va_pool.get(), resources);
228
229         VABufferID pic_param_buffer;
230         VAStatus va_status = vaCreateBuffer(va_dpy->va_dpy, resources.context, VAPictureParameterBufferType, sizeof(pic_param), 1, &pic_param, &pic_param_buffer);
231         CHECK_VASTATUS_RET(va_status, "vaCreateBuffer");
232         VABufferDestroyer destroy_pic_param(va_dpy->va_dpy, pic_param_buffer);
233
234         // Quantization matrices.
235         VAIQMatrixBufferJPEGBaseline iq;
236         memset(&iq, 0, sizeof(iq));
237
238         for (int quant_tbl_idx = 0; quant_tbl_idx < min(4, NUM_QUANT_TBLS); ++quant_tbl_idx) {
239                 const JQUANT_TBL *qtbl = dinfo.quant_tbl_ptrs[quant_tbl_idx];
240                 if (qtbl == nullptr) {
241                         iq.load_quantiser_table[quant_tbl_idx] = 0;
242                 } else {
243                         iq.load_quantiser_table[quant_tbl_idx] = 1;
244                         for (int i = 0; i < 64; ++i) {
245                                 if (qtbl->quantval[i] > 255) {
246                                         fprintf(stderr, "Baseline JPEG only!\n");
247                                         return nullptr;
248                                 }
249                                 iq.quantiser_table[quant_tbl_idx][i] = qtbl->quantval[jpeg_natural_order[i]];
250                         }
251                 }
252         }
253
254         VABufferID iq_buffer;
255         va_status = vaCreateBuffer(va_dpy->va_dpy, resources.context, VAIQMatrixBufferType, sizeof(iq), 1, &iq, &iq_buffer);
256         CHECK_VASTATUS_RET(va_status, "vaCreateBuffer");
257         VABufferDestroyer destroy_iq(va_dpy->va_dpy, iq_buffer);
258
259         // Huffman tables (arithmetic is not supported).
260         VAHuffmanTableBufferJPEGBaseline huff;
261         memset(&huff, 0, sizeof(huff));
262
263         for (int huff_tbl_idx = 0; huff_tbl_idx < min(2, NUM_HUFF_TBLS); ++huff_tbl_idx) {
264                 const JHUFF_TBL *ac_hufftbl = dinfo.ac_huff_tbl_ptrs[huff_tbl_idx];
265                 const JHUFF_TBL *dc_hufftbl = dinfo.dc_huff_tbl_ptrs[huff_tbl_idx];
266                 if (ac_hufftbl == nullptr) {
267                         assert(dc_hufftbl == nullptr);
268                         huff.load_huffman_table[huff_tbl_idx] = 0;
269                 } else {
270                         assert(dc_hufftbl != nullptr);
271                         huff.load_huffman_table[huff_tbl_idx] = 1;
272
273                         for (int i = 0; i < 16; ++i) {
274                                 huff.huffman_table[huff_tbl_idx].num_dc_codes[i] = dc_hufftbl->bits[i + 1];
275                         }
276                         for (int i = 0; i < 12; ++i) {
277                                 huff.huffman_table[huff_tbl_idx].dc_values[i] = dc_hufftbl->huffval[i];
278                         }
279                         for (int i = 0; i < 16; ++i) {
280                                 huff.huffman_table[huff_tbl_idx].num_ac_codes[i] = ac_hufftbl->bits[i + 1];
281                         }
282                         for (int i = 0; i < 162; ++i) {
283                                 huff.huffman_table[huff_tbl_idx].ac_values[i] = ac_hufftbl->huffval[i];
284                         }
285                 }
286         }
287
288         VABufferID huff_buffer;
289         va_status = vaCreateBuffer(va_dpy->va_dpy, resources.context, VAHuffmanTableBufferType, sizeof(huff), 1, &huff, &huff_buffer);
290         CHECK_VASTATUS_RET(va_status, "vaCreateBuffer");
291         VABufferDestroyer destroy_huff(va_dpy->va_dpy, huff_buffer);
292
293         // Slice parameters (metadata about the slice).
294         VASliceParameterBufferJPEGBaseline parms;
295         memset(&parms, 0, sizeof(parms));
296         parms.slice_data_size = dinfo.src->bytes_in_buffer;
297         parms.slice_data_offset = 0;
298         parms.slice_data_flag = VA_SLICE_DATA_FLAG_ALL;
299         parms.slice_horizontal_position = 0;
300         parms.slice_vertical_position = 0;
301         for (int component_idx = 0; component_idx < dinfo.num_components; ++component_idx) {
302                 const jpeg_component_info *comp = &dinfo.comp_info[component_idx];
303                 parms.components[component_idx].component_selector = comp->component_id;
304                 parms.components[component_idx].dc_table_selector = comp->dc_tbl_no;
305                 parms.components[component_idx].ac_table_selector = comp->ac_tbl_no;
306                 if (parms.components[component_idx].dc_table_selector > 1 ||
307                     parms.components[component_idx].ac_table_selector > 1) {
308                         fprintf(stderr, "Uses too many Huffman tables\n");
309                         return nullptr;
310                 }
311         }
312         parms.num_components = dinfo.num_components;
313         parms.restart_interval = dinfo.restart_interval;
314         int horiz_mcus = (dinfo.image_width + (DCTSIZE * 2) - 1) / (DCTSIZE * 2);
315         int vert_mcus = (dinfo.image_height + DCTSIZE - 1) / DCTSIZE;
316         parms.num_mcus = horiz_mcus * vert_mcus;
317
318         VABufferID slice_param_buffer;
319         va_status = vaCreateBuffer(va_dpy->va_dpy, resources.context, VASliceParameterBufferType, sizeof(parms), 1, &parms, &slice_param_buffer);
320         CHECK_VASTATUS_RET(va_status, "vaCreateBuffer");
321         VABufferDestroyer destroy_slice_param(va_dpy->va_dpy, slice_param_buffer);
322
323         // The actual data. VA-API will destuff and all for us.
324         VABufferID data_buffer;
325         va_status = vaCreateBuffer(va_dpy->va_dpy, resources.context, VASliceDataBufferType, dinfo.src->bytes_in_buffer, 1, const_cast<unsigned char *>(dinfo.src->next_input_byte), &data_buffer);
326         CHECK_VASTATUS_RET(va_status, "vaCreateBuffer");
327         VABufferDestroyer destroy_data(va_dpy->va_dpy, data_buffer);
328
329         va_status = vaBeginPicture(va_dpy->va_dpy, resources.context, resources.surface);
330         CHECK_VASTATUS_RET(va_status, "vaBeginPicture");
331         va_status = vaRenderPicture(va_dpy->va_dpy, resources.context, &pic_param_buffer, 1);
332         CHECK_VASTATUS_RET(va_status, "vaRenderPicture(pic_param)");
333         va_status = vaRenderPicture(va_dpy->va_dpy, resources.context, &iq_buffer, 1);
334         CHECK_VASTATUS_RET(va_status, "vaRenderPicture(iq)");
335         va_status = vaRenderPicture(va_dpy->va_dpy, resources.context, &huff_buffer, 1);
336         CHECK_VASTATUS_RET(va_status, "vaRenderPicture(huff)");
337         va_status = vaRenderPicture(va_dpy->va_dpy, resources.context, &slice_param_buffer, 1);
338         CHECK_VASTATUS_RET(va_status, "vaRenderPicture(slice_param)");
339         va_status = vaRenderPicture(va_dpy->va_dpy, resources.context, &data_buffer, 1);
340         CHECK_VASTATUS_RET(va_status, "vaRenderPicture(data)");
341         va_status = vaEndPicture(va_dpy->va_dpy, resources.context);
342         CHECK_VASTATUS_RET(va_status, "vaEndPicture");
343
344         // vaDeriveImage() works, but the resulting image seems to live in
345         // uncached memory, which makes copying data out from it very, very slow.
346         // Thanks to FFmpeg for the observation that you can vaGetImage() the
347         // surface onto your own image (although then, it can't be planar, which
348         // is unfortunate for us).
349 #if 0
350         VAImage image;
351         va_status = vaDeriveImage(va_dpy->va_dpy, surf, &image);
352         CHECK_VASTATUS_RET(va_status, "vaDeriveImage");
353 #else
354         va_status = vaSyncSurface(va_dpy->va_dpy, resources.surface);
355         CHECK_VASTATUS_RET(va_status, "vaSyncSurface");
356
357         va_status = vaGetImage(va_dpy->va_dpy, resources.surface, 0, 0, dinfo.image_width, dinfo.image_height, resources.image.image_id);
358         CHECK_VASTATUS_RET(va_status, "vaGetImage");
359 #endif
360
361         void *mapped;
362         va_status = vaMapBuffer(va_dpy->va_dpy, resources.image.buf, &mapped);
363         CHECK_VASTATUS_RET(va_status, "vaMapBuffer");
364
365         shared_ptr<Frame> frame(new Frame);
366 #if 0
367         // 4:2:2 planar (for vaDeriveImage).
368         frame->y.reset(new uint8_t[dinfo.image_width * dinfo.image_height]);
369         frame->cb.reset(new uint8_t[(dinfo.image_width / 2) * dinfo.image_height]);
370         frame->cr.reset(new uint8_t[(dinfo.image_width / 2) * dinfo.image_height]);
371         for (int component_idx = 0; component_idx < dinfo.num_components; ++component_idx) {
372                 uint8_t *dptr;
373                 size_t width;
374                 if (component_idx == 0) {
375                         dptr = frame->y.get();
376                         width = dinfo.image_width;
377                 } else if (component_idx == 1) {
378                         dptr = frame->cb.get();
379                         width = dinfo.image_width / 2;
380                 } else if (component_idx == 2) {
381                         dptr = frame->cr.get();
382                         width = dinfo.image_width / 2;
383                 } else {
384                         assert(false);
385                 }
386                 const uint8_t *sptr = (const uint8_t *)mapped + image.offsets[component_idx];
387                 size_t spitch = image.pitches[component_idx];
388                 for (size_t y = 0; y < dinfo.image_height; ++y) {
389                         memcpy(dptr + y * width, sptr + y * spitch, width);
390                 }
391         }
392 #else
393         // Convert Y'CbCr to separate Y' and CbCr.
394         frame->is_semiplanar = true;
395
396         PBO pbo = global_pbo_pool->alloc_pbo();
397         size_t cbcr_offset = dinfo.image_width * dinfo.image_height;
398         uint8_t *y_pix = pbo.ptr;
399         uint8_t *cbcr_pix = pbo.ptr + cbcr_offset;
400
401         unsigned cbcr_width = dinfo.image_width / 2;
402         unsigned cbcr_height;
403         if (is_422) {
404                 const uint8_t *src = (const uint8_t *)mapped + resources.image.offsets[0];
405                 if (resources.image.pitches[0] == dinfo.image_width * 2) {
406                         memcpy_interleaved(cbcr_pix, y_pix, src, dinfo.image_width * dinfo.image_height * 2);
407                 } else {
408                         for (unsigned y = 0; y < dinfo.image_height; ++y) {
409                                 memcpy_interleaved(cbcr_pix + y * dinfo.image_width, y_pix + y * dinfo.image_width,
410                                                    src + y * resources.image.pitches[0], dinfo.image_width * 2);
411                         }
412                 }
413                 cbcr_height = dinfo.image_height;
414         } else {
415                 assert(is_420);
416                 const uint8_t *src_y = (const uint8_t *)mapped + resources.image.offsets[0];
417                 const uint8_t *src_cbcr = (const uint8_t *)mapped + resources.image.offsets[1];
418                 memcpy_with_pitch(y_pix, src_y, dinfo.image_width, resources.image.pitches[0], dinfo.image_height);
419                 memcpy_with_pitch(cbcr_pix, src_cbcr, dinfo.image_width, resources.image.pitches[1], dinfo.image_height / 2);
420                 cbcr_height = dinfo.image_height / 2;
421         }
422
423         glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo.pbo);
424         frame->y = create_texture_2d(dinfo.image_width, dinfo.image_height, GL_R8, GL_RED, GL_UNSIGNED_BYTE, BUFFER_OFFSET(0));
425         frame->cbcr = create_texture_2d(cbcr_width, cbcr_height, GL_RG8, GL_RG, GL_UNSIGNED_BYTE, BUFFER_OFFSET(cbcr_offset));
426         glFlushMappedNamedBufferRange(pbo.pbo, 0, dinfo.image_width * dinfo.image_height + cbcr_width * cbcr_height * 2);
427         glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
428
429         glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT);
430         pbo.upload_done = RefCountedGLsync(GL_SYNC_GPU_COMMANDS_COMPLETE, /*flags=*/0);
431         frame->uploaded_ui_thread = pbo.upload_done;
432         frame->uploaded_interpolation = pbo.upload_done;
433         global_pbo_pool->release_pbo(move(pbo));
434 #endif
435         frame->width = dinfo.image_width;
436         frame->height = dinfo.image_height;
437         frame->chroma_subsampling_x = 2;
438         frame->chroma_subsampling_y = is_420 ? 2 : 1;
439
440         if (dinfo.marker_list != nullptr &&
441             dinfo.marker_list->marker == JPEG_APP0 + 1 &&
442             dinfo.marker_list->data_length >= 4 &&
443             memcmp(dinfo.marker_list->data, "Exif", 4) == 0) {
444                 frame->exif_data.assign(reinterpret_cast<char *>(dinfo.marker_list->data),
445                         dinfo.marker_list->data_length);
446         }
447
448         va_status = vaUnmapBuffer(va_dpy->va_dpy, resources.image.buf);
449         CHECK_VASTATUS_RET(va_status, "vaUnmapBuffer");
450
451         return frame;
452 }