1 #include "vaapi_jpeg_decoder.h"
3 #include "jpeg_destroyer.h"
4 #include "jpeg_frame.h"
5 #include "jpeglib_error_wrapper.h"
7 #include "shared/memcpy_interleaved.h"
8 #include "shared/va_display.h"
24 #include <va/va_drm.h>
25 #include <va/va_x11.h>
// Turns a byte offset into the pointer-typed "offset" argument passed to
// create_texture_2d() while a GL_PIXEL_UNPACK_BUFFER is bound (see
// decode_jpeg_vaapi()). The result type stays char *.
//
// The offset is converted through uintptr_t instead of being added to a null
// pointer: pointer arithmetic on nullptr with a nonzero offset is undefined
// behavior in C++, whereas an integer-to-pointer conversion is not.
#define BUFFER_OFFSET(i) (reinterpret_cast<char *>(static_cast<uintptr_t>(i)))
// File-scope state shared by the VA-API JPEG decoding functions below.
//
// va_dpy wraps the VA display used by every VA-API call in this file; it is
// assigned in init_jpeg_vaapi().
31 static unique_ptr<VADisplayWithCleanup> va_dpy;
// The addresses of config_id and uyvy_format are passed to try_open_va() in
// try_open_va_mjpeg(); they are later read by vaCreateContext() and
// vaCreateImage() in get_va_resources().
32 static VAConfigID config_id;
33 static VAImageFormat uyvy_format;
// Set to true at the end of init_jpeg_vaapi().
34 bool vaapi_jpeg_decoding_usable = false;
// NOTE(review): presumably a member of VAResources (the freelist lookup in
// get_va_resources() compares it->width / it->height); the enclosing
// declaration is not visible here -- confirm.
37 unsigned width, height;
// Previously created VAResources that can be reused by get_va_resources().
// Both get_va_resources() and release_va_resources() take va_resources_mutex
// before touching the list.
42 static list<VAResources> va_resources_freelist;
43 static mutex va_resources_mutex;
// Error-reporting helpers for VA-API calls. When va_status is not
// VA_STATUS_SUCCESS, both macros print the enclosing function name, the line
// number, the name of the failed call (func) and the numeric status to stderr.
// NOTE(review): what each macro does after printing is not visible here; the
// _RET suffix suggests CHECK_VASTATUS_RET returns from the calling function
// while CHECK_VASTATUS does not -- confirm against the full macro bodies.
45 #define CHECK_VASTATUS(va_status, func) \
46 if (va_status != VA_STATUS_SUCCESS) { \
47 fprintf(stderr, "%s:%d (%s) failed with %d\n", __func__, __LINE__, func, va_status); \
51 #define CHECK_VASTATUS_RET(va_status, func) \
52 if (va_status != VA_STATUS_SUCCESS) { \
53 fprintf(stderr, "%s:%d (%s) failed with %d\n", __func__, __LINE__, func, va_status); \
57 // From libjpeg (although it's of course identical between implementations).
// jpeg_natural_order[i] is the natural (row-major) index of the i'th
// coefficient in zigzag scan order. decode_jpeg_vaapi() uses it when filling
// the VA-API quantization matrix: entry i is read from
// quantval[jpeg_natural_order[i]].
58 static const int jpeg_natural_order[DCTSIZE2] = {
59 0, 1, 8, 16, 9, 2, 3, 10,
60 17, 24, 32, 25, 18, 11, 4, 5,
61 12, 19, 26, 33, 40, 48, 41, 34,
62 27, 20, 13, 6, 7, 14, 21, 28,
63 35, 42, 49, 56, 57, 50, 43, 36,
64 29, 22, 15, 23, 30, 37, 44, 51,
65 58, 59, 52, 45, 38, 31, 39, 46,
66 53, 60, 61, 54, 47, 55, 62, 63,
// Returns a VAResources bundle (surface, context and image) for pictures of
// the given dimensions. An entry with identical width and height is taken
// from the freelist if one exists; otherwise a new YUV 4:2:2 surface, a
// context using config_id, and an image in uyvy_format are created.
69 VAResources get_va_resources(unsigned width, unsigned height)
// Reuse path: search the freelist under the mutex and remove the match.
72 lock_guard<mutex> lock(va_resources_mutex);
73 for (auto it = va_resources_freelist.begin(); it != va_resources_freelist.end(); ++it) {
74 if (it->width == width && it->height == height) {
75 VAResources ret = *it;
76 va_resources_freelist.erase(it);
// Creation path: build fresh VA-API objects of the requested size.
// NOTE(review): the declaration/initialization of `ret` for this path and the
// return statements are not visible here.
87 VAStatus va_status = vaCreateSurfaces(va_dpy->va_dpy, VA_RT_FORMAT_YUV422,
89 &ret.surface, 1, nullptr, 0);
90 CHECK_VASTATUS(va_status, "vaCreateSurfaces");
// The context is bound to the single surface created above.
92 va_status = vaCreateContext(va_dpy->va_dpy, config_id, width, height, 0, &ret.surface, 1, &ret.context);
93 CHECK_VASTATUS(va_status, "vaCreateContext");
// Image that decode_jpeg_vaapi() reads the decoded surface into via vaGetImage().
95 va_status = vaCreateImage(va_dpy->va_dpy, &uyvy_format, width, height, &ret.image);
96 CHECK_VASTATUS(va_status, "vaCreateImage");
// Hands a VAResources bundle back to the freelist so get_va_resources() can
// reuse it. If the freelist already holds more than 10 entries, one entry has
// its image, context and surface destroyed and is removed from the list
// before the new bundle is pushed to the front.
101 void release_va_resources(VAResources resources)
103 lock_guard<mutex> lock(va_resources_mutex);
104 if (va_resources_freelist.size() > 10) {
// NOTE(review): `it` is initialized to end(), which must not be dereferenced;
// presumably it is stepped back to the last list entry on a line that is not
// visible here -- confirm.
105 auto it = va_resources_freelist.end();
// Destroy in the reverse order of creation in get_va_resources().
108 VAStatus va_status = vaDestroyImage(va_dpy->va_dpy, it->image.image_id);
109 CHECK_VASTATUS(va_status, "vaDestroyImage");
111 va_status = vaDestroyContext(va_dpy->va_dpy, it->context);
112 CHECK_VASTATUS(va_status, "vaDestroyContext");
114 va_status = vaDestroySurfaces(va_dpy->va_dpy, &it->surface, 1);
115 CHECK_VASTATUS(va_status, "vaDestroySurfaces");
117 va_resources_freelist.erase(it);
// The bundle being released goes to the front of the list.
120 va_resources_freelist.push_front(resources);
123 // RAII wrapper to release VAResources on return (even on error).
// The wrapper stores a reference to the caller's VAResources rather than a
// copy, so the referenced object has to outlive the wrapper. The destructor
// passes it to release_va_resources().
// NOTE(review): commit() sets `committed`; the destructor presumably skips the
// release when it is set, but that check is not visible here -- confirm.
124 class ReleaseVAResources {
126 ReleaseVAResources(const VAResources &resources)
127 : resources(resources) {}
128 ~ReleaseVAResources()
131 release_va_resources(resources);
135 void commit() { committed = true; }
138 const VAResources &resources;
139 bool committed = false;
// Attempts to open the VA display named by va_display for JPEG decoding by
// calling try_open_va() with VAProfileJPEGBaseline, VAEntrypointVLD and a
// single "4:2:2" format entry (VA_RT_FORMAT_YUV422 / VA_FOURCC_UYVY) that
// points at the file-scope config_id and uyvy_format. No chosen-profile or
// error output is requested (both are nullptr).
// Returns whatever try_open_va() returns; callers in this file treat nullptr
// as failure.
142 static unique_ptr<VADisplayWithCleanup> try_open_va_mjpeg(const string &va_display)
144 // Seemingly VA_FOURCC_422H is no good for vaGetImage(). :-/
145 return try_open_va(va_display, { VAProfileJPEGBaseline }, VAEntrypointVLD,
146 { { "4:2:2", VA_RT_FORMAT_YUV422, VA_FOURCC_UYVY, &config_id, &uyvy_format } },
147 /*chosen_profile=*/nullptr, /*error=*/nullptr);
// Probes for a display that try_open_va_mjpeg() can open and returns a string
// naming it. The default display (empty string) is tried first; if that fails,
// every path matching /dev/dri/renderD* is tried in turn. Progress and failure
// messages go to stderr.
// NOTE(review): the return statements are not visible here; init_jpeg_vaapi()
// passes the returned string straight to try_open_va_mjpeg().
150 string get_usable_va_display()
152 // Reduce the amount of chatter while probing,
153 // unless the user has specified otherwise.
154 bool need_env_reset = false;
155 if (getenv("LIBVA_MESSAGING_LEVEL") == nullptr) {
156 setenv("LIBVA_MESSAGING_LEVEL", "0", true);
157 need_env_reset = true;
160 // First try the default (ie., whatever $DISPLAY is set to).
// Note: this local va_dpy shadows the file-scope variable of the same name.
161 unique_ptr<VADisplayWithCleanup> va_dpy = try_open_va_mjpeg("");
162 if (va_dpy != nullptr) {
// Only undo the environment change if this function made it.
163 if (need_env_reset) {
164 unsetenv("LIBVA_MESSAGING_LEVEL");
169 fprintf(stderr, "The X11 display did not expose a VA-API JPEG decoder.\n");
171 // Try all /dev/dri/render* in turn. TODO: Accept /dev/dri/card*, too?
173 int err = glob("/dev/dri/renderD*", 0, nullptr, &g);
175 fprintf(stderr, "Couldn't list render nodes (%s) when trying to autodetect a replacement.\n", strerror(errno));
// Probe the matched paths in the order glob() returned them.
177 for (size_t i = 0; i < g.gl_pathc; ++i) {
178 string path = g.gl_pathv[i];
179 va_dpy = try_open_va_mjpeg(path);
180 if (va_dpy != nullptr) {
181 fprintf(stderr, "Autodetected %s as a suitable replacement; using it.\n",
184 if (need_env_reset) {
185 unsetenv("LIBVA_MESSAGING_LEVEL");
// Nothing worked: explain the most likely cause to the user.
192 fprintf(stderr, "No suitable VA-API JPEG decoders were found in /dev/dri; giving up.\n");
193 fprintf(stderr, "Note that if you are using an Intel CPU with an external GPU,\n");
194 fprintf(stderr, "you may need to enable the integrated Intel GPU in your BIOS\n");
195 fprintf(stderr, "to expose Quick Sync.\n");
// Sets up VA-API JPEG decoding: asks get_usable_va_display() which display to
// use, opens it with try_open_va_mjpeg() into the file-scope va_dpy, and then
// announces success on stderr and sets vaapi_jpeg_decoding_usable.
// NOTE(review): what happens when no display is found or the open fails
// (the bodies around the va_dpy == nullptr check) is not visible here.
199 void init_jpeg_vaapi()
201 string dpy = get_usable_va_display();
206 va_dpy = try_open_va_mjpeg(dpy);
207 if (va_dpy == nullptr) {
211 fprintf(stderr, "VA-API JPEG decoding initialized.\n");
212 vaapi_jpeg_decoding_usable = true;
// Remembers a VA display and a buffer id and calls vaDestroyBuffer() on them,
// reporting a failure through CHECK_VASTATUS.
// NOTE(review): the vaDestroyBuffer() call presumably lives in the destructor
// (decode_jpeg_vaapi() declares one of these right after each successful
// vaCreateBuffer()), but the enclosing member signature is not visible here.
215 class VABufferDestroyer {
217 VABufferDestroyer(VADisplay dpy, VABufferID buf)
218 : dpy(dpy), buf(buf) {}
222 VAStatus va_status = vaDestroyBuffer(dpy, buf);
223 CHECK_VASTATUS(va_status, "vaDestroyBuffer");
// Decodes a three-component 4:2:2 JPEG held in `jpeg` using VA-API and
// returns it as a Frame with GL textures.
//
// libjpeg is used to parse the headers (jpeg_read_header()); the picture
// parameters, quantization tables, Huffman tables, slice parameters and the
// remaining input bytes are then submitted to VA-API. The decoded surface is
// read into the UYVY image with vaGetImage(), split into a Y plane and a CbCr
// plane inside a PBO, and uploaded as two textures.
//
// Inputs that do not have exactly three components, do not match the sampling
// check, have a quantizer value above 255, or select a Huffman table index
// above 1 trigger a diagnostic on stderr.
// NOTE(review): the statements following each diagnostic (presumably an early
// return) and the final return are not visible here.
231 shared_ptr<Frame> decode_jpeg_vaapi(const string &jpeg)
233 jpeg_decompress_struct dinfo;
234 JPEGWrapErrorManager error_mgr(&dinfo);
235 if (!error_mgr.run([&dinfo] { jpeg_create_decompress(&dinfo); })) {
238 JPEGDestroyer destroy_dinfo(&dinfo);
// Ask libjpeg to keep APP1 markers (up to 0xFFFF bytes) so that Exif data can
// be copied into the frame near the end of this function.
240 jpeg_save_markers(&dinfo, JPEG_APP0 + 1, 0xFFFF);
242 jpeg_mem_src(&dinfo, reinterpret_cast<const unsigned char *>(jpeg.data()), jpeg.size());
243 if (!error_mgr.run([&dinfo] { jpeg_read_header(&dinfo, true); })) {
247 if (dinfo.num_components != 3) {
248 fprintf(stderr, "Not a color JPEG. (%d components, Y=%dx%d, Cb=%dx%d, Cr=%dx%d)\n",
249 dinfo.num_components,
250 dinfo.comp_info[0].h_samp_factor, dinfo.comp_info[0].v_samp_factor,
251 dinfo.comp_info[1].h_samp_factor, dinfo.comp_info[1].v_samp_factor,
252 dinfo.comp_info[2].h_samp_factor, dinfo.comp_info[2].v_samp_factor);
// Sampling check: component 0 must have horizontal factor 2, components 1 and
// 2 horizontal factor 1, and all three must share the same vertical factor.
255 if (dinfo.comp_info[0].h_samp_factor != 2 ||
256 dinfo.comp_info[1].h_samp_factor != 1 ||
257 dinfo.comp_info[1].v_samp_factor != dinfo.comp_info[0].v_samp_factor ||
258 dinfo.comp_info[2].h_samp_factor != 1 ||
259 dinfo.comp_info[2].v_samp_factor != dinfo.comp_info[0].v_samp_factor) {
260 fprintf(stderr, "Not 4:2:2. (Y=%dx%d, Cb=%dx%d, Cr=%dx%d)\n",
261 dinfo.comp_info[0].h_samp_factor, dinfo.comp_info[0].v_samp_factor,
262 dinfo.comp_info[1].h_samp_factor, dinfo.comp_info[1].v_samp_factor,
263 dinfo.comp_info[2].h_samp_factor, dinfo.comp_info[2].v_samp_factor);
267 // Picture parameters.
268 VAPictureParameterBufferJPEGBaseline pic_param;
269 memset(&pic_param, 0, sizeof(pic_param));
270 pic_param.picture_width = dinfo.image_width;
271 pic_param.picture_height = dinfo.image_height;
272 for (int component_idx = 0; component_idx < dinfo.num_components; ++component_idx) {
273 const jpeg_component_info *comp = &dinfo.comp_info[component_idx];
274 pic_param.components[component_idx].component_id = comp->component_id;
275 pic_param.components[component_idx].h_sampling_factor = comp->h_samp_factor;
276 pic_param.components[component_idx].v_sampling_factor = comp->v_samp_factor;
277 pic_param.components[component_idx].quantiser_table_selector = comp->quant_tbl_no;
279 pic_param.num_components = dinfo.num_components;
280 pic_param.color_space = 0; // YUV.
281 pic_param.rotation = VA_ROTATION_NONE;
// Obtain surface/context/image for this picture size. `release` refers to
// `resources` and hands it to release_va_resources() from its destructor.
283 VAResources resources = get_va_resources(dinfo.image_width, dinfo.image_height);
284 ReleaseVAResources release(resources);
// Each VA buffer created below is paired with a VABufferDestroyer for cleanup.
286 VABufferID pic_param_buffer;
287 VAStatus va_status = vaCreateBuffer(va_dpy->va_dpy, resources.context, VAPictureParameterBufferType, sizeof(pic_param), 1, &pic_param, &pic_param_buffer);
288 CHECK_VASTATUS_RET(va_status, "vaCreateBuffer");
289 VABufferDestroyer destroy_pic_param(va_dpy->va_dpy, pic_param_buffer);
291 // Quantization matrices.
292 VAIQMatrixBufferJPEGBaseline iq;
293 memset(&iq, 0, sizeof(iq));
// Copy up to four tables; tables libjpeg did not read are flagged as not loaded.
295 for (int quant_tbl_idx = 0; quant_tbl_idx < min(4, NUM_QUANT_TBLS); ++quant_tbl_idx) {
296 const JQUANT_TBL *qtbl = dinfo.quant_tbl_ptrs[quant_tbl_idx];
297 if (qtbl == nullptr) {
298 iq.load_quantiser_table[quant_tbl_idx] = 0;
300 iq.load_quantiser_table[quant_tbl_idx] = 1;
301 for (int i = 0; i < 64; ++i) {
// Quantizer values above 255 are reported as non-baseline.
302 if (qtbl->quantval[i] > 255) {
303 fprintf(stderr, "Baseline JPEG only!\n");
// Entry i of the VA-API table comes from libjpeg's entry jpeg_natural_order[i].
306 iq.quantiser_table[quant_tbl_idx][i] = qtbl->quantval[jpeg_natural_order[i]];
311 VABufferID iq_buffer;
312 va_status = vaCreateBuffer(va_dpy->va_dpy, resources.context, VAIQMatrixBufferType, sizeof(iq), 1, &iq, &iq_buffer);
313 CHECK_VASTATUS_RET(va_status, "vaCreateBuffer");
314 VABufferDestroyer destroy_iq(va_dpy->va_dpy, iq_buffer);
316 // Huffman tables (arithmetic is not supported).
317 VAHuffmanTableBufferJPEGBaseline huff;
318 memset(&huff, 0, sizeof(huff));
// Copy up to two DC/AC table pairs; the asserts require that the DC and AC
// table of a given index are either both present or both absent.
320 for (int huff_tbl_idx = 0; huff_tbl_idx < min(2, NUM_HUFF_TBLS); ++huff_tbl_idx) {
321 const JHUFF_TBL *ac_hufftbl = dinfo.ac_huff_tbl_ptrs[huff_tbl_idx];
322 const JHUFF_TBL *dc_hufftbl = dinfo.dc_huff_tbl_ptrs[huff_tbl_idx];
323 if (ac_hufftbl == nullptr) {
324 assert(dc_hufftbl == nullptr);
325 huff.load_huffman_table[huff_tbl_idx] = 0;
327 assert(dc_hufftbl != nullptr);
328 huff.load_huffman_table[huff_tbl_idx] = 1;
// libjpeg's bits[] is indexed by code length 1..16 (bits[0] is unused), hence
// the i + 1 when filling the 0-based VA-API arrays.
330 for (int i = 0; i < 16; ++i) {
331 huff.huffman_table[huff_tbl_idx].num_dc_codes[i] = dc_hufftbl->bits[i + 1];
333 for (int i = 0; i < 12; ++i) {
334 huff.huffman_table[huff_tbl_idx].dc_values[i] = dc_hufftbl->huffval[i];
336 for (int i = 0; i < 16; ++i) {
337 huff.huffman_table[huff_tbl_idx].num_ac_codes[i] = ac_hufftbl->bits[i + 1];
339 for (int i = 0; i < 162; ++i) {
340 huff.huffman_table[huff_tbl_idx].ac_values[i] = ac_hufftbl->huffval[i];
345 VABufferID huff_buffer;
346 va_status = vaCreateBuffer(va_dpy->va_dpy, resources.context, VAHuffmanTableBufferType, sizeof(huff), 1, &huff, &huff_buffer);
347 CHECK_VASTATUS_RET(va_status, "vaCreateBuffer");
348 VABufferDestroyer destroy_huff(va_dpy->va_dpy, huff_buffer);
350 // Slice parameters (metadata about the slice).
351 VASliceParameterBufferJPEGBaseline parms;
352 memset(&parms, 0, sizeof(parms));
// The slice covers whatever input libjpeg has not consumed yet -- presumably
// the entropy-coded data following the headers parsed by jpeg_read_header().
353 parms.slice_data_size = dinfo.src->bytes_in_buffer;
354 parms.slice_data_offset = 0;
355 parms.slice_data_flag = VA_SLICE_DATA_FLAG_ALL;
356 parms.slice_horizontal_position = 0;
357 parms.slice_vertical_position = 0;
358 for (int component_idx = 0; component_idx < dinfo.num_components; ++component_idx) {
359 const jpeg_component_info *comp = &dinfo.comp_info[component_idx];
360 parms.components[component_idx].component_selector = comp->component_id;
361 parms.components[component_idx].dc_table_selector = comp->dc_tbl_no;
362 parms.components[component_idx].ac_table_selector = comp->ac_tbl_no;
// Only table indices 0 and 1 were copied into `huff` above.
363 if (parms.components[component_idx].dc_table_selector > 1 ||
364 parms.components[component_idx].ac_table_selector > 1) {
365 fprintf(stderr, "Uses too many Huffman tables\n");
369 parms.num_components = dinfo.num_components;
370 parms.restart_interval = dinfo.restart_interval;
// MCU count, rounding up, for MCUs of (2 * DCTSIZE) x DCTSIZE pixels.
// NOTE(review): the sampling check above does not pin component 0's vertical
// factor to 1, which this computation appears to assume -- confirm.
371 int horiz_mcus = (dinfo.image_width + (DCTSIZE * 2) - 1) / (DCTSIZE * 2);
372 int vert_mcus = (dinfo.image_height + DCTSIZE - 1) / DCTSIZE;
373 parms.num_mcus = horiz_mcus * vert_mcus;
375 VABufferID slice_param_buffer;
376 va_status = vaCreateBuffer(va_dpy->va_dpy, resources.context, VASliceParameterBufferType, sizeof(parms), 1, &parms, &slice_param_buffer);
377 CHECK_VASTATUS_RET(va_status, "vaCreateBuffer");
378 VABufferDestroyer destroy_slice_param(va_dpy->va_dpy, slice_param_buffer);
380 // The actual data. VA-API will destuff and all for us.
381 VABufferID data_buffer;
382 va_status = vaCreateBuffer(va_dpy->va_dpy, resources.context, VASliceDataBufferType, dinfo.src->bytes_in_buffer, 1, const_cast<unsigned char *>(dinfo.src->next_input_byte), &data_buffer);
383 CHECK_VASTATUS_RET(va_status, "vaCreateBuffer");
384 VABufferDestroyer destroy_data(va_dpy->va_dpy, data_buffer);
// Submit the five buffers, one vaRenderPicture() call each, between
// vaBeginPicture() and vaEndPicture() on the borrowed context and surface.
386 va_status = vaBeginPicture(va_dpy->va_dpy, resources.context, resources.surface);
387 CHECK_VASTATUS_RET(va_status, "vaBeginPicture");
388 va_status = vaRenderPicture(va_dpy->va_dpy, resources.context, &pic_param_buffer, 1);
389 CHECK_VASTATUS_RET(va_status, "vaRenderPicture(pic_param)");
390 va_status = vaRenderPicture(va_dpy->va_dpy, resources.context, &iq_buffer, 1);
391 CHECK_VASTATUS_RET(va_status, "vaRenderPicture(iq)");
392 va_status = vaRenderPicture(va_dpy->va_dpy, resources.context, &huff_buffer, 1);
393 CHECK_VASTATUS_RET(va_status, "vaRenderPicture(huff)");
394 va_status = vaRenderPicture(va_dpy->va_dpy, resources.context, &slice_param_buffer, 1);
395 CHECK_VASTATUS_RET(va_status, "vaRenderPicture(slice_param)");
396 va_status = vaRenderPicture(va_dpy->va_dpy, resources.context, &data_buffer, 1);
397 CHECK_VASTATUS_RET(va_status, "vaRenderPicture(data)");
398 va_status = vaEndPicture(va_dpy->va_dpy, resources.context);
399 CHECK_VASTATUS_RET(va_status, "vaEndPicture");
401 // vaDeriveImage() works, but the resulting image seems to live in
402 // uncached memory, which makes copying data out from it very, very slow.
403 // Thanks to FFmpeg for the observation that you can vaGetImage() the
404 // surface onto your own image (although then, it can't be planar, which
405 // is unfortunate for us).
// NOTE(review): the vaDeriveImage() call below uses `surf` and `image`, which
// are not declared in the visible code; together with the "4:2:2 planar (for
// vaDeriveImage)" copy further down it may be a disabled alternative path
// whose guard is not visible here -- confirm.
408 va_status = vaDeriveImage(va_dpy->va_dpy, surf, &image);
409 CHECK_VASTATUS_RET(va_status, "vaDeriveImage");
// Synchronize on the surface before reading it back into resources.image.
411 va_status = vaSyncSurface(va_dpy->va_dpy, resources.surface);
412 CHECK_VASTATUS_RET(va_status, "vaSyncSurface");
414 va_status = vaGetImage(va_dpy->va_dpy, resources.surface, 0, 0, dinfo.image_width, dinfo.image_height, resources.image.image_id);
415 CHECK_VASTATUS_RET(va_status, "vaGetImage");
// Map the image's buffer so the pixels can be read through `mapped`.
419 va_status = vaMapBuffer(va_dpy->va_dpy, resources.image.buf, &mapped);
420 CHECK_VASTATUS_RET(va_status, "vaMapBuffer");
422 shared_ptr<Frame> frame(new Frame);
424 // 4:2:2 planar (for vaDeriveImage).
425 frame->y.reset(new uint8_t[dinfo.image_width * dinfo.image_height]);
426 frame->cb.reset(new uint8_t[(dinfo.image_width / 2) * dinfo.image_height]);
427 frame->cr.reset(new uint8_t[(dinfo.image_width / 2) * dinfo.image_height]);
428 for (int component_idx = 0; component_idx < dinfo.num_components; ++component_idx) {
431 if (component_idx == 0) {
432 dptr = frame->y.get();
433 width = dinfo.image_width;
434 } else if (component_idx == 1) {
435 dptr = frame->cb.get();
436 width = dinfo.image_width / 2;
437 } else if (component_idx == 2) {
438 dptr = frame->cr.get();
439 width = dinfo.image_width / 2;
// Row-by-row copy because the source pitch may differ from the plane width.
443 const uint8_t *sptr = (const uint8_t *)mapped + image.offsets[component_idx];
444 size_t spitch = image.pitches[component_idx];
445 for (size_t y = 0; y < dinfo.image_height; ++y) {
446 memcpy(dptr + y * width, sptr + y * spitch, width);
450 // Convert Y'CbCr to separate Y' and CbCr.
451 frame->is_semiplanar = true;
// PBO layout: width * height bytes for the Y plane, with the CbCr plane
// starting right after it at cbcr_offset.
453 PBO pbo = global_pbo_pool->alloc_pbo();
454 size_t cbcr_offset = dinfo.image_width * dinfo.image_height;
455 uint8_t *y_pix = pbo.ptr;
456 uint8_t *cbcr_pix = pbo.ptr + cbcr_offset;
458 const uint8_t *src = (const uint8_t *)mapped + resources.image.offsets[0];
// When the pitch equals width * 2 bytes the whole image is handled in one
// memcpy_interleaved() call; otherwise each row is processed separately so
// that the source is advanced by the pitch.
459 if (resources.image.pitches[0] == dinfo.image_width * 2) {
460 memcpy_interleaved(cbcr_pix, y_pix, src, dinfo.image_width * dinfo.image_height * 2);
462 for (unsigned y = 0; y < dinfo.image_height; ++y) {
463 memcpy_interleaved(cbcr_pix + y * dinfo.image_width, y_pix + y * dinfo.image_width,
464 src + y * resources.image.pitches[0], dinfo.image_width * 2);
// Create both textures with the PBO bound as the unpack buffer, so the last
// argument is a byte offset into the PBO (via BUFFER_OFFSET) rather than a
// client-memory pointer.
468 glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo.pbo);
469 frame->y = create_texture_2d(dinfo.image_width, dinfo.image_height, GL_R8, GL_RED, GL_UNSIGNED_BYTE, BUFFER_OFFSET(0));
470 frame->cbcr = create_texture_2d(dinfo.image_width / 2, dinfo.image_height, GL_RG8, GL_RG, GL_UNSIGNED_BYTE, BUFFER_OFFSET(cbcr_offset));
471 glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
// Flush the written range, create a fence for the upload and store it on both
// the PBO and the frame before the PBO goes back to the pool.
473 glFlushMappedNamedBufferRange(pbo.pbo, 0, dinfo.image_width * dinfo.image_height * 2);
474 glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT);
475 pbo.upload_done = RefCountedGLsync(GL_SYNC_GPU_COMMANDS_COMPLETE, /*flags=*/0);
476 frame->uploaded_ui_thread = pbo.upload_done;
477 frame->uploaded_interpolation = pbo.upload_done;
478 global_pbo_pool->release_pbo(move(pbo));
480 frame->width = dinfo.image_width;
481 frame->height = dinfo.image_height;
482 frame->chroma_subsampling_x = 2;
483 frame->chroma_subsampling_y = 1;
// Copy the Exif payload if the first saved marker is an APP1 marker whose
// data begins with "Exif". Only the head of marker_list is examined.
485 if (dinfo.marker_list != nullptr &&
486 dinfo.marker_list->marker == JPEG_APP0 + 1 &&
487 dinfo.marker_list->data_length >= 4 &&
488 memcmp(dinfo.marker_list->data, "Exif", 4) == 0) {
489 frame->exif_data.assign(reinterpret_cast<char *>(dinfo.marker_list->data),
490 dinfo.marker_list->data_length);
493 va_status = vaUnmapBuffer(va_dpy->va_dpy, resources.image.buf);
494 CHECK_VASTATUS_RET(va_status, "vaUnmapBuffer");