1 #include "vaapi_jpeg_decoder.h"
3 #include "jpeg_destroyer.h"
4 #include "jpeg_frame.h"
5 #include "memcpy_interleaved.h"
21 #include <va/va_drm.h>
22 #include <va/va_x11.h>
// File-wide VA-API state.
// va_dpy is the display opened by init_jpeg_vaapi(); config_id is the config
// created there, and uyvy_format is the UYVY image format it copied out of the
// list reported by vaQueryImageFormats().
26 static unique_ptr<VADisplayWithCleanup> va_dpy;
27 static VAConfigID config_id;
28 static VAImageFormat uyvy_format;
// Set to true as the last visible step of init_jpeg_vaapi().
29 bool vaapi_jpeg_decoding_usable = false;
// NOTE(review): presumably fields of VAResources (its declaration is not visible
// here); get_va_resources() compares it->width / it->height against its arguments.
32 unsigned width, height;
// Previously created VAResources kept for reuse. The visible accesses in
// get_va_resources() and release_va_resources() take va_resources_mutex first.
37 static list<VAResources> va_resources_freelist;
38 static mutex va_resources_mutex;
// Error-checking helpers for VA-API calls. Both CHECK_VASTATUS(va_status, func)
// and CHECK_VASTATUS_RET(va_status, func) compare va_status against
// VA_STATUS_SUCCESS and, on mismatch, print the calling function, the line
// number, the name given in `func` and the numeric status to stderr.
// NOTE(review): what each macro does after logging is not visible here; the _RET
// suffix suggests that variant returns from the enclosing function -- confirm.
40 #define CHECK_VASTATUS(va_status, func) \
41 if (va_status != VA_STATUS_SUCCESS) { \
42 fprintf(stderr, "%s:%d (%s) failed with %d\n", __func__, __LINE__, func, va_status); \
46 #define CHECK_VASTATUS_RET(va_status, func) \
47 if (va_status != VA_STATUS_SUCCESS) { \
48 fprintf(stderr, "%s:%d (%s) failed with %d\n", __func__, __LINE__, func, va_status); \
// Provides a set of VA resources (surface, context, image) for frames of the
// given size.
//
// The freelist is searched for an entry whose width and height both match; a
// match is copied out and erased from the list. The code further down creates
// a YUV 4:2:2 surface, a context on config_id bound to that surface, and an
// image in uyvy_format, all on the global display.
// NOTE(review): the lines between the lookup and the creation calls are not
// visible here; presumably the cached copy is returned and creation only
// happens on a miss -- confirm.
52 VAResources get_va_resources(unsigned width, unsigned height)
// Guards the freelist scan below.
55 lock_guard<mutex> lock(va_resources_mutex);
56 for (auto it = va_resources_freelist.begin(); it != va_resources_freelist.end(); ++it) {
57 if (it->width == width && it->height == height) {
// Copy before erasing, since erase() invalidates the iterator.
58 VAResources ret = *it;
59 va_resources_freelist.erase(it);
70 VAStatus va_status = vaCreateSurfaces(va_dpy->va_dpy, VA_RT_FORMAT_YUV422,
72 &ret.surface, 1, nullptr, 0);
73 CHECK_VASTATUS(va_status, "vaCreateSurfaces");
75 va_status = vaCreateContext(va_dpy->va_dpy, config_id, width, height, 0, &ret.surface, 1, &ret.context);
76 CHECK_VASTATUS(va_status, "vaCreateContext");
78 va_status = vaCreateImage(va_dpy->va_dpy, &uyvy_format, width, height, &ret.image);
79 CHECK_VASTATUS(va_status, "vaCreateImage");
// Hands a set of VA resources back to the freelist for later reuse.
//
// When the freelist already holds more than 10 entries, one entry is torn down
// (image, then context, then surface) and erased from the list. The resources
// passed in are pushed onto the front of the list.
84 void release_va_resources(VAResources resources)
86 lock_guard<mutex> lock(va_resources_mutex);
87 if (va_resources_freelist.size() > 10) {
// NOTE(review): `it` starts at end(); the step that moves it onto a real element
// is not visible here -- presumably it is decremented to the last entry, which
// is the oldest one because new entries go in at the front.
88 auto it = va_resources_freelist.end();
91 VAStatus va_status = vaDestroyImage(va_dpy->va_dpy, it->image.image_id);
92 CHECK_VASTATUS(va_status, "vaDestroyImage");
94 va_status = vaDestroyContext(va_dpy->va_dpy, it->context);
95 CHECK_VASTATUS(va_status, "vaDestroyContext");
97 va_status = vaDestroySurfaces(va_dpy->va_dpy, &it->surface, 1);
98 CHECK_VASTATUS(va_status, "vaDestroySurfaces");
100 va_resources_freelist.erase(it);
103 va_resources_freelist.push_front(resources);
106 // RAII wrapper to release VAResources on return (even on error).
// Only a reference to the caller's VAResources is kept, so that object must
// outlive this wrapper.
107 class ReleaseVAResources {
109 ReleaseVAResources(const VAResources &resources)
110 : resources(resources) {}
// Passes the resources to release_va_resources().
// NOTE(review): the condition around this call is not visible here; presumably
// the release is skipped once commit() has been called -- confirm.
111 ~ReleaseVAResources()
114 release_va_resources(resources);
// Sets the committed flag.
118 void commit() { committed = true; }
// Borrowed from the caller, not owned.
121 const VAResources &resources;
122 bool committed = false;
// Tears down what the wrapper holds. Something is done when va_dpy is set
// (not visible here -- presumably the VA display is terminated; confirm), and
// the X11 connection is closed when one was opened.
125 VADisplayWithCleanup::~VADisplayWithCleanup()
127 if (va_dpy != nullptr) {
130 if (x11_display != nullptr) {
131 XCloseDisplay(x11_display);
// Opens a VA display from a textual specification.
//
// An empty string, or one that does not start with '/', is treated as an X
// display name (empty selects the default display). Anything else is opened
// read/write as a DRM device node. In both branches the native handle is
// stored in a new VADisplayWithCleanup next to the VADisplay obtained from it.
// NOTE(review): the return statements are not visible here; try_open_va()
// treats a null result as failure.
138 unique_ptr<VADisplayWithCleanup> va_open_display(const string &va_display)
140 if (va_display.empty() || va_display[0] != '/') { // An X display.
141 Display *x11_display = XOpenDisplay(va_display.empty() ? nullptr : va_display.c_str());
142 if (x11_display == nullptr) {
143 fprintf(stderr, "error: can't connect to X server!\n");
147 unique_ptr<VADisplayWithCleanup> ret(new VADisplayWithCleanup);
148 ret->x11_display = x11_display;
149 ret->va_dpy = vaGetDisplay(x11_display);
150 if (ret->va_dpy == nullptr) {
154 } else { // A DRM node on the filesystem (e.g. /dev/dri/renderD128).
155 int drm_fd = open(va_display.c_str(), O_RDWR);
// NOTE(review): the condition guarding this perror() is not visible here;
// presumably it runs when open() failed.
157 perror(va_display.c_str());
160 unique_ptr<VADisplayWithCleanup> ret(new VADisplayWithCleanup);
161 ret->drm_fd = drm_fd;
162 ret->va_dpy = vaGetDisplayDRM(drm_fd);
163 if (ret->va_dpy == nullptr) {
// Opens the given VA display and checks that it can decode baseline JPEG.
//
// The display is opened with va_open_display() and initialized with
// vaInitialize(); the entrypoints for VAProfileJPEGBaseline are then queried
// and searched for VAEntrypointVLD. On failure a description is written to
// *error. `error` may be null: callers in this file pass nullptr, and the
// visible writes further down are guarded by a null check.
170 unique_ptr<VADisplayWithCleanup> try_open_va(const string &va_display, string *error)
172 unique_ptr<VADisplayWithCleanup> va_dpy = va_open_display(va_display);
173 if (va_dpy == nullptr) {
// NOTE(review): the null check on `error` for this write is not visible here.
175 *error = "Opening VA display failed";
178 int major_ver, minor_ver;
179 VAStatus va_status = vaInitialize(va_dpy->va_dpy, &major_ver, &minor_ver);
180 if (va_status != VA_STATUS_SUCCESS) {
182 snprintf(buf, sizeof(buf), "vaInitialize() failed with status %d\n", va_status);
183 if (error != nullptr)
188 int num_entrypoints = vaMaxNumEntrypoints(va_dpy->va_dpy);
189 unique_ptr<VAEntrypoint[]> entrypoints(new VAEntrypoint[num_entrypoints]);
// NOTE(review): plain new[] reports failure by throwing std::bad_alloc rather
// than returning null, so this branch looks unreachable.
190 if (entrypoints == nullptr) {
191 if (error != nullptr)
192 *error = "Failed to allocate memory for VA entry points";
// NOTE(review): the status returned by vaQueryConfigEntrypoints() is not checked.
196 vaQueryConfigEntrypoints(va_dpy->va_dpy, VAProfileJPEGBaseline, entrypoints.get(), &num_entrypoints);
197 for (int slice_entrypoint = 0; slice_entrypoint < num_entrypoints; slice_entrypoint++) {
// Skip everything that is not the VLD entrypoint.
198 if (entrypoints[slice_entrypoint] != VAEntrypointVLD) {
202 // We found a usable decode, so return it.
206 if (error != nullptr)
207 *error = "Can't find VAEntrypointVLD for the JPEG profile";
// Finds a VA display specification that try_open_va() accepts.
//
// The default X display is tried first, then every path matching
// /dev/dri/renderD* in the order glob() returns them. While probing,
// LIBVA_MESSAGING_LEVEL is set to "0" unless it was already present in the
// environment, and it is unset again once a candidate succeeds. Progress and
// failure are reported on stderr.
// NOTE(review): the return statements are not visible here; init_jpeg_vaapi()
// passes the result straight to try_open_va().
211 string get_usable_va_display()
213 // Reduce the amount of chatter while probing,
214 // unless the user has specified otherwise.
215 bool need_env_reset = false;
216 if (getenv("LIBVA_MESSAGING_LEVEL") == nullptr) {
217 setenv("LIBVA_MESSAGING_LEVEL", "0", true);
218 need_env_reset = true;
221 // First try the default (ie., whatever $DISPLAY is set to).
222 unique_ptr<VADisplayWithCleanup> va_dpy = try_open_va("", nullptr);
223 if (va_dpy != nullptr) {
// Undo our own override only; a value set by the user is left alone.
224 if (need_env_reset) {
225 unsetenv("LIBVA_MESSAGING_LEVEL");
230 fprintf(stderr, "The X11 display did not expose a VA-API JPEG decoder.\n");
232 // Try all /dev/dri/render* in turn. TODO: Accept /dev/dri/card*, too?
// NOTE(review): the declaration of `g` and the test on `err` are not visible
// here, nor is any globfree() call -- confirm that `g` is released.
234 int err = glob("/dev/dri/renderD*", 0, nullptr, &g);
236 fprintf(stderr, "Couldn't list render nodes (%s) when trying to autodetect a replacement.\n", strerror(errno));
238 for (size_t i = 0; i < g.gl_pathc; ++i) {
239 string path = g.gl_pathv[i];
240 va_dpy = try_open_va(path, nullptr);
241 if (va_dpy != nullptr) {
242 fprintf(stderr, "Autodetected %s as a suitable replacement; using it.\n",
245 if (need_env_reset) {
246 unsetenv("LIBVA_MESSAGING_LEVEL");
253 fprintf(stderr, "No suitable VA-API JPEG decoders were found in /dev/dri; giving up.\n");
254 fprintf(stderr, "Note that if you are using an Intel CPU with an external GPU,\n");
255 fprintf(stderr, "you may need to enable the integrated Intel GPU in your BIOS\n");
256 fprintf(stderr, "to expose Quick Sync.\n");
// Sets up VA-API JPEG decoding for this file.
//
// A display is chosen with get_usable_va_display() and opened into the global
// va_dpy. A config for VAProfileJPEGBaseline / VAEntrypointVLD with a YUV 4:2:2
// render-target format is created into config_id. The image formats reported
// by the driver are scanned for UYVY, which is copied into uyvy_format.
// Finally a message is printed and vaapi_jpeg_decoding_usable is set to true.
// NOTE(review): the early-exit paths (empty display string, open failure, UYVY
// not found) are not visible here.
260 void init_jpeg_vaapi()
262 string dpy = get_usable_va_display();
267 va_dpy = try_open_va(dpy, nullptr);
268 if (va_dpy == nullptr) {
272 VAConfigAttrib attr = { VAConfigAttribRTFormat, VA_RT_FORMAT_YUV422 };
274 VAStatus va_status = vaCreateConfig(va_dpy->va_dpy, VAProfileJPEGBaseline, VAEntrypointVLD,
275 &attr, 1, &config_id);
276 CHECK_VASTATUS(va_status, "vaCreateConfig");
// Upper bound on the number of formats; used to size the query buffer below.
278 int num_formats = vaMaxNumImageFormats(va_dpy->va_dpy);
279 assert(num_formats > 0);
281 unique_ptr<VAImageFormat[]> formats(new VAImageFormat[num_formats]);
// num_formats is passed by pointer, so the query can update it before the loop.
282 va_status = vaQueryImageFormats(va_dpy->va_dpy, formats.get(), &num_formats);
283 CHECK_VASTATUS(va_status, "vaQueryImageFormats");
286 for (int i = 0; i < num_formats; ++i) {
287 // Seemingly VA_FOURCC_422H is no good for vaGetImage(). :-/
288 if (formats[i].fourcc == VA_FOURCC_UYVY) {
289 memcpy(&uyvy_format, &formats[i], sizeof(VAImageFormat));
298 fprintf(stderr, "VA-API JPEG decoding initialized.\n");
299 vaapi_jpeg_decoding_usable = true;
302 class VABufferDestroyer {
304 VABufferDestroyer(VADisplay *dpy, VABufferID buf)
305 : dpy(dpy), buf(buf) {}
307 ~VABufferDestroyer() {
308 vaDestroyBuffer(dpy, buf);
// Decodes the JPEG file at `filename` through VA-API and returns it as a Frame.
//
// libjpeg is used only to parse the header (dimensions, sampling factors,
// quantization and Huffman tables). The bytes that follow the header are
// handed to VA-API as slice data. Only three-component images with 2:1
// horizontal chroma subsampling pass the checks below. The decoded surface is
// read back into the UYVY image from get_va_resources() and split into a luma
// plane (frame->y) and an interleaved chroma plane (frame->cbcr).
// NOTE(review): what happens on the error paths after the fprintf()/perror()
// calls, and the final return, are not visible here.
316 shared_ptr<Frame> decode_jpeg_vaapi(const string &filename)
318 jpeg_decompress_struct dinfo;
320 dinfo.err = jpeg_std_error(&jerr);
321 jpeg_create_decompress(&dinfo);
322 JPEGDestroyer destroy_dinfo(&dinfo);
324 FILE *fp = fopen(filename.c_str(), "rb");
326 perror(filename.c_str());
329 jpeg_stdio_src(&dinfo, fp);
331 jpeg_read_header(&dinfo, true);
333 // Read the data that comes after the header. VA-API will destuff and all for us.
// Start with what libjpeg has buffered but not consumed, then append what
// fread() returns from the file.
334 std::string str((const char *)dinfo.src->next_input_byte, dinfo.src->bytes_in_buffer);
337 size_t ret = fread(buf, 1, sizeof(buf), fp);
338 str.append(buf, ret);
// NOTE(review): the message below reads comp_info[1] and comp_info[2] even
// though this branch is taken when num_components is not 3 -- confirm that
// cannot read past the component array for images with fewer components.
342 if (dinfo.num_components != 3) {
343 fprintf(stderr, "Not a color JPEG. (%d components, Y=%dx%d, Cb=%dx%d, Cr=%dx%d)\n",
344 dinfo.num_components,
345 dinfo.comp_info[0].h_samp_factor, dinfo.comp_info[0].v_samp_factor,
346 dinfo.comp_info[1].h_samp_factor, dinfo.comp_info[1].v_samp_factor,
347 dinfo.comp_info[2].h_samp_factor, dinfo.comp_info[2].v_samp_factor);
// Require luma at horizontal factor 2, both chroma components at horizontal
// factor 1, and chroma vertical factors equal to luma's.
350 if (dinfo.comp_info[0].h_samp_factor != 2 ||
351 dinfo.comp_info[1].h_samp_factor != 1 ||
352 dinfo.comp_info[1].v_samp_factor != dinfo.comp_info[0].v_samp_factor ||
353 dinfo.comp_info[2].h_samp_factor != 1 ||
354 dinfo.comp_info[2].v_samp_factor != dinfo.comp_info[0].v_samp_factor) {
355 fprintf(stderr, "Not 4:2:2. (Y=%dx%d, Cb=%dx%d, Cr=%dx%d)\n",
356 dinfo.comp_info[0].h_samp_factor, dinfo.comp_info[0].v_samp_factor,
357 dinfo.comp_info[1].h_samp_factor, dinfo.comp_info[1].v_samp_factor,
358 dinfo.comp_info[2].h_samp_factor, dinfo.comp_info[2].v_samp_factor);
362 // Picture parameters.
363 VAPictureParameterBufferJPEGBaseline pic_param;
364 memset(&pic_param, 0, sizeof(pic_param));
365 pic_param.picture_width = dinfo.image_width;
366 pic_param.picture_height = dinfo.image_height;
367 for (int component_idx = 0; component_idx < dinfo.num_components; ++component_idx) {
368 const jpeg_component_info *comp = &dinfo.comp_info[component_idx];
369 pic_param.components[component_idx].component_id = comp->component_id;
370 pic_param.components[component_idx].h_sampling_factor = comp->h_samp_factor;
371 pic_param.components[component_idx].v_sampling_factor = comp->v_samp_factor;
372 pic_param.components[component_idx].quantiser_table_selector = comp->quant_tbl_no;
374 pic_param.num_components = dinfo.num_components;
375 pic_param.color_space = 0; // YUV.
376 pic_param.rotation = VA_ROTATION_NONE;
// NOTE(review): libva documents the second argument of vaCreateBuffer() as a
// context ID; config_id is passed here and in the other vaCreateBuffer() calls
// below -- confirm this is intended.
378 VABufferID pic_param_buffer;
379 VAStatus va_status = vaCreateBuffer(va_dpy->va_dpy, config_id, VAPictureParameterBufferType, sizeof(pic_param), 1, &pic_param, &pic_param_buffer);
380 CHECK_VASTATUS_RET(va_status, "vaCreateBuffer");
381 VABufferDestroyer destroy_pic_param(&va_dpy->va_dpy, pic_param_buffer);
383 // Quantization matrices.
384 VAIQMatrixBufferJPEGBaseline iq;
385 memset(&iq, 0, sizeof(iq));
387 for (int quant_tbl_idx = 0; quant_tbl_idx < min(4, NUM_QUANT_TBLS); ++quant_tbl_idx) {
388 const JQUANT_TBL *qtbl = dinfo.quant_tbl_ptrs[quant_tbl_idx];
389 if (qtbl == nullptr) {
390 iq.load_quantiser_table[quant_tbl_idx] = 0;
392 iq.load_quantiser_table[quant_tbl_idx] = 1;
393 for (int i = 0; i < 64; ++i) {
// Values above 255 are rejected with the message below before the copy.
394 if (qtbl->quantval[i] > 255) {
395 fprintf(stderr, "Baseline JPEG only!\n");
398 iq.quantiser_table[quant_tbl_idx][i] = qtbl->quantval[i];
403 VABufferID iq_buffer;
404 va_status = vaCreateBuffer(va_dpy->va_dpy, config_id, VAIQMatrixBufferType, sizeof(iq), 1, &iq, &iq_buffer);
405 CHECK_VASTATUS_RET(va_status, "vaCreateBuffer");
406 VABufferDestroyer destroy_iq(&va_dpy->va_dpy, iq_buffer);
408 // Huffman tables (arithmetic is not supported).
409 VAHuffmanTableBufferJPEGBaseline huff;
410 memset(&huff, 0, sizeof(huff));
412 for (int huff_tbl_idx = 0; huff_tbl_idx < min(2, NUM_HUFF_TBLS); ++huff_tbl_idx) {
413 const JHUFF_TBL *ac_hufftbl = dinfo.ac_huff_tbl_ptrs[huff_tbl_idx];
414 const JHUFF_TBL *dc_hufftbl = dinfo.dc_huff_tbl_ptrs[huff_tbl_idx];
// The AC and DC tables for a slot are expected to be both present or both absent.
415 if (ac_hufftbl == nullptr) {
416 assert(dc_hufftbl == nullptr);
417 huff.load_huffman_table[huff_tbl_idx] = 0;
419 assert(dc_hufftbl != nullptr);
420 huff.load_huffman_table[huff_tbl_idx] = 1;
// The source index is shifted by one: bits[i + 1] goes into slot i.
422 for (int i = 0; i < 16; ++i) {
423 huff.huffman_table[huff_tbl_idx].num_dc_codes[i] = dc_hufftbl->bits[i + 1];
425 for (int i = 0; i < 12; ++i) {
426 huff.huffman_table[huff_tbl_idx].dc_values[i] = dc_hufftbl->huffval[i];
428 for (int i = 0; i < 16; ++i) {
429 huff.huffman_table[huff_tbl_idx].num_ac_codes[i] = ac_hufftbl->bits[i + 1];
431 for (int i = 0; i < 162; ++i) {
432 huff.huffman_table[huff_tbl_idx].ac_values[i] = ac_hufftbl->huffval[i];
437 VABufferID huff_buffer;
438 va_status = vaCreateBuffer(va_dpy->va_dpy, config_id, VAHuffmanTableBufferType, sizeof(huff), 1, &huff, &huff_buffer);
439 CHECK_VASTATUS_RET(va_status, "vaCreateBuffer");
440 VABufferDestroyer destroy_huff(&va_dpy->va_dpy, huff_buffer);
442 // Slice parameters (metadata about the slice).
443 VASliceParameterBufferJPEGBaseline parms;
444 memset(&parms, 0, sizeof(parms));
// The whole remainder of the file is submitted as a single slice.
445 parms.slice_data_size = str.size();
446 parms.slice_data_offset = 0;
447 parms.slice_data_flag = VA_SLICE_DATA_FLAG_ALL;
448 parms.slice_horizontal_position = 0;
449 parms.slice_vertical_position = 0;
450 for (int component_idx = 0; component_idx < dinfo.num_components; ++component_idx) {
451 const jpeg_component_info *comp = &dinfo.comp_info[component_idx];
452 parms.components[component_idx].component_selector = comp->component_id;
453 parms.components[component_idx].dc_table_selector = comp->dc_tbl_no;
454 parms.components[component_idx].ac_table_selector = comp->ac_tbl_no;
// Only table slots 0 and 1 are filled in by the Huffman loop above.
455 if (parms.components[component_idx].dc_table_selector > 1 ||
456 parms.components[component_idx].ac_table_selector > 1) {
457 fprintf(stderr, "Uses too many Huffman tables\n");
461 parms.num_components = dinfo.num_components;
462 parms.restart_interval = dinfo.restart_interval;
// An MCU is taken to be DCTSIZE * 2 pixels wide and DCTSIZE pixels tall; both
// counts round up.
// NOTE(review): the sampling check above does not pin the luma vertical factor
// to 1, which the DCTSIZE-tall MCU appears to assume -- confirm.
463 int horiz_mcus = (dinfo.image_width + (DCTSIZE * 2) - 1) / (DCTSIZE * 2);
464 int vert_mcus = (dinfo.image_height + DCTSIZE - 1) / DCTSIZE;
465 parms.num_mcus = horiz_mcus * vert_mcus;
467 VABufferID slice_param_buffer;
468 va_status = vaCreateBuffer(va_dpy->va_dpy, config_id, VASliceParameterBufferType, sizeof(parms), 1, &parms, &slice_param_buffer);
469 CHECK_VASTATUS_RET(va_status, "vaCreateBuffer");
470 VABufferDestroyer destroy_slice_param(&va_dpy->va_dpy, slice_param_buffer);
// The compressed data itself.
473 VABufferID data_buffer;
474 va_status = vaCreateBuffer(va_dpy->va_dpy, config_id, VASliceDataBufferType, str.size(), 1, &str[0], &data_buffer);
475 CHECK_VASTATUS_RET(va_status, "vaCreateBuffer");
476 VABufferDestroyer destroy_data(&va_dpy->va_dpy, data_buffer);
// Surface, context and image sized for this picture; `release` passes them to
// release_va_resources() when this function exits.
478 VAResources resources = get_va_resources(dinfo.image_width, dinfo.image_height);
479 ReleaseVAResources release(resources);
// Submit the five buffers to the context one at a time, then end the picture.
481 va_status = vaBeginPicture(va_dpy->va_dpy, resources.context, resources.surface);
482 CHECK_VASTATUS_RET(va_status, "vaBeginPicture");
483 va_status = vaRenderPicture(va_dpy->va_dpy, resources.context, &pic_param_buffer, 1);
484 CHECK_VASTATUS_RET(va_status, "vaRenderPicture(pic_param)");
485 va_status = vaRenderPicture(va_dpy->va_dpy, resources.context, &iq_buffer, 1);
486 CHECK_VASTATUS_RET(va_status, "vaRenderPicture(iq)");
487 va_status = vaRenderPicture(va_dpy->va_dpy, resources.context, &huff_buffer, 1);
488 CHECK_VASTATUS_RET(va_status, "vaRenderPicture(huff)");
489 va_status = vaRenderPicture(va_dpy->va_dpy, resources.context, &slice_param_buffer, 1);
490 CHECK_VASTATUS_RET(va_status, "vaRenderPicture(slice_param)");
491 va_status = vaRenderPicture(va_dpy->va_dpy, resources.context, &data_buffer, 1);
492 CHECK_VASTATUS_RET(va_status, "vaRenderPicture(data)");
493 va_status = vaEndPicture(va_dpy->va_dpy, resources.context);
494 CHECK_VASTATUS_RET(va_status, "vaEndPicture");
496 // vaDeriveImage() works, but the resulting image seems to live in
497 // uncached memory, which makes copying data out from it very, very slow.
498 // Thanks to FFmpeg for the observation that you can vaGetImage() the
499 // surface onto your own image (although then, it can't be planar, which
500 // is unfortunate for us).
// NOTE(review): `surf` and `image` are not declared in the visible code; given
// the comment above, this vaDeriveImage() path looks like a disabled
// alternative to the vaGetImage() path that follows -- confirm.
503 va_status = vaDeriveImage(va_dpy->va_dpy, surf, &image);
504 CHECK_VASTATUS_RET(va_status, "vaDeriveImage");
506 va_status = vaSyncSurface(va_dpy->va_dpy, resources.surface);
507 CHECK_VASTATUS_RET(va_status, "vaSyncSurface");
// Copy the full picture from the surface into our own UYVY image.
509 va_status = vaGetImage(va_dpy->va_dpy, resources.surface, 0, 0, dinfo.image_width, dinfo.image_height, resources.image.image_id);
510 CHECK_VASTATUS_RET(va_status, "vaGetImage");
514 va_status = vaMapBuffer(va_dpy->va_dpy, resources.image.buf, &mapped);
515 CHECK_VASTATUS_RET(va_status, "vaMapBuffer");
517 shared_ptr<Frame> frame(new Frame);
// NOTE(review): this planar copy reads `image`, the vaDeriveImage() variable, so
// it presumably belongs to the same disabled alternative -- confirm.
519 // 4:2:2 planar (for vaDeriveImage).
520 frame->y.reset(new uint8_t[dinfo.image_width * dinfo.image_height]);
521 frame->cb.reset(new uint8_t[(dinfo.image_width / 2) * dinfo.image_height]);
522 frame->cr.reset(new uint8_t[(dinfo.image_width / 2) * dinfo.image_height]);
523 for (int component_idx = 0; component_idx < dinfo.num_components; ++component_idx) {
526 if (component_idx == 0) {
527 dptr = frame->y.get();
528 width = dinfo.image_width;
529 } else if (component_idx == 1) {
530 dptr = frame->cb.get();
531 width = dinfo.image_width / 2;
532 } else if (component_idx == 2) {
533 dptr = frame->cr.get();
534 width = dinfo.image_width / 2;
538 const uint8_t *sptr = (const uint8_t *)mapped + image.offsets[component_idx];
539 size_t spitch = image.pitches[component_idx];
// Row-by-row copy, dropping any per-row padding in the source.
540 for (size_t y = 0; y < dinfo.image_height; ++y) {
541 memcpy(dptr + y * width, sptr + y * spitch, width);
545 // Convert Y'CbCr to separate Y' and CbCr.
546 frame->is_semiplanar = true;
547 frame->y.reset(new uint8_t[dinfo.image_width * dinfo.image_height]);
548 frame->cbcr.reset(new uint8_t[dinfo.image_width * dinfo.image_height]);
549 const uint8_t *src = (const uint8_t *)mapped + resources.image.offsets[0];
// When the pitch equals width * 2 the rows are contiguous, so the whole image
// is split in one call; otherwise each row is split separately, starting at
// its pitch offset.
550 if (resources.image.pitches[0] == dinfo.image_width * 2) {
551 memcpy_interleaved(frame->cbcr.get(), frame->y.get(), src, dinfo.image_width * dinfo.image_height * 2);
553 for (unsigned y = 0; y < dinfo.image_height; ++y) {
554 memcpy_interleaved(frame->cbcr.get() + y * dinfo.image_width, frame->y.get() + y * dinfo.image_width,
555 src + y * resources.image.pitches[0], dinfo.image_width * 2);
559 frame->width = dinfo.image_width;
560 frame->height = dinfo.image_height;
561 frame->chroma_subsampling_x = 2;
562 frame->chroma_subsampling_y = 1;
563 frame->pitch_y = dinfo.image_width;
564 frame->pitch_chroma = dinfo.image_width / 2;
566 va_status = vaUnmapBuffer(va_dpy->va_dpy, resources.image.buf);
567 CHECK_VASTATUS_RET(va_status, "vaUnmapBuffer");