#include "mjpeg_encoder.h"

#include <assert.h>
#include <jpeglib.h>
#include <stdint.h>
#include <string.h>

extern "C" {
#include <libavformat/avformat.h>
}

#include "defs.h"
#include "flags.h"
#include "shared/ffmpeg_raii.h"
#include "shared/httpd.h"
#include "shared/memcpy_interleaved.h"
#include "shared/metrics.h"
#include "pbo_frame_allocator.h"
#include "shared/timebase.h"
#include "va_display_with_cleanup.h"

#include <movit/colorspace_conversion_effect.h>

#include <va/va.h>
#include <va/va_drm.h>
#include <va/va_x11.h>

using namespace Eigen;
using namespace bmusb;
using namespace movit;
using namespace std;

// Locally cached copy of the VA-API UYVY image format, filled in by try_open_va().
static VAImageFormat uyvy_format;

extern void memcpy_with_pitch(uint8_t *dst, const uint8_t *src, size_t src_width, size_t dst_pitch, size_t height);

// From libjpeg (although it's of course identical between implementations).
static const int jpeg_natural_order[DCTSIZE2] = {
	 0,  1,  8, 16,  9,  2,  3, 10,
	17, 24, 32, 25, 18, 11,  4,  5,
	12, 19, 26, 33, 40, 48, 41, 34,
	27, 20, 13,  6,  7, 14, 21, 28,
	35, 42, 49, 56, 57, 50, 43, 36,
	29, 22, 15, 23, 30, 37, 44, 51,
	58, 59, 52, 45, 38, 31, 39, 46,
	53, 60, 61, 54, 47, 55, 62, 63,
};
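
// jpeg_natural_order[i] gives the natural (raster) position of the i-th
// coefficient in zigzag scan order; e.g., zigzag index 2 maps to position 8,
// which is row 1, column 0 of the 8x8 block. It is used further down to
// convert libjpeg's raster-ordered quantization tables into the zigzag
// order that the JPEG bitstream (and the VA-API quantization buffer) uses.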

// A simple libjpeg destination manager that writes into a std::vector,
// growing it as needed.
struct VectorDestinationManager {
	jpeg_destination_mgr pub;  // Must be the first member, since we cast back and forth.
	std::vector<uint8_t> dest;

	VectorDestinationManager()
	{
		pub.init_destination = init_destination_thunk;
		pub.empty_output_buffer = empty_output_buffer_thunk;
		pub.term_destination = term_destination_thunk;
	}

	static void init_destination_thunk(j_compress_ptr ptr)
	{
		((VectorDestinationManager *)(ptr->dest))->init_destination();
	}

	inline void init_destination()
	{
		make_room(0);
	}

	static boolean empty_output_buffer_thunk(j_compress_ptr ptr)
	{
		return ((VectorDestinationManager *)(ptr->dest))->empty_output_buffer();
	}

	inline bool empty_output_buffer()
	{
		make_room(dest.size());  // Should ignore pub.free_in_buffer!
		return true;
	}

	inline void make_room(size_t bytes_used)
	{
		dest.resize(bytes_used + 4096);
		dest.resize(dest.capacity());
		pub.next_output_byte = dest.data() + bytes_used;
		pub.free_in_buffer = dest.size() - bytes_used;
	}
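
	// Worked example of make_room(): after make_room(100), dest holds at
	// least 4196 bytes (possibly more, since we round up to capacity()),
	// libjpeg continues writing at dest.data() + 100, and free_in_buffer
	// covers the rest. Rounding up to capacity() means repeated calls
	// amortize to the vector's geometric growth instead of fixed 4 kB steps.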

	static void term_destination_thunk(j_compress_ptr ptr)
	{
		((VectorDestinationManager *)(ptr->dest))->term_destination();
	}

	inline void term_destination()
	{
		dest.resize(dest.size() - pub.free_in_buffer);  // Drop the unused tail.
	}
};
static_assert(std::is_standard_layout<VectorDestinationManager>::value, "");
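
// Minimal usage sketch for the destination manager (illustrative only; it
// mirrors what init_jpeg_422() and encode_jpeg_libjpeg() do below). The
// static_assert above is what makes the cast in cinfo.dest legal, since
// pub is the first member of a standard-layout struct:
//
//   VectorDestinationManager dest;
//   jpeg_compress_struct cinfo;
//   jpeg_error_mgr jerr;
//   cinfo.err = jpeg_std_error(&jerr);
//   jpeg_create_compress(&cinfo);
//   cinfo.dest = (jpeg_destination_mgr *)&dest;
//   ... set parameters, compress ...
//   jpeg_destroy_compress(&cinfo);
//   // dest.dest now holds the complete JPEG.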

int MJPEGEncoder::write_packet2_thunk(void *opaque, uint8_t *buf, int buf_size, AVIODataMarkerType type, int64_t time)
{
	MJPEGEncoder *engine = (MJPEGEncoder *)opaque;
	return engine->write_packet2(buf, buf_size, type, time);
}

int MJPEGEncoder::write_packet2(uint8_t *buf, int buf_size, AVIODataMarkerType type, int64_t time)
{
	if (type == AVIO_DATA_MARKER_HEADER) {
		mux_header.append((char *)buf, buf_size);
		httpd->set_header(HTTPD::MULTICAM_STREAM, mux_header);
	} else {
		httpd->add_data(HTTPD::MULTICAM_STREAM, (char *)buf, buf_size, /*keyframe=*/true, AV_NOPTS_VALUE, AVRational{ AV_TIME_BASE, 1 });
	}
	return buf_size;
}
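
// Note on the callback above: because we register it via
// AVIOContext::write_data_type (rather than a plain write callback),
// FFmpeg tags each chunk it hands us. AVIO_DATA_MARKER_HEADER marks the
// bytes that make up the container header, which we must save and replay
// to every newly connected HTTP client; everything else is streamed as
// regular data.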

MJPEGEncoder::MJPEGEncoder(HTTPD *httpd, const string &va_display)
	: httpd(httpd)
{
	// Set up the mux. We don't use the Mux wrapper, because it's geared towards
	// a situation with only one video stream (and possibly one audio stream)
	// with known width/height, and we don't need the extra functionality it provides.
	avctx.reset(avformat_alloc_context());
	avctx->oformat = av_guess_format("nut", nullptr, nullptr);

	uint8_t *buf = (uint8_t *)av_malloc(MUX_BUFFER_SIZE);
	avctx->pb = avio_alloc_context(buf, MUX_BUFFER_SIZE, /*write_flag=*/1, this, nullptr, nullptr, nullptr);
	avctx->pb->write_data_type = &MJPEGEncoder::write_packet2_thunk;
	avctx->flags = AVFMT_FLAG_CUSTOM_IO;

	for (unsigned card_idx = 0; card_idx < global_flags.card_to_mjpeg_stream_export.size(); ++card_idx) {
		AVStream *stream = avformat_new_stream(avctx.get(), nullptr);
		if (stream == nullptr) {
			fprintf(stderr, "avformat_new_stream() failed\n");
			abort();
		}

		// FFmpeg is very picky about having audio at 1/48000 timebase,
		// no matter what we write. Even though we'd prefer our usual 1/120000,
		// put the video on the same one, so that we can have locked audio.
		stream->time_base = AVRational{ 1, OUTPUT_FREQUENCY };
		stream->codecpar->codec_type = AVMEDIA_TYPE_VIDEO;
		stream->codecpar->codec_id = AV_CODEC_ID_MJPEG;

		// Used for aspect ratio only. Can change without notice (the mux won't care).
		stream->codecpar->width = global_flags.width;
		stream->codecpar->height = global_flags.height;

		// TODO: We could perhaps use the interpretation for each card here
		// (or at least the command-line flags) instead of the defaults,
		// but what would we do when they change?
		stream->codecpar->color_primaries = AVCOL_PRI_BT709;
		stream->codecpar->color_trc = AVCOL_TRC_IEC61966_2_1;
		stream->codecpar->color_space = AVCOL_SPC_BT709;
		stream->codecpar->color_range = AVCOL_RANGE_MPEG;
		stream->codecpar->chroma_location = AVCHROMA_LOC_LEFT;
		stream->codecpar->field_order = AV_FIELD_PROGRESSIVE;
	}

	for (unsigned card_idx = 0; card_idx < global_flags.card_to_mjpeg_stream_export.size(); ++card_idx) {
		AVStream *stream = avformat_new_stream(avctx.get(), nullptr);
		if (stream == nullptr) {
			fprintf(stderr, "avformat_new_stream() failed\n");
			abort();
		}
		stream->time_base = AVRational{ 1, OUTPUT_FREQUENCY };
		stream->codecpar->codec_type = AVMEDIA_TYPE_AUDIO;
		stream->codecpar->codec_id = AV_CODEC_ID_PCM_S32LE;
		stream->codecpar->channel_layout = AV_CH_LAYOUT_STEREO;
		stream->codecpar->channels = 2;
		stream->codecpar->sample_rate = OUTPUT_FREQUENCY;
	}

	AVDictionary *options = NULL;
	vector<pair<string, string>> opts = MUX_OPTS;
	for (pair<string, string> opt : opts) {
		av_dict_set(&options, opt.first.c_str(), opt.second.c_str(), 0);
	}
	if (avformat_write_header(avctx.get(), &options) < 0) {
		fprintf(stderr, "avformat_write_header() failed\n");
		abort();
	}

	// Initialize VA-API.
	string error;
	va_dpy = try_open_va(va_display, &error, &config_id);
	if (va_dpy == nullptr) {
		fprintf(stderr, "Could not initialize VA-API for MJPEG encoding: %s. JPEGs will be encoded in software if needed.\n", error.c_str());
	}

	encoder_thread = thread(&MJPEGEncoder::encoder_thread_func, this);
	if (va_dpy != nullptr) {
		va_receiver_thread = thread(&MJPEGEncoder::va_receiver_thread_func, this);
	}

	global_metrics.add("mjpeg_frames", {{ "status", "dropped" }, { "reason", "zero_size" }}, &metric_mjpeg_frames_zero_size_dropped);
	global_metrics.add("mjpeg_frames", {{ "status", "dropped" }, { "reason", "interlaced" }}, &metric_mjpeg_frames_interlaced_dropped);
	global_metrics.add("mjpeg_frames", {{ "status", "dropped" }, { "reason", "unsupported_pixel_format" }}, &metric_mjpeg_frames_unsupported_pixel_format_dropped);
	global_metrics.add("mjpeg_frames", {{ "status", "dropped" }, { "reason", "oversized" }}, &metric_mjpeg_frames_oversized_dropped);
	global_metrics.add("mjpeg_frames", {{ "status", "dropped" }, { "reason", "overrun" }}, &metric_mjpeg_overrun_dropped);
	global_metrics.add("mjpeg_frames", {{ "status", "submitted" }}, &metric_mjpeg_overrun_submitted);

	running = true;
}

MJPEGEncoder::~MJPEGEncoder()
{
	av_free(avctx->pb->buffer);

	global_metrics.remove("mjpeg_frames", {{ "status", "dropped" }, { "reason", "zero_size" }});
	global_metrics.remove("mjpeg_frames", {{ "status", "dropped" }, { "reason", "interlaced" }});
	global_metrics.remove("mjpeg_frames", {{ "status", "dropped" }, { "reason", "unsupported_pixel_format" }});
	global_metrics.remove("mjpeg_frames", {{ "status", "dropped" }, { "reason", "oversized" }});
	global_metrics.remove("mjpeg_frames", {{ "status", "dropped" }, { "reason", "overrun" }});
	global_metrics.remove("mjpeg_frames", {{ "status", "submitted" }});
}

void MJPEGEncoder::stop()
{
	if (!running) {
		return;
	}
	running = false;
	should_quit = true;
	any_frames_to_be_encoded.notify_all();
	any_frames_encoding.notify_all();
	encoder_thread.join();
	if (va_dpy != nullptr) {
		va_receiver_thread.join();
	}
}

unique_ptr<VADisplayWithCleanup> MJPEGEncoder::try_open_va(const string &va_display, string *error, VAConfigID *config_id)
{
	unique_ptr<VADisplayWithCleanup> va_dpy = va_open_display(va_display);
	if (va_dpy == nullptr) {
		if (error) *error = "Opening VA display failed";
		return nullptr;
	}
	int major_ver, minor_ver;
	VAStatus va_status = vaInitialize(va_dpy->va_dpy, &major_ver, &minor_ver);
	if (va_status != VA_STATUS_SUCCESS) {
		char buf[256];
		snprintf(buf, sizeof(buf), "vaInitialize() failed with status %d\n", va_status);
		if (error != nullptr) *error = buf;
		return nullptr;
	}

	VAConfigAttrib attr = { VAConfigAttribRTFormat, VA_RT_FORMAT_YUV422 };
	va_status = vaCreateConfig(va_dpy->va_dpy, VAProfileJPEGBaseline, VAEntrypointEncPicture,
		&attr, 1, config_id);
	if (va_status == VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT) {
		if (error != nullptr) *error = "No hardware support";
		return nullptr;
	} else if (va_status != VA_STATUS_SUCCESS) {
		char buf[256];
		snprintf(buf, sizeof(buf), "vaCreateConfig() failed with status %d\n", va_status);
		if (error != nullptr) *error = buf;
		return nullptr;
	}

	// TODO: Unify with the code in Futatabi.
	int num_formats = vaMaxNumImageFormats(va_dpy->va_dpy);
	assert(num_formats > 0);

	unique_ptr<VAImageFormat[]> formats(new VAImageFormat[num_formats]);
	va_status = vaQueryImageFormats(va_dpy->va_dpy, formats.get(), &num_formats);
	if (va_status != VA_STATUS_SUCCESS) {
		char buf[256];
		snprintf(buf, sizeof(buf), "vaQueryImageFormats() failed with status %d\n", va_status);
		if (error != nullptr) *error = buf;
		return nullptr;
	}

	bool found = false;
	for (int i = 0; i < num_formats; ++i) {
		if (formats[i].fourcc == VA_FOURCC_UYVY) {
			memcpy(&uyvy_format, &formats[i], sizeof(VAImageFormat));
			found = true;
			break;
		}
	}
	if (!found) {
		if (error != nullptr) *error = "UYVY format not found";
		return nullptr;
	}

	return va_dpy;
}

void MJPEGEncoder::upload_frame(int64_t pts, unsigned card_index, RefCountedFrame frame, const bmusb::VideoFormat &video_format, size_t y_offset, size_t cbcr_offset, vector<int32_t> audio, const RGBTriplet &white_balance)
{
	PBOFrameAllocator::Userdata *userdata = (PBOFrameAllocator::Userdata *)frame->userdata;
	if (video_format.width == 0 || video_format.height == 0) {
		++metric_mjpeg_frames_zero_size_dropped;
		return;
	}
	if (video_format.interlaced) {
		fprintf(stderr, "Card %u: Ignoring JPEG encoding for interlaced frame\n", card_index);
		++metric_mjpeg_frames_interlaced_dropped;
		return;
	}
	if (userdata->pixel_format != PixelFormat_8BitYCbCr ||
	    !frame->interleaved) {
		fprintf(stderr, "Card %u: Ignoring JPEG encoding for unsupported pixel format\n", card_index);
		++metric_mjpeg_frames_unsupported_pixel_format_dropped;
		return;
	}
	if (video_format.width > 4096 || video_format.height > 4096) {
		fprintf(stderr, "Card %u: Ignoring JPEG encoding for oversized frame\n", card_index);
		++metric_mjpeg_frames_oversized_dropped;
		return;
	}

	lock_guard<mutex> lock(mu);
	if (frames_to_be_encoded.size() + frames_encoding.size() > 50) {
		fprintf(stderr, "WARNING: MJPEG encoding doesn't keep up, discarding frame.\n");
		++metric_mjpeg_overrun_dropped;
		return;
	}
	++metric_mjpeg_overrun_submitted;
	frames_to_be_encoded.push(QueuedFrame{ pts, card_index, frame, video_format, y_offset, cbcr_offset, move(audio), white_balance });
	any_frames_to_be_encoded.notify_all();
}

int MJPEGEncoder::get_mjpeg_stream_for_card(unsigned card_index)
{
	// Only bother doing MJPEG encoding if there are any connected clients
	// that want the stream.
	if (httpd->get_num_connected_multicam_clients() == 0) {
		return -1;
	}

	auto it = global_flags.card_to_mjpeg_stream_export.find(card_index);
	if (it == global_flags.card_to_mjpeg_stream_export.end()) {
		return -1;
	}
	return it->second;
}

void MJPEGEncoder::encoder_thread_func()
{
	pthread_setname_np(pthread_self(), "MJPEG_Encode");

	// Scratch buffers for the libjpeg path: eight rows of up to 4096 pixels each.
	posix_memalign((void **)&tmp_y, 4096, 4096 * 8);
	posix_memalign((void **)&tmp_cbcr, 4096, 4096 * 8);
	posix_memalign((void **)&tmp_cb, 4096, 4096 * 8);
	posix_memalign((void **)&tmp_cr, 4096, 4096 * 8);

	for (;;) {
		QueuedFrame qf;
		{
			unique_lock<mutex> lock(mu);
			any_frames_to_be_encoded.wait(lock, [this] { return !frames_to_be_encoded.empty() || should_quit; });
			if (should_quit) break;
			qf = move(frames_to_be_encoded.front());
			frames_to_be_encoded.pop();
		}

		if (va_dpy != nullptr) {
			// Will call back in the receiver thread.
			encode_jpeg_va(move(qf));
		} else {
			// Write audio before video, since Futatabi expects it.
			if (qf.audio.size() > 0) {
				write_audio_packet(qf.pts, qf.card_index, qf.audio);
			}

			// Encode synchronously, in the same thread.
			vector<uint8_t> jpeg = encode_jpeg_libjpeg(qf);
			write_mjpeg_packet(qf.pts, qf.card_index, jpeg.data(), jpeg.size());
		}
	}

	free(tmp_y);
	free(tmp_cbcr);
	free(tmp_cb);
	free(tmp_cr);
}

void MJPEGEncoder::write_mjpeg_packet(int64_t pts, unsigned card_index, const uint8_t *jpeg, size_t jpeg_size)
{
	AVPacket pkt;
	memset(&pkt, 0, sizeof(pkt));
	pkt.buf = nullptr;
	pkt.data = const_cast<uint8_t *>(jpeg);
	pkt.size = jpeg_size;
	pkt.stream_index = card_index;
	pkt.flags = AV_PKT_FLAG_KEY;
	AVRational time_base = avctx->streams[pkt.stream_index]->time_base;
	pkt.pts = pkt.dts = av_rescale_q(pts, AVRational{ 1, TIMEBASE }, time_base);
	pkt.duration = 0;

	if (av_write_frame(avctx.get(), &pkt) < 0) {
		fprintf(stderr, "av_write_frame() failed\n");
		exit(1);
	}
}

void MJPEGEncoder::write_audio_packet(int64_t pts, unsigned card_index, const vector<int32_t> &audio)
{
	AVPacket pkt;
	memset(&pkt, 0, sizeof(pkt));
	pkt.buf = nullptr;
	pkt.data = reinterpret_cast<uint8_t *>(const_cast<int32_t *>(&audio[0]));
	pkt.size = audio.size() * sizeof(audio[0]);
	pkt.stream_index = card_index + global_flags.card_to_mjpeg_stream_export.size();
	pkt.flags = AV_PKT_FLAG_KEY;
	AVRational time_base = avctx->streams[pkt.stream_index]->time_base;
	pkt.pts = pkt.dts = av_rescale_q(pts, AVRational{ 1, TIMEBASE }, time_base);
	size_t num_stereo_samples = audio.size() / 2;
	pkt.duration = av_rescale_q(num_stereo_samples, AVRational{ 1, OUTPUT_FREQUENCY }, time_base);
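
	// Worked example: with TIMEBASE = 120000 and OUTPUT_FREQUENCY = 48000,
	// the stream time base is 1/48000, so a pts of 120000 (one second)
	// rescales to 48000, and 960 stereo samples (20 ms at 48 kHz) give a
	// duration of 960 ticks.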

	if (av_write_frame(avctx.get(), &pkt) < 0) {
		fprintf(stderr, "av_write_frame() failed\n");
		exit(1);
	}
}

// RAII helper that destroys a VA-API buffer when it goes out of scope.
class VABufferDestroyer {
public:
	VABufferDestroyer(VADisplay dpy, VABufferID buf)
		: dpy(dpy), buf(buf) {}

	~VABufferDestroyer() {
		VAStatus va_status = vaDestroyBuffer(dpy, buf);
		CHECK_VASTATUS(va_status, "vaDestroyBuffer");
	}

private:
	VADisplay dpy;
	VABufferID buf;
};

MJPEGEncoder::VAResources MJPEGEncoder::get_va_resources(unsigned width, unsigned height)
{
	{
		lock_guard<mutex> lock(va_resources_mutex);
		for (auto it = va_resources_freelist.begin(); it != va_resources_freelist.end(); ++it) {
			if (it->width == width && it->height == height) {
				VAResources ret = *it;
				va_resources_freelist.erase(it);
				return ret;
			}
		}
	}

	VAResources ret;
	ret.width = width;
	ret.height = height;

	VASurfaceAttrib attrib;
	attrib.flags = VA_SURFACE_ATTRIB_SETTABLE;
	attrib.type = VASurfaceAttribPixelFormat;
	attrib.value.type = VAGenericValueTypeInteger;
	attrib.value.value.i = VA_FOURCC_UYVY;

	VAStatus va_status = vaCreateSurfaces(va_dpy->va_dpy, VA_RT_FORMAT_YUV422,
		width, height,
		&ret.surface, 1, &attrib, 1);
	CHECK_VASTATUS(va_status, "vaCreateSurfaces");

	va_status = vaCreateContext(va_dpy->va_dpy, config_id, width, height, 0, &ret.surface, 1, &ret.context);
	CHECK_VASTATUS(va_status, "vaCreateContext");

	va_status = vaCreateBuffer(va_dpy->va_dpy, config_id, VAEncCodedBufferType, width * height * 3 + 8192, 1, nullptr, &ret.data_buffer);
	CHECK_VASTATUS(va_status, "vaCreateBuffer");

	va_status = vaCreateImage(va_dpy->va_dpy, &uyvy_format, width, height, &ret.image);
	CHECK_VASTATUS(va_status, "vaCreateImage");

	return ret;
}
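
// Usage sketch for the pool above (illustrative; this is the pattern
// encode_jpeg_va() follows below). Resources are handed out per resolution
// and returned to the freelist via the ReleaseVAResources RAII wrapper
// rather than destroyed, so steady-state encoding does not recreate
// surfaces and contexts on every frame:
//
//   VAResources resources = get_va_resources(width, height);
//   ReleaseVAResources release(this, resources);  // Hands them back on scope exit.
//   ... encode using resources.surface / resources.context ...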

void MJPEGEncoder::release_va_resources(MJPEGEncoder::VAResources resources)
{
	lock_guard<mutex> lock(va_resources_mutex);
	if (va_resources_freelist.size() > 50) {
		// The freelist is full; drop the least recently used entry.
		auto it = va_resources_freelist.end();
		--it;

		VAStatus va_status = vaDestroyBuffer(va_dpy->va_dpy, it->data_buffer);
		CHECK_VASTATUS(va_status, "vaDestroyBuffer");

		va_status = vaDestroyContext(va_dpy->va_dpy, it->context);
		CHECK_VASTATUS(va_status, "vaDestroyContext");

		va_status = vaDestroySurfaces(va_dpy->va_dpy, &it->surface, 1);
		CHECK_VASTATUS(va_status, "vaDestroySurfaces");

		va_status = vaDestroyImage(va_dpy->va_dpy, it->image.image_id);
		CHECK_VASTATUS(va_status, "vaDestroyImage");

		va_resources_freelist.erase(it);
	}

	va_resources_freelist.push_front(resources);
}

namespace {

// Append a 16-bit value to a string in big-endian byte order.
void push16(uint16_t val, string *str)
{
	str->push_back(val >> 8);
	str->push_back(val & 0xff);
}

// Append a 32-bit value to a string in big-endian byte order.
void push32(uint32_t val, string *str)
{
	str->push_back(val >> 24);
	str->push_back((val >> 16) & 0xff);
	str->push_back((val >> 8) & 0xff);
	str->push_back(val & 0xff);
}

}  // namespace
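
// Worked example: push16(42, &s) appends the bytes 0x00 0x2a, and
// push32(10000, &s) appends 0x00 0x00 0x27 0x10 -- big-endian ("MM")
// byte order, matching the TIFF header we declare in the EXIF block below.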

void MJPEGEncoder::init_jpeg_422(unsigned width, unsigned height, const RGBTriplet &white_balance, VectorDestinationManager *dest, jpeg_compress_struct *cinfo)
{
	// The error manager must outlive cinfo, since libjpeg keeps a pointer
	// to it; a static is the simplest way to guarantee that here.
	static jpeg_error_mgr jerr;
	cinfo->err = jpeg_std_error(&jerr);
	jpeg_create_compress(cinfo);

	cinfo->dest = (jpeg_destination_mgr *)dest;

	cinfo->input_components = 3;
	jpeg_set_defaults(cinfo);
	jpeg_set_quality(cinfo, quality, /*force_baseline=*/false);

	cinfo->image_width = width;
	cinfo->image_height = height;
	cinfo->raw_data_in = true;
	jpeg_set_colorspace(cinfo, JCS_YCbCr);
	cinfo->comp_info[0].h_samp_factor = 2;
	cinfo->comp_info[0].v_samp_factor = 1;
	cinfo->comp_info[1].h_samp_factor = 1;
	cinfo->comp_info[1].v_samp_factor = 1;
	cinfo->comp_info[2].h_samp_factor = 1;
	cinfo->comp_info[2].v_samp_factor = 1;
	cinfo->CCIR601_sampling = true;  // Seems to be mostly ignored by libjpeg, though.
	jpeg_start_compress(cinfo, true);
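
	// The sampling factors above give 4:2:2: per MCU, Y contributes two
	// horizontally adjacent 8x8 blocks (h=2, v=1) while Cb and Cr contribute
	// one each, so each MCU covers a 16x8 pixel area with chroma halved
	// horizontally only. This matches the UYVY data we get from the cards,
	// so no chroma resampling is needed.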

	if (fabs(white_balance.r - 1.0f) > 1e-3 ||
	    fabs(white_balance.g - 1.0f) > 1e-3 ||
	    fabs(white_balance.b - 1.0f) > 1e-3) {
		// Convert from (linear) RGB to XYZ.
		Matrix3d rgb_to_xyz_matrix = movit::ColorspaceConversionEffect::get_xyz_matrix(COLORSPACE_sRGB);
		Vector3d xyz = rgb_to_xyz_matrix * Vector3d(white_balance.r, white_balance.g, white_balance.b);

		// Convert from XYZ to xyz by normalizing.
		xyz /= (xyz[0] + xyz[1] + xyz[2]);

		// Create a very rudimentary EXIF header to hold our white point.
		string exif;

		// Exif header, followed by some padding.
		exif = "Exif";
		exif.push_back(0);
		exif.push_back(0);

		// TIFF header first:
		exif += "MM";  // Big endian.

		// Magic number.
		push16(42, &exif);

		// Offset of first IFD (relative to the MM, immediately after the header).
		push32(exif.size() - 6 + 4, &exif);

		// Now the actual IFD.

		// One entry.
		push16(1, &exif);

		// WhitePoint tag ID.
		push16(0x13e, &exif);

		// Rational type.
		push16(5, &exif);

		// Two values (x and y; z is implicit due to normalization).
		push32(2, &exif);

		// Offset (relative to the MM, immediately after the last IFD).
		push32(exif.size() - 6 + 8, &exif);

		// No more IFDs.
		push32(0, &exif);

		// The actual values.
		push32(lrintf(xyz[0] * 10000.0f), &exif);
		push32(10000, &exif);
		push32(lrintf(xyz[1] * 10000.0f), &exif);
		push32(10000, &exif);

		jpeg_write_marker(cinfo, JPEG_APP0 + 1, (const JOCTET *)exif.data(), exif.size());
	}
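
	// For reference, the APP1 payload built above is laid out like this
	// (offsets in TIFF bytes, i.e. relative to the "MM" that follows the
	// 6-byte "Exif\0\0" prefix):
	//
	//    0: "MM", then 42                 -- TIFF header
	//    4: offset 8                      -- first IFD starts right after the header
	//    8: entry count 1
	//   10: tag 0x13e, type 5 (RATIONAL), count 2, value offset 26
	//   22: next-IFD offset 0             -- end of the IFD chain
	//   26: x as <x*10000>/10000, then y as <y*10000>/10000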

	// This comment marker is private to FFmpeg. It signals limited Y'CbCr range
	// (and nothing else).
	jpeg_write_marker(cinfo, JPEG_COM, (const JOCTET *)"CS=ITU601", strlen("CS=ITU601"));
}

vector<uint8_t> MJPEGEncoder::get_jpeg_header(unsigned width, unsigned height, const RGBTriplet &white_balance, jpeg_compress_struct *cinfo)
{
	VectorDestinationManager dest;

	init_jpeg_422(width, height, white_balance, &dest, cinfo);

	// Make a dummy black image; there's seemingly no other easy way of
	// making libjpeg output all of its headers.
	JSAMPROW yptr[8], cbptr[8], crptr[8];
	JSAMPARRAY data[3] = { yptr, cbptr, crptr };
	memset(tmp_y, 0, 4096);
	memset(tmp_cb, 0, 4096);
	memset(tmp_cr, 0, 4096);
	for (unsigned yy = 0; yy < 8; ++yy) {
		yptr[yy] = tmp_y;
		cbptr[yy] = tmp_cb;
		crptr[yy] = tmp_cr;
	}
	for (unsigned y = 0; y < height; y += 8) {
		jpeg_write_raw_data(cinfo, data, /*num_lines=*/8);
	}
	jpeg_finish_compress(cinfo);

	// We're only interested in the header, not the data after it.
	dest.term_destination();
	for (size_t i = 0; i < dest.dest.size() - 1; ++i) {
		if (dest.dest[i] == 0xff && dest.dest[i + 1] == 0xda) {  // Start of scan (SOS).
			unsigned len = dest.dest[i + 2] * 256 + dest.dest[i + 3];
			dest.dest.resize(i + len + 2);
			break;
		}
	}

	return move(dest.dest);
}
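
// Note on the SOS scan above: JPEG markers are 0xFF followed by a type byte
// (0xDA = start of scan), and the two bytes after a marker hold the
// big-endian length of its segment, including the length field itself but
// not the marker. Resizing to i + len + 2 therefore keeps everything up to
// and including the SOS segment -- exactly the header we later hand to
// VA-API as packed raw data.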

MJPEGEncoder::VAData MJPEGEncoder::get_va_data_for_resolution(unsigned width, unsigned height, const RGBTriplet &white_balance)
{
	pair<unsigned, unsigned> key(width, height);
	if (va_data_for_resolution.count(key)) {
		return va_data_for_resolution[key];
	}

	// Use libjpeg to generate a header and set sane defaults for e.g.
	// quantization tables. Then do the actual encode with VA-API.
	jpeg_compress_struct cinfo;
	vector<uint8_t> jpeg_header = get_jpeg_header(width, height, white_balance, &cinfo);

	// Picture parameters.
	VAEncPictureParameterBufferJPEG pic_param;
	memset(&pic_param, 0, sizeof(pic_param));
	pic_param.reconstructed_picture = VA_INVALID_ID;
	pic_param.picture_width = cinfo.image_width;
	pic_param.picture_height = cinfo.image_height;
	for (int component_idx = 0; component_idx < cinfo.num_components; ++component_idx) {
		const jpeg_component_info *comp = &cinfo.comp_info[component_idx];
		pic_param.component_id[component_idx] = comp->component_id;
		pic_param.quantiser_table_selector[component_idx] = comp->quant_tbl_no;
	}
	pic_param.num_components = cinfo.num_components;
	pic_param.num_scan = 1;
	pic_param.sample_bit_depth = 8;
	pic_param.coded_buf = VA_INVALID_ID;  // To be filled out by caller.
	pic_param.pic_flags.bits.huffman = 1;
	pic_param.quality = 50;  // Don't scale the given quantization matrices. (See gen8_mfc_jpeg_fqm_state.)

	// Quantization matrices.
	VAQMatrixBufferJPEG q;
	memset(&q, 0, sizeof(q));

	q.load_lum_quantiser_matrix = true;
	q.load_chroma_quantiser_matrix = true;
	for (int quant_tbl_idx = 0; quant_tbl_idx < min(4, NUM_QUANT_TBLS); ++quant_tbl_idx) {
		const JQUANT_TBL *qtbl = cinfo.quant_tbl_ptrs[quant_tbl_idx];
		assert((qtbl == nullptr) == (quant_tbl_idx >= 2));
		if (qtbl == nullptr) continue;

		uint8_t *qmatrix = (quant_tbl_idx == 0) ? q.lum_quantiser_matrix : q.chroma_quantiser_matrix;
		for (int i = 0; i < 64; ++i) {
			if (qtbl->quantval[i] > 255) {
				fprintf(stderr, "Baseline JPEG only!\n");
				exit(1);
			}
			// quantval[] is in natural (raster) order; the VA-API buffer
			// wants the zigzag order used in the DQT marker.
			qmatrix[i] = qtbl->quantval[jpeg_natural_order[i]];
		}
	}

	// Huffman tables (arithmetic coding is not supported).
	VAHuffmanTableBufferJPEGBaseline huff;
	memset(&huff, 0, sizeof(huff));

	for (int huff_tbl_idx = 0; huff_tbl_idx < min(2, NUM_HUFF_TBLS); ++huff_tbl_idx) {
		const JHUFF_TBL *ac_hufftbl = cinfo.ac_huff_tbl_ptrs[huff_tbl_idx];
		const JHUFF_TBL *dc_hufftbl = cinfo.dc_huff_tbl_ptrs[huff_tbl_idx];
		if (ac_hufftbl == nullptr) {
			assert(dc_hufftbl == nullptr);
			huff.load_huffman_table[huff_tbl_idx] = 0;
		} else {
			assert(dc_hufftbl != nullptr);
			huff.load_huffman_table[huff_tbl_idx] = 1;

			for (int i = 0; i < 16; ++i) {
				huff.huffman_table[huff_tbl_idx].num_dc_codes[i] = dc_hufftbl->bits[i + 1];
			}
			for (int i = 0; i < 12; ++i) {
				huff.huffman_table[huff_tbl_idx].dc_values[i] = dc_hufftbl->huffval[i];
			}
			for (int i = 0; i < 16; ++i) {
				huff.huffman_table[huff_tbl_idx].num_ac_codes[i] = ac_hufftbl->bits[i + 1];
			}
			for (int i = 0; i < 162; ++i) {
				huff.huffman_table[huff_tbl_idx].ac_values[i] = ac_hufftbl->huffval[i];
			}
		}
	}
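
	// Note on the bits[] indexing above: libjpeg's JHUFF_TBL::bits is
	// 1-based (bits[k] = number of codes of length k, for k = 1..16;
	// bits[0] is unused), while VA-API's num_dc_codes/num_ac_codes arrays
	// are 0-based, hence the i + 1. The 12 possible DC symbols are the
	// magnitude categories 0..11, and the 162 AC symbols are the valid
	// run/size combinations of baseline JPEG.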

	// Slice parameters (metadata about the slice).
	VAEncSliceParameterBufferJPEG parms;
	memset(&parms, 0, sizeof(parms));
	for (int component_idx = 0; component_idx < cinfo.num_components; ++component_idx) {
		const jpeg_component_info *comp = &cinfo.comp_info[component_idx];
		parms.components[component_idx].component_selector = comp->component_id;
		parms.components[component_idx].dc_table_selector = comp->dc_tbl_no;
		parms.components[component_idx].ac_table_selector = comp->ac_tbl_no;
		if (parms.components[component_idx].dc_table_selector > 1 ||
		    parms.components[component_idx].ac_table_selector > 1) {
			fprintf(stderr, "Uses too many Huffman tables\n");
			exit(1);
		}
	}
	parms.num_components = cinfo.num_components;
	parms.restart_interval = cinfo.restart_interval;

	jpeg_destroy_compress(&cinfo);

	VAData ret;
	ret.jpeg_header = move(jpeg_header);
	ret.pic_param = pic_param;
	ret.q = q;
	ret.huff = huff;
	ret.parms = parms;
	va_data_for_resolution[key] = ret;
	return ret;
}

void MJPEGEncoder::encode_jpeg_va(QueuedFrame &&qf)
{
	PBOFrameAllocator::Userdata *userdata = (PBOFrameAllocator::Userdata *)qf.frame->userdata;
	unsigned width = qf.video_format.width;
	unsigned height = qf.video_format.height;

	VAResources resources;
	ReleaseVAResources release;
	if (userdata->data_copy_current_src == PBOFrameAllocator::Userdata::FROM_VA_API) {
		resources = move(userdata->va_resources);
		release = move(userdata->va_resources_release);
	} else {
		assert(userdata->data_copy_current_src == PBOFrameAllocator::Userdata::FROM_MALLOC);
		resources = get_va_resources(width, height);
		release = ReleaseVAResources(this, resources);
	}

	VAData va_data = get_va_data_for_resolution(width, height, qf.white_balance);
	va_data.pic_param.coded_buf = resources.data_buffer;

	VABufferID pic_param_buffer;
	VAStatus va_status = vaCreateBuffer(va_dpy->va_dpy, config_id, VAEncPictureParameterBufferType, sizeof(va_data.pic_param), 1, &va_data.pic_param, &pic_param_buffer);
	CHECK_VASTATUS(va_status, "vaCreateBuffer");
	VABufferDestroyer destroy_pic_param(va_dpy->va_dpy, pic_param_buffer);

	VABufferID q_buffer;
	va_status = vaCreateBuffer(va_dpy->va_dpy, config_id, VAQMatrixBufferType, sizeof(va_data.q), 1, &va_data.q, &q_buffer);
	CHECK_VASTATUS(va_status, "vaCreateBuffer");
	VABufferDestroyer destroy_iq(va_dpy->va_dpy, q_buffer);

	VABufferID huff_buffer;
	va_status = vaCreateBuffer(va_dpy->va_dpy, config_id, VAHuffmanTableBufferType, sizeof(va_data.huff), 1, &va_data.huff, &huff_buffer);
	CHECK_VASTATUS(va_status, "vaCreateBuffer");
	VABufferDestroyer destroy_huff(va_dpy->va_dpy, huff_buffer);

	VABufferID slice_param_buffer;
	va_status = vaCreateBuffer(va_dpy->va_dpy, config_id, VAEncSliceParameterBufferType, sizeof(va_data.parms), 1, &va_data.parms, &slice_param_buffer);
	CHECK_VASTATUS(va_status, "vaCreateBuffer");
	VABufferDestroyer destroy_slice_param(va_dpy->va_dpy, slice_param_buffer);

	if (userdata->data_copy_current_src == PBOFrameAllocator::Userdata::FROM_VA_API) {
		// The pixel data is already put into the image by the caller.
		va_status = vaUnmapBuffer(va_dpy->va_dpy, resources.image.buf);
		CHECK_VASTATUS(va_status, "vaUnmapBuffer");
	} else {
		assert(userdata->data_copy_current_src == PBOFrameAllocator::Userdata::FROM_MALLOC);

		// Upload the pixel data.
		uint8_t *surface_p = nullptr;
		vaMapBuffer(va_dpy->va_dpy, resources.image.buf, (void **)&surface_p);

		size_t field_start_line = qf.video_format.extra_lines_top;  // No interlacing support.
		size_t field_start = qf.cbcr_offset * 2 + qf.video_format.width * field_start_line * 2;

		{
			const uint8_t *src = qf.frame->data_copy + field_start;
			uint8_t *dst = (unsigned char *)surface_p + resources.image.offsets[0];
			memcpy_with_pitch(dst, src, qf.video_format.width * 2, resources.image.pitches[0], qf.video_format.height);
		}

		va_status = vaUnmapBuffer(va_dpy->va_dpy, resources.image.buf);
		CHECK_VASTATUS(va_status, "vaUnmapBuffer");
	}

	qf.frame->data_copy = nullptr;

	// Seemingly vaPutImage() (which triggers a GPU copy) is much nicer to the
	// CPU than vaDeriveImage() and copying directly into the GPU's buffers.
	// Exactly why is unclear, but it seems to involve L3 cache usage when there
	// are many high-res (1080p+) images in play.
	va_status = vaPutImage(va_dpy->va_dpy, resources.surface, resources.image.image_id, 0, 0, width, height, 0, 0, width, height);
	CHECK_VASTATUS(va_status, "vaPutImage");
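
	// For comparison, the vaDeriveImage() route mentioned above would look
	// roughly like this (illustrative sketch only, error handling elided);
	// it maps the surface's own backing store instead of a separate image,
	// avoiding the vaPutImage() copy at the cost of writing straight into
	// GPU-visible memory:
	//
	//   VAImage derived;
	//   vaDeriveImage(va_dpy->va_dpy, resources.surface, &derived);
	//   uint8_t *p;
	//   vaMapBuffer(va_dpy->va_dpy, derived.buf, (void **)&p);
	//   memcpy_with_pitch(p + derived.offsets[0], src, width * 2, derived.pitches[0], height);
	//   vaUnmapBuffer(va_dpy->va_dpy, derived.buf);
	//   vaDestroyImage(va_dpy->va_dpy, derived.image_id);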

	// Finally, stick in the JPEG header.
	VAEncPackedHeaderParameterBuffer header_parm;
	header_parm.type = VAEncPackedHeaderRawData;
	header_parm.bit_length = 8 * va_data.jpeg_header.size();

	VABufferID header_parm_buffer;
	va_status = vaCreateBuffer(va_dpy->va_dpy, config_id, VAEncPackedHeaderParameterBufferType, sizeof(header_parm), 1, &header_parm, &header_parm_buffer);
	CHECK_VASTATUS(va_status, "vaCreateBuffer");
	VABufferDestroyer destroy_header(va_dpy->va_dpy, header_parm_buffer);

	VABufferID header_data_buffer;
	va_status = vaCreateBuffer(va_dpy->va_dpy, config_id, VAEncPackedHeaderDataBufferType, va_data.jpeg_header.size(), 1, va_data.jpeg_header.data(), &header_data_buffer);
	CHECK_VASTATUS(va_status, "vaCreateBuffer");
	VABufferDestroyer destroy_header_data(va_dpy->va_dpy, header_data_buffer);

	va_status = vaBeginPicture(va_dpy->va_dpy, resources.context, resources.surface);
	CHECK_VASTATUS(va_status, "vaBeginPicture");
	va_status = vaRenderPicture(va_dpy->va_dpy, resources.context, &pic_param_buffer, 1);
	CHECK_VASTATUS(va_status, "vaRenderPicture(pic_param)");
	va_status = vaRenderPicture(va_dpy->va_dpy, resources.context, &q_buffer, 1);
	CHECK_VASTATUS(va_status, "vaRenderPicture(q)");
	va_status = vaRenderPicture(va_dpy->va_dpy, resources.context, &huff_buffer, 1);
	CHECK_VASTATUS(va_status, "vaRenderPicture(huff)");
	va_status = vaRenderPicture(va_dpy->va_dpy, resources.context, &slice_param_buffer, 1);
	CHECK_VASTATUS(va_status, "vaRenderPicture(slice_param)");
	va_status = vaRenderPicture(va_dpy->va_dpy, resources.context, &header_parm_buffer, 1);
	CHECK_VASTATUS(va_status, "vaRenderPicture(header_parm)");
	va_status = vaRenderPicture(va_dpy->va_dpy, resources.context, &header_data_buffer, 1);
	CHECK_VASTATUS(va_status, "vaRenderPicture(header_data)");
	va_status = vaEndPicture(va_dpy->va_dpy, resources.context);
	CHECK_VASTATUS(va_status, "vaEndPicture");

	qf.resources = move(resources);
	qf.resource_releaser = move(release);

	{
		lock_guard<mutex> lock(mu);
		frames_encoding.push(move(qf));
		any_frames_encoding.notify_all();
	}
}

void MJPEGEncoder::va_receiver_thread_func()
{
	pthread_setname_np(pthread_self(), "MJPEG_Receive");
	for (;;) {
		QueuedFrame qf;
		{
			unique_lock<mutex> lock(mu);
			any_frames_encoding.wait(lock, [this] { return !frames_encoding.empty() || should_quit; });
			if (should_quit) return;
			qf = move(frames_encoding.front());
			frames_encoding.pop();
		}

		// Write audio before video, since Futatabi expects it.
		if (qf.audio.size() > 0) {
			write_audio_packet(qf.pts, qf.card_index, qf.audio);
		}

		VAStatus va_status = vaSyncSurface(va_dpy->va_dpy, qf.resources.surface);
		CHECK_VASTATUS(va_status, "vaSyncSurface");

		VACodedBufferSegment *segment;
		va_status = vaMapBuffer(va_dpy->va_dpy, qf.resources.data_buffer, (void **)&segment);
		CHECK_VASTATUS(va_status, "vaMapBuffer");

		const uint8_t *coded_buf = reinterpret_cast<uint8_t *>(segment->buf);
		write_mjpeg_packet(qf.pts, qf.card_index, coded_buf, segment->size);

		va_status = vaUnmapBuffer(va_dpy->va_dpy, qf.resources.data_buffer);
		CHECK_VASTATUS(va_status, "vaUnmapBuffer");
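
		// Note: mapping a VAEncCodedBufferType buffer yields a linked list
		// of VACodedBufferSegment (chained through segment->next). We only
		// read the first segment here, on the assumption that the coded
		// buffer was allocated large enough (width * height * 3 + 8192,
		// see get_va_resources()) for the JPEG to land in one segment.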
	}
}

vector<uint8_t> MJPEGEncoder::encode_jpeg_libjpeg(const QueuedFrame &qf)
{
	unsigned width = qf.video_format.width;
	unsigned height = qf.video_format.height;

	VectorDestinationManager dest;
	jpeg_compress_struct cinfo;
	init_jpeg_422(width, height, qf.white_balance, &dest, &cinfo);

	size_t field_start_line = qf.video_format.extra_lines_top;  // No interlacing support.
	size_t field_start = qf.cbcr_offset * 2 + qf.video_format.width * field_start_line * 2;

	JSAMPROW yptr[8], cbptr[8], crptr[8];
	JSAMPARRAY data[3] = { yptr, cbptr, crptr };
	for (unsigned y = 0; y < qf.video_format.height; y += 8) {
		const uint8_t *src = qf.frame->data_copy + field_start + y * qf.video_format.width * 2;

		memcpy_interleaved(tmp_cbcr, tmp_y, src, qf.video_format.width * 8 * 2);
		memcpy_interleaved(tmp_cb, tmp_cr, tmp_cbcr, qf.video_format.width * 8);
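		// The two passes above split UYVY into libjpeg's planar layout.
		// Worked example on one pixel pair, src = U0 Y0 V0 Y1: the first
		// pass de-interleaves bytes into tmp_cbcr = U0 V0 ... and
		// tmp_y = Y0 Y1 ..., and the second de-interleaves tmp_cbcr into
		// tmp_cb = U0 ... and tmp_cr = V0 ...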
		for (unsigned yy = 0; yy < 8; ++yy) {
			yptr[yy] = tmp_y + yy * width;
			cbptr[yy] = tmp_cb + yy * width / 2;
			crptr[yy] = tmp_cr + yy * width / 2;
		}
		jpeg_write_raw_data(&cinfo, data, /*num_lines=*/8);
	}
	jpeg_finish_compress(&cinfo);

	return move(dest.dest);
}