#include "mjpeg_encoder.h"

#include <assert.h>
#include <bmusb/bmusb.h>
#include <jpeglib.h>
#include <math.h>
#include <movit/colorspace_conversion_effect.h>
#include <movit/effect.h>
#include <movit/image_format.h>
#include <movit/ycbcr.h>
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <type_traits>
#include <va/va.h>
#include <va/va_enc_jpeg.h>

#if __SSE2__
#include <immintrin.h>
#endif
extern "C" {
#include <libavcodec/codec_id.h>
#include <libavcodec/defs.h>
#include <libavcodec/packet.h>
#include <libavformat/avformat.h>
#include <libavformat/avio.h>
#include <libavutil/avutil.h>
#include <libavutil/channel_layout.h>
#include <libavutil/dict.h>
#include <libavutil/mathematics.h>
#include <libavutil/mem.h>
#include <libavutil/pixfmt.h>
#include <libavutil/rational.h>
}
#include "pbo_frame_allocator.h"
#include "ref_counted_frame.h"
#include "shared/ffmpeg_raii.h"
#include "shared/httpd.h"
#include "shared/memcpy_interleaved.h"
#include "shared/metrics.h"
#include "shared/shared_defs.h"
#include "shared/timebase.h"
#include "shared/va_display.h"
#include "shared/va_resource_pool.h"
using namespace Eigen;
using namespace bmusb;
using namespace movit;
using namespace std;

static VAImageFormat uyvy_format, nv12_format;
extern void memcpy_with_pitch(uint8_t *dst, const uint8_t *src, size_t src_width, size_t dst_pitch, size_t height);
// The inverse of memcpy_interleaved(), with (slow) support for pitch.
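// That is, for each output row:
//
//   dst[y * dst_pitch + 2 * x + 0] = src1[y * src_width + x]
//   dst[y * dst_pitch + 2 * x + 1] = src2[y * src_width + x]
//
// This is what turns separate Cb and Cr planes into NV12's interleaved
// CbCr plane.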
void interleave_with_pitch(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, size_t src_width, size_t dst_pitch, size_t height)
{
#if __SSE2__
	if (dst_pitch == src_width * 2 && (src_width * height) % 16 == 0) {
		__m128i *dptr = reinterpret_cast<__m128i *>(dst);
		const __m128i *sptr1 = reinterpret_cast<const __m128i *>(src1);
		const __m128i *sptr2 = reinterpret_cast<const __m128i *>(src2);
		for (size_t i = 0; i < src_width * height / 16; ++i) {
			__m128i data1 = _mm_loadu_si128(sptr1++);
			__m128i data2 = _mm_loadu_si128(sptr2++);
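			// unpacklo/unpackhi interleave the two registers bytewise
			// (a0, b0, a1, b1, ...), so each pair of 16-byte loads
			// produces 32 interleaved output bytes.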
			_mm_storeu_si128(dptr++, _mm_unpacklo_epi8(data1, data2));
			_mm_storeu_si128(dptr++, _mm_unpackhi_epi8(data1, data2));
		}
		return;
	}
#endif

	for (size_t y = 0; y < height; ++y) {
		uint8_t *dptr = dst + y * dst_pitch;
		const uint8_t *sptr1 = src1 + y * src_width;
		const uint8_t *sptr2 = src2 + y * src_width;
		for (size_t x = 0; x < src_width; ++x) {
			*dptr++ = *sptr1++;
			*dptr++ = *sptr2++;
		}
	}
}
// From libjpeg (although it's of course identical between implementations).
static const int jpeg_natural_order[DCTSIZE2] = {
	 0,  1,  8, 16,  9,  2,  3, 10,
	17, 24, 32, 25, 18, 11,  4,  5,
	12, 19, 26, 33, 40, 48, 41, 34,
	27, 20, 13,  6,  7, 14, 21, 28,
	35, 42, 49, 56, 57, 50, 43, 36,
	29, 22, 15, 23, 30, 37, 44, 51,
	58, 59, 52, 45, 38, 31, 39, 46,
	53, 60, 61, 54, 47, 55, 62, 63,
};
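// jpeg_natural_order[i] is the natural-order (row-major) index of the i-th
// coefficient in zigzag order. libjpeg keeps its quantization tables in
// natural order, but VA-API wants them in the zigzag order used in the DQT
// marker, so get_va_data_for_parameters() below uses this table to convert.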
struct VectorDestinationManager {
	jpeg_destination_mgr pub;
	std::vector<uint8_t> dest;

	VectorDestinationManager()
	{
		pub.init_destination = init_destination_thunk;
		pub.empty_output_buffer = empty_output_buffer_thunk;
		pub.term_destination = term_destination_thunk;
	}

	static void init_destination_thunk(j_compress_ptr ptr)
	{
		((VectorDestinationManager *)(ptr->dest))->init_destination();
	}

	inline void init_destination()
	{
		make_room(0);
	}

	static boolean empty_output_buffer_thunk(j_compress_ptr ptr)
	{
		return ((VectorDestinationManager *)(ptr->dest))->empty_output_buffer();
	}

	inline bool empty_output_buffer()
	{
		make_room(dest.size());  // Should ignore pub.free_in_buffer!
		return true;
	}

	inline void make_room(size_t bytes_used)
	{
		dest.resize(bytes_used + 4096);
		dest.resize(dest.capacity());
		pub.next_output_byte = dest.data() + bytes_used;
		pub.free_in_buffer = dest.size() - bytes_used;
	}

	static void term_destination_thunk(j_compress_ptr ptr)
	{
		((VectorDestinationManager *)(ptr->dest))->term_destination();
	}

	inline void term_destination()
	{
		dest.resize(dest.size() - pub.free_in_buffer);
	}
};
static_assert(std::is_standard_layout<VectorDestinationManager>::value, "");
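// (Standard layout matters here: libjpeg treats a pointer to this struct as
// a plain jpeg_destination_mgr *, so "pub" must be guaranteed to sit at
// offset 0 for the casts in the thunks above to be valid.)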
int MJPEGEncoder::write_packet2_thunk(void *opaque, uint8_t *buf, int buf_size, AVIODataMarkerType type, int64_t time)
{
	WritePacket2Context *ctx = (WritePacket2Context *)opaque;
	return ctx->mjpeg_encoder->write_packet2(ctx->stream_id, buf, buf_size, type, time);
}
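// Called for each chunk the muxer writes. Header chunks are accumulated and
// installed as the HTTP stream header, so that clients connecting mid-stream
// still receive a valid mux header; everything else is broadcast as ordinary
// stream data.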
int MJPEGEncoder::write_packet2(HTTPD::StreamID stream_id, uint8_t *buf, int buf_size, AVIODataMarkerType type, int64_t time)
{
	string *mux_header = &streams[stream_id].mux_header;
	if (type == AVIO_DATA_MARKER_HEADER) {
		mux_header->append((char *)buf, buf_size);
		httpd->set_header(stream_id, *mux_header);
	} else {
		httpd->add_data(stream_id, (char *)buf, buf_size, /*keyframe=*/true, AV_NOPTS_VALUE, AVRational{ AV_TIME_BASE, 1 });
	}
	return buf_size;
}
namespace {

void add_video_stream(AVFormatContext *avctx)
{
	AVStream *stream = avformat_new_stream(avctx, nullptr);
	if (stream == nullptr) {
		fprintf(stderr, "avformat_new_stream() failed\n");
		abort();
	}

	// FFmpeg is very picky about having audio at 1/48000 timebase,
	// no matter what we write. Even though we'd prefer our usual 1/120000,
	// put the video on the same one, so that we can have locked audio.
	stream->time_base = AVRational{ 1, OUTPUT_FREQUENCY };
	stream->codecpar->codec_type = AVMEDIA_TYPE_VIDEO;
	stream->codecpar->codec_id = AV_CODEC_ID_MJPEG;

	// Used for aspect ratio only. Can change without notice (the mux won't care).
	stream->codecpar->width = global_flags.width;
	stream->codecpar->height = global_flags.height;

	// TODO: We could perhaps use the interpretation for each card here
	// (or at least the command-line flags) instead of the defaults,
	// but what would we do when they change?
	stream->codecpar->color_primaries = AVCOL_PRI_BT709;
	stream->codecpar->color_trc = AVCOL_TRC_IEC61966_2_1;
	stream->codecpar->color_space = AVCOL_SPC_BT709;
	stream->codecpar->color_range = AVCOL_RANGE_MPEG;
	stream->codecpar->chroma_location = AVCHROMA_LOC_LEFT;
	stream->codecpar->field_order = AV_FIELD_PROGRESSIVE;
}
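// (AVCOL_TRC_IEC61966_2_1 is the sRGB transfer function.)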
void add_audio_stream(AVFormatContext *avctx)
{
	AVStream *stream = avformat_new_stream(avctx, nullptr);
	if (stream == nullptr) {
		fprintf(stderr, "avformat_new_stream() failed\n");
		abort();
	}
	stream->time_base = AVRational{ 1, OUTPUT_FREQUENCY };
	stream->codecpar->codec_type = AVMEDIA_TYPE_AUDIO;
	stream->codecpar->codec_id = AV_CODEC_ID_PCM_S32LE;
	stream->codecpar->ch_layout.order = AV_CHANNEL_ORDER_NATIVE;
	stream->codecpar->ch_layout.nb_channels = 2;
	stream->codecpar->ch_layout.u.mask = AV_CH_LAYOUT_STEREO;
	stream->codecpar->sample_rate = OUTPUT_FREQUENCY;
}
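// (This is the FFmpeg 5.1+ AVChannelLayout API; the three ch_layout fields
// together say "native order, two channels, stereo mask", equivalent to the
// old channel_layout = AV_CH_LAYOUT_STEREO.)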
void finalize_mux(AVFormatContext *avctx)
{
	AVDictionary *options = NULL;
	vector<pair<string, string>> opts = MUX_OPTS;
	for (pair<string, string> opt : opts) {
		av_dict_set(&options, opt.first.c_str(), opt.second.c_str(), 0);
	}
	if (avformat_write_header(avctx, &options) < 0) {
		fprintf(stderr, "avformat_write_header() failed\n");
		abort();
	}
}

}  // namespace
MJPEGEncoder::MJPEGEncoder(HTTPD *httpd, const string &va_display)
	: httpd(httpd)
{
	create_ffmpeg_context(HTTPD::StreamID{ HTTPD::MULTICAM_STREAM, 0 });
	for (unsigned stream_idx = 0; stream_idx < MAX_VIDEO_CARDS; ++stream_idx) {
		create_ffmpeg_context(HTTPD::StreamID{ HTTPD::SIPHON_STREAM, stream_idx });
	}

	add_stream(HTTPD::StreamID{ HTTPD::MULTICAM_STREAM, 0 });

	// Initialize VA-API.
	VAConfigID config_id_422, config_id_420;
	string error;
	va_dpy = try_open_va(va_display, { VAProfileJPEGBaseline }, VAEntrypointEncPicture,
		{
			{ "4:2:2", VA_RT_FORMAT_YUV422, VA_FOURCC_UYVY, &config_id_422, &uyvy_format },
			// We'd prefer VA_FOURCC_I420, but it's not supported by Intel's driver.
			{ "4:2:0", VA_RT_FORMAT_YUV420, VA_FOURCC_NV12, &config_id_420, &nv12_format }
		},
		/*chosen_profile=*/nullptr, &error);
	if (va_dpy == nullptr) {
		fprintf(stderr, "Could not initialize VA-API for MJPEG encoding: %s. JPEGs will be encoded in software if needed.\n", error.c_str());
	}

	encoder_thread = thread(&MJPEGEncoder::encoder_thread_func, this);
	if (va_dpy != nullptr) {
		va_pool.reset(new VAResourcePool(va_dpy->va_dpy, uyvy_format, nv12_format, config_id_422, config_id_420, /*with_data_buffer=*/true));
		va_receiver_thread = thread(&MJPEGEncoder::va_receiver_thread_func, this);
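		// With VA-API, encoding becomes a two-stage pipeline:
		// encoder_thread_func() submits frames to the GPU, and
		// va_receiver_thread_func() picks up the finished bitstreams,
		// so submission is not stalled waiting for readback.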
	}

	global_metrics.add("mjpeg_frames", {{ "status", "dropped" }, { "reason", "zero_size" }}, &metric_mjpeg_frames_zero_size_dropped);
	global_metrics.add("mjpeg_frames", {{ "status", "dropped" }, { "reason", "interlaced" }}, &metric_mjpeg_frames_interlaced_dropped);
	global_metrics.add("mjpeg_frames", {{ "status", "dropped" }, { "reason", "unsupported_pixel_format" }}, &metric_mjpeg_frames_unsupported_pixel_format_dropped);
	global_metrics.add("mjpeg_frames", {{ "status", "dropped" }, { "reason", "oversized" }}, &metric_mjpeg_frames_oversized_dropped);
	global_metrics.add("mjpeg_frames", {{ "status", "dropped" }, { "reason", "overrun" }}, &metric_mjpeg_overrun_dropped);
	global_metrics.add("mjpeg_frames", {{ "status", "submitted" }}, &metric_mjpeg_overrun_submitted);

	running = true;
}
MJPEGEncoder::~MJPEGEncoder()
{
	for (auto &id_and_stream : streams) {
		av_free(id_and_stream.second.avctx->pb->buffer);
	}

	global_metrics.remove("mjpeg_frames", {{ "status", "dropped" }, { "reason", "zero_size" }});
	global_metrics.remove("mjpeg_frames", {{ "status", "dropped" }, { "reason", "interlaced" }});
	global_metrics.remove("mjpeg_frames", {{ "status", "dropped" }, { "reason", "unsupported_pixel_format" }});
	global_metrics.remove("mjpeg_frames", {{ "status", "dropped" }, { "reason", "oversized" }});
	global_metrics.remove("mjpeg_frames", {{ "status", "dropped" }, { "reason", "overrun" }});
	global_metrics.remove("mjpeg_frames", {{ "status", "submitted" }});
}
void MJPEGEncoder::stop()
{
	if (!running) {
		return;
	}
	running = false;
	should_quit = true;
	any_frames_to_be_encoded.notify_all();
	any_frames_encoding.notify_all();
	encoder_thread.join();
	if (va_dpy != nullptr) {
		va_receiver_thread.join();
	}
}
namespace {

bool is_uyvy(RefCountedFrame frame)
{
	PBOFrameAllocator::Userdata *userdata = (PBOFrameAllocator::Userdata *)frame->userdata;
	return userdata->pixel_format == PixelFormat_8BitYCbCr && frame->interleaved;
}

bool is_i420(RefCountedFrame frame)
{
	PBOFrameAllocator::Userdata *userdata = (PBOFrameAllocator::Userdata *)frame->userdata;
	return userdata->pixel_format == PixelFormat_8BitYCbCrPlanar &&
		userdata->ycbcr_format.chroma_subsampling_x == 2 &&
		userdata->ycbcr_format.chroma_subsampling_y == 2;
}

}  // namespace
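// (is_uyvy() matches the interleaved 4:2:2 format the capture cards deliver;
// is_i420() matches fully planar 4:2:0. These are the only two formats the
// encoder accepts; see upload_frame() below.)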
void MJPEGEncoder::upload_frame(int64_t pts, unsigned card_index, RefCountedFrame frame, const bmusb::VideoFormat &video_format, size_t y_offset, size_t cbcr_offset, vector<int32_t> audio, const RGBTriplet &white_balance)
{
	if (video_format.width == 0 || video_format.height == 0) {
		++metric_mjpeg_frames_zero_size_dropped;
		return;
	}
	if (video_format.interlaced) {
		fprintf(stderr, "Card %u: Ignoring JPEG encoding for interlaced frame\n", card_index);
		++metric_mjpeg_frames_interlaced_dropped;
		return;
	}
	if (!is_uyvy(frame) && !is_i420(frame)) {
		fprintf(stderr, "Card %u: Ignoring JPEG encoding for unsupported pixel format\n", card_index);
		++metric_mjpeg_frames_unsupported_pixel_format_dropped;
		return;
	}
	if (video_format.width > 4096 || video_format.height > 4096) {
		fprintf(stderr, "Card %u: Ignoring JPEG encoding for oversized frame\n", card_index);
		++metric_mjpeg_frames_oversized_dropped;
		return;
	}

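	// Bound the queue so that latency does not grow without limit if the
	// encoder cannot keep up; dropped frames show up in the overrun metric.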
	lock_guard<mutex> lock(mu);
	if (frames_to_be_encoded.size() + frames_encoding.size() > 50) {
		fprintf(stderr, "WARNING: MJPEG encoding doesn't keep up, discarding frame.\n");
		++metric_mjpeg_overrun_dropped;
		return;
	}
	++metric_mjpeg_overrun_submitted;
	frames_to_be_encoded.push(QueuedFrame{ pts, card_index, frame, video_format, y_offset, cbcr_offset, move(audio), white_balance });
	any_frames_to_be_encoded.notify_all();
}
bool MJPEGEncoder::should_encode_mjpeg_for_card(unsigned card_index)
{
	// Only bother doing MJPEG encoding if there are any connected clients
	// that want the stream.
	if (httpd->get_num_connected_multicam_clients() == 0 &&
	    httpd->get_num_connected_siphon_clients(card_index) == 0) {
		return false;
	}

	auto it = global_flags.card_to_mjpeg_stream_export.find(card_index);
	return (it != global_flags.card_to_mjpeg_stream_export.end());
}
void MJPEGEncoder::encoder_thread_func()
{
	pthread_setname_np(pthread_self(), "MJPEG_Encode");
	posix_memalign((void **)&tmp_y, 4096, 4096 * 8);
	posix_memalign((void **)&tmp_cbcr, 4096, 4096 * 8);
	posix_memalign((void **)&tmp_cb, 4096, 4096 * 8);
	posix_memalign((void **)&tmp_cr, 4096, 4096 * 8);
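	// Scratch buffers for deinterleaving; 4096 * 8 bytes holds one 8-row MCU
	// band at the maximum width of 4096 pixels enforced in upload_frame().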

	for (;;) {
		QueuedFrame qf;
		{
			unique_lock<mutex> lock(mu);
			any_frames_to_be_encoded.wait(lock, [this] { return !frames_to_be_encoded.empty() || should_quit; });
			if (should_quit) break;
			qf = move(frames_to_be_encoded.front());
			frames_to_be_encoded.pop();
		}
395 if (va_dpy != nullptr) {
396 // Will call back in the receiver thread.
397 encode_jpeg_va(move(qf));
399 update_siphon_streams();
401 HTTPD::StreamID multicam_id{ HTTPD::MULTICAM_STREAM, 0 };
402 HTTPD::StreamID siphon_id{ HTTPD::SIPHON_STREAM, qf.card_index };
403 assert(streams.count(multicam_id));
405 // Write audio before video, since Futatabi expects it.
406 if (qf.audio.size() > 0) {
407 write_audio_packet(streams[multicam_id].avctx.get(), qf.pts, stream_index + global_flags.card_to_mjpeg_stream_export.size(), qf.audio);
408 if (streams.count(siphon_id)) {
409 write_audio_packet(streams[siphon_id].avctx.get(), qf.pts, /*stream_index=*/1, qf.audio);
413 // Encode synchronously, in the same thread.
414 vector<uint8_t> jpeg = encode_jpeg_libjpeg(qf);
415 write_mjpeg_packet(streams[multicam_id].avctx.get(), qf.pts, stream_index, jpeg.data(), jpeg.size());
416 if (streams.count(siphon_id)) {
417 write_mjpeg_packet(streams[siphon_id].avctx.get(), qf.pts, /*stream_index=*/0, jpeg.data(), jpeg.size());
void MJPEGEncoder::write_mjpeg_packet(AVFormatContext *avctx, int64_t pts, unsigned stream_index, const uint8_t *jpeg, size_t jpeg_size)
{
	AVPacket pkt;
	memset(&pkt, 0, sizeof(pkt));
	pkt.buf = nullptr;
	pkt.data = const_cast<uint8_t *>(jpeg);
	pkt.size = jpeg_size;
	pkt.stream_index = stream_index;
	pkt.flags = AV_PKT_FLAG_KEY;
	AVRational time_base = avctx->streams[pkt.stream_index]->time_base;
	pkt.pts = pkt.dts = av_rescale_q(pts, AVRational{ 1, TIMEBASE }, time_base);
	pkt.duration = 0;

	if (av_write_frame(avctx, &pkt) < 0) {
		fprintf(stderr, "av_write_frame() failed\n");
		abort();
	}
}
void MJPEGEncoder::write_audio_packet(AVFormatContext *avctx, int64_t pts, unsigned stream_index, const vector<int32_t> &audio)
{
	AVPacket pkt;
	memset(&pkt, 0, sizeof(pkt));
	pkt.buf = nullptr;
	pkt.data = reinterpret_cast<uint8_t *>(const_cast<int32_t *>(&audio[0]));
	pkt.size = audio.size() * sizeof(audio[0]);
	pkt.stream_index = stream_index;
	pkt.flags = AV_PKT_FLAG_KEY;
	AVRational time_base = avctx->streams[pkt.stream_index]->time_base;
	pkt.pts = pkt.dts = av_rescale_q(pts, AVRational{ 1, TIMEBASE }, time_base);
	size_t num_stereo_samples = audio.size() / 2;
	pkt.duration = av_rescale_q(num_stereo_samples, AVRational{ 1, OUTPUT_FREQUENCY }, time_base);
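	// (The samples are interleaved stereo, hence audio.size() / 2 sample
	// pairs; the duration is rescaled from the sample rate to the stream
	// timebase.)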

	if (av_write_frame(avctx, &pkt) < 0) {
		fprintf(stderr, "av_write_frame() failed\n");
		abort();
	}
}
class VABufferDestroyer {
public:
	VABufferDestroyer(VADisplay dpy, VABufferID buf)
		: dpy(dpy), buf(buf) {}

	~VABufferDestroyer() {
		VAStatus va_status = vaDestroyBuffer(dpy, buf);
		CHECK_VASTATUS(va_status, "vaDestroyBuffer");
	}

private:
	VADisplay dpy;
	VABufferID buf;
};
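// (RAII guard: each parameter buffer created with vaCreateBuffer() in
// encode_jpeg_va() gets one of these, so the buffers are released again
// when the function returns.)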
namespace {

void push16(uint16_t val, string *str)
{
	str->push_back(val >> 8);
	str->push_back(val & 0xff);
}

void push32(uint32_t val, string *str)
{
	str->push_back(val >> 24);
	str->push_back((val >> 16) & 0xff);
	str->push_back((val >> 8) & 0xff);
	str->push_back(val & 0xff);
}

}  // namespace
void MJPEGEncoder::init_jpeg(unsigned width, unsigned height, const RGBTriplet &white_balance, VectorDestinationManager *dest, jpeg_compress_struct *cinfo, int y_h_samp_factor, int y_v_samp_factor)
{
	jpeg_error_mgr jerr;
	cinfo->err = jpeg_std_error(&jerr);
	jpeg_create_compress(cinfo);

	cinfo->dest = (jpeg_destination_mgr *)dest;

	cinfo->input_components = 3;
	jpeg_set_defaults(cinfo);
	jpeg_set_quality(cinfo, quality, /*force_baseline=*/false);

	cinfo->image_width = width;
	cinfo->image_height = height;
	cinfo->raw_data_in = true;
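	// raw_data_in means we feed already-subsampled Y/Cb/Cr planes through
	// jpeg_write_raw_data() ourselves, instead of having libjpeg do the
	// color conversion and downsampling.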
	jpeg_set_colorspace(cinfo, JCS_YCbCr);
	cinfo->comp_info[0].h_samp_factor = y_h_samp_factor;
	cinfo->comp_info[0].v_samp_factor = y_v_samp_factor;
	cinfo->comp_info[1].h_samp_factor = 1;
	cinfo->comp_info[1].v_samp_factor = 1;
	cinfo->comp_info[2].h_samp_factor = 1;
	cinfo->comp_info[2].v_samp_factor = 1;
	cinfo->CCIR601_sampling = true;  // Seems to be mostly ignored by libjpeg, though.
	jpeg_start_compress(cinfo, true);

	if (fabs(white_balance.r - 1.0f) > 1e-3 ||
	    fabs(white_balance.g - 1.0f) > 1e-3 ||
	    fabs(white_balance.b - 1.0f) > 1e-3) {
		// Convert from (linear) RGB to XYZ.
		Matrix3d rgb_to_xyz_matrix = movit::ColorspaceConversionEffect::get_xyz_matrix(COLORSPACE_sRGB);
		Vector3d xyz = rgb_to_xyz_matrix * Vector3d(white_balance.r, white_balance.g, white_balance.b);

		// Convert from XYZ to xyz by normalizing.
		xyz /= (xyz[0] + xyz[1] + xyz[2]);

		// Create a very rudimentary EXIF header to hold our white point.
		string exif;

		// Exif header, followed by some padding.
		exif = "Exif";
		push16(0, &exif);

		// TIFF header first:
		exif += "MM";  // Big endian.

		// Magic number.
		push16(42, &exif);

		// Offset of first IFD (relative to the MM, immediately after the header).
		push32(exif.size() - 6 + 4, &exif);

		// Now the actual IFD.

		// One entry.
		push16(1, &exif);

		// WhitePoint tag ID.
		push16(0x13e, &exif);

		// Rational type.
		push16(5, &exif);

		// Two values (x and y; z is implicit due to normalization).
		push32(2, &exif);

		// Offset (relative to the MM, immediately after the last IFD).
		push32(exif.size() - 6 + 8, &exif);

		// No more IFDs.
		push32(0, &exif);

		// The actual values.
		push32(lrintf(xyz[0] * 10000.0f), &exif);
		push32(10000, &exif);
		push32(lrintf(xyz[1] * 10000.0f), &exif);
		push32(10000, &exif);
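		// (Each value is an EXIF RATIONAL, i.e. a numerator/denominator
		// pair; scaling by 10000/10000 gives four decimal digits of
		// precision for the xy chromaticity of the white point.)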

		jpeg_write_marker(cinfo, JPEG_APP0 + 1, (const JOCTET *)exif.data(), exif.size());
	}

	// This comment marker is private to FFmpeg. It signals limited Y'CbCr range
	// (and nothing else).
	jpeg_write_marker(cinfo, JPEG_COM, (const JOCTET *)"CS=ITU601", strlen("CS=ITU601"));
}
vector<uint8_t> MJPEGEncoder::get_jpeg_header(unsigned width, unsigned height, const RGBTriplet &white_balance, int y_h_samp_factor, int y_v_samp_factor, jpeg_compress_struct *cinfo)
{
	VectorDestinationManager dest;
	init_jpeg(width, height, white_balance, &dest, cinfo, y_h_samp_factor, y_v_samp_factor);

	// Make a dummy black image; there's seemingly no other easy way of
	// making libjpeg output all of its headers.
	assert(y_v_samp_factor <= 2);  // Or we'd need larger JSAMPROW arrays below.
	size_t block_height_y = 8 * y_v_samp_factor;
	size_t block_height_cbcr = 8;

	JSAMPROW yptr[16], cbptr[16], crptr[16];
	JSAMPARRAY data[3] = { yptr, cbptr, crptr };
	memset(tmp_y, 0, 4096);
	memset(tmp_cb, 0, 4096);
	memset(tmp_cr, 0, 4096);
	for (unsigned yy = 0; yy < block_height_y; ++yy) {
		yptr[yy] = tmp_y;
	}
	for (unsigned yy = 0; yy < block_height_cbcr; ++yy) {
		cbptr[yy] = tmp_cb;
		crptr[yy] = tmp_cr;
	}
	for (unsigned y = 0; y < height; y += block_height_y) {
		jpeg_write_raw_data(cinfo, data, block_height_y);
	}
	jpeg_finish_compress(cinfo);

	// We're only interested in the header, not the data after it.
	dest.term_destination();
	for (size_t i = 0; i < dest.dest.size() - 1; ++i) {
		if (dest.dest[i] == 0xff && dest.dest[i + 1] == 0xda) {  // Start of scan (SOS).
			unsigned len = dest.dest[i + 2] * 256 + dest.dest[i + 3];
			dest.dest.resize(i + len + 2);
			break;
		}
	}

	return dest.dest;
}
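// Everything VA-API needs for a given combination of resolution, subsampling
// and white balance is cached below, so repeated frames with the same
// parameters only pay for the libjpeg header generation once.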
MJPEGEncoder::VAData MJPEGEncoder::get_va_data_for_parameters(unsigned width, unsigned height, unsigned y_h_samp_factor, unsigned y_v_samp_factor, const RGBTriplet &white_balance)
{
	VAKey key{width, height, y_h_samp_factor, y_v_samp_factor, white_balance};
	if (va_data_for_parameters.count(key)) {
		return va_data_for_parameters[key];
	}

	// Use libjpeg to generate a header and set sane defaults for e.g.
	// quantization tables. Then do the actual encode with VA-API.
	jpeg_compress_struct cinfo;
	vector<uint8_t> jpeg_header = get_jpeg_header(width, height, white_balance, y_h_samp_factor, y_v_samp_factor, &cinfo);

	// Picture parameters.
	VAEncPictureParameterBufferJPEG pic_param;
	memset(&pic_param, 0, sizeof(pic_param));
	pic_param.reconstructed_picture = VA_INVALID_ID;
	pic_param.picture_width = cinfo.image_width;
	pic_param.picture_height = cinfo.image_height;
	for (int component_idx = 0; component_idx < cinfo.num_components; ++component_idx) {
		const jpeg_component_info *comp = &cinfo.comp_info[component_idx];
		pic_param.component_id[component_idx] = comp->component_id;
		pic_param.quantiser_table_selector[component_idx] = comp->quant_tbl_no;
	}
	pic_param.num_components = cinfo.num_components;
	pic_param.num_scan = 1;
	pic_param.sample_bit_depth = 8;
	pic_param.coded_buf = VA_INVALID_ID;  // To be filled out by caller.
	pic_param.pic_flags.bits.huffman = 1;
	pic_param.quality = 50;  // Don't scale the given quantization matrices. (See gen8_mfc_jpeg_fqm_state)

	// Quantization matrices.
	VAQMatrixBufferJPEG q;
	memset(&q, 0, sizeof(q));

	q.load_lum_quantiser_matrix = true;
	q.load_chroma_quantiser_matrix = true;
	for (int quant_tbl_idx = 0; quant_tbl_idx < min(4, NUM_QUANT_TBLS); ++quant_tbl_idx) {
		const JQUANT_TBL *qtbl = cinfo.quant_tbl_ptrs[quant_tbl_idx];
		assert((qtbl == nullptr) == (quant_tbl_idx >= 2));
		if (qtbl == nullptr) continue;

		uint8_t *qmatrix = (quant_tbl_idx == 0) ? q.lum_quantiser_matrix : q.chroma_quantiser_matrix;
		for (int i = 0; i < 64; ++i) {
			if (qtbl->quantval[i] > 255) {
				fprintf(stderr, "Baseline JPEG only!\n");
				abort();
			}
			qmatrix[i] = qtbl->quantval[jpeg_natural_order[i]];
		}
	}

	// Huffman tables (arithmetic is not supported).
	VAHuffmanTableBufferJPEGBaseline huff;
	memset(&huff, 0, sizeof(huff));

	for (int huff_tbl_idx = 0; huff_tbl_idx < min(2, NUM_HUFF_TBLS); ++huff_tbl_idx) {
		const JHUFF_TBL *ac_hufftbl = cinfo.ac_huff_tbl_ptrs[huff_tbl_idx];
		const JHUFF_TBL *dc_hufftbl = cinfo.dc_huff_tbl_ptrs[huff_tbl_idx];
		if (ac_hufftbl == nullptr) {
			assert(dc_hufftbl == nullptr);
			huff.load_huffman_table[huff_tbl_idx] = 0;
		} else {
			assert(dc_hufftbl != nullptr);
			huff.load_huffman_table[huff_tbl_idx] = 1;
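
			// The array sizes are fixed by baseline JPEG: 16 code-length
			// counts, at most 12 DC categories and 162 AC symbols. Note
			// that libjpeg's bits[] array is 1-indexed.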
			for (int i = 0; i < 16; ++i) {
				huff.huffman_table[huff_tbl_idx].num_dc_codes[i] = dc_hufftbl->bits[i + 1];
			}
			for (int i = 0; i < 12; ++i) {
				huff.huffman_table[huff_tbl_idx].dc_values[i] = dc_hufftbl->huffval[i];
			}
			for (int i = 0; i < 16; ++i) {
				huff.huffman_table[huff_tbl_idx].num_ac_codes[i] = ac_hufftbl->bits[i + 1];
			}
			for (int i = 0; i < 162; ++i) {
				huff.huffman_table[huff_tbl_idx].ac_values[i] = ac_hufftbl->huffval[i];
			}
		}
	}

	// Slice parameters (metadata about the slice).
	VAEncSliceParameterBufferJPEG parms;
	memset(&parms, 0, sizeof(parms));
	for (int component_idx = 0; component_idx < cinfo.num_components; ++component_idx) {
		const jpeg_component_info *comp = &cinfo.comp_info[component_idx];
		parms.components[component_idx].component_selector = comp->component_id;
		parms.components[component_idx].dc_table_selector = comp->dc_tbl_no;
		parms.components[component_idx].ac_table_selector = comp->ac_tbl_no;
		if (parms.components[component_idx].dc_table_selector > 1 ||
		    parms.components[component_idx].ac_table_selector > 1) {
			fprintf(stderr, "Uses too many Huffman tables\n");
			abort();
		}
	}
	parms.num_components = cinfo.num_components;
	parms.restart_interval = cinfo.restart_interval;

	jpeg_destroy_compress(&cinfo);

	VAData ret;
	ret.jpeg_header = move(jpeg_header);
	ret.pic_param = pic_param;
	ret.q = q;
	ret.huff = huff;
	ret.parms = parms;
	va_data_for_parameters[key] = ret;
	return ret;
}
void MJPEGEncoder::encode_jpeg_va(QueuedFrame &&qf)
{
	PBOFrameAllocator::Userdata *userdata = (PBOFrameAllocator::Userdata *)qf.frame->userdata;
	unsigned width = qf.video_format.width;
	unsigned height = qf.video_format.height;

	VAResourcePool::VAResources resources;
	ReleaseVAResources release;
	if (userdata->data_copy_current_src == PBOFrameAllocator::Userdata::FROM_VA_API) {
		assert(is_uyvy(qf.frame));
		resources = move(userdata->va_resources);
		release = move(userdata->va_resources_release);
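		// (Zero-copy path: the frame was captured straight into a VA-API
		// image by the PBOFrameAllocator, so we take over its resources
		// instead of allocating and copying.)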
	} else {
		assert(userdata->data_copy_current_src == PBOFrameAllocator::Userdata::FROM_MALLOC);
		if (is_uyvy(qf.frame)) {
			resources = va_pool->get_va_resources(width, height, VA_FOURCC_UYVY);
		} else {
			assert(is_i420(qf.frame));
			resources = va_pool->get_va_resources(width, height, VA_FOURCC_NV12);
		}
		release = ReleaseVAResources(va_pool.get(), resources);
	}

	int y_h_samp_factor, y_v_samp_factor;
	if (is_uyvy(qf.frame)) {
		// 4:2:2 (sample Y' twice as often horizontally as Cb or Cr, vertical is left alone).
		y_h_samp_factor = 2;
		y_v_samp_factor = 1;
	} else {
		// 4:2:0 (sample Y' twice as often as Cb or Cr, in both directions)
		assert(is_i420(qf.frame));
		y_h_samp_factor = 2;
		y_v_samp_factor = 2;
	}

	VAData va_data = get_va_data_for_parameters(width, height, y_h_samp_factor, y_v_samp_factor, qf.white_balance);
	va_data.pic_param.coded_buf = resources.data_buffer;

	VABufferID pic_param_buffer;
	VAStatus va_status = vaCreateBuffer(va_dpy->va_dpy, resources.context, VAEncPictureParameterBufferType, sizeof(va_data.pic_param), 1, &va_data.pic_param, &pic_param_buffer);
	CHECK_VASTATUS(va_status, "vaCreateBuffer");
	VABufferDestroyer destroy_pic_param(va_dpy->va_dpy, pic_param_buffer);

	VABufferID q_buffer;
	va_status = vaCreateBuffer(va_dpy->va_dpy, resources.context, VAQMatrixBufferType, sizeof(va_data.q), 1, &va_data.q, &q_buffer);
	CHECK_VASTATUS(va_status, "vaCreateBuffer");
	VABufferDestroyer destroy_iq(va_dpy->va_dpy, q_buffer);

	VABufferID huff_buffer;
	va_status = vaCreateBuffer(va_dpy->va_dpy, resources.context, VAHuffmanTableBufferType, sizeof(va_data.huff), 1, &va_data.huff, &huff_buffer);
	CHECK_VASTATUS(va_status, "vaCreateBuffer");
	VABufferDestroyer destroy_huff(va_dpy->va_dpy, huff_buffer);

	VABufferID slice_param_buffer;
	va_status = vaCreateBuffer(va_dpy->va_dpy, resources.context, VAEncSliceParameterBufferType, sizeof(va_data.parms), 1, &va_data.parms, &slice_param_buffer);
	CHECK_VASTATUS(va_status, "vaCreateBuffer");
	VABufferDestroyer destroy_slice_param(va_dpy->va_dpy, slice_param_buffer);

	if (userdata->data_copy_current_src == PBOFrameAllocator::Userdata::FROM_VA_API) {
		// The pixel data is already put into the image by the caller.
		va_status = vaUnmapBuffer(va_dpy->va_dpy, resources.image.buf);
		CHECK_VASTATUS(va_status, "vaUnmapBuffer");
	} else {
		assert(userdata->data_copy_current_src == PBOFrameAllocator::Userdata::FROM_MALLOC);

		// Upload the pixel data.
		uint8_t *surface_p = nullptr;
		vaMapBuffer(va_dpy->va_dpy, resources.image.buf, (void **)&surface_p);

		if (is_uyvy(qf.frame)) {
			size_t field_start_line = qf.video_format.extra_lines_top;  // No interlacing support.
			size_t field_start = qf.cbcr_offset * 2 + qf.video_format.width * field_start_line * 2;

			const uint8_t *src = qf.frame->data_copy + field_start;
			uint8_t *dst = (unsigned char *)surface_p + resources.image.offsets[0];
			memcpy_with_pitch(dst, src, qf.video_format.width * 2, resources.image.pitches[0], qf.video_format.height);
		} else {
			assert(is_i420(qf.frame));
			assert(!qf.frame->interleaved);  // Makes no sense for I420.

			size_t field_start_line = qf.video_format.extra_lines_top;  // No interlacing support.
			const uint8_t *y_src = qf.frame->data + qf.video_format.width * field_start_line;
			const uint8_t *cb_src = y_src + width * height;
			const uint8_t *cr_src = cb_src + (width / 2) * (height / 2);

			uint8_t *y_dst = (unsigned char *)surface_p + resources.image.offsets[0];
			uint8_t *cbcr_dst = (unsigned char *)surface_p + resources.image.offsets[1];

			memcpy_with_pitch(y_dst, y_src, qf.video_format.width, resources.image.pitches[0], qf.video_format.height);
			interleave_with_pitch(cbcr_dst, cb_src, cr_src, qf.video_format.width / 2, resources.image.pitches[1], qf.video_format.height / 2);
		}

		va_status = vaUnmapBuffer(va_dpy->va_dpy, resources.image.buf);
		CHECK_VASTATUS(va_status, "vaUnmapBuffer");
	}
	qf.frame->data_copy = nullptr;

	// Seemingly vaPutImage() (which triggers a GPU copy) is much nicer to the
	// CPU than vaDeriveImage() and copying directly into the GPU's buffers.
	// Exactly why is unclear, but it seems to involve L3 cache usage when there
	// are many high-res (1080p+) images in play.
	va_status = vaPutImage(va_dpy->va_dpy, resources.surface, resources.image.image_id, 0, 0, width, height, 0, 0, width, height);
	CHECK_VASTATUS(va_status, "vaPutImage");

	// Finally, stick in the JPEG header.
	VAEncPackedHeaderParameterBuffer header_parm;
	header_parm.type = VAEncPackedHeaderRawData;
	header_parm.bit_length = 8 * va_data.jpeg_header.size();

	VABufferID header_parm_buffer;
	va_status = vaCreateBuffer(va_dpy->va_dpy, resources.context, VAEncPackedHeaderParameterBufferType, sizeof(header_parm), 1, &header_parm, &header_parm_buffer);
	CHECK_VASTATUS(va_status, "vaCreateBuffer");
	VABufferDestroyer destroy_header(va_dpy->va_dpy, header_parm_buffer);

	VABufferID header_data_buffer;
	va_status = vaCreateBuffer(va_dpy->va_dpy, resources.context, VAEncPackedHeaderDataBufferType, va_data.jpeg_header.size(), 1, va_data.jpeg_header.data(), &header_data_buffer);
	CHECK_VASTATUS(va_status, "vaCreateBuffer");
	VABufferDestroyer destroy_header_data(va_dpy->va_dpy, header_data_buffer);

	va_status = vaBeginPicture(va_dpy->va_dpy, resources.context, resources.surface);
	CHECK_VASTATUS(va_status, "vaBeginPicture");
	va_status = vaRenderPicture(va_dpy->va_dpy, resources.context, &pic_param_buffer, 1);
	CHECK_VASTATUS(va_status, "vaRenderPicture(pic_param)");
	va_status = vaRenderPicture(va_dpy->va_dpy, resources.context, &q_buffer, 1);
	CHECK_VASTATUS(va_status, "vaRenderPicture(q)");
	va_status = vaRenderPicture(va_dpy->va_dpy, resources.context, &huff_buffer, 1);
	CHECK_VASTATUS(va_status, "vaRenderPicture(huff)");
	va_status = vaRenderPicture(va_dpy->va_dpy, resources.context, &slice_param_buffer, 1);
	CHECK_VASTATUS(va_status, "vaRenderPicture(slice_param)");
	va_status = vaRenderPicture(va_dpy->va_dpy, resources.context, &header_parm_buffer, 1);
	CHECK_VASTATUS(va_status, "vaRenderPicture(header_parm)");
	va_status = vaRenderPicture(va_dpy->va_dpy, resources.context, &header_data_buffer, 1);
	CHECK_VASTATUS(va_status, "vaRenderPicture(header_data)");
	va_status = vaEndPicture(va_dpy->va_dpy, resources.context);
	CHECK_VASTATUS(va_status, "vaEndPicture");
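	// vaEndPicture() only kicks off the encode; it does not wait for it.
	// The coded bitstream is collected asynchronously (after vaSyncSurface())
	// in va_receiver_thread_func() below.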
	qf.resources = move(resources);
	qf.resource_releaser = move(release);

	lock_guard<mutex> lock(mu);
	frames_encoding.push(move(qf));
	any_frames_encoding.notify_all();
}
void MJPEGEncoder::va_receiver_thread_func()
{
	pthread_setname_np(pthread_self(), "MJPEG_Receive");
	for (;;) {
		QueuedFrame qf;
		{
			unique_lock<mutex> lock(mu);
			any_frames_encoding.wait(lock, [this] { return !frames_encoding.empty() || should_quit; });
			if (should_quit) return;
			qf = move(frames_encoding.front());
			frames_encoding.pop();
		}

		update_siphon_streams();

		assert(global_flags.card_to_mjpeg_stream_export.count(qf.card_index));  // Or should_encode_mjpeg_for_card() would have returned false.
		int stream_index = global_flags.card_to_mjpeg_stream_export[qf.card_index];

		HTTPD::StreamID multicam_id{ HTTPD::MULTICAM_STREAM, 0 };
		HTTPD::StreamID siphon_id{ HTTPD::SIPHON_STREAM, qf.card_index };
		assert(streams.count(multicam_id));
		assert(streams[multicam_id].avctx != nullptr);

		// Write audio before video, since Futatabi expects it.
		if (qf.audio.size() > 0) {
			write_audio_packet(streams[multicam_id].avctx.get(), qf.pts, stream_index + global_flags.card_to_mjpeg_stream_export.size(), qf.audio);
			if (streams.count(siphon_id)) {
				write_audio_packet(streams[siphon_id].avctx.get(), qf.pts, /*stream_index=*/1, qf.audio);
			}
		}

		VAStatus va_status = vaSyncSurface(va_dpy->va_dpy, qf.resources.surface);
		CHECK_VASTATUS(va_status, "vaSyncSurface");
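		// vaSyncSurface() blocks until the GPU is done encoding this
		// surface; only after that is the coded buffer safe to map.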
		VACodedBufferSegment *segment;
		va_status = vaMapBuffer(va_dpy->va_dpy, qf.resources.data_buffer, (void **)&segment);
		CHECK_VASTATUS(va_status, "vaMapBuffer");

		const uint8_t *coded_buf = reinterpret_cast<uint8_t *>(segment->buf);
		write_mjpeg_packet(streams[multicam_id].avctx.get(), qf.pts, stream_index, coded_buf, segment->size);
		if (streams.count(siphon_id)) {
			write_mjpeg_packet(streams[siphon_id].avctx.get(), qf.pts, /*stream_index=*/0, coded_buf, segment->size);
		}

		va_status = vaUnmapBuffer(va_dpy->va_dpy, qf.resources.data_buffer);
		CHECK_VASTATUS(va_status, "vaUnmapBuffer");
	}
}
vector<uint8_t> MJPEGEncoder::encode_jpeg_libjpeg(const QueuedFrame &qf)
{
	unsigned width = qf.video_format.width;
	unsigned height = qf.video_format.height;

	VectorDestinationManager dest;
	jpeg_compress_struct cinfo;

	size_t field_start_line = qf.video_format.extra_lines_top;  // No interlacing support.

	PBOFrameAllocator::Userdata *userdata = (PBOFrameAllocator::Userdata *)qf.frame->userdata;
	if (userdata->pixel_format == PixelFormat_8BitYCbCr) {
		init_jpeg(width, height, qf.white_balance, &dest, &cinfo, /*y_h_samp_factor=*/2, /*y_v_samp_factor=*/1);

		assert(qf.frame->interleaved);
		size_t field_start = qf.cbcr_offset * 2 + qf.video_format.width * field_start_line * 2;

		JSAMPROW yptr[8], cbptr[8], crptr[8];
		JSAMPARRAY data[3] = { yptr, cbptr, crptr };
		for (unsigned y = 0; y < qf.video_format.height; y += 8) {
			const uint8_t *src;
			src = qf.frame->data_copy + field_start + y * qf.video_format.width * 2;
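			// UYVY is U0 Y0 V0 Y1 ...; the first pass splits luma from the
			// interleaved chroma, the second splits that chroma into
			// separate Cb and Cr planes, eight rows at a time.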
			memcpy_interleaved(tmp_cbcr, tmp_y, src, qf.video_format.width * 8 * 2);
			memcpy_interleaved(tmp_cb, tmp_cr, tmp_cbcr, qf.video_format.width * 8);
			for (unsigned yy = 0; yy < 8; ++yy) {
				yptr[yy] = tmp_y + yy * width;
				cbptr[yy] = tmp_cb + yy * width / 2;
				crptr[yy] = tmp_cr + yy * width / 2;
			}
			jpeg_write_raw_data(&cinfo, data, /*num_lines=*/8);
		}
	} else {
		assert(userdata->pixel_format == PixelFormat_8BitYCbCrPlanar);

		const movit::YCbCrFormat &ycbcr = userdata->ycbcr_format;
		init_jpeg(width, height, qf.white_balance, &dest, &cinfo, ycbcr.chroma_subsampling_x, ycbcr.chroma_subsampling_y);
		assert(ycbcr.chroma_subsampling_y <= 2);  // Or we'd need larger JSAMPROW arrays below.

		size_t field_start_line = qf.video_format.extra_lines_top;  // No interlacing support.
		const uint8_t *y_start = qf.frame->data + qf.video_format.width * field_start_line;
		const uint8_t *cb_start = y_start + width * height;
		const uint8_t *cr_start = cb_start + (width / ycbcr.chroma_subsampling_x) * (height / ycbcr.chroma_subsampling_y);

		size_t block_height_y = 8 * ycbcr.chroma_subsampling_y;
		size_t block_height_cbcr = 8;

		JSAMPROW yptr[16], cbptr[16], crptr[16];
		JSAMPARRAY data[3] = { yptr, cbptr, crptr };
		for (unsigned y = 0; y < qf.video_format.height; y += block_height_y) {
			for (unsigned yy = 0; yy < block_height_y; ++yy) {
				yptr[yy] = const_cast<JSAMPROW>(y_start) + (y + yy) * width;
			}
			unsigned cbcr_y = y / ycbcr.chroma_subsampling_y;
			for (unsigned yy = 0; yy < block_height_cbcr; ++yy) {
				cbptr[yy] = const_cast<JSAMPROW>(cb_start) + (cbcr_y + yy) * width / ycbcr.chroma_subsampling_x;
				crptr[yy] = const_cast<JSAMPROW>(cr_start) + (cbcr_y + yy) * width / ycbcr.chroma_subsampling_x;
			}
			jpeg_write_raw_data(&cinfo, data, block_height_y);
		}
	}
	jpeg_finish_compress(&cinfo);

	return dest.dest;
}
void MJPEGEncoder::add_stream(HTTPD::StreamID stream_id)
{
	AVFormatContextWithCloser avctx;

	// Set up the mux. We don't use the Mux wrapper, because it's geared towards
	// a situation with only one video stream (and possibly one audio stream)
	// with known width/height, and we don't need the extra functionality it provides.
	avctx.reset(avformat_alloc_context());
	avctx->oformat = av_guess_format("nut", nullptr, nullptr);
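	// (NUT can carry MJPEG video plus raw PCM audio with exact timestamps
	// in a single streamable container, which is what Futatabi reads.)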

	uint8_t *buf = (uint8_t *)av_malloc(MUX_BUFFER_SIZE);
	avctx->pb = avio_alloc_context(buf, MUX_BUFFER_SIZE, 1, &ffmpeg_contexts[stream_id], nullptr, nullptr, nullptr);
	avctx->pb->write_data_type = &MJPEGEncoder::write_packet2_thunk;
	avctx->flags = AVFMT_FLAG_CUSTOM_IO;

	if (stream_id.type == HTTPD::MULTICAM_STREAM) {
		for (unsigned card_idx = 0; card_idx < global_flags.card_to_mjpeg_stream_export.size(); ++card_idx) {
			add_video_stream(avctx.get());
		}
		for (unsigned card_idx = 0; card_idx < global_flags.card_to_mjpeg_stream_export.size(); ++card_idx) {
			add_audio_stream(avctx.get());
		}
	} else {
		assert(stream_id.type == HTTPD::SIPHON_STREAM);
		add_video_stream(avctx.get());
		add_audio_stream(avctx.get());
	}
	finalize_mux(avctx.get());

	Stream s;
	s.avctx = move(avctx);
	streams[stream_id] = move(s);
}
void MJPEGEncoder::update_siphon_streams()
{
	// Bring the list of streams into sync with what the clients need.
	for (auto it = streams.begin(); it != streams.end(); ) {
		if (it->first.type != HTTPD::SIPHON_STREAM) {
			++it;
			continue;
		}
		if (httpd->get_num_connected_siphon_clients(it->first.index) == 0) {
			av_free(it->second.avctx->pb->buffer);
			streams.erase(it++);
		} else {
			++it;
		}
	}
	for (unsigned stream_idx = 0; stream_idx < MAX_VIDEO_CARDS; ++stream_idx) {
		HTTPD::StreamID stream_id{ HTTPD::SIPHON_STREAM, stream_idx };
		if (streams.count(stream_id) == 0 && httpd->get_num_connected_siphon_clients(stream_idx) > 0) {
			add_stream(stream_id);
		}
	}
}
void MJPEGEncoder::create_ffmpeg_context(HTTPD::StreamID stream_id)
{
	ffmpeg_contexts.emplace(stream_id, WritePacket2Context{ this, stream_id });
}
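// The WritePacket2Contexts are all created up front (see the constructor) and
// never removed, since avio_alloc_context() stores a pointer to them; streams
// may come and go, but the contexts they point into must stay put.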