// nageru/mjpeg_encoder.cpp
1 #include "mjpeg_encoder.h"
2
3 #include <Eigen/Core>
4 #include <algorithm>
5 #include <assert.h>
6 #include <bmusb/bmusb.h>
7 #include <jpeglib.h>
8 #include <math.h>
9 #include <movit/colorspace_conversion_effect.h>
10 #include <movit/effect.h>
11 #include <movit/image_format.h>
12 #include <movit/ycbcr.h>
13 #include <mutex>
14 #include <pthread.h>
15 #include <stddef.h>
16 #include <stdint.h>
17 #include <stdio.h>
18 #include <stdlib.h>
19 #include <string.h>
20 #include <string>
21 #include <thread>
22 #include <type_traits>
23 #include <utility>
24 #include <va/va.h>
25 #include <va/va_enc_jpeg.h>
26 #include <vector>
27 #if __SSE2__
28 #include <immintrin.h>
29 #endif
30
31 extern "C" {
32 #include <libavcodec/codec_id.h>
33 #include <libavcodec/defs.h>
34 #include <libavcodec/packet.h>
35 #include <libavformat/avformat.h>
36 #include <libavformat/avio.h>
37 #include <libavutil/avutil.h>
38 #include <libavutil/channel_layout.h>
39 #include <libavutil/dict.h>
40 #include <libavutil/mathematics.h>
41 #include <libavutil/mem.h>
42 #include <libavutil/pixfmt.h>
43 #include <libavutil/rational.h>
44 }
45
46 #include "flags.h"
47 #include "pbo_frame_allocator.h"
48 #include "ref_counted_frame.h"
49 #include "shared/ffmpeg_raii.h"
50 #include "shared/httpd.h"
51 #include "shared/memcpy_interleaved.h"
52 #include "shared/metrics.h"
53 #include "shared/shared_defs.h"
54 #include "shared/timebase.h"
55 #include "shared/va_display.h"
56 #include "shared/va_resource_pool.h"
57
58 using namespace Eigen;
59 using namespace bmusb;
60 using namespace movit;
61 using namespace std;
62
63 static VAImageFormat uyvy_format, nv12_format;
64
65 extern void memcpy_with_pitch(uint8_t *dst, const uint8_t *src, size_t src_width, size_t dst_pitch, size_t height);
66
67 // The inverse of memcpy_interleaved(), with (slow) support for pitch.
68 void interleave_with_pitch(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, size_t src_width, size_t dst_pitch, size_t height)
69 {
70 #if __SSE2__
71         if (dst_pitch == src_width * 2 && (src_width * height) % 16 == 0) {
72                 __m128i *dptr = reinterpret_cast<__m128i *>(dst);
73                 const __m128i *sptr1 = reinterpret_cast<const __m128i *>(src1);
74                 const __m128i *sptr2 = reinterpret_cast<const __m128i *>(src2);
75                 for (size_t i = 0; i < src_width * height / 16; ++i) {
76                         __m128i data1 = _mm_loadu_si128(sptr1++);
77                         __m128i data2 = _mm_loadu_si128(sptr2++);
78                         _mm_storeu_si128(dptr++, _mm_unpacklo_epi8(data1, data2));
79                         _mm_storeu_si128(dptr++, _mm_unpackhi_epi8(data1, data2));
80                 }
81                 return;
82         }
83 #endif
84
85         for (size_t y = 0; y < height; ++y) {
86                 uint8_t *dptr = dst + y * dst_pitch;
87                 const uint8_t *sptr1 = src1 + y * src_width;
88                 const uint8_t *sptr2 = src2 + y * src_width;
89                 for (size_t x = 0; x < src_width; ++x) {
90                         *dptr++ = *sptr1++;
91                         *dptr++ = *sptr2++;
92                 }
93         }
94 }
95
96 // From libjpeg (although it's of course identical between implementations).
97 static const int jpeg_natural_order[DCTSIZE2] = {
98          0,  1,  8, 16,  9,  2,  3, 10,
99         17, 24, 32, 25, 18, 11,  4,  5,
100         12, 19, 26, 33, 40, 48, 41, 34,
101         27, 20, 13,  6,  7, 14, 21, 28,
102         35, 42, 49, 56, 57, 50, 43, 36,
103         29, 22, 15, 23, 30, 37, 44, 51,
104         58, 59, 52, 45, 38, 31, 39, 46,
105         53, 60, 61, 54, 47, 55, 62, 63,
106 };
107
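// A libjpeg destination manager that writes the compressed output into a
// std::vector<uint8_t>, growing it on demand. libjpeg only sees the
// jpeg_destination_mgr at the start of the struct, so the type must be
// standard-layout (checked below) for the casts in the thunks to be valid.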
struct VectorDestinationManager {
	jpeg_destination_mgr pub;
	std::vector<uint8_t> dest;

	VectorDestinationManager()
	{
		pub.init_destination = init_destination_thunk;
		pub.empty_output_buffer = empty_output_buffer_thunk;
		pub.term_destination = term_destination_thunk;
	}

	static void init_destination_thunk(j_compress_ptr ptr)
	{
		((VectorDestinationManager *)(ptr->dest))->init_destination();
	}

	inline void init_destination()
	{
		make_room(0);
	}

	static boolean empty_output_buffer_thunk(j_compress_ptr ptr)
	{
		return ((VectorDestinationManager *)(ptr->dest))->empty_output_buffer();
	}

	inline bool empty_output_buffer()
	{
		make_room(dest.size());  // Should ignore pub.free_in_buffer!
		return true;
	}

	inline void make_room(size_t bytes_used)
	{
		dest.resize(bytes_used + 4096);
		dest.resize(dest.capacity());
		pub.next_output_byte = dest.data() + bytes_used;
		pub.free_in_buffer = dest.size() - bytes_used;
	}

	static void term_destination_thunk(j_compress_ptr ptr)
	{
		((VectorDestinationManager *)(ptr->dest))->term_destination();
	}

	inline void term_destination()
	{
		dest.resize(dest.size() - pub.free_in_buffer);
	}
};
static_assert(std::is_standard_layout<VectorDestinationManager>::value, "");

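// AVIO write callback: the mux calls back here for each chunk of output.
// Header data is accumulated and handed to the HTTPD as the stream header;
// everything else goes out to connected clients as regular stream data.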
int MJPEGEncoder::write_packet2_thunk(void *opaque, uint8_t *buf, int buf_size, AVIODataMarkerType type, int64_t time)
{
	WritePacket2Context *ctx = (WritePacket2Context *)opaque;
	return ctx->mjpeg_encoder->write_packet2(ctx->stream_id, buf, buf_size, type, time);
}

int MJPEGEncoder::write_packet2(HTTPD::StreamID stream_id, uint8_t *buf, int buf_size, AVIODataMarkerType type, int64_t time)
{
	string *mux_header = &streams[stream_id].mux_header;
	if (type == AVIO_DATA_MARKER_HEADER) {
		mux_header->append((char *)buf, buf_size);
		httpd->set_header(stream_id, *mux_header);
	} else {
		httpd->add_data(stream_id, (char *)buf, buf_size, /*keyframe=*/true, AV_NOPTS_VALUE, AVRational{ AV_TIME_BASE, 1 });
	}
	return buf_size;
}

namespace {

void add_video_stream(AVFormatContext *avctx)
{
	AVStream *stream = avformat_new_stream(avctx, nullptr);
	if (stream == nullptr) {
		fprintf(stderr, "avformat_new_stream() failed\n");
		abort();
	}

	// FFmpeg is very picky about having audio at 1/48000 timebase,
	// no matter what we write. Even though we'd prefer our usual 1/120000,
	// put the video on the same one, so that we can have locked audio.
	stream->time_base = AVRational{ 1, OUTPUT_FREQUENCY };
	stream->codecpar->codec_type = AVMEDIA_TYPE_VIDEO;
	stream->codecpar->codec_id = AV_CODEC_ID_MJPEG;

	// Used for aspect ratio only. Can change without notice (the mux won't care).
	stream->codecpar->width = global_flags.width;
	stream->codecpar->height = global_flags.height;

	// TODO: We could perhaps use the interpretation for each card here
	// (or at least the command-line flags) instead of the defaults,
	// but what would we do when they change?
	stream->codecpar->color_primaries = AVCOL_PRI_BT709;
	stream->codecpar->color_trc = AVCOL_TRC_IEC61966_2_1;
	stream->codecpar->color_space = AVCOL_SPC_BT709;
	stream->codecpar->color_range = AVCOL_RANGE_MPEG;
	stream->codecpar->chroma_location = AVCHROMA_LOC_LEFT;
	stream->codecpar->field_order = AV_FIELD_PROGRESSIVE;
}

void add_audio_stream(AVFormatContext *avctx)
{
	AVStream *stream = avformat_new_stream(avctx, nullptr);
	if (stream == nullptr) {
		fprintf(stderr, "avformat_new_stream() failed\n");
		abort();
	}
	stream->time_base = AVRational{ 1, OUTPUT_FREQUENCY };
	stream->codecpar->codec_type = AVMEDIA_TYPE_AUDIO;
	stream->codecpar->codec_id = AV_CODEC_ID_PCM_S32LE;
	stream->codecpar->ch_layout.order = AV_CHANNEL_ORDER_NATIVE;
	stream->codecpar->ch_layout.nb_channels = 2;
	stream->codecpar->ch_layout.u.mask = AV_CH_LAYOUT_STEREO;
	stream->codecpar->sample_rate = OUTPUT_FREQUENCY;
}

void finalize_mux(AVFormatContext *avctx)
{
	AVDictionary *options = NULL;
	vector<pair<string, string>> opts = MUX_OPTS;
	for (pair<string, string> opt : opts) {
		av_dict_set(&options, opt.first.c_str(), opt.second.c_str(), 0);
	}
	if (avformat_write_header(avctx, &options) < 0) {
		fprintf(stderr, "avformat_write_header() failed\n");
		abort();
	}
}

}  // namespace

MJPEGEncoder::MJPEGEncoder(HTTPD *httpd, const string &va_display)
	: httpd(httpd)
{
	create_ffmpeg_context(HTTPD::StreamID{ HTTPD::MULTICAM_STREAM, 0 });
	for (unsigned stream_idx = 0; stream_idx < MAX_VIDEO_CARDS; ++stream_idx) {
		create_ffmpeg_context(HTTPD::StreamID{ HTTPD::SIPHON_STREAM, stream_idx });
	}

	add_stream(HTTPD::StreamID{ HTTPD::MULTICAM_STREAM, 0 });

	// Initialize VA-API.
	VAConfigID config_id_422, config_id_420;
	string error;
	va_dpy = try_open_va(va_display, { VAProfileJPEGBaseline }, VAEntrypointEncPicture,
		{
			{ "4:2:2", VA_RT_FORMAT_YUV422, VA_FOURCC_UYVY, &config_id_422, &uyvy_format },
			// We'd prefer VA_FOURCC_I420, but it's not supported by Intel's driver.
			{ "4:2:0", VA_RT_FORMAT_YUV420, VA_FOURCC_NV12, &config_id_420, &nv12_format }
		},
		/*chosen_profile=*/nullptr, &error);
	if (va_dpy == nullptr) {
		fprintf(stderr, "Could not initialize VA-API for MJPEG encoding: %s. JPEGs will be encoded in software if needed.\n", error.c_str());
	}

	encoder_thread = thread(&MJPEGEncoder::encoder_thread_func, this);
	if (va_dpy != nullptr) {
		va_pool.reset(new VAResourcePool(va_dpy->va_dpy, uyvy_format, nv12_format, config_id_422, config_id_420, /*with_data_buffer=*/true));
		va_receiver_thread = thread(&MJPEGEncoder::va_receiver_thread_func, this);
	}

	global_metrics.add("mjpeg_frames", {{ "status", "dropped" }, { "reason", "zero_size" }}, &metric_mjpeg_frames_zero_size_dropped);
	global_metrics.add("mjpeg_frames", {{ "status", "dropped" }, { "reason", "interlaced" }}, &metric_mjpeg_frames_interlaced_dropped);
	global_metrics.add("mjpeg_frames", {{ "status", "dropped" }, { "reason", "unsupported_pixel_format" }}, &metric_mjpeg_frames_unsupported_pixel_format_dropped);
	global_metrics.add("mjpeg_frames", {{ "status", "dropped" }, { "reason", "oversized" }}, &metric_mjpeg_frames_oversized_dropped);
	global_metrics.add("mjpeg_frames", {{ "status", "dropped" }, { "reason", "overrun" }}, &metric_mjpeg_overrun_dropped);
	global_metrics.add("mjpeg_frames", {{ "status", "submitted" }}, &metric_mjpeg_overrun_submitted);

	running = true;
}

MJPEGEncoder::~MJPEGEncoder()
{
	for (auto &id_and_stream : streams) {
		av_free(id_and_stream.second.avctx->pb->buffer);
	}

	global_metrics.remove("mjpeg_frames", {{ "status", "dropped" }, { "reason", "zero_size" }});
	global_metrics.remove("mjpeg_frames", {{ "status", "dropped" }, { "reason", "interlaced" }});
	global_metrics.remove("mjpeg_frames", {{ "status", "dropped" }, { "reason", "unsupported_pixel_format" }});
	global_metrics.remove("mjpeg_frames", {{ "status", "dropped" }, { "reason", "oversized" }});
	global_metrics.remove("mjpeg_frames", {{ "status", "dropped" }, { "reason", "overrun" }});
	global_metrics.remove("mjpeg_frames", {{ "status", "submitted" }});
}

void MJPEGEncoder::stop()
{
	if (!running) {
		return;
	}
	running = false;
	should_quit = true;
	any_frames_to_be_encoded.notify_all();
	any_frames_encoding.notify_all();
	encoder_thread.join();
	if (va_dpy != nullptr) {
		va_receiver_thread.join();
	}
}

namespace {

bool is_uyvy(RefCountedFrame frame)
{
	PBOFrameAllocator::Userdata *userdata = (PBOFrameAllocator::Userdata *)frame->userdata;
	return userdata->pixel_format == PixelFormat_8BitYCbCr && frame->interleaved;
}

bool is_i420(RefCountedFrame frame)
{
	PBOFrameAllocator::Userdata *userdata = (PBOFrameAllocator::Userdata *)frame->userdata;
	return userdata->pixel_format == PixelFormat_8BitYCbCrPlanar &&
		userdata->ycbcr_format.chroma_subsampling_x == 2 &&
		userdata->ycbcr_format.chroma_subsampling_y == 2;
}

}  // namespace

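// Submit a frame for encoding. Frames we cannot encode (zero-size,
// interlaced, unsupported pixel format, or larger than 4096x4096) are
// dropped with a metric bump, as are frames arriving while more than 50
// are already queued; everything else is pushed onto the queue for the
// encoder thread.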
void MJPEGEncoder::upload_frame(int64_t pts, unsigned card_index, RefCountedFrame frame, const bmusb::VideoFormat &video_format, size_t y_offset, size_t cbcr_offset, vector<int32_t> audio, const RGBTriplet &white_balance)
{
	if (video_format.width == 0 || video_format.height == 0) {
		++metric_mjpeg_frames_zero_size_dropped;
		return;
	}
	if (video_format.interlaced) {
		fprintf(stderr, "Card %u: Ignoring JPEG encoding for interlaced frame\n", card_index);
		++metric_mjpeg_frames_interlaced_dropped;
		return;
	}
	if (!is_uyvy(frame) && !is_i420(frame)) {
		fprintf(stderr, "Card %u: Ignoring JPEG encoding for unsupported pixel format\n", card_index);
		++metric_mjpeg_frames_unsupported_pixel_format_dropped;
		return;
	}
	if (video_format.width > 4096 || video_format.height > 4096) {
		fprintf(stderr, "Card %u: Ignoring JPEG encoding for oversized frame\n", card_index);
		++metric_mjpeg_frames_oversized_dropped;
		return;
	}

	lock_guard<mutex> lock(mu);
	if (frames_to_be_encoded.size() + frames_encoding.size() > 50) {
		fprintf(stderr, "WARNING: MJPEG encoding doesn't keep up, discarding frame.\n");
		++metric_mjpeg_overrun_dropped;
		return;
	}
	++metric_mjpeg_overrun_submitted;
	frames_to_be_encoded.push(QueuedFrame{ pts, card_index, frame, video_format, y_offset, cbcr_offset, move(audio), white_balance });
	any_frames_to_be_encoded.notify_all();
}

bool MJPEGEncoder::should_encode_mjpeg_for_card(unsigned card_index)
{
	// Only bother doing MJPEG encoding if there are any connected clients
	// that want the stream.
	if (httpd->get_num_connected_multicam_clients() == 0 &&
	    httpd->get_num_connected_siphon_clients(card_index) == 0) {
		return false;
	}

	auto it = global_flags.card_to_mjpeg_stream_export.find(card_index);
	return (it != global_flags.card_to_mjpeg_stream_export.end());
}

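// Worker thread: pulls frames off the queue and either hands them to the
// VA-API encoder (completion is then handled by the receiver thread) or
// encodes them synchronously with libjpeg and muxes the result right here.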
void MJPEGEncoder::encoder_thread_func()
{
	pthread_setname_np(pthread_self(), "MJPEG_Encode");
	posix_memalign((void **)&tmp_y, 4096, 4096 * 8);
	posix_memalign((void **)&tmp_cbcr, 4096, 4096 * 8);
	posix_memalign((void **)&tmp_cb, 4096, 4096 * 8);
	posix_memalign((void **)&tmp_cr, 4096, 4096 * 8);

	for (;;) {
		QueuedFrame qf;
		{
			unique_lock<mutex> lock(mu);
			any_frames_to_be_encoded.wait(lock, [this] { return !frames_to_be_encoded.empty() || should_quit; });
			if (should_quit) break;
			qf = move(frames_to_be_encoded.front());
			frames_to_be_encoded.pop();
		}

		assert(global_flags.card_to_mjpeg_stream_export.count(qf.card_index));  // Or should_encode_mjpeg_for_card() would have returned false.
		int stream_index = global_flags.card_to_mjpeg_stream_export[qf.card_index];

		if (va_dpy != nullptr) {
			// Will call back in the receiver thread.
			encode_jpeg_va(move(qf));
		} else {
			update_siphon_streams();

			HTTPD::StreamID multicam_id{ HTTPD::MULTICAM_STREAM, 0 };
			HTTPD::StreamID siphon_id{ HTTPD::SIPHON_STREAM, qf.card_index };
			assert(streams.count(multicam_id));

			// Write audio before video, since Futatabi expects it.
			if (qf.audio.size() > 0) {
				write_audio_packet(streams[multicam_id].avctx.get(), qf.pts, stream_index + global_flags.card_to_mjpeg_stream_export.size(), qf.audio);
				if (streams.count(siphon_id)) {
					write_audio_packet(streams[siphon_id].avctx.get(), qf.pts, /*stream_index=*/1, qf.audio);
				}
			}

			// Encode synchronously, in the same thread.
			vector<uint8_t> jpeg = encode_jpeg_libjpeg(qf);
			write_mjpeg_packet(streams[multicam_id].avctx.get(), qf.pts, stream_index, jpeg.data(), jpeg.size());
			if (streams.count(siphon_id)) {
				write_mjpeg_packet(streams[siphon_id].avctx.get(), qf.pts, /*stream_index=*/0, jpeg.data(), jpeg.size());
			}
		}
	}

	free(tmp_y);
	free(tmp_cbcr);
	free(tmp_cb);
	free(tmp_cr);
}

void MJPEGEncoder::write_mjpeg_packet(AVFormatContext *avctx, int64_t pts, unsigned stream_index, const uint8_t *jpeg, size_t jpeg_size)
{
	AVPacket pkt;
	memset(&pkt, 0, sizeof(pkt));
	pkt.buf = nullptr;
	pkt.data = const_cast<uint8_t *>(jpeg);
	pkt.size = jpeg_size;
	pkt.stream_index = stream_index;
	pkt.flags = AV_PKT_FLAG_KEY;
	AVRational time_base = avctx->streams[pkt.stream_index]->time_base;
	pkt.pts = pkt.dts = av_rescale_q(pts, AVRational{ 1, TIMEBASE }, time_base);
	pkt.duration = 0;

	if (av_write_frame(avctx, &pkt) < 0) {
		fprintf(stderr, "av_write_frame() failed\n");
		abort();
	}
}

void MJPEGEncoder::write_audio_packet(AVFormatContext *avctx, int64_t pts, unsigned stream_index, const vector<int32_t> &audio)
{
	AVPacket pkt;
	memset(&pkt, 0, sizeof(pkt));
	pkt.buf = nullptr;
	pkt.data = reinterpret_cast<uint8_t *>(const_cast<int32_t *>(&audio[0]));
	pkt.size = audio.size() * sizeof(audio[0]);
	pkt.stream_index = stream_index;
	pkt.flags = AV_PKT_FLAG_KEY;
	AVRational time_base = avctx->streams[pkt.stream_index]->time_base;
	pkt.pts = pkt.dts = av_rescale_q(pts, AVRational{ 1, TIMEBASE }, time_base);
	size_t num_stereo_samples = audio.size() / 2;
	pkt.duration = av_rescale_q(num_stereo_samples, AVRational{ 1, OUTPUT_FREQUENCY }, time_base);

	if (av_write_frame(avctx, &pkt) < 0) {
		fprintf(stderr, "av_write_frame() failed\n");
		abort();
	}
}

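// RAII helper that destroys a VA-API buffer when it goes out of scope.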
class VABufferDestroyer {
public:
	VABufferDestroyer(VADisplay dpy, VABufferID buf)
		: dpy(dpy), buf(buf) {}

	~VABufferDestroyer() {
		VAStatus va_status = vaDestroyBuffer(dpy, buf);
		CHECK_VASTATUS(va_status, "vaDestroyBuffer");
	}

private:
	VADisplay dpy;
	VABufferID buf;
};

namespace {

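// Big-endian serialization helpers for the EXIF/TIFF structures below.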
void push16(uint16_t val, string *str)
{
	str->push_back(val >> 8);
	str->push_back(val & 0xff);
}

void push32(uint32_t val, string *str)
{
	str->push_back(val >> 24);
	str->push_back((val >> 16) & 0xff);
	str->push_back((val >> 8) & 0xff);
	str->push_back(val & 0xff);
}

}  // namespace

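// Set up a libjpeg compression session for raw Y'CbCr input with the given
// luma sampling factors. If the white balance deviates from neutral, the
// white point is stored in a minimal EXIF header, written as an APP1 marker.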
void MJPEGEncoder::init_jpeg(unsigned width, unsigned height, const RGBTriplet &white_balance, VectorDestinationManager *dest, jpeg_compress_struct *cinfo, int y_h_samp_factor, int y_v_samp_factor)
{
	jpeg_error_mgr jerr;
	cinfo->err = jpeg_std_error(&jerr);
	jpeg_create_compress(cinfo);

	cinfo->dest = (jpeg_destination_mgr *)dest;

	cinfo->input_components = 3;
	jpeg_set_defaults(cinfo);
	jpeg_set_quality(cinfo, quality, /*force_baseline=*/false);

	cinfo->image_width = width;
	cinfo->image_height = height;
	cinfo->raw_data_in = true;
	jpeg_set_colorspace(cinfo, JCS_YCbCr);
	cinfo->comp_info[0].h_samp_factor = y_h_samp_factor;
	cinfo->comp_info[0].v_samp_factor = y_v_samp_factor;
	cinfo->comp_info[1].h_samp_factor = 1;
	cinfo->comp_info[1].v_samp_factor = 1;
	cinfo->comp_info[2].h_samp_factor = 1;
	cinfo->comp_info[2].v_samp_factor = 1;
	cinfo->CCIR601_sampling = true;  // Seems to be mostly ignored by libjpeg, though.
	jpeg_start_compress(cinfo, true);

	if (fabs(white_balance.r - 1.0f) > 1e-3 ||
	    fabs(white_balance.g - 1.0f) > 1e-3 ||
	    fabs(white_balance.b - 1.0f) > 1e-3) {
		// Convert from (linear) RGB to XYZ.
		Matrix3d rgb_to_xyz_matrix = movit::ColorspaceConversionEffect::get_xyz_matrix(COLORSPACE_sRGB);
		Vector3d xyz = rgb_to_xyz_matrix * Vector3d(white_balance.r, white_balance.g, white_balance.b);

		// Convert from XYZ to xyz by normalizing.
		xyz /= (xyz[0] + xyz[1] + xyz[2]);

		// Create a very rudimentary EXIF header to hold our white point.
		string exif;

		// Exif header, followed by some padding.
		exif = "Exif";
		push16(0, &exif);

		// TIFF header first:
		exif += "MM";  // Big endian.

		// Magic number.
		push16(42, &exif);

		// Offset of first IFD (relative to the MM, immediately after the header).
		push32(exif.size() - 6 + 4, &exif);

		// Now the actual IFD.

		// One entry.
		push16(1, &exif);

		// WhitePoint tag ID.
		push16(0x13e, &exif);

		// Rational type.
		push16(5, &exif);

		// Two values (x and y; z is implicit due to normalization).
		push32(2, &exif);

		// Offset (relative to the MM, immediately after the last IFD).
		push32(exif.size() - 6 + 8, &exif);

		// No more IFDs.
		push32(0, &exif);

		// The actual values.
		push32(lrintf(xyz[0] * 10000.0f), &exif);
		push32(10000, &exif);
		push32(lrintf(xyz[1] * 10000.0f), &exif);
		push32(10000, &exif);

		jpeg_write_marker(cinfo, JPEG_APP0 + 1, (const JOCTET *)exif.data(), exif.size());
	}

	// This comment marker is private to FFmpeg. It signals limited Y'CbCr range
	// (and nothing else).
	jpeg_write_marker(cinfo, JPEG_COM, (const JOCTET *)"CS=ITU601", strlen("CS=ITU601"));
}

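// Compress a dummy black frame with the given parameters and cut the result
// off right after the SOS marker, yielding a complete JPEG header that the
// VA-API path can prepend to the entropy-coded data the hardware produces.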
vector<uint8_t> MJPEGEncoder::get_jpeg_header(unsigned width, unsigned height, const RGBTriplet &white_balance, int y_h_samp_factor, int y_v_samp_factor, jpeg_compress_struct *cinfo)
{
	VectorDestinationManager dest;
	init_jpeg(width, height, white_balance, &dest, cinfo, y_h_samp_factor, y_v_samp_factor);

	// Make a dummy black image; there's seemingly no other easy way of
	// making libjpeg output all of its headers.
	assert(y_v_samp_factor <= 2);  // Or we'd need larger JSAMPROW arrays below.
	size_t block_height_y = 8 * y_v_samp_factor;
	size_t block_height_cbcr = 8;

	JSAMPROW yptr[16], cbptr[16], crptr[16];
	JSAMPARRAY data[3] = { yptr, cbptr, crptr };
	memset(tmp_y, 0, 4096);
	memset(tmp_cb, 0, 4096);
	memset(tmp_cr, 0, 4096);
	for (unsigned yy = 0; yy < block_height_y; ++yy) {
		yptr[yy] = tmp_y;
	}
	for (unsigned yy = 0; yy < block_height_cbcr; ++yy) {
		cbptr[yy] = tmp_cb;
		crptr[yy] = tmp_cr;
	}
	for (unsigned y = 0; y < height; y += block_height_y) {
		jpeg_write_raw_data(cinfo, data, block_height_y);
	}
	jpeg_finish_compress(cinfo);

	// We're only interested in the header, not the data after it.
	dest.term_destination();
	for (size_t i = 0; i < dest.dest.size() - 1; ++i) {
		if (dest.dest[i] == 0xff && dest.dest[i + 1] == 0xda) {  // Start of scan (SOS).
			unsigned len = dest.dest[i + 2] * 256 + dest.dest[i + 3];
			dest.dest.resize(i + len + 2);
			break;
		}
	}

	return dest.dest;
}

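// Return the VA-API parameter buffers (picture parameters, quantization
// matrices, Huffman tables, slice parameters) plus the ready-made JPEG
// header for the given resolution/subsampling/white balance, generating
// and caching them on first use.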
MJPEGEncoder::VAData MJPEGEncoder::get_va_data_for_parameters(unsigned width, unsigned height, unsigned y_h_samp_factor, unsigned y_v_samp_factor, const RGBTriplet &white_balance)
{
	VAKey key{width, height, y_h_samp_factor, y_v_samp_factor, white_balance};
	if (va_data_for_parameters.count(key)) {
		return va_data_for_parameters[key];
	}

	// Use libjpeg to generate a header and set sane defaults for e.g.
	// quantization tables. Then do the actual encode with VA-API.
	jpeg_compress_struct cinfo;
	vector<uint8_t> jpeg_header = get_jpeg_header(width, height, white_balance, y_h_samp_factor, y_v_samp_factor, &cinfo);

	// Picture parameters.
	VAEncPictureParameterBufferJPEG pic_param;
	memset(&pic_param, 0, sizeof(pic_param));
	pic_param.reconstructed_picture = VA_INVALID_ID;
	pic_param.picture_width = cinfo.image_width;
	pic_param.picture_height = cinfo.image_height;
	for (int component_idx = 0; component_idx < cinfo.num_components; ++component_idx) {
		const jpeg_component_info *comp = &cinfo.comp_info[component_idx];
		pic_param.component_id[component_idx] = comp->component_id;
		pic_param.quantiser_table_selector[component_idx] = comp->quant_tbl_no;
	}
	pic_param.num_components = cinfo.num_components;
	pic_param.num_scan = 1;
	pic_param.sample_bit_depth = 8;
	pic_param.coded_buf = VA_INVALID_ID;  // To be filled out by caller.
	pic_param.pic_flags.bits.huffman = 1;
	pic_param.quality = 50;  // Don't scale the given quantization matrices. (See gen8_mfc_jpeg_fqm_state)

	// Quantization matrices.
	VAQMatrixBufferJPEG q;
	memset(&q, 0, sizeof(q));

	q.load_lum_quantiser_matrix = true;
	q.load_chroma_quantiser_matrix = true;
	for (int quant_tbl_idx = 0; quant_tbl_idx < min(4, NUM_QUANT_TBLS); ++quant_tbl_idx) {
		const JQUANT_TBL *qtbl = cinfo.quant_tbl_ptrs[quant_tbl_idx];
		assert((qtbl == nullptr) == (quant_tbl_idx >= 2));
		if (qtbl == nullptr) continue;

		uint8_t *qmatrix = (quant_tbl_idx == 0) ? q.lum_quantiser_matrix : q.chroma_quantiser_matrix;
		for (int i = 0; i < 64; ++i) {
			if (qtbl->quantval[i] > 255) {
				fprintf(stderr, "Baseline JPEG only!\n");
				abort();
			}
			qmatrix[i] = qtbl->quantval[jpeg_natural_order[i]];
		}
	}

	// Huffman tables (arithmetic is not supported).
	VAHuffmanTableBufferJPEGBaseline huff;
	memset(&huff, 0, sizeof(huff));

	for (int huff_tbl_idx = 0; huff_tbl_idx < min(2, NUM_HUFF_TBLS); ++huff_tbl_idx) {
		const JHUFF_TBL *ac_hufftbl = cinfo.ac_huff_tbl_ptrs[huff_tbl_idx];
		const JHUFF_TBL *dc_hufftbl = cinfo.dc_huff_tbl_ptrs[huff_tbl_idx];
		if (ac_hufftbl == nullptr) {
			assert(dc_hufftbl == nullptr);
			huff.load_huffman_table[huff_tbl_idx] = 0;
		} else {
			assert(dc_hufftbl != nullptr);
			huff.load_huffman_table[huff_tbl_idx] = 1;

			for (int i = 0; i < 16; ++i) {
				huff.huffman_table[huff_tbl_idx].num_dc_codes[i] = dc_hufftbl->bits[i + 1];
			}
			for (int i = 0; i < 12; ++i) {
				huff.huffman_table[huff_tbl_idx].dc_values[i] = dc_hufftbl->huffval[i];
			}
			for (int i = 0; i < 16; ++i) {
				huff.huffman_table[huff_tbl_idx].num_ac_codes[i] = ac_hufftbl->bits[i + 1];
			}
			for (int i = 0; i < 162; ++i) {
				huff.huffman_table[huff_tbl_idx].ac_values[i] = ac_hufftbl->huffval[i];
			}
		}
	}

	// Slice parameters (metadata about the slice).
	VAEncSliceParameterBufferJPEG parms;
	memset(&parms, 0, sizeof(parms));
	for (int component_idx = 0; component_idx < cinfo.num_components; ++component_idx) {
		const jpeg_component_info *comp = &cinfo.comp_info[component_idx];
		parms.components[component_idx].component_selector = comp->component_id;
		parms.components[component_idx].dc_table_selector = comp->dc_tbl_no;
		parms.components[component_idx].ac_table_selector = comp->ac_tbl_no;
		if (parms.components[component_idx].dc_table_selector > 1 ||
		    parms.components[component_idx].ac_table_selector > 1) {
			fprintf(stderr, "Uses too many Huffman tables\n");
			abort();
		}
	}
	parms.num_components = cinfo.num_components;
	parms.restart_interval = cinfo.restart_interval;

	jpeg_destroy_compress(&cinfo);

	VAData ret;
	ret.jpeg_header = move(jpeg_header);
	ret.pic_param = pic_param;
	ret.q = q;
	ret.huff = huff;
	ret.parms = parms;
	va_data_for_parameters[key] = ret;
	return ret;
}

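// Hand a frame off to the VA-API encoder. Uploads the pixel data (unless it
// already lives in a VA-API buffer), submits the encode, and pushes the
// frame onto the frames_encoding queue; the actual muxing happens in
// va_receiver_thread_func() once the GPU is done.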
void MJPEGEncoder::encode_jpeg_va(QueuedFrame &&qf)
{
	PBOFrameAllocator::Userdata *userdata = (PBOFrameAllocator::Userdata *)qf.frame->userdata;
	unsigned width = qf.video_format.width;
	unsigned height = qf.video_format.height;

	VAResourcePool::VAResources resources;
	ReleaseVAResources release;
	if (userdata->data_copy_current_src == PBOFrameAllocator::Userdata::FROM_VA_API) {
		assert(is_uyvy(qf.frame));
		resources = move(userdata->va_resources);
		release = move(userdata->va_resources_release);
	} else {
		assert(userdata->data_copy_current_src == PBOFrameAllocator::Userdata::FROM_MALLOC);
		if (is_uyvy(qf.frame)) {
			resources = va_pool->get_va_resources(width, height, VA_FOURCC_UYVY);
		} else {
			assert(is_i420(qf.frame));
			resources = va_pool->get_va_resources(width, height, VA_FOURCC_NV12);
		}
		release = ReleaseVAResources(va_pool.get(), resources);
	}

	int y_h_samp_factor, y_v_samp_factor;
	if (is_uyvy(qf.frame)) {
		// 4:2:2 (sample Y' twice as often as Cb or Cr horizontally; vertical resolution is unchanged).
		y_h_samp_factor = 2;
		y_v_samp_factor = 1;
	} else {
		// 4:2:0 (sample Y' twice as often as Cb or Cr, in both directions).
		assert(is_i420(qf.frame));
		y_h_samp_factor = 2;
		y_v_samp_factor = 2;
	}

	VAData va_data = get_va_data_for_parameters(width, height, y_h_samp_factor, y_v_samp_factor, qf.white_balance);
	va_data.pic_param.coded_buf = resources.data_buffer;

	VABufferID pic_param_buffer;
	VAStatus va_status = vaCreateBuffer(va_dpy->va_dpy, resources.context, VAEncPictureParameterBufferType, sizeof(va_data.pic_param), 1, &va_data.pic_param, &pic_param_buffer);
	CHECK_VASTATUS(va_status, "vaCreateBuffer");
	VABufferDestroyer destroy_pic_param(va_dpy->va_dpy, pic_param_buffer);

	VABufferID q_buffer;
	va_status = vaCreateBuffer(va_dpy->va_dpy, resources.context, VAQMatrixBufferType, sizeof(va_data.q), 1, &va_data.q, &q_buffer);
	CHECK_VASTATUS(va_status, "vaCreateBuffer");
	VABufferDestroyer destroy_iq(va_dpy->va_dpy, q_buffer);

	VABufferID huff_buffer;
	va_status = vaCreateBuffer(va_dpy->va_dpy, resources.context, VAHuffmanTableBufferType, sizeof(va_data.huff), 1, &va_data.huff, &huff_buffer);
	CHECK_VASTATUS(va_status, "vaCreateBuffer");
	VABufferDestroyer destroy_huff(va_dpy->va_dpy, huff_buffer);

	VABufferID slice_param_buffer;
	va_status = vaCreateBuffer(va_dpy->va_dpy, resources.context, VAEncSliceParameterBufferType, sizeof(va_data.parms), 1, &va_data.parms, &slice_param_buffer);
	CHECK_VASTATUS(va_status, "vaCreateBuffer");
	VABufferDestroyer destroy_slice_param(va_dpy->va_dpy, slice_param_buffer);

	if (userdata->data_copy_current_src == PBOFrameAllocator::Userdata::FROM_VA_API) {
		// The pixel data was already put into the image by the caller.
		va_status = vaUnmapBuffer(va_dpy->va_dpy, resources.image.buf);
		CHECK_VASTATUS(va_status, "vaUnmapBuffer");
	} else {
		assert(userdata->data_copy_current_src == PBOFrameAllocator::Userdata::FROM_MALLOC);

		// Upload the pixel data.
		uint8_t *surface_p = nullptr;
		vaMapBuffer(va_dpy->va_dpy, resources.image.buf, (void **)&surface_p);

		if (is_uyvy(qf.frame)) {
			size_t field_start_line = qf.video_format.extra_lines_top;  // No interlacing support.
			size_t field_start = qf.cbcr_offset * 2 + qf.video_format.width * field_start_line * 2;

			const uint8_t *src = qf.frame->data_copy + field_start;
			uint8_t *dst = (unsigned char *)surface_p + resources.image.offsets[0];
			memcpy_with_pitch(dst, src, qf.video_format.width * 2, resources.image.pitches[0], qf.video_format.height);
		} else {
			assert(is_i420(qf.frame));
			assert(!qf.frame->interleaved);  // Makes no sense for I420.

			size_t field_start_line = qf.video_format.extra_lines_top;  // No interlacing support.
			const uint8_t *y_src = qf.frame->data + qf.video_format.width * field_start_line;
			const uint8_t *cb_src = y_src + width * height;
			const uint8_t *cr_src = cb_src + (width / 2) * (height / 2);

			uint8_t *y_dst = (unsigned char *)surface_p + resources.image.offsets[0];
			uint8_t *cbcr_dst = (unsigned char *)surface_p + resources.image.offsets[1];

			memcpy_with_pitch(y_dst, y_src, qf.video_format.width, resources.image.pitches[0], qf.video_format.height);
			interleave_with_pitch(cbcr_dst, cb_src, cr_src, qf.video_format.width / 2, resources.image.pitches[1], qf.video_format.height / 2);
		}

		va_status = vaUnmapBuffer(va_dpy->va_dpy, resources.image.buf);
		CHECK_VASTATUS(va_status, "vaUnmapBuffer");
	}

	qf.frame->data_copy = nullptr;

	// Seemingly vaPutImage() (which triggers a GPU copy) is much nicer to the
	// CPU than vaDeriveImage() and copying directly into the GPU's buffers.
	// Exactly why is unclear, but it seems to involve L3 cache usage when there
	// are many high-res (1080p+) images in play.
	va_status = vaPutImage(va_dpy->va_dpy, resources.surface, resources.image.image_id, 0, 0, width, height, 0, 0, width, height);
	CHECK_VASTATUS(va_status, "vaPutImage");

	// Finally, stick in the JPEG header.
	VAEncPackedHeaderParameterBuffer header_parm;
	header_parm.type = VAEncPackedHeaderRawData;
	header_parm.bit_length = 8 * va_data.jpeg_header.size();

	VABufferID header_parm_buffer;
	va_status = vaCreateBuffer(va_dpy->va_dpy, resources.context, VAEncPackedHeaderParameterBufferType, sizeof(header_parm), 1, &header_parm, &header_parm_buffer);
	CHECK_VASTATUS(va_status, "vaCreateBuffer");
	VABufferDestroyer destroy_header(va_dpy->va_dpy, header_parm_buffer);

	VABufferID header_data_buffer;
	va_status = vaCreateBuffer(va_dpy->va_dpy, resources.context, VAEncPackedHeaderDataBufferType, va_data.jpeg_header.size(), 1, va_data.jpeg_header.data(), &header_data_buffer);
	CHECK_VASTATUS(va_status, "vaCreateBuffer");
	VABufferDestroyer destroy_header_data(va_dpy->va_dpy, header_data_buffer);

	va_status = vaBeginPicture(va_dpy->va_dpy, resources.context, resources.surface);
	CHECK_VASTATUS(va_status, "vaBeginPicture");
	va_status = vaRenderPicture(va_dpy->va_dpy, resources.context, &pic_param_buffer, 1);
	CHECK_VASTATUS(va_status, "vaRenderPicture(pic_param)");
	va_status = vaRenderPicture(va_dpy->va_dpy, resources.context, &q_buffer, 1);
	CHECK_VASTATUS(va_status, "vaRenderPicture(q)");
	va_status = vaRenderPicture(va_dpy->va_dpy, resources.context, &huff_buffer, 1);
	CHECK_VASTATUS(va_status, "vaRenderPicture(huff)");
	va_status = vaRenderPicture(va_dpy->va_dpy, resources.context, &slice_param_buffer, 1);
	CHECK_VASTATUS(va_status, "vaRenderPicture(slice_param)");
	va_status = vaRenderPicture(va_dpy->va_dpy, resources.context, &header_parm_buffer, 1);
	CHECK_VASTATUS(va_status, "vaRenderPicture(header_parm)");
	va_status = vaRenderPicture(va_dpy->va_dpy, resources.context, &header_data_buffer, 1);
	CHECK_VASTATUS(va_status, "vaRenderPicture(header_data)");
	va_status = vaEndPicture(va_dpy->va_dpy, resources.context);
	CHECK_VASTATUS(va_status, "vaEndPicture");

	qf.resources = move(resources);
	qf.resource_releaser = move(release);

	lock_guard<mutex> lock(mu);
	frames_encoding.push(move(qf));
	any_frames_encoding.notify_all();
}

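// Receiver thread for the VA-API path: waits for submitted encodes to
// finish, then muxes the coded JPEG data (plus any audio) into the multicam
// stream and any active siphon stream.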
void MJPEGEncoder::va_receiver_thread_func()
{
	pthread_setname_np(pthread_self(), "MJPEG_Receive");
	for (;;) {
		QueuedFrame qf;
		{
			unique_lock<mutex> lock(mu);
			any_frames_encoding.wait(lock, [this] { return !frames_encoding.empty() || should_quit; });
			if (should_quit) return;
			qf = move(frames_encoding.front());
			frames_encoding.pop();
		}

		update_siphon_streams();

		assert(global_flags.card_to_mjpeg_stream_export.count(qf.card_index));  // Or should_encode_mjpeg_for_card() would have returned false.
		int stream_index = global_flags.card_to_mjpeg_stream_export[qf.card_index];

		HTTPD::StreamID multicam_id{ HTTPD::MULTICAM_STREAM, 0 };
		HTTPD::StreamID siphon_id{ HTTPD::SIPHON_STREAM, qf.card_index };
		assert(streams.count(multicam_id));
		assert(streams[multicam_id].avctx != nullptr);

		// Write audio before video, since Futatabi expects it.
		if (qf.audio.size() > 0) {
			write_audio_packet(streams[multicam_id].avctx.get(), qf.pts, stream_index + global_flags.card_to_mjpeg_stream_export.size(), qf.audio);
			if (streams.count(siphon_id)) {
				write_audio_packet(streams[siphon_id].avctx.get(), qf.pts, /*stream_index=*/1, qf.audio);
			}
		}

		VAStatus va_status = vaSyncSurface(va_dpy->va_dpy, qf.resources.surface);
		CHECK_VASTATUS(va_status, "vaSyncSurface");

		VACodedBufferSegment *segment;
		va_status = vaMapBuffer(va_dpy->va_dpy, qf.resources.data_buffer, (void **)&segment);
		CHECK_VASTATUS(va_status, "vaMapBuffer");

		const uint8_t *coded_buf = reinterpret_cast<uint8_t *>(segment->buf);
		write_mjpeg_packet(streams[multicam_id].avctx.get(), qf.pts, stream_index, coded_buf, segment->size);
		if (streams.count(siphon_id)) {
			write_mjpeg_packet(streams[siphon_id].avctx.get(), qf.pts, /*stream_index=*/0, coded_buf, segment->size);
		}

		va_status = vaUnmapBuffer(va_dpy->va_dpy, qf.resources.data_buffer);
		CHECK_VASTATUS(va_status, "vaUnmapBuffer");
	}
}

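// Software fallback: encode the frame with libjpeg. Interleaved UYVY input
// is deinterleaved into separate Y', Cb and Cr planes eight rows at a time;
// planar input is fed to libjpeg directly via row pointers.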
vector<uint8_t> MJPEGEncoder::encode_jpeg_libjpeg(const QueuedFrame &qf)
{
	unsigned width = qf.video_format.width;
	unsigned height = qf.video_format.height;

	VectorDestinationManager dest;
	jpeg_compress_struct cinfo;

	size_t field_start_line = qf.video_format.extra_lines_top;  // No interlacing support.

	PBOFrameAllocator::Userdata *userdata = (PBOFrameAllocator::Userdata *)qf.frame->userdata;
	if (userdata->pixel_format == PixelFormat_8BitYCbCr) {
		init_jpeg(width, height, qf.white_balance, &dest, &cinfo, /*y_h_samp_factor=*/2, /*y_v_samp_factor=*/1);

		assert(qf.frame->interleaved);
		size_t field_start = qf.cbcr_offset * 2 + qf.video_format.width * field_start_line * 2;

		JSAMPROW yptr[8], cbptr[8], crptr[8];
		JSAMPARRAY data[3] = { yptr, cbptr, crptr };
		for (unsigned y = 0; y < qf.video_format.height; y += 8) {
			const uint8_t *src;
			src = qf.frame->data_copy + field_start + y * qf.video_format.width * 2;

			memcpy_interleaved(tmp_cbcr, tmp_y, src, qf.video_format.width * 8 * 2);
			memcpy_interleaved(tmp_cb, tmp_cr, tmp_cbcr, qf.video_format.width * 8);
			for (unsigned yy = 0; yy < 8; ++yy) {
				yptr[yy] = tmp_y + yy * width;
				cbptr[yy] = tmp_cb + yy * width / 2;
				crptr[yy] = tmp_cr + yy * width / 2;
			}
			jpeg_write_raw_data(&cinfo, data, /*num_lines=*/8);
		}
	} else {
		assert(userdata->pixel_format == PixelFormat_8BitYCbCrPlanar);

		const movit::YCbCrFormat &ycbcr = userdata->ycbcr_format;
		init_jpeg(width, height, qf.white_balance, &dest, &cinfo, ycbcr.chroma_subsampling_x, ycbcr.chroma_subsampling_y);
		assert(ycbcr.chroma_subsampling_y <= 2);  // Or we'd need larger JSAMPROW arrays below.

		size_t field_start_line = qf.video_format.extra_lines_top;  // No interlacing support.
		const uint8_t *y_start = qf.frame->data + qf.video_format.width * field_start_line;
		const uint8_t *cb_start = y_start + width * height;
		const uint8_t *cr_start = cb_start + (width / ycbcr.chroma_subsampling_x) * (height / ycbcr.chroma_subsampling_y);

		size_t block_height_y = 8 * ycbcr.chroma_subsampling_y;
		size_t block_height_cbcr = 8;

		JSAMPROW yptr[16], cbptr[16], crptr[16];
		JSAMPARRAY data[3] = { yptr, cbptr, crptr };
		for (unsigned y = 0; y < qf.video_format.height; y += block_height_y) {
			for (unsigned yy = 0; yy < block_height_y; ++yy) {
				yptr[yy] = const_cast<JSAMPROW>(y_start) + (y + yy) * width;
			}
			unsigned cbcr_y = y / ycbcr.chroma_subsampling_y;
			for (unsigned yy = 0; yy < block_height_cbcr; ++yy) {
				cbptr[yy] = const_cast<JSAMPROW>(cb_start) + (cbcr_y + yy) * width / ycbcr.chroma_subsampling_x;
				crptr[yy] = const_cast<JSAMPROW>(cr_start) + (cbcr_y + yy) * width / ycbcr.chroma_subsampling_x;
			}
			jpeg_write_raw_data(&cinfo, data, block_height_y);
		}
	}
	jpeg_finish_compress(&cinfo);

	return dest.dest;
}

void MJPEGEncoder::add_stream(HTTPD::StreamID stream_id)
{
	AVFormatContextWithCloser avctx;

	// Set up the mux. We don't use the Mux wrapper, because it's geared towards
	// a situation with only one video stream (and possibly one audio stream)
	// with known width/height, and we don't need the extra functionality it provides.
	avctx.reset(avformat_alloc_context());
	avctx->oformat = av_guess_format("nut", nullptr, nullptr);

	uint8_t *buf = (uint8_t *)av_malloc(MUX_BUFFER_SIZE);
	avctx->pb = avio_alloc_context(buf, MUX_BUFFER_SIZE, 1, &ffmpeg_contexts[stream_id], nullptr, nullptr, nullptr);
	avctx->pb->write_data_type = &MJPEGEncoder::write_packet2_thunk;
	avctx->flags = AVFMT_FLAG_CUSTOM_IO;

	if (stream_id.type == HTTPD::MULTICAM_STREAM) {
		for (unsigned card_idx = 0; card_idx < global_flags.card_to_mjpeg_stream_export.size(); ++card_idx) {
			add_video_stream(avctx.get());
		}
		for (unsigned card_idx = 0; card_idx < global_flags.card_to_mjpeg_stream_export.size(); ++card_idx) {
			add_audio_stream(avctx.get());
		}
	} else {
		assert(stream_id.type == HTTPD::SIPHON_STREAM);
		add_video_stream(avctx.get());
		add_audio_stream(avctx.get());
	}
	finalize_mux(avctx.get());

	Stream s;
	s.avctx = move(avctx);
	streams[stream_id] = move(s);
}

void MJPEGEncoder::update_siphon_streams()
{
	// Bring the list of streams into sync with what the clients need.
	for (auto it = streams.begin(); it != streams.end(); ) {
		if (it->first.type != HTTPD::SIPHON_STREAM) {
			++it;
			continue;
		}
		if (httpd->get_num_connected_siphon_clients(it->first.index) == 0) {
			av_free(it->second.avctx->pb->buffer);
			streams.erase(it++);
		} else {
			++it;
		}
	}
	for (unsigned stream_idx = 0; stream_idx < MAX_VIDEO_CARDS; ++stream_idx) {
		HTTPD::StreamID stream_id{ HTTPD::SIPHON_STREAM, stream_idx };
		if (streams.count(stream_id) == 0 && httpd->get_num_connected_siphon_clients(stream_idx) > 0) {
			add_stream(stream_id);
		}
	}
}

void MJPEGEncoder::create_ffmpeg_context(HTTPD::StreamID stream_id)
{
	ffmpeg_contexts.emplace(stream_id, WritePacket2Context{ this, stream_id });
}