git.sesse.net Git - nageru/blob - nageru/mjpeg_encoder.cpp

   1 #include "mjpeg_encoder.h"
   2
   3 #include <Eigen/Core>
   4 #include <algorithm>
   5 #include <assert.h>
   6 #include <bmusb/bmusb.h>
   7 #include <jpeglib.h>
   8 #include <math.h>
   9 #include <movit/colorspace_conversion_effect.h>
  10 #include <movit/effect.h>
  11 #include <movit/image_format.h>
  12 #include <movit/ycbcr.h>
  13 #include <mutex>
  14 #include <pthread.h>
  15 #include <stddef.h>
  16 #include <stdint.h>
  17 #include <stdio.h>
  18 #include <stdlib.h>
  19 #include <string.h>
  20 #include <string>
  21 #include <thread>
  22 #include <type_traits>
  23 #include <utility>
  24 #include <va/va.h>
  25 #include <va/va_enc_jpeg.h>
  26 #include <vector>
  27 #if __SSE2__
  28 #include <immintrin.h>
  29 #endif
  30
  31 extern "C" {
  32 #include <libavcodec/codec_id.h>
  33 #include <libavcodec/defs.h>
  34 #include <libavcodec/packet.h>
  35 #include <libavformat/avformat.h>
  36 #include <libavformat/avio.h>
  37 #include <libavutil/avutil.h>
  38 #include <libavutil/channel_layout.h>
  39 #include <libavutil/dict.h>
  40 #include <libavutil/mathematics.h>
  41 #include <libavutil/mem.h>
  42 #include <libavutil/pixfmt.h>
  43 #include <libavutil/rational.h>
  44 }
  45
  46 #include "flags.h"
  47 #include "pbo_frame_allocator.h"
  48 #include "ref_counted_frame.h"
  49 #include "shared/ffmpeg_raii.h"
  50 #include "shared/httpd.h"
  51 #include "shared/memcpy_interleaved.h"
  52 #include "shared/metrics.h"
  53 #include "shared/shared_defs.h"
  54 #include "shared/timebase.h"
  55 #include "shared/va_display.h"
  56 #include "shared/va_resource_pool.h"
  57
  58 using namespace Eigen;
  59 using namespace bmusb;
  60 using namespace movit;
  61 using namespace std;
  62
  63 static VAImageFormat uyvy_format, nv12_format;
  64
  65 extern void memcpy_with_pitch(uint8_t *dst, const uint8_t *src, size_t src_width, size_t dst_pitch, size_t height);
  66
  67 // The inverse of memcpy_interleaved(), with (slow) support for pitch.
  68 void interleave_with_pitch(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, size_t src_width, size_t dst_pitch, size_t height)
  69 {
  70 #if __SSE2__
  71         if (dst_pitch == src_width * 2 && (src_width * height) % 16 == 0) {
  72                 __m128i *dptr = reinterpret_cast<__m128i *>(dst);
  73                 const __m128i *sptr1 = reinterpret_cast<const __m128i *>(src1);
  74                 const __m128i *sptr2 = reinterpret_cast<const __m128i *>(src2);
  75                 for (size_t i = 0; i < src_width * height / 16; ++i) {
  76                         __m128i data1 = _mm_loadu_si128(sptr1++);
  77                         __m128i data2 = _mm_loadu_si128(sptr2++);
  78                         _mm_storeu_si128(dptr++, _mm_unpacklo_epi8(data1, data2));
  79                         _mm_storeu_si128(dptr++, _mm_unpackhi_epi8(data1, data2));
  80                 }
  81                 return;
  82         }
  83 #endif
  84
  85         for (size_t y = 0; y < height; ++y) {
  86                 uint8_t *dptr = dst + y * dst_pitch;
  87                 const uint8_t *sptr1 = src1 + y * src_width;
  88                 const uint8_t *sptr2 = src2 + y * src_width;
  89                 for (size_t x = 0; x < src_width; ++x) {
  90                         *dptr++ = *sptr1++;
  91                         *dptr++ = *sptr2++;
  92                 }
  93         }
  94 }
  95
  96 // From libjpeg (although it's of course identical between implementations).
  97 static const int jpeg_natural_order[DCTSIZE2] = {
  98          0,  1,  8, 16,  9,  2,  3, 10,
  99         17, 24, 32, 25, 18, 11,  4,  5,
 100         12, 19, 26, 33, 40, 48, 41, 34,
 101         27, 20, 13,  6,  7, 14, 21, 28,
 102         35, 42, 49, 56, 57, 50, 43, 36,
 103         29, 22, 15, 23, 30, 37, 44, 51,
 104         58, 59, 52, 45, 38, 31, 39, 46,
 105         53, 60, 61, 54, 47, 55, 62, 63,
 106 };
 107
 108 struct VectorDestinationManager {
 109         jpeg_destination_mgr pub;
 110         std::vector<uint8_t> dest;
 111
 112         VectorDestinationManager()
 113         {
 114                 pub.init_destination = init_destination_thunk;
 115                 pub.empty_output_buffer = empty_output_buffer_thunk;
 116                 pub.term_destination = term_destination_thunk;
 117         }
 118
 119         static void init_destination_thunk(j_compress_ptr ptr)
 120         {
 121                 ((VectorDestinationManager *)(ptr->dest))->init_destination();
 122         }
 123
 124         inline void init_destination()
 125         {
 126                 make_room(0);
 127         }
 128
 129         static boolean empty_output_buffer_thunk(j_compress_ptr ptr)
 130         {
 131                 return ((VectorDestinationManager *)(ptr->dest))->empty_output_buffer();
 132         }
 133
 134         inline bool empty_output_buffer()
 135         {
 136                 make_room(dest.size());  // Should ignore pub.free_in_buffer!
 137                 return true;
 138         }
 139
 140         inline void make_room(size_t bytes_used)
 141         {
 142                 dest.resize(bytes_used + 4096);
 143                 dest.resize(dest.capacity());
 144                 pub.next_output_byte = dest.data() + bytes_used;
 145                 pub.free_in_buffer = dest.size() - bytes_used;
 146         }
 147
 148         static void term_destination_thunk(j_compress_ptr ptr)
 149         {
 150                 ((VectorDestinationManager *)(ptr->dest))->term_destination();
 151         }
 152
 153         inline void term_destination()
 154         {
 155                 dest.resize(dest.size() - pub.free_in_buffer);
 156         }
 157 };
 158 static_assert(std::is_standard_layout<VectorDestinationManager>::value, "");
 159
 160 int MJPEGEncoder::write_packet2_thunk(void *opaque, uint8_t *buf, int buf_size, AVIODataMarkerType type, int64_t time)
 161 {
 162         WritePacket2Context *ctx = (WritePacket2Context *)opaque;
 163         return ctx->mjpeg_encoder->write_packet2(ctx->stream_id, buf, buf_size, type, time);
 164 }
 165
 166 int MJPEGEncoder::write_packet2(HTTPD::StreamID stream_id, uint8_t *buf, int buf_size, AVIODataMarkerType type, int64_t time)
 167 {
 168         string *mux_header = &streams[stream_id].mux_header;
 169         if (type == AVIO_DATA_MARKER_HEADER) {
 170                 mux_header->append((char *)buf, buf_size);
 171                 httpd->set_header(stream_id, *mux_header);
 172         } else {
 173                 httpd->add_data(stream_id, (char *)buf, buf_size, /*keyframe=*/true, AV_NOPTS_VALUE, AVRational{ AV_TIME_BASE, 1 });
 174         }
 175         return buf_size;
 176 }
 177
 178 namespace {
 179
 180 void add_video_stream(AVFormatContext *avctx)
 181 {
 182         AVStream *stream = avformat_new_stream(avctx, nullptr);
 183         if (stream == nullptr) {
 184                 fprintf(stderr, "avformat_new_stream() failed\n");
 185                 abort();
 186         }
 187
 188         // FFmpeg is very picky about having audio at 1/48000 timebase,
 189         // no matter what we write. Even though we'd prefer our usual 1/120000,
 190         // put the video on the same one, so that we can have locked audio.
 191         stream->time_base = AVRational{ 1, OUTPUT_FREQUENCY };
 192         stream->codecpar->codec_type = AVMEDIA_TYPE_VIDEO;
 193         stream->codecpar->codec_id = AV_CODEC_ID_MJPEG;
 194
 195         // Used for aspect ratio only. Can change without notice (the mux won't care).
 196         stream->codecpar->width = global_flags.width;
 197         stream->codecpar->height = global_flags.height;
 198
 199         // TODO: We could perhaps use the interpretation for each card here
 200         // (or at least the command-line flags) instead of the defaults,
 201         // but what would we do when they change?
 202         stream->codecpar->color_primaries = AVCOL_PRI_BT709;
 203         stream->codecpar->color_trc = AVCOL_TRC_IEC61966_2_1;
 204         stream->codecpar->color_space = AVCOL_SPC_BT709;
 205         stream->codecpar->color_range = AVCOL_RANGE_MPEG;
 206         stream->codecpar->chroma_location = AVCHROMA_LOC_LEFT;
 207         stream->codecpar->field_order = AV_FIELD_PROGRESSIVE;
 208 }
 209
 210 void add_audio_stream(AVFormatContext *avctx)
 211 {
 212         AVStream *stream = avformat_new_stream(avctx, nullptr);
 213         if (stream == nullptr) {
 214                 fprintf(stderr, "avformat_new_stream() failed\n");
 215                 abort();
 216         }
 217         stream->time_base = AVRational{ 1, OUTPUT_FREQUENCY };
 218         stream->codecpar->codec_type = AVMEDIA_TYPE_AUDIO;
 219         stream->codecpar->codec_id = AV_CODEC_ID_PCM_S32LE;
 220         stream->codecpar->ch_layout.order = AV_CHANNEL_ORDER_NATIVE;
 221         stream->codecpar->ch_layout.nb_channels = 2;
 222         stream->codecpar->ch_layout.u.mask = AV_CH_LAYOUT_STEREO;
 223         stream->codecpar->sample_rate = OUTPUT_FREQUENCY;
 224 }
 225
 226 void finalize_mux(AVFormatContext *avctx)
 227 {
 228         AVDictionary *options = NULL;
 229         vector<pair<string, string>> opts = MUX_OPTS;
 230         for (pair<string, string> opt : opts) {
 231                 av_dict_set(&options, opt.first.c_str(), opt.second.c_str(), 0);
 232         }
 233         if (avformat_write_header(avctx, &options) < 0) {
 234                 fprintf(stderr, "avformat_write_header() failed\n");
 235                 abort();
 236         }
 237 }
 238
 239 }  // namespace
 240
 241 MJPEGEncoder::MJPEGEncoder(HTTPD *httpd, const string &va_display)
 242         : httpd(httpd)
 243 {
 244         create_ffmpeg_context(HTTPD::StreamID{ HTTPD::MULTICAM_STREAM, 0 });
 245         for (unsigned stream_idx = 0; stream_idx < MAX_VIDEO_CARDS; ++stream_idx) {
 246                 create_ffmpeg_context(HTTPD::StreamID{ HTTPD::SIPHON_STREAM, stream_idx });
 247         }
 248
 249         add_stream(HTTPD::StreamID{ HTTPD::MULTICAM_STREAM, 0 });
 250
 251         // Initialize VA-API.
 252         VAConfigID config_id_422, config_id_420;
 253         string error;
 254         va_dpy = try_open_va(va_display, { VAProfileJPEGBaseline }, VAEntrypointEncPicture,
 255                 {
 256                         { "4:2:2", VA_RT_FORMAT_YUV422, VA_FOURCC_UYVY, &config_id_422, &uyvy_format },
 257                         // We'd prefer VA_FOURCC_I420, but it's not supported by Intel's driver.
 258                         { "4:2:0", VA_RT_FORMAT_YUV420, VA_FOURCC_NV12, &config_id_420, &nv12_format }
 259                 },
 260                 /*chosen_profile=*/nullptr, &error);
 261         if (va_dpy == nullptr) {
 262                 fprintf(stderr, "Could not initialize VA-API for MJPEG encoding: %s. JPEGs will be encoded in software if needed.\n", error.c_str());
 263         }
 264
 265         encoder_thread = thread(&MJPEGEncoder::encoder_thread_func, this);
 266         if (va_dpy != nullptr) {
 267                 va_pool.reset(new VAResourcePool(va_dpy->va_dpy, uyvy_format, nv12_format, config_id_422, config_id_420, /*with_data_buffer=*/true));
 268                 va_receiver_thread = thread(&MJPEGEncoder::va_receiver_thread_func, this);
 269         }
 270
 271         global_metrics.add("mjpeg_frames", {{ "status", "dropped" }, { "reason", "zero_size" }}, &metric_mjpeg_frames_zero_size_dropped);
 272         global_metrics.add("mjpeg_frames", {{ "status", "dropped" }, { "reason", "interlaced" }}, &metric_mjpeg_frames_interlaced_dropped);
 273         global_metrics.add("mjpeg_frames", {{ "status", "dropped" }, { "reason", "unsupported_pixel_format" }}, &metric_mjpeg_frames_unsupported_pixel_format_dropped);
 274         global_metrics.add("mjpeg_frames", {{ "status", "dropped" }, { "reason", "oversized" }}, &metric_mjpeg_frames_oversized_dropped);
 275         global_metrics.add("mjpeg_frames", {{ "status", "dropped" }, { "reason", "overrun" }}, &metric_mjpeg_overrun_dropped);
 276         global_metrics.add("mjpeg_frames", {{ "status", "submitted" }}, &metric_mjpeg_overrun_submitted);
 277
 278         running = true;
 279 }
 280
 281 MJPEGEncoder::~MJPEGEncoder()
 282 {
 283         for (auto &id_and_stream : streams) {
 284                 av_free(id_and_stream.second.avctx->pb->buffer);
 285         }
 286
 287         global_metrics.remove("mjpeg_frames", {{ "status", "dropped" }, { "reason", "zero_size" }});
 288         global_metrics.remove("mjpeg_frames", {{ "status", "dropped" }, { "reason", "interlaced" }});
 289         global_metrics.remove("mjpeg_frames", {{ "status", "dropped" }, { "reason", "unsupported_pixel_format" }});
 290         global_metrics.remove("mjpeg_frames", {{ "status", "dropped" }, { "reason", "oversized" }});
 291         global_metrics.remove("mjpeg_frames", {{ "status", "dropped" }, { "reason", "overrun" }});
 292         global_metrics.remove("mjpeg_frames", {{ "status", "submitted" }});
 293 }
 294
 295 void MJPEGEncoder::stop()
 296 {
 297         if (!running) {
 298                 return;
 299         }
 300         running = false;
 301         should_quit = true;
 302         any_frames_to_be_encoded.notify_all();
 303         any_frames_encoding.notify_all();
 304         encoder_thread.join();
 305         if (va_dpy != nullptr) {
 306                 va_receiver_thread.join();
 307         }
 308 }
 309
 310 namespace {
 311
 312 bool is_uyvy(RefCountedFrame frame)
 313 {
 314         PBOFrameAllocator::Userdata *userdata = (PBOFrameAllocator::Userdata *)frame->userdata;
 315         return userdata->pixel_format == PixelFormat_8BitYCbCr && frame->interleaved;
 316 }
 317
 318 bool is_i420(RefCountedFrame frame)
 319 {
 320         PBOFrameAllocator::Userdata *userdata = (PBOFrameAllocator::Userdata *)frame->userdata;
 321         return userdata->pixel_format == PixelFormat_8BitYCbCrPlanar &&
 322                 userdata->ycbcr_format.chroma_subsampling_x == 2 &&
 323                 userdata->ycbcr_format.chroma_subsampling_y == 2;
 324 }
 325
 326 }  // namespace
 327
 328 void MJPEGEncoder::upload_frame(int64_t pts, unsigned card_index, RefCountedFrame frame, const bmusb::VideoFormat &video_format, size_t y_offset, size_t cbcr_offset, vector<int32_t> audio, const RGBTriplet &white_balance)
 329 {
 330         if (video_format.width == 0 || video_format.height == 0) {
 331                 ++metric_mjpeg_frames_zero_size_dropped;
 332                 return;
 333         }
 334         if (video_format.interlaced) {
 335                 fprintf(stderr, "Card %u: Ignoring JPEG encoding for interlaced frame\n", card_index);
 336                 ++metric_mjpeg_frames_interlaced_dropped;
 337                 return;
 338         }
 339         if (!is_uyvy(frame) && !is_i420(frame)) {
 340                 fprintf(stderr, "Card %u: Ignoring JPEG encoding for unsupported pixel format\n", card_index);
 341                 ++metric_mjpeg_frames_unsupported_pixel_format_dropped;
 342                 return;
 343         }
 344         if (video_format.width > 4096 || video_format.height > 4096) {
 345                 fprintf(stderr, "Card %u: Ignoring JPEG encoding for oversized frame\n", card_index);
 346                 ++metric_mjpeg_frames_oversized_dropped;
 347                 return;
 348         }
 349
 350         lock_guard<mutex> lock(mu);
 351         if (frames_to_be_encoded.size() + frames_encoding.size() > 50) {
 352                 fprintf(stderr, "WARNING: MJPEG encoding doesn't keep up, discarding frame.\n");
 353                 ++metric_mjpeg_overrun_dropped;
 354                 return;
 355         }
 356         ++metric_mjpeg_overrun_submitted;
 357         frames_to_be_encoded.push(QueuedFrame{ pts, card_index, frame, video_format, y_offset, cbcr_offset, move(audio), white_balance });
 358         any_frames_to_be_encoded.notify_all();
 359 }
 360
 361 bool MJPEGEncoder::should_encode_mjpeg_for_card(unsigned card_index)
 362 {
 363         // Only bother doing MJPEG encoding if there are any connected clients
 364         // that want the stream.
 365         if (httpd->get_num_connected_multicam_clients() == 0 &&
 366             httpd->get_num_connected_siphon_clients(card_index) == 0) {
 367                 return false;
 368         }
 369
 370         auto it = global_flags.card_to_mjpeg_stream_export.find(card_index);
 371         return (it != global_flags.card_to_mjpeg_stream_export.end());
 372 }
 373
 374 void MJPEGEncoder::encoder_thread_func()
 375 {
 376         pthread_setname_np(pthread_self(), "MJPEG_Encode");
 377         posix_memalign((void **)&tmp_y, 4096, 4096 * 8);
 378         posix_memalign((void **)&tmp_cbcr, 4096, 4096 * 8);
 379         posix_memalign((void **)&tmp_cb, 4096, 4096 * 8);
 380         posix_memalign((void **)&tmp_cr, 4096, 4096 * 8);
 381
 382         for (;;) {
 383                 QueuedFrame qf;
 384                 {
 385                         unique_lock<mutex> lock(mu);
 386                         any_frames_to_be_encoded.wait(lock, [this] { return !frames_to_be_encoded.empty() || should_quit; });
 387                         if (should_quit) break;
 388                         qf = move(frames_to_be_encoded.front());
 389                         frames_to_be_encoded.pop();
 390                 }
 391
 392                 assert(global_flags.card_to_mjpeg_stream_export.count(qf.card_index));  // Or should_encode_mjpeg_for_card() would have returned false.
 393                 int stream_index = global_flags.card_to_mjpeg_stream_export[qf.card_index];
 394
 395                 if (va_dpy != nullptr) {
 396                         // Will call back in the receiver thread.
 397                         encode_jpeg_va(move(qf));
 398                 } else {
 399                         update_siphon_streams();
 400
 401                         HTTPD::StreamID multicam_id{ HTTPD::MULTICAM_STREAM, 0 };
 402                         HTTPD::StreamID siphon_id{ HTTPD::SIPHON_STREAM, qf.card_index };
 403                         assert(streams.count(multicam_id));
 404
 405                         // Write audio before video, since Futatabi expects it.
 406                         if (qf.audio.size() > 0) {
 407                                 write_audio_packet(streams[multicam_id].avctx.get(), qf.pts, stream_index + global_flags.card_to_mjpeg_stream_export.size(), qf.audio);
 408                                 if (streams.count(siphon_id)) {
 409                                         write_audio_packet(streams[siphon_id].avctx.get(), qf.pts, /*stream_index=*/1, qf.audio);
 410                                 }
 411                         }
 412
 413                         // Encode synchronously, in the same thread.
 414                         vector<uint8_t> jpeg = encode_jpeg_libjpeg(qf);
 415                         write_mjpeg_packet(streams[multicam_id].avctx.get(), qf.pts, stream_index, jpeg.data(), jpeg.size());
 416                         if (streams.count(siphon_id)) {
 417                                 write_mjpeg_packet(streams[siphon_id].avctx.get(), qf.pts, /*stream_index=*/0, jpeg.data(), jpeg.size());
 418                         }
 419                 }
 420         }
 421
 422         free(tmp_y);
 423         free(tmp_cbcr);
 424         free(tmp_cb);
 425         free(tmp_cr);
 426 }
 427
 428 void MJPEGEncoder::write_mjpeg_packet(AVFormatContext *avctx, int64_t pts, unsigned stream_index, const uint8_t *jpeg, size_t jpeg_size)
 429 {
 430         AVPacket pkt;
 431         memset(&pkt, 0, sizeof(pkt));
 432         pkt.buf = nullptr;
 433         pkt.data = const_cast<uint8_t *>(jpeg);
 434         pkt.size = jpeg_size;
 435         pkt.stream_index = stream_index;
 436         pkt.flags = AV_PKT_FLAG_KEY;
 437         AVRational time_base = avctx->streams[pkt.stream_index]->time_base;
 438         pkt.pts = pkt.dts = av_rescale_q(pts, AVRational{ 1, TIMEBASE }, time_base);
 439         pkt.duration = 0;
 440
 441         if (av_write_frame(avctx, &pkt) < 0) {
 442                 fprintf(stderr, "av_write_frame() failed\n");
 443                 abort();
 444         }
 445 }
 446
 447 void MJPEGEncoder::write_audio_packet(AVFormatContext *avctx, int64_t pts, unsigned stream_index, const vector<int32_t> &audio)
 448 {
 449         AVPacket pkt;
 450         memset(&pkt, 0, sizeof(pkt));
 451         pkt.buf = nullptr;
 452         pkt.data = reinterpret_cast<uint8_t *>(const_cast<int32_t *>(&audio[0]));
 453         pkt.size = audio.size() * sizeof(audio[0]);
 454         pkt.stream_index = stream_index;
 455         pkt.flags = AV_PKT_FLAG_KEY;
 456         AVRational time_base = avctx->streams[pkt.stream_index]->time_base;
 457         pkt.pts = pkt.dts = av_rescale_q(pts, AVRational{ 1, TIMEBASE }, time_base);
 458         size_t num_stereo_samples = audio.size() / 2;
 459         pkt.duration = av_rescale_q(num_stereo_samples, AVRational{ 1, OUTPUT_FREQUENCY }, time_base);
 460
 461         if (av_write_frame(avctx, &pkt) < 0) {
 462                 fprintf(stderr, "av_write_frame() failed\n");
 463                 abort();
 464         }
 465 }
 466
 467 class VABufferDestroyer {
 468 public:
 469         VABufferDestroyer(VADisplay dpy, VABufferID buf)
 470                 : dpy(dpy), buf(buf) {}
 471
 472         ~VABufferDestroyer() {
 473                 VAStatus va_status = vaDestroyBuffer(dpy, buf);
 474                 CHECK_VASTATUS(va_status, "vaDestroyBuffer");
 475         }
 476
 477 private:
 478         VADisplay dpy;
 479         VABufferID buf;
 480 };
 481
 482 namespace {
 483
 484 void push16(uint16_t val, string *str)
 485 {
 486         str->push_back(val >> 8);
 487         str->push_back(val & 0xff);
 488 }
 489
 490 void push32(uint32_t val, string *str)
 491 {
 492         str->push_back(val >> 24);
 493         str->push_back((val >> 16) & 0xff);
 494         str->push_back((val >> 8) & 0xff);
 495         str->push_back(val & 0xff);
 496 }
 497
 498 }  // namespace
 499
 500 void MJPEGEncoder::init_jpeg(unsigned width, unsigned height, const RGBTriplet &white_balance, VectorDestinationManager *dest, jpeg_compress_struct *cinfo, int y_h_samp_factor, int y_v_samp_factor)
 501 {
 502         jpeg_error_mgr jerr;
 503         cinfo->err = jpeg_std_error(&jerr);
 504         jpeg_create_compress(cinfo);
 505
 506         cinfo->dest = (jpeg_destination_mgr *)dest;
 507
 508         cinfo->input_components = 3;
 509         jpeg_set_defaults(cinfo);
 510         jpeg_set_quality(cinfo, quality, /*force_baseline=*/false);
 511
 512         cinfo->image_width = width;
 513         cinfo->image_height = height;
 514         cinfo->raw_data_in = true;
 515         jpeg_set_colorspace(cinfo, JCS_YCbCr);
 516         cinfo->comp_info[0].h_samp_factor = y_h_samp_factor;
 517         cinfo->comp_info[0].v_samp_factor = y_v_samp_factor;
 518         cinfo->comp_info[1].h_samp_factor = 1;
 519         cinfo->comp_info[1].v_samp_factor = 1;
 520         cinfo->comp_info[2].h_samp_factor = 1;
 521         cinfo->comp_info[2].v_samp_factor = 1;
 522         cinfo->CCIR601_sampling = true;  // Seems to be mostly ignored by libjpeg, though.
 523         jpeg_start_compress(cinfo, true);
 524
 525         if (fabs(white_balance.r - 1.0f) > 1e-3 ||
 526             fabs(white_balance.g - 1.0f) > 1e-3 ||
 527             fabs(white_balance.b - 1.0f) > 1e-3) {
 528                 // Convert from (linear) RGB to XYZ.
 529                 Matrix3d rgb_to_xyz_matrix = movit::ColorspaceConversionEffect::get_xyz_matrix(COLORSPACE_sRGB);
 530                 Vector3d xyz = rgb_to_xyz_matrix * Vector3d(white_balance.r, white_balance.g, white_balance.b);
 531
 532                 // Convert from XYZ to xyz by normalizing.
 533                 xyz /= (xyz[0] + xyz[1] + xyz[2]);
 534
 535                 // Create a very rudimentary EXIF header to hold our white point.
 536                 string exif;
 537
 538                 // Exif header, followed by some padding.
 539                 exif = "Exif";
 540                 push16(0, &exif);
 541
 542                 // TIFF header first:
 543                 exif += "MM";  // Big endian.
 544
 545                 // Magic number.
 546                 push16(42, &exif);
 547
 548                 // Offset of first IFD (relative to the MM, immediately after the header).
 549                 push32(exif.size() - 6 + 4, &exif);
 550
 551                 // Now the actual IFD.
 552
 553                 // One entry.
 554                 push16(1, &exif);
 555
 556                 // WhitePoint tag ID.
 557                 push16(0x13e, &exif);
 558
 559                 // Rational type.
 560                 push16(5, &exif);
 561
 562                 // Two values (x and y; z is implicit due to normalization).
 563                 push32(2, &exif);
 564
 565                 // Offset (relative to the MM, immediately after the last IFD).
 566                 push32(exif.size() - 6 + 8, &exif);
 567
 568                 // No more IFDs.
 569                 push32(0, &exif);
 570
 571                 // The actual values.
 572                 push32(lrintf(xyz[0] * 10000.0f), &exif);
 573                 push32(10000, &exif);
 574                 push32(lrintf(xyz[1] * 10000.0f), &exif);
 575                 push32(10000, &exif);
 576
 577                 jpeg_write_marker(cinfo, JPEG_APP0 + 1, (const JOCTET *)exif.data(), exif.size());
 578         }
 579
 580         // This comment marker is private to FFmpeg. It signals limited Y'CbCr range
 581         // (and nothing else).
 582         jpeg_write_marker(cinfo, JPEG_COM, (const JOCTET *)"CS=ITU601", strlen("CS=ITU601"));
 583 }
 584
 585 vector<uint8_t> MJPEGEncoder::get_jpeg_header(unsigned width, unsigned height, const RGBTriplet &white_balance, int y_h_samp_factor, int y_v_samp_factor, jpeg_compress_struct *cinfo)
 586 {
 587         VectorDestinationManager dest;
 588         init_jpeg(width, height, white_balance, &dest, cinfo, y_h_samp_factor, y_v_samp_factor);
 589
 590         // Make a dummy black image; there's seemingly no other easy way of
 591         // making libjpeg outputting all of its headers.
 592         assert(y_v_samp_factor <= 2);  // Or we'd need larger JSAMPROW arrays below.
 593         size_t block_height_y = 8 * y_v_samp_factor;
 594         size_t block_height_cbcr = 8;
 595
 596         JSAMPROW yptr[16], cbptr[16], crptr[16];
 597         JSAMPARRAY data[3] = { yptr, cbptr, crptr };
 598         memset(tmp_y, 0, 4096);
 599         memset(tmp_cb, 0, 4096);
 600         memset(tmp_cr, 0, 4096);
 601         for (unsigned yy = 0; yy < block_height_y; ++yy) {
 602                 yptr[yy] = tmp_y;
 603         }
 604         for (unsigned yy = 0; yy < block_height_cbcr; ++yy) {
 605                 cbptr[yy] = tmp_cb;
 606                 crptr[yy] = tmp_cr;
 607         }
 608         for (unsigned y = 0; y < height; y += block_height_y) {
 609                 jpeg_write_raw_data(cinfo, data, block_height_y);
 610         }
 611         jpeg_finish_compress(cinfo);
 612
 613         // We're only interested in the header, not the data after it.
 614         dest.term_destination();
 615         for (size_t i = 0; i < dest.dest.size() - 1; ++i) {
 616                 if (dest.dest[i] == 0xff && dest.dest[i + 1] == 0xda) {  // Start of scan (SOS).
 617                         unsigned len = dest.dest[i + 2] * 256 + dest.dest[i + 3];
 618                         dest.dest.resize(i + len + 2);
 619                         break;
 620                 }
 621         }
 622
 623         return dest.dest;
 624 }
 625
 626 MJPEGEncoder::VAData MJPEGEncoder::get_va_data_for_parameters(unsigned width, unsigned height, unsigned y_h_samp_factor, unsigned y_v_samp_factor, const RGBTriplet &white_balance)
 627 {
 628         VAKey key{width, height, y_h_samp_factor, y_v_samp_factor, white_balance};
 629         if (va_data_for_parameters.count(key)) {
 630                 return va_data_for_parameters[key];
 631         }
 632
 633         // Use libjpeg to generate a header and set sane defaults for e.g.
 634         // quantization tables. Then do the actual encode with VA-API.
 635         jpeg_compress_struct cinfo;
 636         vector<uint8_t> jpeg_header = get_jpeg_header(width, height, white_balance, y_h_samp_factor, y_v_samp_factor, &cinfo);
 637
 638         // Picture parameters.
 639         VAEncPictureParameterBufferJPEG pic_param;
 640         memset(&pic_param, 0, sizeof(pic_param));
 641         pic_param.reconstructed_picture = VA_INVALID_ID;
 642         pic_param.picture_width = cinfo.image_width;
 643         pic_param.picture_height = cinfo.image_height;
 644         for (int component_idx = 0; component_idx < cinfo.num_components; ++component_idx) {
 645                 const jpeg_component_info *comp = &cinfo.comp_info[component_idx];
 646                 pic_param.component_id[component_idx] = comp->component_id;
 647                 pic_param.quantiser_table_selector[component_idx] = comp->quant_tbl_no;
 648         }
 649         pic_param.num_components = cinfo.num_components;
 650         pic_param.num_scan = 1;
 651         pic_param.sample_bit_depth = 8;
 652         pic_param.coded_buf = VA_INVALID_ID;  // To be filled out by caller.
 653         pic_param.pic_flags.bits.huffman = 1;
 654         pic_param.quality = 50;  // Don't scale the given quantization matrices. (See gen8_mfc_jpeg_fqm_state)
 655
 656         // Quantization matrices.
 657         VAQMatrixBufferJPEG q;
 658         memset(&q, 0, sizeof(q));
 659
 660         q.load_lum_quantiser_matrix = true;
 661         q.load_chroma_quantiser_matrix = true;
 662         for (int quant_tbl_idx = 0; quant_tbl_idx < min(4, NUM_QUANT_TBLS); ++quant_tbl_idx) {
 663                 const JQUANT_TBL *qtbl = cinfo.quant_tbl_ptrs[quant_tbl_idx];
 664                 assert((qtbl == nullptr) == (quant_tbl_idx >= 2));
 665                 if (qtbl == nullptr) continue;
 666
 667                 uint8_t *qmatrix = (quant_tbl_idx == 0) ? q.lum_quantiser_matrix : q.chroma_quantiser_matrix;
 668                 for (int i = 0; i < 64; ++i) {
 669                         if (qtbl->quantval[i] > 255) {
 670                                 fprintf(stderr, "Baseline JPEG only!\n");
 671                                 abort();
 672                         }
 673                         qmatrix[i] = qtbl->quantval[jpeg_natural_order[i]];
 674                 }
 675         }
 676
 677         // Huffman tables (arithmetic is not supported).
 678         VAHuffmanTableBufferJPEGBaseline huff;
 679         memset(&huff, 0, sizeof(huff));
 680
 681         for (int huff_tbl_idx = 0; huff_tbl_idx < min(2, NUM_HUFF_TBLS); ++huff_tbl_idx) {
 682                 const JHUFF_TBL *ac_hufftbl = cinfo.ac_huff_tbl_ptrs[huff_tbl_idx];
 683                 const JHUFF_TBL *dc_hufftbl = cinfo.dc_huff_tbl_ptrs[huff_tbl_idx];
 684                 if (ac_hufftbl == nullptr) {
 685                         assert(dc_hufftbl == nullptr);
 686                         huff.load_huffman_table[huff_tbl_idx] = 0;
 687                 } else {
 688                         assert(dc_hufftbl != nullptr);
 689                         huff.load_huffman_table[huff_tbl_idx] = 1;
 690
 691                         for (int i = 0; i < 16; ++i) {
 692                                 huff.huffman_table[huff_tbl_idx].num_dc_codes[i] = dc_hufftbl->bits[i + 1];
 693                         }
 694                         for (int i = 0; i < 12; ++i) {
 695                                 huff.huffman_table[huff_tbl_idx].dc_values[i] = dc_hufftbl->huffval[i];
 696                         }
 697                         for (int i = 0; i < 16; ++i) {
 698                                 huff.huffman_table[huff_tbl_idx].num_ac_codes[i] = ac_hufftbl->bits[i + 1];
 699                         }
 700                         for (int i = 0; i < 162; ++i) {
 701                                 huff.huffman_table[huff_tbl_idx].ac_values[i] = ac_hufftbl->huffval[i];
 702                         }
 703                 }
 704         }
 705
 706         // Slice parameters (metadata about the slice).
 707         VAEncSliceParameterBufferJPEG parms;
 708         memset(&parms, 0, sizeof(parms));
 709         for (int component_idx = 0; component_idx < cinfo.num_components; ++component_idx) {
 710                 const jpeg_component_info *comp = &cinfo.comp_info[component_idx];
 711                 parms.components[component_idx].component_selector = comp->component_id;
 712                 parms.components[component_idx].dc_table_selector = comp->dc_tbl_no;
 713                 parms.components[component_idx].ac_table_selector = comp->ac_tbl_no;
 714                 if (parms.components[component_idx].dc_table_selector > 1 ||
 715                     parms.components[component_idx].ac_table_selector > 1) {
 716                         fprintf(stderr, "Uses too many Huffman tables\n");
 717                         abort();
 718                 }
 719         }
 720         parms.num_components = cinfo.num_components;
 721         parms.restart_interval = cinfo.restart_interval;
 722
 723         jpeg_destroy_compress(&cinfo);
 724
 725         VAData ret;
 726         ret.jpeg_header = move(jpeg_header);
 727         ret.pic_param = pic_param;
 728         ret.q = q;
 729         ret.huff = huff;
 730         ret.parms = parms;
 731         va_data_for_parameters[key] = ret;
 732         return ret;
 733 }
 734
 735 void MJPEGEncoder::encode_jpeg_va(QueuedFrame &&qf)
 736 {
 737         PBOFrameAllocator::Userdata *userdata = (PBOFrameAllocator::Userdata *)qf.frame->userdata;
 738         unsigned width = qf.video_format.width;
 739         unsigned height = qf.video_format.height;
 740
 741         VAResourcePool::VAResources resources;
 742         ReleaseVAResources release;
 743         if (userdata->data_copy_current_src == PBOFrameAllocator::Userdata::FROM_VA_API) {
 744                 assert(is_uyvy(qf.frame));
 745                 resources = move(userdata->va_resources);
 746                 release = move(userdata->va_resources_release);
 747         } else {
 748                 assert(userdata->data_copy_current_src == PBOFrameAllocator::Userdata::FROM_MALLOC);
 749                 if (is_uyvy(qf.frame)) {
 750                         resources = va_pool->get_va_resources(width, height, VA_FOURCC_UYVY);
 751                 } else {
 752                         assert(is_i420(qf.frame));
 753                         resources = va_pool->get_va_resources(width, height, VA_FOURCC_NV12);
 754                 }
 755                 release = ReleaseVAResources(va_pool.get(), resources);
 756         }
 757
 758         int y_h_samp_factor, y_v_samp_factor;
 759         if (is_uyvy(qf.frame)) {
 760                 // 4:2:2 (sample Y' twice as often horizontally as Cb or Cr, vertical is left alone).
 761                 y_h_samp_factor = 2;
 762                 y_v_samp_factor = 1;
 763         } else {
 764                 // 4:2:0 (sample Y' twice as often as Cb or Cr, in both directions)
 765                 assert(is_i420(qf.frame));
 766                 y_h_samp_factor = 2;
 767                 y_v_samp_factor = 2;
 768         }
 769
 770         VAData va_data = get_va_data_for_parameters(width, height, y_h_samp_factor, y_v_samp_factor, qf.white_balance);
 771         va_data.pic_param.coded_buf = resources.data_buffer;
 772
 773         VABufferID pic_param_buffer;
 774         VAStatus va_status = vaCreateBuffer(va_dpy->va_dpy, resources.context, VAEncPictureParameterBufferType, sizeof(va_data.pic_param), 1, &va_data.pic_param, &pic_param_buffer);
 775         CHECK_VASTATUS(va_status, "vaCreateBuffer");
 776         VABufferDestroyer destroy_pic_param(va_dpy->va_dpy, pic_param_buffer);
 777
 778         VABufferID q_buffer;
 779         va_status = vaCreateBuffer(va_dpy->va_dpy, resources.context, VAQMatrixBufferType, sizeof(va_data.q), 1, &va_data.q, &q_buffer);
 780         CHECK_VASTATUS(va_status, "vaCreateBuffer");
 781         VABufferDestroyer destroy_iq(va_dpy->va_dpy, q_buffer);
 782
 783         VABufferID huff_buffer;
 784         va_status = vaCreateBuffer(va_dpy->va_dpy, resources.context, VAHuffmanTableBufferType, sizeof(va_data.huff), 1, &va_data.huff, &huff_buffer);
 785         CHECK_VASTATUS(va_status, "vaCreateBuffer");
 786         VABufferDestroyer destroy_huff(va_dpy->va_dpy, huff_buffer);
 787
 788         VABufferID slice_param_buffer;
 789         va_status = vaCreateBuffer(va_dpy->va_dpy, resources.context, VAEncSliceParameterBufferType, sizeof(va_data.parms), 1, &va_data.parms, &slice_param_buffer);
 790         CHECK_VASTATUS(va_status, "vaCreateBuffer");
 791         VABufferDestroyer destroy_slice_param(va_dpy->va_dpy, slice_param_buffer);
 792
 793         if (userdata->data_copy_current_src == PBOFrameAllocator::Userdata::FROM_VA_API) {
 794                 // The pixel data is already put into the image by the caller.
 795                 va_status = vaUnmapBuffer(va_dpy->va_dpy, resources.image.buf);
 796                 CHECK_VASTATUS(va_status, "vaUnmapBuffer");
 797         } else {
 798                 assert(userdata->data_copy_current_src == PBOFrameAllocator::Userdata::FROM_MALLOC);
 799
 800                 // Upload the pixel data.
 801                 uint8_t *surface_p = nullptr;
 802                 vaMapBuffer(va_dpy->va_dpy, resources.image.buf, (void **)&surface_p);
 803
 804                 if (is_uyvy(qf.frame)) {
 805                         size_t field_start_line = qf.video_format.extra_lines_top;  // No interlacing support.
 806                         size_t field_start = qf.cbcr_offset * 2 + qf.video_format.width * field_start_line * 2;
 807
 808                         const uint8_t *src = qf.frame->data_copy + field_start;
 809                         uint8_t *dst = (unsigned char *)surface_p + resources.image.offsets[0];
 810                         memcpy_with_pitch(dst, src, qf.video_format.width * 2, resources.image.pitches[0], qf.video_format.height);
 811                 } else {
 812                         assert(is_i420(qf.frame));
 813                         assert(!qf.frame->interleaved);  // Makes no sense for I420.
 814
 815                         size_t field_start_line = qf.video_format.extra_lines_top;  // No interlacing support.
 816                         const uint8_t *y_src = qf.frame->data + qf.video_format.width * field_start_line;
 817                         const uint8_t *cb_src = y_src + width * height;
 818                         const uint8_t *cr_src = cb_src + (width / 2) * (height / 2);
 819
 820                         uint8_t *y_dst = (unsigned char *)surface_p + resources.image.offsets[0];
 821                         uint8_t *cbcr_dst = (unsigned char *)surface_p + resources.image.offsets[1];
 822
 823                         memcpy_with_pitch(y_dst, y_src, qf.video_format.width, resources.image.pitches[0], qf.video_format.height);
 824                         interleave_with_pitch(cbcr_dst, cb_src, cr_src, qf.video_format.width / 2, resources.image.pitches[1], qf.video_format.height / 2);
 825                 }
 826
 827                 va_status = vaUnmapBuffer(va_dpy->va_dpy, resources.image.buf);
 828                 CHECK_VASTATUS(va_status, "vaUnmapBuffer");
 829         }
 830
 831         qf.frame->data_copy = nullptr;
 832
 833         // Seemingly vaPutImage() (which triggers a GPU copy) is much nicer to the
 834         // CPU than vaDeriveImage() and copying directly into the GPU's buffers.
 835         // Exactly why is unclear, but it seems to involve L3 cache usage when there
 836         // are many high-res (1080p+) images in play.
 837         va_status = vaPutImage(va_dpy->va_dpy, resources.surface, resources.image.image_id, 0, 0, width, height, 0, 0, width, height);
 838         CHECK_VASTATUS(va_status, "vaPutImage");
 839
 840         // Finally, stick in the JPEG header.
 841         VAEncPackedHeaderParameterBuffer header_parm;
 842         header_parm.type = VAEncPackedHeaderRawData;
 843         header_parm.bit_length = 8 * va_data.jpeg_header.size();
 844
 845         VABufferID header_parm_buffer;
 846         va_status = vaCreateBuffer(va_dpy->va_dpy, resources.context, VAEncPackedHeaderParameterBufferType, sizeof(header_parm), 1, &header_parm, &header_parm_buffer);
 847         CHECK_VASTATUS(va_status, "vaCreateBuffer");
 848         VABufferDestroyer destroy_header(va_dpy->va_dpy, header_parm_buffer);
 849
 850         VABufferID header_data_buffer;
 851         va_status = vaCreateBuffer(va_dpy->va_dpy, resources.context, VAEncPackedHeaderDataBufferType, va_data.jpeg_header.size(), 1, va_data.jpeg_header.data(), &header_data_buffer);
 852         CHECK_VASTATUS(va_status, "vaCreateBuffer");
 853         VABufferDestroyer destroy_header_data(va_dpy->va_dpy, header_data_buffer);
 854
 855         va_status = vaBeginPicture(va_dpy->va_dpy, resources.context, resources.surface);
 856         CHECK_VASTATUS(va_status, "vaBeginPicture");
 857         va_status = vaRenderPicture(va_dpy->va_dpy, resources.context, &pic_param_buffer, 1);
 858         CHECK_VASTATUS(va_status, "vaRenderPicture(pic_param)");
 859         va_status = vaRenderPicture(va_dpy->va_dpy, resources.context, &q_buffer, 1);
 860         CHECK_VASTATUS(va_status, "vaRenderPicture(q)");
 861         va_status = vaRenderPicture(va_dpy->va_dpy, resources.context, &huff_buffer, 1);
 862         CHECK_VASTATUS(va_status, "vaRenderPicture(huff)");
 863         va_status = vaRenderPicture(va_dpy->va_dpy, resources.context, &slice_param_buffer, 1);
 864         CHECK_VASTATUS(va_status, "vaRenderPicture(slice_param)");
 865         va_status = vaRenderPicture(va_dpy->va_dpy, resources.context, &header_parm_buffer, 1);
 866         CHECK_VASTATUS(va_status, "vaRenderPicture(header_parm)");
 867         va_status = vaRenderPicture(va_dpy->va_dpy, resources.context, &header_data_buffer, 1);
 868         CHECK_VASTATUS(va_status, "vaRenderPicture(header_data)");
 869         va_status = vaEndPicture(va_dpy->va_dpy, resources.context);
 870         CHECK_VASTATUS(va_status, "vaEndPicture");
 871
 872         qf.resources = move(resources);
 873         qf.resource_releaser = move(release);
 874
 875         lock_guard<mutex> lock(mu);
 876         frames_encoding.push(move(qf));
 877         any_frames_encoding.notify_all();
 878 }
 879
 880 void MJPEGEncoder::va_receiver_thread_func()
 881 {
 882         pthread_setname_np(pthread_self(), "MJPEG_Receive");
 883         for (;;) {
 884                 QueuedFrame qf;
 885                 {
 886                         unique_lock<mutex> lock(mu);
 887                         any_frames_encoding.wait(lock, [this] { return !frames_encoding.empty() || should_quit; });
 888                         if (should_quit) return;
 889                         qf = move(frames_encoding.front());
 890                         frames_encoding.pop();
 891                 }
 892
 893                 update_siphon_streams();
 894
 895                 assert(global_flags.card_to_mjpeg_stream_export.count(qf.card_index));  // Or should_encode_mjpeg_for_card() would have returned false.
 896                 int stream_index = global_flags.card_to_mjpeg_stream_export[qf.card_index];
 897
 898                 HTTPD::StreamID multicam_id{ HTTPD::MULTICAM_STREAM, 0 };
 899                 HTTPD::StreamID siphon_id{ HTTPD::SIPHON_STREAM, qf.card_index };
 900                 assert(streams.count(multicam_id));
 901                 assert(streams[multicam_id].avctx != nullptr);
 902
 903                 // Write audio before video, since Futatabi expects it.
 904                 if (qf.audio.size() > 0) {
 905                         write_audio_packet(streams[multicam_id].avctx.get(), qf.pts, stream_index + global_flags.card_to_mjpeg_stream_export.size(), qf.audio);
 906                         if (streams.count(siphon_id)) {
 907                                 write_audio_packet(streams[siphon_id].avctx.get(), qf.pts, /*stream_index=*/1, qf.audio);
 908                         }
 909                 }
 910
 911                 VAStatus va_status = vaSyncSurface(va_dpy->va_dpy, qf.resources.surface);
 912                 CHECK_VASTATUS(va_status, "vaSyncSurface");
 913
 914                 VACodedBufferSegment *segment;
 915                 va_status = vaMapBuffer(va_dpy->va_dpy, qf.resources.data_buffer, (void **)&segment);
 916                 CHECK_VASTATUS(va_status, "vaMapBuffer");
 917
 918                 const uint8_t *coded_buf = reinterpret_cast<uint8_t *>(segment->buf);
 919                 write_mjpeg_packet(streams[multicam_id].avctx.get(), qf.pts, stream_index, coded_buf, segment->size);
 920                 if (streams.count(siphon_id)) {
 921                         write_mjpeg_packet(streams[siphon_id].avctx.get(), qf.pts, /*stream_index=*/0, coded_buf, segment->size);
 922                 }
 923
 924                 va_status = vaUnmapBuffer(va_dpy->va_dpy, qf.resources.data_buffer);
 925                 CHECK_VASTATUS(va_status, "vaUnmapBuffer");
 926         }
 927 }
 928
 929 vector<uint8_t> MJPEGEncoder::encode_jpeg_libjpeg(const QueuedFrame &qf)
 930 {
 931         unsigned width = qf.video_format.width;
 932         unsigned height = qf.video_format.height;
 933
 934         VectorDestinationManager dest;
 935         jpeg_compress_struct cinfo;
 936
 937         size_t field_start_line = qf.video_format.extra_lines_top;  // No interlacing support.
 938
 939         PBOFrameAllocator::Userdata *userdata = (PBOFrameAllocator::Userdata *)qf.frame->userdata;
 940         if (userdata->pixel_format == PixelFormat_8BitYCbCr) {
 941                 init_jpeg(width, height, qf.white_balance, &dest, &cinfo, /*y_h_samp_factor=*/2, /*y_v_samp_factor=*/1);
 942
 943                 assert(qf.frame->interleaved);
 944                 size_t field_start = qf.cbcr_offset * 2 + qf.video_format.width * field_start_line * 2;
 945
 946                 JSAMPROW yptr[8], cbptr[8], crptr[8];
 947                 JSAMPARRAY data[3] = { yptr, cbptr, crptr };
 948                 for (unsigned y = 0; y < qf.video_format.height; y += 8) {
 949                         const uint8_t *src;
 950                         src = qf.frame->data_copy + field_start + y * qf.video_format.width * 2;
 951
 952                         memcpy_interleaved(tmp_cbcr, tmp_y, src, qf.video_format.width * 8 * 2);
 953                         memcpy_interleaved(tmp_cb, tmp_cr, tmp_cbcr, qf.video_format.width * 8);
 954                         for (unsigned yy = 0; yy < 8; ++yy) {
 955                                 yptr[yy] = tmp_y + yy * width;
 956                                 cbptr[yy] = tmp_cb + yy * width / 2;
 957                                 crptr[yy] = tmp_cr + yy * width / 2;
 958                         }
 959                         jpeg_write_raw_data(&cinfo, data, /*num_lines=*/8);
 960                 }
 961         } else {
 962                 assert(userdata->pixel_format == PixelFormat_8BitYCbCrPlanar);
 963
 964                 const movit::YCbCrFormat &ycbcr = userdata->ycbcr_format;
 965                 init_jpeg(width, height, qf.white_balance, &dest, &cinfo, ycbcr.chroma_subsampling_x, ycbcr.chroma_subsampling_y);
 966                 assert(ycbcr.chroma_subsampling_y <= 2);  // Or we'd need larger JSAMPROW arrays below.
 967
 968                 size_t field_start_line = qf.video_format.extra_lines_top;  // No interlacing support.
 969                 const uint8_t *y_start = qf.frame->data + qf.video_format.width * field_start_line;
 970                 const uint8_t *cb_start = y_start + width * height;
 971                 const uint8_t *cr_start = cb_start + (width / ycbcr.chroma_subsampling_x) * (height / ycbcr.chroma_subsampling_y);
 972
 973                 size_t block_height_y = 8 * ycbcr.chroma_subsampling_y;
 974                 size_t block_height_cbcr = 8;
 975
 976                 JSAMPROW yptr[16], cbptr[16], crptr[16];
 977                 JSAMPARRAY data[3] = { yptr, cbptr, crptr };
 978                 for (unsigned y = 0; y < qf.video_format.height; y += block_height_y) {
 979                         for (unsigned yy = 0; yy < block_height_y; ++yy) {
 980                                 yptr[yy] = const_cast<JSAMPROW>(y_start) + (y + yy) * width;
 981                         }
 982                         unsigned cbcr_y = y / ycbcr.chroma_subsampling_y;
 983                         for (unsigned yy = 0; yy < block_height_cbcr; ++yy) {
 984                                 cbptr[yy] = const_cast<JSAMPROW>(cb_start) + (cbcr_y + yy) * width / ycbcr.chroma_subsampling_x;
 985                                 crptr[yy] = const_cast<JSAMPROW>(cr_start) + (cbcr_y + yy) * width / ycbcr.chroma_subsampling_x;
 986                         }
 987                         jpeg_write_raw_data(&cinfo, data, block_height_y);
 988                 }
 989         }
 990         jpeg_finish_compress(&cinfo);
 991
 992         return dest.dest;
 993 }
 994
 995 void MJPEGEncoder::add_stream(HTTPD::StreamID stream_id)
 996 {
 997         AVFormatContextWithCloser avctx;
 998
 999         // Set up the mux. We don't use the Mux wrapper, because it's geared towards
1000         // a situation with only one video stream (and possibly one audio stream)
1001         // with known width/height, and we don't need the extra functionality it provides.
1002         avctx.reset(avformat_alloc_context());
1003         avctx->oformat = av_guess_format("nut", nullptr, nullptr);
1004
1005         uint8_t *buf = (uint8_t *)av_malloc(MUX_BUFFER_SIZE);
1006         avctx->pb = avio_alloc_context(buf, MUX_BUFFER_SIZE, 1, &ffmpeg_contexts[stream_id], nullptr, nullptr, nullptr);
1007         avctx->pb->write_data_type = &MJPEGEncoder::write_packet2_thunk;
1008         avctx->flags = AVFMT_FLAG_CUSTOM_IO;
1009
1010         if (stream_id.type == HTTPD::MULTICAM_STREAM) {
1011                 for (unsigned card_idx = 0; card_idx < global_flags.card_to_mjpeg_stream_export.size(); ++card_idx) {
1012                         add_video_stream(avctx.get());
1013                 }
1014                 for (unsigned card_idx = 0; card_idx < global_flags.card_to_mjpeg_stream_export.size(); ++card_idx) {
1015                         add_audio_stream(avctx.get());
1016                 }
1017         } else {
1018                 assert(stream_id.type == HTTPD::SIPHON_STREAM);
1019                 add_video_stream(avctx.get());
1020                 add_audio_stream(avctx.get());
1021         }
1022         finalize_mux(avctx.get());
1023
1024         Stream s;
1025         s.avctx = move(avctx);
1026         streams[stream_id] = move(s);
1027 }
1028
1029 void MJPEGEncoder::update_siphon_streams()
1030 {
1031         // Bring the list of streams into sync with what the clients need.
1032         for (auto it = streams.begin(); it != streams.end(); ) {
1033                 if (it->first.type != HTTPD::SIPHON_STREAM) {
1034                         ++it;
1035                         continue;
1036                 }
1037                 if (httpd->get_num_connected_siphon_clients(it->first.index) == 0) {
1038                         av_free(it->second.avctx->pb->buffer);
1039                         streams.erase(it++);
1040                 } else {
1041                         ++it;
1042                 }
1043         }
1044         for (unsigned stream_idx = 0; stream_idx < MAX_VIDEO_CARDS; ++stream_idx) {
1045                 HTTPD::StreamID stream_id{ HTTPD::SIPHON_STREAM, stream_idx };
1046                 if (streams.count(stream_id) == 0 && httpd->get_num_connected_siphon_clients(stream_idx) > 0) {
1047                         add_stream(stream_id);
1048                 }
1049         }
1050 }
1051
1052 void MJPEGEncoder::create_ffmpeg_context(HTTPD::StreamID stream_id)
1053 {
1054         ffmpeg_contexts.emplace(stream_id, WritePacket2Context{ this, stream_id });
1055 }