Add functionality for MJPEG export.
author Steinar H. Gunderson <sgunderson@bigfoot.com>
Sun, 14 Oct 2018 22:14:22 +0000 (00:14 +0200)
committer Steinar H. Gunderson <sgunderson@bigfoot.com>
Sun, 11 Nov 2018 13:20:38 +0000 (14:20 +0100)
This simply exports all incoming frames under /multicam.mp4
as one big 4:2:2 MJPEG multi-video stream. The intended use is
multi-camera recording or instant replay.
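
Since each capture card gets its own video stream inside the single MP4 mux,
a client can pull out one camera by stream index. Below is a minimal
libavformat client sketch for consuming the export; the port (9095) is an
assumption based on Nageru's default HTTP port and is not part of this commit.

// Sketch: connect to the multicam export and list the JPEG packets per card.
// The URL/port are assumptions; adjust them for your setup.
extern "C" {
#include <libavformat/avformat.h>
}
#include <cinttypes>
#include <cstdio>

int main()
{
	avformat_network_init();

	AVFormatContext *ctx = nullptr;
	if (avformat_open_input(&ctx, "http://localhost:9095/multicam.mp4", nullptr, nullptr) < 0) {
		fprintf(stderr, "Could not open multicam stream\n");
		return 1;
	}
	if (avformat_find_stream_info(ctx, nullptr) < 0) {
		fprintf(stderr, "Could not find stream info\n");
		return 1;
	}
	printf("%u camera streams\n", ctx->nb_streams);

	AVPacket pkt;
	while (av_read_frame(ctx, &pkt) >= 0) {
		// Each packet is one complete JPEG; stream_index is the card index.
		printf("card %d: %d bytes, pts %" PRId64 "\n",
		       pkt.stream_index, pkt.size, pkt.pts);
		av_packet_unref(&pkt);
	}
	avformat_close_input(&ctx);
	return 0;
}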

15 files changed:
README
decklink_capture.cpp
httpd.cpp
httpd.h
kaeru.cpp
meson.build
mixer.cpp
mixer.h
mjpeg_encoder.cpp [new file with mode: 0644]
mjpeg_encoder.h [new file with mode: 0644]
pbo_frame_allocator.cpp
quicksync_encoder.cpp
quicksync_encoder_impl.h
va_display_with_cleanup.h [new file with mode: 0644]
video_encoder.cpp

diff --git a/README b/README
index e02ea23d39cd9668be1e04689894aaf58b866f4a..1bc3d41ede44bccddce077fb89cc366bc0827cb2 100644 (file)
--- a/README
+++ b/README
@@ -61,6 +61,8 @@ Nageru currently needs:
  - ffmpeg for muxing, and for encoding audio. You will need at least
    version 3.1.
 
+ - libjpeg, for encoding the MJPEG multi-camera stream if requested.
+
  - Working OpenGL; Movit works with almost any modern OpenGL implementation.
    Nageru has been tested with Intel on Mesa (you want 11.2 or newer, due
    to critical stability bugfixes), and with NVIDIA's proprietary drivers.
diff --git a/decklink_capture.cpp b/decklink_capture.cpp
index 881e1817b1b7262f295584ac24e7f6d80fd1b65e..0f48e3e77f48ffe18c74175675060b52ea02f50b 100644 (file)
--- a/decklink_capture.cpp
+++ b/decklink_capture.cpp
@@ -265,6 +265,9 @@ HRESULT STDMETHODCALLTYPE DeckLinkCapture::VideoInputFrameArrived(
                        } else {
                                memcpy(current_video_frame.data, frame_bytes, num_bytes);
                        }
+                       if (current_video_frame.data_copy != nullptr) {
+                               memcpy(current_video_frame.data_copy, frame_bytes, num_bytes);
+                       }
                        current_video_frame.len += num_bytes;
 
                        video_format.width = width;
diff --git a/httpd.cpp b/httpd.cpp
index f6441768bbdde1df3d31e2b42a2cb13430019376..8d33e75a4f6b5bde8e6f8b79aa23ffa007e8351f 100644 (file)
--- a/httpd.cpp
+++ b/httpd.cpp
@@ -58,11 +58,13 @@ void HTTPD::stop()
        }
 }
 
-void HTTPD::add_data(const char *buf, size_t size, bool keyframe, int64_t time, AVRational timebase)
+void HTTPD::add_data(StreamType stream_type, const char *buf, size_t size, bool keyframe, int64_t time, AVRational timebase)
 {
        unique_lock<mutex> lock(streams_mutex);
        for (Stream *stream : streams) {
-               stream->add_data(buf, size, keyframe ? Stream::DATA_TYPE_KEYFRAME : Stream::DATA_TYPE_OTHER, time, timebase);
+               if (stream->get_stream_type() == stream_type) {
+                       stream->add_data(buf, size, keyframe ? Stream::DATA_TYPE_KEYFRAME : Stream::DATA_TYPE_OTHER, time, timebase);
+               }
        }
 }
 
@@ -87,6 +89,12 @@ int HTTPD::answer_to_connection(MHD_Connection *connection,
        } else {
                framing = HTTPD::Stream::FRAMING_RAW;
        }
+       HTTPD::StreamType stream_type;
+       if (strcmp(url, "/multicam.mp4") == 0) {
+               stream_type = HTTPD::StreamType::MULTICAM_STREAM;
+       } else {
+               stream_type = HTTPD::StreamType::MAIN_STREAM;
+       }
 
        if (strcmp(url, "/metrics") == 0) {
                string contents = global_metrics.serialize();
@@ -121,8 +129,8 @@ int HTTPD::answer_to_connection(MHD_Connection *connection,
                return ret;
        }
 
-       HTTPD::Stream *stream = new HTTPD::Stream(this, framing);
-       stream->add_data(header.data(), header.size(), Stream::DATA_TYPE_HEADER, AV_NOPTS_VALUE, AVRational{ 1, 0 });
+       HTTPD::Stream *stream = new HTTPD::Stream(this, framing, stream_type);
+       stream->add_data(header[stream_type].data(), header[stream_type].size(), Stream::DATA_TYPE_HEADER, AV_NOPTS_VALUE, AVRational{ 1, 0 });
        {
                unique_lock<mutex> lock(streams_mutex);
                streams.insert(stream);
diff --git a/httpd.h b/httpd.h
index 57c649b61158c6f73b498aeca6556b97bc52f179..1ff5c51108facf03ca1b0d8fc2f076a63938e595 100644 (file)
--- a/httpd.h
+++ b/httpd.h
@@ -31,9 +31,15 @@ public:
        HTTPD();
        ~HTTPD();
 
+       enum StreamType {
+               MAIN_STREAM,
+               MULTICAM_STREAM,
+               NUM_STREAM_TYPES
+       };
+
        // Should be called before start().
-       void set_header(const std::string &data) {
-               header = data;
+       void set_header(StreamType stream_type, const std::string &data) {
+               header[stream_type] = data;
        }
 
        // Should be called before start() (due to threading issues).
@@ -47,7 +53,7 @@ public:
 
        void start(int port);
        void stop();
-       void add_data(const char *buf, size_t size, bool keyframe, int64_t time, AVRational timebase);
+       void add_data(StreamType stream_type, const char *buf, size_t size, bool keyframe, int64_t time, AVRational timebase);
        int64_t get_num_connected_clients() const {
                return metric_num_connected_clients.load();
        }
@@ -72,7 +78,8 @@ private:
                        FRAMING_RAW,
                        FRAMING_METACUBE
                };
-               Stream(HTTPD *parent, Framing framing) : parent(parent), framing(framing) {}
+               Stream(HTTPD *parent, Framing framing, StreamType stream_type)
+                       : parent(parent), framing(framing), stream_type(stream_type) {}
 
                static ssize_t reader_callback_thunk(void *cls, uint64_t pos, char *buf, size_t max);
                ssize_t reader_callback(uint64_t pos, char *buf, size_t max);
@@ -85,6 +92,7 @@ private:
                void add_data(const char *buf, size_t size, DataType data_type, int64_t time, AVRational timebase);
                void stop();
                HTTPD *get_parent() const { return parent; }
+               StreamType get_stream_type() const { return stream_type; }
 
        private:
                HTTPD *parent;
@@ -96,6 +104,7 @@ private:
                std::deque<std::string> buffered_data;  // Protected by <buffer_mutex>.
                size_t used_of_buffered_data = 0;  // How many bytes of the first element of <buffered_data> that is already used. Protected by <mutex>.
                size_t seen_keyframe = false;
+               StreamType stream_type;
        };
 
        MHD_Daemon *mhd = nullptr;
@@ -106,7 +115,7 @@ private:
                CORSPolicy cors_policy;
        };
        std::unordered_map<std::string, Endpoint> endpoints;
-       std::string header;
+       std::string header[NUM_STREAM_TYPES];
 
        // Metrics.
        std::atomic<int64_t> metric_num_connected_clients{0};
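
With this split, every producer tags what it hands to HTTPD with a stream
type, and HTTPD replays the matching header to late-joining clients before
forwarding packets. A sketch of the producer-side convention (feed_multicam
is a hypothetical helper; its shape mirrors the write_packet2() callbacks
added and adjusted later in this commit):

#include "httpd.h"

extern "C" {
#include <libavformat/avformat.h>
}

#include <string>

// Hypothetical producer callback feeding the multicam endpoint.
int feed_multicam(HTTPD *httpd, std::string *mux_header,
                  const uint8_t *buf, int buf_size, AVIODataMarkerType type)
{
	if (type == AVIO_DATA_MARKER_HEADER) {
		// The mux header is stored per stream type, so that every new
		// client first receives a valid MP4 header for its endpoint.
		mux_header->append((const char *)buf, buf_size);
		httpd->set_header(HTTPD::MULTICAM_STREAM, *mux_header);
	} else {
		// All MJPEG packets are keyframes, so any packet is a safe
		// join point for a late client.
		httpd->add_data(HTTPD::MULTICAM_STREAM, (const char *)buf, buf_size,
		                /*keyframe=*/true, AV_NOPTS_VALUE,
		                AVRational{ AV_TIME_BASE, 1 });
	}
	return buf_size;
}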
diff --git a/kaeru.cpp b/kaeru.cpp
index 10f1e9394b2a5c0ddab4e42a226b0ff7af4ccc73..b3a9bb3e43f92e09501198cd54c190d3a5579538 100644 (file)
--- a/kaeru.cpp
+++ b/kaeru.cpp
@@ -31,6 +31,8 @@ BasicStats *global_basic_stats = nullptr;
 QuittableSleeper should_quit;
 MuxMetrics stream_mux_metrics;
 
+namespace {
+
 int write_packet(void *opaque, uint8_t *buf, int buf_size, AVIODataMarkerType type, int64_t time)
 {
        static bool seen_sync_markers = false;
@@ -47,13 +49,15 @@ int write_packet(void *opaque, uint8_t *buf, int buf_size, AVIODataMarkerType ty
 
        if (type == AVIO_DATA_MARKER_HEADER) {
                stream_mux_header.append((char *)buf, buf_size);
-               httpd->set_header(stream_mux_header);
+               httpd->set_header(HTTPD::MAIN_STREAM, stream_mux_header);
        } else {
-               httpd->add_data((char *)buf, buf_size, type == AVIO_DATA_MARKER_SYNC_POINT, time, AVRational{ AV_TIME_BASE, 1 });
+               httpd->add_data(HTTPD::MAIN_STREAM, (char *)buf, buf_size, type == AVIO_DATA_MARKER_SYNC_POINT, time, AVRational{ AV_TIME_BASE, 1 });
        }
        return buf_size;
 }
 
+}  // namespace
+
 unique_ptr<Mux> create_mux(HTTPD *httpd, AVOutputFormat *oformat, X264Encoder *x264_encoder, AudioEncoder *audio_encoder)
 {
        AVFormatContext *avctx = avformat_alloc_context();
diff --git a/meson.build b/meson.build
index cc161f2dae4e4a12576960e7c89d398289564fa7..d35dcb5661a7c0b3a66d880a87003f79de3d131c 100644 (file)
--- a/meson.build
+++ b/meson.build
@@ -190,7 +190,7 @@ nageru_link_with += audio
 # Mixer objects.
 srcs += ['chroma_subsampler.cpp', 'v210_converter.cpp', 'mixer.cpp', 'pbo_frame_allocator.cpp',
        'context.cpp', 'theme.cpp', 'image_input.cpp', 'alsa_output.cpp',
-       'disk_space_estimator.cpp', 'timecode_renderer.cpp', 'tweaked_inputs.cpp']
+       'disk_space_estimator.cpp', 'timecode_renderer.cpp', 'tweaked_inputs.cpp', 'mjpeg_encoder.cpp']
 
 # Streaming and encoding objects (largely the set that is shared between Nageru and Kaeru).
 stream_srcs = ['quicksync_encoder.cpp', 'x264_encoder.cpp', 'x264_dynamic.cpp', 'x264_speed_control.cpp', 'video_encoder.cpp',
diff --git a/mixer.cpp b/mixer.cpp
index deaa8e73079710b7f13b8dffd6a9f084f0ae9b4c..294040f1bfd6fc6dde71a793af91c536e2c9f12e 100644 (file)
--- a/mixer.cpp
+++ b/mixer.cpp
 #include "flags.h"
 #include "input_mapping.h"
 #include "metrics.h"
+#include "mjpeg_encoder.h"
 #include "pbo_frame_allocator.h"
 #include "ref_counted_gl_sync.h"
 #include "resampling_queue.h"
 #include "timebase.h"
 #include "timecode_renderer.h"
 #include "v210_converter.h"
+#include "va_display_with_cleanup.h"
 #include "video_encoder.h"
 
 #undef Status
@@ -356,6 +358,7 @@ Mixer::Mixer(const QSurfaceFormat &format, unsigned num_cards)
        display_chain->finalize();
 
        video_encoder.reset(new VideoEncoder(resource_pool.get(), h264_encoder_surface, global_flags.va_display, global_flags.width, global_flags.height, &httpd, global_disk_space_estimator));
+       mjpeg_encoder.reset(new MJPEGEncoder(&httpd, global_flags.va_display));
 
        // Must be instantiated after VideoEncoder has initialized global_flags.use_zerocopy.
        theme.reset(new Theme(global_flags.theme_filename, global_flags.theme_dirs, resource_pool.get(), num_cards));
@@ -499,6 +502,7 @@ Mixer::Mixer(const QSurfaceFormat &format, unsigned num_cards)
 
 Mixer::~Mixer()
 {
+       mjpeg_encoder->stop();
        httpd.stop();
        BMUSBCapture::stop_bm_thread();
 
@@ -954,6 +958,9 @@ void Mixer::bm_frame(unsigned card_index, uint16_t timecode,
                        new_frame.upload_func = upload_func;
                        new_frame.dropped_frames = dropped_frames;
                        new_frame.received_timestamp = video_frame.received_timestamp;  // Ignore the audio timestamp.
+                       new_frame.video_format = video_format;
+                       new_frame.y_offset = y_offset;
+                       new_frame.cbcr_offset = cbcr_offset;
                        card->new_frames.push_back(move(new_frame));
                        card->jitter_history.frame_arrived(video_frame.received_timestamp, frame_length, dropped_frames);
                        card->may_have_dropped_last_frame = false;
@@ -1063,6 +1070,14 @@ void Mixer::thread_func()
                                new_frame->upload_func();
                                new_frame->upload_func = nullptr;
                        }
+
+                       // There are situations where we could possibly want to
+                       // include FFmpeg inputs (CEF inputs are unlikely),
+                       // but they're not necessarily in 4:2:2 Y'CbCr, so it would
+                       // require more functionality in the JPEG encoder.
+                       if (card_index < num_cards) {
+                               mjpeg_encoder->upload_frame(pts_int, card_index, new_frame->frame, new_frame->video_format, new_frame->y_offset, new_frame->cbcr_offset);
+                       }
                }
 
                int64_t frame_duration = output_frame_info.frame_duration;
diff --git a/mixer.h b/mixer.h
index 32a1ea476f21547137e680fb13675c80b6b42835..84313e5130b0816b2a997ee9265d637781cb3aed 100644 (file)
--- a/mixer.h
+++ b/mixer.h
@@ -42,6 +42,7 @@
 class ALSAOutput;
 class ChromaSubsampler;
 class DeckLinkOutput;
+class MJPEGEncoder;
 class QSurface;
 class QSurfaceFormat;
 class TimecodeRenderer;
@@ -475,6 +476,7 @@ private:
        std::unique_ptr<ChromaSubsampler> chroma_subsampler;
        std::unique_ptr<v210Converter> v210_converter;
        std::unique_ptr<VideoEncoder> video_encoder;
+       std::unique_ptr<MJPEGEncoder> mjpeg_encoder;
 
        std::unique_ptr<TimecodeRenderer> timecode_renderer;
        std::atomic<bool> display_timecode_in_stream{false};
@@ -530,6 +532,12 @@ private:
                        std::function<void()> upload_func;  // Needs to be called to actually upload the texture to OpenGL.
                        unsigned dropped_frames = 0;  // Number of dropped frames before this one.
                        std::chrono::steady_clock::time_point received_timestamp = std::chrono::steady_clock::time_point::min();
+
+                       // Used for MJPEG encoding. (upload_func packs everything it needs
+                       // into the functor, but would otherwise also use these.)
+                       // width=0 or height=0 means a broken frame, i.e., do not upload.
+                       bmusb::VideoFormat video_format;
+                       size_t y_offset, cbcr_offset;
                };
                std::deque<NewFrame> new_frames;
                std::condition_variable new_frames_changed;  // Set whenever new_frames is changed.
diff --git a/mjpeg_encoder.cpp b/mjpeg_encoder.cpp
new file mode 100644 (file)
index 0000000..f3e17b3
--- /dev/null
+++ b/mjpeg_encoder.cpp
@@ -0,0 +1,677 @@
+#include "mjpeg_encoder.h"
+
+#include <jpeglib.h>
+#include <unistd.h>
+#if __SSE2__
+#include <immintrin.h>
+#endif
+#include <list>
+
+extern "C" {
+#include <libavformat/avformat.h>
+}
+
+#include "defs.h"
+#include "ffmpeg_raii.h"
+#include "flags.h"
+#include "httpd.h"
+#include "memcpy_interleaved.h"
+#include "pbo_frame_allocator.h"
+#include "timebase.h"
+#include "va_display_with_cleanup.h"
+
+#include <va/va.h>
+#include <va/va_drm.h>
+#include <va/va_x11.h>
+
+using namespace bmusb;
+using namespace std;
+
+extern void memcpy_with_pitch(uint8_t *dst, const uint8_t *src, size_t src_width, size_t dst_pitch, size_t height);
+
+#define CHECK_VASTATUS(va_status, func)                                 \
+    if (va_status != VA_STATUS_SUCCESS) {                               \
+        fprintf(stderr, "%s:%d (%s) failed with %d\n", __func__, __LINE__, func, va_status); \
+        exit(1);                                                        \
+    }
+
+// From libjpeg (although it's of course identical between implementations).
+static const int jpeg_natural_order[DCTSIZE2] = {
+        0,  1,  8, 16,  9,  2,  3, 10,
+       17, 24, 32, 25, 18, 11,  4,  5,
+       12, 19, 26, 33, 40, 48, 41, 34,
+       27, 20, 13,  6,  7, 14, 21, 28,
+       35, 42, 49, 56, 57, 50, 43, 36,
+       29, 22, 15, 23, 30, 37, 44, 51,
+       58, 59, 52, 45, 38, 31, 39, 46,
+       53, 60, 61, 54, 47, 55, 62, 63,
+};
+
+struct VectorDestinationManager {
+       jpeg_destination_mgr pub;
+       std::vector<uint8_t> dest;
+
+       VectorDestinationManager()
+       {
+               pub.init_destination = init_destination_thunk;
+               pub.empty_output_buffer = empty_output_buffer_thunk;
+               pub.term_destination = term_destination_thunk;
+       }
+
+       static void init_destination_thunk(j_compress_ptr ptr)
+       {
+               ((VectorDestinationManager *)(ptr->dest))->init_destination();
+       }
+
+       inline void init_destination()
+       {
+               make_room(0);
+       }
+
+       static boolean empty_output_buffer_thunk(j_compress_ptr ptr)
+       {
+               return ((VectorDestinationManager *)(ptr->dest))->empty_output_buffer();
+       }
+
+       inline bool empty_output_buffer()
+       {
+               make_room(dest.size());  // Should ignore pub.free_in_buffer!
+               return true;
+       }
+
+       inline void make_room(size_t bytes_used)
+       {
+               dest.resize(bytes_used + 4096);
+               dest.resize(dest.capacity());
+               pub.next_output_byte = dest.data() + bytes_used;
+               pub.free_in_buffer = dest.size() - bytes_used;
+       }
+
+       static void term_destination_thunk(j_compress_ptr ptr)
+       {
+               ((VectorDestinationManager *)(ptr->dest))->term_destination();
+       }
+
+       inline void term_destination()
+       {
+               dest.resize(dest.size() - pub.free_in_buffer);
+       }
+};
+static_assert(std::is_standard_layout<VectorDestinationManager>::value, "");
+
+int MJPEGEncoder::write_packet2_thunk(void *opaque, uint8_t *buf, int buf_size, AVIODataMarkerType type, int64_t time)
+{
+       MJPEGEncoder *engine = (MJPEGEncoder *)opaque;
+       return engine->write_packet2(buf, buf_size, type, time);
+}
+
+int MJPEGEncoder::write_packet2(uint8_t *buf, int buf_size, AVIODataMarkerType type, int64_t time)
+{
+       if (type == AVIO_DATA_MARKER_HEADER) {
+               mux_header.append((char *)buf, buf_size);
+               httpd->set_header(HTTPD::MULTICAM_STREAM, mux_header);
+       } else {
+               httpd->add_data(HTTPD::MULTICAM_STREAM, (char *)buf, buf_size, /*keyframe=*/true, AV_NOPTS_VALUE, AVRational{ AV_TIME_BASE, 1 });
+       }
+       return buf_size;
+}
+
+MJPEGEncoder::MJPEGEncoder(HTTPD *httpd, const string &va_display)
+       : httpd(httpd)
+{
+       encoder_thread = thread(&MJPEGEncoder::encoder_thread_func, this);
+
+       // Set up the mux. We don't use the Mux wrapper, because it's geared towards
+       // a situation with only one video stream (and possibly one audio stream)
+       // with known width/height, and we don't need the extra functionality it provides.
+       avctx.reset(avformat_alloc_context());
+       avctx->oformat = av_guess_format("mp4", nullptr, nullptr);
+
+       uint8_t *buf = (uint8_t *)av_malloc(MUX_BUFFER_SIZE);
+       avctx->pb = avio_alloc_context(buf, MUX_BUFFER_SIZE, 1, this, nullptr, nullptr, nullptr);
+       avctx->pb->write_data_type = &MJPEGEncoder::write_packet2_thunk;
+       avctx->flags = AVFMT_FLAG_CUSTOM_IO;
+
+       for (int card_idx = 0; card_idx < global_flags.num_cards; ++card_idx) {
+               AVStream *stream = avformat_new_stream(avctx.get(), nullptr);
+               if (stream == nullptr) {
+                       fprintf(stderr, "avformat_new_stream() failed\n");
+                       exit(1);
+               }
+               stream->time_base = AVRational{ 1, TIMEBASE };
+               stream->codecpar->codec_type = AVMEDIA_TYPE_VIDEO;
+               stream->codecpar->codec_id = AV_CODEC_ID_MJPEG;
+
+               // Used for aspect ratio only. Can change without notice (the mux won't care).
+               stream->codecpar->width = global_flags.width;
+               stream->codecpar->height = global_flags.height;
+
+               // TODO: We could perhaps use the interpretation for each card here
+               // (or at least the command-line flags) instead of the defaults,
+               // but what would we do when they change?
+               stream->codecpar->color_primaries = AVCOL_PRI_BT709;
+               stream->codecpar->color_trc = AVCOL_TRC_IEC61966_2_1;
+               stream->codecpar->color_space = AVCOL_SPC_BT709;
+               stream->codecpar->color_range = AVCOL_RANGE_MPEG;
+               stream->codecpar->chroma_location = AVCHROMA_LOC_LEFT;
+               stream->codecpar->field_order = AV_FIELD_PROGRESSIVE;
+       }
+
+       AVDictionary *options = NULL;
+       vector<pair<string, string>> opts = MUX_OPTS;
+       for (pair<string, string> opt : opts) {
+               av_dict_set(&options, opt.first.c_str(), opt.second.c_str(), 0);
+       }
+       if (avformat_write_header(avctx.get(), &options) < 0) {
+               fprintf(stderr, "avformat_write_header() failed\n");
+               exit(1);
+       }
+
+       // Initialize VA-API.
+       string error;
+       va_dpy = try_open_va(va_display, &error, &config_id);
+       if (va_dpy == nullptr) {
+               fprintf(stderr, "Could not initialize VA-API for MJPEG encoding: %s. JPEGs will be encoded in software if needed.\n", error.c_str());
+       }
+
+       running = true;
+}
+
+void MJPEGEncoder::stop()
+{
+       if (!running) {
+               return;
+       }
+       running = false;
+       should_quit = true;
+       any_frames_to_be_encoded.notify_all();
+       encoder_thread.join();
+}
+
+unique_ptr<VADisplayWithCleanup> MJPEGEncoder::try_open_va(const string &va_display, string *error, VAConfigID *config_id)
+{
+       unique_ptr<VADisplayWithCleanup> va_dpy = va_open_display(va_display);
+       if (va_dpy == nullptr) {
+               if (error) *error = "Opening VA display failed";
+               return nullptr;
+       }
+       int major_ver, minor_ver;
+       VAStatus va_status = vaInitialize(va_dpy->va_dpy, &major_ver, &minor_ver);
+       if (va_status != VA_STATUS_SUCCESS) {
+               char buf[256];
+               snprintf(buf, sizeof(buf), "vaInitialize() failed with status %d\n", va_status);
+               if (error != nullptr) *error = buf;
+               return nullptr;
+       }
+
+       VAConfigAttrib attr = { VAConfigAttribRTFormat, VA_RT_FORMAT_YUV422 };
+       va_status = vaCreateConfig(va_dpy->va_dpy, VAProfileJPEGBaseline, VAEntrypointEncPicture,
+               &attr, 1, config_id);
+       if (va_status == VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT) {
+               if (error != nullptr) *error = "No hardware support";
+               return nullptr;
+       } else if (va_status != VA_STATUS_SUCCESS) {
+               char buf[256];
+               snprintf(buf, sizeof(buf), "vaCreateConfig() failed with status %d\n", va_status);
+               if (error != nullptr) *error = buf;
+               return nullptr;
+       }
+
+       int num_formats = vaMaxNumImageFormats(va_dpy->va_dpy);
+       assert(num_formats > 0);
+
+       unique_ptr<VAImageFormat[]> formats(new VAImageFormat[num_formats]);
+       va_status = vaQueryImageFormats(va_dpy->va_dpy, formats.get(), &num_formats);
+       if (va_status != VA_STATUS_SUCCESS) {
+               char buf[256];
+               snprintf(buf, sizeof(buf), "vaQueryImageFormats() failed with status %d\n", va_status);
+               if (error != nullptr) *error = buf;
+               return nullptr;
+       }
+
+       return va_dpy;
+}
+
+void MJPEGEncoder::upload_frame(int64_t pts, unsigned card_index, RefCountedFrame frame, const bmusb::VideoFormat &video_format, size_t y_offset, size_t cbcr_offset)
+{
+       PBOFrameAllocator::Userdata *userdata = (PBOFrameAllocator::Userdata *)frame->userdata;
+       if (video_format.width == 0 || video_format.height == 0) {
+               return;
+       }
+       if (video_format.interlaced) {
+               fprintf(stderr, "Card %u: Ignoring JPEG encoding for interlaced frame\n", card_index);
+               return;
+       }
+       if (userdata->pixel_format != PixelFormat_8BitYCbCr ||
+           !frame->interleaved) {
+               fprintf(stderr, "Card %u: Ignoring JPEG encoding for unsupported pixel format\n", card_index);
+               return;
+       }
+       if (video_format.width > 4096 || video_format.height > 4096) {
+               fprintf(stderr, "Card %u: Ignoring JPEG encoding for oversized frame\n", card_index);
+               return;
+       }
+
+       lock_guard<mutex> lock(mu);
+       frames_to_be_encoded.push(QueuedFrame{ pts, card_index, frame, video_format, y_offset, cbcr_offset });
+       any_frames_to_be_encoded.notify_all();
+}
+
+void MJPEGEncoder::encoder_thread_func()
+{
+       pthread_setname_np(pthread_self(), "MJPEG_Encode");
+       posix_memalign((void **)&tmp_y, 4096, 4096 * 8);
+       posix_memalign((void **)&tmp_cbcr, 4096, 4096 * 8);
+       posix_memalign((void **)&tmp_cb, 4096, 4096 * 8);
+       posix_memalign((void **)&tmp_cr, 4096, 4096 * 8);
+
+       unique_lock<mutex> lock(mu);
+       for (;;) {
+               any_frames_to_be_encoded.wait(lock, [this] { return !frames_to_be_encoded.empty() || should_quit; });
+               if (should_quit) return;
+               QueuedFrame qf = move(frames_to_be_encoded.front());
+               frames_to_be_encoded.pop();
+
+               vector<uint8_t> jpeg = encode_jpeg(qf);
+
+               AVPacket pkt;
+               memset(&pkt, 0, sizeof(pkt));
+               pkt.buf = nullptr;
+               pkt.data = &jpeg[0];
+               pkt.size = jpeg.size();
+               pkt.stream_index = qf.card_index;
+               pkt.flags = AV_PKT_FLAG_KEY;
+               pkt.pts = pkt.dts = qf.pts;
+
+               if (av_write_frame(avctx.get(), &pkt) < 0) {
+                       fprintf(stderr, "av_write_frame() failed\n");
+                       exit(1);
+               }
+       }
+}
+
+class VABufferDestroyer {
+public:
+       VABufferDestroyer(VADisplay dpy, VABufferID buf)
+               : dpy(dpy), buf(buf) {}
+
+       ~VABufferDestroyer() {
+               VAStatus va_status = vaDestroyBuffer(dpy, buf);
+               CHECK_VASTATUS(va_status, "vaDestroyBuffer");
+       }
+
+private:
+       VADisplay dpy;
+       VABufferID buf;
+};
+
+MJPEGEncoder::VAResources MJPEGEncoder::get_va_resources(unsigned width, unsigned height)
+{
+       {
+               lock_guard<mutex> lock(va_resources_mutex);
+               for (auto it = va_resources_freelist.begin(); it != va_resources_freelist.end(); ++it) {
+                       if (it->width == width && it->height == height) {
+                               VAResources ret = *it;
+                               va_resources_freelist.erase(it);
+                               return ret;
+                       }
+               }
+       }
+
+       VAResources ret;
+
+       ret.width = width;
+       ret.height = height;
+
+       VASurfaceAttrib attrib;
+       attrib.flags = VA_SURFACE_ATTRIB_SETTABLE;
+       attrib.type = VASurfaceAttribPixelFormat;
+       attrib.value.type = VAGenericValueTypeInteger;
+       attrib.value.value.i = VA_FOURCC_UYVY;
+
+       VAStatus va_status = vaCreateSurfaces(va_dpy->va_dpy, VA_RT_FORMAT_YUV422,
+               width, height,
+               &ret.surface, 1, &attrib, 1);
+       CHECK_VASTATUS(va_status, "vaCreateSurfaces");
+
+       va_status = vaCreateContext(va_dpy->va_dpy, config_id, width, height, 0, &ret.surface, 1, &ret.context);
+       CHECK_VASTATUS(va_status, "vaCreateContext");
+
+       va_status = vaCreateBuffer(va_dpy->va_dpy, config_id, VAEncCodedBufferType, width * height * 3 + 8192, 1, nullptr, &ret.data_buffer);
+       CHECK_VASTATUS(va_status, "vaCreateBuffer");
+
+       return ret;
+}
+
+void MJPEGEncoder::release_va_resources(MJPEGEncoder::VAResources resources)
+{
+       lock_guard<mutex> lock(va_resources_mutex);
+       if (va_resources_freelist.size() > 10) {
+               auto it = va_resources_freelist.end();
+               --it;
+
+               VAStatus va_status = vaDestroyBuffer(va_dpy->va_dpy, it->data_buffer);
+               CHECK_VASTATUS(va_status, "vaDestroyBuffer");
+
+               va_status = vaDestroyContext(va_dpy->va_dpy, it->context);
+               CHECK_VASTATUS(va_status, "vaDestroyContext");
+
+               va_status = vaDestroySurfaces(va_dpy->va_dpy, &it->surface, 1);
+               CHECK_VASTATUS(va_status, "vaDestroySurfaces");
+
+               va_resources_freelist.erase(it);
+       }
+
+       va_resources_freelist.push_front(resources);
+}
+
+void MJPEGEncoder::init_jpeg_422(unsigned width, unsigned height, VectorDestinationManager *dest, jpeg_compress_struct *cinfo)
+{
+       jpeg_error_mgr jerr;
+       cinfo->err = jpeg_std_error(&jerr);
+       jpeg_create_compress(cinfo);
+
+       cinfo->dest = (jpeg_destination_mgr *)dest;
+
+       cinfo->input_components = 3;
+       jpeg_set_defaults(cinfo);
+       jpeg_set_quality(cinfo, quality, /*force_baseline=*/false);
+
+       cinfo->image_width = width;
+       cinfo->image_height = height;
+       cinfo->raw_data_in = true;
+       jpeg_set_colorspace(cinfo, JCS_YCbCr);
+       cinfo->comp_info[0].h_samp_factor = 2;
+       cinfo->comp_info[0].v_samp_factor = 1;
+       cinfo->comp_info[1].h_samp_factor = 1;
+       cinfo->comp_info[1].v_samp_factor = 1;
+       cinfo->comp_info[2].h_samp_factor = 1;
+       cinfo->comp_info[2].v_samp_factor = 1;
+       cinfo->CCIR601_sampling = true;  // Seems to be mostly ignored by libjpeg, though.
+       jpeg_start_compress(cinfo, true);
+}
+
+vector<uint8_t> MJPEGEncoder::get_jpeg_header(unsigned width, unsigned height, jpeg_compress_struct *cinfo)
+{
+       VectorDestinationManager dest;
+       init_jpeg_422(width, height, &dest, cinfo);
+
+       // Make a dummy black image; there's seemingly no other easy way of
+       // making libjpeg output all of its headers.
+       JSAMPROW yptr[8], cbptr[8], crptr[8];
+       JSAMPARRAY data[3] = { yptr, cbptr, crptr };
+       memset(tmp_y, 0, 4096);
+       memset(tmp_cb, 0, 4096);
+       memset(tmp_cr, 0, 4096);
+       for (unsigned yy = 0; yy < 8; ++yy) {
+               yptr[yy] = tmp_y;
+               cbptr[yy] = tmp_cb;
+               crptr[yy] = tmp_cr;
+       }
+       for (unsigned y = 0; y < height; y += 8) {
+               jpeg_write_raw_data(cinfo, data, /*num_lines=*/8);
+       }
+       jpeg_finish_compress(cinfo);
+
+       // We're only interested in the header, not the data after it.
+       dest.term_destination();
+       for (size_t i = 0; i < dest.dest.size() - 1; ++i) {
+               if (dest.dest[i] == 0xff && dest.dest[i + 1] == 0xda) {  // Start of scan (SOS).
+                       unsigned len = dest.dest[i + 2] * 256 + dest.dest[i + 3];
+                       dest.dest.resize(i + len + 2);
+                       break;
+               }
+       }
+
+       return dest.dest;
+}
+
+MJPEGEncoder::VAData MJPEGEncoder::get_va_data_for_resolution(unsigned width, unsigned height)
+{
+       pair<unsigned, unsigned> key(width, height);
+       if (va_data_for_resolution.count(key)) {
+               return va_data_for_resolution[key];
+       }
+
+       // Use libjpeg to generate a header and set sane defaults for e.g.
+       // quantization tables. Then do the actual encode with VA-API.
+       jpeg_compress_struct cinfo;
+       vector<uint8_t> jpeg_header = get_jpeg_header(width, height, &cinfo);
+
+       // Picture parameters.
+       VAEncPictureParameterBufferJPEG pic_param;
+       memset(&pic_param, 0, sizeof(pic_param));
+       pic_param.reconstructed_picture = VA_INVALID_ID;
+       pic_param.picture_width = cinfo.image_width;
+       pic_param.picture_height = cinfo.image_height;
+       for (int component_idx = 0; component_idx < cinfo.num_components; ++component_idx) {
+               const jpeg_component_info *comp = &cinfo.comp_info[component_idx];
+               pic_param.component_id[component_idx] = comp->component_id;
+               pic_param.quantiser_table_selector[component_idx] = comp->quant_tbl_no;
+       }
+       pic_param.num_components = cinfo.num_components;
+       pic_param.num_scan = 1;
+       pic_param.sample_bit_depth = 8;
+       pic_param.coded_buf = VA_INVALID_ID;  // To be filled out by caller.
+       pic_param.pic_flags.bits.huffman = 1;
+       pic_param.quality = 50;  // Don't scale the given quantization matrices. (See gen8_mfc_jpeg_fqm_state)
+
+       // Quantization matrices.
+       VAQMatrixBufferJPEG q;
+       memset(&q, 0, sizeof(q));
+
+       q.load_lum_quantiser_matrix = true;
+       q.load_chroma_quantiser_matrix = true;
+       for (int quant_tbl_idx = 0; quant_tbl_idx < min(4, NUM_QUANT_TBLS); ++quant_tbl_idx) {
+               const JQUANT_TBL *qtbl = cinfo.quant_tbl_ptrs[quant_tbl_idx];
+               assert((qtbl == nullptr) == (quant_tbl_idx >= 2));
+               if (qtbl == nullptr) continue;
+
+               uint8_t *qmatrix = (quant_tbl_idx == 0) ? q.lum_quantiser_matrix : q.chroma_quantiser_matrix;
+               for (int i = 0; i < 64; ++i) {
+                       if (qtbl->quantval[i] > 255) {
+                               fprintf(stderr, "Baseline JPEG only!\n");
+                               abort();
+                       }
+                       qmatrix[i] = qtbl->quantval[jpeg_natural_order[i]];
+               }
+       }
+
+       // Huffman tables (arithmetic is not supported).
+       VAHuffmanTableBufferJPEGBaseline huff;
+       memset(&huff, 0, sizeof(huff));
+
+       for (int huff_tbl_idx = 0; huff_tbl_idx < min(2, NUM_HUFF_TBLS); ++huff_tbl_idx) {
+               const JHUFF_TBL *ac_hufftbl = cinfo.ac_huff_tbl_ptrs[huff_tbl_idx];
+               const JHUFF_TBL *dc_hufftbl = cinfo.dc_huff_tbl_ptrs[huff_tbl_idx];
+               if (ac_hufftbl == nullptr) {
+                       assert(dc_hufftbl == nullptr);
+                       huff.load_huffman_table[huff_tbl_idx] = 0;
+               } else {
+                       assert(dc_hufftbl != nullptr);
+                       huff.load_huffman_table[huff_tbl_idx] = 1;
+
+                       for (int i = 0; i < 16; ++i) {
+                               huff.huffman_table[huff_tbl_idx].num_dc_codes[i] = dc_hufftbl->bits[i + 1];
+                       }
+                       for (int i = 0; i < 12; ++i) {
+                               huff.huffman_table[huff_tbl_idx].dc_values[i] = dc_hufftbl->huffval[i];
+                       }
+                       for (int i = 0; i < 16; ++i) {
+                               huff.huffman_table[huff_tbl_idx].num_ac_codes[i] = ac_hufftbl->bits[i + 1];
+                       }
+                       for (int i = 0; i < 162; ++i) {
+                               huff.huffman_table[huff_tbl_idx].ac_values[i] = ac_hufftbl->huffval[i];
+                       }
+               }
+       }
+
+       // Slice parameters (metadata about the slice).
+       VAEncSliceParameterBufferJPEG parms;
+       memset(&parms, 0, sizeof(parms));
+       for (int component_idx = 0; component_idx < cinfo.num_components; ++component_idx) {
+               const jpeg_component_info *comp = &cinfo.comp_info[component_idx];
+               parms.components[component_idx].component_selector = comp->component_id;
+               parms.components[component_idx].dc_table_selector = comp->dc_tbl_no;
+               parms.components[component_idx].ac_table_selector = comp->ac_tbl_no;
+               if (parms.components[component_idx].dc_table_selector > 1 ||
+                   parms.components[component_idx].ac_table_selector > 1) {
+                       fprintf(stderr, "Uses too many Huffman tables\n");
+                       abort();
+               }
+       }
+       parms.num_components = cinfo.num_components;
+       parms.restart_interval = cinfo.restart_interval;
+
+       jpeg_destroy_compress(&cinfo);
+
+       VAData ret;
+       ret.jpeg_header = move(jpeg_header);
+       ret.pic_param = pic_param;
+       ret.q = q;
+       ret.huff = huff;
+       ret.parms = parms;
+       va_data_for_resolution[key] = ret;
+       return ret;
+}
+
+vector<uint8_t> MJPEGEncoder::encode_jpeg(const QueuedFrame &qf)
+{
+       if (va_dpy != nullptr) {
+               return encode_jpeg_va(qf);
+       } else {
+               return encode_jpeg_libjpeg(qf);
+       }
+}
+
+vector<uint8_t> MJPEGEncoder::encode_jpeg_va(const QueuedFrame &qf)
+{
+       unsigned width = qf.video_format.width;
+       unsigned height = qf.video_format.height;
+
+       VAResources resources = get_va_resources(width, height);
+       ReleaseVAResources release(this, resources);
+
+       VAData va_data = get_va_data_for_resolution(width, height);
+       va_data.pic_param.coded_buf = resources.data_buffer;
+
+       VABufferID pic_param_buffer;
+       VAStatus va_status = vaCreateBuffer(va_dpy->va_dpy, config_id, VAEncPictureParameterBufferType, sizeof(va_data.pic_param), 1, &va_data.pic_param, &pic_param_buffer);
+       CHECK_VASTATUS(va_status, "vaCreateBuffer");
+       VABufferDestroyer destroy_pic_param(va_dpy->va_dpy, pic_param_buffer);
+
+       VABufferID q_buffer;
+       va_status = vaCreateBuffer(va_dpy->va_dpy, config_id, VAQMatrixBufferType, sizeof(va_data.q), 1, &va_data.q, &q_buffer);
+       CHECK_VASTATUS(va_status, "vaCreateBuffer");
+       VABufferDestroyer destroy_iq(va_dpy->va_dpy, q_buffer);
+
+       VABufferID huff_buffer;
+       va_status = vaCreateBuffer(va_dpy->va_dpy, config_id, VAHuffmanTableBufferType, sizeof(va_data.huff), 1, &va_data.huff, &huff_buffer);
+       CHECK_VASTATUS(va_status, "vaCreateBuffer");
+       VABufferDestroyer destroy_huff(va_dpy->va_dpy, huff_buffer);
+
+       VABufferID slice_param_buffer;
+       va_status = vaCreateBuffer(va_dpy->va_dpy, config_id, VAEncSliceParameterBufferType, sizeof(va_data.parms), 1, &va_data.parms, &slice_param_buffer);
+       CHECK_VASTATUS(va_status, "vaCreateBuffer");
+       VABufferDestroyer destroy_slice_param(va_dpy->va_dpy, slice_param_buffer);
+
+       VAImage image;
+       va_status = vaDeriveImage(va_dpy->va_dpy, resources.surface, &image);
+       CHECK_VASTATUS(va_status, "vaDeriveImage");
+
+       // Upload the pixel data.
+       uint8_t *surface_p = nullptr;
+       vaMapBuffer(va_dpy->va_dpy, image.buf, (void **)&surface_p);
+
+       size_t field_start_line = qf.video_format.extra_lines_top;  // No interlacing support.
+       size_t field_start = qf.cbcr_offset * 2 + qf.video_format.width * field_start_line * 2;
+
+       {
+               const uint8_t *src = qf.frame->data_copy + field_start;
+               uint8_t *dst = (unsigned char *)surface_p + image.offsets[0];
+               memcpy_with_pitch(dst, src, qf.video_format.width * 2, image.pitches[0], qf.video_format.height);
+       }
+
+       va_status = vaUnmapBuffer(va_dpy->va_dpy, image.buf);
+       CHECK_VASTATUS(va_status, "vaUnmapBuffer");
+       va_status = vaDestroyImage(va_dpy->va_dpy, image.image_id);
+       CHECK_VASTATUS(va_status, "vaDestroyImage");
+
+       // Finally, stick in the JPEG header.
+       VAEncPackedHeaderParameterBuffer header_parm;
+       header_parm.type = VAEncPackedHeaderRawData;
+       header_parm.bit_length = 8 * va_data.jpeg_header.size();
+
+       VABufferID header_parm_buffer;
+       va_status = vaCreateBuffer(va_dpy->va_dpy, config_id, VAEncPackedHeaderParameterBufferType, sizeof(header_parm), 1, &header_parm, &header_parm_buffer);
+       CHECK_VASTATUS(va_status, "vaCreateBuffer");
+       VABufferDestroyer destroy_header(va_dpy->va_dpy, header_parm_buffer);
+
+       VABufferID header_data_buffer;
+       va_status = vaCreateBuffer(va_dpy->va_dpy, config_id, VAEncPackedHeaderDataBufferType, va_data.jpeg_header.size(), 1, va_data.jpeg_header.data(), &header_data_buffer);
+       CHECK_VASTATUS(va_status, "vaCreateBuffer");
+       VABufferDestroyer destroy_header_data(va_dpy->va_dpy, header_data_buffer);
+
+       va_status = vaBeginPicture(va_dpy->va_dpy, resources.context, resources.surface);
+       CHECK_VASTATUS(va_status, "vaBeginPicture");
+       va_status = vaRenderPicture(va_dpy->va_dpy, resources.context, &pic_param_buffer, 1);
+       CHECK_VASTATUS(va_status, "vaRenderPicture(pic_param)");
+       va_status = vaRenderPicture(va_dpy->va_dpy, resources.context, &q_buffer, 1);
+       CHECK_VASTATUS(va_status, "vaRenderPicture(q)");
+       va_status = vaRenderPicture(va_dpy->va_dpy, resources.context, &huff_buffer, 1);
+       CHECK_VASTATUS(va_status, "vaRenderPicture(huff)");
+       va_status = vaRenderPicture(va_dpy->va_dpy, resources.context, &slice_param_buffer, 1);
+       CHECK_VASTATUS(va_status, "vaRenderPicture(slice_param)");
+       va_status = vaRenderPicture(va_dpy->va_dpy, resources.context, &header_parm_buffer, 1);
+       CHECK_VASTATUS(va_status, "vaRenderPicture(header_parm)");
+       va_status = vaRenderPicture(va_dpy->va_dpy, resources.context, &header_data_buffer, 1);
+       CHECK_VASTATUS(va_status, "vaRenderPicture(header_data)");
+       va_status = vaEndPicture(va_dpy->va_dpy, resources.context);
+       CHECK_VASTATUS(va_status, "vaEndPicture");
+
+       va_status = vaSyncSurface(va_dpy->va_dpy, resources.surface);
+       CHECK_VASTATUS(va_status, "vaSyncSurface");
+
+       VACodedBufferSegment *segment;
+       va_status = vaMapBuffer(va_dpy->va_dpy, resources.data_buffer, (void **)&segment);
+       CHECK_VASTATUS(va_status, "vaMapBuffer");
+
+       const char *coded_buf = reinterpret_cast<char *>(segment->buf);
+       vector<uint8_t> jpeg(coded_buf, coded_buf + segment->size);
+
+       va_status = vaUnmapBuffer(va_dpy->va_dpy, resources.data_buffer);
+       CHECK_VASTATUS(va_status, "vaUnmapBuffer");
+
+       return jpeg;
+}
+
+vector<uint8_t> MJPEGEncoder::encode_jpeg_libjpeg(const QueuedFrame &qf)
+{
+       unsigned width = qf.video_format.width;
+       unsigned height = qf.video_format.height;
+
+       VectorDestinationManager dest;
+       jpeg_compress_struct cinfo;
+       init_jpeg_422(width, height, &dest, &cinfo);
+
+       size_t field_start_line = qf.video_format.extra_lines_top;  // No interlacing support.
+       size_t field_start = qf.cbcr_offset * 2 + qf.video_format.width * field_start_line * 2;
+
+       JSAMPROW yptr[8], cbptr[8], crptr[8];
+       JSAMPARRAY data[3] = { yptr, cbptr, crptr };
+       for (unsigned y = 0; y < qf.video_format.height; y += 8) {
+               const uint8_t *src = qf.frame->data_copy + field_start + y * qf.video_format.width * 2;
+
+               memcpy_interleaved(tmp_y, tmp_cbcr, src, qf.video_format.width * 8 * 2);
+               memcpy_interleaved(tmp_cb, tmp_cr, tmp_cbcr, qf.video_format.width * 8);
+               for (unsigned yy = 0; yy < 8; ++yy) {
+                       yptr[yy] = tmp_y + yy * width;
+                       cbptr[yy] = tmp_cb + yy * width / 2;
+                       crptr[yy] = tmp_cr + yy * width / 2;
+               }
+               jpeg_write_raw_data(&cinfo, data, /*num_lines=*/8);
+       }
+       jpeg_finish_compress(&cinfo);
+
+       return dest.dest;
+}
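
A note on the header truncation in get_jpeg_header() above: JPEG marker
segments are length-prefixed, which is what makes the resize correct. A
worked example with illustrative values:

// JPEG segments look like [0xFF, marker, len_hi, len_lo, payload...], where
// the two length bytes count themselves plus the payload (but not the
// 0xFF/marker pair). If the SOS marker (0xFF 0xDA) sits at offset i with
// len_hi = 0x00 and len_lo = 0x0C:
//
//   len = 0x00 * 256 + 0x0C = 12
//   dest.dest.resize(i + len + 2);  // keep 2 marker bytes + 12 header bytes
//
// Everything after the SOS header (the entropy-coded scan of the dummy
// black image) is cut away; VA-API generates the real scan data itself.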
diff --git a/mjpeg_encoder.h b/mjpeg_encoder.h
new file mode 100644 (file)
index 0000000..ab8632a
--- /dev/null
+++ b/mjpeg_encoder.h
@@ -0,0 +1,121 @@
+#ifndef _MJPEG_ENCODER_H
+#define _MJPEG_ENCODER_H 1
+
+#include "ffmpeg_raii.h"
+#include "ref_counted_frame.h"
+
+extern "C" {
+
+#include <libavformat/avio.h>
+
+}  // extern "C"
+
+#include <atomic>
+#include <bmusb/bmusb.h>
+#include <condition_variable>
+#include <list>
+#include <mutex>
+#include <queue>
+#include <stdint.h>
+#include <string>
+#include <thread>
+
+#include <va/va.h>
+
+class HTTPD;
+struct jpeg_compress_struct;
+struct VADisplayWithCleanup;
+struct VectorDestinationManager;
+
+class MJPEGEncoder {
+public:
+       MJPEGEncoder(HTTPD *httpd, const std::string &va_display);
+       void stop();
+       void upload_frame(int64_t pts, unsigned card_index, RefCountedFrame frame, const bmusb::VideoFormat &video_format, size_t y_offset, size_t cbcr_offset);
+
+private:
+       static constexpr int quality = 90;
+
+       struct QueuedFrame {
+               int64_t pts;
+               unsigned card_index;
+               RefCountedFrame frame;
+               bmusb::VideoFormat video_format;
+               size_t y_offset, cbcr_offset;
+       };
+
+       void encoder_thread_func();
+       std::vector<uint8_t> encode_jpeg(const QueuedFrame &qf);
+       std::vector<uint8_t> encode_jpeg_va(const QueuedFrame &qf);
+       std::vector<uint8_t> encode_jpeg_libjpeg(const QueuedFrame &qf);
+       void init_jpeg_422(unsigned width, unsigned height, VectorDestinationManager *dest, jpeg_compress_struct *cinfo);
+       std::vector<uint8_t> get_jpeg_header(unsigned width, unsigned height, jpeg_compress_struct *cinfo);
+
+       static int write_packet2_thunk(void *opaque, uint8_t *buf, int buf_size, AVIODataMarkerType type, int64_t time);
+       int write_packet2(uint8_t *buf, int buf_size, AVIODataMarkerType type, int64_t time);
+
+       std::thread encoder_thread;
+
+       std::mutex mu;
+       std::queue<QueuedFrame> frames_to_be_encoded;  // Under mu.
+       std::condition_variable any_frames_to_be_encoded;
+
+       std::queue<QueuedFrame> frames_encoding;  // Under mu.
+       std::condition_variable any_frames_encoding;
+
+       AVFormatContextWithCloser avctx;
+       HTTPD *httpd;
+       std::string mux_header;
+       std::atomic<bool> should_quit{false};
+       bool running = false;
+
+       std::unique_ptr<VADisplayWithCleanup> va_dpy;
+       VAConfigID config_id;
+
+       struct VAData {
+               std::vector<uint8_t> jpeg_header;
+               VAEncPictureParameterBufferJPEG pic_param;
+               VAQMatrixBufferJPEG q;
+               VAHuffmanTableBufferJPEGBaseline huff;
+               VAEncSliceParameterBufferJPEG parms;
+       };
+       std::map<std::pair<unsigned, unsigned>, VAData> va_data_for_resolution;
+       VAData get_va_data_for_resolution(unsigned width, unsigned height);
+
+       struct VAResources {
+               unsigned width, height;
+               VASurfaceID surface;
+               VAContextID context;
+               VABufferID data_buffer;
+       };
+       std::list<VAResources> va_resources_freelist;
+       std::mutex va_resources_mutex;
+       VAResources get_va_resources(unsigned width, unsigned height);
+       void release_va_resources(VAResources resources);
+
+       // RAII wrapper to release VAResources on return (even on error).
+       class ReleaseVAResources {
+       public:
+               ReleaseVAResources(MJPEGEncoder *mjpeg, const VAResources &resources)
+                       : mjpeg(mjpeg), resources(resources) {}
+               ~ReleaseVAResources()
+               {
+                       if (!committed) {
+                               mjpeg->release_va_resources(resources);
+                       }
+               }
+
+               void commit() { committed = true; }
+
+       private:
+               MJPEGEncoder * const mjpeg;
+               const VAResources &resources;
+               bool committed = false;
+       };
+
+       static std::unique_ptr<VADisplayWithCleanup> try_open_va(const std::string &va_display, std::string *error, VAConfigID *config_id);
+
+       uint8_t *tmp_y, *tmp_cbcr, *tmp_cb, *tmp_cr;  // Private to the encoder thread.
+};
+
+#endif  // !defined(_MJPEG_ENCODER_H)
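
The VAResources freelist together with the ReleaseVAResources guard forms a
small per-resolution object pool. The call pattern, as used by
encode_jpeg_va() above:

// Pool pattern from encode_jpeg_va():
//
//   VAResources resources = get_va_resources(width, height);  // reuse or create
//   ReleaseVAResources release(this, resources);              // RAII give-back
//   ... VA-API encode using resources.surface/context/data_buffer ...
//
// release.commit() would cancel the automatic give-back if ownership were
// transferred elsewhere (unused in this commit). release_va_resources()
// caps the freelist at roughly ten entries, destroying the least recently
// returned resources beyond that, so idle resolutions do not pin GPU
// memory indefinitely.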
diff --git a/pbo_frame_allocator.cpp b/pbo_frame_allocator.cpp
index ea17f1259fde4076320c9ebdb37da6c714a6c5f3..c8687025e2f2f1d5dc0d193a07035b01e272f970 100644 (file)
--- a/pbo_frame_allocator.cpp
+++ b/pbo_frame_allocator.cpp
@@ -52,6 +52,7 @@ void PBOFrameAllocator::init_frame(size_t frame_idx, size_t frame_size, GLuint w
        Frame frame;
        frame.data = (uint8_t *)glMapBufferRange(buffer, 0, frame_size, permissions | map_bits | GL_MAP_PERSISTENT_BIT);
        frame.data2 = frame.data + frame_size / 2;
+       frame.data_copy = new uint8_t[frame_size];
        check_error();
        frame.size = frame_size;
        frame.userdata = &userdata[frame_idx];
@@ -214,6 +215,8 @@ PBOFrameAllocator::~PBOFrameAllocator()
 
 void PBOFrameAllocator::destroy_frame(Frame *frame)
 {
+       delete[] frame->data_copy;
+
        GLuint pbo = ((Userdata *)frame->userdata)->pbo;
        glBindBuffer(buffer, pbo);
        check_error();
diff --git a/quicksync_encoder.cpp b/quicksync_encoder.cpp
index 5200ce61b4911ab4e18692da0bfc17b483ae548d..bca7ffd91983e73ed819d7889c9925cd58acae2f 100644 (file)
--- a/quicksync_encoder.cpp
+++ b/quicksync_encoder.cpp
@@ -1939,8 +1939,6 @@ void QuickSyncEncoderImpl::add_packet_for_uncompressed_frame(int64_t pts, int64_
        stream_mux->add_packet(pkt, pts, pts);
 }
 
-namespace {
-
 void memcpy_with_pitch(uint8_t *dst, const uint8_t *src, size_t src_width, size_t dst_pitch, size_t height)
 {
        if (src_width == dst_pitch) {
@@ -1954,8 +1952,6 @@ void memcpy_with_pitch(uint8_t *dst, const uint8_t *src, size_t src_width, size_
        }
 }
 
-}  // namespace
-
 void QuickSyncEncoderImpl::pass_frame(QuickSyncEncoderImpl::PendingFrame frame, int display_frame_num, int64_t pts, int64_t duration)
 {
        // Wait for the GPU to be done with the frame.
diff --git a/quicksync_encoder_impl.h b/quicksync_encoder_impl.h
index 7645b99b6b34edf5e8afd83b91a6a9e7f981ba10..0317b6af0393723bb1ca38ac0100947d8c800c85 100644 (file)
--- a/quicksync_encoder_impl.h
+++ b/quicksync_encoder_impl.h
@@ -20,6 +20,7 @@
 #include "timebase.h"
 #include "print_latency.h"
 #include "ref_counted_gl_sync.h"
+#include "va_display_with_cleanup.h"
 
 #define SURFACE_NUM 16 /* 16 surfaces for source YUV */
 #define MAX_NUM_REF1 16 // Seemingly a hardware-fixed value, not related to SURFACE_NUM
@@ -39,16 +40,6 @@ class DiskSpaceEstimator;
 class QSurface;
 class X264Encoder;
 
-struct VADisplayWithCleanup {
-       ~VADisplayWithCleanup();
-
-       VADisplay va_dpy;
-       Display *x11_display = nullptr;
-       bool can_use_zerocopy = true;
-       int drm_fd = -1;
-};
-std::unique_ptr<VADisplayWithCleanup> va_open_display(const std::string &va_display);  // Can return nullptr on failure.
-
 class QuickSyncEncoderImpl {
 public:
        QuickSyncEncoderImpl(const std::string &filename, movit::ResourcePool *resource_pool, QSurface *surface, const std::string &va_display, int width, int height, AVOutputFormat *oformat, X264Encoder *x264_encoder, DiskSpaceEstimator *disk_space_estimator);
diff --git a/va_display_with_cleanup.h b/va_display_with_cleanup.h
new file mode 100644 (file)
index 0000000..3f9b1c5
--- /dev/null
+++ b/va_display_with_cleanup.h
@@ -0,0 +1,19 @@
+#ifndef _VA_DISPLAY_WITH_CLEANUP
+#define _VA_DISPLAY_WITH_CLEANUP 1
+
+#include <va/va.h>
+#include <X11/Xlib.h>
+
+#include <memory>
+
+struct VADisplayWithCleanup {
+       ~VADisplayWithCleanup();
+
+       VADisplay va_dpy;
+       Display *x11_display = nullptr;
+       bool can_use_zerocopy = true;
+       int drm_fd = -1;
+};
+std::unique_ptr<VADisplayWithCleanup> va_open_display(const std::string &va_display);  // Can return nullptr on failure.
+
+#endif  // !defined(_VA_DISPLAY_WITH_CLEANUP)
diff --git a/video_encoder.cpp b/video_encoder.cpp
index 6344b8c6a04a89e0fe6e9e312999a2ad449ab93f..2b8fcd5edec9e0a9ea3a79aed79d39d02494375d 100644 (file)
--- a/video_encoder.cpp
+++ b/video_encoder.cpp
@@ -216,9 +216,9 @@ int VideoEncoder::write_packet2(uint8_t *buf, int buf_size, AVIODataMarkerType t
 
        if (type == AVIO_DATA_MARKER_HEADER) {
                stream_mux_header.append((char *)buf, buf_size);
-               httpd->set_header(stream_mux_header);
+               httpd->set_header(HTTPD::MAIN_STREAM, stream_mux_header);
        } else {
-               httpd->add_data((char *)buf, buf_size, type == AVIO_DATA_MARKER_SYNC_POINT, time, AVRational{ AV_TIME_BASE, 1 });
+               httpd->add_data(HTTPD::MAIN_STREAM, (char *)buf, buf_size, type == AVIO_DATA_MARKER_SYNC_POINT, time, AVRational{ AV_TIME_BASE, 1 });
        }
        return buf_size;
 }