From: Steinar H. Gunderson
Date: Sun, 14 Oct 2018 22:14:22 +0000 (+0200)
Subject: Add functionality for MJPEG export.
X-Git-Tag: 1.8.0~58^2
X-Git-Url: https://git.sesse.net/?p=nageru;a=commitdiff_plain;h=8e86ae071c057a26e933b8452ac541050ba2cb9f

Add functionality for MJPEG export.

This simply exports all incoming frames under /multicam.mp4, as one big
4:2:2 MJPEG multi-video stream. The intended use is for multi-camera
recording, or for instant replay.
---
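Each card gets its own video track in the mux, so a single camera can be
pulled back out again with stock FFmpeg. A hypothetical invocation (host
and port depend on your setup; 9095 is Nageru's default HTTP port):

    ffmpeg -i http://127.0.0.1:9095/multicam.mp4 -map 0:v:1 -c copy card1.mp4
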
diff --git a/bmusb b/bmusb
index b447be0..e9aa80b 160000
--- a/bmusb
+++ b/bmusb
@@ -1 +1 @@
-Subproject commit b447be096623b949a2117c71af03bb96d329d988
+Subproject commit e9aa80b8e6a09cf2c8b3458a1c395b6f1b6a58cc
diff --git a/decklink_capture.cpp b/decklink_capture.cpp
index 881e181..0f48e3e 100644
--- a/decklink_capture.cpp
+++ b/decklink_capture.cpp
@@ -265,6 +265,9 @@ HRESULT STDMETHODCALLTYPE DeckLinkCapture::VideoInputFrameArrived(
 	} else {
 		memcpy(current_video_frame.data, frame_bytes, num_bytes);
 	}
+	if (current_video_frame.data_copy != nullptr) {
+		memcpy(current_video_frame.data_copy, frame_bytes, num_bytes);
+	}
 	current_video_frame.len += num_bytes;
 
 	video_format.width = width;
diff --git a/httpd.cpp b/httpd.cpp
index f644176..8d33e75 100644
--- a/httpd.cpp
+++ b/httpd.cpp
@@ -58,11 +58,13 @@ void HTTPD::stop()
 	}
 }
 
-void HTTPD::add_data(const char *buf, size_t size, bool keyframe, int64_t time, AVRational timebase)
+void HTTPD::add_data(StreamType stream_type, const char *buf, size_t size, bool keyframe, int64_t time, AVRational timebase)
 {
 	unique_lock<mutex> lock(streams_mutex);
 	for (Stream *stream : streams) {
-		stream->add_data(buf, size, keyframe ? Stream::DATA_TYPE_KEYFRAME : Stream::DATA_TYPE_OTHER, time, timebase);
+		if (stream->get_stream_type() == stream_type) {
+			stream->add_data(buf, size, keyframe ? Stream::DATA_TYPE_KEYFRAME : Stream::DATA_TYPE_OTHER, time, timebase);
+		}
 	}
 }
 
@@ -87,6 +89,12 @@ int HTTPD::answer_to_connection(MHD_Connection *connection,
 	} else {
 		framing = HTTPD::Stream::FRAMING_RAW;
 	}
+	HTTPD::StreamType stream_type;
+	if (strcmp(url, "/multicam.mp4") == 0) {
+		stream_type = HTTPD::StreamType::MULTICAM_STREAM;
+	} else {
+		stream_type = HTTPD::StreamType::MAIN_STREAM;
+	}
 
 	if (strcmp(url, "/metrics") == 0) {
 		string contents = global_metrics.serialize();
@@ -121,8 +129,8 @@ int HTTPD::answer_to_connection(MHD_Connection *connection,
 		return ret;
 	}
 
-	HTTPD::Stream *stream = new HTTPD::Stream(this, framing);
-	stream->add_data(header.data(), header.size(), Stream::DATA_TYPE_HEADER, AV_NOPTS_VALUE, AVRational{ 1, 0 });
+	HTTPD::Stream *stream = new HTTPD::Stream(this, framing, stream_type);
+	stream->add_data(header[stream_type].data(), header[stream_type].size(), Stream::DATA_TYPE_HEADER, AV_NOPTS_VALUE, AVRational{ 1, 0 });
 	{
 		unique_lock<mutex> lock(streams_mutex);
 		streams.insert(stream);
diff --git a/httpd.h b/httpd.h
index 57c649b..1ff5c51 100644
--- a/httpd.h
+++ b/httpd.h
@@ -31,9 +31,15 @@ public:
 	HTTPD();
 	~HTTPD();
 
+	enum StreamType {
+		MAIN_STREAM,
+		MULTICAM_STREAM,
+		NUM_STREAM_TYPES
+	};
+
 	// Should be called before start().
-	void set_header(const std::string &data) {
-		header = data;
+	void set_header(StreamType stream_type, const std::string &data) {
+		header[stream_type] = data;
 	}
 
 	// Should be called before start() (due to threading issues).
@@ -47,7 +53,7 @@ public:
 	void start(int port);
 	void stop();
 
-	void add_data(const char *buf, size_t size, bool keyframe, int64_t time, AVRational timebase);
+	void add_data(StreamType stream_type, const char *buf, size_t size, bool keyframe, int64_t time, AVRational timebase);
 	int64_t get_num_connected_clients() const {
 		return metric_num_connected_clients.load();
 	}
@@ -72,7 +78,8 @@ private:
 			FRAMING_RAW,
 			FRAMING_METACUBE
 		};
-		Stream(HTTPD *parent, Framing framing) : parent(parent), framing(framing) {}
+		Stream(HTTPD *parent, Framing framing, StreamType stream_type)
+			: parent(parent), framing(framing), stream_type(stream_type) {}
 
 		static ssize_t reader_callback_thunk(void *cls, uint64_t pos, char *buf, size_t max);
 		ssize_t reader_callback(uint64_t pos, char *buf, size_t max);
@@ -85,6 +92,7 @@ private:
 		void add_data(const char *buf, size_t size, DataType data_type, int64_t time, AVRational timebase);
 		void stop();
 		HTTPD *get_parent() const { return parent; }
+		StreamType get_stream_type() const { return stream_type; }
 
 	private:
 		HTTPD *parent;
@@ -96,6 +104,7 @@ private:
 		std::deque<std::string> buffered_data;  // Protected by <buffer_mutex>.
 		size_t used_of_buffered_data = 0;  // How many bytes of the first element of <buffered_data> that is already used. Protected by <buffer_mutex>.
 		size_t seen_keyframe = false;
+		StreamType stream_type;
 	};
 
 	MHD_Daemon *mhd = nullptr;
@@ -106,7 +115,7 @@ private:
 		CORSPolicy cors_policy;
 	};
 	std::unordered_map<std::string, Endpoint> endpoints;
-	std::string header;
+	std::string header[NUM_STREAM_TYPES];
 
 	// Metrics.
 	std::atomic<int64_t> metric_num_connected_clients{0};
diff --git a/kaeru.cpp b/kaeru.cpp
index 10f1e93..b3a9bb3 100644
--- a/kaeru.cpp
+++ b/kaeru.cpp
@@ -31,6 +31,8 @@ BasicStats *global_basic_stats = nullptr;
 QuittableSleeper should_quit;
 MuxMetrics stream_mux_metrics;
 
+namespace {
+
 int write_packet(void *opaque, uint8_t *buf, int buf_size, AVIODataMarkerType type, int64_t time)
 {
 	static bool seen_sync_markers = false;
@@ -47,13 +49,15 @@ int write_packet(void *opaque, uint8_t *buf, int buf_size, AVIODataMarkerType ty
 
 	if (type == AVIO_DATA_MARKER_HEADER) {
 		stream_mux_header.append((char *)buf, buf_size);
-		httpd->set_header(stream_mux_header);
+		httpd->set_header(HTTPD::MAIN_STREAM, stream_mux_header);
 	} else {
-		httpd->add_data((char *)buf, buf_size, type == AVIO_DATA_MARKER_SYNC_POINT, time, AVRational{ AV_TIME_BASE, 1 });
+		httpd->add_data(HTTPD::MAIN_STREAM, (char *)buf, buf_size, type == AVIO_DATA_MARKER_SYNC_POINT, time, AVRational{ AV_TIME_BASE, 1 });
 	}
 	return buf_size;
 }
 
+}  // namespace
+
 unique_ptr<Mux> create_mux(HTTPD *httpd, AVOutputFormat *oformat, X264Encoder *x264_encoder, AudioEncoder *audio_encoder)
 {
 	AVFormatContext *avctx = avformat_alloc_context();
diff --git a/meson.build b/meson.build
index cc161f2..d35dcb5 100644
--- a/meson.build
+++ b/meson.build
@@ -190,7 +190,7 @@ nageru_link_with += audio
 # Mixer objects.
 srcs += ['chroma_subsampler.cpp', 'v210_converter.cpp', 'mixer.cpp', 'pbo_frame_allocator.cpp',
         'context.cpp', 'theme.cpp', 'image_input.cpp', 'alsa_output.cpp',
-        'disk_space_estimator.cpp', 'timecode_renderer.cpp', 'tweaked_inputs.cpp']
+        'disk_space_estimator.cpp', 'timecode_renderer.cpp', 'tweaked_inputs.cpp', 'mjpeg_encoder.cpp']
 
 # Streaming and encoding objects (largely the set that is shared between Nageru and Kaeru).
 stream_srcs = ['quicksync_encoder.cpp', 'x264_encoder.cpp', 'x264_dynamic.cpp', 'x264_speed_control.cpp', 'video_encoder.cpp',
diff --git a/mixer.cpp b/mixer.cpp
index deaa8e7..294040f 100644
--- a/mixer.cpp
+++ b/mixer.cpp
@@ -46,12 +46,14 @@
 #include "flags.h"
 #include "input_mapping.h"
 #include "metrics.h"
+#include "mjpeg_encoder.h"
 #include "pbo_frame_allocator.h"
 #include "ref_counted_gl_sync.h"
 #include "resampling_queue.h"
 #include "timebase.h"
 #include "timecode_renderer.h"
 #include "v210_converter.h"
+#include "va_display_with_cleanup.h"
 #include "video_encoder.h"
 
 #undef Status
@@ -356,6 +358,7 @@ Mixer::Mixer(const QSurfaceFormat &format, unsigned num_cards)
 	display_chain->finalize();
 
 	video_encoder.reset(new VideoEncoder(resource_pool.get(), h264_encoder_surface, global_flags.va_display, global_flags.width, global_flags.height, &httpd, global_disk_space_estimator));
+	mjpeg_encoder.reset(new MJPEGEncoder(&httpd, global_flags.va_display));
 
 	// Must be instantiated after VideoEncoder has initialized global_flags.use_zerocopy.
 	theme.reset(new Theme(global_flags.theme_filename, global_flags.theme_dirs, resource_pool.get(), num_cards));
@@ -499,6 +502,7 @@ Mixer::Mixer(const QSurfaceFormat &format, unsigned num_cards)
 
 Mixer::~Mixer()
 {
+	mjpeg_encoder->stop();
 	httpd.stop();
 	BMUSBCapture::stop_bm_thread();
 
@@ -954,6 +958,9 @@ void Mixer::bm_frame(unsigned card_index, uint16_t timecode,
 		new_frame.upload_func = upload_func;
 		new_frame.dropped_frames = dropped_frames;
 		new_frame.received_timestamp = video_frame.received_timestamp;  // Ignore the audio timestamp.
+		new_frame.video_format = video_format;
+		new_frame.y_offset = y_offset;
+		new_frame.cbcr_offset = cbcr_offset;
 		card->new_frames.push_back(move(new_frame));
 		card->jitter_history.frame_arrived(video_frame.received_timestamp, frame_length, dropped_frames);
 		card->may_have_dropped_last_frame = false;
@@ -1063,6 +1070,14 @@ void Mixer::thread_func()
 				new_frame->upload_func();
 				new_frame->upload_func = nullptr;
 			}
+
+			// There are situations where we could possibly want to
+			// include FFmpeg inputs (CEF inputs are unlikely),
+			// but they're not necessarily in 4:2:2 Y'CbCr, so it would
+			// require more functionality than the JPEG encoder currently has.
+			if (card_index < num_cards) {
+				mjpeg_encoder->upload_frame(pts_int, card_index, new_frame->frame, new_frame->video_format, new_frame->y_offset, new_frame->cbcr_offset);
+			}
 		}
 
 		int64_t frame_duration = output_frame_info.frame_duration;
diff --git a/mixer.h b/mixer.h
index 32a1ea4..84313e5 100644
--- a/mixer.h
+++ b/mixer.h
@@ -42,6 +42,7 @@ class ALSAOutput;
 class ChromaSubsampler;
 class DeckLinkOutput;
+class MJPEGEncoder;
 class QSurface;
 class QSurfaceFormat;
 class TimecodeRenderer;
@@ -475,6 +476,7 @@ private:
 	std::unique_ptr<ChromaSubsampler> chroma_subsampler;
 	std::unique_ptr<v210Converter> v210_converter;
 	std::unique_ptr<VideoEncoder> video_encoder;
+	std::unique_ptr<MJPEGEncoder> mjpeg_encoder;
 
 	std::unique_ptr<TimecodeRenderer> timecode_renderer;
 	std::atomic<bool> display_timecode_in_stream{false};
@@ -530,6 +532,12 @@ private:
 		std::function<void()> upload_func;  // Needs to be called to actually upload the texture to OpenGL.
 		unsigned dropped_frames = 0;  // Number of dropped frames before this one.
 		std::chrono::steady_clock::time_point received_timestamp = std::chrono::steady_clock::time_point::min();
+
+		// Used for MJPEG encoding. (upload_func packs everything it needs
+		// into the functor, but would otherwise also use these.)
+		// width=0 or height=0 means a broken frame, i.e., do not upload.
+		bmusb::VideoFormat video_format;
+		size_t y_offset, cbcr_offset;
 	};
 	std::deque<NewFrame> new_frames;
 	std::condition_variable new_frames_changed;  // Set whenever new_frames is changed.
diff --git a/mjpeg_encoder.cpp b/mjpeg_encoder.cpp
new file mode 100644
index 0000000..740b059
--- /dev/null
+++ b/mjpeg_encoder.cpp
@@ -0,0 +1,677 @@
+#include "mjpeg_encoder.h"
+
+#include <assert.h>
+#include <jpeglib.h>
+#if __SSE2__
+#include <immintrin.h>
+#endif
+#include <list>
+
+extern "C" {
+#include <libavformat/avformat.h>
+}
+
+#include "defs.h"
+#include "ffmpeg_raii.h"
+#include "flags.h"
+#include "httpd.h"
+#include "memcpy_interleaved.h"
+#include "pbo_frame_allocator.h"
+#include "timebase.h"
+#include "va_display_with_cleanup.h"
+
+#include <va/va.h>
+#include <va/va_drm.h>
+#include <va/va_x11.h>
+
+using namespace bmusb;
+using namespace std;
+
+extern void memcpy_with_pitch(uint8_t *dst, const uint8_t *src, size_t src_width, size_t dst_pitch, size_t height);
+
+#define CHECK_VASTATUS(va_status, func) \
+	if (va_status != VA_STATUS_SUCCESS) { \
+		fprintf(stderr, "%s:%d (%s) failed with %d\n", __func__, __LINE__, func, va_status); \
+		exit(1); \
+	}
+
+// From libjpeg (although it's of course identical between implementations).
+static const int jpeg_natural_order[DCTSIZE2] = {
+	 0,  1,  8, 16,  9,  2,  3, 10,
+	17, 24, 32, 25, 18, 11,  4,  5,
+	12, 19, 26, 33, 40, 48, 41, 34,
+	27, 20, 13,  6,  7, 14, 21, 28,
+	35, 42, 49, 56, 57, 50, 43, 36,
+	29, 22, 15, 23, 30, 37, 44, 51,
+	58, 59, 52, 45, 38, 31, 39, 46,
+	53, 60, 61, 54, 47, 55, 62, 63,
+};
+
+struct VectorDestinationManager {
+	jpeg_destination_mgr pub;
+	std::vector<uint8_t> dest;
+
+	VectorDestinationManager()
+	{
+		pub.init_destination = init_destination_thunk;
+		pub.empty_output_buffer = empty_output_buffer_thunk;
+		pub.term_destination = term_destination_thunk;
+	}
+
+	static void init_destination_thunk(j_compress_ptr ptr)
+	{
+		((VectorDestinationManager *)(ptr->dest))->init_destination();
+	}
+
+	inline void init_destination()
+	{
+		make_room(0);
+	}
+
+	static boolean empty_output_buffer_thunk(j_compress_ptr ptr)
+	{
+		return ((VectorDestinationManager *)(ptr->dest))->empty_output_buffer();
+	}
+
+	inline bool empty_output_buffer()
+	{
+		make_room(dest.size());  // Should ignore pub.free_in_buffer!
+		return true;
+	}
+
+	inline void make_room(size_t bytes_used)
+	{
+		dest.resize(bytes_used + 4096);
+		dest.resize(dest.capacity());
+		pub.next_output_byte = dest.data() + bytes_used;
+		pub.free_in_buffer = dest.size() - bytes_used;
+	}
+
+	static void term_destination_thunk(j_compress_ptr ptr)
+	{
+		((VectorDestinationManager *)(ptr->dest))->term_destination();
+	}
+
+	inline void term_destination()
+	{
+		dest.resize(dest.size() - pub.free_in_buffer);
+	}
+};
+static_assert(std::is_standard_layout<VectorDestinationManager>::value, "");
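+// (The casts in the thunks above treat a pointer to <pub> as a pointer to
+// the whole object; that is only valid because <pub> is the first member of
+// a standard-layout type, hence the assertion.)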
+
+int MJPEGEncoder::write_packet2_thunk(void *opaque, uint8_t *buf, int buf_size, AVIODataMarkerType type, int64_t time)
+{
+	MJPEGEncoder *engine = (MJPEGEncoder *)opaque;
+	return engine->write_packet2(buf, buf_size, type, time);
+}
+
+int MJPEGEncoder::write_packet2(uint8_t *buf, int buf_size, AVIODataMarkerType type, int64_t time)
+{
+	if (type == AVIO_DATA_MARKER_HEADER) {
+		mux_header.append((char *)buf, buf_size);
+		httpd->set_header(HTTPD::MULTICAM_STREAM, mux_header);
+	} else {
+		httpd->add_data(HTTPD::MULTICAM_STREAM, (char *)buf, buf_size, /*keyframe=*/true, AV_NOPTS_VALUE, AVRational{ AV_TIME_BASE, 1 });
+	}
+	return buf_size;
+}
+
+MJPEGEncoder::MJPEGEncoder(HTTPD *httpd, const string &va_display)
+	: httpd(httpd)
+{
+	encoder_thread = thread(&MJPEGEncoder::encoder_thread_func, this);
+
+	// Set up the mux. We don't use the Mux wrapper, because it's geared towards
+	// a situation with only one video stream (and possibly one audio stream)
+	// with known width/height, and we don't need the extra functionality it provides.
+	avctx.reset(avformat_alloc_context());
+	avctx->oformat = av_guess_format("mp4", nullptr, nullptr);
+
+	uint8_t *buf = (uint8_t *)av_malloc(MUX_BUFFER_SIZE);
+	avctx->pb = avio_alloc_context(buf, MUX_BUFFER_SIZE, 1, this, nullptr, nullptr, nullptr);
+	avctx->pb->write_data_type = &MJPEGEncoder::write_packet2_thunk;
+	avctx->flags = AVFMT_FLAG_CUSTOM_IO;
+
+	for (int card_idx = 0; card_idx < global_flags.num_cards; ++card_idx) {
+		AVStream *stream = avformat_new_stream(avctx.get(), nullptr);
+		if (stream == nullptr) {
+			fprintf(stderr, "avformat_new_stream() failed\n");
+			exit(1);
+		}
+		stream->time_base = AVRational{ 1, TIMEBASE };
+		stream->codecpar->codec_type = AVMEDIA_TYPE_VIDEO;
+		stream->codecpar->codec_id = AV_CODEC_ID_MJPEG;
+
+		// Used for aspect ratio only. Can change without notice (the mux won't care).
+		stream->codecpar->width = global_flags.width;
+		stream->codecpar->height = global_flags.height;
+
+		// TODO: We could perhaps use the interpretation for each card here
+		// (or at least the command-line flags) instead of the defaults,
+		// but what would we do when they change?
+		stream->codecpar->color_primaries = AVCOL_PRI_BT709;
+		stream->codecpar->color_trc = AVCOL_TRC_IEC61966_2_1;
+		stream->codecpar->color_space = AVCOL_SPC_BT709;
+		stream->codecpar->color_range = AVCOL_RANGE_MPEG;
+		stream->codecpar->chroma_location = AVCHROMA_LOC_LEFT;
+		stream->codecpar->field_order = AV_FIELD_PROGRESSIVE;
+	}
+
+	AVDictionary *options = NULL;
+	vector<pair<string, string>> opts = MUX_OPTS;
+	for (pair<string, string> opt : opts) {
+		av_dict_set(&options, opt.first.c_str(), opt.second.c_str(), 0);
+	}
+	if (avformat_write_header(avctx.get(), &options) < 0) {
+		fprintf(stderr, "avformat_write_header() failed\n");
+		exit(1);
+	}
+
+	// Initialize VA-API.
+	string error;
+	va_dpy = try_open_va(va_display, &error, &config_id);
+	if (va_dpy == nullptr) {
+		fprintf(stderr, "Could not initialize VA-API for MJPEG encoding: %s. JPEGs will be encoded in software if needed.\n", error.c_str());
+	}
+
+	running = true;
+}
+
+void MJPEGEncoder::stop()
+{
+	if (!running) {
+		return;
+	}
+	running = false;
+	should_quit = true;
+	any_frames_to_be_encoded.notify_all();
+	encoder_thread.join();
+}
+
+unique_ptr<VADisplayWithCleanup> MJPEGEncoder::try_open_va(const string &va_display, string *error, VAConfigID *config_id)
+{
+	unique_ptr<VADisplayWithCleanup> va_dpy = va_open_display(va_display);
+	if (va_dpy == nullptr) {
+		if (error) *error = "Opening VA display failed";
+		return nullptr;
+	}
+	int major_ver, minor_ver;
+	VAStatus va_status = vaInitialize(va_dpy->va_dpy, &major_ver, &minor_ver);
+	if (va_status != VA_STATUS_SUCCESS) {
+		char buf[256];
+		snprintf(buf, sizeof(buf), "vaInitialize() failed with status %d\n", va_status);
+		if (error != nullptr) *error = buf;
+		return nullptr;
+	}
+
+	VAConfigAttrib attr = { VAConfigAttribRTFormat, VA_RT_FORMAT_YUV422 };
+	va_status = vaCreateConfig(va_dpy->va_dpy, VAProfileJPEGBaseline, VAEntrypointEncPicture,
+		&attr, 1, config_id);
+	if (va_status == VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT) {
+		if (error != nullptr) *error = "No hardware support";
+		return nullptr;
+	} else if (va_status != VA_STATUS_SUCCESS) {
+		char buf[256];
+		snprintf(buf, sizeof(buf), "vaCreateConfig() failed with status %d\n", va_status);
+		if (error != nullptr) *error = buf;
+		return nullptr;
+	}
+
+	int num_formats = vaMaxNumImageFormats(va_dpy->va_dpy);
+	assert(num_formats > 0);
+
+	unique_ptr<VAImageFormat[]> formats(new VAImageFormat[num_formats]);
+	va_status = vaQueryImageFormats(va_dpy->va_dpy, formats.get(), &num_formats);
+	if (va_status != VA_STATUS_SUCCESS) {
+		char buf[256];
+		snprintf(buf, sizeof(buf), "vaQueryImageFormats() failed with status %d\n", va_status);
+		if (error != nullptr) *error = buf;
+		return nullptr;
+	}
+
+	return va_dpy;
+}
+
+void MJPEGEncoder::upload_frame(int64_t pts, unsigned card_index, RefCountedFrame frame, const bmusb::VideoFormat &video_format, size_t y_offset, size_t cbcr_offset)
+{
+	PBOFrameAllocator::Userdata *userdata = (PBOFrameAllocator::Userdata *)frame->userdata;
+	if (video_format.width == 0 || video_format.height == 0) {
+		return;
+	}
+	if (video_format.interlaced) {
+		fprintf(stderr, "Card %u: Ignoring JPEG encoding for interlaced frame\n", card_index);
+		return;
+	}
+	if (userdata->pixel_format != PixelFormat_8BitYCbCr ||
+	    !frame->interleaved) {
+		fprintf(stderr, "Card %u: Ignoring JPEG encoding for unsupported pixel format\n", card_index);
+		return;
+	}
+	if (video_format.width > 4096 || video_format.height > 4096) {
+		fprintf(stderr, "Card %u: Ignoring JPEG encoding for oversized frame\n", card_index);
+		return;
+	}
+
+	lock_guard<mutex> lock(mu);
+	frames_to_be_encoded.push(QueuedFrame{ pts, card_index, frame, video_format, y_offset, cbcr_offset });
+	any_frames_to_be_encoded.notify_all();
+}
+
+void MJPEGEncoder::encoder_thread_func()
+{
+	pthread_setname_np(pthread_self(), "MJPEG_Encode");
+	posix_memalign((void **)&tmp_y, 4096, 4096 * 8);
+	posix_memalign((void **)&tmp_cbcr, 4096, 4096 * 8);
+	posix_memalign((void **)&tmp_cb, 4096, 4096 * 8);
+	posix_memalign((void **)&tmp_cr, 4096, 4096 * 8);
+
+	unique_lock<mutex> lock(mu);
+	for (;;) {
+		any_frames_to_be_encoded.wait(lock, [this] { return !frames_to_be_encoded.empty() || should_quit; });
+		if (should_quit) return;
+		QueuedFrame qf = move(frames_to_be_encoded.front());
+		frames_to_be_encoded.pop();
+
+		vector<uint8_t> jpeg = encode_jpeg(qf);
+
+		AVPacket pkt;
+		memset(&pkt, 0, sizeof(pkt));
+		pkt.buf = nullptr;
+		pkt.data = &jpeg[0];
+		pkt.size = jpeg.size();
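+		// Every JPEG is independently decodable, so each packet is a
+		// keyframe on its own stream (streams map 1:1 to capture cards).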
+		pkt.stream_index = qf.card_index;
+		pkt.flags = AV_PKT_FLAG_KEY;
+		pkt.pts = pkt.dts = qf.pts;
+
+		if (av_write_frame(avctx.get(), &pkt) < 0) {
+			fprintf(stderr, "av_write_frame() failed\n");
+			exit(1);
+		}
+	}
+}
+
+class VABufferDestroyer {
+public:
+	VABufferDestroyer(VADisplay dpy, VABufferID buf)
+		: dpy(dpy), buf(buf) {}
+
+	~VABufferDestroyer() {
+		VAStatus va_status = vaDestroyBuffer(dpy, buf);
+		CHECK_VASTATUS(va_status, "vaDestroyBuffer");
+	}
+
+private:
+	VADisplay dpy;
+	VABufferID buf;
+};
+
+MJPEGEncoder::VAResources MJPEGEncoder::get_va_resources(unsigned width, unsigned height)
+{
+	{
+		lock_guard<mutex> lock(va_resources_mutex);
+		for (auto it = va_resources_freelist.begin(); it != va_resources_freelist.end(); ++it) {
+			if (it->width == width && it->height == height) {
+				VAResources ret = *it;
+				va_resources_freelist.erase(it);
+				return ret;
+			}
+		}
+	}
+
+	VAResources ret;
+
+	ret.width = width;
+	ret.height = height;
+
+	VASurfaceAttrib attrib;
+	attrib.flags = VA_SURFACE_ATTRIB_SETTABLE;
+	attrib.type = VASurfaceAttribPixelFormat;
+	attrib.value.type = VAGenericValueTypeInteger;
+	attrib.value.value.i = VA_FOURCC_UYVY;
+
+	VAStatus va_status = vaCreateSurfaces(va_dpy->va_dpy, VA_RT_FORMAT_YUV422,
+		width, height,
+		&ret.surface, 1, &attrib, 1);
+	CHECK_VASTATUS(va_status, "vaCreateSurfaces");
+
+	va_status = vaCreateContext(va_dpy->va_dpy, config_id, width, height, 0, &ret.surface, 1, &ret.context);
+	CHECK_VASTATUS(va_status, "vaCreateContext");
+
+	va_status = vaCreateBuffer(va_dpy->va_dpy, config_id, VAEncCodedBufferType, width * height * 3 + 8192, 1, nullptr, &ret.data_buffer);
+	CHECK_VASTATUS(va_status, "vaCreateBuffer");
+
+	return ret;
+}
+
+void MJPEGEncoder::release_va_resources(MJPEGEncoder::VAResources resources)
+{
+	lock_guard<mutex> lock(va_resources_mutex);
+	if (va_resources_freelist.size() > 10) {
+		auto it = va_resources_freelist.end();
+		--it;
+
+		VAStatus va_status = vaDestroyBuffer(va_dpy->va_dpy, it->data_buffer);
+		CHECK_VASTATUS(va_status, "vaDestroyBuffer");
+
+		va_status = vaDestroyContext(va_dpy->va_dpy, it->context);
+		CHECK_VASTATUS(va_status, "vaDestroyContext");
+
+		va_status = vaDestroySurfaces(va_dpy->va_dpy, &it->surface, 1);
+		CHECK_VASTATUS(va_status, "vaDestroySurfaces");
+
+		va_resources_freelist.erase(it);
+	}
+
+	va_resources_freelist.push_front(resources);
+}
+
+void MJPEGEncoder::init_jpeg_422(unsigned width, unsigned height, VectorDestinationManager *dest, jpeg_compress_struct *cinfo)
+{
+	jpeg_error_mgr jerr;
+	cinfo->err = jpeg_std_error(&jerr);
+	jpeg_create_compress(cinfo);
+
+	cinfo->dest = (jpeg_destination_mgr *)dest;
+
+	cinfo->input_components = 3;
+	jpeg_set_defaults(cinfo);
+	jpeg_set_quality(cinfo, quality, /*force_baseline=*/false);
+
+	cinfo->image_width = width;
+	cinfo->image_height = height;
+	cinfo->raw_data_in = true;
+	jpeg_set_colorspace(cinfo, JCS_YCbCr);
+	cinfo->comp_info[0].h_samp_factor = 2;
+	cinfo->comp_info[0].v_samp_factor = 1;
+	cinfo->comp_info[1].h_samp_factor = 1;
+	cinfo->comp_info[1].v_samp_factor = 1;
+	cinfo->comp_info[2].h_samp_factor = 1;
+	cinfo->comp_info[2].v_samp_factor = 1;
+	cinfo->CCIR601_sampling = true;  // Seems to be mostly ignored by libjpeg, though.
+	jpeg_start_compress(cinfo, true);
+}
+
+vector<uint8_t> MJPEGEncoder::get_jpeg_header(unsigned width, unsigned height, jpeg_compress_struct *cinfo)
+{
+	VectorDestinationManager dest;
+	init_jpeg_422(width, height, &dest, cinfo);
+
+	// Make a dummy black image; there's seemingly no other easy way of
+	// making libjpeg output all of its headers.
+	JSAMPROW yptr[8], cbptr[8], crptr[8];
+	JSAMPARRAY data[3] = { yptr, cbptr, crptr };
+	memset(tmp_y, 0, 4096);
+	memset(tmp_cb, 0, 4096);
+	memset(tmp_cr, 0, 4096);
+	for (unsigned yy = 0; yy < 8; ++yy) {
+		yptr[yy] = tmp_y;
+		cbptr[yy] = tmp_cb;
+		crptr[yy] = tmp_cr;
+	}
+	for (unsigned y = 0; y < height; y += 8) {
+		jpeg_write_raw_data(cinfo, data, /*num_lines=*/8);
+	}
+	jpeg_finish_compress(cinfo);
+
+	// We're only interested in the header, not the data after it.
+	dest.term_destination();
+	for (size_t i = 0; i < dest.dest.size() - 1; ++i) {
+		if (dest.dest[i] == 0xff && dest.dest[i + 1] == 0xda) {  // Start of scan (SOS).
+			unsigned len = dest.dest[i + 2] * 256 + dest.dest[i + 3];
+			dest.dest.resize(i + len + 2);
+			break;
+		}
+	}
+
+	return dest.dest;
+}
+
+MJPEGEncoder::VAData MJPEGEncoder::get_va_data_for_resolution(unsigned width, unsigned height)
+{
+	pair<unsigned, unsigned> key(width, height);
+	if (va_data_for_resolution.count(key)) {
+		return va_data_for_resolution[key];
+	}
+
+	// Use libjpeg to generate a header and set sane defaults for e.g.
+	// quantization tables. Then do the actual encode with VA-API.
+	jpeg_compress_struct cinfo;
+	vector<uint8_t> jpeg_header = get_jpeg_header(width, height, &cinfo);
+
+	// Picture parameters.
+	VAEncPictureParameterBufferJPEG pic_param;
+	memset(&pic_param, 0, sizeof(pic_param));
+	pic_param.reconstructed_picture = VA_INVALID_ID;
+	pic_param.picture_width = cinfo.image_width;
+	pic_param.picture_height = cinfo.image_height;
+	for (int component_idx = 0; component_idx < cinfo.num_components; ++component_idx) {
+		const jpeg_component_info *comp = &cinfo.comp_info[component_idx];
+		pic_param.component_id[component_idx] = comp->component_id;
+		pic_param.quantiser_table_selector[component_idx] = comp->quant_tbl_no;
+	}
+	pic_param.num_components = cinfo.num_components;
+	pic_param.num_scan = 1;
+	pic_param.sample_bit_depth = 8;
+	pic_param.coded_buf = VA_INVALID_ID;  // To be filled out by caller.
+	pic_param.pic_flags.bits.huffman = 1;
+	pic_param.quality = 50;  // Don't scale the given quantization matrices. (See gen8_mfc_jpeg_fqm_state)
+
+	// Quantization matrices.
+	VAQMatrixBufferJPEG q;
+	memset(&q, 0, sizeof(q));
+
+	q.load_lum_quantiser_matrix = true;
+	q.load_chroma_quantiser_matrix = true;
+	for (int quant_tbl_idx = 0; quant_tbl_idx < min(4, NUM_QUANT_TBLS); ++quant_tbl_idx) {
+		const JQUANT_TBL *qtbl = cinfo.quant_tbl_ptrs[quant_tbl_idx];
+		assert((qtbl == nullptr) == (quant_tbl_idx >= 2));
+		if (qtbl == nullptr) continue;
+
+		uint8_t *qmatrix = (quant_tbl_idx == 0) ? q.lum_quantiser_matrix : q.chroma_quantiser_matrix;
+		for (int i = 0; i < 64; ++i) {
+			if (qtbl->quantval[i] > 255) {
+				fprintf(stderr, "Baseline JPEG only!\n");
+				abort();
+			}
+			qmatrix[i] = qtbl->quantval[jpeg_natural_order[i]];
+		}
+	}
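+	// (The indexing above reorders libjpeg's natural-order quantval[] into
+	// the zigzag order used in a DQT segment, which is the order the
+	// VA-API buffer expects.)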
+
+	// Huffman tables (arithmetic is not supported).
+	VAHuffmanTableBufferJPEGBaseline huff;
+	memset(&huff, 0, sizeof(huff));
+
+	for (int huff_tbl_idx = 0; huff_tbl_idx < min(2, NUM_HUFF_TBLS); ++huff_tbl_idx) {
+		const JHUFF_TBL *ac_hufftbl = cinfo.ac_huff_tbl_ptrs[huff_tbl_idx];
+		const JHUFF_TBL *dc_hufftbl = cinfo.dc_huff_tbl_ptrs[huff_tbl_idx];
+		if (ac_hufftbl == nullptr) {
+			assert(dc_hufftbl == nullptr);
+			huff.load_huffman_table[huff_tbl_idx] = 0;
+		} else {
+			assert(dc_hufftbl != nullptr);
+			huff.load_huffman_table[huff_tbl_idx] = 1;
+
+			for (int i = 0; i < 16; ++i) {
+				huff.huffman_table[huff_tbl_idx].num_dc_codes[i] = dc_hufftbl->bits[i + 1];
+			}
+			for (int i = 0; i < 12; ++i) {
+				huff.huffman_table[huff_tbl_idx].dc_values[i] = dc_hufftbl->huffval[i];
+			}
+			for (int i = 0; i < 16; ++i) {
+				huff.huffman_table[huff_tbl_idx].num_ac_codes[i] = ac_hufftbl->bits[i + 1];
+			}
+			for (int i = 0; i < 162; ++i) {
+				huff.huffman_table[huff_tbl_idx].ac_values[i] = ac_hufftbl->huffval[i];
+			}
+		}
+	}
+
+	// Slice parameters (metadata about the slice).
+	VAEncSliceParameterBufferJPEG parms;
+	memset(&parms, 0, sizeof(parms));
+	for (int component_idx = 0; component_idx < cinfo.num_components; ++component_idx) {
+		const jpeg_component_info *comp = &cinfo.comp_info[component_idx];
+		parms.components[component_idx].component_selector = comp->component_id;
+		parms.components[component_idx].dc_table_selector = comp->dc_tbl_no;
+		parms.components[component_idx].ac_table_selector = comp->ac_tbl_no;
+		if (parms.components[component_idx].dc_table_selector > 1 ||
+		    parms.components[component_idx].ac_table_selector > 1) {
+			fprintf(stderr, "Uses too many Huffman tables\n");
+			abort();
+		}
+	}
+	parms.num_components = cinfo.num_components;
+	parms.restart_interval = cinfo.restart_interval;
+
+	jpeg_destroy_compress(&cinfo);
+
+	VAData ret;
+	ret.jpeg_header = move(jpeg_header);
+	ret.pic_param = pic_param;
+	ret.q = q;
+	ret.huff = huff;
+	ret.parms = parms;
+	va_data_for_resolution[key] = ret;
+	return ret;
+}
+
+vector<uint8_t> MJPEGEncoder::encode_jpeg(const QueuedFrame &qf)
+{
+	if (va_dpy != nullptr) {
+		return encode_jpeg_va(qf);
+	} else {
+		return encode_jpeg_libjpeg(qf);
+	}
+}
+
+vector<uint8_t> MJPEGEncoder::encode_jpeg_va(const QueuedFrame &qf)
+{
+	unsigned width = qf.video_format.width;
+	unsigned height = qf.video_format.height;
+
+	VAResources resources = get_va_resources(width, height);
+	ReleaseVAResources release(this, resources);
+
+	VAData va_data = get_va_data_for_resolution(width, height);
+	va_data.pic_param.coded_buf = resources.data_buffer;
+
+	VABufferID pic_param_buffer;
+	VAStatus va_status = vaCreateBuffer(va_dpy->va_dpy, config_id, VAEncPictureParameterBufferType, sizeof(va_data.pic_param), 1, &va_data.pic_param, &pic_param_buffer);
+	CHECK_VASTATUS(va_status, "vaCreateBuffer");
+	VABufferDestroyer destroy_pic_param(va_dpy->va_dpy, pic_param_buffer);
+
+	VABufferID q_buffer;
+	va_status = vaCreateBuffer(va_dpy->va_dpy, config_id, VAQMatrixBufferType, sizeof(va_data.q), 1, &va_data.q, &q_buffer);
+	CHECK_VASTATUS(va_status, "vaCreateBuffer");
+	VABufferDestroyer destroy_iq(va_dpy->va_dpy, q_buffer);
+
+	VABufferID huff_buffer;
+	va_status = vaCreateBuffer(va_dpy->va_dpy, config_id, VAHuffmanTableBufferType, sizeof(va_data.huff), 1, &va_data.huff, &huff_buffer);
+	CHECK_VASTATUS(va_status, "vaCreateBuffer");
+	VABufferDestroyer destroy_huff(va_dpy->va_dpy, huff_buffer);
+
+	VABufferID slice_param_buffer;
+	va_status = vaCreateBuffer(va_dpy->va_dpy, config_id, VAEncSliceParameterBufferType, sizeof(va_data.parms), 1, &va_data.parms, &slice_param_buffer);
+	CHECK_VASTATUS(va_status, "vaCreateBuffer");
+	VABufferDestroyer destroy_slice_param(va_dpy->va_dpy, slice_param_buffer);
+
+	VAImage image;
+	va_status = vaDeriveImage(va_dpy->va_dpy, resources.surface, &image);
+	CHECK_VASTATUS(va_status, "vaDeriveImage");
+
+	// Upload the pixel data.
+	uint8_t *surface_p = nullptr;
+	vaMapBuffer(va_dpy->va_dpy, image.buf, (void **)&surface_p);
+
+	size_t field_start_line = qf.video_format.extra_lines_top;  // No interlacing support.
+	size_t field_start = qf.cbcr_offset * 2 + qf.video_format.width * field_start_line * 2;
+
+	{
+		const uint8_t *src = qf.frame->data_copy + field_start;
+		uint8_t *dst = (unsigned char *)surface_p + image.offsets[0];
+		memcpy_with_pitch(dst, src, qf.video_format.width * 2, image.pitches[0], qf.video_format.height);
+	}
+
+	va_status = vaUnmapBuffer(va_dpy->va_dpy, image.buf);
+	CHECK_VASTATUS(va_status, "vaUnmapBuffer");
+	va_status = vaDestroyImage(va_dpy->va_dpy, image.image_id);
+	CHECK_VASTATUS(va_status, "vaDestroyImage");
+
+	// Finally, stick in the JPEG header.
+	VAEncPackedHeaderParameterBuffer header_parm;
+	header_parm.type = VAEncPackedHeaderRawData;
+	header_parm.bit_length = 8 * va_data.jpeg_header.size();
+
+	VABufferID header_parm_buffer;
+	va_status = vaCreateBuffer(va_dpy->va_dpy, config_id, VAEncPackedHeaderParameterBufferType, sizeof(header_parm), 1, &header_parm, &header_parm_buffer);
+	CHECK_VASTATUS(va_status, "vaCreateBuffer");
+	VABufferDestroyer destroy_header(va_dpy->va_dpy, header_parm_buffer);
+
+	VABufferID header_data_buffer;
+	va_status = vaCreateBuffer(va_dpy->va_dpy, config_id, VAEncPackedHeaderDataBufferType, va_data.jpeg_header.size(), 1, va_data.jpeg_header.data(), &header_data_buffer);
+	CHECK_VASTATUS(va_status, "vaCreateBuffer");
+	VABufferDestroyer destroy_header_data(va_dpy->va_dpy, header_data_buffer);
+
+	va_status = vaBeginPicture(va_dpy->va_dpy, resources.context, resources.surface);
+	CHECK_VASTATUS(va_status, "vaBeginPicture");
+	va_status = vaRenderPicture(va_dpy->va_dpy, resources.context, &pic_param_buffer, 1);
+	CHECK_VASTATUS(va_status, "vaRenderPicture(pic_param)");
+	va_status = vaRenderPicture(va_dpy->va_dpy, resources.context, &q_buffer, 1);
+	CHECK_VASTATUS(va_status, "vaRenderPicture(q)");
+	va_status = vaRenderPicture(va_dpy->va_dpy, resources.context, &huff_buffer, 1);
+	CHECK_VASTATUS(va_status, "vaRenderPicture(huff)");
+	va_status = vaRenderPicture(va_dpy->va_dpy, resources.context, &slice_param_buffer, 1);
+	CHECK_VASTATUS(va_status, "vaRenderPicture(slice_param)");
+	va_status = vaRenderPicture(va_dpy->va_dpy, resources.context, &header_parm_buffer, 1);
+	CHECK_VASTATUS(va_status, "vaRenderPicture(header_parm)");
+	va_status = vaRenderPicture(va_dpy->va_dpy, resources.context, &header_data_buffer, 1);
+	CHECK_VASTATUS(va_status, "vaRenderPicture(header_data)");
+	va_status = vaEndPicture(va_dpy->va_dpy, resources.context);
+	CHECK_VASTATUS(va_status, "vaEndPicture");
+
+	va_status = vaSyncSurface(va_dpy->va_dpy, resources.surface);
+	CHECK_VASTATUS(va_status, "vaSyncSurface");
+
+	VACodedBufferSegment *segment;
+	va_status = vaMapBuffer(va_dpy->va_dpy, resources.data_buffer, (void **)&segment);
+	CHECK_VASTATUS(va_status, "vaMapBuffer");
+
+	const char *coded_buf = reinterpret_cast<const char *>(segment->buf);
+	vector<uint8_t> jpeg(coded_buf, coded_buf + segment->size);
+
+	va_status = vaUnmapBuffer(va_dpy->va_dpy, resources.data_buffer);
+	CHECK_VASTATUS(va_status, "vaUnmapBuffer");
+
+	return jpeg;
+}
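+
+// Software fallback, used when VA-API could not be initialized: feed the
+// frame through libjpeg's raw-data API with the same 4:2:2 sampling
+// parameters that init_jpeg_422() sets up.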
+vector<uint8_t> MJPEGEncoder::encode_jpeg_libjpeg(const QueuedFrame &qf)
+{
+	unsigned width = qf.video_format.width;
+	unsigned height = qf.video_format.height;
+
+	VectorDestinationManager dest;
+	jpeg_compress_struct cinfo;
+	init_jpeg_422(width, height, &dest, &cinfo);
+
+	size_t field_start_line = qf.video_format.extra_lines_top;  // No interlacing support.
+	size_t field_start = qf.cbcr_offset * 2 + qf.video_format.width * field_start_line * 2;
+
+	JSAMPROW yptr[8], cbptr[8], crptr[8];
+	JSAMPARRAY data[3] = { yptr, cbptr, crptr };
+	for (unsigned y = 0; y < qf.video_format.height; y += 8) {
+		const uint8_t *src = qf.frame->data_copy + field_start + y * qf.video_format.width * 2;
+
+		memcpy_interleaved(tmp_y, tmp_cbcr, src, qf.video_format.width * 8 * 2);
+		memcpy_interleaved(tmp_cb, tmp_cr, tmp_cbcr, qf.video_format.width * 8);
+		for (unsigned yy = 0; yy < 8; ++yy) {
+			yptr[yy] = tmp_y + yy * width;
+			cbptr[yy] = tmp_cb + yy * width / 2;
+			crptr[yy] = tmp_cr + yy * width / 2;
+		}
+		jpeg_write_raw_data(&cinfo, data, /*num_lines=*/8);
+	}
+	jpeg_finish_compress(&cinfo);
+
+	return dest.dest;
+}
diff --git a/mjpeg_encoder.h b/mjpeg_encoder.h
new file mode 100644
index 0000000..ab8632a
--- /dev/null
+++ b/mjpeg_encoder.h
@@ -0,0 +1,121 @@
+#ifndef _MJPEG_ENCODER_H
+#define _MJPEG_ENCODER_H 1
+
+#include "ffmpeg_raii.h"
+#include "ref_counted_frame.h"
+
+extern "C" {
+
+#include <libavformat/avio.h>
+
+}  // extern "C"
+
+#include <atomic>
+#include <condition_variable>
+#include <list>
+#include <map>
+#include <memory>
+#include <mutex>
+#include <queue>
+#include <string>
+#include <thread>
+
+#include <va/va.h>
+
+class HTTPD;
+struct jpeg_compress_struct;
+struct VADisplayWithCleanup;
+struct VectorDestinationManager;
+
+class MJPEGEncoder {
+public:
+	MJPEGEncoder(HTTPD *httpd, const std::string &va_display);
+	void stop();
+	void upload_frame(int64_t pts, unsigned card_index, RefCountedFrame frame, const bmusb::VideoFormat &video_format, size_t y_offset, size_t cbcr_offset);
+
+private:
+	static constexpr int quality = 90;
+
+	struct QueuedFrame {
+		int64_t pts;
+		unsigned card_index;
+		RefCountedFrame frame;
+		bmusb::VideoFormat video_format;
+		size_t y_offset, cbcr_offset;
+	};
+
+	void encoder_thread_func();
+	std::vector<uint8_t> encode_jpeg(const QueuedFrame &qf);
+	std::vector<uint8_t> encode_jpeg_va(const QueuedFrame &qf);
+	std::vector<uint8_t> encode_jpeg_libjpeg(const QueuedFrame &qf);
+	void init_jpeg_422(unsigned width, unsigned height, VectorDestinationManager *dest, jpeg_compress_struct *cinfo);
+	std::vector<uint8_t> get_jpeg_header(unsigned width, unsigned height, jpeg_compress_struct *cinfo);
+
+	static int write_packet2_thunk(void *opaque, uint8_t *buf, int buf_size, AVIODataMarkerType type, int64_t time);
+	int write_packet2(uint8_t *buf, int buf_size, AVIODataMarkerType type, int64_t time);
+
+	std::thread encoder_thread;
+
+	std::mutex mu;
+	std::queue<QueuedFrame> frames_to_be_encoded;  // Under mu.
+	std::condition_variable any_frames_to_be_encoded;
+
+	std::queue<QueuedFrame> frames_encoding;  // Under mu.
+	std::condition_variable any_frames_encoding;
+
+	AVFormatContextWithCloser avctx;
+	HTTPD *httpd;
+	std::string mux_header;
+	std::atomic<bool> should_quit{false};
+	bool running = false;
+
+	std::unique_ptr<VADisplayWithCleanup> va_dpy;
+	VAConfigID config_id;
+
+	struct VAData {
+		std::vector<uint8_t> jpeg_header;
+		VAEncPictureParameterBufferJPEG pic_param;
+		VAQMatrixBufferJPEG q;
+		VAHuffmanTableBufferJPEGBaseline huff;
+		VAEncSliceParameterBufferJPEG parms;
+	};
+	std::map<std::pair<unsigned, unsigned>, VAData> va_data_for_resolution;
+	VAData get_va_data_for_resolution(unsigned width, unsigned height);
+
+	struct VAResources {
+		unsigned width, height;
+		VASurfaceID surface;
+		VAContextID context;
+		VABufferID data_buffer;
+	};
+	std::list<VAResources> va_resources_freelist;
+	std::mutex va_resources_mutex;
+	VAResources get_va_resources(unsigned width, unsigned height);
+	void release_va_resources(VAResources resources);
+
+	// RAII wrapper to release VAResources on return (even on error).
+	class ReleaseVAResources {
+	public:
+		ReleaseVAResources(MJPEGEncoder *mjpeg, const VAResources &resources)
+			: mjpeg(mjpeg), resources(resources) {}
+		~ReleaseVAResources()
+		{
+			if (!committed) {
+				mjpeg->release_va_resources(resources);
+			}
+		}
+
+		void commit() { committed = true; }
+
+	private:
+		MJPEGEncoder * const mjpeg;
+		const VAResources &resources;
+		bool committed = false;
+	};
+
+	static std::unique_ptr<VADisplayWithCleanup> try_open_va(const std::string &va_display, std::string *error, VAConfigID *config_id);
+
+	uint8_t *tmp_y, *tmp_cbcr, *tmp_cb, *tmp_cr;  // Private to the encoder thread.
+};
+
+#endif  // !defined(_MJPEG_ENCODER_H)
diff --git a/pbo_frame_allocator.cpp b/pbo_frame_allocator.cpp
index ea17f12..c868702 100644
--- a/pbo_frame_allocator.cpp
+++ b/pbo_frame_allocator.cpp
@@ -52,6 +52,7 @@ void PBOFrameAllocator::init_frame(size_t frame_idx, size_t frame_size, GLuint w
 	Frame frame;
 	frame.data = (uint8_t *)glMapBufferRange(buffer, 0, frame_size, permissions | map_bits | GL_MAP_PERSISTENT_BIT);
 	frame.data2 = frame.data + frame_size / 2;
+	frame.data_copy = new uint8_t[frame_size];
 	check_error();
 	frame.size = frame_size;
 	frame.userdata = &userdata[frame_idx];
@@ -214,6 +215,8 @@ PBOFrameAllocator::~PBOFrameAllocator()
 
 void PBOFrameAllocator::destroy_frame(Frame *frame)
 {
+	delete[] frame->data_copy;
+
 	GLuint pbo = ((Userdata *)frame->userdata)->pbo;
 	glBindBuffer(buffer, pbo);
 	check_error();
diff --git a/quicksync_encoder.cpp b/quicksync_encoder.cpp
index 5200ce6..bca7ffd 100644
--- a/quicksync_encoder.cpp
+++ b/quicksync_encoder.cpp
@@ -1939,8 +1939,6 @@ void QuickSyncEncoderImpl::add_packet_for_uncompressed_frame(int64_t pts, int64_
 	stream_mux->add_packet(pkt, pts, pts);
 }
 
-namespace {
-
 void memcpy_with_pitch(uint8_t *dst, const uint8_t *src, size_t src_width, size_t dst_pitch, size_t height)
 {
 	if (src_width == dst_pitch) {
@@ -1954,8 +1952,6 @@ void memcpy_with_pitch(uint8_t *dst, const uint8_t *src, size_t src_width, size_
 	}
 }
 
-}  // namespace
-
 void QuickSyncEncoderImpl::pass_frame(QuickSyncEncoderImpl::PendingFrame frame, int display_frame_num, int64_t pts, int64_t duration)
 {
 	// Wait for the GPU to be done with the frame.
diff --git a/quicksync_encoder_impl.h b/quicksync_encoder_impl.h
index 7645b99..0317b6a 100644
--- a/quicksync_encoder_impl.h
+++ b/quicksync_encoder_impl.h
@@ -20,6 +20,7 @@
 #include "timebase.h"
 #include "print_latency.h"
 #include "ref_counted_gl_sync.h"
+#include "va_display_with_cleanup.h"
 
 #define SURFACE_NUM 16 /* 16 surfaces for source YUV */
 #define MAX_NUM_REF1 16 // Seemingly a hardware-fixed value, not related to SURFACE_NUM
@@ -39,16 +40,6 @@ class DiskSpaceEstimator;
 class QSurface;
 class X264Encoder;
 
-struct VADisplayWithCleanup {
-	~VADisplayWithCleanup();
-
-	VADisplay va_dpy;
-	Display *x11_display = nullptr;
-	bool can_use_zerocopy = true;
-	int drm_fd = -1;
-};
-std::unique_ptr<VADisplayWithCleanup> va_open_display(const std::string &va_display);  // Can return nullptr on failure.
-
 class QuickSyncEncoderImpl {
 public:
 	QuickSyncEncoderImpl(const std::string &filename, movit::ResourcePool *resource_pool, QSurface *surface, const std::string &va_display, int width, int height, AVOutputFormat *oformat, X264Encoder *x264_encoder, DiskSpaceEstimator *disk_space_estimator);
diff --git a/va_display_with_cleanup.h b/va_display_with_cleanup.h
new file mode 100644
index 0000000..3f9b1c5
--- /dev/null
+++ b/va_display_with_cleanup.h
@@ -0,0 +1,19 @@
+#ifndef _VA_DISPLAY_WITH_CLEANUP
+#define _VA_DISPLAY_WITH_CLEANUP 1
+
+#include <va/va.h>
+#include <X11/Xlib.h>
+
+#include <memory>
+
+struct VADisplayWithCleanup {
+	~VADisplayWithCleanup();
+
+	VADisplay va_dpy;
+	Display *x11_display = nullptr;
+	bool can_use_zerocopy = true;
+	int drm_fd = -1;
+};
+std::unique_ptr<VADisplayWithCleanup> va_open_display(const std::string &va_display);  // Can return nullptr on failure.
+
+#endif  // !defined(_VA_DISPLAY_WITH_CLEANUP)
diff --git a/video_encoder.cpp b/video_encoder.cpp
index 6344b8c..2b8fcd5 100644
--- a/video_encoder.cpp
+++ b/video_encoder.cpp
@@ -216,9 +216,9 @@ int VideoEncoder::write_packet2(uint8_t *buf, int buf_size, AVIODataMarkerType t
 
 	if (type == AVIO_DATA_MARKER_HEADER) {
 		stream_mux_header.append((char *)buf, buf_size);
-		httpd->set_header(stream_mux_header);
+		httpd->set_header(HTTPD::MAIN_STREAM, stream_mux_header);
 	} else {
-		httpd->add_data((char *)buf, buf_size, type == AVIO_DATA_MARKER_SYNC_POINT, time, AVRational{ AV_TIME_BASE, 1 });
+		httpd->add_data(HTTPD::MAIN_STREAM, (char *)buf, buf_size, type == AVIO_DATA_MARKER_SYNC_POINT, time, AVRational{ AV_TIME_BASE, 1 });
 	}
 	return buf_size;
 }