From 3be00c8dd8b841cecc44f57234b9fc2d3a94cb45 Mon Sep 17 00:00:00 2001 From: "Steinar H. Gunderson" Date: Mon, 25 Apr 2016 23:04:59 +0200 Subject: [PATCH] Set x264 global headers (Quick Sync global headers are still not there). Should fix the H.264 stream in newer Firefox with some luck. --- audio_encoder.cpp | 6 ++++-- audio_encoder.h | 2 +- mux.cpp | 9 ++++++--- mux.h | 2 +- quicksync_encoder.cpp | 30 ++++++++++++++++++++++-------- quicksync_encoder.h | 7 ++++++- video_encoder.cpp | 31 +++++++++++++++++++------------ video_encoder.h | 1 + x264_encoder.cpp | 29 ++++++++++++++++++++++++++--- x264_encoder.h | 12 +++++++++++- 10 files changed, 97 insertions(+), 32 deletions(-) diff --git a/audio_encoder.cpp b/audio_encoder.cpp index 2b735e4..ac1c8f5 100644 --- a/audio_encoder.cpp +++ b/audio_encoder.cpp @@ -21,7 +21,7 @@ extern "C" { using namespace std; -AudioEncoder::AudioEncoder(const string &codec_name, int bit_rate) +AudioEncoder::AudioEncoder(const string &codec_name, int bit_rate, const AVOutputFormat *oformat) { AVCodec *codec = avcodec_find_encoder_by_name(codec_name.c_str()); if (codec == nullptr) { @@ -36,7 +36,9 @@ AudioEncoder::AudioEncoder(const string &codec_name, int bit_rate) ctx->channels = 2; ctx->channel_layout = AV_CH_LAYOUT_STEREO; ctx->time_base = AVRational{1, TIMEBASE}; - ctx->flags |= CODEC_FLAG_GLOBAL_HEADER; + if (oformat->flags & AVFMT_GLOBALHEADER) { + ctx->flags |= CODEC_FLAG_GLOBAL_HEADER; + } if (avcodec_open2(ctx, codec, NULL) < 0) { fprintf(stderr, "Could not open codec '%s'\n", codec_name.c_str()); exit(1); diff --git a/audio_encoder.h b/audio_encoder.h index d627a9c..786d364 100644 --- a/audio_encoder.h +++ b/audio_encoder.h @@ -16,7 +16,7 @@ extern "C" { class AudioEncoder { public: - AudioEncoder(const std::string &codec_name, int bit_rate); + AudioEncoder(const std::string &codec_name, int bit_rate, const AVOutputFormat *oformat); ~AudioEncoder(); void add_mux(Mux *mux) { // Does not take ownership. diff --git a/mux.cpp b/mux.cpp index ece11e1..e169438 100644 --- a/mux.cpp +++ b/mux.cpp @@ -10,7 +10,7 @@ using namespace std; -Mux::Mux(AVFormatContext *avctx, int width, int height, Codec video_codec, const AVCodecContext *audio_ctx, int time_base, KeyFrameSignalReceiver *keyframe_signal_receiver) +Mux::Mux(AVFormatContext *avctx, int width, int height, Codec video_codec, const string &video_extradata, const AVCodecContext *audio_ctx, int time_base, KeyFrameSignalReceiver *keyframe_signal_receiver) : avctx(avctx), keyframe_signal_receiver(keyframe_signal_receiver) { AVCodec *codec_video = avcodec_find_encoder((video_codec == CODEC_H264) ? AV_CODEC_ID_H264 : AV_CODEC_ID_RAWVIDEO); @@ -43,8 +43,11 @@ Mux::Mux(AVFormatContext *avctx, int width, int height, Codec video_codec, const avstream_video->codec->color_range = AVCOL_RANGE_MPEG; // Full vs. limited range (output_ycbcr_format.full_range). avstream_video->codec->chroma_sample_location = AVCHROMA_LOC_LEFT; // Chroma sample location. See chroma_offset_0[] in Mixer::subsample_chroma(). avstream_video->codec->field_order = AV_FIELD_PROGRESSIVE; - if (avctx->oformat->flags & AVFMT_GLOBALHEADER) { - avstream_video->codec->flags = AV_CODEC_FLAG_GLOBAL_HEADER; + + if (!video_extradata.empty()) { + avstream_video->codec->extradata = (uint8_t *)av_malloc(video_extradata.size()); + avstream_video->codec->extradata_size = video_extradata.size(); + memcpy(avstream_video->codec->extradata, video_extradata.data(), video_extradata.size()); } avstream_audio = avformat_new_stream(avctx, nullptr); diff --git a/mux.h b/mux.h index 1dd967c..c161b29 100644 --- a/mux.h +++ b/mux.h @@ -25,7 +25,7 @@ public: }; // Takes ownership of avctx. can be nullptr. - Mux(AVFormatContext *avctx, int width, int height, Codec video_codec, const AVCodecContext *audio_ctx, int time_base, KeyFrameSignalReceiver *keyframe_signal_receiver); + Mux(AVFormatContext *avctx, int width, int height, Codec video_codec, const std::string &video_extradata, const AVCodecContext *audio_ctx, int time_base, KeyFrameSignalReceiver *keyframe_signal_receiver); ~Mux(); void add_packet(const AVPacket &pkt, int64_t pts, int64_t dts); diff --git a/quicksync_encoder.cpp b/quicksync_encoder.cpp index fbbde94..b81cb53 100644 --- a/quicksync_encoder.cpp +++ b/quicksync_encoder.cpp @@ -194,12 +194,16 @@ FrameReorderer::Frame FrameReorderer::get_first_frame() class QuickSyncEncoderImpl { public: - QuickSyncEncoderImpl(const std::string &filename, movit::ResourcePool *resource_pool, QSurface *surface, const string &va_display, int width, int height, Mux *stream_mux, AudioEncoder *stream_audio_encoder, X264Encoder *x264_encoder); + QuickSyncEncoderImpl(const std::string &filename, movit::ResourcePool *resource_pool, QSurface *surface, const string &va_display, int width, int height, AVOutputFormat *oformat, AudioEncoder *stream_audio_encoder, X264Encoder *x264_encoder); ~QuickSyncEncoderImpl(); void add_audio(int64_t pts, vector audio); bool begin_frame(GLuint *y_tex, GLuint *cbcr_tex); RefCountedGLsync end_frame(int64_t pts, int64_t duration, const vector &input_frames); void shutdown(); + void set_stream_mux(Mux *mux) + { + stream_mux = mux; + } private: struct storage_task { @@ -280,7 +284,7 @@ private: unique_ptr reorderer; X264Encoder *x264_encoder; // nullptr if not using x264. - Mux* stream_mux; // To HTTP. + Mux* stream_mux = nullptr; // To HTTP. unique_ptr file_mux; // To local disk. Display *x11_display = nullptr; @@ -1727,10 +1731,10 @@ namespace { } // namespace -QuickSyncEncoderImpl::QuickSyncEncoderImpl(const std::string &filename, movit::ResourcePool *resource_pool, QSurface *surface, const string &va_display, int width, int height, Mux *stream_mux, AudioEncoder *stream_audio_encoder, X264Encoder *x264_encoder) - : current_storage_frame(0), resource_pool(resource_pool), surface(surface), stream_audio_encoder(stream_audio_encoder), x264_encoder(x264_encoder), stream_mux(stream_mux), frame_width(width), frame_height(height) +QuickSyncEncoderImpl::QuickSyncEncoderImpl(const std::string &filename, movit::ResourcePool *resource_pool, QSurface *surface, const string &va_display, int width, int height, AVOutputFormat *oformat, AudioEncoder *stream_audio_encoder, X264Encoder *x264_encoder) + : current_storage_frame(0), resource_pool(resource_pool), surface(surface), stream_audio_encoder(stream_audio_encoder), x264_encoder(x264_encoder), frame_width(width), frame_height(height) { - file_audio_encoder.reset(new AudioEncoder(AUDIO_OUTPUT_CODEC_NAME, DEFAULT_AUDIO_OUTPUT_BIT_RATE)); + file_audio_encoder.reset(new AudioEncoder(AUDIO_OUTPUT_CODEC_NAME, DEFAULT_AUDIO_OUTPUT_BIT_RATE, oformat)); open_output_file(filename); file_audio_encoder->add_mux(file_mux.get()); @@ -1949,7 +1953,8 @@ void QuickSyncEncoderImpl::open_output_file(const std::string &filename) exit(1); } - file_mux.reset(new Mux(avctx, frame_width, frame_height, Mux::CODEC_H264, file_audio_encoder->get_ctx(), TIMEBASE, nullptr)); + string video_extradata = ""; // FIXME: See other comment about global headers. + file_mux.reset(new Mux(avctx, frame_width, frame_height, Mux::CODEC_H264, video_extradata, file_audio_encoder->get_ctx(), TIMEBASE, nullptr)); } void QuickSyncEncoderImpl::encode_thread_func() @@ -2141,6 +2146,9 @@ void QuickSyncEncoderImpl::encode_frame(QuickSyncEncoderImpl::PendingFrame frame CHECK_VASTATUS(va_status, "vaBeginPicture"); if (frame_type == FRAME_IDR) { + // FIXME: If the mux wants global headers, we should not put the + // SPS/PPS before each IDR frame, but rather put it into the + // codec extradata (formatted differently?). render_sequence(); render_picture(frame_type, display_frame_num, gop_start_display_frame_num); if (h264_packedheader) { @@ -2170,8 +2178,8 @@ void QuickSyncEncoderImpl::encode_frame(QuickSyncEncoderImpl::PendingFrame frame } // Proxy object. -QuickSyncEncoder::QuickSyncEncoder(const std::string &filename, movit::ResourcePool *resource_pool, QSurface *surface, const string &va_display, int width, int height, Mux *stream_mux, AudioEncoder *stream_audio_encoder, X264Encoder *x264_encoder) - : impl(new QuickSyncEncoderImpl(filename, resource_pool, surface, va_display, width, height, stream_mux, stream_audio_encoder, x264_encoder)) {} +QuickSyncEncoder::QuickSyncEncoder(const std::string &filename, movit::ResourcePool *resource_pool, QSurface *surface, const string &va_display, int width, int height, AVOutputFormat *oformat, AudioEncoder *stream_audio_encoder, X264Encoder *x264_encoder) + : impl(new QuickSyncEncoderImpl(filename, resource_pool, surface, va_display, width, height, oformat, stream_audio_encoder, x264_encoder)) {} // Must be defined here because unique_ptr<> destructor needs to know the impl. QuickSyncEncoder::~QuickSyncEncoder() {} @@ -2195,3 +2203,9 @@ void QuickSyncEncoder::shutdown() { impl->shutdown(); } + +void QuickSyncEncoder::set_stream_mux(Mux *mux) +{ + impl->set_stream_mux(mux); +} + diff --git a/quicksync_encoder.h b/quicksync_encoder.h index 4f2bca5..52aaf77 100644 --- a/quicksync_encoder.h +++ b/quicksync_encoder.h @@ -33,6 +33,10 @@ #include #include +extern "C" { +#include +} + #include "ref_counted_frame.h" #include "ref_counted_gl_sync.h" @@ -51,9 +55,10 @@ class ResourcePool; // .cpp file. class QuickSyncEncoder { public: - QuickSyncEncoder(const std::string &filename, movit::ResourcePool *resource_pool, QSurface *surface, const std::string &va_display, int width, int height, Mux *stream_mux, AudioEncoder *stream_audio_encoder, X264Encoder *x264_encoder); + QuickSyncEncoder(const std::string &filename, movit::ResourcePool *resource_pool, QSurface *surface, const std::string &va_display, int width, int height, AVOutputFormat *oformat, AudioEncoder *stream_audio_encoder, X264Encoder *x264_encoder); ~QuickSyncEncoder(); + void set_stream_mux(Mux *mux); // Does not take ownership. Must be called unless x264 is used for the stream. void add_audio(int64_t pts, std::vector audio); bool begin_frame(GLuint *y_tex, GLuint *cbcr_tex); RefCountedGLsync end_frame(int64_t pts, int64_t duration, const std::vector &input_frames); diff --git a/video_encoder.cpp b/video_encoder.cpp index cae4328..96d4932 100644 --- a/video_encoder.cpp +++ b/video_encoder.cpp @@ -38,21 +38,24 @@ string generate_local_dump_filename(int frame) VideoEncoder::VideoEncoder(ResourcePool *resource_pool, QSurface *surface, const std::string &va_display, int width, int height, HTTPD *httpd) : resource_pool(resource_pool), surface(surface), va_display(va_display), width(width), height(height), httpd(httpd) { - open_output_stream(); - + oformat = av_guess_format(global_flags.stream_mux_name.c_str(), nullptr, nullptr); + assert(oformat != nullptr); if (global_flags.stream_audio_codec_name.empty()) { - stream_audio_encoder.reset(new AudioEncoder(AUDIO_OUTPUT_CODEC_NAME, DEFAULT_AUDIO_OUTPUT_BIT_RATE)); + stream_audio_encoder.reset(new AudioEncoder(AUDIO_OUTPUT_CODEC_NAME, DEFAULT_AUDIO_OUTPUT_BIT_RATE, oformat)); } else { - stream_audio_encoder.reset(new AudioEncoder(global_flags.stream_audio_codec_name, global_flags.stream_audio_codec_bitrate)); + stream_audio_encoder.reset(new AudioEncoder(global_flags.stream_audio_codec_name, global_flags.stream_audio_codec_bitrate, oformat)); } - stream_audio_encoder->add_mux(stream_mux.get()); - if (global_flags.x264_video_to_http) { - x264_encoder.reset(new X264Encoder(stream_mux.get())); + x264_encoder.reset(new X264Encoder(oformat)); } string filename = generate_local_dump_filename(/*frame=*/0); - quicksync_encoder.reset(new QuickSyncEncoder(filename, resource_pool, surface, va_display, width, height, stream_mux.get(), stream_audio_encoder.get(), x264_encoder.get())); + quicksync_encoder.reset(new QuickSyncEncoder(filename, resource_pool, surface, va_display, width, height, oformat, stream_audio_encoder.get(), x264_encoder.get())); + + open_output_stream(); + stream_audio_encoder->add_mux(stream_mux.get()); + quicksync_encoder->set_stream_mux(stream_mux.get()); + x264_encoder->set_mux(stream_mux.get()); } VideoEncoder::~VideoEncoder() @@ -66,7 +69,8 @@ void VideoEncoder::do_cut(int frame) string filename = generate_local_dump_filename(frame); printf("Starting new recording: %s\n", filename.c_str()); quicksync_encoder->shutdown(); - quicksync_encoder.reset(new QuickSyncEncoder(filename, resource_pool, surface, va_display, width, height, stream_mux.get(), stream_audio_encoder.get(), x264_encoder.get())); + quicksync_encoder.reset(new QuickSyncEncoder(filename, resource_pool, surface, va_display, width, height, oformat, stream_audio_encoder.get(), x264_encoder.get())); + quicksync_encoder->set_stream_mux(stream_mux.get()); } void VideoEncoder::add_audio(int64_t pts, std::vector audio) @@ -87,8 +91,6 @@ RefCountedGLsync VideoEncoder::end_frame(int64_t pts, int64_t duration, const st void VideoEncoder::open_output_stream() { AVFormatContext *avctx = avformat_alloc_context(); - AVOutputFormat *oformat = av_guess_format(global_flags.stream_mux_name.c_str(), nullptr, nullptr); - assert(oformat != nullptr); avctx->oformat = oformat; uint8_t *buf = (uint8_t *)av_malloc(MUX_BUFFER_SIZE); @@ -103,9 +105,14 @@ void VideoEncoder::open_output_stream() avctx->flags = AVFMT_FLAG_CUSTOM_IO; + string video_extradata; + if (global_flags.x264_video_to_http) { + video_extradata = x264_encoder->get_global_headers(); + } + int time_base = global_flags.stream_coarse_timebase ? COARSE_TIMEBASE : TIMEBASE; stream_mux_writing_header = true; - stream_mux.reset(new Mux(avctx, width, height, video_codec, stream_audio_encoder->get_ctx(), time_base, this)); + stream_mux.reset(new Mux(avctx, width, height, video_codec, video_extradata, stream_audio_encoder->get_ctx(), time_base, this)); stream_mux_writing_header = false; httpd->set_header(stream_mux_header); stream_mux_header.clear(); diff --git a/video_encoder.h b/video_encoder.h index bb1be55..78162e9 100644 --- a/video_encoder.h +++ b/video_encoder.h @@ -46,6 +46,7 @@ private: static int write_packet_thunk(void *opaque, uint8_t *buf, int buf_size); int write_packet(uint8_t *buf, int buf_size); + AVOutputFormat *oformat; std::unique_ptr quicksync_encoder; movit::ResourcePool *resource_pool; QSurface *surface; diff --git a/x264_encoder.cpp b/x264_encoder.cpp index 189da20..bbbb1ba 100644 --- a/x264_encoder.cpp +++ b/x264_encoder.cpp @@ -13,8 +13,8 @@ extern "C" { using namespace std; -X264Encoder::X264Encoder(Mux *mux) - : mux(mux) +X264Encoder::X264Encoder(AVOutputFormat *oformat) + : wants_global_headers(oformat->flags & AVFMT_GLOBALHEADER) { frame_pool.reset(new uint8_t[WIDTH * HEIGHT * 2 * X264_QUEUE_LENGTH]); for (unsigned i = 0; i < X264_QUEUE_LENGTH; ++i) { @@ -88,11 +88,29 @@ void X264Encoder::init_x264() x264_param_apply_profile(¶m, "high"); + param.b_repeat_headers = !wants_global_headers; + x264 = x264_encoder_open(¶m); if (x264 == nullptr) { fprintf(stderr, "ERROR: x264 initialization failed.\n"); exit(1); } + + if (wants_global_headers) { + x264_nal_t *nal; + int num_nal; + + x264_encoder_headers(x264, &nal, &num_nal); + + for (int i = 0; i < num_nal; ++i) { + if (nal[i].i_type == NAL_SEI) { + // Don't put the SEI in extradata; make it part of the first frame instead. + buffered_sei += string((const char *)nal[i].p_payload, nal[i].i_payload); + } else { + global_headers += string((const char *)nal[i].p_payload, nal[i].i_payload); + } + } + } } void X264Encoder::encoder_thread_func() @@ -160,7 +178,7 @@ void X264Encoder::encode_frame(X264Encoder::QueuedFrame qf) // We really need one AVPacket for the entire frame, it seems, // so combine it all. - size_t num_bytes = 0; + size_t num_bytes = buffered_sei.size(); for (int i = 0; i < num_nal; ++i) { num_bytes += nal[i].i_payload; } @@ -168,6 +186,11 @@ void X264Encoder::encode_frame(X264Encoder::QueuedFrame qf) unique_ptr data(new uint8_t[num_bytes]); uint8_t *ptr = data.get(); + if (!buffered_sei.empty()) { + memcpy(ptr, buffered_sei.data(), buffered_sei.size()); + ptr += buffered_sei.size(); + buffered_sei.clear(); + } for (int i = 0; i < num_nal; ++i) { memcpy(ptr, nal[i].p_payload, nal[i].i_payload); ptr += nal[i].i_payload; diff --git a/x264_encoder.h b/x264_encoder.h index e146cd2..729cb7f 100644 --- a/x264_encoder.h +++ b/x264_encoder.h @@ -27,22 +27,28 @@ extern "C" { #include "x264.h" +#include } class Mux; class X264Encoder { public: - X264Encoder(Mux *httpd); // Does not take ownership. + X264Encoder(AVOutputFormat *oformat); // Does not take ownership. // Called after the last frame. Will block; once this returns, // the last data is flushed. ~X264Encoder(); + // Must be called before first frame. Does not take ownership. + void set_mux(Mux *mux) { this->mux = mux; } + // is taken to be raw NV12 data of WIDTHxHEIGHT resolution. // Does not block. void add_frame(int64_t pts, int64_t duration, const uint8_t *data); + std::string get_global_headers() const { return global_headers; } + private: struct QueuedFrame { int64_t pts, duration; @@ -58,6 +64,10 @@ private: std::unique_ptr frame_pool; Mux *mux = nullptr; + bool wants_global_headers; + + std::string global_headers; + std::string buffered_sei; // Will be output before first frame, if any. std::thread encoder_thread; std::atomic should_quit{false}; -- 2.39.2