]> git.sesse.net Git - nageru/commitdiff
Set x264 global headers (Quick Sync global headers are still not there).
author Steinar H. Gunderson <sgunderson@bigfoot.com>
Mon, 25 Apr 2016 21:04:59 +0000 (23:04 +0200)
committer Steinar H. Gunderson <sgunderson@bigfoot.com>
Mon, 25 Apr 2016 21:18:21 +0000 (23:18 +0200)
Should fix the H.264 stream in newer Firefox with some luck.

audio_encoder.cpp
audio_encoder.h
mux.cpp
mux.h
quicksync_encoder.cpp
quicksync_encoder.h
video_encoder.cpp
video_encoder.h
x264_encoder.cpp
x264_encoder.h

index 2b735e4d44f5ff27b95588032fd3e73459ff7e64..ac1c8f509f9aae5057335e3234dd2c2b123e6aa4 100644 (file)
@@ -21,7 +21,7 @@ extern "C" {
 
 using namespace std;
 
-AudioEncoder::AudioEncoder(const string &codec_name, int bit_rate)
+AudioEncoder::AudioEncoder(const string &codec_name, int bit_rate, const AVOutputFormat *oformat)
 {
        AVCodec *codec = avcodec_find_encoder_by_name(codec_name.c_str());
        if (codec == nullptr) {
@@ -36,7 +36,9 @@ AudioEncoder::AudioEncoder(const string &codec_name, int bit_rate)
        ctx->channels = 2;
        ctx->channel_layout = AV_CH_LAYOUT_STEREO;
        ctx->time_base = AVRational{1, TIMEBASE};
-       ctx->flags |= CODEC_FLAG_GLOBAL_HEADER;
+       if (oformat->flags & AVFMT_GLOBALHEADER) {
+               ctx->flags |= CODEC_FLAG_GLOBAL_HEADER;
+       }
        if (avcodec_open2(ctx, codec, NULL) < 0) {
                fprintf(stderr, "Could not open codec '%s'\n", codec_name.c_str());
                exit(1);
index d627a9c281a6abd10bd42cc5e51f098a46b79412..786d36498bec525def5786e702f200bf79f189fb 100644 (file)
@@ -16,7 +16,7 @@ extern "C" {
 
 class AudioEncoder {
 public:
-       AudioEncoder(const std::string &codec_name, int bit_rate);
+       AudioEncoder(const std::string &codec_name, int bit_rate, const AVOutputFormat *oformat);
        ~AudioEncoder();
 
        void add_mux(Mux *mux) {  // Does not take ownership.
diff --git a/mux.cpp b/mux.cpp
index ece11e117057167777023459bcc051e34b983c00..e16943828fcccaa8f9d5e65ad9008cc9bb1456dd 100644 (file)
--- a/mux.cpp
+++ b/mux.cpp
@@ -10,7 +10,7 @@
 
 using namespace std;
 
-Mux::Mux(AVFormatContext *avctx, int width, int height, Codec video_codec, const AVCodecContext *audio_ctx, int time_base, KeyFrameSignalReceiver *keyframe_signal_receiver)
+Mux::Mux(AVFormatContext *avctx, int width, int height, Codec video_codec, const string &video_extradata, const AVCodecContext *audio_ctx, int time_base, KeyFrameSignalReceiver *keyframe_signal_receiver)
        : avctx(avctx), keyframe_signal_receiver(keyframe_signal_receiver)
 {
        AVCodec *codec_video = avcodec_find_encoder((video_codec == CODEC_H264) ? AV_CODEC_ID_H264 : AV_CODEC_ID_RAWVIDEO);
@@ -43,8 +43,11 @@ Mux::Mux(AVFormatContext *avctx, int width, int height, Codec video_codec, const
        avstream_video->codec->color_range = AVCOL_RANGE_MPEG;  // Full vs. limited range (output_ycbcr_format.full_range).
        avstream_video->codec->chroma_sample_location = AVCHROMA_LOC_LEFT;  // Chroma sample location. See chroma_offset_0[] in Mixer::subsample_chroma().
        avstream_video->codec->field_order = AV_FIELD_PROGRESSIVE;
-       if (avctx->oformat->flags & AVFMT_GLOBALHEADER) {
-               avstream_video->codec->flags = AV_CODEC_FLAG_GLOBAL_HEADER;
+
+       if (!video_extradata.empty()) {
+               avstream_video->codec->extradata = (uint8_t *)av_malloc(video_extradata.size());
+               avstream_video->codec->extradata_size = video_extradata.size();
+               memcpy(avstream_video->codec->extradata, video_extradata.data(), video_extradata.size());
        }
 
        avstream_audio = avformat_new_stream(avctx, nullptr);
diff --git a/mux.h b/mux.h
index 1dd967c40f2e7f3d379c168cf4485d10f18c2def..c161b298d2e27408b4bbb23fc3f596abfd456515 100644 (file)
--- a/mux.h
+++ b/mux.h
@@ -25,7 +25,7 @@ public:
        };
 
        // Takes ownership of avctx. <keyframe_signal_receiver> can be nullptr.
-       Mux(AVFormatContext *avctx, int width, int height, Codec video_codec, const AVCodecContext *audio_ctx, int time_base, KeyFrameSignalReceiver *keyframe_signal_receiver);
+       Mux(AVFormatContext *avctx, int width, int height, Codec video_codec, const std::string &video_extradata, const AVCodecContext *audio_ctx, int time_base, KeyFrameSignalReceiver *keyframe_signal_receiver);
        ~Mux();
        void add_packet(const AVPacket &pkt, int64_t pts, int64_t dts);
 
index fbbde94cb4abfb37ccb820ee39ee43128e194384..b81cb53dba2173448ff8fb06e31e8dc85816bfc0 100644 (file)
@@ -194,12 +194,16 @@ FrameReorderer::Frame FrameReorderer::get_first_frame()
 
 class QuickSyncEncoderImpl {
 public:
-       QuickSyncEncoderImpl(const std::string &filename, movit::ResourcePool *resource_pool, QSurface *surface, const string &va_display, int width, int height, Mux *stream_mux, AudioEncoder *stream_audio_encoder, X264Encoder *x264_encoder);
+       QuickSyncEncoderImpl(const std::string &filename, movit::ResourcePool *resource_pool, QSurface *surface, const string &va_display, int width, int height, AVOutputFormat *oformat, AudioEncoder *stream_audio_encoder, X264Encoder *x264_encoder);
        ~QuickSyncEncoderImpl();
        void add_audio(int64_t pts, vector<float> audio);
        bool begin_frame(GLuint *y_tex, GLuint *cbcr_tex);
        RefCountedGLsync end_frame(int64_t pts, int64_t duration, const vector<RefCountedFrame> &input_frames);
        void shutdown();
+       void set_stream_mux(Mux *mux)
+       {
+               stream_mux = mux;
+       }
 
 private:
        struct storage_task {
@@ -280,7 +284,7 @@ private:
        unique_ptr<FrameReorderer> reorderer;
        X264Encoder *x264_encoder;  // nullptr if not using x264.
 
-       Mux* stream_mux;  // To HTTP.
+       Mux* stream_mux = nullptr;  // To HTTP.
        unique_ptr<Mux> file_mux;  // To local disk.
 
        Display *x11_display = nullptr;
@@ -1727,10 +1731,10 @@ namespace {
 
 }  // namespace
 
-QuickSyncEncoderImpl::QuickSyncEncoderImpl(const std::string &filename, movit::ResourcePool *resource_pool, QSurface *surface, const string &va_display, int width, int height, Mux *stream_mux, AudioEncoder *stream_audio_encoder, X264Encoder *x264_encoder)
-       : current_storage_frame(0), resource_pool(resource_pool), surface(surface), stream_audio_encoder(stream_audio_encoder), x264_encoder(x264_encoder), stream_mux(stream_mux), frame_width(width), frame_height(height)
+QuickSyncEncoderImpl::QuickSyncEncoderImpl(const std::string &filename, movit::ResourcePool *resource_pool, QSurface *surface, const string &va_display, int width, int height, AVOutputFormat *oformat, AudioEncoder *stream_audio_encoder, X264Encoder *x264_encoder)
+       : current_storage_frame(0), resource_pool(resource_pool), surface(surface), stream_audio_encoder(stream_audio_encoder), x264_encoder(x264_encoder), frame_width(width), frame_height(height)
 {
-       file_audio_encoder.reset(new AudioEncoder(AUDIO_OUTPUT_CODEC_NAME, DEFAULT_AUDIO_OUTPUT_BIT_RATE));
+       file_audio_encoder.reset(new AudioEncoder(AUDIO_OUTPUT_CODEC_NAME, DEFAULT_AUDIO_OUTPUT_BIT_RATE, oformat));
        open_output_file(filename);
        file_audio_encoder->add_mux(file_mux.get());
 
@@ -1949,7 +1953,8 @@ void QuickSyncEncoderImpl::open_output_file(const std::string &filename)
                exit(1);
        }
 
-       file_mux.reset(new Mux(avctx, frame_width, frame_height, Mux::CODEC_H264, file_audio_encoder->get_ctx(), TIMEBASE, nullptr));
+       string video_extradata = "";  // FIXME: See other comment about global headers.
+       file_mux.reset(new Mux(avctx, frame_width, frame_height, Mux::CODEC_H264, video_extradata, file_audio_encoder->get_ctx(), TIMEBASE, nullptr));
 }
 
 void QuickSyncEncoderImpl::encode_thread_func()
@@ -2141,6 +2146,9 @@ void QuickSyncEncoderImpl::encode_frame(QuickSyncEncoderImpl::PendingFrame frame
        CHECK_VASTATUS(va_status, "vaBeginPicture");
 
        if (frame_type == FRAME_IDR) {
+               // FIXME: If the mux wants global headers, we should not put the
+               // SPS/PPS before each IDR frame, but rather put it into the
+               // codec extradata (formatted differently?).
                render_sequence();
                render_picture(frame_type, display_frame_num, gop_start_display_frame_num);
                if (h264_packedheader) {
@@ -2170,8 +2178,8 @@ void QuickSyncEncoderImpl::encode_frame(QuickSyncEncoderImpl::PendingFrame frame
 }
 
 // Proxy object.
-QuickSyncEncoder::QuickSyncEncoder(const std::string &filename, movit::ResourcePool *resource_pool, QSurface *surface, const string &va_display, int width, int height, Mux *stream_mux, AudioEncoder *stream_audio_encoder, X264Encoder *x264_encoder)
-       : impl(new QuickSyncEncoderImpl(filename, resource_pool, surface, va_display, width, height, stream_mux, stream_audio_encoder, x264_encoder)) {}
+QuickSyncEncoder::QuickSyncEncoder(const std::string &filename, movit::ResourcePool *resource_pool, QSurface *surface, const string &va_display, int width, int height, AVOutputFormat *oformat, AudioEncoder *stream_audio_encoder, X264Encoder *x264_encoder)
+       : impl(new QuickSyncEncoderImpl(filename, resource_pool, surface, va_display, width, height, oformat, stream_audio_encoder, x264_encoder)) {}
 
 // Must be defined here because unique_ptr<> destructor needs to know the impl.
 QuickSyncEncoder::~QuickSyncEncoder() {}
@@ -2195,3 +2203,9 @@ void QuickSyncEncoder::shutdown()
 {
        impl->shutdown();
 }
+
+void QuickSyncEncoder::set_stream_mux(Mux *mux)
+{
+       impl->set_stream_mux(mux);
+}
+
index 4f2bca5a3236e73fa6a3e12742922bed80cf2076..52aaf77dd48d4ac4007c121189ce3d295dd14640 100644 (file)
 #include <string>
 #include <vector>
 
+extern "C" {
+#include <libavformat/avformat.h>
+}
+
 #include "ref_counted_frame.h"
 #include "ref_counted_gl_sync.h"
 
@@ -51,9 +55,10 @@ class ResourcePool;
 // .cpp file.
 class QuickSyncEncoder {
 public:
-        QuickSyncEncoder(const std::string &filename, movit::ResourcePool *resource_pool, QSurface *surface, const std::string &va_display, int width, int height, Mux *stream_mux, AudioEncoder *stream_audio_encoder, X264Encoder *x264_encoder);
+        QuickSyncEncoder(const std::string &filename, movit::ResourcePool *resource_pool, QSurface *surface, const std::string &va_display, int width, int height, AVOutputFormat *oformat, AudioEncoder *stream_audio_encoder, X264Encoder *x264_encoder);
         ~QuickSyncEncoder();
 
+       void set_stream_mux(Mux *mux);  // Does not take ownership. Must be called unless x264 is used for the stream.
        void add_audio(int64_t pts, std::vector<float> audio);
        bool begin_frame(GLuint *y_tex, GLuint *cbcr_tex);
        RefCountedGLsync end_frame(int64_t pts, int64_t duration, const std::vector<RefCountedFrame> &input_frames);
index cae4328f2a5e4fe974ffde3dd52494fabc43223a..96d493252a02e1946363d552bf9908326803b7ee 100644 (file)
@@ -38,21 +38,24 @@ string generate_local_dump_filename(int frame)
 VideoEncoder::VideoEncoder(ResourcePool *resource_pool, QSurface *surface, const std::string &va_display, int width, int height, HTTPD *httpd)
        : resource_pool(resource_pool), surface(surface), va_display(va_display), width(width), height(height), httpd(httpd)
 {
-       open_output_stream();
-
+       oformat = av_guess_format(global_flags.stream_mux_name.c_str(), nullptr, nullptr);
+       assert(oformat != nullptr);
        if (global_flags.stream_audio_codec_name.empty()) {
-               stream_audio_encoder.reset(new AudioEncoder(AUDIO_OUTPUT_CODEC_NAME, DEFAULT_AUDIO_OUTPUT_BIT_RATE));
+               stream_audio_encoder.reset(new AudioEncoder(AUDIO_OUTPUT_CODEC_NAME, DEFAULT_AUDIO_OUTPUT_BIT_RATE, oformat));
        } else {
-               stream_audio_encoder.reset(new AudioEncoder(global_flags.stream_audio_codec_name, global_flags.stream_audio_codec_bitrate));
+               stream_audio_encoder.reset(new AudioEncoder(global_flags.stream_audio_codec_name, global_flags.stream_audio_codec_bitrate, oformat));
        }
-       stream_audio_encoder->add_mux(stream_mux.get());
-
        if (global_flags.x264_video_to_http) {
-               x264_encoder.reset(new X264Encoder(stream_mux.get()));
+               x264_encoder.reset(new X264Encoder(oformat));
        }
 
        string filename = generate_local_dump_filename(/*frame=*/0);
-       quicksync_encoder.reset(new QuickSyncEncoder(filename, resource_pool, surface, va_display, width, height, stream_mux.get(), stream_audio_encoder.get(), x264_encoder.get()));
+       quicksync_encoder.reset(new QuickSyncEncoder(filename, resource_pool, surface, va_display, width, height, oformat, stream_audio_encoder.get(), x264_encoder.get()));
+
+       open_output_stream();
+       stream_audio_encoder->add_mux(stream_mux.get());
+       quicksync_encoder->set_stream_mux(stream_mux.get());
+       x264_encoder->set_mux(stream_mux.get());
 }
 
 VideoEncoder::~VideoEncoder()
@@ -66,7 +69,8 @@ void VideoEncoder::do_cut(int frame)
        string filename = generate_local_dump_filename(frame);
        printf("Starting new recording: %s\n", filename.c_str());
        quicksync_encoder->shutdown();
-       quicksync_encoder.reset(new QuickSyncEncoder(filename, resource_pool, surface, va_display, width, height, stream_mux.get(), stream_audio_encoder.get(), x264_encoder.get()));
+       quicksync_encoder.reset(new QuickSyncEncoder(filename, resource_pool, surface, va_display, width, height, oformat, stream_audio_encoder.get(), x264_encoder.get()));
+       quicksync_encoder->set_stream_mux(stream_mux.get());
 }
 
 void VideoEncoder::add_audio(int64_t pts, std::vector<float> audio)
@@ -87,8 +91,6 @@ RefCountedGLsync VideoEncoder::end_frame(int64_t pts, int64_t duration, const st
 void VideoEncoder::open_output_stream()
 {
        AVFormatContext *avctx = avformat_alloc_context();
-       AVOutputFormat *oformat = av_guess_format(global_flags.stream_mux_name.c_str(), nullptr, nullptr);
-       assert(oformat != nullptr);
        avctx->oformat = oformat;
 
        uint8_t *buf = (uint8_t *)av_malloc(MUX_BUFFER_SIZE);
@@ -103,9 +105,14 @@ void VideoEncoder::open_output_stream()
 
        avctx->flags = AVFMT_FLAG_CUSTOM_IO;
 
+       string video_extradata;
+       if (global_flags.x264_video_to_http) {
+               video_extradata = x264_encoder->get_global_headers();
+       }
+
        int time_base = global_flags.stream_coarse_timebase ? COARSE_TIMEBASE : TIMEBASE;
        stream_mux_writing_header = true;
-       stream_mux.reset(new Mux(avctx, width, height, video_codec, stream_audio_encoder->get_ctx(), time_base, this));
+       stream_mux.reset(new Mux(avctx, width, height, video_codec, video_extradata, stream_audio_encoder->get_ctx(), time_base, this));
        stream_mux_writing_header = false;
        httpd->set_header(stream_mux_header);
        stream_mux_header.clear();
index bb1be55c836641beacb2dcea2898c778ae331d58..78162e9c91badb1f18273d7b27ee0f430c3734da 100644 (file)
@@ -46,6 +46,7 @@ private:
        static int write_packet_thunk(void *opaque, uint8_t *buf, int buf_size);
        int write_packet(uint8_t *buf, int buf_size);
 
+       AVOutputFormat *oformat;
        std::unique_ptr<QuickSyncEncoder> quicksync_encoder;
        movit::ResourcePool *resource_pool;
        QSurface *surface;
index 189da20ad7f0c64f0fff92b044b324a957cf44e5..bbbb1ba33df907babd72c9f2a9c91d1596950936 100644 (file)
@@ -13,8 +13,8 @@ extern "C" {
 
 using namespace std;
 
-X264Encoder::X264Encoder(Mux *mux)
-       : mux(mux)
+X264Encoder::X264Encoder(AVOutputFormat *oformat)
+       : wants_global_headers(oformat->flags & AVFMT_GLOBALHEADER)
 {
        frame_pool.reset(new uint8_t[WIDTH * HEIGHT * 2 * X264_QUEUE_LENGTH]);
        for (unsigned i = 0; i < X264_QUEUE_LENGTH; ++i) {
@@ -88,11 +88,29 @@ void X264Encoder::init_x264()
 
        x264_param_apply_profile(&param, "high");
 
+       param.b_repeat_headers = !wants_global_headers;
+
        x264 = x264_encoder_open(&param);
        if (x264 == nullptr) {
                fprintf(stderr, "ERROR: x264 initialization failed.\n");
                exit(1);
        }
+
+       if (wants_global_headers) {
+               x264_nal_t *nal;
+               int num_nal;
+
+               x264_encoder_headers(x264, &nal, &num_nal);
+
+               for (int i = 0; i < num_nal; ++i) {
+                       if (nal[i].i_type == NAL_SEI) {
+                               // Don't put the SEI in extradata; make it part of the first frame instead.
+                               buffered_sei += string((const char *)nal[i].p_payload, nal[i].i_payload);
+                       } else {
+                               global_headers += string((const char *)nal[i].p_payload, nal[i].i_payload);
+                       }
+               }
+       }
 }
 
 void X264Encoder::encoder_thread_func()
@@ -160,7 +178,7 @@ void X264Encoder::encode_frame(X264Encoder::QueuedFrame qf)
 
        // We really need one AVPacket for the entire frame, it seems,
        // so combine it all.
-       size_t num_bytes = 0;
+       size_t num_bytes = buffered_sei.size();
        for (int i = 0; i < num_nal; ++i) {
                num_bytes += nal[i].i_payload;
        }
@@ -168,6 +186,11 @@ void X264Encoder::encode_frame(X264Encoder::QueuedFrame qf)
        unique_ptr<uint8_t[]> data(new uint8_t[num_bytes]);
        uint8_t *ptr = data.get();
 
+       if (!buffered_sei.empty()) {
+               memcpy(ptr, buffered_sei.data(), buffered_sei.size());
+               ptr += buffered_sei.size();
+               buffered_sei.clear();
+       }
        for (int i = 0; i < num_nal; ++i) {
                memcpy(ptr, nal[i].p_payload, nal[i].i_payload);
                ptr += nal[i].i_payload;
index e146cd2b2b6a3044a5ab20d3d9195fa05f831e32..729cb7f02bcc0063861aa946ec700e6321e566b8 100644 (file)
 
 extern "C" {
 #include "x264.h"
+#include <libavformat/avformat.h>
 }
 
 class Mux;
 
 class X264Encoder {
 public:
-       X264Encoder(Mux *httpd);  // Does not take ownership.
+       X264Encoder(AVOutputFormat *oformat);  // Does not take ownership.
 
        // Called after the last frame. Will block; once this returns,
        // the last data is flushed.
        ~X264Encoder();
 
+       // Must be called before first frame. Does not take ownership.
+       void set_mux(Mux *mux) { this->mux = mux; }
+
        // <data> is taken to be raw NV12 data of WIDTHxHEIGHT resolution.
        // Does not block.
        void add_frame(int64_t pts, int64_t duration, const uint8_t *data);
 
+       std::string get_global_headers() const { return global_headers; }
+
 private:
        struct QueuedFrame {
                int64_t pts, duration;
@@ -58,6 +64,10 @@ private:
        std::unique_ptr<uint8_t[]> frame_pool;
 
        Mux *mux = nullptr;
+       bool wants_global_headers;
+
+       std::string global_headers;
+       std::string buffered_sei;  // Will be output before first frame, if any.
 
        std::thread encoder_thread;
        std::atomic<bool> should_quit{false};