From 61919d1071d4501106bfba9edef95a714b025c8e Mon Sep 17 00:00:00 2001 From: "Steinar H. Gunderson" Date: Sat, 25 Jun 2016 23:08:11 +0200 Subject: [PATCH] Use the new libavformat functionality for marking keyframes. Note: This depends on code only recently in mainline avformat; the commit in question is dbbaad3 in libav, 4e7a921 in ffmpeg, merged Jun 23. This removes our need for manually flushing before each keyframe to make sure we get the fragment boundaries correct. In turn, this means we no longer need to interleave ourselves. The lack of flushing (and that we start using libavformat's interleaving queue again) also means that the muxer no longer has to guess at the duration of the last frame in each fragment, which would sometimes cause non-monotonic pts, which would confuse players in various ways. --- README | 4 ++++ mux.cpp | 43 ++++++------------------------------------- mux.h | 23 ++--------------------- quicksync_encoder.cpp | 2 +- video_encoder.cpp | 22 ++++++++++------------ video_encoder.h | 15 +++------------ 6 files changed, 26 insertions(+), 83 deletions(-) diff --git a/README b/README index 4e9f746..acccff3 100644 --- a/README +++ b/README @@ -84,6 +84,10 @@ Exceptions as of June 2016: - There is a critical bug fix with x264 speed control in 928bd9d5def4f0ca5071ea176a11b816a01e6495, pushed to git mid-June 2016. + - Nageru depends on an avformat API for marking block boundaries in the + muxed byte stream that didn't enter ffmpeg before + 4e7a9212820a56bc731c09b2f11ae1422d070837, pushed to git late June 2016. + The patches/ directory contains a patch that helps zita-resampler performance. It is meant for upstream, but was not in at the time Nageru was released. 
diff --git a/mux.cpp b/mux.cpp index 184fa1e..cc67eb6 100644 --- a/mux.cpp +++ b/mux.cpp @@ -29,8 +29,8 @@ struct PacketBefore { const AVFormatContext * const ctx; }; -Mux::Mux(AVFormatContext *avctx, int width, int height, Codec video_codec, const string &video_extradata, const AVCodecContext *audio_ctx, int time_base, KeyFrameSignalReceiver *keyframe_signal_receiver) - : avctx(avctx), keyframe_signal_receiver(keyframe_signal_receiver) +Mux::Mux(AVFormatContext *avctx, int width, int height, Codec video_codec, const string &video_extradata, const AVCodecContext *audio_ctx, int time_base) + : avctx(avctx) { AVCodec *codec_video = avcodec_find_encoder((video_codec == CODEC_H264) ? AV_CODEC_ID_H264 : AV_CODEC_ID_RAWVIDEO); avstream_video = avformat_new_stream(avctx, codec_video); @@ -123,47 +123,16 @@ void Mux::add_packet(const AVPacket &pkt, int64_t pts, int64_t dts) if (plug_count > 0) { plugged_packets.push_back(av_packet_clone(&pkt_copy)); } else { - add_interleaved_packet(pkt_copy); + write_packet_or_die(pkt_copy); } } av_packet_unref(&pkt_copy); } -void Mux::add_interleaved_packet(const AVPacket &pkt) +void Mux::write_packet_or_die(const AVPacket &pkt) { - if (waiting_packets.empty() || waiting_packets.front()->stream_index == pkt.stream_index) { - // We could still get packets of the other type with earlier pts/dts, - // so we'll have to queue and wait. - waiting_packets.push(av_packet_clone(const_cast(&pkt))); - return; - } - - // Flush all the queued packets that are supposed to go before this. 
- PacketBefore before(avctx); - while (!waiting_packets.empty() && !before(&pkt, waiting_packets.front())) { - AVPacket *queued_pkt = waiting_packets.front(); - waiting_packets.pop(); - write_packet_with_signal(*queued_pkt); - av_packet_free(&queued_pkt); - } - - if (waiting_packets.empty()) { - waiting_packets.push(av_packet_clone(const_cast(&pkt))); - } else { - write_packet_with_signal(pkt); - } -} - -void Mux::write_packet_with_signal(const AVPacket &pkt) -{ - if (keyframe_signal_receiver) { - if (pkt.flags & AV_PKT_FLAG_KEY) { - av_write_frame(avctx, nullptr); - keyframe_signal_receiver->signal_keyframe(); - } - } - if (av_write_frame(avctx, const_cast(&pkt)) < 0) { + if (av_interleaved_write_frame(avctx, const_cast(&pkt)) < 0) { fprintf(stderr, "av_interleaved_write_frame() failed\n"); exit(1); } @@ -187,7 +156,7 @@ void Mux::unplug() sort(plugged_packets.begin(), plugged_packets.end(), PacketBefore(avctx)); for (AVPacket *pkt : plugged_packets) { - add_interleaved_packet(*pkt); + write_packet_or_die(*pkt); av_packet_free(&pkt); } plugged_packets.clear(); diff --git a/mux.h b/mux.h index 45eab34..8303021 100644 --- a/mux.h +++ b/mux.h @@ -13,12 +13,6 @@ extern "C" { #include #include -class KeyFrameSignalReceiver { -public: - // Needs to automatically turn the flag off again after actually receiving data. - virtual void signal_keyframe() = 0; -}; - class Mux { public: enum Codec { @@ -27,7 +21,7 @@ public: }; // Takes ownership of avctx. can be nullptr. 
- Mux(AVFormatContext *avctx, int width, int height, Codec video_codec, const std::string &video_extradata, const AVCodecContext *audio_ctx, int time_base, KeyFrameSignalReceiver *keyframe_signal_receiver); + Mux(AVFormatContext *avctx, int width, int height, Codec video_codec, const std::string &video_extradata, const AVCodecContext *audio_ctx, int time_base); ~Mux(); void add_packet(const AVPacket &pkt, int64_t pts, int64_t dts); @@ -43,27 +37,14 @@ public: void unplug(); private: - void add_interleaved_packet(const AVPacket &pkt); // Must be called with held. - void write_packet_with_signal(const AVPacket &pkt); // Must be called with held. + void write_packet_or_die(const AVPacket &pkt); // Must be called with held. std::mutex mu; AVFormatContext *avctx; // Protected by . int plug_count = 0; // Protected by . std::vector plugged_packets; // Protected by . - // We need to do our own interleaving since we do explicit flushes - // before each keyframe. This queue contains every packet that we - // couldn't send yet, in add order. Essentially, we can't send a packet - // before we know we cannot receive an earlier (dts-wise) packet - // from another stream. This means that this queue will either contain - // video packets only or audio packets only, and as soon as a packet - // of the other type comes in, we can empty the flush the queue up - // to that point. - // Protected by . - std::queue waiting_packets; - AVStream *avstream_video, *avstream_audio; - KeyFrameSignalReceiver *keyframe_signal_receiver; }; #endif // !defined(_MUX_H) diff --git a/quicksync_encoder.cpp b/quicksync_encoder.cpp index 041ac07..11d1c0d 100644 --- a/quicksync_encoder.cpp +++ b/quicksync_encoder.cpp @@ -1942,7 +1942,7 @@ void QuickSyncEncoderImpl::open_output_file(const std::string &filename) } string video_extradata = ""; // FIXME: See other comment about global headers. 
- file_mux.reset(new Mux(avctx, frame_width, frame_height, Mux::CODEC_H264, video_extradata, file_audio_encoder->get_ctx(), TIMEBASE, nullptr)); + file_mux.reset(new Mux(avctx, frame_width, frame_height, Mux::CODEC_H264, video_extradata, file_audio_encoder->get_ctx(), TIMEBASE)); } void QuickSyncEncoderImpl::encode_thread_func() diff --git a/video_encoder.cpp b/video_encoder.cpp index eb1cd67..4b62e9b 100644 --- a/video_encoder.cpp +++ b/video_encoder.cpp @@ -120,7 +120,9 @@ void VideoEncoder::open_output_stream() avctx->oformat = oformat; uint8_t *buf = (uint8_t *)av_malloc(MUX_BUFFER_SIZE); - avctx->pb = avio_alloc_context(buf, MUX_BUFFER_SIZE, 1, this, nullptr, &VideoEncoder::write_packet_thunk, nullptr); + avctx->pb = avio_alloc_context(buf, MUX_BUFFER_SIZE, 1, this, nullptr, nullptr, nullptr); + avctx->pb->write_data_type = &VideoEncoder::write_packet2_thunk; + avctx->pb->ignore_boundary_point = 1; Mux::Codec video_codec; if (global_flags.uncompressed_video_to_http) { @@ -137,26 +139,22 @@ void VideoEncoder::open_output_stream() } int time_base = global_flags.stream_coarse_timebase ? 
COARSE_TIMEBASE : TIMEBASE; - stream_mux_writing_header = true; - stream_mux.reset(new Mux(avctx, width, height, video_codec, video_extradata, stream_audio_encoder->get_ctx(), time_base, this)); - stream_mux_writing_header = false; - httpd->set_header(stream_mux_header); - stream_mux_header.clear(); + stream_mux.reset(new Mux(avctx, width, height, video_codec, video_extradata, stream_audio_encoder->get_ctx(), time_base)); } -int VideoEncoder::write_packet_thunk(void *opaque, uint8_t *buf, int buf_size) +int VideoEncoder::write_packet2_thunk(void *opaque, uint8_t *buf, int buf_size, AVIODataMarkerType type, int64_t time) { VideoEncoder *video_encoder = (VideoEncoder *)opaque; - return video_encoder->write_packet(buf, buf_size); + return video_encoder->write_packet2(buf, buf_size, type, time); } -int VideoEncoder::write_packet(uint8_t *buf, int buf_size) +int VideoEncoder::write_packet2(uint8_t *buf, int buf_size, AVIODataMarkerType type, int64_t time) { - if (stream_mux_writing_header) { + if (type == AVIO_DATA_MARKER_HEADER) { stream_mux_header.append((char *)buf, buf_size); + httpd->set_header(stream_mux_header); } else { - httpd->add_data((char *)buf, buf_size, stream_mux_writing_keyframes); - stream_mux_writing_keyframes = false; + httpd->add_data((char *)buf, buf_size, type == AVIO_DATA_MARKER_SYNC_POINT); } return buf_size; } diff --git a/video_encoder.h b/video_encoder.h index 92f8357..fc3a570 100644 --- a/video_encoder.h +++ b/video_encoder.h @@ -25,7 +25,7 @@ namespace movit { class ResourcePool; } // namespace movit -class VideoEncoder : public KeyFrameSignalReceiver { +class VideoEncoder { public: VideoEncoder(movit::ResourcePool *resource_pool, QSurface *surface, const std::string &va_display, int width, int height, HTTPD *httpd); ~VideoEncoder(); @@ -37,14 +37,10 @@ public: // Does a cut of the disk stream immediately ("frame" is used for the filename only). 
void do_cut(int frame); - virtual void signal_keyframe() override { - stream_mux_writing_keyframes = true; - } - private: void open_output_stream(); - static int write_packet_thunk(void *opaque, uint8_t *buf, int buf_size); - int write_packet(uint8_t *buf, int buf_size); + static int write_packet2_thunk(void *opaque, uint8_t *buf, int buf_size, AVIODataMarkerType type, int64_t time); + int write_packet2(uint8_t *buf, int buf_size, AVIODataMarkerType type, int64_t time); AVOutputFormat *oformat; std::mutex qs_mu; @@ -59,13 +55,8 @@ private: std::unique_ptr stream_audio_encoder; std::unique_ptr x264_encoder; // nullptr if not using x264. - // While Mux object is constructing, is true, - // and the header is being collected into stream_mux_header. - bool stream_mux_writing_header; std::string stream_mux_header; - bool stream_mux_writing_keyframes = false; - std::atomic quicksync_encoders_in_shutdown{0}; // Encoders that are shutdown, but need to call release_gl_resources() -- 2.39.2