From: Steinar H. Gunderson <sgunderson@bigfoot.com>
Date: Tue, 19 Apr 2016 21:46:26 +0000 (+0200)
Subject: Set duration for all video frames.
X-Git-Tag: 1.3.0~68
X-Git-Url: https://git.sesse.net/?p=nageru;a=commitdiff_plain;h=d4ffc0e71f0908d3ecc2e88b8675cfe4e93febe4

Set duration for all video frames.

Generally the muxer will ignore these durations, but there's one specific case
where it cannot: Since we flush before keyframes, the mov (MP4) mux
will not be able to set a proper duration based on the next frame
(usually it just does next_dts - dts), and thus guesses that it will be
the same as the previous one. If we dropped a frame between those two,
the duration of said last frame will be wrong -- and the keyframe
(starting in the next fragment) will get the wrong pts, possibly
even appearing to go backwards.

If we lose a frame between the last frame and the keyframe, the pts
of the keyframe will still be a bit wonky, but according to wbs
(who wrote the mux), it's much better this way.
---

diff --git a/h264encode.cpp b/h264encode.cpp
index 5f4ace9..3380ba4 100644
--- a/h264encode.cpp
+++ b/h264encode.cpp
@@ -133,14 +133,22 @@ class FrameReorderer {
 public:
 	FrameReorderer(unsigned queue_length, int width, int height);
 
+	struct Frame {
+		int64_t pts, duration;
+		uint8_t *data;
+
+		// Invert to get the smallest pts first.
+		bool operator< (const Frame &other) const { return pts > other.pts; }
+	};
+
 	// Returns the next frame to insert with its pts, if any. Otherwise -1 and nullptr.
 	// Does _not_ take ownership of data; a copy is taken if needed.
 	// The returned pointer is valid until the next call to reorder_frame, or destruction.
 	// As a special case, if queue_length == 0, will just return pts and data (no reordering needed).
-	pair<int64_t, const uint8_t *> reorder_frame(int64_t pts, const uint8_t *data);
+	Frame reorder_frame(int64_t pts, int64_t duration, uint8_t *data);
 
 	// The same as reorder_frame, but without inserting anything. Used to empty the queue.
-	pair<int64_t, const uint8_t *> get_first_frame();
+	Frame get_first_frame();
 
 	bool empty() const { return frames.empty(); }
 
@@ -148,7 +156,7 @@ private:
 	unsigned queue_length;
 	int width, height;
 
-	priority_queue<pair<int64_t, uint8_t *>> frames;
+	priority_queue<Frame> frames;
 	stack<uint8_t *> freelist;  // Includes the last value returned from reorder_frame.
 
 	// Owns all the pointers. Normally, freelist and frames could do this themselves,
@@ -165,33 +173,32 @@ FrameReorderer::FrameReorderer(unsigned queue_length, int width, int height)
 	}
 }
 
-pair<int64_t, const uint8_t *> FrameReorderer::reorder_frame(int64_t pts, const uint8_t *data)
+FrameReorderer::Frame FrameReorderer::reorder_frame(int64_t pts, int64_t duration, uint8_t *data)
 {
 	if (queue_length == 0) {
-		return make_pair(pts, data);
+		return Frame{pts, duration, data};
 	}
 
 	assert(!freelist.empty());
 	uint8_t *storage = freelist.top();
 	freelist.pop();
 	memcpy(storage, data, width * height * 2);
-	frames.emplace(-pts, storage);  // Invert pts to get smallest first.
+	frames.push(Frame{pts, duration, storage});
 
 	if (frames.size() >= queue_length) {
 		return get_first_frame();
 	} else {
-		return make_pair(-1, nullptr);
+		return Frame{-1, -1, nullptr};
 	}
 }
 
-pair<int64_t, const uint8_t *> FrameReorderer::get_first_frame()
+FrameReorderer::Frame FrameReorderer::get_first_frame()
 {
 	assert(!frames.empty());
-	pair<int64_t, uint8_t *> storage = frames.top();
+	Frame storage = frames.top();
 	frames.pop();
-	int64_t pts = storage.first;
-	freelist.push(storage.second);
-	return make_pair(-pts, storage.second);  // Re-invert pts (see reorder_frame()).
+	freelist.push(storage.data);
+	return storage;
 }
 
 class H264EncoderImpl : public KeyFrameSignalReceiver {
@@ -200,7 +207,7 @@ public:
 	~H264EncoderImpl();
 	void add_audio(int64_t pts, vector<float> audio);
 	bool begin_frame(GLuint *y_tex, GLuint *cbcr_tex);
-	RefCountedGLsync end_frame(int64_t pts, const vector<RefCountedFrame> &input_frames);
+	RefCountedGLsync end_frame(int64_t pts, int64_t duration, const vector<RefCountedFrame> &input_frames);
 	void shutdown();
 	void open_output_file(const std::string &filename);
 	void close_output_file();
@@ -214,12 +221,12 @@ private:
 		unsigned long long display_order;
 		int frame_type;
 		vector<float> audio;
-		int64_t pts, dts;
+		int64_t pts, dts, duration;
 	};
 	struct PendingFrame {
 		RefCountedGLsync fence;
 		vector<RefCountedFrame> input_frames;
-		int64_t pts;
+		int64_t pts, duration;
 	};
 
 	// So we never get negative dts.
@@ -229,9 +236,9 @@ private:
 
 	void encode_thread_func();
 	void encode_remaining_frames_as_p(int encoding_frame_num, int gop_start_display_frame_num, int64_t last_dts);
-	void add_packet_for_uncompressed_frame(int64_t pts, const uint8_t *data);
+	void add_packet_for_uncompressed_frame(int64_t pts, int64_t duration, const uint8_t *data);
 	void encode_frame(PendingFrame frame, int encoding_frame_num, int display_frame_num, int gop_start_display_frame_num,
-	                  int frame_type, int64_t pts, int64_t dts);
+	                  int frame_type, int64_t pts, int64_t dts, int64_t duration);
 	void storage_task_thread();
 	void encode_audio(const vector<float> &audio,
 	                  vector<float> *audio_queue,
@@ -1671,7 +1678,7 @@ void H264EncoderImpl::save_codeddata(storage_task task)
 		} else {
 			pkt.flags = 0;
 		}
-		//pkt.duration = 1;
+		pkt.duration = task.duration;
 		if (file_mux) {
 			file_mux->add_packet(pkt, task.pts + global_delay(), task.dts + global_delay());
 		}
@@ -2079,7 +2086,7 @@ void H264EncoderImpl::add_audio(int64_t pts, vector<float> audio)
 	frame_queue_nonempty.notify_all();
 }
 
-RefCountedGLsync H264EncoderImpl::end_frame(int64_t pts, const vector<RefCountedFrame> &input_frames)
+RefCountedGLsync H264EncoderImpl::end_frame(int64_t pts, int64_t duration, const vector<RefCountedFrame> &input_frames)
 {
 	assert(!is_shutdown);
 
@@ -2118,7 +2125,7 @@ RefCountedGLsync H264EncoderImpl::end_frame(int64_t pts, const vector<RefCounted
 
 	{
 		unique_lock<mutex> lock(frame_queue_mutex);
-		pending_video_frames[current_storage_frame] = PendingFrame{ fence, input_frames, pts };
+		pending_video_frames[current_storage_frame] = PendingFrame{ fence, input_frames, pts, duration };
 		++current_storage_frame;
 	}
 	frame_queue_nonempty.notify_all();
@@ -2285,7 +2292,7 @@ void H264EncoderImpl::encode_thread_func()
 		}
 		last_dts = dts;
 
-		encode_frame(frame, encoding_frame_num, display_frame_num, gop_start_display_frame_num, frame_type, frame.pts, dts);
+		encode_frame(frame, encoding_frame_num, display_frame_num, gop_start_display_frame_num, frame_type, frame.pts, dts, frame.duration);
 	}
 }
 
@@ -2301,7 +2308,7 @@ void H264EncoderImpl::encode_remaining_frames_as_p(int encoding_frame_num, int g
 		PendingFrame frame = move(pending_frame.second);
 		int64_t dts = last_dts + (TIMEBASE / MAX_FPS);
 		printf("Finalizing encode: Encoding leftover frame %d as P-frame instead of B-frame.\n", display_frame_num);
-		encode_frame(frame, encoding_frame_num++, display_frame_num, gop_start_display_frame_num, FRAME_P, frame.pts, dts);
+		encode_frame(frame, encoding_frame_num++, display_frame_num, gop_start_display_frame_num, FRAME_P, frame.pts, dts, frame.duration);
 		last_dts = dts;
 	}
 
@@ -2309,12 +2316,12 @@ void H264EncoderImpl::encode_remaining_frames_as_p(int encoding_frame_num, int g
 	    global_flags.x264_video_to_http) {
 		// Add frames left in reorderer.
 		while (!reorderer->empty()) {
-			pair<int64_t, const uint8_t *> output_frame = reorderer->get_first_frame();
+			FrameReorderer::Frame output_frame = reorderer->get_first_frame();
 			if (global_flags.uncompressed_video_to_http) {
-				add_packet_for_uncompressed_frame(output_frame.first, output_frame.second);
+				add_packet_for_uncompressed_frame(output_frame.pts, output_frame.duration, output_frame.data);
 			} else {
 				assert(global_flags.x264_video_to_http);
-				x264_encoder->add_frame(output_frame.first, output_frame.second);
+				x264_encoder->add_frame(output_frame.pts, output_frame.duration, output_frame.data);
 			}
 		}
 	}
@@ -2346,7 +2353,7 @@ void H264EncoderImpl::encode_remaining_audio()
 	}
 }
 
-void H264EncoderImpl::add_packet_for_uncompressed_frame(int64_t pts, const uint8_t *data)
+void H264EncoderImpl::add_packet_for_uncompressed_frame(int64_t pts, int64_t duration, const uint8_t *data)
 {
 	AVPacket pkt;
 	memset(&pkt, 0, sizeof(pkt));
@@ -2355,6 +2362,7 @@ void H264EncoderImpl::add_packet_for_uncompressed_frame(int64_t pts, const uint8
 	pkt.size = frame_width * frame_height * 2;
 	pkt.stream_index = 0;
 	pkt.flags = AV_PKT_FLAG_KEY;
+	pkt.duration = duration;
 	stream_mux->add_packet(pkt, pts, pts);
 }
 
@@ -2376,7 +2384,7 @@ void memcpy_with_pitch(uint8_t *dst, const uint8_t *src, size_t src_width, size_
 }  // namespace
 
 void H264EncoderImpl::encode_frame(H264EncoderImpl::PendingFrame frame, int encoding_frame_num, int display_frame_num, int gop_start_display_frame_num,
-                                   int frame_type, int64_t pts, int64_t dts)
+                                   int frame_type, int64_t pts, int64_t dts, int64_t duration)
 {
 	// Wait for the GPU to be done with the frame.
 	GLenum sync_status;
@@ -2414,13 +2422,13 @@ void H264EncoderImpl::encode_frame(H264EncoderImpl::PendingFrame frame, int enco
 		    global_flags.x264_video_to_http) {
 			// Add uncompressed video. (Note that pts == dts here.)
 			// Delay needs to match audio.
-			pair<int64_t, const uint8_t *> output_frame = reorderer->reorder_frame(pts + global_delay(), reinterpret_cast<uint8_t *>(surf->y_ptr));
-			if (output_frame.second != nullptr) {
+			FrameReorderer::Frame output_frame = reorderer->reorder_frame(pts + global_delay(), duration, reinterpret_cast<uint8_t *>(surf->y_ptr));
+			if (output_frame.data != nullptr) {
 				if (global_flags.uncompressed_video_to_http) {
-					add_packet_for_uncompressed_frame(output_frame.first, output_frame.second);
+					add_packet_for_uncompressed_frame(output_frame.pts, output_frame.duration, output_frame.data);
 				} else {
 					assert(global_flags.x264_video_to_http);
-					x264_encoder->add_frame(output_frame.first, output_frame.second);
+					x264_encoder->add_frame(output_frame.pts, output_frame.duration, output_frame.data);
 				}
 			}
 		}
@@ -2457,6 +2465,7 @@ void H264EncoderImpl::encode_frame(H264EncoderImpl::PendingFrame frame, int enco
 	tmp.frame_type = frame_type;
 	tmp.pts = pts;
 	tmp.dts = dts;
+	tmp.duration = duration;
 	storage_task_enqueue(move(tmp));
 
 	update_ReferenceFrames(frame_type);
@@ -2479,9 +2488,9 @@ bool H264Encoder::begin_frame(GLuint *y_tex, GLuint *cbcr_tex)
 	return impl->begin_frame(y_tex, cbcr_tex);
 }
 
-RefCountedGLsync H264Encoder::end_frame(int64_t pts, const vector<RefCountedFrame> &input_frames)
+RefCountedGLsync H264Encoder::end_frame(int64_t pts, int64_t duration, const vector<RefCountedFrame> &input_frames)
 {
-	return impl->end_frame(pts, input_frames);
+	return impl->end_frame(pts, duration, input_frames);
 }
 
 void H264Encoder::shutdown()
diff --git a/h264encode.h b/h264encode.h
index 527074e..aeeabb8 100644
--- a/h264encode.h
+++ b/h264encode.h
@@ -50,7 +50,7 @@ public:
 
 	void add_audio(int64_t pts, std::vector<float> audio);
 	bool begin_frame(GLuint *y_tex, GLuint *cbcr_tex);
-	RefCountedGLsync end_frame(int64_t pts, const std::vector<RefCountedFrame> &input_frames);
+	RefCountedGLsync end_frame(int64_t pts, int64_t duration, const std::vector<RefCountedFrame> &input_frames);
 	void shutdown();  // Blocking.
 
 	// You can only have one going at the same time.
diff --git a/mixer.cpp b/mixer.cpp
index 3d4cccb..3134dd1 100644
--- a/mixer.cpp
+++ b/mixer.cpp
@@ -676,9 +676,10 @@ void Mixer::thread_func()
 			}
 		}
 
-		render_one_frame();
+		int64_t duration = new_frames[master_card_index].length;
+		render_one_frame(duration);
 		++frame;
-		pts_int += new_frames[master_card_index].length;
+		pts_int += duration;
 
 		clock_gettime(CLOCK_MONOTONIC, &now);
 		double elapsed = now.tv_sec - start.tv_sec +
@@ -774,7 +775,7 @@ void Mixer::schedule_audio_resampling_tasks(unsigned dropped_frames, int num_sam
 	}
 }
 
-void Mixer::render_one_frame()
+void Mixer::render_one_frame(int64_t duration)
 {
 	// Get the main chain from the theme, and set its state immediately.
 	Theme::Chain theme_main_chain = theme->get_chain(0, pts(), WIDTH, HEIGHT, input_state);
@@ -805,7 +806,7 @@ void Mixer::render_one_frame()
 	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
 
 	const int64_t av_delay = TIMEBASE / 10;  // Corresponds to the fixed delay in resampling_queue.h. TODO: Make less hard-coded.
-	RefCountedGLsync fence = h264_encoder->end_frame(pts_int + av_delay, theme_main_chain.input_frames);
+	RefCountedGLsync fence = h264_encoder->end_frame(pts_int + av_delay, duration, theme_main_chain.input_frames);
 
 	// The live frame just shows the RGBA texture we just rendered.
 	// It owns rgba_tex now.
diff --git a/mixer.h b/mixer.h
index 6f04c67..c7d7e18 100644
--- a/mixer.h
+++ b/mixer.h
@@ -356,7 +356,7 @@ private:
 	void place_rectangle(movit::Effect *resample_effect, movit::Effect *padding_effect, float x0, float y0, float x1, float y1);
 	void thread_func();
 	void schedule_audio_resampling_tasks(unsigned dropped_frames, int num_samples_per_frame, int length_per_frame);
-	void render_one_frame();
+	void render_one_frame(int64_t duration);
 	void send_audio_level_callback();
 	void audio_thread_func();
 	void process_audio_one_frame(int64_t frame_pts_int, int num_samples);
diff --git a/x264encode.cpp b/x264encode.cpp
index 7812793..25b4423 100644
--- a/x264encode.cpp
+++ b/x264encode.cpp
@@ -30,10 +30,11 @@ X264Encoder::~X264Encoder()
 	encoder_thread.join();
 }
 
-void X264Encoder::add_frame(int64_t pts, const uint8_t *data)
+void X264Encoder::add_frame(int64_t pts, int64_t duration, const uint8_t *data)
 {
 	QueuedFrame qf;
 	qf.pts = pts;
+	qf.duration = duration;
 
 	{
 		lock_guard<mutex> lock(mu);
@@ -113,6 +114,7 @@ void X264Encoder::encoder_thread_func()
 				queued_frames.pop();
 			} else {
 				qf.pts = -1;
+				qf.duration = -1;
 				qf.data = nullptr;
 			}
 
@@ -149,6 +151,7 @@ void X264Encoder::encode_frame(X264Encoder::QueuedFrame qf)
 		pic.img.i_stride[0] = WIDTH;
 		pic.img.plane[1] = qf.data + WIDTH * HEIGHT;
 		pic.img.i_stride[1] = WIDTH / 2 * sizeof(uint16_t);
+		pic.opaque = reinterpret_cast<void *>(intptr_t(qf.duration));
 
 		x264_encoder_encode(x264, &nal, &num_nal, &pic, &pic);
 	} else {
@@ -181,6 +184,7 @@ void X264Encoder::encode_frame(X264Encoder::QueuedFrame qf)
 	} else {
 		pkt.flags = 0;
 	}
+	pkt.duration = reinterpret_cast<intptr_t>(pic.opaque);
 
 	mux->add_packet(pkt, pic.i_pts, pic.i_dts);
-}	
+}
diff --git a/x264encode.h b/x264encode.h
index 6f0d6a5..e146cd2 100644
--- a/x264encode.h
+++ b/x264encode.h
@@ -41,11 +41,11 @@ public:
 
 	// <data> is taken to be raw NV12 data of WIDTHxHEIGHT resolution.
 	// Does not block.
-	void add_frame(int64_t pts, const uint8_t *data);
+	void add_frame(int64_t pts, int64_t duration, const uint8_t *data);
 
 private:
 	struct QueuedFrame {
-		int64_t pts;
+		int64_t pts, duration;
 		uint8_t *data;
 	};
 	void encoder_thread_func();