From 312ed2563ea113dc56960cbfedffd2ca32011a7d Mon Sep 17 00:00:00 2001
From: "Steinar H. Gunderson" <sgunderson@bigfoot.com>
Date: Fri, 23 Sep 2016 19:36:51 +0200
Subject: [PATCH] Do not use the timing of dropped frames as part of the video
 master clock.

Hopefully improves resampling somewhat when we are dropping frames;
it is tricky to deal with such an uneven master clock, though.
---
 bmusb                |   2 +-
 mixer.cpp            |  30 ++++++++++--
 mixer.h              |   3 +-
 resampling_queue.cpp | 109 ++++++++++++++++++++++---------------------
 resampling_queue.h   |  11 ++++-
 5 files changed, 95 insertions(+), 60 deletions(-)
diff --git a/bmusb b/bmusb
index e0837a1..a765e06 160000
--- a/bmusb
+++ b/bmusb
@@ -1 +1 @@
-Subproject commit e0837a17b5a497476d67237c768836e51f8a4ce7
+Subproject commit a765e066b74ac52ff0abf239d430d6f8d83f792e
diff --git a/mixer.cpp b/mixer.cpp
index 9e281cb..f8ad584 100644
--- a/mixer.cpp
+++ b/mixer.cpp
@@ -855,13 +855,27 @@ void Mixer::schedule_audio_resampling_tasks(unsigned dropped_frames, int num_sam
 	// Resample the audio as needed, including from previously dropped frames.
 	assert(num_cards > 0);
 	for (unsigned frame_num = 0; frame_num < dropped_frames + 1; ++frame_num) {
+		const bool dropped_frame = (frame_num != dropped_frames);
 		{
 			// Signal to the audio thread to process this frame.
+			// Note that if the frame is a dropped frame, we signal that
+			// we don't want to use this frame as base for adjusting
+			// the resampler rate. The reason for this is that the timing
+			// of these frames is often way too late; they typically don't
+			// “arrive” before we synthesize them. Thus, we could end up
+			// in a situation where we have inserted e.g. five audio frames
+			// into the queue before we then start pulling five of them
+			// back out. This makes ResamplingQueue overestimate the delay,
+			// causing undue resampler changes. (We _do_ use the last,
+			// non-dropped frame; perhaps we should just discard that as well,
+			// since dropped frames are expected to be rare, and it might be
+			// better to just wait until we have a slightly more normal situation).
 			unique_lock<mutex> lock(audio_mutex);
-			audio_task_queue.push(AudioTask{pts_int, num_samples_per_frame});
+			bool adjust_rate = !dropped_frame;
+			audio_task_queue.push(AudioTask{pts_int, num_samples_per_frame, adjust_rate});
 			audio_task_queue_changed.notify_one();
 		}
-		if (frame_num != dropped_frames) {
+		if (dropped_frame) {
 			// For dropped frames, increase the pts. Note that if the format changed
 			// in the meantime, we have no way of detecting that; we just have to
 			// assume the frame length is always the same.
@@ -961,11 +975,11 @@ void Mixer::audio_thread_func()
 			audio_task_queue.pop();
 		}
 
-		process_audio_one_frame(task.pts_int, task.num_samples);
+		process_audio_one_frame(task.pts_int, task.num_samples, task.adjust_rate);
 	}
 }
 
-void Mixer::process_audio_one_frame(int64_t frame_pts_int, int num_samples)
+void Mixer::process_audio_one_frame(int64_t frame_pts_int, int num_samples, bool adjust_rate)
 {
 	vector<float> samples_card;
 	vector<float> samples_out;
@@ -978,7 +992,13 @@ void Mixer::process_audio_one_frame(int64_t frame_pts_int, int num_samples)
 		samples_card.resize(num_samples * 2);
 		{
 			unique_lock<mutex> lock(cards[card_index].audio_mutex);
-			cards[card_index].resampling_queue->get_output_samples(double(frame_pts_int) / TIMEBASE, &samples_card[0], num_samples);
+			ResamplingQueue::RateAdjustmentPolicy rate_adjustment_policy =
+				adjust_rate ? ResamplingQueue::ADJUST_RATE : ResamplingQueue::DO_NOT_ADJUST_RATE;
+			cards[card_index].resampling_queue->get_output_samples(
+				double(frame_pts_int) / TIMEBASE,
+				&samples_card[0],
+				num_samples,
+				rate_adjustment_policy);
 		}
 		if (card_index == selected_audio_card) {
 			samples_out = move(samples_card);
diff --git a/mixer.h b/mixer.h
index 560827a..75867cc 100644
--- a/mixer.h
+++ b/mixer.h
@@ -418,7 +418,7 @@ private:
 	void render_one_frame(int64_t duration);
 	void send_audio_level_callback();
 	void audio_thread_func();
-	void process_audio_one_frame(int64_t frame_pts_int, int num_samples);
+	void process_audio_one_frame(int64_t frame_pts_int, int num_samples, bool adjust_rate);
 	void subsample_chroma(GLuint src_tex, GLuint dst_dst);
 	void release_display_frame(DisplayFrame *frame);
 	double pts() { return double(pts_int) / TIMEBASE; }
@@ -553,6 +553,7 @@ private:
 	struct AudioTask {
 		int64_t pts_int;
 		int num_samples;
+		bool adjust_rate;
 	};
 	std::mutex audio_mutex;
 	std::condition_variable audio_task_queue_changed;
diff --git a/resampling_queue.cpp b/resampling_queue.cpp
index d329447..88b711e 100644
--- a/resampling_queue.cpp
+++ b/resampling_queue.cpp
@@ -61,7 +61,7 @@ void ResamplingQueue::add_input_samples(double pts, const float *samples, ssize_
 	}
 }
 
-bool ResamplingQueue::get_output_samples(double pts, float *samples, ssize_t num_samples)
+bool ResamplingQueue::get_output_samples(double pts, float *samples, ssize_t num_samples, ResamplingQueue::RateAdjustmentPolicy rate_adjustment_policy)
 {
 	assert(num_samples > 0);
 	if (first_input) {
@@ -70,58 +70,63 @@ bool ResamplingQueue::get_output_samples(double pts, float *samples, ssize_t num
 		return true;
 	}
 
-	double last_output_len;
-	if (first_output) {
-		// Synthesize a fake length.
-		last_output_len = double(num_samples) / freq_out;
-	} else {
-		last_output_len = pts - last_output_pts;
-	}
-	last_output_pts = pts;
-
-	// Using the time point since just before the last call to add_input_samples() as a base,
-	// estimate actual delay based on activity since then, measured in number of input samples:
-	double actual_delay = 0.0;
-	assert(last_input_len != 0);
-	actual_delay += (k_a1 - k_a0) * last_output_len / last_input_len;    // Inserted samples since k_a0, rescaled for the different time periods.
-	actual_delay += k_a0 - total_consumed_samples;                       // Samples inserted before k_a0 but not consumed yet.
-	actual_delay += vresampler.inpdist();                                // Delay in the resampler itself.
-	double err = actual_delay - expected_delay;
-	if (first_output && err < 0.0) {
-		// Before the very first block, insert artificial delay based on our initial estimate,
-		// so that we don't need a long period to stabilize at the beginning.
-		int delay_samples_to_add = lrintf(-err);
-		for (ssize_t i = 0; i < delay_samples_to_add * num_channels; ++i) {
-			buffer.push_front(0.0f);
+	double rcorr = -1.0;
+	if (rate_adjustment_policy == ADJUST_RATE) {
+		double last_output_len;
+		if (first_output) {
+			// Synthesize a fake length.
+			last_output_len = double(num_samples) / freq_out;
+		} else {
+			last_output_len = pts - last_output_pts;
 		}
-		total_consumed_samples -= delay_samples_to_add;  // Equivalent to increasing k_a0 and k_a1.
-		err += delay_samples_to_add;
-	}
-	first_output = false;
-
-	// Compute loop filter coefficients for the two filters. We need to compute them
-	// every time, since they depend on the number of samples the user asked for.
-	//
-	// The loop bandwidth is at 0.02 Hz; we trust the initial estimate quite well,
-	// and our jitter is pretty large since none of the threads involved run at
-	// real-time priority.
-	double loop_bandwidth_hz = 0.02;
-
-	// Set filters. The first filter much wider than the first one (20x as wide).
-	double w = (2.0 * M_PI) * loop_bandwidth_hz * num_samples / freq_out;
-	double w0 = 1.0 - exp(-20.0 * w);
-	double w1 = w * 1.5 / num_samples / ratio;
-	double w2 = w / 1.5;
-
-	// Filter <err> through the loop filter to find the correction ratio.
-	z1 += w0 * (w1 * err - z1);
-	z2 += w0 * (z1 - z2);
-	z3 += w2 * z2;
-	double rcorr = 1.0 - z2 - z3;
-	if (rcorr > 1.05) rcorr = 1.05;
-	if (rcorr < 0.95) rcorr = 0.95;
-	assert(!isnan(rcorr));
-	vresampler.set_rratio(rcorr);
+		last_output_pts = pts;
+
+		// Using the time point since just before the last call to add_input_samples() as a base,
+		// estimate actual delay based on activity since then, measured in number of input samples:
+		double actual_delay = 0.0;
+		assert(last_input_len != 0);
+		actual_delay += (k_a1 - k_a0) * last_output_len / last_input_len;    // Inserted samples since k_a0, rescaled for the different time periods.
+		actual_delay += k_a0 - total_consumed_samples;                       // Samples inserted before k_a0 but not consumed yet.
+		actual_delay += vresampler.inpdist();                                // Delay in the resampler itself.
+		double err = actual_delay - expected_delay;
+		if (first_output && err < 0.0) {
+			// Before the very first block, insert artificial delay based on our initial estimate,
+			// so that we don't need a long period to stabilize at the beginning.
+			int delay_samples_to_add = lrintf(-err);
+			for (ssize_t i = 0; i < delay_samples_to_add * num_channels; ++i) {
+				buffer.push_front(0.0f);
+			}
+			total_consumed_samples -= delay_samples_to_add;  // Equivalent to increasing k_a0 and k_a1.
+			err += delay_samples_to_add;
+		}
+		first_output = false;
+
+		// Compute loop filter coefficients for the two filters. We need to compute them
+		// every time, since they depend on the number of samples the user asked for.
+		//
+		// The loop bandwidth is at 0.02 Hz; we trust the initial estimate quite well,
+		// and our jitter is pretty large since none of the threads involved run at
+		// real-time priority.
+		double loop_bandwidth_hz = 0.02;
+
+		// Set filters. The first filter is much wider than the second one (20x as wide).
+		double w = (2.0 * M_PI) * loop_bandwidth_hz * num_samples / freq_out;
+		double w0 = 1.0 - exp(-20.0 * w);
+		double w1 = w * 1.5 / num_samples / ratio;
+		double w2 = w / 1.5;
+
+		// Filter <err> through the loop filter to find the correction ratio.
+		z1 += w0 * (w1 * err - z1);
+		z2 += w0 * (z1 - z2);
+		z3 += w2 * z2;
+		rcorr = 1.0 - z2 - z3;
+		if (rcorr > 1.05) rcorr = 1.05;
+		if (rcorr < 0.95) rcorr = 0.95;
+		assert(!isnan(rcorr));
+		vresampler.set_rratio(rcorr);
+	} else {
+		assert(rate_adjustment_policy == DO_NOT_ADJUST_RATE);
+	};
 
 	// Finally actually resample, consuming exactly <num_samples> output samples.
 	vresampler.out_data = samples;
diff --git a/resampling_queue.h b/resampling_queue.h
index cd5b44a..339e41b 100644
--- a/resampling_queue.h
+++ b/resampling_queue.h
@@ -52,9 +52,18 @@ public:
 	// card_num is for debugging outputs only.
 	ResamplingQueue(unsigned card_num, unsigned freq_in, unsigned freq_out, unsigned num_channels = 2);
 
+	// If policy is DO_NOT_ADJUST_RATE, the resampling rate will not be changed.
+	// This is primarily useful if you have an extraordinary situation, such as
+	// dropped frames.
+	enum RateAdjustmentPolicy {
+		DO_NOT_ADJUST_RATE,
+		ADJUST_RATE
+	};
+
 	// Note: pts is always in seconds.
 	void add_input_samples(double pts, const float *samples, ssize_t num_samples);
-	bool get_output_samples(double pts, float *samples, ssize_t num_samples);  // Returns false if underrun.
+	// Returns false if underrun.
+	bool get_output_samples(double pts, float *samples, ssize_t num_samples, RateAdjustmentPolicy rate_adjustment_policy);
 
 private:
 	void init_loop_filter(double bandwidth_hz);
-- 
2.39.2