From 80a2892bbe07a4e065704830e9e7244d2b1139fa Mon Sep 17 00:00:00 2001
From: "Steinar H. Gunderson" <sgunderson@bigfoot.com>
Date: Sun, 22 Apr 2018 15:23:35 +0200
Subject: [PATCH] Add support for using FFmpeg video sources as audio.

---
 audio_mixer.cpp          | 47 +++++++++++++++++++++++++++++++++++-----
 audio_mixer.h            |  5 +++--
 ffmpeg_capture.cpp       |  1 -
 ffmpeg_capture.h         |  1 -
 input_mapping.cpp        | 10 ++++++++-
 input_mapping.h          |  2 +-
 input_mapping_dialog.cpp |  4 +++-
 mixer.cpp                | 38 +++++++++++++++++++++-----------
 mixer.h                  |  6 ++---
 state.proto              |  2 +-
 10 files changed, 87 insertions(+), 29 deletions(-)
diff --git a/audio_mixer.cpp b/audio_mixer.cpp
index 887b105..d2617f6 100644
--- a/audio_mixer.cpp
+++ b/audio_mixer.cpp
@@ -167,8 +167,10 @@ void deinterleave_samples(const vector<float> &in, vector<float> *out_l, vector<
 
 }  // namespace
 
-AudioMixer::AudioMixer(unsigned num_cards)
-	: num_cards(num_cards),
+AudioMixer::AudioMixer(unsigned num_capture_cards, unsigned num_ffmpeg_inputs)
+	: num_capture_cards(num_capture_cards),
+	  num_ffmpeg_inputs(num_ffmpeg_inputs),
+	  ffmpeg_inputs(new AudioDevice[num_ffmpeg_inputs]),
 	  limiter(OUTPUT_FREQUENCY),
 	  correlation(OUTPUT_FREQUENCY)
 {
@@ -387,6 +389,8 @@ AudioMixer::AudioDevice *AudioMixer::find_audio_device(DeviceSpec device)
 		return &video_cards[device.index];
 	case InputSourceType::ALSA_INPUT:
 		return &alsa_inputs[device.index];
+	case InputSourceType::FFMPEG_VIDEO_INPUT:
+		return &ffmpeg_inputs[device.index];
 	case InputSourceType::SILENCE:
 	default:
 		assert(false);
@@ -425,7 +429,8 @@ void AudioMixer::fill_audio_bus(const map<DeviceSpec, vector<float>> &samples_ca
 		memset(output, 0, num_samples * 2 * sizeof(*output));
 	} else {
 		assert(bus.device.type == InputSourceType::CAPTURE_CARD ||
-		       bus.device.type == InputSourceType::ALSA_INPUT);
+		       bus.device.type == InputSourceType::ALSA_INPUT ||
+		       bus.device.type == InputSourceType::FFMPEG_VIDEO_INPUT);
 		const float *lsrc, *rsrc;
 		unsigned lstride, rstride;
 		float *dptr = output;
@@ -455,6 +460,12 @@ vector<DeviceSpec> AudioMixer::get_active_devices() const
 			ret.push_back(device_spec);
 		}
 	}
+	for (unsigned card_index = 0; card_index < num_ffmpeg_inputs; ++card_index) {
+		const DeviceSpec device_spec{InputSourceType::FFMPEG_VIDEO_INPUT, card_index};
+		if (!find_audio_device(device_spec)->interesting_channels.empty()) {
+			ret.push_back(device_spec);
+		}
+	}
 	return ret;
 }
 
@@ -878,7 +889,7 @@ map<DeviceSpec, DeviceInfo> AudioMixer::get_devices()
 	lock_guard<timed_mutex> lock(audio_mutex);
 
 	map<DeviceSpec, DeviceInfo> devices;
-	for (unsigned card_index = 0; card_index < num_cards; ++card_index) {
+	for (unsigned card_index = 0; card_index < num_capture_cards; ++card_index) {
 		const DeviceSpec spec{ InputSourceType::CAPTURE_CARD, card_index };
 		const AudioDevice *device = &video_cards[card_index];
 		DeviceInfo info;
@@ -898,6 +909,14 @@ map<DeviceSpec, DeviceInfo> AudioMixer::get_devices()
 		info.alsa_address = device.address;
 		devices.insert(make_pair(spec, info));
 	}
+	for (unsigned card_index = 0; card_index < num_ffmpeg_inputs; ++card_index) {
+		const DeviceSpec spec{ InputSourceType::FFMPEG_VIDEO_INPUT, card_index };
+		const AudioDevice *device = &ffmpeg_inputs[card_index];
+		DeviceInfo info;
+		info.display_name = device->display_name;
+		info.num_channels = 2;
+		devices.insert(make_pair(spec, info));
+	}
 	return devices;
 }
 
@@ -924,6 +943,11 @@ void AudioMixer::serialize_device(DeviceSpec device_spec, DeviceSpecProto *devic
 		case InputSourceType::ALSA_INPUT:
 			alsa_pool.serialize_device(device_spec.index, device_spec_proto);
 			break;
+		case InputSourceType::FFMPEG_VIDEO_INPUT:
+			device_spec_proto->set_type(DeviceSpecProto::FFMPEG_VIDEO_INPUT);
+			device_spec_proto->set_index(device_spec.index);
+			device_spec_proto->set_display_name(ffmpeg_inputs[device_spec.index].display_name);
+			break;
 	}
 }
 
@@ -976,12 +1000,15 @@ void AudioMixer::set_input_mapping_lock_held(const InputMapping &new_input_mappi
 	map<DeviceSpec, set<unsigned>> interesting_channels;
 	for (const InputMapping::Bus &bus : new_input_mapping.buses) {
 		if (bus.device.type == InputSourceType::CAPTURE_CARD ||
-		    bus.device.type == InputSourceType::ALSA_INPUT) {
+		    bus.device.type == InputSourceType::ALSA_INPUT ||
+		    bus.device.type == InputSourceType::FFMPEG_VIDEO_INPUT) {
 			for (unsigned channel = 0; channel < 2; ++channel) {
 				if (bus.source_channel[channel] != -1) {
 					interesting_channels[bus.device].insert(bus.source_channel[channel]);
 				}
 			}
+		} else {
+			assert(bus.device.type == InputSourceType::SILENCE);
 		}
 	}
 
@@ -1021,6 +1048,8 @@ void AudioMixer::set_input_mapping_lock_held(const InputMapping &new_input_mappi
 			metrics.labels.emplace_back("source_type", "capture_card");
 		} else if (bus.device.type == InputSourceType::ALSA_INPUT) {
 			metrics.labels.emplace_back("source_type", "alsa_input");
+		} else if (bus.device.type == InputSourceType::FFMPEG_VIDEO_INPUT) {
+			metrics.labels.emplace_back("source_type", "ffmpeg_video_input");
 		} else {
 			assert(false);
 		}
@@ -1064,6 +1093,14 @@ void AudioMixer::set_input_mapping_lock_held(const InputMapping &new_input_mappi
 			reset_resampler_mutex_held(device_spec);
 		}
 	}
+	for (unsigned card_index = 0; card_index < num_ffmpeg_inputs; ++card_index) {
+		const DeviceSpec device_spec{InputSourceType::FFMPEG_VIDEO_INPUT, card_index};
+		AudioDevice *device = find_audio_device(device_spec);
+		if (device->interesting_channels != interesting_channels[device_spec]) {
+			device->interesting_channels = interesting_channels[device_spec];
+			reset_resampler_mutex_held(device_spec);
+		}
+	}
 
 	input_mapping = new_input_mapping;
 }
diff --git a/audio_mixer.h b/audio_mixer.h
index 17f9528..ebe142a 100644
--- a/audio_mixer.h
+++ b/audio_mixer.h
@@ -46,7 +46,7 @@ enum EQBand {
 
 class AudioMixer {
 public:
-	AudioMixer(unsigned num_cards);
+	AudioMixer(unsigned num_capture_cards, unsigned num_ffmpeg_inputs);
 	void reset_resampler(DeviceSpec device_spec);
 	void reset_meters();
 
@@ -327,13 +327,14 @@ private:
 	std::vector<DeviceSpec> get_active_devices() const;
 	void set_input_mapping_lock_held(const InputMapping &input_mapping);
 
-	unsigned num_cards;
+	unsigned num_capture_cards, num_ffmpeg_inputs;
 
 	mutable std::timed_mutex audio_mutex;
 
 	ALSAPool alsa_pool;
 	AudioDevice video_cards[MAX_VIDEO_CARDS];  // Under audio_mutex.
 	AudioDevice alsa_inputs[MAX_ALSA_CARDS];  // Under audio_mutex.
+	std::unique_ptr<AudioDevice[]> ffmpeg_inputs;  // Under audio_mutex.
 
 	std::atomic<float> locut_cutoff_hz{120};
 	StereoFilter locut[MAX_BUSES];  // Default cutoff 120 Hz, 24 dB/oct.
diff --git a/ffmpeg_capture.cpp b/ffmpeg_capture.cpp
index c5f71b6..a0904dc 100644
--- a/ffmpeg_capture.cpp
+++ b/ffmpeg_capture.cpp
@@ -208,7 +208,6 @@ YCbCrFormat decode_ycbcr_format(const AVPixFmtDescriptor *desc, const AVFrame *f
 FFmpegCapture::FFmpegCapture(const string &filename, unsigned width, unsigned height)
 	: filename(filename), width(width), height(height), video_timebase{1, 1}
 {
-	// Not really used for anything.
 	description = "Video: " + filename;
 
 	last_frame = steady_clock::now();
diff --git a/ffmpeg_capture.h b/ffmpeg_capture.h
index 0fbc3e8..336f6bd 100644
--- a/ffmpeg_capture.h
+++ b/ffmpeg_capture.h
@@ -17,7 +17,6 @@
 // changes parameters midway, which is allowed in some formats.
 //
 // You can get out the audio either as decoded or in raw form (Kaeru uses this).
-// However, the rest of Nageru can't really use the audio for anything yet.
 
 #include <assert.h>
 #include <stdint.h>
diff --git a/input_mapping.cpp b/input_mapping.cpp
index f894c95..45b6009 100644
--- a/input_mapping.cpp
+++ b/input_mapping.cpp
@@ -27,6 +27,9 @@ string spec_to_string(DeviceSpec device_spec)
 	case InputSourceType::ALSA_INPUT:
 		snprintf(buf, sizeof(buf), "ALSA input %u", device_spec.index);
 		return buf;
+	case InputSourceType::FFMPEG_VIDEO_INPUT:
+		snprintf(buf, sizeof(buf), "FFmpeg input %u", device_spec.index);
+		return buf;
 	default:
 		assert(false);
 	}
@@ -103,10 +106,15 @@ bool load_input_mapping_from_file(const map<DeviceSpec, DeviceInfo> &devices, co
 		case DeviceSpecProto::SILENCE:
 			device_mapping.push_back(DeviceSpec{InputSourceType::SILENCE, 0});
 			break;
+		case DeviceSpecProto::FFMPEG_VIDEO_INPUT:
 		case DeviceSpecProto::CAPTURE_CARD: {
 			// First see if there's a card that matches on both index and name.
-			DeviceSpec spec{InputSourceType::CAPTURE_CARD, unsigned(device_proto.index())};
+			DeviceSpec spec;
+			spec.type = (device_proto.type() == DeviceSpecProto::CAPTURE_CARD) ?
+				InputSourceType::CAPTURE_CARD : InputSourceType::FFMPEG_VIDEO_INPUT;
+			spec.index = unsigned(device_proto.index());
 			assert(devices.count(spec));
+
 			const DeviceInfo &dev = devices.find(spec)->second;
 			if (remaining_devices.count(spec) &&
 			    dev.display_name == device_proto.display_name()) {
diff --git a/input_mapping.h b/input_mapping.h
index 540fde3..67af0f4 100644
--- a/input_mapping.h
+++ b/input_mapping.h
@@ -6,7 +6,7 @@
 #include <string>
 #include <vector>
 
-enum class InputSourceType { SILENCE, CAPTURE_CARD, ALSA_INPUT };
+enum class InputSourceType { SILENCE, CAPTURE_CARD, ALSA_INPUT, FFMPEG_VIDEO_INPUT };  // Kind of audio source a DeviceSpec refers to; mirrored by DeviceSpecProto::InputSourceType in state.proto.
 struct DeviceSpec {
 	InputSourceType type;
 	unsigned index;
diff --git a/input_mapping_dialog.cpp b/input_mapping_dialog.cpp
index 9e12c98..e26649d 100644
--- a/input_mapping_dialog.cpp
+++ b/input_mapping_dialog.cpp
@@ -148,7 +148,8 @@ void InputMappingDialog::setup_channel_choices_from_bus(unsigned row, const Inpu
 		QComboBox *channel_combo = new QComboBox;
 		channel_combo->addItem(QString("(none)"));
 		if (bus.device.type == InputSourceType::CAPTURE_CARD ||
-		    bus.device.type == InputSourceType::ALSA_INPUT) {
+		    bus.device.type == InputSourceType::ALSA_INPUT ||
+		    bus.device.type == InputSourceType::FFMPEG_VIDEO_INPUT) {
 			auto device_it = devices.find(bus.device);
 			assert(device_it != devices.end());
 			unsigned num_device_channels = device_it->second.num_channels;
@@ -159,6 +160,7 @@ void InputMappingDialog::setup_channel_choices_from_bus(unsigned row, const Inpu
 			}
 			channel_combo->setCurrentIndex(bus.source_channel[channel] + 1);
 		} else {
+			assert(bus.device.type == InputSourceType::SILENCE);
 			channel_combo->setCurrentIndex(0);
 		}
 		connect(channel_combo, static_cast<void(QComboBox::*)(int)>(&QComboBox::currentIndexChanged),
diff --git a/mixer.cpp b/mixer.cpp
index cbc66d4..2365810 100644
--- a/mixer.cpp
+++ b/mixer.cpp
@@ -304,8 +304,7 @@ Mixer::Mixer(const QSurfaceFormat &format, unsigned num_cards)
 	  num_cards(num_cards),
 	  mixer_surface(create_surface(format)),
 	  h264_encoder_surface(create_surface(format)),
-	  decklink_output_surface(create_surface(format)),
-	  audio_mixer(num_cards)
+	  decklink_output_surface(create_surface(format))
 {
 	memcpy(ycbcr_interpretation, global_flags.ycbcr_interpretation, sizeof(ycbcr_interpretation));
 	CHECK(init_movit(MOVIT_SHADER_DIR, MOVIT_DEBUG_OFF));
@@ -361,6 +360,10 @@ Mixer::Mixer(const QSurfaceFormat &format, unsigned num_cards)
 	// Must be instantiated after VideoEncoder has initialized global_flags.use_zerocopy.
 	theme.reset(new Theme(global_flags.theme_filename, global_flags.theme_dirs, resource_pool.get(), num_cards));
 
+	// Must be instantiated after the theme, as the theme decides the number of FFmpeg inputs.
+	std::vector<FFmpegCapture *> video_inputs = theme->get_video_inputs();
+	audio_mixer.reset(new AudioMixer(num_cards, video_inputs.size()));
+
 	httpd.add_endpoint("/channels", bind(&Mixer::get_channels_json, this), HTTPD::ALLOW_ALL_ORIGINS);
 	for (int channel_idx = 2; channel_idx < theme->get_num_channels(); ++channel_idx) {
 		char url[256];
@@ -421,7 +424,6 @@ Mixer::Mixer(const QSurfaceFormat &format, unsigned num_cards)
 
 	// Initialize all video inputs the theme asked for. Note that these are
 	// all put _after_ the regular cards, which stop at <num_cards> - 1.
-	std::vector<FFmpegCapture *> video_inputs = theme->get_video_inputs();
 	for (unsigned video_card_index = 0; video_card_index < video_inputs.size(); ++card_index, ++video_card_index) {
 		if (card_index >= MAX_VIDEO_CARDS) {
 			fprintf(stderr, "ERROR: Not enough card slots available for the videos the theme requested.\n");
@@ -558,10 +560,15 @@ void Mixer::configure_card(unsigned card_index, CaptureInterface *capture, CardT
 
 	// NOTE: start_bm_capture() happens in thread_func().
 
-	DeviceSpec device{InputSourceType::CAPTURE_CARD, card_index};
-	audio_mixer.reset_resampler(device);
-	audio_mixer.set_display_name(device, card->capture->get_description());
-	audio_mixer.trigger_state_changed_callback();
+	DeviceSpec device;
+	if (card_type == CardType::FFMPEG_INPUT) {
+		device = DeviceSpec{InputSourceType::FFMPEG_VIDEO_INPUT, card_index - num_cards};
+	} else {
+		device = DeviceSpec{InputSourceType::CAPTURE_CARD, card_index};
+	}
+	audio_mixer->reset_resampler(device);
+	audio_mixer->set_display_name(device, card->capture->get_description());
+	audio_mixer->trigger_state_changed_callback();
 
 	// Unregister old metrics, if any.
 	if (!card->labels.empty()) {
@@ -688,7 +695,12 @@ void Mixer::bm_frame(unsigned card_index, uint16_t timecode,
                      FrameAllocator::Frame video_frame, size_t video_offset, VideoFormat video_format,
 		     FrameAllocator::Frame audio_frame, size_t audio_offset, AudioFormat audio_format)
 {
-	DeviceSpec device{InputSourceType::CAPTURE_CARD, card_index};
+	DeviceSpec device;
+	if (card_index >= num_cards) {
+		device = DeviceSpec{InputSourceType::FFMPEG_VIDEO_INPUT, card_index - num_cards};
+	} else {
+		device = DeviceSpec{InputSourceType::CAPTURE_CARD, card_index};
+	}
 	CaptureCard *card = &cards[card_index];
 
 	++card->metric_input_received_frames;
@@ -723,7 +735,7 @@ void Mixer::bm_frame(unsigned card_index, uint16_t timecode,
 	assert(frame_length > 0);
 
 	size_t num_samples = (audio_frame.len > audio_offset) ? (audio_frame.len - audio_offset) / audio_format.num_channels / (audio_format.bits_per_sample / 8) : 0;
-	if (num_samples > OUTPUT_FREQUENCY / 10) {
+	if (num_samples > OUTPUT_FREQUENCY / 10 && card->type != CardType::FFMPEG_INPUT) {
 		printf("%s: Dropping frame with implausible audio length (len=%d, offset=%d) [timecode=0x%04x video_len=%d video_offset=%d video_format=%x)\n",
 			spec_to_string(device).c_str(), int(audio_frame.len), int(audio_offset),
 			timecode, int(video_frame.len), int(video_offset), video_format.id);
@@ -748,7 +760,7 @@ void Mixer::bm_frame(unsigned card_index, uint16_t timecode,
 	if (dropped_frames > MAX_FPS * 2) {
 		fprintf(stderr, "%s lost more than two seconds (or time code jumping around; from 0x%04x to 0x%04x), resetting resampler\n",
 			spec_to_string(device).c_str(), card->last_timecode, timecode);
-		audio_mixer.reset_resampler(device);
+		audio_mixer->reset_resampler(device);
 		dropped_frames = 0;
 		++card->metric_input_resets;
 	} else if (dropped_frames > 0) {
@@ -759,12 +771,12 @@ void Mixer::bm_frame(unsigned card_index, uint16_t timecode,
 
 		bool success;
 		do {
-			success = audio_mixer.add_silence(device, silence_samples, dropped_frames, frame_length);
+			success = audio_mixer->add_silence(device, silence_samples, dropped_frames, frame_length);
 		} while (!success);
 	}
 
 	if (num_samples > 0) {
-		audio_mixer.add_audio(device, audio_frame.data + audio_offset, num_samples, audio_format, frame_length, audio_frame.received_timestamp);
+		audio_mixer->add_audio(device, audio_frame.data + audio_offset, num_samples, audio_format, frame_length, audio_frame.received_timestamp);
 	}
 
 	// Done with the audio, so release it.
@@ -1488,7 +1500,7 @@ void Mixer::audio_thread_func()
 
 		ResamplingQueue::RateAdjustmentPolicy rate_adjustment_policy =
 			task.adjust_rate ? ResamplingQueue::ADJUST_RATE : ResamplingQueue::DO_NOT_ADJUST_RATE;
-		vector<float> samples_out = audio_mixer.get_output(
+		vector<float> samples_out = audio_mixer->get_output(
 			task.frame_timestamp,
 			task.num_samples,
 			rate_adjustment_policy);
diff --git a/mixer.h b/mixer.h
index adfb7cf..d8a9c8b 100644
--- a/mixer.h
+++ b/mixer.h
@@ -288,8 +288,8 @@ public:
 	}
 
 	// Note: You can also get this through the global variable global_audio_mixer.
-	AudioMixer *get_audio_mixer() { return &audio_mixer; }
-	const AudioMixer *get_audio_mixer() const { return &audio_mixer; }
+	AudioMixer *get_audio_mixer() { return audio_mixer.get(); }
+	const AudioMixer *get_audio_mixer() const { return audio_mixer.get(); }
 
 	void schedule_cut()
 	{
@@ -548,7 +548,7 @@ private:
 	JitterHistory output_jitter_history;
 	CaptureCard cards[MAX_VIDEO_CARDS];  // Protected by <card_mutex>.
 	YCbCrInterpretation ycbcr_interpretation[MAX_VIDEO_CARDS];  // Protected by <card_mutex>.
-	AudioMixer audio_mixer;  // Same as global_audio_mixer (see audio_mixer.h).
+	std::unique_ptr<AudioMixer> audio_mixer;  // Same as global_audio_mixer (see audio_mixer.h).
 	bool input_card_is_master_clock(unsigned card_index, unsigned master_card_index) const;
 	struct OutputFrameInfo {
 		int dropped_frames;  // Since last frame.
diff --git a/state.proto b/state.proto
index 8ea6b97..6372e61 100644
--- a/state.proto
+++ b/state.proto
@@ -8,7 +8,7 @@ syntax = "proto2";
 // to the right device even if the devices have moved around.
 message DeviceSpecProto {
 	// Members from DeviceSpec itself.
-	enum InputSourceType { SILENCE = 0; CAPTURE_CARD = 1; ALSA_INPUT = 2; };
+	enum InputSourceType { SILENCE = 0; CAPTURE_CARD = 1; ALSA_INPUT = 2; FFMPEG_VIDEO_INPUT = 3; };  // Serialized counterpart of the C++ InputSourceType in input_mapping.h.
 	optional InputSourceType type = 1;
 	optional int32 index = 2;
 
-- 
2.39.2