X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;ds=sidebyside;f=nageru%2Faudio_mixer.cpp;h=07d10dad3650a9ca037263d661b4d5deca4adbf2;hb=4c23038d9c18d22a9bfe577244a2d0fc293a7e6a;hp=9e7dd59a0dbc64ab6398e30824c8dae6d676b912;hpb=9b7d691b4cc5db7dbfc18c82e86c1207fcac4722;p=nageru

diff --git a/nageru/audio_mixer.cpp b/nageru/audio_mixer.cpp
index 9e7dd59..07d10da 100644
--- a/nageru/audio_mixer.cpp
+++ b/nageru/audio_mixer.cpp
@@ -18,11 +18,12 @@
 #include <limits>
 #include <utility>
 
-#include "db.h"
+#include "decibel.h"
+#include "delay_analyzer.h"
 #include "flags.h"
-#include "metrics.h"
+#include "shared/metrics.h"
 #include "state.pb.h"
-#include "timebase.h"
+#include "shared/timebase.h"
 
 using namespace bmusb;
 using namespace std;
@@ -52,6 +53,26 @@ void convert_fixed16_to_fp32(float *dst, size_t out_channel, size_t out_num_chan
 	}
 }
 
+void convert_fixed16_to_fixed32(int32_t *dst, size_t out_channel, size_t out_num_channels,
+                                const uint8_t *src, size_t in_channel, size_t in_num_channels,
+                                size_t num_samples)  // Widens one channel of interleaved little-endian 16-bit samples into one channel of an interleaved int32_t buffer.
+{
+	assert(in_channel < in_num_channels);
+	assert(out_channel < out_num_channels);
+	src += in_channel * 2;  // Two bytes per input sample; start at the requested input channel.
+	dst += out_channel;  // Start at the requested channel of the interleaved output.
+
+	for (size_t i = 0; i < num_samples; ++i) {
+		uint32_t s = uint32_t(uint16_t(le16toh(*(int16_t *)src))) << 16;  // Little-endian 16-bit sample, moved into the top 16 bits (low 16 bits are zero). NOTE(review): reads through an int16_t pointer, so this presumably relies on src being suitably aligned -- confirm.
+
+		// Keep the sign bit in place, repeat the other 15 bits as far as they go.
+		*dst = s | ((s & 0x7fffffff) >> 15) | ((s & 0x7fffffff) >> 30);  // Bits 30..16 are copied into bits 15..1, and bit 30 is also copied into bit 0.
+
+		src += 2 * in_num_channels;  // Step to the same channel in the next input frame.
+		dst += out_num_channels;  // Step to the same channel in the next output frame.
+	}
+}
+
 void convert_fixed24_to_fp32(float *dst, size_t out_channel, size_t out_num_channels,
                              const uint8_t *src, size_t in_channel, size_t in_num_channels,
                              size_t num_samples)
@@ -65,8 +86,31 @@ void convert_fixed24_to_fp32(float *dst, size_t out_channel, size_t out_num_chan
 		uint32_t s1 = src[0];
 		uint32_t s2 = src[1];
 		uint32_t s3 = src[2];
-		uint32_t s = s1 | (s1 << 8) | (s2 << 16) | (s3 << 24);
-		*dst = int(s) * (1.0f / 2147483648.0f);
+		uint32_t s = (s1 << 8) | (s2 << 16) | (s3 << 24);  // Note: The bottom eight bits are zero; s3 includes the sign bit.
+		*dst = int(s) * (1.0f / (256.0f * 8388608.0f));  // 256 for signed down-shift by 8, then 2^23 for the actual conversion.
+
+		src += 3 * in_num_channels;
+		dst += out_num_channels;
+	}
+}
+
+void convert_fixed24_to_fixed32(int32_t *dst, size_t out_channel, size_t out_num_channels,
+                                const uint8_t *src, size_t in_channel, size_t in_num_channels,
+                                size_t num_samples)
+{
+	assert(in_channel < in_num_channels);
+	assert(out_channel < out_num_channels);
+	src += in_channel * 3;
+	dst += out_channel;
+
+	for (size_t i = 0; i < num_samples; ++i) {
+		uint32_t s1 = src[0];
+		uint32_t s2 = src[1];
+		uint32_t s3 = src[2];
+		uint32_t s = (s1 << 8) | (s2 << 16) | (s3 << 24);
+
+		// Keep the sign bit in place, repeat the other 23 bits as far as they go.
+		*dst = s | ((s & 0x7fffffff) >> 23);
 
 		src += 3 * in_num_channels;
 		dst += out_num_channels;
@@ -91,6 +135,25 @@ void convert_fixed32_to_fp32(float *dst, size_t out_channel, size_t out_num_chan
 	}
 }
 
+// Basically just a reinterleave: copies one channel of interleaved little-endian 32-bit samples into one channel of an interleaved int32_t buffer, converting to host byte order.
+void convert_fixed32_to_fixed32(int32_t *dst, size_t out_channel, size_t out_num_channels,
+                                const uint8_t *src, size_t in_channel, size_t in_num_channels,
+                                size_t num_samples)
+{
+	assert(in_channel < in_num_channels);
+	assert(out_channel < out_num_channels);
+	src += in_channel * 4;  // Four bytes per input sample; start at the requested input channel.
+	dst += out_channel;  // Start at the requested channel of the interleaved output.
+
+	for (size_t i = 0; i < num_samples; ++i) {
+		int32_t s = le32toh(*(int32_t *)src);  // NOTE(review): reads through an int32_t pointer, so this presumably relies on src being suitably aligned -- confirm.
+		*dst = s;
+
+		src += 4 * in_num_channels;  // Step to the same channel in the next input frame.
+		dst += out_num_channels;  // Step to the same channel in the next output frame.
+	}
+}
+
 float find_peak_plain(const float *samples, size_t num_samples) __attribute__((unused));
 
 float find_peak_plain(const float *samples, size_t num_samples)
@@ -165,6 +228,14 @@ void deinterleave_samples(const vector<float> &in, vector<float> *out_l, vector<
 	}
 }
 
+double get_delay_seconds(double extra_delay_ms)  // Returns global_flags.audio_queue_length_ms plus extra_delay_ms, clamped to at least 1 ms, expressed in seconds.
+{
+	// Make sure we never get negative delay. Even 1 ms is probably way less than we
+	// could ever hope to actually have; this is just a failsafe.
+	double delay_ms = max(global_flags.audio_queue_length_ms + extra_delay_ms, 1.0);  // extra_delay_ms may be negative; the sum is floored at 1.0 ms.
+	return delay_ms * 0.001;  // Milliseconds to seconds.
+}
+
 }  // namespace
 
 AudioMixer::AudioMixer(unsigned num_capture_cards, unsigned num_ffmpeg_inputs)
@@ -201,12 +272,12 @@ AudioMixer::AudioMixer(unsigned num_capture_cards, unsigned num_ffmpeg_inputs)
 		// Must happen after ALSAPool is initialized, as it needs to know the card list.
 		current_mapping_mode = MappingMode::MULTICHANNEL;
 		InputMapping new_input_mapping;
-		if (!load_input_mapping_from_file(get_devices(),
+		if (!load_input_mapping_from_file(get_devices(HOLD_ALSA_DEVICES),
 		                                  global_flags.input_mapping_filename,
 		                                  &new_input_mapping)) {
 			fprintf(stderr, "Failed to load input mapping from '%s', exiting.\n",
 				global_flags.input_mapping_filename.c_str());
-			exit(1);
+			abort();
 		}
 		set_input_mapping(new_input_mapping);
 	} else {
@@ -240,12 +311,16 @@ void AudioMixer::reset_resampler_mutex_held(DeviceSpec device_spec)
 	} else {
 		device->resampling_queue.reset(new ResamplingQueue(
 			device_spec, device->capture_frequency, OUTPUT_FREQUENCY, device->interesting_channels.size(),
-			global_flags.audio_queue_length_ms * 0.001));
+			get_delay_seconds(device->extra_delay_ms)));
 	}
 }
 
-bool AudioMixer::add_audio(DeviceSpec device_spec, const uint8_t *data, unsigned num_samples, AudioFormat audio_format, int64_t frame_length, steady_clock::time_point frame_time)
+bool AudioMixer::add_audio(DeviceSpec device_spec, const uint8_t *data, unsigned num_samples, AudioFormat audio_format, steady_clock::time_point frame_time)
 {
+	if (delay_analyzer != nullptr && delay_analyzer->is_grabbing()) {
+		delay_analyzer->add_audio(device_spec, data, num_samples, audio_format, frame_time);
+	}
+
 	AudioDevice *device = find_audio_device(device_spec);
 
 	unique_lock<timed_mutex> lock(audio_mutex, defer_lock);
@@ -264,18 +339,43 @@ bool AudioMixer::add_audio(DeviceSpec device_spec, const uint8_t *data, unsigned
 	unique_ptr<float[]> audio(new float[num_samples * num_channels]);
 	unsigned channel_index = 0;
 	for (auto channel_it = device->interesting_channels.cbegin(); channel_it != device->interesting_channels.end(); ++channel_it, ++channel_index) {
+		convert_audio_to_fp32(audio.get(), channel_index, num_channels, data, *channel_it, audio_format, num_samples);
+	}
+
+	// If we changed frequency since last frame, we'll need to reset the resampler.
+	if (audio_format.sample_rate != device->capture_frequency) {
+		device->capture_frequency = audio_format.sample_rate;
+		reset_resampler_mutex_held(device_spec);
+	}
+
+	// Now add it.
+	device->resampling_queue->add_input_samples(frame_time, audio.get(), num_samples, ResamplingQueue::ADJUST_RATE);
+	return true;
+}
+
+// Converts all channels.
+vector<int32_t> convert_audio_to_fixed32(const uint8_t *data, unsigned num_samples, bmusb::AudioFormat audio_format, unsigned num_channels)
+{
+	vector<int32_t> audio;
+
+	if (num_channels > audio_format.num_channels) {
+		audio.resize(num_samples * num_channels, 0);
+	} else {
+		audio.resize(num_samples * num_channels);
+	}
+	for (unsigned channel_index = 0; channel_index < num_channels && channel_index < audio_format.num_channels; ++channel_index) {
 		switch (audio_format.bits_per_sample) {
 		case 0:
 			assert(num_samples == 0);
 			break;
 		case 16:
-			convert_fixed16_to_fp32(audio.get(), channel_index, num_channels, data, *channel_it, audio_format.num_channels, num_samples);
+			convert_fixed16_to_fixed32(&audio[0], channel_index, num_channels, data, channel_index, audio_format.num_channels, num_samples);
 			break;
 		case 24:
-			convert_fixed24_to_fp32(audio.get(), channel_index, num_channels, data, *channel_it, audio_format.num_channels, num_samples);
+			convert_fixed24_to_fixed32(&audio[0], channel_index, num_channels, data, channel_index, audio_format.num_channels, num_samples);
 			break;
 		case 32:
-			convert_fixed32_to_fp32(audio.get(), channel_index, num_channels, data, *channel_it, audio_format.num_channels, num_samples);
+			convert_fixed32_to_fixed32(&audio[0], channel_index, num_channels, data, channel_index, audio_format.num_channels, num_samples);
 			break;
 		default:
 			fprintf(stderr, "Cannot handle audio with %u bits per sample\n", audio_format.bits_per_sample);
@@ -283,18 +383,34 @@ bool AudioMixer::add_audio(DeviceSpec device_spec, const uint8_t *data, unsigned
 		}
 	}
 
-	// If we changed frequency since last frame, we'll need to reset the resampler.
-	if (audio_format.sample_rate != device->capture_frequency) {
-		device->capture_frequency = audio_format.sample_rate;
-		reset_resampler_mutex_held(device_spec);
-	}
+	return audio;
+}
 
-	// Now add it.
-	device->resampling_queue->add_input_samples(frame_time, audio.get(), num_samples, ResamplingQueue::ADJUST_RATE);
-	return true;
+// Converts only one channel: dispatches on in_audio_format.bits_per_sample to the matching convert_fixed*_to_fp32() helper.
+void convert_audio_to_fp32(float *dst, size_t out_channel, size_t out_num_channels,
+                           const uint8_t *src, size_t in_channel, bmusb::AudioFormat in_audio_format,
+                           size_t num_samples)
+{
+	switch (in_audio_format.bits_per_sample) {
+	case 0:
+		assert(num_samples == 0);  // Zero bits per sample is only accepted when there is nothing to convert; dst is left untouched.
+		break;
+	case 16:
+		convert_fixed16_to_fp32(dst, out_channel, out_num_channels, src, in_channel, in_audio_format.num_channels, num_samples);
+		break;
+	case 24:
+		convert_fixed24_to_fp32(dst, out_channel, out_num_channels, src, in_channel, in_audio_format.num_channels, num_samples);
+		break;
+	case 32:
+		convert_fixed32_to_fp32(dst, out_channel, out_num_channels, src, in_channel, in_audio_format.num_channels, num_samples);
+		break;
+	default:
+		fprintf(stderr, "Cannot handle audio with %u bits per sample\n", in_audio_format.bits_per_sample);
+		assert(false);  // Unsupported sample width. With asserts compiled out, this branch only logs and leaves dst unwritten.
+	}
 }
 
-bool AudioMixer::add_silence(DeviceSpec device_spec, unsigned samples_per_frame, unsigned num_frames, int64_t frame_length)
+bool AudioMixer::add_silence(DeviceSpec device_spec, unsigned samples_per_frame, unsigned num_frames)
 {
 	AudioDevice *device = find_audio_device(device_spec);
 
@@ -923,7 +1039,7 @@ void AudioMixer::send_audio_level_callback()
 		correlation.get_correlation());
 }
 
-map<DeviceSpec, DeviceInfo> AudioMixer::get_devices()
+map<DeviceSpec, DeviceInfo> AudioMixer::get_devices(HoldDevices hold_devices)
 {
 	lock_guard<timed_mutex> lock(audio_mutex);
 
@@ -936,7 +1052,7 @@ map<DeviceSpec, DeviceInfo> AudioMixer::get_devices()
 		info.num_channels = 8;
 		devices.insert(make_pair(spec, info));
 	}
-	vector<ALSAPool::Device> available_alsa_devices = alsa_pool.get_devices();
+	vector<ALSAPool::Device> available_alsa_devices = alsa_pool.get_devices(hold_devices);
 	for (unsigned card_index = 0; card_index < available_alsa_devices.size(); ++card_index) {
 		const DeviceSpec spec{ InputSourceType::ALSA_INPUT, card_index };
 		const ALSAPool::Device &device = available_alsa_devices[card_index];
@@ -1006,6 +1122,8 @@ void AudioMixer::set_simple_input(unsigned card_index)
 
 	new_input_mapping.buses.push_back(input);
 
+	// NOTE: Delay is implicitly at 0.0 ms, since none has been set in the mapping.
+
 	lock_guard<timed_mutex> lock(audio_mutex);
 	current_mapping_mode = MappingMode::SIMPLE;
 	set_input_mapping_lock_held(new_input_mapping);
@@ -1119,17 +1237,25 @@ void AudioMixer::set_input_mapping_lock_held(const InputMapping &new_input_mappi
 	}
 
 	// Reset resamplers for all cards that don't have the exact same state as before.
+	map<DeviceSpec, double> new_extra_delay_ms = new_input_mapping.extra_delay_ms;  // Convenience so we can use [].
 	for (unsigned card_index = 0; card_index < MAX_VIDEO_CARDS; ++card_index) {
 		const DeviceSpec device_spec{InputSourceType::CAPTURE_CARD, card_index};
 		AudioDevice *device = find_audio_device(device_spec);
+		double extra_delay_ms = new_extra_delay_ms[device_spec];
 		if (device->interesting_channels != interesting_channels[device_spec]) {
 			device->interesting_channels = interesting_channels[device_spec];
+			device->extra_delay_ms = extra_delay_ms;
 			reset_resampler_mutex_held(device_spec);
+		} else if (device->extra_delay_ms != extra_delay_ms &&
+		           device->resampling_queue != nullptr) {
+			device->extra_delay_ms = extra_delay_ms;
+			device->resampling_queue->change_expected_delay(get_delay_seconds(extra_delay_ms));
 		}
 	}
 	for (unsigned card_index = 0; card_index < MAX_ALSA_CARDS; ++card_index) {
 		const DeviceSpec device_spec{InputSourceType::ALSA_INPUT, card_index};
 		AudioDevice *device = find_audio_device(device_spec);
+		double extra_delay_ms = new_extra_delay_ms[device_spec];
 		if (interesting_channels[device_spec].empty()) {
 			alsa_pool.release_device(card_index);
 		} else {
@@ -1137,16 +1263,27 @@ void AudioMixer::set_input_mapping_lock_held(const InputMapping &new_input_mappi
 		}
 		if (device->interesting_channels != interesting_channels[device_spec]) {
 			device->interesting_channels = interesting_channels[device_spec];
+			device->extra_delay_ms = extra_delay_ms;
 			alsa_pool.reset_device(device_spec.index);
 			reset_resampler_mutex_held(device_spec);
+		} else if (device->extra_delay_ms != extra_delay_ms &&
+		           device->resampling_queue != nullptr) {
+			device->extra_delay_ms = extra_delay_ms;
+			device->resampling_queue->change_expected_delay(get_delay_seconds(extra_delay_ms));
 		}
 	}
 	for (unsigned card_index = 0; card_index < num_ffmpeg_inputs; ++card_index) {
 		const DeviceSpec device_spec{InputSourceType::FFMPEG_VIDEO_INPUT, card_index};
 		AudioDevice *device = find_audio_device(device_spec);
+		double extra_delay_ms = new_extra_delay_ms[device_spec];
 		if (device->interesting_channels != interesting_channels[device_spec]) {
 			device->interesting_channels = interesting_channels[device_spec];
+			device->extra_delay_ms = extra_delay_ms;
 			reset_resampler_mutex_held(device_spec);
+		} else if (device->extra_delay_ms != extra_delay_ms &&
+		           device->resampling_queue != nullptr) {
+			device->extra_delay_ms = extra_delay_ms;
+			device->resampling_queue->change_expected_delay(get_delay_seconds(extra_delay_ms));
 		}
 	}