From 80a2892bbe07a4e065704830e9e7244d2b1139fa Mon Sep 17 00:00:00 2001 From: "Steinar H. Gunderson" Date: Sun, 22 Apr 2018 15:23:35 +0200 Subject: [PATCH] Add support for using FFmpeg video sources as audio. --- audio_mixer.cpp | 47 +++++++++++++++++++++++++++++++++++----- audio_mixer.h | 5 +++-- ffmpeg_capture.cpp | 1 - ffmpeg_capture.h | 1 - input_mapping.cpp | 10 ++++++++- input_mapping.h | 2 +- input_mapping_dialog.cpp | 4 +++- mixer.cpp | 38 +++++++++++++++++++++----------- mixer.h | 6 ++--- state.proto | 2 +- 10 files changed, 87 insertions(+), 29 deletions(-) diff --git a/audio_mixer.cpp b/audio_mixer.cpp index 887b105..d2617f6 100644 --- a/audio_mixer.cpp +++ b/audio_mixer.cpp @@ -167,8 +167,10 @@ void deinterleave_samples(const vector &in, vector *out_l, vector< } // namespace -AudioMixer::AudioMixer(unsigned num_cards) - : num_cards(num_cards), +AudioMixer::AudioMixer(unsigned num_capture_cards, unsigned num_ffmpeg_inputs) + : num_capture_cards(num_capture_cards), + num_ffmpeg_inputs(num_ffmpeg_inputs), + ffmpeg_inputs(new AudioDevice[num_ffmpeg_inputs]), limiter(OUTPUT_FREQUENCY), correlation(OUTPUT_FREQUENCY) { @@ -387,6 +389,8 @@ AudioMixer::AudioDevice *AudioMixer::find_audio_device(DeviceSpec device) return &video_cards[device.index]; case InputSourceType::ALSA_INPUT: return &alsa_inputs[device.index]; + case InputSourceType::FFMPEG_VIDEO_INPUT: + return &ffmpeg_inputs[device.index]; case InputSourceType::SILENCE: default: assert(false); @@ -425,7 +429,8 @@ void AudioMixer::fill_audio_bus(const map> &samples_ca memset(output, 0, num_samples * 2 * sizeof(*output)); } else { assert(bus.device.type == InputSourceType::CAPTURE_CARD || - bus.device.type == InputSourceType::ALSA_INPUT); + bus.device.type == InputSourceType::ALSA_INPUT || + bus.device.type == InputSourceType::FFMPEG_VIDEO_INPUT); const float *lsrc, *rsrc; unsigned lstride, rstride; float *dptr = output; @@ -455,6 +460,12 @@ vector AudioMixer::get_active_devices() const ret.push_back(device_spec); } } + for (unsigned card_index = 0; card_index < num_ffmpeg_inputs; ++card_index) { + const DeviceSpec device_spec{InputSourceType::FFMPEG_VIDEO_INPUT, card_index}; + if (!find_audio_device(device_spec)->interesting_channels.empty()) { + ret.push_back(device_spec); + } + } return ret; } @@ -878,7 +889,7 @@ map AudioMixer::get_devices() lock_guard lock(audio_mutex); map devices; - for (unsigned card_index = 0; card_index < num_cards; ++card_index) { + for (unsigned card_index = 0; card_index < num_capture_cards; ++card_index) { const DeviceSpec spec{ InputSourceType::CAPTURE_CARD, card_index }; const AudioDevice *device = &video_cards[card_index]; DeviceInfo info; @@ -898,6 +909,14 @@ map AudioMixer::get_devices() info.alsa_address = device.address; devices.insert(make_pair(spec, info)); } + for (unsigned card_index = 0; card_index < num_ffmpeg_inputs; ++card_index) { + const DeviceSpec spec{ InputSourceType::FFMPEG_VIDEO_INPUT, card_index }; + const AudioDevice *device = &ffmpeg_inputs[card_index]; + DeviceInfo info; + info.display_name = device->display_name; + info.num_channels = 2; + devices.insert(make_pair(spec, info)); + } return devices; } @@ -924,6 +943,11 @@ void AudioMixer::serialize_device(DeviceSpec device_spec, DeviceSpecProto *devic case InputSourceType::ALSA_INPUT: alsa_pool.serialize_device(device_spec.index, device_spec_proto); break; + case InputSourceType::FFMPEG_VIDEO_INPUT: + device_spec_proto->set_type(DeviceSpecProto::FFMPEG_VIDEO_INPUT); + device_spec_proto->set_index(device_spec.index); + device_spec_proto->set_display_name(ffmpeg_inputs[device_spec.index].display_name); + break; } } @@ -976,12 +1000,15 @@ void AudioMixer::set_input_mapping_lock_held(const InputMapping &new_input_mappi map> interesting_channels; for (const InputMapping::Bus &bus : new_input_mapping.buses) { if (bus.device.type == InputSourceType::CAPTURE_CARD || - bus.device.type == InputSourceType::ALSA_INPUT) { + bus.device.type == InputSourceType::ALSA_INPUT || + bus.device.type == InputSourceType::FFMPEG_VIDEO_INPUT) { for (unsigned channel = 0; channel < 2; ++channel) { if (bus.source_channel[channel] != -1) { interesting_channels[bus.device].insert(bus.source_channel[channel]); } } + } else { + assert(bus.device.type == InputSourceType::SILENCE); } } @@ -1021,6 +1048,8 @@ void AudioMixer::set_input_mapping_lock_held(const InputMapping &new_input_mappi metrics.labels.emplace_back("source_type", "capture_card"); } else if (bus.device.type == InputSourceType::ALSA_INPUT) { metrics.labels.emplace_back("source_type", "alsa_input"); + } else if (bus.device.type == InputSourceType::FFMPEG_VIDEO_INPUT) { + metrics.labels.emplace_back("source_type", "ffmpeg_video_input"); } else { assert(false); } @@ -1064,6 +1093,14 @@ void AudioMixer::set_input_mapping_lock_held(const InputMapping &new_input_mappi reset_resampler_mutex_held(device_spec); } } + for (unsigned card_index = 0; card_index < num_ffmpeg_inputs; ++card_index) { + const DeviceSpec device_spec{InputSourceType::FFMPEG_VIDEO_INPUT, card_index}; + AudioDevice *device = find_audio_device(device_spec); + if (device->interesting_channels != interesting_channels[device_spec]) { + device->interesting_channels = interesting_channels[device_spec]; + reset_resampler_mutex_held(device_spec); + } + } input_mapping = new_input_mapping; } diff --git a/audio_mixer.h b/audio_mixer.h index 17f9528..ebe142a 100644 --- a/audio_mixer.h +++ b/audio_mixer.h @@ -46,7 +46,7 @@ enum EQBand { class AudioMixer { public: - AudioMixer(unsigned num_cards); + AudioMixer(unsigned num_capture_cards, unsigned num_ffmpeg_inputs); void reset_resampler(DeviceSpec device_spec); void reset_meters(); @@ -327,13 +327,14 @@ private: std::vector get_active_devices() const; void set_input_mapping_lock_held(const InputMapping &input_mapping); - unsigned num_cards; + unsigned num_capture_cards, num_ffmpeg_inputs; mutable std::timed_mutex audio_mutex; ALSAPool alsa_pool; AudioDevice video_cards[MAX_VIDEO_CARDS]; // Under audio_mutex. AudioDevice alsa_inputs[MAX_ALSA_CARDS]; // Under audio_mutex. + std::unique_ptr ffmpeg_inputs; // Under audio_mutex. std::atomic locut_cutoff_hz{120}; StereoFilter locut[MAX_BUSES]; // Default cutoff 120 Hz, 24 dB/oct. diff --git a/ffmpeg_capture.cpp b/ffmpeg_capture.cpp index c5f71b6..a0904dc 100644 --- a/ffmpeg_capture.cpp +++ b/ffmpeg_capture.cpp @@ -208,7 +208,6 @@ YCbCrFormat decode_ycbcr_format(const AVPixFmtDescriptor *desc, const AVFrame *f FFmpegCapture::FFmpegCapture(const string &filename, unsigned width, unsigned height) : filename(filename), width(width), height(height), video_timebase{1, 1} { - // Not really used for anything. description = "Video: " + filename; last_frame = steady_clock::now(); diff --git a/ffmpeg_capture.h b/ffmpeg_capture.h index 0fbc3e8..336f6bd 100644 --- a/ffmpeg_capture.h +++ b/ffmpeg_capture.h @@ -17,7 +17,6 @@ // changes parameters midway, which is allowed in some formats. // // You can get out the audio either as decoded or in raw form (Kaeru uses this). -// However, the rest of Nageru can't really use the audio for anything yet. #include #include diff --git a/input_mapping.cpp b/input_mapping.cpp index f894c95..45b6009 100644 --- a/input_mapping.cpp +++ b/input_mapping.cpp @@ -27,6 +27,9 @@ string spec_to_string(DeviceSpec device_spec) case InputSourceType::ALSA_INPUT: snprintf(buf, sizeof(buf), "ALSA input %u", device_spec.index); return buf; + case InputSourceType::FFMPEG_VIDEO_INPUT: + snprintf(buf, sizeof(buf), "FFmpeg input %u", device_spec.index); + return buf; default: assert(false); } @@ -103,10 +106,15 @@ bool load_input_mapping_from_file(const map &devices, co case DeviceSpecProto::SILENCE: device_mapping.push_back(DeviceSpec{InputSourceType::SILENCE, 0}); break; + case DeviceSpecProto::FFMPEG_VIDEO_INPUT: case DeviceSpecProto::CAPTURE_CARD: { // First see if there's a card that matches on both index and name. - DeviceSpec spec{InputSourceType::CAPTURE_CARD, unsigned(device_proto.index())}; + DeviceSpec spec; + spec.type = (device_proto.type() == DeviceSpecProto::CAPTURE_CARD) ? + InputSourceType::CAPTURE_CARD : InputSourceType::FFMPEG_VIDEO_INPUT; + spec.index = unsigned(device_proto.index()); assert(devices.count(spec)); + const DeviceInfo &dev = devices.find(spec)->second; if (remaining_devices.count(spec) && dev.display_name == device_proto.display_name()) { diff --git a/input_mapping.h b/input_mapping.h index 540fde3..67af0f4 100644 --- a/input_mapping.h +++ b/input_mapping.h @@ -6,7 +6,7 @@ #include #include -enum class InputSourceType { SILENCE, CAPTURE_CARD, ALSA_INPUT }; +enum class InputSourceType { SILENCE, CAPTURE_CARD, ALSA_INPUT, FFMPEG_VIDEO_INPUT }; struct DeviceSpec { InputSourceType type; unsigned index; diff --git a/input_mapping_dialog.cpp b/input_mapping_dialog.cpp index 9e12c98..e26649d 100644 --- a/input_mapping_dialog.cpp +++ b/input_mapping_dialog.cpp @@ -148,7 +148,8 @@ void InputMappingDialog::setup_channel_choices_from_bus(unsigned row, const Inpu QComboBox *channel_combo = new QComboBox; channel_combo->addItem(QString("(none)")); if (bus.device.type == InputSourceType::CAPTURE_CARD || - bus.device.type == InputSourceType::ALSA_INPUT) { + bus.device.type == InputSourceType::ALSA_INPUT || + bus.device.type == InputSourceType::FFMPEG_VIDEO_INPUT) { auto device_it = devices.find(bus.device); assert(device_it != devices.end()); unsigned num_device_channels = device_it->second.num_channels; @@ -159,6 +160,7 @@ void InputMappingDialog::setup_channel_choices_from_bus(unsigned row, const Inpu } channel_combo->setCurrentIndex(bus.source_channel[channel] + 1); } else { + assert(bus.device.type == InputSourceType::SILENCE); channel_combo->setCurrentIndex(0); } connect(channel_combo, static_cast(&QComboBox::currentIndexChanged), diff --git a/mixer.cpp b/mixer.cpp index cbc66d4..2365810 100644 --- a/mixer.cpp +++ b/mixer.cpp @@ -304,8 +304,7 @@ Mixer::Mixer(const QSurfaceFormat &format, unsigned num_cards) num_cards(num_cards), mixer_surface(create_surface(format)), h264_encoder_surface(create_surface(format)), - decklink_output_surface(create_surface(format)), - audio_mixer(num_cards) + decklink_output_surface(create_surface(format)) { memcpy(ycbcr_interpretation, global_flags.ycbcr_interpretation, sizeof(ycbcr_interpretation)); CHECK(init_movit(MOVIT_SHADER_DIR, MOVIT_DEBUG_OFF)); @@ -361,6 +360,10 @@ Mixer::Mixer(const QSurfaceFormat &format, unsigned num_cards) // Must be instantiated after VideoEncoder has initialized global_flags.use_zerocopy. theme.reset(new Theme(global_flags.theme_filename, global_flags.theme_dirs, resource_pool.get(), num_cards)); + // Must be instantiated after the theme, as the theme decides the number of FFmpeg inputs. + std::vector video_inputs = theme->get_video_inputs(); + audio_mixer.reset(new AudioMixer(num_cards, video_inputs.size())); + httpd.add_endpoint("/channels", bind(&Mixer::get_channels_json, this), HTTPD::ALLOW_ALL_ORIGINS); for (int channel_idx = 2; channel_idx < theme->get_num_channels(); ++channel_idx) { char url[256]; @@ -421,7 +424,6 @@ Mixer::Mixer(const QSurfaceFormat &format, unsigned num_cards) // Initialize all video inputs the theme asked for. Note that these are // all put _after_ the regular cards, which stop at - 1. - std::vector video_inputs = theme->get_video_inputs(); for (unsigned video_card_index = 0; video_card_index < video_inputs.size(); ++card_index, ++video_card_index) { if (card_index >= MAX_VIDEO_CARDS) { fprintf(stderr, "ERROR: Not enough card slots available for the videos the theme requested.\n"); @@ -558,10 +560,15 @@ void Mixer::configure_card(unsigned card_index, CaptureInterface *capture, CardT // NOTE: start_bm_capture() happens in thread_func(). - DeviceSpec device{InputSourceType::CAPTURE_CARD, card_index}; - audio_mixer.reset_resampler(device); - audio_mixer.set_display_name(device, card->capture->get_description()); - audio_mixer.trigger_state_changed_callback(); + DeviceSpec device; + if (card_type == CardType::FFMPEG_INPUT) { + device = DeviceSpec{InputSourceType::FFMPEG_VIDEO_INPUT, card_index - num_cards}; + } else { + device = DeviceSpec{InputSourceType::CAPTURE_CARD, card_index}; + } + audio_mixer->reset_resampler(device); + audio_mixer->set_display_name(device, card->capture->get_description()); + audio_mixer->trigger_state_changed_callback(); // Unregister old metrics, if any. if (!card->labels.empty()) { @@ -688,7 +695,12 @@ void Mixer::bm_frame(unsigned card_index, uint16_t timecode, FrameAllocator::Frame video_frame, size_t video_offset, VideoFormat video_format, FrameAllocator::Frame audio_frame, size_t audio_offset, AudioFormat audio_format) { - DeviceSpec device{InputSourceType::CAPTURE_CARD, card_index}; + DeviceSpec device; + if (card_index >= num_cards) { + device = DeviceSpec{InputSourceType::FFMPEG_VIDEO_INPUT, card_index - num_cards}; + } else { + device = DeviceSpec{InputSourceType::CAPTURE_CARD, card_index}; + } CaptureCard *card = &cards[card_index]; ++card->metric_input_received_frames; @@ -723,7 +735,7 @@ void Mixer::bm_frame(unsigned card_index, uint16_t timecode, assert(frame_length > 0); size_t num_samples = (audio_frame.len > audio_offset) ? (audio_frame.len - audio_offset) / audio_format.num_channels / (audio_format.bits_per_sample / 8) : 0; - if (num_samples > OUTPUT_FREQUENCY / 10) { + if (num_samples > OUTPUT_FREQUENCY / 10 && card->type != CardType::FFMPEG_INPUT) { printf("%s: Dropping frame with implausible audio length (len=%d, offset=%d) [timecode=0x%04x video_len=%d video_offset=%d video_format=%x)\n", spec_to_string(device).c_str(), int(audio_frame.len), int(audio_offset), timecode, int(video_frame.len), int(video_offset), video_format.id); @@ -748,7 +760,7 @@ void Mixer::bm_frame(unsigned card_index, uint16_t timecode, if (dropped_frames > MAX_FPS * 2) { fprintf(stderr, "%s lost more than two seconds (or time code jumping around; from 0x%04x to 0x%04x), resetting resampler\n", spec_to_string(device).c_str(), card->last_timecode, timecode); - audio_mixer.reset_resampler(device); + audio_mixer->reset_resampler(device); dropped_frames = 0; ++card->metric_input_resets; } else if (dropped_frames > 0) { @@ -759,12 +771,12 @@ void Mixer::bm_frame(unsigned card_index, uint16_t timecode, bool success; do { - success = audio_mixer.add_silence(device, silence_samples, dropped_frames, frame_length); + success = audio_mixer->add_silence(device, silence_samples, dropped_frames, frame_length); } while (!success); } if (num_samples > 0) { - audio_mixer.add_audio(device, audio_frame.data + audio_offset, num_samples, audio_format, frame_length, audio_frame.received_timestamp); + audio_mixer->add_audio(device, audio_frame.data + audio_offset, num_samples, audio_format, frame_length, audio_frame.received_timestamp); } // Done with the audio, so release it. @@ -1488,7 +1500,7 @@ void Mixer::audio_thread_func() ResamplingQueue::RateAdjustmentPolicy rate_adjustment_policy = task.adjust_rate ? ResamplingQueue::ADJUST_RATE : ResamplingQueue::DO_NOT_ADJUST_RATE; - vector samples_out = audio_mixer.get_output( + vector samples_out = audio_mixer->get_output( task.frame_timestamp, task.num_samples, rate_adjustment_policy); diff --git a/mixer.h b/mixer.h index adfb7cf..d8a9c8b 100644 --- a/mixer.h +++ b/mixer.h @@ -288,8 +288,8 @@ public: } // Note: You can also get this through the global variable global_audio_mixer. - AudioMixer *get_audio_mixer() { return &audio_mixer; } - const AudioMixer *get_audio_mixer() const { return &audio_mixer; } + AudioMixer *get_audio_mixer() { return audio_mixer.get(); } + const AudioMixer *get_audio_mixer() const { return audio_mixer.get(); } void schedule_cut() { @@ -548,7 +548,7 @@ private: JitterHistory output_jitter_history; CaptureCard cards[MAX_VIDEO_CARDS]; // Protected by . YCbCrInterpretation ycbcr_interpretation[MAX_VIDEO_CARDS]; // Protected by . - AudioMixer audio_mixer; // Same as global_audio_mixer (see audio_mixer.h). + std::unique_ptr audio_mixer; // Same as global_audio_mixer (see audio_mixer.h). bool input_card_is_master_clock(unsigned card_index, unsigned master_card_index) const; struct OutputFrameInfo { int dropped_frames; // Since last frame. diff --git a/state.proto b/state.proto index 8ea6b97..6372e61 100644 --- a/state.proto +++ b/state.proto @@ -8,7 +8,7 @@ syntax = "proto2"; // to the right device even if the devices have moved around. message DeviceSpecProto { // Members from DeviceSpec itself. - enum InputSourceType { SILENCE = 0; CAPTURE_CARD = 1; ALSA_INPUT = 2; }; + enum InputSourceType { SILENCE = 0; CAPTURE_CARD = 1; ALSA_INPUT = 2; FFMPEG_VIDEO_INPUT = 3; }; optional InputSourceType type = 1; optional int32 index = 2; -- 2.39.2