From c537308072c50a6b8bba9fb5db7bc837aeae6ce7 Mon Sep 17 00:00:00 2001 From: Helge Norberg Date: Wed, 11 May 2016 18:31:51 +0200 Subject: [PATCH] ffmpeg_producer: Multiple audio streams are now merged (flattened) before the audio mixer. --- modules/ffmpeg/audio_channel_remapper.cpp | 15 +- .../ffmpeg_pipeline_backend_internal.cpp | 359 +++++++++++------- modules/ffmpeg/producer/audio/audio_decoder.h | 2 +- .../ffmpeg/producer/filter/audio_filter.cpp | 255 ++++++------- modules/ffmpeg/producer/filter/audio_filter.h | 52 ++- modules/ffmpeg/producer/filter/filter.h | 5 + 6 files changed, 400 insertions(+), 288 deletions(-) diff --git a/modules/ffmpeg/audio_channel_remapper.cpp b/modules/ffmpeg/audio_channel_remapper.cpp index 4290f481b..a49395a87 100644 --- a/modules/ffmpeg/audio_channel_remapper.cpp +++ b/modules/ffmpeg/audio_channel_remapper.cpp @@ -144,18 +144,13 @@ struct audio_channel_remapper::impl if (!the_same_layouts_) { auto mix_config = mix_repo->get_config(input_layout_.type, output_layout_.type); - auto pan_filter = u8(generate_pan_filter_str(input_layout_, output_layout_, mix_config)); + auto pan_filter = "[a:0] " + u8(generate_pan_filter_str(input_layout_, output_layout_, mix_config)) + " [aout:0]"; CASPAR_LOG(debug) << "[audio_channel_remapper] Using audio filter: " << pan_filter; auto quiet_logging = ffmpeg::temporary_enable_quiet_logging_for_thread(true); filter_.reset(new ffmpeg::audio_filter( - boost::rational(1, 1), - 48000, - AV_SAMPLE_FMT_S32, - ffmpeg::create_channel_layout_bitmask(input_layout_.num_channels), - { 48000 }, - { AV_SAMPLE_FMT_S32 }, - { ffmpeg::create_channel_layout_bitmask(output_layout_.num_channels) }, + { ffmpeg::audio_input_pad(boost::rational(1, 1), 48000, AV_SAMPLE_FMT_S32, ffmpeg::create_channel_layout_bitmask(input_layout_.num_channels)) }, + { ffmpeg::audio_output_pad({ 48000 }, { AV_SAMPLE_FMT_S32 }, { ffmpeg::create_channel_layout_bitmask(output_layout_.num_channels) }) }, pan_filter)); } else @@ -193,9 +188,9 @@ struct audio_channel_remapper::impl static_cast(input_frame->format), 16); - filter_->push(input_frame); + filter_->push(0, input_frame); - auto frames = filter_->poll_all(); + auto frames = filter_->poll_all(0); CASPAR_ENSURE(frames.size() == 1); // Expect 1:1 from pan filter diff --git a/modules/ffmpeg/ffmpeg_pipeline_backend_internal.cpp b/modules/ffmpeg/ffmpeg_pipeline_backend_internal.cpp index 6c59497e9..f512ebfdc 100644 --- a/modules/ffmpeg/ffmpeg_pipeline_backend_internal.cpp +++ b/modules/ffmpeg/ffmpeg_pipeline_backend_internal.cpp @@ -48,9 +48,11 @@ #include #include +#include #include #include +#include namespace caspar { namespace ffmpeg { @@ -112,25 +114,25 @@ struct source { virtual ~source() { } - virtual std::wstring print() const = 0; - virtual void start() { CASPAR_THROW_EXCEPTION(not_implemented() << msg_info(print())); } - virtual void graph(spl::shared_ptr g) { } - virtual void stop() { } - virtual void start_frame(std::uint32_t frame) { CASPAR_THROW_EXCEPTION(invalid_operation() << msg_info(print() + L" not seekable.")); } - virtual std::uint32_t start_frame() const { CASPAR_THROW_EXCEPTION(invalid_operation() << msg_info(print() + L" not seekable.")); } - virtual void loop(bool value) { CASPAR_THROW_EXCEPTION(invalid_operation() << msg_info(print() + L" not seekable.")); } - virtual bool loop() const { CASPAR_THROW_EXCEPTION(invalid_operation() << msg_info(print() + L" not seekable.")); } - virtual void length(std::uint32_t frames) { CASPAR_THROW_EXCEPTION(invalid_operation() << msg_info(print() + L" not seekable.")); } - virtual std::uint32_t length() const { CASPAR_THROW_EXCEPTION(invalid_operation() << msg_info(print() + L" not seekable.")); } - virtual std::string filename() const { CASPAR_THROW_EXCEPTION(invalid_operation() << msg_info(print())); } - virtual void seek(std::uint32_t frame) { CASPAR_THROW_EXCEPTION(invalid_operation() << msg_info(print() + L" not seekable.")); } - virtual bool has_audio() const { CASPAR_THROW_EXCEPTION(not_implemented() << msg_info(print())); } - virtual int samplerate() const { CASPAR_THROW_EXCEPTION(not_implemented() << msg_info(print())); } - virtual bool has_video() const { CASPAR_THROW_EXCEPTION(not_implemented() << msg_info(print())); } - virtual bool eof() const { CASPAR_THROW_EXCEPTION(not_implemented() << msg_info(print())); } - virtual boost::rational framerate() const { CASPAR_THROW_EXCEPTION(not_implemented() << msg_info(print())); } - virtual std::uint32_t frame_number() const { CASPAR_THROW_EXCEPTION(not_implemented() << msg_info(print())); } - virtual std::shared_ptr get_input_frame(AVMediaType type) { CASPAR_THROW_EXCEPTION(not_implemented() << msg_info(print())); } + virtual std::wstring print() const = 0; + virtual void start() { CASPAR_THROW_EXCEPTION(not_implemented() << msg_info(print())); } + virtual void graph(spl::shared_ptr g) { } + virtual void stop() { } + virtual void start_frame(std::uint32_t frame) { CASPAR_THROW_EXCEPTION(invalid_operation() << msg_info(print() + L" not seekable.")); } + virtual std::uint32_t start_frame() const { CASPAR_THROW_EXCEPTION(invalid_operation() << msg_info(print() + L" not seekable.")); } + virtual void loop(bool value) { CASPAR_THROW_EXCEPTION(invalid_operation() << msg_info(print() + L" not seekable.")); } + virtual bool loop() const { CASPAR_THROW_EXCEPTION(invalid_operation() << msg_info(print() + L" not seekable.")); } + virtual void length(std::uint32_t frames) { CASPAR_THROW_EXCEPTION(invalid_operation() << msg_info(print() + L" not seekable.")); } + virtual std::uint32_t length() const { CASPAR_THROW_EXCEPTION(invalid_operation() << msg_info(print() + L" not seekable.")); } + virtual std::string filename() const { CASPAR_THROW_EXCEPTION(invalid_operation() << msg_info(print())); } + virtual void seek(std::uint32_t frame) { CASPAR_THROW_EXCEPTION(invalid_operation() << msg_info(print() + L" not seekable.")); } + virtual bool has_audio() const { CASPAR_THROW_EXCEPTION(not_implemented() << msg_info(print())); } + virtual int samplerate() const { CASPAR_THROW_EXCEPTION(not_implemented() << msg_info(print())); } + virtual bool has_video() const { CASPAR_THROW_EXCEPTION(not_implemented() << msg_info(print())); } + virtual bool eof() const { CASPAR_THROW_EXCEPTION(not_implemented() << msg_info(print())); } + virtual boost::rational framerate() const { CASPAR_THROW_EXCEPTION(not_implemented() << msg_info(print())); } + virtual std::uint32_t frame_number() const { CASPAR_THROW_EXCEPTION(not_implemented() << msg_info(print())); } + virtual std::vector> get_input_frames_for_streams(AVMediaType type) { CASPAR_THROW_EXCEPTION(not_implemented() << msg_info(print())); } }; struct no_source_selected : public source @@ -143,16 +145,16 @@ struct no_source_selected : public source class file_source : public source { - std::wstring filename_; - spl::shared_ptr graph_; - std::uint32_t start_frame_ = 0; - std::uint32_t length_ = std::numeric_limits::max(); - bool loop_ = false; - mutable boost::mutex pointer_mutex_; - std::shared_ptr input_; - std::shared_ptr audio_decoder_; - std::shared_ptr video_decoder_; - bool started_ = false; + std::wstring filename_; + spl::shared_ptr graph_; + std::uint32_t start_frame_ = 0; + std::uint32_t length_ = std::numeric_limits::max(); + bool loop_ = false; + mutable boost::mutex pointer_mutex_; + std::shared_ptr input_; + std::vector> audio_decoders_; + std::shared_ptr video_decoder_; + bool started_ = false; public: file_source(std::string filename) : filename_(u16(filename)) @@ -175,28 +177,30 @@ public: bool thumbnail_mode = is_logging_quiet_for_thread(); input_.reset(new input(graph_, filename_, loop_, start_frame_, length_, thumbnail_mode)); - try - { - audio_decoder_.reset(new audio_decoder(*input_, core::video_format_desc())); - } - catch (averror_stream_not_found&) - { - CASPAR_LOG(debug) << print() << " No audio-stream found. Running without audio."; - } - catch (...) + for (int i = 0; i < input_->num_audio_streams(); ++i) { - if (is_logging_quiet_for_thread()) + try { - CASPAR_LOG_CURRENT_EXCEPTION_AT_LEVEL(debug); - CASPAR_LOG(info) << print() << " Failed to open audio-stream. Running without audio. Turn on log level debug to see more information."; + audio_decoders_.push_back(spl::make_shared(*input_, core::video_format_desc(), i)); } - else + catch (...) { - CASPAR_LOG_CURRENT_EXCEPTION(); - CASPAR_LOG(warning) << print() << " Failed to open audio-stream. Running without audio."; + if (is_logging_quiet_for_thread()) + { + CASPAR_LOG_CURRENT_EXCEPTION_AT_LEVEL(debug); + CASPAR_LOG(info) << print() << " Failed to open audio-stream. Turn on log level debug to see more information."; + } + else + { + CASPAR_LOG_CURRENT_EXCEPTION(); + CASPAR_LOG(warning) << print() << " Failed to open audio-stream."; + } } } + if (audio_decoders_.empty()) + CASPAR_LOG(debug) << print() << " No audio-stream found. Running without audio."; + try { video_decoder_.reset(new video_decoder(*input_, false)); @@ -271,10 +275,10 @@ public: if (v) return v->nb_frames(); - auto a = get_audio_decoder(); + auto a = get_audio_decoders(); - if (a) - return a->nb_frames(); + if (!a.empty()) + return a.at(0)->nb_frames(); // Should be ok. return length_; } @@ -298,14 +302,12 @@ public: bool has_audio() const override { - return static_cast(get_audio_decoder()); + return !get_audio_decoders().empty(); } int samplerate() const override { - auto decoder = get_audio_decoder(); - - if (!decoder) + if (get_audio_decoders().empty()) return -1; return 48000; @@ -336,23 +338,34 @@ public: return decoder->file_frame_number(); } - std::shared_ptr get_input_frame(AVMediaType type) override + std::vector> get_input_frames_for_streams(AVMediaType type) override { - auto a_decoder = get_audio_decoder(); + auto a_decoders = get_audio_decoders(); auto v_decoder = get_video_decoder(); expect_started(); - if (type == AVMediaType::AVMEDIA_TYPE_AUDIO && a_decoder) + if (type == AVMediaType::AVMEDIA_TYPE_AUDIO && !a_decoders.empty()) { - std::shared_ptr frame; + std::vector> frames; - for (int i = 0; i < 64; ++i) + for (auto& a_decoder : a_decoders) { - frame = (*a_decoder)(); + std::shared_ptr frame; - if (frame && frame->data[0]) - return spl::make_shared_ptr(frame); + for (int i = 0; i < 64; ++i) + { + frame = (*a_decoder)(); + + if (frame && frame->data[0]) + break; + else + frame.reset(); + } + + frames.push_back(std::move(frame)); } + + return frames; } else if (type == AVMediaType::AVMEDIA_TYPE_VIDEO && v_decoder) { @@ -363,14 +376,14 @@ public: frame = (*v_decoder)(); if (frame && frame->data[0]) - return spl::make_shared_ptr(frame); + return { frame }; } } else CASPAR_THROW_EXCEPTION(caspar_exception() << msg_info( print() + L" Unhandled media type " + boost::lexical_cast(type))); - return nullptr; + return { }; } private: void expect_started() const @@ -385,10 +398,10 @@ private: return input_; } - std::shared_ptr get_audio_decoder() const + std::vector> get_audio_decoders() const { boost::lock_guard lock(pointer_mutex_); - return audio_decoder_; + return audio_decoders_; } std::shared_ptr get_video_decoder() const @@ -504,7 +517,7 @@ public: return video_frames_.try_push(std::move(data)); } - std::shared_ptr get_input_frame(AVMediaType type) override + std::vector> get_input_frames_for_streams(AVMediaType type) override { if (!running_) CASPAR_THROW_EXCEPTION(invalid_operation() << msg_info(print() + L" not running.")); @@ -515,7 +528,7 @@ public: audio_frames_.pop(samples); if (samples.empty()) - return nullptr; + return { }; spl::shared_ptr av_frame(av_frame_alloc(), [samples](AVFrame* p) { av_frame_free(&p); }); @@ -537,7 +550,7 @@ public: static_cast(av_frame->format), 16)); - return av_frame; + return { av_frame }; } else if (type == AVMediaType::AVMEDIA_TYPE_VIDEO && has_video()) { @@ -545,7 +558,7 @@ public: video_frames_.pop(data); if (data.empty()) - return nullptr; + return {}; spl::shared_ptr av_frame(av_frame_alloc(), [data](AVFrame* p) { av_frame_free(&p); }); avcodec_get_frame_defaults(av_frame.get()); @@ -570,7 +583,7 @@ public: height_, 1)); - return av_frame; + return { av_frame }; } else CASPAR_THROW_EXCEPTION(caspar_exception() << msg_info( @@ -582,19 +595,21 @@ struct sink { virtual ~sink() { } - virtual std::wstring print() const = 0; - virtual void graph(spl::shared_ptr g) { } - virtual void acodec(std::string codec) { CASPAR_THROW_EXCEPTION(invalid_operation() << msg_info(print() + L" not an encoder.")); } - virtual void vcodec(std::string codec) { CASPAR_THROW_EXCEPTION(invalid_operation() << msg_info(print() + L" not an encoder.")); } - virtual void format(std::string fmt) { CASPAR_THROW_EXCEPTION(invalid_operation() << msg_info(print() + L" not an encoder.")); } - virtual void framerate(boost::rational framerate) { CASPAR_THROW_EXCEPTION(invalid_operation() << msg_info(print() + L" not an encoder.")); } - virtual void start(bool has_audio, bool has_video) { CASPAR_THROW_EXCEPTION(not_implemented() << msg_info(print())); } - virtual void stop() { } - virtual std::vector supported_sample_formats() const { CASPAR_THROW_EXCEPTION(not_implemented() << msg_info(print())); } - virtual std::vector supported_samplerates() const { CASPAR_THROW_EXCEPTION(not_implemented() << msg_info(print())); } - virtual std::vector supported_pixel_formats() const { CASPAR_THROW_EXCEPTION(not_implemented() << msg_info(print())); } - virtual boost::optional try_push(AVMediaType type, spl::shared_ptr frame) { CASPAR_THROW_EXCEPTION(not_implemented() << msg_info(print())); } - virtual void eof() { CASPAR_THROW_EXCEPTION(not_implemented() << msg_info(print())); } + virtual std::wstring print() const = 0; + virtual void graph(spl::shared_ptr g) { } + virtual void acodec(std::string codec) { CASPAR_THROW_EXCEPTION(invalid_operation() << msg_info(print() + L" not an encoder.")); } + virtual void vcodec(std::string codec) { CASPAR_THROW_EXCEPTION(invalid_operation() << msg_info(print() + L" not an encoder.")); } + virtual void format(std::string fmt) { CASPAR_THROW_EXCEPTION(invalid_operation() << msg_info(print() + L" not an encoder.")); } + virtual void framerate(boost::rational framerate) { CASPAR_THROW_EXCEPTION(invalid_operation() << msg_info(print() + L" not an encoder.")); } + virtual void start(bool has_audio, bool has_video) { CASPAR_THROW_EXCEPTION(not_implemented() << msg_info(print())); } + virtual void stop() { } + virtual std::vector supported_sample_formats() const { CASPAR_THROW_EXCEPTION(not_implemented() << msg_info(print())); } + virtual std::vector supported_samplerates() const { CASPAR_THROW_EXCEPTION(not_implemented() << msg_info(print())); } + virtual std::vector supported_pixel_formats() const { CASPAR_THROW_EXCEPTION(not_implemented() << msg_info(print())); } + virtual int wanted_num_audio_streams() const { CASPAR_THROW_EXCEPTION(not_implemented() << msg_info(print())); } + virtual boost::optional wanted_num_channels_per_stream() const { CASPAR_THROW_EXCEPTION(not_implemented() << msg_info(print())); } + virtual boost::optional try_push(AVMediaType type, int stream_index, spl::shared_ptr frame) { CASPAR_THROW_EXCEPTION(not_implemented() << msg_info(print())); } + virtual void eof() { CASPAR_THROW_EXCEPTION(not_implemented() << msg_info(print())); } }; struct no_sink_selected : public sink @@ -713,7 +728,17 @@ public: }; } - boost::optional try_push(AVMediaType type, spl::shared_ptr av_frame) override + int wanted_num_audio_streams() const override + { + return 1; + } + + boost::optional wanted_num_channels_per_stream() const + { + return boost::none; + } + + boost::optional try_push(AVMediaType type, int stream_index, spl::shared_ptr av_frame) override { if (!has_audio_ && !has_video_) CASPAR_THROW_EXCEPTION(invalid_operation()); @@ -832,6 +857,20 @@ public: } }; +struct audio_stream_info +{ + int num_channels = 0; + AVSampleFormat sampleformat = AVSampleFormat::AV_SAMPLE_FMT_NONE; +}; + +struct video_stream_info +{ + int width = 0; + int height = 0; + AVPixelFormat pixelformat = AVPixelFormat::AV_PIX_FMT_NONE; + core::field_mode fieldmode = core::field_mode::progressive; +}; + class ffmpeg_pipeline_backend_internal : public ffmpeg_pipeline_backend { spl::shared_ptr graph_; @@ -840,12 +879,8 @@ class ffmpeg_pipeline_backend_internal : public ffmpeg_pipeline_backend std::function data)> try_push_audio_; std::function data)> try_push_video_; - int source_num_channels_ = 0; - AVSampleFormat source_sampleformat_ = AVSampleFormat::AV_SAMPLE_FMT_NONE; - int source_width_ = 0; - int source_height_ = 0; - AVPixelFormat source_pixelformat_ = AVPixelFormat::AV_PIX_FMT_NONE; - core::field_mode source_fieldmode_ = core::field_mode::progressive; + std::vector source_audio_streams_; + video_stream_info source_video_stream_; std::string afilter_; std::unique_ptr audio_filter_; @@ -856,6 +891,8 @@ class ffmpeg_pipeline_backend_internal : public ffmpeg_pipeline_backend std::function try_pop_frame_; tbb::atomic started_; + tbb::spin_mutex exception_mutex_; + boost::exception_ptr exception_; boost::thread thread_; public: ffmpeg_pipeline_backend_internal() @@ -869,6 +906,14 @@ public: stop(); } + void throw_if_error() + { + boost::lock_guard lock(exception_mutex_); + + if (exception_ != nullptr) + boost::rethrow_exception(exception_); + } + void graph(spl::shared_ptr g) override { graph_ = std::move(g); @@ -944,12 +989,12 @@ public: int width() const override { - return source_width_; + return source_video_stream_.width; } int height() const override { - return source_height_; + return source_video_stream_.height; } boost::rational framerate() const override @@ -1001,6 +1046,8 @@ public: bool try_push_audio(caspar::array data) override { + throw_if_error(); + if (try_push_audio_) return try_push_audio_(std::move(data)); else @@ -1009,6 +1056,8 @@ public: bool try_push_video(caspar::array data) override { + throw_if_error(); + if (try_push_video_) return try_push_video_(std::move(data)); else @@ -1017,6 +1066,8 @@ public: core::draw_frame try_pop_frame() override { + throw_if_error(); + if (!try_pop_frame_) CASPAR_THROW_EXCEPTION(invalid_operation()); @@ -1056,27 +1107,33 @@ private: while (started_ && (source_->has_audio() || source_->has_video())) { - auto needed = *result; - auto input_frame = source_->get_input_frame(needed); + auto needed = *result; + auto input_frames_for_streams = source_->get_input_frames_for_streams(needed); - if (input_frame) + if (!input_frames_for_streams.empty() && input_frames_for_streams.at(0)) { - if (needed == AVMediaType::AVMEDIA_TYPE_AUDIO) + for (int input_stream_index = 0; input_stream_index < input_frames_for_streams.size(); ++input_stream_index) { - result = sink_->try_push(AVMediaType::AVMEDIA_TYPE_AUDIO, spl::make_shared_ptr(std::move(input_frame))); - } - else if (needed == AVMediaType::AVMEDIA_TYPE_VIDEO) - { - initialize_video_filter_if_needed(*input_frame); - video_filter_->push(std::move(input_frame)); + if (needed == AVMediaType::AVMEDIA_TYPE_AUDIO) + { + initialize_audio_filter_if_needed(input_frames_for_streams); + audio_filter_->push(input_stream_index, std::move(input_frames_for_streams.at(input_stream_index))); - for (auto filtered_frame : video_filter_->poll_all()) + for (int output_stream_index = 0; output_stream_index < sink_->wanted_num_audio_streams(); ++output_stream_index) + for (auto filtered_frame : audio_filter_->poll_all(output_stream_index)) + result = sink_->try_push(AVMediaType::AVMEDIA_TYPE_AUDIO, output_stream_index, std::move(filtered_frame)); + } + else if (needed == AVMediaType::AVMEDIA_TYPE_VIDEO) { - result = sink_->try_push(AVMediaType::AVMEDIA_TYPE_VIDEO, std::move(filtered_frame)); + initialize_video_filter_if_needed(*input_frames_for_streams.at(input_stream_index)); + video_filter_->push(std::move(input_frames_for_streams.at(input_stream_index))); + + for (auto filtered_frame : video_filter_->poll_all()) + result = sink_->try_push(AVMediaType::AVMEDIA_TYPE_VIDEO, 0, std::move(filtered_frame)); } + else + CASPAR_THROW_EXCEPTION(not_supported()); } - else - CASPAR_THROW_EXCEPTION(not_supported()); } else if (source_->eof()) { @@ -1104,6 +1161,9 @@ private: { CASPAR_LOG_CURRENT_EXCEPTION(); } + + boost::lock_guard lock(exception_mutex_); + exception_ = boost::current_exception(); } video_filter_.reset(); @@ -1123,12 +1183,19 @@ private: } } - void initialize_audio_filter_if_needed(const AVFrame& av_frame) + void initialize_audio_filter_if_needed(const std::vector>& av_frames_per_stream) { - bool changed = false; + bool changed = av_frames_per_stream.size() != source_audio_streams_.size(); + source_audio_streams_.resize(av_frames_per_stream.size()); - set_if_changed(changed, source_sampleformat_, static_cast(av_frame.format)); - set_if_changed(changed, source_num_channels_, av_frame.channels); + for (int i = 0; i < av_frames_per_stream.size(); ++i) + { + auto& av_frame = *av_frames_per_stream.at(i); + auto& stream = source_audio_streams_.at(i); + + set_if_changed(changed, stream.sampleformat, static_cast(av_frame.format)); + set_if_changed(changed, stream.num_channels, av_frame.channels); + } if (changed) initialize_audio_filter(); @@ -1136,31 +1203,69 @@ private: void initialize_audio_filter() { - audio_filter_.reset(new audio_filter( - boost::rational(1, source_->samplerate()), - source_->samplerate(), - source_sampleformat_, - av_get_default_channel_layout(source_num_channels_), - sink_->supported_samplerates(), - sink_->supported_sample_formats(), - {}, - afilter_)); + std::vector input_pads; + std::vector output_pads; + + for (auto& source_audio_stream : source_audio_streams_) + { + input_pads.emplace_back( + boost::rational(1, source_->samplerate()), + source_->samplerate(), + source_audio_stream.sampleformat, + av_get_default_channel_layout(source_audio_stream.num_channels)); + } + + auto total_num_channels = cpplinq::from(source_audio_streams_) + .select([](const audio_stream_info& info) { return info.num_channels; }) + .aggregate(0, std::plus()); + + if (total_num_channels > 1 && sink_->wanted_num_audio_streams() > 1) + CASPAR_THROW_EXCEPTION(invalid_operation() + << msg_info("only one-to-many or many-to-one audio stream conversion supported.")); + + std::wstring amerge; + + if (sink_->wanted_num_audio_streams() == 1 && !sink_->wanted_num_channels_per_stream()) + { + output_pads.emplace_back( + sink_->supported_samplerates(), + sink_->supported_sample_formats(), + std::vector({ av_get_default_channel_layout(total_num_channels) })); + + if (source_audio_streams_.size() > 1) + { + for (int i = 0; i < source_audio_streams_.size(); ++i) + amerge += L"[a:" + boost::lexical_cast(i) + L"]"; + + amerge += L"amerge=inputs=" + boost::lexical_cast(source_audio_streams_.size()); + } + } + + std::wstring afilter = u16(afilter_); + + if (!amerge.empty()) + { + afilter = prepend_filter(u16(afilter), amerge); + afilter += L"[aout:0]"; + } + + audio_filter_.reset(new audio_filter(input_pads, output_pads, u8(afilter))); } void initialize_video_filter_if_needed(const AVFrame& av_frame) { bool changed = false; - set_if_changed(changed, source_width_, av_frame.width); - set_if_changed(changed, source_height_, av_frame.height); - set_if_changed(changed, source_pixelformat_, static_cast(av_frame.format)); + set_if_changed(changed, source_video_stream_.width, av_frame.width); + set_if_changed(changed, source_video_stream_.height, av_frame.height); + set_if_changed(changed, source_video_stream_.pixelformat, static_cast(av_frame.format)); core::field_mode field_mode = core::field_mode::progressive; if (av_frame.interlaced_frame) field_mode = av_frame.top_field_first ? core::field_mode::upper : core::field_mode::lower; - set_if_changed(changed, source_fieldmode_, field_mode); + set_if_changed(changed, source_video_stream_.fieldmode, field_mode); if (changed) initialize_video_filter(); @@ -1168,22 +1273,22 @@ private: void initialize_video_filter() { - if (source_fieldmode_ != core::field_mode::progressive && !filter::is_deinterlacing(u16(vfilter_))) + if (source_video_stream_.fieldmode != core::field_mode::progressive && !filter::is_deinterlacing(u16(vfilter_))) vfilter_ = u8(append_filter(u16(vfilter_), L"YADIF=1:-1")); - if (source_height_ == 480) // NTSC DV + if (source_video_stream_.height == 480) // NTSC DV { - auto pad_str = L"PAD=" + boost::lexical_cast(source_width_) + L":486:0:2:black"; + auto pad_str = L"PAD=" + boost::lexical_cast(source_video_stream_.width) + L":486:0:2:black"; vfilter_ = u8(append_filter(u16(vfilter_), pad_str)); } video_filter_.reset(new filter( - source_width_, - source_height_, + source_video_stream_.width, + source_video_stream_.height, 1 / source_->framerate(), source_->framerate(), boost::rational(1, 1), // TODO - source_pixelformat_, + source_video_stream_.pixelformat, sink_->supported_pixel_formats(), vfilter_)); sink_->framerate(framerate()); diff --git a/modules/ffmpeg/producer/audio/audio_decoder.h b/modules/ffmpeg/producer/audio/audio_decoder.h index 93cbcccb9..99f6e398b 100644 --- a/modules/ffmpeg/producer/audio/audio_decoder.h +++ b/modules/ffmpeg/producer/audio/audio_decoder.h @@ -38,7 +38,7 @@ namespace caspar { namespace ffmpeg { class audio_decoder : public boost::noncopyable { public: - explicit audio_decoder(class input& input, const core::video_format_desc& format_desc, int audio_stream_index = 0); + explicit audio_decoder(class input& input, const core::video_format_desc& format_desc, int audio_stream_index); audio_decoder(audio_decoder&& other); audio_decoder& operator=(audio_decoder&& other); diff --git a/modules/ffmpeg/producer/filter/audio_filter.cpp b/modules/ffmpeg/producer/filter/audio_filter.cpp index c592023b3..e562fa377 100644 --- a/modules/ffmpeg/producer/filter/audio_filter.cpp +++ b/modules/ffmpeg/producer/filter/audio_filter.cpp @@ -37,6 +37,7 @@ #include #include #include +#include #if defined(_MSC_VER) #pragma warning (push) @@ -57,104 +58,98 @@ extern "C" #endif namespace caspar { namespace ffmpeg { - + +std::string create_sourcefilter_str(const audio_input_pad& input_pad, std::string name) +{ + const auto asrc_options = (boost::format("abuffer=time_base=%1%/%2%:sample_rate=%3%:sample_fmt=%4%:channel_layout=0x%|5$x| [%6%]") + % input_pad.time_base.numerator() % input_pad.time_base.denominator() + % input_pad.sample_rate + % av_get_sample_fmt_name(input_pad.sample_fmt) + % input_pad.audio_channel_layout + % name).str(); + + return asrc_options; +} + +std::string create_filter_list(const std::vector& items) +{ + return boost::join(items, "|"); +} + +std::string channel_layout_to_string(int64_t channel_layout) +{ + return (boost::format("0x%|1$x|") % channel_layout).str(); +} + +std::string create_sinkfilter_str(const audio_output_pad& output_pad, std::string name) +{ + const auto asink_options = (boost::format("[%4%] abuffersink")//=sample_fmts=%1%:channel_layouts=%2%:sample_rates=%3%") + % create_filter_list(cpplinq::from(output_pad.sample_fmts) + .select(&av_get_sample_fmt_name) + .select([](const char* str) { return std::string(str); }) + .to_vector()) + % create_filter_list(cpplinq::from(output_pad.sample_fmts) + .select(&channel_layout_to_string) + .to_vector()) + % create_filter_list(cpplinq::from(output_pad.sample_rates) + .select([](int samplerate) { return boost::lexical_cast(samplerate); }) + .to_vector()) + % name).str(); + + return asink_options; +} + struct audio_filter::implementation { std::string filtergraph_; - std::shared_ptr audio_graph_; - AVFilterContext* audio_graph_in_; - AVFilterContext* audio_graph_out_; + std::shared_ptr audio_graph_; + std::vector audio_graph_inputs_; + std::vector audio_graph_outputs_; implementation( - boost::rational in_time_base, - int in_sample_rate, - AVSampleFormat in_sample_fmt, - std::int64_t in_audio_channel_layout, - std::vector out_sample_rates, - std::vector out_sample_fmts, - std::vector out_audio_channel_layouts, - const std::string& filtergraph) + std::vector input_pads, + std::vector output_pads, + const std::string& filtergraph) : filtergraph_(boost::to_lower_copy(filtergraph)) { - if (out_sample_rates.empty()) - out_sample_rates.push_back(48000); - - out_sample_rates.push_back(-1); + if (input_pads.empty()) + CASPAR_THROW_EXCEPTION(invalid_argument() << msg_info("input_pads cannot be empty")); - if (out_sample_fmts.empty()) - out_sample_fmts.push_back(AV_SAMPLE_FMT_S32); + if (output_pads.empty()) + CASPAR_THROW_EXCEPTION(invalid_argument() << msg_info("output_pads cannot be empty")); - out_sample_fmts.push_back(AV_SAMPLE_FMT_NONE); + audio_graph_.reset( + avfilter_graph_alloc(), + [](AVFilterGraph* p) + { + avfilter_graph_free(&p); + }); - if (out_audio_channel_layouts.empty()) - out_audio_channel_layouts.push_back(AV_CH_LAYOUT_NATIVE); + std::vector complete_filter_graph; - out_audio_channel_layouts.push_back(-1); + { + int i = 0; + for (auto& input_pad : input_pads) + complete_filter_graph.push_back(create_sourcefilter_str(input_pad, "a:" + boost::lexical_cast(i++))); + } - audio_graph_.reset( - avfilter_graph_alloc(), - [](AVFilterGraph* p) - { - avfilter_graph_free(&p); - }); + if (filtergraph_.empty()) + complete_filter_graph.push_back("[a:0] anull [aout:0]"); + else + complete_filter_graph.push_back(filtergraph_); - const auto asrc_options = (boost::format("time_base=%1%/%2%:sample_rate=%3%:sample_fmt=%4%:channel_layout=0x%|5$x|") - % in_time_base.numerator() % in_time_base.denominator() - % in_sample_rate - % av_get_sample_fmt_name(in_sample_fmt) - % in_audio_channel_layout).str(); - - AVFilterContext* filt_asrc = nullptr; - FF(avfilter_graph_create_filter( - &filt_asrc, - avfilter_get_by_name("abuffer"), - "filter_buffer", - asrc_options.c_str(), - nullptr, - audio_graph_.get())); - - AVFilterContext* filt_asink = nullptr; - FF(avfilter_graph_create_filter( - &filt_asink, - avfilter_get_by_name("abuffersink"), - "filter_buffersink", - nullptr, - nullptr, - audio_graph_.get())); - -#pragma warning (push) -#pragma warning (disable : 4245) - - FF(av_opt_set_int_list( - filt_asink, - "sample_fmts", - out_sample_fmts.data(), - -1, - AV_OPT_SEARCH_CHILDREN)); - FF(av_opt_set_int_list( - filt_asink, - "channel_layouts", - out_audio_channel_layouts.data(), - -1, - AV_OPT_SEARCH_CHILDREN)); - FF(av_opt_set_int_list( - filt_asink, - "sample_rates", - out_sample_rates.data(), - -1, - AV_OPT_SEARCH_CHILDREN)); + { + int i = 0; + for (auto& output_pad : output_pads) + complete_filter_graph.push_back(create_sinkfilter_str(output_pad, "aout:" + boost::lexical_cast(i++))); + } -#pragma warning (pop) - configure_filtergraph( *audio_graph_, - filtergraph_, - *filt_asrc, - *filt_asink); - - audio_graph_in_ = filt_asrc; - audio_graph_out_ = filt_asink; + boost::join(complete_filter_graph, ";"), + audio_graph_inputs_, + audio_graph_outputs_); if (is_logging_quiet_for_thread()) CASPAR_LOG(trace) @@ -173,47 +168,38 @@ struct audio_filter::implementation void configure_filtergraph( AVFilterGraph& graph, const std::string& filtergraph, - AVFilterContext& source_ctx, - AVFilterContext& sink_ctx) + std::vector& source_contexts, + std::vector& sink_contexts) { - AVFilterInOut* outputs = nullptr; - AVFilterInOut* inputs = nullptr; - try { - if(!filtergraph.empty()) - { - outputs = avfilter_inout_alloc(); - inputs = avfilter_inout_alloc(); - - CASPAR_VERIFY(outputs && inputs); - - outputs->name = av_strdup("in"); - outputs->filter_ctx = &source_ctx; - outputs->pad_idx = 0; - outputs->next = nullptr; - - inputs->name = av_strdup("out"); - inputs->filter_ctx = &sink_ctx; - inputs->pad_idx = 0; - inputs->next = nullptr; - - FF(avfilter_graph_parse( - &graph, - filtergraph.c_str(), - inputs, - outputs, - nullptr)); - } - else + AVFilterInOut* outputs = nullptr; + AVFilterInOut* inputs = nullptr; + + FF(avfilter_graph_parse2( + &graph, + filtergraph.c_str(), + &inputs, + &outputs)); + + // Workaround because outputs and inputs are not filled in for some reason + for (unsigned i = 0; i < graph.nb_filters; ++i) { - FF(avfilter_link( - &source_ctx, - 0, - &sink_ctx, - 0)); + auto filter = graph.filters[i]; + + if (std::string(filter->filter->name) == "abuffer") + source_contexts.push_back(filter); + + if (std::string(filter->filter->name) == "abuffersink") + sink_contexts.push_back(filter); } + for (AVFilterInOut* iter = inputs; iter; iter = iter->next) + source_contexts.push_back(iter->filter_ctx); + + for (AVFilterInOut* iter = outputs; iter; iter = iter->next) + sink_contexts.push_back(iter->filter_ctx); + FF(avfilter_graph_config( &graph, nullptr)); @@ -226,14 +212,14 @@ struct audio_filter::implementation } } - void push(const std::shared_ptr& src_av_frame) + void push(int input_pad_id, const std::shared_ptr& src_av_frame) { FF(av_buffersrc_add_frame( - audio_graph_in_, + audio_graph_inputs_.at(input_pad_id), src_av_frame.get())); } - std::shared_ptr poll() + std::shared_ptr poll(int output_pad_id) { std::shared_ptr filt_frame( av_frame_alloc(), @@ -243,7 +229,7 @@ struct audio_filter::implementation }); const auto ret = av_buffersink_get_frame( - audio_graph_out_, + audio_graph_outputs_.at(output_pad_id), filt_frame.get()); if(ret == AVERROR_EOF || ret == AVERROR(EAGAIN)) @@ -256,32 +242,21 @@ struct audio_filter::implementation }; audio_filter::audio_filter( - boost::rational in_time_base, - int in_sample_rate, - AVSampleFormat in_sample_fmt, - std::int64_t in_audio_channel_layout, - std::vector out_sample_rates, - std::vector out_sample_fmts, - std::vector out_audio_channel_layouts, + std::vector input_pads, + std::vector output_pads, const std::string& filtergraph) - : impl_(new implementation( - in_time_base, - in_sample_rate, - in_sample_fmt, - in_audio_channel_layout, - std::move(out_sample_rates), - std::move(out_sample_fmts), - std::move(out_audio_channel_layouts), - filtergraph)){} + : impl_(new implementation(std::move(input_pads), std::move(output_pads), filtergraph)) +{ +} audio_filter::audio_filter(audio_filter&& other) : impl_(std::move(other.impl_)){} audio_filter& audio_filter::operator=(audio_filter&& other){impl_ = std::move(other.impl_); return *this;} -void audio_filter::push(const std::shared_ptr& frame){impl_->push(frame);} -std::shared_ptr audio_filter::poll(){return impl_->poll();} +void audio_filter::push(int input_pad_id, const std::shared_ptr& frame){impl_->push(input_pad_id, frame);} +std::shared_ptr audio_filter::poll(int output_pad_id){return impl_->poll(output_pad_id);} std::wstring audio_filter::filter_str() const{return u16(impl_->filtergraph_);} -std::vector> audio_filter::poll_all() +std::vector> audio_filter::poll_all(int output_pad_id) { std::vector> frames; - for(auto frame = poll(); frame; frame = poll()) + for(auto frame = poll(output_pad_id); frame; frame = poll(output_pad_id)) frames.push_back(spl::make_shared_ptr(frame)); return frames; } diff --git a/modules/ffmpeg/producer/filter/audio_filter.h b/modules/ffmpeg/producer/filter/audio_filter.h index 71b71f0a4..370bed045 100644 --- a/modules/ffmpeg/producer/filter/audio_filter.h +++ b/modules/ffmpeg/producer/filter/audio_filter.h @@ -45,24 +45,56 @@ struct AVFrame; namespace caspar { namespace ffmpeg { +struct audio_input_pad +{ + boost::rational time_base; + int sample_rate; + AVSampleFormat sample_fmt; + std::int64_t audio_channel_layout; + + audio_input_pad( + boost::rational time_base, + int sample_rate, + AVSampleFormat sample_fmt, + std::int64_t audio_channel_layout) + : time_base(std::move(time_base)) + , sample_rate(sample_rate) + , sample_fmt(sample_fmt) + , audio_channel_layout(audio_channel_layout) + { + } +}; + +struct audio_output_pad +{ + std::vector sample_rates; + std::vector sample_fmts; + std::vector audio_channel_layouts; + + audio_output_pad( + std::vector sample_rates, + std::vector sample_fmts, + std::vector audio_channel_layouts) + : sample_rates(std::move(sample_rates)) + , sample_fmts(std::move(sample_fmts)) + , audio_channel_layouts(std::move(audio_channel_layouts)) + { + } +}; + class audio_filter : boost::noncopyable { public: audio_filter( - boost::rational in_time_base, - int in_sample_rate, - AVSampleFormat in_sample_fmt, - std::int64_t in_audio_channel_layout, - std::vector out_sample_rates, - std::vector out_sample_fmts, - std::vector out_audio_channel_layouts, + std::vector input_pads, + std::vector output_pads, const std::string& filtergraph); audio_filter(audio_filter&& other); audio_filter& operator=(audio_filter&& other); - void push(const std::shared_ptr& frame); - std::shared_ptr poll(); - std::vector> poll_all(); + void push(int input_pad_id, const std::shared_ptr& frame); + std::shared_ptr poll(int output_pad_id); + std::vector> poll_all(int output_pad_id); std::wstring filter_str() const; private: diff --git a/modules/ffmpeg/producer/filter/filter.h b/modules/ffmpeg/producer/filter/filter.h index 9eacfceeb..e8d623141 100644 --- a/modules/ffmpeg/producer/filter/filter.h +++ b/modules/ffmpeg/producer/filter/filter.h @@ -51,6 +51,11 @@ static std::wstring append_filter(const std::wstring& filters, const std::wstrin return filters + (filters.empty() ? L"" : L",") + filter; } +static std::wstring prepend_filter(const std::wstring& filters, const std::wstring& filter) +{ + return filter + (filters.empty() ? L"" : L",") + filters; +} + class filter : boost::noncopyable { public: -- 2.39.2