]> git.sesse.net Git - casparcg/blob - modules/ffmpeg/producer/muxer/frame_muxer.cpp
[ffmpeg] Reimplemented support for playing all audio streams in a clip and treating...
[casparcg] / modules / ffmpeg / producer / muxer / frame_muxer.cpp
1 /*
2 * Copyright (c) 2011 Sveriges Television AB <info@casparcg.com>
3 *
4 * This file is part of CasparCG (www.casparcg.com).
5 *
6 * CasparCG is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * CasparCG is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with CasparCG. If not, see <http://www.gnu.org/licenses/>.
18 *
19 * Author: Robert Nagy, ronag89@gmail.com
20 */
21
22 #include "../../StdAfx.h"
23
24 #include "frame_muxer.h"
25
26 #include "../filter/filter.h"
27 #include "../filter/audio_filter.h"
28 #include "../util/util.h"
29 #include "../../ffmpeg.h"
30
31 #include <core/producer/frame_producer.h>
32 #include <core/frame/draw_frame.h>
33 #include <core/frame/frame_transform.h>
34 #include <core/frame/pixel_format.h>
35 #include <core/frame/frame_factory.h>
36 #include <core/frame/frame.h>
37 #include <core/frame/audio_channel_layout.h>
38
39 #include <common/env.h>
40 #include <common/except.h>
41 #include <common/log.h>
42
43 #if defined(_MSC_VER)
44 #pragma warning (push)
45 #pragma warning (disable : 4244)
46 #endif
47 extern "C"
48 {
49         #define __STDC_CONSTANT_MACROS
50         #define __STDC_LIMIT_MACROS
51         #include <libavcodec/avcodec.h>
52         #include <libavformat/avformat.h>
53 }
54 #if defined(_MSC_VER)
55 #pragma warning (pop)
56 #endif
57
58 #include <common/assert.h>
59 #include <boost/range/algorithm_ext/push_back.hpp>
60 #include <boost/algorithm/string/predicate.hpp>
61 #include <boost/thread/mutex.hpp>
62 #include <boost/optional.hpp>
63
#include <cstddef>
#include <deque>
#include <queue>
#include <vector>
67
68 using namespace caspar::core;
69
70 namespace caspar { namespace ffmpeg {
71
72 struct av_frame_format
73 {
74         int                                                                             pix_format;
75         std::array<int, AV_NUM_DATA_POINTERS>   line_sizes;
76         int                                                                             width;
77         int                                                                             height;
78
79         av_frame_format(const AVFrame& frame)
80                 : pix_format(frame.format)
81                 , width(frame.width)
82                 , height(frame.height)
83         {
84                 boost::copy(frame.linesize, line_sizes.begin());
85         }
86
87         bool operator==(const av_frame_format& other) const
88         {
89                 return pix_format == other.pix_format
90                         && line_sizes == other.line_sizes
91                         && width == other.width
92                         && height == other.height;
93         }
94
95         bool operator!=(const av_frame_format& other) const
96         {
97                 return !(*this == other);
98         }
99 };
100
101 std::unique_ptr<audio_filter> create_amerge_filter(std::vector<audio_input_pad> input_pads, const core::audio_channel_layout& layout)
102 {
103         std::vector<audio_output_pad> output_pads;
104         std::wstring amerge;
105
106         output_pads.emplace_back(
107                         std::vector<int>                        { 48000 },
108                         std::vector<AVSampleFormat>     { AVSampleFormat::AV_SAMPLE_FMT_S32 },
109                         std::vector<uint64_t>           { static_cast<uint64_t>(av_get_default_channel_layout(layout.num_channels)) });
110
111         if (input_pads.size() > 1)
112         {
113                 for (int i = 0; i < input_pads.size(); ++i)
114                         amerge += L"[a:" + boost::lexical_cast<std::wstring>(i) + L"]";
115
116                 amerge += L"amerge=inputs=" + boost::lexical_cast<std::wstring>(input_pads.size());
117         }
118
119         std::wstring afilter;
120
121         if (!amerge.empty())
122         {
123                 afilter = amerge;
124                 afilter += L"[aout:0]";
125         }
126
127         return std::unique_ptr<audio_filter>(new audio_filter(input_pads, output_pads, u8(afilter)));
128 }
129
130 struct frame_muxer::impl : boost::noncopyable
131 {
132         std::queue<std::queue<core::mutable_frame>>             video_streams_;
133         std::queue<core::mutable_audio_buffer>                  audio_streams_;
134         std::queue<core::draw_frame>                                    frame_buffer_;
135         display_mode                                                                    display_mode_                           = display_mode::invalid;
136         const boost::rational<int>                                              in_framerate_;
137         const video_format_desc                                                 format_desc_;
138         const audio_channel_layout                                              audio_channel_layout_;
139
140         std::vector<int>                                                                audio_cadence_                          = format_desc_.audio_cadence;
141
142         spl::shared_ptr<core::frame_factory>                    frame_factory_;
143         boost::optional<av_frame_format>                                previously_filtered_frame_;
144
145         std::unique_ptr<filter>                                                 filter_;
146         const std::wstring                                                              filter_str_;
147         std::unique_ptr<audio_filter>                                   audio_filter_;
148         const bool                                                                              multithreaded_filter_;
149         bool                                                                                    force_deinterlacing_            = env::properties().get(L"configuration.force-deinterlace", false);
150
151         mutable boost::mutex                                                    out_framerate_mutex_;
152         boost::rational<int>                                                    out_framerate_;
153
154         impl(
155                         boost::rational<int> in_framerate,
156                         std::vector<audio_input_pad> audio_input_pads,
157                         const spl::shared_ptr<core::frame_factory>& frame_factory,
158                         const core::video_format_desc& format_desc,
159                         const core::audio_channel_layout& channel_layout,
160                         const std::wstring& filter_str,
161                         bool multithreaded_filter)
162                 : in_framerate_(in_framerate)
163                 , format_desc_(format_desc)
164                 , audio_channel_layout_(channel_layout)
165                 , frame_factory_(frame_factory)
166                 , filter_str_(filter_str)
167                 , multithreaded_filter_(multithreaded_filter)
168         {
169                 video_streams_.push(std::queue<core::mutable_frame>());
170                 audio_streams_.push(core::mutable_audio_buffer());
171
172                 set_out_framerate(in_framerate_);
173
174                 if (!audio_input_pads.empty())
175                 {
176                         audio_filter_ = create_amerge_filter(std::move(audio_input_pads), audio_channel_layout_);
177                 }
178         }
179
180         void push(const std::shared_ptr<AVFrame>& video_frame)
181         {
182                 if (!video_frame)
183                         return;
184
185                 av_frame_format current_frame_format(*video_frame);
186
187                 if (previously_filtered_frame_ && video_frame->data[0] && *previously_filtered_frame_ != current_frame_format)
188                 {
189                         // Fixes bug where avfilter crashes server on some DV files (starts in YUV420p but changes to YUV411p after the first frame).
190                         if (ffmpeg::is_logging_quiet_for_thread())
191                                 CASPAR_LOG(debug) << L"[frame_muxer] Frame format has changed. Resetting display mode.";
192                         else
193                                 CASPAR_LOG(info) << L"[frame_muxer] Frame format has changed. Resetting display mode.";
194
195                         display_mode_ = display_mode::invalid;
196                         filter_.reset();
197                         previously_filtered_frame_ = boost::none;
198                 }
199
200                 if (video_frame == flush_video())
201                 {
202                         video_streams_.push(std::queue<core::mutable_frame>());
203                 }
204                 else if (video_frame == empty_video())
205                 {
206                         video_streams_.back().push(frame_factory_->create_frame(this, core::pixel_format::invalid, audio_channel_layout_));
207                         display_mode_ = display_mode::simple;
208                 }
209                 else
210                 {
211                         if (!filter_ || display_mode_ == display_mode::invalid)
212                                 update_display_mode(video_frame);
213
214                         if (filter_)
215                         {
216                                 filter_->push(video_frame);
217                                 previously_filtered_frame_ = current_frame_format;
218
219                                 for (auto& av_frame : filter_->poll_all())
220                                         video_streams_.back().push(make_frame(this, av_frame, *frame_factory_, audio_channel_layout_));
221                         }
222                 }
223
224                 if (video_streams_.back().size() > 32)
225                         CASPAR_THROW_EXCEPTION(invalid_operation() << source_info("frame_muxer") << msg_info("video-stream overflow. This can be caused by incorrect frame-rate. Check clip meta-data."));
226         }
227
228         void push(const std::vector<std::shared_ptr<core::mutable_audio_buffer>>& audio_samples_per_stream)
229         {
230                 if (audio_samples_per_stream.empty())
231                         return;
232
233                 bool is_flush = boost::count_if(
234                                 audio_samples_per_stream,
235                                 [](std::shared_ptr<core::mutable_audio_buffer> a) { return a == flush_audio(); }) > 0;
236
237                 if (is_flush)
238                 {
239                         audio_streams_.push(core::mutable_audio_buffer());
240                 }
241                 else if (audio_samples_per_stream.at(0) == empty_audio())
242                 {
243                         boost::range::push_back(audio_streams_.back(), core::mutable_audio_buffer(audio_cadence_.front() * audio_channel_layout_.num_channels, 0));
244                 }
245                 else
246                 {
247                         for (int i = 0; i < audio_samples_per_stream.size(); ++i)
248                         {
249                                 auto range = boost::make_iterator_range_n(
250                                                 audio_samples_per_stream.at(i)->data(),
251                                                 audio_samples_per_stream.at(i)->size());
252
253                                 audio_filter_->push(i, range);
254                         }
255
256                         for (auto frame : audio_filter_->poll_all(0))
257                         {
258                                 auto audio = boost::make_iterator_range_n(
259                                                 reinterpret_cast<std::int32_t*>(frame->extended_data[0]),
260                                                 frame->nb_samples * frame->channels);
261
262                                 boost::range::push_back(audio_streams_.back(), audio);
263                         }
264                 }
265
266                 if (audio_streams_.back().size() > 32 * audio_cadence_.front() * audio_channel_layout_.num_channels)
267                         CASPAR_THROW_EXCEPTION(invalid_operation() << source_info("frame_muxer") << msg_info("audio-stream overflow. This can be caused by incorrect frame-rate. Check clip meta-data."));
268         }
269
270         bool video_ready() const
271         {
272                 return video_streams_.size() > 1 || (video_streams_.size() >= audio_streams_.size() && video_ready2());
273         }
274
275         bool audio_ready() const
276         {
277                 return audio_streams_.size() > 1 || (audio_streams_.size() >= video_streams_.size() && audio_ready2());
278         }
279
280         bool video_ready2() const
281         {
282                 return video_streams_.front().size() >= 1;
283         }
284
285         bool audio_ready2() const
286         {
287                 return audio_streams_.front().size() >= audio_cadence_.front() * audio_channel_layout_.num_channels;
288         }
289
290         core::draw_frame poll()
291         {
292                 if (!frame_buffer_.empty())
293                 {
294                         auto frame = frame_buffer_.front();
295                         frame_buffer_.pop();
296                         return frame;
297                 }
298
299                 if (video_streams_.size() > 1 && audio_streams_.size() > 1 && (!video_ready2() || !audio_ready2()))
300                 {
301                         if (!video_streams_.front().empty() || !audio_streams_.front().empty())
302                                 CASPAR_LOG(trace) << "Truncating: " << video_streams_.front().size() << L" video-frames, " << audio_streams_.front().size() << L" audio-samples.";
303
304                         video_streams_.pop();
305                         audio_streams_.pop();
306                 }
307
308                 if (!video_ready2() || !audio_ready2() || display_mode_ == display_mode::invalid)
309                         return core::draw_frame::empty();
310
311                 auto frame                      = pop_video();
312                 frame.audio_data()      = pop_audio();
313
314                 frame_buffer_.push(core::draw_frame(std::move(frame)));
315
316                 return poll();
317         }
318
319         core::mutable_frame pop_video()
320         {
321                 auto frame = std::move(video_streams_.front().front());
322                 video_streams_.front().pop();
323                 return frame;
324         }
325
326         core::mutable_audio_buffer pop_audio()
327         {
328                 CASPAR_VERIFY(audio_streams_.front().size() >= audio_cadence_.front() * audio_channel_layout_.num_channels);
329
330                 auto begin      = audio_streams_.front().begin();
331                 auto end        = begin + (audio_cadence_.front() * audio_channel_layout_.num_channels);
332
333                 core::mutable_audio_buffer samples(begin, end);
334                 audio_streams_.front().erase(begin, end);
335
336                 boost::range::rotate(audio_cadence_, std::begin(audio_cadence_) + 1);
337
338                 return samples;
339         }
340
341         uint32_t calc_nb_frames(uint32_t nb_frames) const
342         {
343                 uint64_t nb_frames2 = nb_frames;
344
345                 if(filter_ && filter_->is_double_rate()) // Take into account transformations in filter.
346                         nb_frames2 *= 2;
347
348                 return static_cast<uint32_t>(nb_frames2);
349         }
350
351         boost::rational<int> out_framerate() const
352         {
353                 boost::lock_guard<boost::mutex> lock(out_framerate_mutex_);
354
355                 return out_framerate_;
356         }
357 private:
358         void update_display_mode(const std::shared_ptr<AVFrame>& frame)
359         {
360                 std::wstring filter_str = filter_str_;
361
362                 display_mode_ = display_mode::simple;
363
364                 auto mode = get_mode(*frame);
365
366                 if (filter::is_deinterlacing(filter_str_))
367                 {
368                         display_mode_ = display_mode::simple;
369                 }
370                 else if (mode != core::field_mode::progressive)
371                 {
372                         if (force_deinterlacing_)
373                         {
374                                 display_mode_ = display_mode::deinterlace_bob;
375                         }
376                         else
377                         {
378                                 bool output_also_interlaced = format_desc_.field_mode != core::field_mode::progressive;
379                                 bool interlaced_output_compatible =
380                                                 output_also_interlaced
381                                                 && (
382                                                                 (frame->height == 480 && format_desc_.height == 486) // don't deinterlace for NTSC DV
383                                                                 || frame->height == format_desc_.height
384                                                 )
385                                                 && in_framerate_ == format_desc_.framerate;
386
387                                 display_mode_ = interlaced_output_compatible ? display_mode::simple : display_mode::deinterlace_bob;
388                         }
389                 }
390
391                 if (display_mode_ == display_mode::deinterlace_bob)
392                         filter_str = append_filter(filter_str, L"YADIF=1:-1");
393
394                 auto out_framerate = in_framerate_;
395
396                 if (filter::is_double_rate(filter_str))
397                         out_framerate *= 2;
398
399                 if (frame->height == 480) // NTSC DV
400                 {
401                         auto pad_str = L"PAD=" + boost::lexical_cast<std::wstring>(frame->width) + L":486:0:2:black";
402                         filter_str = append_filter(filter_str, pad_str);
403                 }
404
405                 filter_.reset (new filter(
406                                 frame->width,
407                                 frame->height,
408                                 1 / in_framerate_,
409                                 in_framerate_,
410                                 boost::rational<int>(frame->sample_aspect_ratio.num, frame->sample_aspect_ratio.den),
411                                 static_cast<AVPixelFormat>(frame->format),
412                                 std::vector<AVPixelFormat>(),
413                                 u8(filter_str)));
414
415                 set_out_framerate(out_framerate);
416
417                 auto in_fps = static_cast<double>(in_framerate_.numerator()) / static_cast<double>(in_framerate_.denominator());
418
419                 if (ffmpeg::is_logging_quiet_for_thread())
420                         CASPAR_LOG(debug) << L"[frame_muxer] " << display_mode_ << L" " << print_mode(frame->width, frame->height, in_fps, frame->interlaced_frame > 0);
421                 else
422                         CASPAR_LOG(info) << L"[frame_muxer] " << display_mode_ << L" " << print_mode(frame->width, frame->height, in_fps, frame->interlaced_frame > 0);
423         }
424
425         void merge()
426         {
427                 while (video_ready() && audio_ready() && display_mode_ != display_mode::invalid)
428                 {
429                         auto frame1 = pop_video();
430                         frame1.audio_data() = pop_audio();
431
432                         frame_buffer_.push(core::draw_frame(std::move(frame1)));
433                 }
434         }
435
436         void set_out_framerate(boost::rational<int> out_framerate)
437         {
438                 boost::lock_guard<boost::mutex> lock(out_framerate_mutex_);
439
440                 bool changed = out_framerate != out_framerate_;
441                 out_framerate_ = std::move(out_framerate);
442
443                 if (changed)
444                         update_audio_cadence();
445         }
446
447         void update_audio_cadence()
448         {
449                 audio_cadence_ = find_audio_cadence(out_framerate_);
450
451                 // Note: Uses 1 step rotated cadence for 1001 modes (1602, 1602, 1601, 1602, 1601)
452                 // This cadence fills the audio mixer most optimally.
453                 boost::range::rotate(audio_cadence_, std::end(audio_cadence_) - 1);
454         }
455 };
456
457 frame_muxer::frame_muxer(
458                 boost::rational<int> in_framerate,
459                 std::vector<audio_input_pad> audio_input_pads,
460                 const spl::shared_ptr<core::frame_factory>& frame_factory,
461                 const core::video_format_desc& format_desc,
462                 const core::audio_channel_layout& channel_layout,
463                 const std::wstring& filter,
464                 bool multithreaded_filter)
465         : impl_(new impl(std::move(in_framerate), std::move(audio_input_pads), frame_factory, format_desc, channel_layout, filter, multithreaded_filter)){}
466 void frame_muxer::push(const std::shared_ptr<AVFrame>& video){impl_->push(video);}
467 void frame_muxer::push(const std::vector<std::shared_ptr<core::mutable_audio_buffer>>& audio_samples_per_stream){impl_->push(audio_samples_per_stream);}
468 core::draw_frame frame_muxer::poll(){return impl_->poll();}
469 uint32_t frame_muxer::calc_nb_frames(uint32_t nb_frames) const {return impl_->calc_nb_frames(nb_frames);}
470 bool frame_muxer::video_ready() const{return impl_->video_ready();}
471 bool frame_muxer::audio_ready() const{return impl_->audio_ready();}
472 boost::rational<int> frame_muxer::out_framerate() const { return impl_->out_framerate(); }
473 }}